| { | |
| "best_metric": 0.782608695652174, | |
| "best_model_checkpoint": "Swin-dmae-DA3-N-Colab\\checkpoint-1170", | |
| "epoch": 117.33333333333333, | |
| "eval_steps": 500, | |
| "global_step": 2640, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 1.8939393939393941e-06, | |
| "loss": 1.4044, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.7878787878787882e-06, | |
| "loss": 1.4177, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "eval_accuracy": 0.43478260869565216, | |
| "eval_loss": 1.3454896211624146, | |
| "eval_runtime": 0.8077, | |
| "eval_samples_per_second": 56.952, | |
| "eval_steps_per_second": 3.714, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 5.681818181818182e-06, | |
| "loss": 1.4099, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 7.5757575757575764e-06, | |
| "loss": 1.4043, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.45652173913043476, | |
| "eval_loss": 1.3475043773651123, | |
| "eval_runtime": 0.5832, | |
| "eval_samples_per_second": 78.881, | |
| "eval_steps_per_second": 5.144, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 9.46969696969697e-06, | |
| "loss": 1.3826, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 1.1363636363636365e-05, | |
| "loss": 1.3628, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "eval_accuracy": 0.45652173913043476, | |
| "eval_loss": 1.339664340019226, | |
| "eval_runtime": 0.6431, | |
| "eval_samples_per_second": 71.523, | |
| "eval_steps_per_second": 4.665, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "learning_rate": 1.3257575757575758e-05, | |
| "loss": 1.3198, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "learning_rate": 1.5151515151515153e-05, | |
| "loss": 1.2586, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "learning_rate": 1.7045454545454546e-05, | |
| "loss": 1.21, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.45652173913043476, | |
| "eval_loss": 1.259264588356018, | |
| "eval_runtime": 0.5817, | |
| "eval_samples_per_second": 79.084, | |
| "eval_steps_per_second": 5.158, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "learning_rate": 1.893939393939394e-05, | |
| "loss": 1.1047, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 4.89, | |
| "learning_rate": 2.0833333333333336e-05, | |
| "loss": 1.0504, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 4.98, | |
| "eval_accuracy": 0.41304347826086957, | |
| "eval_loss": 1.1193994283676147, | |
| "eval_runtime": 0.5866, | |
| "eval_samples_per_second": 78.413, | |
| "eval_steps_per_second": 5.114, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 5.33, | |
| "learning_rate": 2.272727272727273e-05, | |
| "loss": 0.975, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 5.78, | |
| "learning_rate": 2.4621212121212123e-05, | |
| "loss": 0.9129, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.41304347826086957, | |
| "eval_loss": 1.0521512031555176, | |
| "eval_runtime": 0.5861, | |
| "eval_samples_per_second": 78.48, | |
| "eval_steps_per_second": 5.118, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 6.22, | |
| "learning_rate": 2.6515151515151516e-05, | |
| "loss": 0.8413, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 6.67, | |
| "learning_rate": 2.8409090909090912e-05, | |
| "loss": 0.7811, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 6.98, | |
| "eval_accuracy": 0.43478260869565216, | |
| "eval_loss": 1.1183944940567017, | |
| "eval_runtime": 0.6136, | |
| "eval_samples_per_second": 74.962, | |
| "eval_steps_per_second": 4.889, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 7.11, | |
| "learning_rate": 3.0303030303030306e-05, | |
| "loss": 0.7098, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 7.56, | |
| "learning_rate": 3.2196969696969696e-05, | |
| "loss": 0.629, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 3.409090909090909e-05, | |
| "loss": 0.6572, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.5869565217391305, | |
| "eval_loss": 0.995121419429779, | |
| "eval_runtime": 0.6492, | |
| "eval_samples_per_second": 70.86, | |
| "eval_steps_per_second": 4.621, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 8.44, | |
| "learning_rate": 3.598484848484849e-05, | |
| "loss": 0.6062, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 8.89, | |
| "learning_rate": 3.787878787878788e-05, | |
| "loss": 0.5207, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 8.98, | |
| "eval_accuracy": 0.6521739130434783, | |
| "eval_loss": 0.9054760336875916, | |
| "eval_runtime": 0.6757, | |
| "eval_samples_per_second": 68.08, | |
| "eval_steps_per_second": 4.44, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 9.33, | |
| "learning_rate": 3.9772727272727275e-05, | |
| "loss": 0.5179, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 9.78, | |
| "learning_rate": 4.166666666666667e-05, | |
| "loss": 0.6234, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.6086956521739131, | |
| "eval_loss": 0.9276577234268188, | |
| "eval_runtime": 0.8717, | |
| "eval_samples_per_second": 52.769, | |
| "eval_steps_per_second": 3.441, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 10.22, | |
| "learning_rate": 4.356060606060606e-05, | |
| "loss": 0.5149, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 10.67, | |
| "learning_rate": 4.545454545454546e-05, | |
| "loss": 0.4721, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 10.98, | |
| "eval_accuracy": 0.6739130434782609, | |
| "eval_loss": 0.8458214402198792, | |
| "eval_runtime": 0.6357, | |
| "eval_samples_per_second": 72.366, | |
| "eval_steps_per_second": 4.72, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 11.11, | |
| "learning_rate": 4.7348484848484855e-05, | |
| "loss": 0.4504, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 11.56, | |
| "learning_rate": 4.9242424242424245e-05, | |
| "loss": 0.4534, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "learning_rate": 4.9873737373737375e-05, | |
| "loss": 0.3944, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.6521739130434783, | |
| "eval_loss": 0.8836989998817444, | |
| "eval_runtime": 0.6522, | |
| "eval_samples_per_second": 70.533, | |
| "eval_steps_per_second": 4.6, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 12.44, | |
| "learning_rate": 4.966329966329967e-05, | |
| "loss": 0.3923, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 12.89, | |
| "learning_rate": 4.945286195286196e-05, | |
| "loss": 0.3572, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 12.98, | |
| "eval_accuracy": 0.717391304347826, | |
| "eval_loss": 0.8719282746315002, | |
| "eval_runtime": 0.6304, | |
| "eval_samples_per_second": 72.972, | |
| "eval_steps_per_second": 4.759, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 13.33, | |
| "learning_rate": 4.9242424242424245e-05, | |
| "loss": 0.3524, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 13.78, | |
| "learning_rate": 4.903198653198653e-05, | |
| "loss": 0.2911, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.6304347826086957, | |
| "eval_loss": 1.036683201789856, | |
| "eval_runtime": 0.6022, | |
| "eval_samples_per_second": 76.392, | |
| "eval_steps_per_second": 4.982, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 14.22, | |
| "learning_rate": 4.882154882154882e-05, | |
| "loss": 0.3364, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 14.67, | |
| "learning_rate": 4.8611111111111115e-05, | |
| "loss": 0.3224, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 14.98, | |
| "eval_accuracy": 0.6086956521739131, | |
| "eval_loss": 0.9640800356864929, | |
| "eval_runtime": 0.5942, | |
| "eval_samples_per_second": 77.42, | |
| "eval_steps_per_second": 5.049, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 15.11, | |
| "learning_rate": 4.84006734006734e-05, | |
| "loss": 0.2519, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 15.56, | |
| "learning_rate": 4.819023569023569e-05, | |
| "loss": 0.266, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "learning_rate": 4.797979797979798e-05, | |
| "loss": 0.2663, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.5869565217391305, | |
| "eval_loss": 1.3670061826705933, | |
| "eval_runtime": 0.6168, | |
| "eval_samples_per_second": 74.573, | |
| "eval_steps_per_second": 4.863, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 16.44, | |
| "learning_rate": 4.7769360269360266e-05, | |
| "loss": 0.3245, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 16.89, | |
| "learning_rate": 4.755892255892256e-05, | |
| "loss": 0.2132, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 16.98, | |
| "eval_accuracy": 0.6304347826086957, | |
| "eval_loss": 1.309033751487732, | |
| "eval_runtime": 0.6001, | |
| "eval_samples_per_second": 76.649, | |
| "eval_steps_per_second": 4.999, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 17.33, | |
| "learning_rate": 4.7348484848484855e-05, | |
| "loss": 0.2687, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 17.78, | |
| "learning_rate": 4.713804713804714e-05, | |
| "loss": 0.266, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.717391304347826, | |
| "eval_loss": 1.1246769428253174, | |
| "eval_runtime": 0.6912, | |
| "eval_samples_per_second": 66.554, | |
| "eval_steps_per_second": 4.34, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 18.22, | |
| "learning_rate": 4.692760942760943e-05, | |
| "loss": 0.2472, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 18.67, | |
| "learning_rate": 4.671717171717172e-05, | |
| "loss": 0.1929, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 18.98, | |
| "eval_accuracy": 0.6086956521739131, | |
| "eval_loss": 1.1458401679992676, | |
| "eval_runtime": 0.6044, | |
| "eval_samples_per_second": 76.11, | |
| "eval_steps_per_second": 4.964, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 19.11, | |
| "learning_rate": 4.6506734006734006e-05, | |
| "loss": 0.2374, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 19.56, | |
| "learning_rate": 4.62962962962963e-05, | |
| "loss": 0.2191, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "learning_rate": 4.608585858585859e-05, | |
| "loss": 0.1831, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 1.026720404624939, | |
| "eval_runtime": 0.6282, | |
| "eval_samples_per_second": 73.23, | |
| "eval_steps_per_second": 4.776, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 20.44, | |
| "learning_rate": 4.5875420875420876e-05, | |
| "loss": 0.196, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 20.89, | |
| "learning_rate": 4.5664983164983164e-05, | |
| "loss": 0.2298, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 20.98, | |
| "eval_accuracy": 0.6304347826086957, | |
| "eval_loss": 1.1862982511520386, | |
| "eval_runtime": 0.6377, | |
| "eval_samples_per_second": 72.135, | |
| "eval_steps_per_second": 4.704, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 21.33, | |
| "learning_rate": 4.545454545454546e-05, | |
| "loss": 0.1991, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 21.78, | |
| "learning_rate": 4.5244107744107746e-05, | |
| "loss": 0.1825, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_accuracy": 0.6956521739130435, | |
| "eval_loss": 1.0457817316055298, | |
| "eval_runtime": 0.6892, | |
| "eval_samples_per_second": 66.748, | |
| "eval_steps_per_second": 4.353, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 22.22, | |
| "learning_rate": 4.5033670033670034e-05, | |
| "loss": 0.1844, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 22.67, | |
| "learning_rate": 4.482323232323233e-05, | |
| "loss": 0.1701, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 22.98, | |
| "eval_accuracy": 0.6086956521739131, | |
| "eval_loss": 1.3519866466522217, | |
| "eval_runtime": 0.6687, | |
| "eval_samples_per_second": 68.792, | |
| "eval_steps_per_second": 4.486, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 23.11, | |
| "learning_rate": 4.4612794612794616e-05, | |
| "loss": 0.1672, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 23.56, | |
| "learning_rate": 4.4402356902356904e-05, | |
| "loss": 0.1697, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "learning_rate": 4.41919191919192e-05, | |
| "loss": 0.1964, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.6521739130434783, | |
| "eval_loss": 1.3927210569381714, | |
| "eval_runtime": 0.6387, | |
| "eval_samples_per_second": 72.025, | |
| "eval_steps_per_second": 4.697, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 24.44, | |
| "learning_rate": 4.3981481481481486e-05, | |
| "loss": 0.1503, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 24.89, | |
| "learning_rate": 4.3771043771043774e-05, | |
| "loss": 0.1731, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 24.98, | |
| "eval_accuracy": 0.6521739130434783, | |
| "eval_loss": 1.4360977411270142, | |
| "eval_runtime": 0.7162, | |
| "eval_samples_per_second": 64.231, | |
| "eval_steps_per_second": 4.189, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 25.33, | |
| "learning_rate": 4.356060606060606e-05, | |
| "loss": 0.1654, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 25.78, | |
| "learning_rate": 4.335016835016835e-05, | |
| "loss": 0.1565, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_accuracy": 0.6956521739130435, | |
| "eval_loss": 1.0449482202529907, | |
| "eval_runtime": 0.6361, | |
| "eval_samples_per_second": 72.311, | |
| "eval_steps_per_second": 4.716, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 26.22, | |
| "learning_rate": 4.3139730639730644e-05, | |
| "loss": 0.1639, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 26.67, | |
| "learning_rate": 4.292929292929293e-05, | |
| "loss": 0.1844, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 26.98, | |
| "eval_accuracy": 0.6086956521739131, | |
| "eval_loss": 1.3166261911392212, | |
| "eval_runtime": 0.6943, | |
| "eval_samples_per_second": 66.252, | |
| "eval_steps_per_second": 4.321, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 27.11, | |
| "learning_rate": 4.271885521885522e-05, | |
| "loss": 0.1478, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 27.56, | |
| "learning_rate": 4.250841750841751e-05, | |
| "loss": 0.1709, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "learning_rate": 4.2297979797979795e-05, | |
| "loss": 0.1187, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.6304347826086957, | |
| "eval_loss": 1.7950323820114136, | |
| "eval_runtime": 0.7205, | |
| "eval_samples_per_second": 63.842, | |
| "eval_steps_per_second": 4.164, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 28.44, | |
| "learning_rate": 4.208754208754209e-05, | |
| "loss": 0.1371, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 28.89, | |
| "learning_rate": 4.1877104377104384e-05, | |
| "loss": 0.129, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 28.98, | |
| "eval_accuracy": 0.6956521739130435, | |
| "eval_loss": 1.2752825021743774, | |
| "eval_runtime": 0.6192, | |
| "eval_samples_per_second": 74.293, | |
| "eval_steps_per_second": 4.845, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 29.33, | |
| "learning_rate": 4.166666666666667e-05, | |
| "loss": 0.1172, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 29.78, | |
| "learning_rate": 4.145622895622896e-05, | |
| "loss": 0.1269, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_accuracy": 0.6739130434782609, | |
| "eval_loss": 1.4243661165237427, | |
| "eval_runtime": 0.6637, | |
| "eval_samples_per_second": 69.309, | |
| "eval_steps_per_second": 4.52, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 30.22, | |
| "learning_rate": 4.124579124579125e-05, | |
| "loss": 0.1689, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 30.67, | |
| "learning_rate": 4.1035353535353535e-05, | |
| "loss": 0.1522, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 30.98, | |
| "eval_accuracy": 0.6521739130434783, | |
| "eval_loss": 1.4873253107070923, | |
| "eval_runtime": 0.6497, | |
| "eval_samples_per_second": 70.807, | |
| "eval_steps_per_second": 4.618, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 31.11, | |
| "learning_rate": 4.082491582491583e-05, | |
| "loss": 0.1308, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 31.56, | |
| "learning_rate": 4.061447811447812e-05, | |
| "loss": 0.1535, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "learning_rate": 4.0404040404040405e-05, | |
| "loss": 0.1497, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_accuracy": 0.6739130434782609, | |
| "eval_loss": 1.3692686557769775, | |
| "eval_runtime": 0.8302, | |
| "eval_samples_per_second": 55.406, | |
| "eval_steps_per_second": 3.613, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 32.44, | |
| "learning_rate": 4.019360269360269e-05, | |
| "loss": 0.122, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 32.89, | |
| "learning_rate": 3.998316498316498e-05, | |
| "loss": 0.1215, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 32.98, | |
| "eval_accuracy": 0.6739130434782609, | |
| "eval_loss": 1.8168487548828125, | |
| "eval_runtime": 0.6432, | |
| "eval_samples_per_second": 71.52, | |
| "eval_steps_per_second": 4.664, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 33.33, | |
| "learning_rate": 3.9772727272727275e-05, | |
| "loss": 0.1429, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 33.78, | |
| "learning_rate": 3.956228956228956e-05, | |
| "loss": 0.1049, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_accuracy": 0.7608695652173914, | |
| "eval_loss": 1.2748662233352661, | |
| "eval_runtime": 0.6487, | |
| "eval_samples_per_second": 70.915, | |
| "eval_steps_per_second": 4.625, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 34.22, | |
| "learning_rate": 3.935185185185186e-05, | |
| "loss": 0.0886, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 34.67, | |
| "learning_rate": 3.9141414141414145e-05, | |
| "loss": 0.1013, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 34.98, | |
| "eval_accuracy": 0.7608695652173914, | |
| "eval_loss": 1.5097851753234863, | |
| "eval_runtime": 0.6176, | |
| "eval_samples_per_second": 74.477, | |
| "eval_steps_per_second": 4.857, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 35.11, | |
| "learning_rate": 3.893097643097643e-05, | |
| "loss": 0.0993, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 35.56, | |
| "learning_rate": 3.872053872053872e-05, | |
| "loss": 0.1178, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "learning_rate": 3.8510101010101015e-05, | |
| "loss": 0.1499, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_accuracy": 0.6304347826086957, | |
| "eval_loss": 1.6463576555252075, | |
| "eval_runtime": 0.9393, | |
| "eval_samples_per_second": 48.974, | |
| "eval_steps_per_second": 3.194, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 36.44, | |
| "learning_rate": 3.82996632996633e-05, | |
| "loss": 0.132, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 36.89, | |
| "learning_rate": 3.808922558922559e-05, | |
| "loss": 0.0823, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 36.98, | |
| "eval_accuracy": 0.6956521739130435, | |
| "eval_loss": 1.7892482280731201, | |
| "eval_runtime": 0.6542, | |
| "eval_samples_per_second": 70.32, | |
| "eval_steps_per_second": 4.586, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 37.33, | |
| "learning_rate": 3.787878787878788e-05, | |
| "loss": 0.1058, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 37.78, | |
| "learning_rate": 3.7668350168350167e-05, | |
| "loss": 0.092, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "eval_accuracy": 0.6739130434782609, | |
| "eval_loss": 1.6448333263397217, | |
| "eval_runtime": 0.7064, | |
| "eval_samples_per_second": 65.122, | |
| "eval_steps_per_second": 4.247, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 38.22, | |
| "learning_rate": 3.745791245791246e-05, | |
| "loss": 0.0832, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 38.67, | |
| "learning_rate": 3.724747474747475e-05, | |
| "loss": 0.1076, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 38.98, | |
| "eval_accuracy": 0.6304347826086957, | |
| "eval_loss": 1.6955077648162842, | |
| "eval_runtime": 0.6982, | |
| "eval_samples_per_second": 65.886, | |
| "eval_steps_per_second": 4.297, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 39.11, | |
| "learning_rate": 3.7037037037037037e-05, | |
| "loss": 0.1159, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 39.56, | |
| "learning_rate": 3.6826599326599324e-05, | |
| "loss": 0.1204, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "learning_rate": 3.661616161616162e-05, | |
| "loss": 0.1163, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_accuracy": 0.6739130434782609, | |
| "eval_loss": 1.5779627561569214, | |
| "eval_runtime": 0.6312, | |
| "eval_samples_per_second": 72.882, | |
| "eval_steps_per_second": 4.753, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 40.44, | |
| "learning_rate": 3.6405723905723907e-05, | |
| "loss": 0.1122, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 40.89, | |
| "learning_rate": 3.61952861952862e-05, | |
| "loss": 0.0952, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 40.98, | |
| "eval_accuracy": 0.6521739130434783, | |
| "eval_loss": 1.8121063709259033, | |
| "eval_runtime": 0.7017, | |
| "eval_samples_per_second": 65.557, | |
| "eval_steps_per_second": 4.275, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 41.33, | |
| "learning_rate": 3.598484848484849e-05, | |
| "loss": 0.0946, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 41.78, | |
| "learning_rate": 3.577441077441078e-05, | |
| "loss": 0.0833, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 42.0, | |
| "eval_accuracy": 0.717391304347826, | |
| "eval_loss": 1.4458924531936646, | |
| "eval_runtime": 0.6203, | |
| "eval_samples_per_second": 74.161, | |
| "eval_steps_per_second": 4.837, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 42.22, | |
| "learning_rate": 3.5563973063973064e-05, | |
| "loss": 0.0783, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 42.67, | |
| "learning_rate": 3.535353535353535e-05, | |
| "loss": 0.1045, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 42.98, | |
| "eval_accuracy": 0.6739130434782609, | |
| "eval_loss": 1.7306798696517944, | |
| "eval_runtime": 0.6341, | |
| "eval_samples_per_second": 72.539, | |
| "eval_steps_per_second": 4.731, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 43.11, | |
| "learning_rate": 3.514309764309765e-05, | |
| "loss": 0.0713, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 43.56, | |
| "learning_rate": 3.4932659932659934e-05, | |
| "loss": 0.1254, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "learning_rate": 3.472222222222222e-05, | |
| "loss": 0.094, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 1.4969877004623413, | |
| "eval_runtime": 0.6602, | |
| "eval_samples_per_second": 69.678, | |
| "eval_steps_per_second": 4.544, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 44.44, | |
| "learning_rate": 3.451178451178451e-05, | |
| "loss": 0.108, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 44.89, | |
| "learning_rate": 3.43013468013468e-05, | |
| "loss": 0.092, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 44.98, | |
| "eval_accuracy": 0.717391304347826, | |
| "eval_loss": 1.5766059160232544, | |
| "eval_runtime": 0.6201, | |
| "eval_samples_per_second": 74.177, | |
| "eval_steps_per_second": 4.838, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 45.33, | |
| "learning_rate": 3.409090909090909e-05, | |
| "loss": 0.1059, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 45.78, | |
| "learning_rate": 3.388047138047139e-05, | |
| "loss": 0.0863, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 46.0, | |
| "eval_accuracy": 0.6521739130434783, | |
| "eval_loss": 1.7600295543670654, | |
| "eval_runtime": 0.7127, | |
| "eval_samples_per_second": 64.546, | |
| "eval_steps_per_second": 4.21, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 46.22, | |
| "learning_rate": 3.3670033670033675e-05, | |
| "loss": 0.106, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 46.67, | |
| "learning_rate": 3.345959595959596e-05, | |
| "loss": 0.101, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 46.98, | |
| "eval_accuracy": 0.6956521739130435, | |
| "eval_loss": 1.4763251543045044, | |
| "eval_runtime": 0.6232, | |
| "eval_samples_per_second": 73.817, | |
| "eval_steps_per_second": 4.814, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 47.11, | |
| "learning_rate": 3.324915824915825e-05, | |
| "loss": 0.0873, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 47.56, | |
| "learning_rate": 3.3038720538720545e-05, | |
| "loss": 0.0731, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "learning_rate": 3.282828282828283e-05, | |
| "loss": 0.0995, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "eval_accuracy": 0.6521739130434783, | |
| "eval_loss": 2.0017707347869873, | |
| "eval_runtime": 0.8091, | |
| "eval_samples_per_second": 56.852, | |
| "eval_steps_per_second": 3.708, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 48.44, | |
| "learning_rate": 3.261784511784512e-05, | |
| "loss": 0.1169, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 48.89, | |
| "learning_rate": 3.240740740740741e-05, | |
| "loss": 0.0893, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 48.98, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 1.4871541261672974, | |
| "eval_runtime": 0.6386, | |
| "eval_samples_per_second": 72.027, | |
| "eval_steps_per_second": 4.697, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 49.33, | |
| "learning_rate": 3.2196969696969696e-05, | |
| "loss": 0.0607, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 49.78, | |
| "learning_rate": 3.198653198653199e-05, | |
| "loss": 0.0965, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 1.6165415048599243, | |
| "eval_runtime": 0.6157, | |
| "eval_samples_per_second": 74.713, | |
| "eval_steps_per_second": 4.873, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 50.22, | |
| "learning_rate": 3.177609427609428e-05, | |
| "loss": 0.079, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 50.67, | |
| "learning_rate": 3.1565656565656566e-05, | |
| "loss": 0.0595, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 50.98, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 1.66079580783844, | |
| "eval_runtime": 0.6159, | |
| "eval_samples_per_second": 74.687, | |
| "eval_steps_per_second": 4.871, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 51.11, | |
| "learning_rate": 3.135521885521885e-05, | |
| "loss": 0.0755, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 51.56, | |
| "learning_rate": 3.114478114478115e-05, | |
| "loss": 0.0702, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 52.0, | |
| "learning_rate": 3.0934343434343436e-05, | |
| "loss": 0.0606, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 52.0, | |
| "eval_accuracy": 0.782608695652174, | |
| "eval_loss": 1.6604318618774414, | |
| "eval_runtime": 0.6672, | |
| "eval_samples_per_second": 68.949, | |
| "eval_steps_per_second": 4.497, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 52.44, | |
| "learning_rate": 3.072390572390573e-05, | |
| "loss": 0.0722, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 52.89, | |
| "learning_rate": 3.0513468013468015e-05, | |
| "loss": 0.0794, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 52.98, | |
| "eval_accuracy": 0.717391304347826, | |
| "eval_loss": 1.9967399835586548, | |
| "eval_runtime": 0.6242, | |
| "eval_samples_per_second": 73.688, | |
| "eval_steps_per_second": 4.806, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 53.33, | |
| "learning_rate": 3.0303030303030306e-05, | |
| "loss": 0.0731, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 53.78, | |
| "learning_rate": 3.0092592592592593e-05, | |
| "loss": 0.0919, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 54.0, | |
| "eval_accuracy": 0.6956521739130435, | |
| "eval_loss": 1.7727715969085693, | |
| "eval_runtime": 0.6228, | |
| "eval_samples_per_second": 73.864, | |
| "eval_steps_per_second": 4.817, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 54.22, | |
| "learning_rate": 2.9882154882154885e-05, | |
| "loss": 0.0776, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 54.67, | |
| "learning_rate": 2.9671717171717172e-05, | |
| "loss": 0.0666, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 54.98, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 1.7363907098770142, | |
| "eval_runtime": 0.6531, | |
| "eval_samples_per_second": 70.43, | |
| "eval_steps_per_second": 4.593, | |
| "step": 1237 | |
| }, | |
| { | |
| "epoch": 55.11, | |
| "learning_rate": 2.946127946127946e-05, | |
| "loss": 0.0663, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 55.56, | |
| "learning_rate": 2.925084175084175e-05, | |
| "loss": 0.0574, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 56.0, | |
| "learning_rate": 2.904040404040404e-05, | |
| "loss": 0.0842, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 56.0, | |
| "eval_accuracy": 0.7608695652173914, | |
| "eval_loss": 1.6661043167114258, | |
| "eval_runtime": 0.6312, | |
| "eval_samples_per_second": 72.88, | |
| "eval_steps_per_second": 4.753, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 56.44, | |
| "learning_rate": 2.882996632996633e-05, | |
| "loss": 0.0545, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 56.89, | |
| "learning_rate": 2.8619528619528618e-05, | |
| "loss": 0.0781, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 56.98, | |
| "eval_accuracy": 0.717391304347826, | |
| "eval_loss": 1.9340128898620605, | |
| "eval_runtime": 0.9102, | |
| "eval_samples_per_second": 50.537, | |
| "eval_steps_per_second": 3.296, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 57.33, | |
| "learning_rate": 2.8409090909090912e-05, | |
| "loss": 0.0455, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 57.78, | |
| "learning_rate": 2.8198653198653204e-05, | |
| "loss": 0.0565, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 58.0, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 1.7399216890335083, | |
| "eval_runtime": 0.6902, | |
| "eval_samples_per_second": 66.647, | |
| "eval_steps_per_second": 4.347, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 58.22, | |
| "learning_rate": 2.798821548821549e-05, | |
| "loss": 0.0867, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 58.67, | |
| "learning_rate": 2.777777777777778e-05, | |
| "loss": 0.0939, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 58.98, | |
| "eval_accuracy": 0.7608695652173914, | |
| "eval_loss": 1.664444088935852, | |
| "eval_runtime": 0.6351, | |
| "eval_samples_per_second": 72.424, | |
| "eval_steps_per_second": 4.723, | |
| "step": 1327 | |
| }, | |
| { | |
| "epoch": 59.11, | |
| "learning_rate": 2.756734006734007e-05, | |
| "loss": 0.0603, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 59.56, | |
| "learning_rate": 2.7356902356902358e-05, | |
| "loss": 0.0605, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "learning_rate": 2.714646464646465e-05, | |
| "loss": 0.0666, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "eval_accuracy": 0.6956521739130435, | |
| "eval_loss": 1.6804474592208862, | |
| "eval_runtime": 0.6187, | |
| "eval_samples_per_second": 74.347, | |
| "eval_steps_per_second": 4.849, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 60.44, | |
| "learning_rate": 2.6936026936026937e-05, | |
| "loss": 0.045, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 60.89, | |
| "learning_rate": 2.6725589225589225e-05, | |
| "loss": 0.0577, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 60.98, | |
| "eval_accuracy": 0.6956521739130435, | |
| "eval_loss": 1.8968379497528076, | |
| "eval_runtime": 0.6547, | |
| "eval_samples_per_second": 70.263, | |
| "eval_steps_per_second": 4.582, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 61.33, | |
| "learning_rate": 2.6515151515151516e-05, | |
| "loss": 0.0895, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 61.78, | |
| "learning_rate": 2.6304713804713804e-05, | |
| "loss": 0.0534, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 62.0, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 1.8967163562774658, | |
| "eval_runtime": 0.7952, | |
| "eval_samples_per_second": 57.844, | |
| "eval_steps_per_second": 3.772, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 62.22, | |
| "learning_rate": 2.6094276094276095e-05, | |
| "loss": 0.1, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 62.67, | |
| "learning_rate": 2.5883838383838382e-05, | |
| "loss": 0.0592, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 62.98, | |
| "eval_accuracy": 0.717391304347826, | |
| "eval_loss": 2.0113070011138916, | |
| "eval_runtime": 0.6402, | |
| "eval_samples_per_second": 71.858, | |
| "eval_steps_per_second": 4.686, | |
| "step": 1417 | |
| }, | |
| { | |
| "epoch": 63.11, | |
| "learning_rate": 2.5673400673400677e-05, | |
| "loss": 0.0862, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 63.56, | |
| "learning_rate": 2.5462962962962965e-05, | |
| "loss": 0.0761, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 64.0, | |
| "learning_rate": 2.5252525252525256e-05, | |
| "loss": 0.0732, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 64.0, | |
| "eval_accuracy": 0.6521739130434783, | |
| "eval_loss": 1.9213429689407349, | |
| "eval_runtime": 0.6532, | |
| "eval_samples_per_second": 70.426, | |
| "eval_steps_per_second": 4.593, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 64.44, | |
| "learning_rate": 2.5042087542087544e-05, | |
| "loss": 0.0412, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 64.89, | |
| "learning_rate": 2.4831649831649835e-05, | |
| "loss": 0.0768, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 64.98, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 1.8911714553833008, | |
| "eval_runtime": 0.6397, | |
| "eval_samples_per_second": 71.914, | |
| "eval_steps_per_second": 4.69, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 65.33, | |
| "learning_rate": 2.4621212121212123e-05, | |
| "loss": 0.0724, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 65.78, | |
| "learning_rate": 2.441077441077441e-05, | |
| "loss": 0.0415, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 66.0, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 1.7955325841903687, | |
| "eval_runtime": 0.6727, | |
| "eval_samples_per_second": 68.382, | |
| "eval_steps_per_second": 4.46, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 66.22, | |
| "learning_rate": 2.42003367003367e-05, | |
| "loss": 0.0646, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 66.67, | |
| "learning_rate": 2.398989898989899e-05, | |
| "loss": 0.036, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 66.98, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 1.6583586931228638, | |
| "eval_runtime": 0.7332, | |
| "eval_samples_per_second": 62.739, | |
| "eval_steps_per_second": 4.092, | |
| "step": 1507 | |
| }, | |
| { | |
| "epoch": 67.11, | |
| "learning_rate": 2.377946127946128e-05, | |
| "loss": 0.0679, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 67.56, | |
| "learning_rate": 2.356902356902357e-05, | |
| "loss": 0.0601, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 68.0, | |
| "learning_rate": 2.335858585858586e-05, | |
| "loss": 0.0617, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 68.0, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 1.9461021423339844, | |
| "eval_runtime": 0.6382, | |
| "eval_samples_per_second": 72.082, | |
| "eval_steps_per_second": 4.701, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 68.44, | |
| "learning_rate": 2.314814814814815e-05, | |
| "loss": 0.0743, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 68.89, | |
| "learning_rate": 2.2937710437710438e-05, | |
| "loss": 0.0622, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 68.98, | |
| "eval_accuracy": 0.782608695652174, | |
| "eval_loss": 1.7301748991012573, | |
| "eval_runtime": 0.6432, | |
| "eval_samples_per_second": 71.522, | |
| "eval_steps_per_second": 4.664, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 69.33, | |
| "learning_rate": 2.272727272727273e-05, | |
| "loss": 0.0592, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 69.78, | |
| "learning_rate": 2.2516835016835017e-05, | |
| "loss": 0.0362, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 70.0, | |
| "eval_accuracy": 0.7608695652173914, | |
| "eval_loss": 1.7996348142623901, | |
| "eval_runtime": 0.6392, | |
| "eval_samples_per_second": 71.968, | |
| "eval_steps_per_second": 4.694, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 70.22, | |
| "learning_rate": 2.2306397306397308e-05, | |
| "loss": 0.0514, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 70.67, | |
| "learning_rate": 2.20959595959596e-05, | |
| "loss": 0.0526, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 70.98, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 1.6478713750839233, | |
| "eval_runtime": 0.6542, | |
| "eval_samples_per_second": 70.32, | |
| "eval_steps_per_second": 4.586, | |
| "step": 1597 | |
| }, | |
| { | |
| "epoch": 71.11, | |
| "learning_rate": 2.1885521885521887e-05, | |
| "loss": 0.0944, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 71.56, | |
| "learning_rate": 2.1675084175084175e-05, | |
| "loss": 0.0458, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 72.0, | |
| "learning_rate": 2.1464646464646466e-05, | |
| "loss": 0.0493, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 72.0, | |
| "eval_accuracy": 0.6739130434782609, | |
| "eval_loss": 1.725121021270752, | |
| "eval_runtime": 0.6347, | |
| "eval_samples_per_second": 72.476, | |
| "eval_steps_per_second": 4.727, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 72.44, | |
| "learning_rate": 2.1254208754208754e-05, | |
| "loss": 0.073, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 72.89, | |
| "learning_rate": 2.1043771043771045e-05, | |
| "loss": 0.0703, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 72.98, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 2.0377814769744873, | |
| "eval_runtime": 0.7602, | |
| "eval_samples_per_second": 60.513, | |
| "eval_steps_per_second": 3.946, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 73.33, | |
| "learning_rate": 2.0833333333333336e-05, | |
| "loss": 0.0561, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 73.78, | |
| "learning_rate": 2.0622895622895624e-05, | |
| "loss": 0.0692, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 74.0, | |
| "eval_accuracy": 0.6521739130434783, | |
| "eval_loss": 2.099923610687256, | |
| "eval_runtime": 0.6142, | |
| "eval_samples_per_second": 74.893, | |
| "eval_steps_per_second": 4.884, | |
| "step": 1665 | |
| }, | |
| { | |
| "epoch": 74.22, | |
| "learning_rate": 2.0412457912457915e-05, | |
| "loss": 0.0532, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 74.67, | |
| "learning_rate": 2.0202020202020203e-05, | |
| "loss": 0.0396, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 74.98, | |
| "eval_accuracy": 0.6739130434782609, | |
| "eval_loss": 2.00740122795105, | |
| "eval_runtime": 0.6082, | |
| "eval_samples_per_second": 75.638, | |
| "eval_steps_per_second": 4.933, | |
| "step": 1687 | |
| }, | |
| { | |
| "epoch": 75.11, | |
| "learning_rate": 1.999158249158249e-05, | |
| "loss": 0.038, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 75.56, | |
| "learning_rate": 1.978114478114478e-05, | |
| "loss": 0.0209, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 76.0, | |
| "learning_rate": 1.9570707070707073e-05, | |
| "loss": 0.0505, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 76.0, | |
| "eval_accuracy": 0.717391304347826, | |
| "eval_loss": 1.7462636232376099, | |
| "eval_runtime": 0.6273, | |
| "eval_samples_per_second": 73.325, | |
| "eval_steps_per_second": 4.782, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 76.44, | |
| "learning_rate": 1.936026936026936e-05, | |
| "loss": 0.0392, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 76.89, | |
| "learning_rate": 1.914983164983165e-05, | |
| "loss": 0.0512, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 76.98, | |
| "eval_accuracy": 0.7608695652173914, | |
| "eval_loss": 1.640069603919983, | |
| "eval_runtime": 0.6271, | |
| "eval_samples_per_second": 73.348, | |
| "eval_steps_per_second": 4.784, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 77.33, | |
| "learning_rate": 1.893939393939394e-05, | |
| "loss": 0.0625, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 77.78, | |
| "learning_rate": 1.872895622895623e-05, | |
| "loss": 0.0653, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 78.0, | |
| "eval_accuracy": 0.6956521739130435, | |
| "eval_loss": 1.7835583686828613, | |
| "eval_runtime": 0.6087, | |
| "eval_samples_per_second": 75.57, | |
| "eval_steps_per_second": 4.928, | |
| "step": 1755 | |
| }, | |
| { | |
| "epoch": 78.22, | |
| "learning_rate": 1.8518518518518518e-05, | |
| "loss": 0.0487, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 78.67, | |
| "learning_rate": 1.830808080808081e-05, | |
| "loss": 0.0764, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 78.98, | |
| "eval_accuracy": 0.7608695652173914, | |
| "eval_loss": 1.7904179096221924, | |
| "eval_runtime": 0.6272, | |
| "eval_samples_per_second": 73.344, | |
| "eval_steps_per_second": 4.783, | |
| "step": 1777 | |
| }, | |
| { | |
| "epoch": 79.11, | |
| "learning_rate": 1.80976430976431e-05, | |
| "loss": 0.0599, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 79.56, | |
| "learning_rate": 1.788720538720539e-05, | |
| "loss": 0.0629, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 80.0, | |
| "learning_rate": 1.7676767676767676e-05, | |
| "loss": 0.0598, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 80.0, | |
| "eval_accuracy": 0.7608695652173914, | |
| "eval_loss": 1.8720263242721558, | |
| "eval_runtime": 0.6152, | |
| "eval_samples_per_second": 74.778, | |
| "eval_steps_per_second": 4.877, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 80.44, | |
| "learning_rate": 1.7466329966329967e-05, | |
| "loss": 0.0561, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 80.89, | |
| "learning_rate": 1.7255892255892255e-05, | |
| "loss": 0.0523, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 80.98, | |
| "eval_accuracy": 0.717391304347826, | |
| "eval_loss": 1.7355570793151855, | |
| "eval_runtime": 0.6281, | |
| "eval_samples_per_second": 73.232, | |
| "eval_steps_per_second": 4.776, | |
| "step": 1822 | |
| }, | |
| { | |
| "epoch": 81.33, | |
| "learning_rate": 1.7045454545454546e-05, | |
| "loss": 0.0344, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 81.78, | |
| "learning_rate": 1.6835016835016837e-05, | |
| "loss": 0.0499, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 82.0, | |
| "eval_accuracy": 0.7608695652173914, | |
| "eval_loss": 1.9222629070281982, | |
| "eval_runtime": 0.6387, | |
| "eval_samples_per_second": 72.02, | |
| "eval_steps_per_second": 4.697, | |
| "step": 1845 | |
| }, | |
| { | |
| "epoch": 82.22, | |
| "learning_rate": 1.6624579124579125e-05, | |
| "loss": 0.0562, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 82.67, | |
| "learning_rate": 1.6414141414141416e-05, | |
| "loss": 0.0841, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 82.98, | |
| "eval_accuracy": 0.7608695652173914, | |
| "eval_loss": 1.9060295820236206, | |
| "eval_runtime": 0.6047, | |
| "eval_samples_per_second": 76.075, | |
| "eval_steps_per_second": 4.961, | |
| "step": 1867 | |
| }, | |
| { | |
| "epoch": 83.11, | |
| "learning_rate": 1.6203703703703704e-05, | |
| "loss": 0.0401, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 83.56, | |
| "learning_rate": 1.5993265993265995e-05, | |
| "loss": 0.042, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 84.0, | |
| "learning_rate": 1.5782828282828283e-05, | |
| "loss": 0.0597, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 84.0, | |
| "eval_accuracy": 0.717391304347826, | |
| "eval_loss": 1.9091507196426392, | |
| "eval_runtime": 0.6107, | |
| "eval_samples_per_second": 75.323, | |
| "eval_steps_per_second": 4.912, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 84.44, | |
| "learning_rate": 1.5572390572390574e-05, | |
| "loss": 0.0563, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 84.89, | |
| "learning_rate": 1.5361952861952865e-05, | |
| "loss": 0.0696, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 84.98, | |
| "eval_accuracy": 0.6956521739130435, | |
| "eval_loss": 1.9534136056900024, | |
| "eval_runtime": 0.6437, | |
| "eval_samples_per_second": 71.465, | |
| "eval_steps_per_second": 4.661, | |
| "step": 1912 | |
| }, | |
| { | |
| "epoch": 85.33, | |
| "learning_rate": 1.5151515151515153e-05, | |
| "loss": 0.026, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 85.78, | |
| "learning_rate": 1.4941077441077442e-05, | |
| "loss": 0.0566, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 86.0, | |
| "eval_accuracy": 0.7608695652173914, | |
| "eval_loss": 1.935554027557373, | |
| "eval_runtime": 0.6054, | |
| "eval_samples_per_second": 75.977, | |
| "eval_steps_per_second": 4.955, | |
| "step": 1935 | |
| }, | |
| { | |
| "epoch": 86.22, | |
| "learning_rate": 1.473063973063973e-05, | |
| "loss": 0.0326, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 86.67, | |
| "learning_rate": 1.452020202020202e-05, | |
| "loss": 0.0435, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 86.98, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 2.056638717651367, | |
| "eval_runtime": 0.6052, | |
| "eval_samples_per_second": 76.011, | |
| "eval_steps_per_second": 4.957, | |
| "step": 1957 | |
| }, | |
| { | |
| "epoch": 87.11, | |
| "learning_rate": 1.4309764309764309e-05, | |
| "loss": 0.0598, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 87.56, | |
| "learning_rate": 1.4099326599326602e-05, | |
| "loss": 0.0348, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 88.0, | |
| "learning_rate": 1.388888888888889e-05, | |
| "loss": 0.024, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 88.0, | |
| "eval_accuracy": 0.717391304347826, | |
| "eval_loss": 1.8717210292816162, | |
| "eval_runtime": 0.6198, | |
| "eval_samples_per_second": 74.217, | |
| "eval_steps_per_second": 4.84, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 88.44, | |
| "learning_rate": 1.3678451178451179e-05, | |
| "loss": 0.0265, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 88.89, | |
| "learning_rate": 1.3468013468013468e-05, | |
| "loss": 0.0137, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 88.98, | |
| "eval_accuracy": 0.717391304347826, | |
| "eval_loss": 2.027980089187622, | |
| "eval_runtime": 0.6032, | |
| "eval_samples_per_second": 76.263, | |
| "eval_steps_per_second": 4.974, | |
| "step": 2002 | |
| }, | |
| { | |
| "epoch": 89.33, | |
| "learning_rate": 1.3257575757575758e-05, | |
| "loss": 0.0516, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 89.78, | |
| "learning_rate": 1.3047138047138047e-05, | |
| "loss": 0.0663, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 90.0, | |
| "eval_accuracy": 0.717391304347826, | |
| "eval_loss": 1.8828924894332886, | |
| "eval_runtime": 0.6197, | |
| "eval_samples_per_second": 74.233, | |
| "eval_steps_per_second": 4.841, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 90.22, | |
| "learning_rate": 1.2836700336700338e-05, | |
| "loss": 0.0603, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 90.67, | |
| "learning_rate": 1.2626262626262628e-05, | |
| "loss": 0.035, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 90.98, | |
| "eval_accuracy": 0.717391304347826, | |
| "eval_loss": 1.9687623977661133, | |
| "eval_runtime": 0.6247, | |
| "eval_samples_per_second": 73.641, | |
| "eval_steps_per_second": 4.803, | |
| "step": 2047 | |
| }, | |
| { | |
| "epoch": 91.11, | |
| "learning_rate": 1.2415824915824917e-05, | |
| "loss": 0.0285, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 91.56, | |
| "learning_rate": 1.2205387205387205e-05, | |
| "loss": 0.0435, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 92.0, | |
| "learning_rate": 1.1994949494949495e-05, | |
| "loss": 0.0504, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 92.0, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 2.0455873012542725, | |
| "eval_runtime": 0.6222, | |
| "eval_samples_per_second": 73.935, | |
| "eval_steps_per_second": 4.822, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 92.44, | |
| "learning_rate": 1.1784511784511786e-05, | |
| "loss": 0.039, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 92.89, | |
| "learning_rate": 1.1574074074074075e-05, | |
| "loss": 0.025, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 92.98, | |
| "eval_accuracy": 0.6304347826086957, | |
| "eval_loss": 2.1567869186401367, | |
| "eval_runtime": 0.5982, | |
| "eval_samples_per_second": 76.901, | |
| "eval_steps_per_second": 5.015, | |
| "step": 2092 | |
| }, | |
| { | |
| "epoch": 93.33, | |
| "learning_rate": 1.1363636363636365e-05, | |
| "loss": 0.0177, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 93.78, | |
| "learning_rate": 1.1153198653198654e-05, | |
| "loss": 0.0405, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 94.0, | |
| "eval_accuracy": 0.6304347826086957, | |
| "eval_loss": 2.0627565383911133, | |
| "eval_runtime": 0.6632, | |
| "eval_samples_per_second": 69.363, | |
| "eval_steps_per_second": 4.524, | |
| "step": 2115 | |
| }, | |
| { | |
| "epoch": 94.22, | |
| "learning_rate": 1.0942760942760944e-05, | |
| "loss": 0.0358, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 94.67, | |
| "learning_rate": 1.0732323232323233e-05, | |
| "loss": 0.0247, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 94.98, | |
| "eval_accuracy": 0.6956521739130435, | |
| "eval_loss": 2.068596124649048, | |
| "eval_runtime": 0.6331, | |
| "eval_samples_per_second": 72.653, | |
| "eval_steps_per_second": 4.738, | |
| "step": 2137 | |
| }, | |
| { | |
| "epoch": 95.11, | |
| "learning_rate": 1.0521885521885522e-05, | |
| "loss": 0.0242, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 95.56, | |
| "learning_rate": 1.0311447811447812e-05, | |
| "loss": 0.0675, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 96.0, | |
| "learning_rate": 1.0101010101010101e-05, | |
| "loss": 0.0429, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 96.0, | |
| "eval_accuracy": 0.717391304347826, | |
| "eval_loss": 2.1125054359436035, | |
| "eval_runtime": 0.6082, | |
| "eval_samples_per_second": 75.638, | |
| "eval_steps_per_second": 4.933, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 96.44, | |
| "learning_rate": 9.89057239057239e-06, | |
| "loss": 0.0287, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 96.89, | |
| "learning_rate": 9.68013468013468e-06, | |
| "loss": 0.0408, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 96.98, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 2.1003143787384033, | |
| "eval_runtime": 0.6066, | |
| "eval_samples_per_second": 75.827, | |
| "eval_steps_per_second": 4.945, | |
| "step": 2182 | |
| }, | |
| { | |
| "epoch": 97.33, | |
| "learning_rate": 9.46969696969697e-06, | |
| "loss": 0.0576, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 97.78, | |
| "learning_rate": 9.259259259259259e-06, | |
| "loss": 0.0385, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 98.0, | |
| "eval_accuracy": 0.6739130434782609, | |
| "eval_loss": 2.2997324466705322, | |
| "eval_runtime": 0.6152, | |
| "eval_samples_per_second": 74.778, | |
| "eval_steps_per_second": 4.877, | |
| "step": 2205 | |
| }, | |
| { | |
| "epoch": 98.22, | |
| "learning_rate": 9.04882154882155e-06, | |
| "loss": 0.0345, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 98.67, | |
| "learning_rate": 8.838383838383838e-06, | |
| "loss": 0.0364, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 98.98, | |
| "eval_accuracy": 0.717391304347826, | |
| "eval_loss": 2.144188165664673, | |
| "eval_runtime": 0.6051, | |
| "eval_samples_per_second": 76.015, | |
| "eval_steps_per_second": 4.957, | |
| "step": 2227 | |
| }, | |
| { | |
| "epoch": 99.11, | |
| "learning_rate": 8.627946127946127e-06, | |
| "loss": 0.0505, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 99.56, | |
| "learning_rate": 8.417508417508419e-06, | |
| "loss": 0.0271, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 100.0, | |
| "learning_rate": 8.207070707070708e-06, | |
| "loss": 0.0415, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 100.0, | |
| "eval_accuracy": 0.717391304347826, | |
| "eval_loss": 2.104325294494629, | |
| "eval_runtime": 0.6057, | |
| "eval_samples_per_second": 75.943, | |
| "eval_steps_per_second": 4.953, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 100.44, | |
| "learning_rate": 7.996632996632998e-06, | |
| "loss": 0.024, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 100.89, | |
| "learning_rate": 7.786195286195287e-06, | |
| "loss": 0.0175, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 100.98, | |
| "eval_accuracy": 0.6739130434782609, | |
| "eval_loss": 2.1847035884857178, | |
| "eval_runtime": 0.6572, | |
| "eval_samples_per_second": 69.993, | |
| "eval_steps_per_second": 4.565, | |
| "step": 2272 | |
| }, | |
| { | |
| "epoch": 101.33, | |
| "learning_rate": 7.5757575757575764e-06, | |
| "loss": 0.0488, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 101.78, | |
| "learning_rate": 7.365319865319865e-06, | |
| "loss": 0.0281, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 102.0, | |
| "eval_accuracy": 0.6521739130434783, | |
| "eval_loss": 2.3262391090393066, | |
| "eval_runtime": 0.6817, | |
| "eval_samples_per_second": 67.477, | |
| "eval_steps_per_second": 4.401, | |
| "step": 2295 | |
| }, | |
| { | |
| "epoch": 102.22, | |
| "learning_rate": 7.1548821548821545e-06, | |
| "loss": 0.0485, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 102.67, | |
| "learning_rate": 6.944444444444445e-06, | |
| "loss": 0.0268, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 102.98, | |
| "eval_accuracy": 0.6956521739130435, | |
| "eval_loss": 2.2843432426452637, | |
| "eval_runtime": 0.6162, | |
| "eval_samples_per_second": 74.654, | |
| "eval_steps_per_second": 4.869, | |
| "step": 2317 | |
| }, | |
| { | |
| "epoch": 103.11, | |
| "learning_rate": 6.734006734006734e-06, | |
| "loss": 0.0389, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 103.56, | |
| "learning_rate": 6.523569023569024e-06, | |
| "loss": 0.0389, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 104.0, | |
| "learning_rate": 6.313131313131314e-06, | |
| "loss": 0.022, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 104.0, | |
| "eval_accuracy": 0.6956521739130435, | |
| "eval_loss": 2.352231502532959, | |
| "eval_runtime": 0.5942, | |
| "eval_samples_per_second": 77.419, | |
| "eval_steps_per_second": 5.049, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 104.44, | |
| "learning_rate": 6.1026936026936026e-06, | |
| "loss": 0.0284, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 104.89, | |
| "learning_rate": 5.892255892255893e-06, | |
| "loss": 0.0279, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 104.98, | |
| "eval_accuracy": 0.6739130434782609, | |
| "eval_loss": 2.4826581478118896, | |
| "eval_runtime": 0.6241, | |
| "eval_samples_per_second": 73.701, | |
| "eval_steps_per_second": 4.807, | |
| "step": 2362 | |
| }, | |
| { | |
| "epoch": 105.33, | |
| "learning_rate": 5.681818181818182e-06, | |
| "loss": 0.0472, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 105.78, | |
| "learning_rate": 5.471380471380472e-06, | |
| "loss": 0.0188, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 106.0, | |
| "eval_accuracy": 0.6521739130434783, | |
| "eval_loss": 2.5688304901123047, | |
| "eval_runtime": 0.6101, | |
| "eval_samples_per_second": 75.392, | |
| "eval_steps_per_second": 4.917, | |
| "step": 2385 | |
| }, | |
| { | |
| "epoch": 106.22, | |
| "learning_rate": 5.260942760942761e-06, | |
| "loss": 0.0404, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 106.67, | |
| "learning_rate": 5.050505050505051e-06, | |
| "loss": 0.0303, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 106.98, | |
| "eval_accuracy": 0.6521739130434783, | |
| "eval_loss": 2.4357047080993652, | |
| "eval_runtime": 0.6062, | |
| "eval_samples_per_second": 75.884, | |
| "eval_steps_per_second": 4.949, | |
| "step": 2407 | |
| }, | |
| { | |
| "epoch": 107.11, | |
| "learning_rate": 4.84006734006734e-06, | |
| "loss": 0.0397, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 107.56, | |
| "learning_rate": 4.6296296296296296e-06, | |
| "loss": 0.0592, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 108.0, | |
| "learning_rate": 4.419191919191919e-06, | |
| "loss": 0.0439, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 108.0, | |
| "eval_accuracy": 0.6521739130434783, | |
| "eval_loss": 2.4359428882598877, | |
| "eval_runtime": 0.6317, | |
| "eval_samples_per_second": 72.823, | |
| "eval_steps_per_second": 4.749, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 108.44, | |
| "learning_rate": 4.208754208754209e-06, | |
| "loss": 0.0501, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 108.89, | |
| "learning_rate": 3.998316498316499e-06, | |
| "loss": 0.0422, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 108.98, | |
| "eval_accuracy": 0.6739130434782609, | |
| "eval_loss": 2.472536563873291, | |
| "eval_runtime": 0.6173, | |
| "eval_samples_per_second": 74.523, | |
| "eval_steps_per_second": 4.86, | |
| "step": 2452 | |
| }, | |
| { | |
| "epoch": 109.33, | |
| "learning_rate": 3.7878787878787882e-06, | |
| "loss": 0.0158, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 109.78, | |
| "learning_rate": 3.5774410774410772e-06, | |
| "loss": 0.032, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 110.0, | |
| "eval_accuracy": 0.6521739130434783, | |
| "eval_loss": 2.289853811264038, | |
| "eval_runtime": 0.6113, | |
| "eval_samples_per_second": 75.246, | |
| "eval_steps_per_second": 4.907, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 110.22, | |
| "learning_rate": 3.367003367003367e-06, | |
| "loss": 0.051, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 110.67, | |
| "learning_rate": 3.156565656565657e-06, | |
| "loss": 0.0414, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 110.98, | |
| "eval_accuracy": 0.6956521739130435, | |
| "eval_loss": 2.268512010574341, | |
| "eval_runtime": 0.6027, | |
| "eval_samples_per_second": 76.328, | |
| "eval_steps_per_second": 4.978, | |
| "step": 2497 | |
| }, | |
| { | |
| "epoch": 111.11, | |
| "learning_rate": 2.9461279461279464e-06, | |
| "loss": 0.0532, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 111.56, | |
| "learning_rate": 2.735690235690236e-06, | |
| "loss": 0.0461, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 112.0, | |
| "learning_rate": 2.5252525252525253e-06, | |
| "loss": 0.03, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 112.0, | |
| "eval_accuracy": 0.6739130434782609, | |
| "eval_loss": 2.306300163269043, | |
| "eval_runtime": 0.6202, | |
| "eval_samples_per_second": 74.172, | |
| "eval_steps_per_second": 4.837, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 112.44, | |
| "learning_rate": 2.3148148148148148e-06, | |
| "loss": 0.0454, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 112.89, | |
| "learning_rate": 2.1043771043771047e-06, | |
| "loss": 0.0293, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 112.98, | |
| "eval_accuracy": 0.6739130434782609, | |
| "eval_loss": 2.352445602416992, | |
| "eval_runtime": 0.6112, | |
| "eval_samples_per_second": 75.268, | |
| "eval_steps_per_second": 4.909, | |
| "step": 2542 | |
| }, | |
| { | |
| "epoch": 113.33, | |
| "learning_rate": 1.8939393939393941e-06, | |
| "loss": 0.037, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 113.78, | |
| "learning_rate": 1.6835016835016836e-06, | |
| "loss": 0.0514, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 114.0, | |
| "eval_accuracy": 0.6739130434782609, | |
| "eval_loss": 2.361192464828491, | |
| "eval_runtime": 0.6632, | |
| "eval_samples_per_second": 69.363, | |
| "eval_steps_per_second": 4.524, | |
| "step": 2565 | |
| }, | |
| { | |
| "epoch": 114.22, | |
| "learning_rate": 1.4730639730639732e-06, | |
| "loss": 0.0786, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 114.67, | |
| "learning_rate": 1.2626262626262627e-06, | |
| "loss": 0.0234, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 114.98, | |
| "eval_accuracy": 0.6521739130434783, | |
| "eval_loss": 2.371103286743164, | |
| "eval_runtime": 0.6022, | |
| "eval_samples_per_second": 76.39, | |
| "eval_steps_per_second": 4.982, | |
| "step": 2587 | |
| }, | |
| { | |
| "epoch": 115.11, | |
| "learning_rate": 1.0521885521885523e-06, | |
| "loss": 0.0222, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 115.56, | |
| "learning_rate": 8.417508417508418e-07, | |
| "loss": 0.0355, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 116.0, | |
| "learning_rate": 6.313131313131313e-07, | |
| "loss": 0.0476, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 116.0, | |
| "eval_accuracy": 0.6739130434782609, | |
| "eval_loss": 2.354818344116211, | |
| "eval_runtime": 0.6402, | |
| "eval_samples_per_second": 71.855, | |
| "eval_steps_per_second": 4.686, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 116.44, | |
| "learning_rate": 4.208754208754209e-07, | |
| "loss": 0.0291, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 116.89, | |
| "learning_rate": 2.1043771043771044e-07, | |
| "loss": 0.0307, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 116.98, | |
| "eval_accuracy": 0.6739130434782609, | |
| "eval_loss": 2.343219041824341, | |
| "eval_runtime": 0.6017, | |
| "eval_samples_per_second": 76.455, | |
| "eval_steps_per_second": 4.986, | |
| "step": 2632 | |
| }, | |
| { | |
| "epoch": 117.33, | |
| "learning_rate": 0.0, | |
| "loss": 0.028, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 117.33, | |
| "eval_accuracy": 0.6739130434782609, | |
| "eval_loss": 2.3432161808013916, | |
| "eval_runtime": 0.6057, | |
| "eval_samples_per_second": 75.951, | |
| "eval_steps_per_second": 4.953, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 117.33, | |
| "step": 2640, | |
| "total_flos": 5.466852859010089e+18, | |
| "train_loss": 0.1724454693552671, | |
| "train_runtime": 3352.4175, | |
| "train_samples_per_second": 51.259, | |
| "train_steps_per_second": 0.787 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 2640, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 120, | |
| "save_steps": 500, | |
| "total_flos": 5.466852859010089e+18, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |