| { | |
| "best_metric": 0.0011723055504262447, | |
| "best_model_checkpoint": "./ap_train_outputs/checkpoint-10918", | |
| "epoch": 106.0, | |
| "eval_steps": 500, | |
| "global_step": 10918, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 1.999223300970874e-05, | |
| "loss": 2.0179, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 1.9984466019417477e-05, | |
| "loss": 1.9319, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 1.9976699029126216e-05, | |
| "loss": 1.7818, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 1.9968932038834955e-05, | |
| "loss": 1.7205, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 1.996116504854369e-05, | |
| "loss": 1.5862, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 1.995339805825243e-05, | |
| "loss": 1.5051, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.9945631067961166e-05, | |
| "loss": 1.3624, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 1.9937864077669905e-05, | |
| "loss": 1.3245, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 1.993009708737864e-05, | |
| "loss": 1.2896, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 1.992233009708738e-05, | |
| "loss": 1.1133, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.8561643835616438, | |
| "eval_loss": 1.1050430536270142, | |
| "eval_runtime": 1.0213, | |
| "eval_samples_per_second": 142.949, | |
| "eval_steps_per_second": 18.603, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 1.991456310679612e-05, | |
| "loss": 1.1258, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 1.9906796116504855e-05, | |
| "loss": 1.0101, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 1.9899029126213594e-05, | |
| "loss": 0.8295, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 1.989126213592233e-05, | |
| "loss": 0.8112, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 1.988349514563107e-05, | |
| "loss": 0.7511, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 1.987572815533981e-05, | |
| "loss": 0.7514, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 1.9867961165048548e-05, | |
| "loss": 0.7797, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 1.9860194174757283e-05, | |
| "loss": 0.6666, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 1.9852427184466022e-05, | |
| "loss": 0.7185, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 1.9844660194174758e-05, | |
| "loss": 0.6564, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.9452054794520548, | |
| "eval_loss": 0.6278233528137207, | |
| "eval_runtime": 1.018, | |
| "eval_samples_per_second": 143.418, | |
| "eval_steps_per_second": 18.664, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.9836893203883497e-05, | |
| "loss": 0.4882, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 1.9829126213592233e-05, | |
| "loss": 0.5629, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 1.9821359223300972e-05, | |
| "loss": 0.5384, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 1.981359223300971e-05, | |
| "loss": 0.445, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 1.9805825242718447e-05, | |
| "loss": 0.5185, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 1.9798058252427187e-05, | |
| "loss": 0.4958, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 1.9790291262135922e-05, | |
| "loss": 0.4132, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 1.978252427184466e-05, | |
| "loss": 0.4397, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 1.97747572815534e-05, | |
| "loss": 0.4415, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 1.9766990291262137e-05, | |
| "loss": 0.4004, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.9657534246575342, | |
| "eval_loss": 0.3876227140426636, | |
| "eval_runtime": 0.9913, | |
| "eval_samples_per_second": 147.283, | |
| "eval_steps_per_second": 19.167, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "learning_rate": 1.9759223300970876e-05, | |
| "loss": 0.3367, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "learning_rate": 1.975145631067961e-05, | |
| "loss": 0.3328, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "learning_rate": 1.974368932038835e-05, | |
| "loss": 0.3767, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 3.3, | |
| "learning_rate": 1.9735922330097087e-05, | |
| "loss": 0.3029, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "learning_rate": 1.972815533980583e-05, | |
| "loss": 0.3183, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "learning_rate": 1.9720388349514565e-05, | |
| "loss": 0.239, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 3.59, | |
| "learning_rate": 1.9712621359223304e-05, | |
| "loss": 0.3627, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 3.69, | |
| "learning_rate": 1.970485436893204e-05, | |
| "loss": 0.3516, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "learning_rate": 1.969708737864078e-05, | |
| "loss": 0.2904, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "learning_rate": 1.9689320388349515e-05, | |
| "loss": 0.2362, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 3.98, | |
| "learning_rate": 1.9681553398058254e-05, | |
| "loss": 0.2624, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.9863013698630136, | |
| "eval_loss": 0.24633407592773438, | |
| "eval_runtime": 0.9705, | |
| "eval_samples_per_second": 150.434, | |
| "eval_steps_per_second": 19.577, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "learning_rate": 1.9673786407766993e-05, | |
| "loss": 0.2748, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 4.17, | |
| "learning_rate": 1.966601941747573e-05, | |
| "loss": 0.2072, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 4.27, | |
| "learning_rate": 1.965825242718447e-05, | |
| "loss": 0.209, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 4.37, | |
| "learning_rate": 1.9650485436893204e-05, | |
| "loss": 0.2567, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 4.47, | |
| "learning_rate": 1.9642718446601943e-05, | |
| "loss": 0.194, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 4.56, | |
| "learning_rate": 1.963495145631068e-05, | |
| "loss": 0.2188, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 4.66, | |
| "learning_rate": 1.962718446601942e-05, | |
| "loss": 0.2031, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 4.76, | |
| "learning_rate": 1.9619417475728157e-05, | |
| "loss": 0.2943, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 4.85, | |
| "learning_rate": 1.9611650485436893e-05, | |
| "loss": 0.1883, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 4.95, | |
| "learning_rate": 1.9603883495145632e-05, | |
| "loss": 0.2074, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.9794520547945206, | |
| "eval_loss": 0.19891677796840668, | |
| "eval_runtime": 0.9232, | |
| "eval_samples_per_second": 158.14, | |
| "eval_steps_per_second": 20.58, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 5.05, | |
| "learning_rate": 1.959611650485437e-05, | |
| "loss": 0.2058, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 5.15, | |
| "learning_rate": 1.9588349514563107e-05, | |
| "loss": 0.1468, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 5.24, | |
| "learning_rate": 1.9580582524271847e-05, | |
| "loss": 0.2338, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 5.34, | |
| "learning_rate": 1.9572815533980586e-05, | |
| "loss": 0.2376, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 5.44, | |
| "learning_rate": 1.956504854368932e-05, | |
| "loss": 0.2948, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 5.53, | |
| "learning_rate": 1.955728155339806e-05, | |
| "loss": 0.191, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 5.63, | |
| "learning_rate": 1.9549514563106797e-05, | |
| "loss": 0.1313, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 5.73, | |
| "learning_rate": 1.9541747572815536e-05, | |
| "loss": 0.1462, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 5.83, | |
| "learning_rate": 1.9533980582524275e-05, | |
| "loss": 0.239, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 5.92, | |
| "learning_rate": 1.952621359223301e-05, | |
| "loss": 0.141, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.9863013698630136, | |
| "eval_loss": 0.17079336941242218, | |
| "eval_runtime": 0.9678, | |
| "eval_samples_per_second": 150.859, | |
| "eval_steps_per_second": 19.632, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 6.02, | |
| "learning_rate": 1.951844660194175e-05, | |
| "loss": 0.1518, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 6.12, | |
| "learning_rate": 1.9510679611650486e-05, | |
| "loss": 0.1385, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 6.21, | |
| "learning_rate": 1.9502912621359225e-05, | |
| "loss": 0.1632, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 6.31, | |
| "learning_rate": 1.949514563106796e-05, | |
| "loss": 0.216, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 6.41, | |
| "learning_rate": 1.94873786407767e-05, | |
| "loss": 0.3242, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 6.5, | |
| "learning_rate": 1.947961165048544e-05, | |
| "loss": 0.1218, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 6.6, | |
| "learning_rate": 1.947184466019418e-05, | |
| "loss": 0.1637, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 6.7, | |
| "learning_rate": 1.9464077669902914e-05, | |
| "loss": 0.1651, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 6.8, | |
| "learning_rate": 1.9456310679611653e-05, | |
| "loss": 0.181, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 6.89, | |
| "learning_rate": 1.944854368932039e-05, | |
| "loss": 0.186, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 6.99, | |
| "learning_rate": 1.944077669902913e-05, | |
| "loss": 0.1338, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.9794520547945206, | |
| "eval_loss": 0.17499177157878876, | |
| "eval_runtime": 0.9605, | |
| "eval_samples_per_second": 152.011, | |
| "eval_steps_per_second": 19.782, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 7.09, | |
| "learning_rate": 1.9433009708737868e-05, | |
| "loss": 0.1857, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 7.18, | |
| "learning_rate": 1.9425242718446603e-05, | |
| "loss": 0.2048, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 7.28, | |
| "learning_rate": 1.9417475728155343e-05, | |
| "loss": 0.1958, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 7.38, | |
| "learning_rate": 1.940970873786408e-05, | |
| "loss": 0.2379, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 7.48, | |
| "learning_rate": 1.9401941747572818e-05, | |
| "loss": 0.1159, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 7.57, | |
| "learning_rate": 1.9394174757281553e-05, | |
| "loss": 0.1961, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 7.67, | |
| "learning_rate": 1.9386407766990292e-05, | |
| "loss": 0.1297, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 7.77, | |
| "learning_rate": 1.937864077669903e-05, | |
| "loss": 0.2555, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 7.86, | |
| "learning_rate": 1.9370873786407767e-05, | |
| "loss": 0.134, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 7.96, | |
| "learning_rate": 1.9363106796116507e-05, | |
| "loss": 0.1343, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.9863013698630136, | |
| "eval_loss": 0.12730906903743744, | |
| "eval_runtime": 0.993, | |
| "eval_samples_per_second": 147.023, | |
| "eval_steps_per_second": 19.133, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 8.06, | |
| "learning_rate": 1.9355339805825242e-05, | |
| "loss": 0.2206, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 8.16, | |
| "learning_rate": 1.934757281553398e-05, | |
| "loss": 0.1735, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 8.25, | |
| "learning_rate": 1.9339805825242717e-05, | |
| "loss": 0.137, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 8.35, | |
| "learning_rate": 1.933203883495146e-05, | |
| "loss": 0.1378, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 8.45, | |
| "learning_rate": 1.9324271844660196e-05, | |
| "loss": 0.1671, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 8.54, | |
| "learning_rate": 1.9316504854368935e-05, | |
| "loss": 0.212, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 8.64, | |
| "learning_rate": 1.930873786407767e-05, | |
| "loss": 0.1598, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 8.74, | |
| "learning_rate": 1.930097087378641e-05, | |
| "loss": 0.1813, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 8.83, | |
| "learning_rate": 1.9293203883495146e-05, | |
| "loss": 0.0725, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 8.93, | |
| "learning_rate": 1.9285436893203885e-05, | |
| "loss": 0.1558, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.9863013698630136, | |
| "eval_loss": 0.11657154560089111, | |
| "eval_runtime": 0.9702, | |
| "eval_samples_per_second": 150.486, | |
| "eval_steps_per_second": 19.584, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 9.03, | |
| "learning_rate": 1.9277669902912624e-05, | |
| "loss": 0.094, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 9.13, | |
| "learning_rate": 1.926990291262136e-05, | |
| "loss": 0.1341, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 9.22, | |
| "learning_rate": 1.92621359223301e-05, | |
| "loss": 0.196, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 9.32, | |
| "learning_rate": 1.9254368932038835e-05, | |
| "loss": 0.1028, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 9.42, | |
| "learning_rate": 1.9246601941747574e-05, | |
| "loss": 0.178, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 9.51, | |
| "learning_rate": 1.9238834951456313e-05, | |
| "loss": 0.2527, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 9.61, | |
| "learning_rate": 1.923106796116505e-05, | |
| "loss": 0.2609, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 9.71, | |
| "learning_rate": 1.922330097087379e-05, | |
| "loss": 0.1518, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 9.81, | |
| "learning_rate": 1.9215533980582528e-05, | |
| "loss": 0.1383, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 9.9, | |
| "learning_rate": 1.9207766990291263e-05, | |
| "loss": 0.075, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "learning_rate": 1.9200000000000003e-05, | |
| "loss": 0.0799, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.9794520547945206, | |
| "eval_loss": 0.1238846480846405, | |
| "eval_runtime": 0.9773, | |
| "eval_samples_per_second": 149.397, | |
| "eval_steps_per_second": 19.442, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 10.1, | |
| "learning_rate": 1.919223300970874e-05, | |
| "loss": 0.1866, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 10.19, | |
| "learning_rate": 1.9184466019417478e-05, | |
| "loss": 0.1154, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 10.29, | |
| "learning_rate": 1.9176699029126217e-05, | |
| "loss": 0.1123, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 10.39, | |
| "learning_rate": 1.9168932038834952e-05, | |
| "loss": 0.1085, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 10.49, | |
| "learning_rate": 1.916116504854369e-05, | |
| "loss": 0.2075, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 10.58, | |
| "learning_rate": 1.9153398058252427e-05, | |
| "loss": 0.0624, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 10.68, | |
| "learning_rate": 1.9145631067961167e-05, | |
| "loss": 0.1116, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 10.78, | |
| "learning_rate": 1.9137864077669906e-05, | |
| "loss": 0.0523, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 10.87, | |
| "learning_rate": 1.913009708737864e-05, | |
| "loss": 0.1655, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 10.97, | |
| "learning_rate": 1.912233009708738e-05, | |
| "loss": 0.1677, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.9726027397260274, | |
| "eval_loss": 0.15431082248687744, | |
| "eval_runtime": 0.948, | |
| "eval_samples_per_second": 154.005, | |
| "eval_steps_per_second": 20.042, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 11.07, | |
| "learning_rate": 1.9114563106796117e-05, | |
| "loss": 0.1516, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 11.17, | |
| "learning_rate": 1.9106796116504856e-05, | |
| "loss": 0.1927, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 11.26, | |
| "learning_rate": 1.909902912621359e-05, | |
| "loss": 0.0548, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 11.36, | |
| "learning_rate": 1.9091262135922334e-05, | |
| "loss": 0.085, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 11.46, | |
| "learning_rate": 1.908349514563107e-05, | |
| "loss": 0.202, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 11.55, | |
| "learning_rate": 1.907572815533981e-05, | |
| "loss": 0.0741, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 11.65, | |
| "learning_rate": 1.9067961165048545e-05, | |
| "loss": 0.1373, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 11.75, | |
| "learning_rate": 1.9060194174757284e-05, | |
| "loss": 0.219, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 11.84, | |
| "learning_rate": 1.905242718446602e-05, | |
| "loss": 0.169, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 11.94, | |
| "learning_rate": 1.904466019417476e-05, | |
| "loss": 0.1969, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.9863013698630136, | |
| "eval_loss": 0.09193126857280731, | |
| "eval_runtime": 0.9434, | |
| "eval_samples_per_second": 154.754, | |
| "eval_steps_per_second": 20.139, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 12.04, | |
| "learning_rate": 1.90368932038835e-05, | |
| "loss": 0.0957, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 12.14, | |
| "learning_rate": 1.9029126213592234e-05, | |
| "loss": 0.1396, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 12.23, | |
| "learning_rate": 1.9021359223300973e-05, | |
| "loss": 0.1004, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 12.33, | |
| "learning_rate": 1.901359223300971e-05, | |
| "loss": 0.0796, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 12.43, | |
| "learning_rate": 1.900582524271845e-05, | |
| "loss": 0.225, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 12.52, | |
| "learning_rate": 1.8998058252427184e-05, | |
| "loss": 0.1395, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 12.62, | |
| "learning_rate": 1.8990291262135923e-05, | |
| "loss": 0.0571, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 12.72, | |
| "learning_rate": 1.8982524271844663e-05, | |
| "loss": 0.0472, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 12.82, | |
| "learning_rate": 1.89747572815534e-05, | |
| "loss": 0.1276, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 12.91, | |
| "learning_rate": 1.8966990291262138e-05, | |
| "loss": 0.0533, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.9931506849315068, | |
| "eval_loss": 0.07145330309867859, | |
| "eval_runtime": 0.9485, | |
| "eval_samples_per_second": 153.935, | |
| "eval_steps_per_second": 20.033, | |
| "step": 1339 | |
| }, | |
| { | |
| "epoch": 13.01, | |
| "learning_rate": 1.8959223300970873e-05, | |
| "loss": 0.0895, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 13.11, | |
| "learning_rate": 1.8951456310679613e-05, | |
| "loss": 0.0545, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 13.2, | |
| "learning_rate": 1.894368932038835e-05, | |
| "loss": 0.0889, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 13.3, | |
| "learning_rate": 1.893592233009709e-05, | |
| "loss": 0.0444, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 13.4, | |
| "learning_rate": 1.8928155339805827e-05, | |
| "loss": 0.131, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 13.5, | |
| "learning_rate": 1.8920388349514566e-05, | |
| "loss": 0.1644, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 13.59, | |
| "learning_rate": 1.89126213592233e-05, | |
| "loss": 0.1254, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 13.69, | |
| "learning_rate": 1.890485436893204e-05, | |
| "loss": 0.1407, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 13.79, | |
| "learning_rate": 1.8897087378640777e-05, | |
| "loss": 0.1139, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 13.88, | |
| "learning_rate": 1.8889320388349516e-05, | |
| "loss": 0.0703, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 13.98, | |
| "learning_rate": 1.8881553398058255e-05, | |
| "loss": 0.1645, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.9931506849315068, | |
| "eval_loss": 0.050846148282289505, | |
| "eval_runtime": 0.9651, | |
| "eval_samples_per_second": 151.283, | |
| "eval_steps_per_second": 19.688, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 14.08, | |
| "learning_rate": 1.887378640776699e-05, | |
| "loss": 0.1136, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 14.17, | |
| "learning_rate": 1.886601941747573e-05, | |
| "loss": 0.0542, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 14.27, | |
| "learning_rate": 1.8858252427184466e-05, | |
| "loss": 0.1973, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 14.37, | |
| "learning_rate": 1.8850485436893205e-05, | |
| "loss": 0.1312, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 14.47, | |
| "learning_rate": 1.8842718446601944e-05, | |
| "loss": 0.1627, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 14.56, | |
| "learning_rate": 1.883495145631068e-05, | |
| "loss": 0.1452, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 14.66, | |
| "learning_rate": 1.882718446601942e-05, | |
| "loss": 0.1142, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 14.76, | |
| "learning_rate": 1.881941747572816e-05, | |
| "loss": 0.0803, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 14.85, | |
| "learning_rate": 1.8811650485436894e-05, | |
| "loss": 0.1401, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 14.95, | |
| "learning_rate": 1.8803883495145633e-05, | |
| "loss": 0.1036, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.9931506849315068, | |
| "eval_loss": 0.06801381707191467, | |
| "eval_runtime": 0.9536, | |
| "eval_samples_per_second": 153.104, | |
| "eval_steps_per_second": 19.924, | |
| "step": 1545 | |
| }, | |
| { | |
| "epoch": 15.05, | |
| "learning_rate": 1.8796116504854373e-05, | |
| "loss": 0.1774, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 15.15, | |
| "learning_rate": 1.878834951456311e-05, | |
| "loss": 0.0533, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 15.24, | |
| "learning_rate": 1.8780582524271848e-05, | |
| "loss": 0.0623, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 15.34, | |
| "learning_rate": 1.8772815533980583e-05, | |
| "loss": 0.1694, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 15.44, | |
| "learning_rate": 1.8765048543689323e-05, | |
| "loss": 0.1773, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 15.53, | |
| "learning_rate": 1.875728155339806e-05, | |
| "loss": 0.2231, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 15.63, | |
| "learning_rate": 1.8749514563106798e-05, | |
| "loss": 0.0794, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 15.73, | |
| "learning_rate": 1.8741747572815537e-05, | |
| "loss": 0.0464, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 15.83, | |
| "learning_rate": 1.8733980582524273e-05, | |
| "loss": 0.1643, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 15.92, | |
| "learning_rate": 1.8726213592233012e-05, | |
| "loss": 0.0442, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.9863013698630136, | |
| "eval_loss": 0.08464141935110092, | |
| "eval_runtime": 0.9534, | |
| "eval_samples_per_second": 153.139, | |
| "eval_steps_per_second": 19.929, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 16.02, | |
| "learning_rate": 1.8718446601941747e-05, | |
| "loss": 0.0626, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 16.12, | |
| "learning_rate": 1.8710679611650487e-05, | |
| "loss": 0.0868, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 16.21, | |
| "learning_rate": 1.8702912621359222e-05, | |
| "loss": 0.2295, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 16.31, | |
| "learning_rate": 1.8695145631067965e-05, | |
| "loss": 0.089, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 16.41, | |
| "learning_rate": 1.86873786407767e-05, | |
| "loss": 0.0727, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 16.5, | |
| "learning_rate": 1.867961165048544e-05, | |
| "loss": 0.0831, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 16.6, | |
| "learning_rate": 1.8671844660194176e-05, | |
| "loss": 0.1162, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 16.7, | |
| "learning_rate": 1.8664077669902915e-05, | |
| "loss": 0.0484, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 16.8, | |
| "learning_rate": 1.865631067961165e-05, | |
| "loss": 0.0411, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 16.89, | |
| "learning_rate": 1.864854368932039e-05, | |
| "loss": 0.0726, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 16.99, | |
| "learning_rate": 1.864077669902913e-05, | |
| "loss": 0.065, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.9931506849315068, | |
| "eval_loss": 0.056606147438287735, | |
| "eval_runtime": 0.9748, | |
| "eval_samples_per_second": 149.773, | |
| "eval_steps_per_second": 19.491, | |
| "step": 1751 | |
| }, | |
| { | |
| "epoch": 17.09, | |
| "learning_rate": 1.8633009708737865e-05, | |
| "loss": 0.2112, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 17.18, | |
| "learning_rate": 1.8625242718446604e-05, | |
| "loss": 0.0992, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 17.28, | |
| "learning_rate": 1.861747572815534e-05, | |
| "loss": 0.1154, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 17.38, | |
| "learning_rate": 1.860970873786408e-05, | |
| "loss": 0.0843, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 17.48, | |
| "learning_rate": 1.860194174757282e-05, | |
| "loss": 0.0969, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 17.57, | |
| "learning_rate": 1.8594174757281554e-05, | |
| "loss": 0.2324, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 17.67, | |
| "learning_rate": 1.8586407766990293e-05, | |
| "loss": 0.1239, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 17.77, | |
| "learning_rate": 1.857864077669903e-05, | |
| "loss": 0.1081, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 17.86, | |
| "learning_rate": 1.857087378640777e-05, | |
| "loss": 0.0354, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 17.96, | |
| "learning_rate": 1.8563106796116504e-05, | |
| "loss": 0.1437, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.9931506849315068, | |
| "eval_loss": 0.04979800060391426, | |
| "eval_runtime": 0.9429, | |
| "eval_samples_per_second": 154.848, | |
| "eval_steps_per_second": 20.151, | |
| "step": 1854 | |
| }, | |
| { | |
| "epoch": 18.06, | |
| "learning_rate": 1.8555339805825243e-05, | |
| "loss": 0.1741, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 18.16, | |
| "learning_rate": 1.8547572815533983e-05, | |
| "loss": 0.0442, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 18.25, | |
| "learning_rate": 1.8539805825242722e-05, | |
| "loss": 0.0778, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 18.35, | |
| "learning_rate": 1.8532038834951458e-05, | |
| "loss": 0.132, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 18.45, | |
| "learning_rate": 1.8524271844660197e-05, | |
| "loss": 0.2408, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 18.54, | |
| "learning_rate": 1.8516504854368933e-05, | |
| "loss": 0.0852, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 18.64, | |
| "learning_rate": 1.8508737864077672e-05, | |
| "loss": 0.149, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 18.74, | |
| "learning_rate": 1.850097087378641e-05, | |
| "loss": 0.0426, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 18.83, | |
| "learning_rate": 1.8493203883495147e-05, | |
| "loss": 0.1248, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 18.93, | |
| "learning_rate": 1.8485436893203886e-05, | |
| "loss": 0.1527, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.9863013698630136, | |
| "eval_loss": 0.0702158659696579, | |
| "eval_runtime": 0.9693, | |
| "eval_samples_per_second": 150.63, | |
| "eval_steps_per_second": 19.603, | |
| "step": 1957 | |
| }, | |
| { | |
| "epoch": 19.03, | |
| "learning_rate": 1.847766990291262e-05, | |
| "loss": 0.0963, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 19.13, | |
| "learning_rate": 1.846990291262136e-05, | |
| "loss": 0.1038, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 19.22, | |
| "learning_rate": 1.8462135922330097e-05, | |
| "loss": 0.0276, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 19.32, | |
| "learning_rate": 1.8454368932038836e-05, | |
| "loss": 0.2018, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 19.42, | |
| "learning_rate": 1.8446601941747575e-05, | |
| "loss": 0.1405, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 19.51, | |
| "learning_rate": 1.843883495145631e-05, | |
| "loss": 0.0337, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 19.61, | |
| "learning_rate": 1.843106796116505e-05, | |
| "loss": 0.1076, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 19.71, | |
| "learning_rate": 1.842330097087379e-05, | |
| "loss": 0.1037, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 19.81, | |
| "learning_rate": 1.8415533980582525e-05, | |
| "loss": 0.0665, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 19.9, | |
| "learning_rate": 1.8407766990291264e-05, | |
| "loss": 0.1567, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "learning_rate": 1.8400000000000003e-05, | |
| "loss": 0.0682, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.9863013698630136, | |
| "eval_loss": 0.06622537225484848, | |
| "eval_runtime": 0.9584, | |
| "eval_samples_per_second": 152.335, | |
| "eval_steps_per_second": 19.824, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 20.1, | |
| "learning_rate": 1.839223300970874e-05, | |
| "loss": 0.0995, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 20.19, | |
| "learning_rate": 1.838446601941748e-05, | |
| "loss": 0.0921, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 20.29, | |
| "learning_rate": 1.8376699029126214e-05, | |
| "loss": 0.2157, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 20.39, | |
| "learning_rate": 1.8368932038834953e-05, | |
| "loss": 0.1321, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 20.49, | |
| "learning_rate": 1.836116504854369e-05, | |
| "loss": 0.0957, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 20.58, | |
| "learning_rate": 1.835339805825243e-05, | |
| "loss": 0.2989, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 20.68, | |
| "learning_rate": 1.8345631067961168e-05, | |
| "loss": 0.2302, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 20.78, | |
| "learning_rate": 1.8337864077669903e-05, | |
| "loss": 0.1633, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 20.87, | |
| "learning_rate": 1.8330097087378643e-05, | |
| "loss": 0.0726, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 20.97, | |
| "learning_rate": 1.832233009708738e-05, | |
| "loss": 0.1013, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_accuracy": 0.9794520547945206, | |
| "eval_loss": 0.07291552424430847, | |
| "eval_runtime": 0.958, | |
| "eval_samples_per_second": 152.399, | |
| "eval_steps_per_second": 19.833, | |
| "step": 2163 | |
| }, | |
| { | |
| "epoch": 21.07, | |
| "learning_rate": 1.8314563106796118e-05, | |
| "loss": 0.0254, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 21.17, | |
| "learning_rate": 1.8306796116504857e-05, | |
| "loss": 0.1409, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 21.26, | |
| "learning_rate": 1.8299029126213596e-05, | |
| "loss": 0.0853, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 21.36, | |
| "learning_rate": 1.8291262135922332e-05, | |
| "loss": 0.131, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 21.46, | |
| "learning_rate": 1.828349514563107e-05, | |
| "loss": 0.1024, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 21.55, | |
| "learning_rate": 1.8275728155339807e-05, | |
| "loss": 0.0212, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 21.65, | |
| "learning_rate": 1.8267961165048546e-05, | |
| "loss": 0.0578, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 21.75, | |
| "learning_rate": 1.826019417475728e-05, | |
| "loss": 0.1048, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 21.84, | |
| "learning_rate": 1.825242718446602e-05, | |
| "loss": 0.0612, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 21.94, | |
| "learning_rate": 1.824466019417476e-05, | |
| "loss": 0.0807, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.02733495458960533, | |
| "eval_runtime": 0.9501, | |
| "eval_samples_per_second": 153.673, | |
| "eval_steps_per_second": 19.999, | |
| "step": 2266 | |
| }, | |
| { | |
| "epoch": 22.04, | |
| "learning_rate": 1.8236893203883496e-05, | |
| "loss": 0.1788, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 22.14, | |
| "learning_rate": 1.8229126213592235e-05, | |
| "loss": 0.1037, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 22.23, | |
| "learning_rate": 1.822135922330097e-05, | |
| "loss": 0.0909, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 22.33, | |
| "learning_rate": 1.821359223300971e-05, | |
| "loss": 0.0597, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 22.43, | |
| "learning_rate": 1.820582524271845e-05, | |
| "loss": 0.0693, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 22.52, | |
| "learning_rate": 1.8198058252427185e-05, | |
| "loss": 0.0684, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 22.62, | |
| "learning_rate": 1.8190291262135924e-05, | |
| "loss": 0.1146, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 22.72, | |
| "learning_rate": 1.818252427184466e-05, | |
| "loss": 0.0753, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 22.82, | |
| "learning_rate": 1.81747572815534e-05, | |
| "loss": 0.0691, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 22.91, | |
| "learning_rate": 1.8166990291262135e-05, | |
| "loss": 0.0803, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_accuracy": 0.9657534246575342, | |
| "eval_loss": 0.13809683918952942, | |
| "eval_runtime": 0.9466, | |
| "eval_samples_per_second": 154.239, | |
| "eval_steps_per_second": 20.072, | |
| "step": 2369 | |
| }, | |
| { | |
| "epoch": 23.01, | |
| "learning_rate": 1.8159223300970878e-05, | |
| "loss": 0.1052, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 23.11, | |
| "learning_rate": 1.8151456310679613e-05, | |
| "loss": 0.1529, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 23.2, | |
| "learning_rate": 1.8143689320388353e-05, | |
| "loss": 0.0929, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 23.3, | |
| "learning_rate": 1.813592233009709e-05, | |
| "loss": 0.1057, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 23.4, | |
| "learning_rate": 1.8128155339805828e-05, | |
| "loss": 0.0545, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 23.5, | |
| "learning_rate": 1.8120388349514563e-05, | |
| "loss": 0.0632, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 23.59, | |
| "learning_rate": 1.8112621359223303e-05, | |
| "loss": 0.0276, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 23.69, | |
| "learning_rate": 1.8104854368932042e-05, | |
| "loss": 0.0976, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 23.79, | |
| "learning_rate": 1.8097087378640778e-05, | |
| "loss": 0.087, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 23.88, | |
| "learning_rate": 1.8089320388349517e-05, | |
| "loss": 0.248, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 23.98, | |
| "learning_rate": 1.8081553398058253e-05, | |
| "loss": 0.0972, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.025691555812954903, | |
| "eval_runtime": 0.9246, | |
| "eval_samples_per_second": 157.902, | |
| "eval_steps_per_second": 20.549, | |
| "step": 2472 | |
| }, | |
| { | |
| "epoch": 24.08, | |
| "learning_rate": 1.8073786407766992e-05, | |
| "loss": 0.1732, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 24.17, | |
| "learning_rate": 1.8066019417475728e-05, | |
| "loss": 0.0377, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 24.27, | |
| "learning_rate": 1.8058252427184467e-05, | |
| "loss": 0.0686, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 24.37, | |
| "learning_rate": 1.8050485436893206e-05, | |
| "loss": 0.0926, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 24.47, | |
| "learning_rate": 1.8042718446601942e-05, | |
| "loss": 0.079, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 24.56, | |
| "learning_rate": 1.803495145631068e-05, | |
| "loss": 0.0477, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 24.66, | |
| "learning_rate": 1.802718446601942e-05, | |
| "loss": 0.0554, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 24.76, | |
| "learning_rate": 1.8019417475728156e-05, | |
| "loss": 0.0732, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 24.85, | |
| "learning_rate": 1.8011650485436895e-05, | |
| "loss": 0.0215, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 24.95, | |
| "learning_rate": 1.8003883495145634e-05, | |
| "loss": 0.0173, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_accuracy": 0.9931506849315068, | |
| "eval_loss": 0.05056421086192131, | |
| "eval_runtime": 0.9345, | |
| "eval_samples_per_second": 156.232, | |
| "eval_steps_per_second": 20.332, | |
| "step": 2575 | |
| }, | |
| { | |
| "epoch": 25.05, | |
| "learning_rate": 1.799611650485437e-05, | |
| "loss": 0.0749, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 25.15, | |
| "learning_rate": 1.798834951456311e-05, | |
| "loss": 0.0437, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 25.24, | |
| "learning_rate": 1.7980582524271845e-05, | |
| "loss": 0.0748, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 25.34, | |
| "learning_rate": 1.7972815533980584e-05, | |
| "loss": 0.04, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 25.44, | |
| "learning_rate": 1.7965048543689323e-05, | |
| "loss": 0.0721, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 25.53, | |
| "learning_rate": 1.795728155339806e-05, | |
| "loss": 0.143, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 25.63, | |
| "learning_rate": 1.79495145631068e-05, | |
| "loss": 0.185, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 25.73, | |
| "learning_rate": 1.7941747572815534e-05, | |
| "loss": 0.0379, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 25.83, | |
| "learning_rate": 1.7933980582524273e-05, | |
| "loss": 0.1209, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 25.92, | |
| "learning_rate": 1.792621359223301e-05, | |
| "loss": 0.075, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_accuracy": 0.9863013698630136, | |
| "eval_loss": 0.09800746291875839, | |
| "eval_runtime": 0.9365, | |
| "eval_samples_per_second": 155.906, | |
| "eval_steps_per_second": 20.289, | |
| "step": 2678 | |
| }, | |
| { | |
| "epoch": 26.02, | |
| "learning_rate": 1.791844660194175e-05, | |
| "loss": 0.0662, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 26.12, | |
| "learning_rate": 1.7910679611650488e-05, | |
| "loss": 0.0206, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 26.21, | |
| "learning_rate": 1.7902912621359227e-05, | |
| "loss": 0.0576, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 26.31, | |
| "learning_rate": 1.7895145631067963e-05, | |
| "loss": 0.0479, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 26.41, | |
| "learning_rate": 1.7887378640776702e-05, | |
| "loss": 0.039, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 26.5, | |
| "learning_rate": 1.7879611650485438e-05, | |
| "loss": 0.0851, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 26.6, | |
| "learning_rate": 1.7871844660194177e-05, | |
| "loss": 0.1626, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 26.7, | |
| "learning_rate": 1.7864077669902916e-05, | |
| "loss": 0.079, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 26.8, | |
| "learning_rate": 1.7856310679611652e-05, | |
| "loss": 0.0268, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 26.89, | |
| "learning_rate": 1.784854368932039e-05, | |
| "loss": 0.0656, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 26.99, | |
| "learning_rate": 1.7840776699029127e-05, | |
| "loss": 0.1103, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_accuracy": 0.958904109589041, | |
| "eval_loss": 0.10835416615009308, | |
| "eval_runtime": 0.9504, | |
| "eval_samples_per_second": 153.614, | |
| "eval_steps_per_second": 19.991, | |
| "step": 2781 | |
| }, | |
| { | |
| "epoch": 27.09, | |
| "learning_rate": 1.7833009708737866e-05, | |
| "loss": 0.0675, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 27.18, | |
| "learning_rate": 1.7825242718446602e-05, | |
| "loss": 0.183, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 27.28, | |
| "learning_rate": 1.781747572815534e-05, | |
| "loss": 0.0763, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 27.38, | |
| "learning_rate": 1.780970873786408e-05, | |
| "loss": 0.0298, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 27.48, | |
| "learning_rate": 1.7801941747572816e-05, | |
| "loss": 0.1811, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 27.57, | |
| "learning_rate": 1.7794174757281555e-05, | |
| "loss": 0.0562, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 27.67, | |
| "learning_rate": 1.778640776699029e-05, | |
| "loss": 0.146, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 27.77, | |
| "learning_rate": 1.777864077669903e-05, | |
| "loss": 0.0538, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 27.86, | |
| "learning_rate": 1.7770873786407766e-05, | |
| "loss": 0.1454, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 27.96, | |
| "learning_rate": 1.776310679611651e-05, | |
| "loss": 0.0622, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.9931506849315068, | |
| "eval_loss": 0.02398013137280941, | |
| "eval_runtime": 0.9144, | |
| "eval_samples_per_second": 159.669, | |
| "eval_steps_per_second": 20.779, | |
| "step": 2884 | |
| }, | |
| { | |
| "epoch": 28.06, | |
| "learning_rate": 1.7755339805825244e-05, | |
| "loss": 0.1488, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 28.16, | |
| "learning_rate": 1.7747572815533983e-05, | |
| "loss": 0.0624, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 28.25, | |
| "learning_rate": 1.773980582524272e-05, | |
| "loss": 0.0144, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 28.35, | |
| "learning_rate": 1.773203883495146e-05, | |
| "loss": 0.0935, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 28.45, | |
| "learning_rate": 1.7724271844660194e-05, | |
| "loss": 0.1088, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 28.54, | |
| "learning_rate": 1.7716504854368933e-05, | |
| "loss": 0.0121, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 28.64, | |
| "learning_rate": 1.7708737864077673e-05, | |
| "loss": 0.0328, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 28.74, | |
| "learning_rate": 1.770097087378641e-05, | |
| "loss": 0.0876, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 28.83, | |
| "learning_rate": 1.7693203883495148e-05, | |
| "loss": 0.0872, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 28.93, | |
| "learning_rate": 1.7685436893203883e-05, | |
| "loss": 0.0126, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_accuracy": 0.9863013698630136, | |
| "eval_loss": 0.03914155438542366, | |
| "eval_runtime": 0.9262, | |
| "eval_samples_per_second": 157.634, | |
| "eval_steps_per_second": 20.514, | |
| "step": 2987 | |
| }, | |
| { | |
| "epoch": 29.03, | |
| "learning_rate": 1.7677669902912623e-05, | |
| "loss": 0.0274, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 29.13, | |
| "learning_rate": 1.7669902912621362e-05, | |
| "loss": 0.0635, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 29.22, | |
| "learning_rate": 1.7662135922330098e-05, | |
| "loss": 0.1097, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 29.32, | |
| "learning_rate": 1.7654368932038837e-05, | |
| "loss": 0.1493, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 29.42, | |
| "learning_rate": 1.7646601941747576e-05, | |
| "loss": 0.0423, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 29.51, | |
| "learning_rate": 1.7638834951456312e-05, | |
| "loss": 0.1211, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 29.61, | |
| "learning_rate": 1.763106796116505e-05, | |
| "loss": 0.0614, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 29.71, | |
| "learning_rate": 1.7623300970873787e-05, | |
| "loss": 0.0644, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 29.81, | |
| "learning_rate": 1.7615533980582526e-05, | |
| "loss": 0.0784, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 29.9, | |
| "learning_rate": 1.7607766990291265e-05, | |
| "loss": 0.156, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "learning_rate": 1.76e-05, | |
| "loss": 0.082, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_accuracy": 0.9863013698630136, | |
| "eval_loss": 0.08485659211874008, | |
| "eval_runtime": 0.9382, | |
| "eval_samples_per_second": 155.618, | |
| "eval_steps_per_second": 20.252, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 30.1, | |
| "learning_rate": 1.759223300970874e-05, | |
| "loss": 0.2005, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 30.19, | |
| "learning_rate": 1.7584466019417476e-05, | |
| "loss": 0.322, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 30.29, | |
| "learning_rate": 1.7576699029126215e-05, | |
| "loss": 0.0765, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 30.39, | |
| "learning_rate": 1.7568932038834954e-05, | |
| "loss": 0.0799, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 30.49, | |
| "learning_rate": 1.756116504854369e-05, | |
| "loss": 0.0892, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 30.58, | |
| "learning_rate": 1.755339805825243e-05, | |
| "loss": 0.0932, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 30.68, | |
| "learning_rate": 1.7545631067961165e-05, | |
| "loss": 0.0882, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 30.78, | |
| "learning_rate": 1.7537864077669904e-05, | |
| "loss": 0.0714, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 30.87, | |
| "learning_rate": 1.753009708737864e-05, | |
| "loss": 0.0537, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 30.97, | |
| "learning_rate": 1.7522330097087383e-05, | |
| "loss": 0.0203, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.019243279471993446, | |
| "eval_runtime": 0.9242, | |
| "eval_samples_per_second": 157.974, | |
| "eval_steps_per_second": 20.558, | |
| "step": 3193 | |
| }, | |
| { | |
| "epoch": 31.07, | |
| "learning_rate": 1.751456310679612e-05, | |
| "loss": 0.1567, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 31.17, | |
| "learning_rate": 1.7506796116504858e-05, | |
| "loss": 0.0829, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 31.26, | |
| "learning_rate": 1.7499029126213593e-05, | |
| "loss": 0.0259, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 31.36, | |
| "learning_rate": 1.7491262135922333e-05, | |
| "loss": 0.0337, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 31.46, | |
| "learning_rate": 1.748349514563107e-05, | |
| "loss": 0.0407, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 31.55, | |
| "learning_rate": 1.7475728155339808e-05, | |
| "loss": 0.1494, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 31.65, | |
| "learning_rate": 1.7467961165048547e-05, | |
| "loss": 0.1308, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 31.75, | |
| "learning_rate": 1.7460194174757283e-05, | |
| "loss": 0.0744, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 31.84, | |
| "learning_rate": 1.7452427184466022e-05, | |
| "loss": 0.059, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 31.94, | |
| "learning_rate": 1.7444660194174758e-05, | |
| "loss": 0.1044, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_accuracy": 0.9657534246575342, | |
| "eval_loss": 0.11390157043933868, | |
| "eval_runtime": 0.9599, | |
| "eval_samples_per_second": 152.1, | |
| "eval_steps_per_second": 19.794, | |
| "step": 3296 | |
| }, | |
| { | |
| "epoch": 32.04, | |
| "learning_rate": 1.7436893203883497e-05, | |
| "loss": 0.1431, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 32.14, | |
| "learning_rate": 1.7429126213592233e-05, | |
| "loss": 0.0207, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 32.23, | |
| "learning_rate": 1.7421359223300972e-05, | |
| "loss": 0.1726, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 32.33, | |
| "learning_rate": 1.741359223300971e-05, | |
| "loss": 0.0813, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 32.43, | |
| "learning_rate": 1.7405825242718447e-05, | |
| "loss": 0.1417, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 32.52, | |
| "learning_rate": 1.7398058252427186e-05, | |
| "loss": 0.036, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 32.62, | |
| "learning_rate": 1.7390291262135922e-05, | |
| "loss": 0.065, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 32.72, | |
| "learning_rate": 1.738252427184466e-05, | |
| "loss": 0.0654, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 32.82, | |
| "learning_rate": 1.73747572815534e-05, | |
| "loss": 0.0311, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 32.91, | |
| "learning_rate": 1.736699029126214e-05, | |
| "loss": 0.0134, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "eval_accuracy": 0.9863013698630136, | |
| "eval_loss": 0.057724058628082275, | |
| "eval_runtime": 0.9176, | |
| "eval_samples_per_second": 159.114, | |
| "eval_steps_per_second": 20.707, | |
| "step": 3399 | |
| }, | |
| { | |
| "epoch": 33.01, | |
| "learning_rate": 1.7359223300970875e-05, | |
| "loss": 0.0149, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 33.11, | |
| "learning_rate": 1.7351456310679614e-05, | |
| "loss": 0.1328, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 33.2, | |
| "learning_rate": 1.734368932038835e-05, | |
| "loss": 0.0303, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 33.3, | |
| "learning_rate": 1.733592233009709e-05, | |
| "loss": 0.0764, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 33.4, | |
| "learning_rate": 1.732815533980583e-05, | |
| "loss": 0.0176, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 33.5, | |
| "learning_rate": 1.7320388349514564e-05, | |
| "loss": 0.0133, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 33.59, | |
| "learning_rate": 1.7312621359223303e-05, | |
| "loss": 0.2035, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 33.69, | |
| "learning_rate": 1.730485436893204e-05, | |
| "loss": 0.0844, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 33.79, | |
| "learning_rate": 1.729708737864078e-05, | |
| "loss": 0.062, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 33.88, | |
| "learning_rate": 1.7289320388349514e-05, | |
| "loss": 0.0381, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 33.98, | |
| "learning_rate": 1.7281553398058253e-05, | |
| "loss": 0.0923, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_accuracy": 0.958904109589041, | |
| "eval_loss": 0.182390034198761, | |
| "eval_runtime": 0.9423, | |
| "eval_samples_per_second": 154.946, | |
| "eval_steps_per_second": 20.164, | |
| "step": 3502 | |
| }, | |
| { | |
| "epoch": 34.08, | |
| "learning_rate": 1.7273786407766993e-05, | |
| "loss": 0.044, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 34.17, | |
| "learning_rate": 1.726601941747573e-05, | |
| "loss": 0.0188, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 34.27, | |
| "learning_rate": 1.7258252427184468e-05, | |
| "loss": 0.0863, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 34.37, | |
| "learning_rate": 1.7250485436893207e-05, | |
| "loss": 0.1034, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 34.47, | |
| "learning_rate": 1.7242718446601943e-05, | |
| "loss": 0.0148, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 34.56, | |
| "learning_rate": 1.7234951456310682e-05, | |
| "loss": 0.0083, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 34.66, | |
| "learning_rate": 1.722718446601942e-05, | |
| "loss": 0.0313, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 34.76, | |
| "learning_rate": 1.7219417475728157e-05, | |
| "loss": 0.1949, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 34.85, | |
| "learning_rate": 1.7211650485436896e-05, | |
| "loss": 0.0643, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 34.95, | |
| "learning_rate": 1.7203883495145632e-05, | |
| "loss": 0.1156, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "eval_accuracy": 0.9931506849315068, | |
| "eval_loss": 0.022417498752474785, | |
| "eval_runtime": 0.9325, | |
| "eval_samples_per_second": 156.574, | |
| "eval_steps_per_second": 20.376, | |
| "step": 3605 | |
| }, | |
| { | |
| "epoch": 35.05, | |
| "learning_rate": 1.719611650485437e-05, | |
| "loss": 0.1949, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 35.15, | |
| "learning_rate": 1.7188349514563107e-05, | |
| "loss": 0.0527, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 35.24, | |
| "learning_rate": 1.7180582524271846e-05, | |
| "loss": 0.0105, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 35.34, | |
| "learning_rate": 1.7172815533980585e-05, | |
| "loss": 0.1182, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 35.44, | |
| "learning_rate": 1.716504854368932e-05, | |
| "loss": 0.0799, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 35.53, | |
| "learning_rate": 1.715728155339806e-05, | |
| "loss": 0.1506, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 35.63, | |
| "learning_rate": 1.7149514563106796e-05, | |
| "loss": 0.1022, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 35.73, | |
| "learning_rate": 1.7141747572815535e-05, | |
| "loss": 0.0591, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 35.83, | |
| "learning_rate": 1.713398058252427e-05, | |
| "loss": 0.0083, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 35.92, | |
| "learning_rate": 1.7126213592233013e-05, | |
| "loss": 0.0161, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_accuracy": 0.9931506849315068, | |
| "eval_loss": 0.03223036974668503, | |
| "eval_runtime": 0.926, | |
| "eval_samples_per_second": 157.661, | |
| "eval_steps_per_second": 20.518, | |
| "step": 3708 | |
| }, | |
| { | |
| "epoch": 36.02, | |
| "learning_rate": 1.711844660194175e-05, | |
| "loss": 0.1174, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 36.12, | |
| "learning_rate": 1.711067961165049e-05, | |
| "loss": 0.0884, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 36.21, | |
| "learning_rate": 1.7102912621359224e-05, | |
| "loss": 0.0085, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 36.31, | |
| "learning_rate": 1.7095145631067963e-05, | |
| "loss": 0.1102, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 36.41, | |
| "learning_rate": 1.70873786407767e-05, | |
| "loss": 0.1087, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 36.5, | |
| "learning_rate": 1.707961165048544e-05, | |
| "loss": 0.0612, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 36.6, | |
| "learning_rate": 1.7071844660194178e-05, | |
| "loss": 0.0383, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 36.7, | |
| "learning_rate": 1.7064077669902913e-05, | |
| "loss": 0.021, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 36.8, | |
| "learning_rate": 1.7056310679611653e-05, | |
| "loss": 0.1188, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 36.89, | |
| "learning_rate": 1.704854368932039e-05, | |
| "loss": 0.0805, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 36.99, | |
| "learning_rate": 1.7040776699029128e-05, | |
| "loss": 0.0754, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 37.0, | |
| "eval_accuracy": 0.9726027397260274, | |
| "eval_loss": 0.10264816880226135, | |
| "eval_runtime": 0.9208, | |
| "eval_samples_per_second": 158.561, | |
| "eval_steps_per_second": 20.635, | |
| "step": 3811 | |
| }, | |
| { | |
| "epoch": 37.09, | |
| "learning_rate": 1.7033009708737867e-05, | |
| "loss": 0.012, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 37.18, | |
| "learning_rate": 1.7025242718446603e-05, | |
| "loss": 0.0646, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 37.28, | |
| "learning_rate": 1.7017475728155342e-05, | |
| "loss": 0.0136, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 37.38, | |
| "learning_rate": 1.7009708737864078e-05, | |
| "loss": 0.0409, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 37.48, | |
| "learning_rate": 1.7001941747572817e-05, | |
| "loss": 0.109, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 37.57, | |
| "learning_rate": 1.6994174757281553e-05, | |
| "loss": 0.0205, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 37.67, | |
| "learning_rate": 1.6986407766990292e-05, | |
| "loss": 0.1262, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 37.77, | |
| "learning_rate": 1.697864077669903e-05, | |
| "loss": 0.1535, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 37.86, | |
| "learning_rate": 1.697087378640777e-05, | |
| "loss": 0.0292, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 37.96, | |
| "learning_rate": 1.6963106796116506e-05, | |
| "loss": 0.0356, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "eval_accuracy": 0.952054794520548, | |
| "eval_loss": 0.25439009070396423, | |
| "eval_runtime": 0.9243, | |
| "eval_samples_per_second": 157.958, | |
| "eval_steps_per_second": 20.556, | |
| "step": 3914 | |
| }, | |
| { | |
| "epoch": 38.06, | |
| "learning_rate": 1.6955339805825245e-05, | |
| "loss": 0.1842, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 38.16, | |
| "learning_rate": 1.694757281553398e-05, | |
| "loss": 0.1801, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 38.25, | |
| "learning_rate": 1.693980582524272e-05, | |
| "loss": 0.0089, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 38.35, | |
| "learning_rate": 1.693203883495146e-05, | |
| "loss": 0.0289, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 38.45, | |
| "learning_rate": 1.6924271844660195e-05, | |
| "loss": 0.0536, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 38.54, | |
| "learning_rate": 1.6916504854368934e-05, | |
| "loss": 0.1036, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 38.64, | |
| "learning_rate": 1.690873786407767e-05, | |
| "loss": 0.0112, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 38.74, | |
| "learning_rate": 1.690097087378641e-05, | |
| "loss": 0.008, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 38.83, | |
| "learning_rate": 1.6893203883495145e-05, | |
| "loss": 0.0183, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 38.93, | |
| "learning_rate": 1.6885436893203884e-05, | |
| "loss": 0.008, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 39.0, | |
| "eval_accuracy": 0.9794520547945206, | |
| "eval_loss": 0.10272617638111115, | |
| "eval_runtime": 0.9261, | |
| "eval_samples_per_second": 157.656, | |
| "eval_steps_per_second": 20.517, | |
| "step": 4017 | |
| }, | |
| { | |
| "epoch": 39.03, | |
| "learning_rate": 1.6877669902912623e-05, | |
| "loss": 0.1074, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 39.13, | |
| "learning_rate": 1.686990291262136e-05, | |
| "loss": 0.1753, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 39.22, | |
| "learning_rate": 1.68621359223301e-05, | |
| "loss": 0.0181, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 39.32, | |
| "learning_rate": 1.6854368932038838e-05, | |
| "loss": 0.0541, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 39.42, | |
| "learning_rate": 1.6846601941747573e-05, | |
| "loss": 0.1461, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 39.51, | |
| "learning_rate": 1.6838834951456313e-05, | |
| "loss": 0.1556, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 39.61, | |
| "learning_rate": 1.6831067961165052e-05, | |
| "loss": 0.0072, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 39.71, | |
| "learning_rate": 1.6823300970873788e-05, | |
| "loss": 0.1443, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 39.81, | |
| "learning_rate": 1.6815533980582527e-05, | |
| "loss": 0.0272, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 39.9, | |
| "learning_rate": 1.6807766990291263e-05, | |
| "loss": 0.0404, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "learning_rate": 1.6800000000000002e-05, | |
| "loss": 0.1293, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_accuracy": 0.9863013698630136, | |
| "eval_loss": 0.06097627803683281, | |
| "eval_runtime": 0.961, | |
| "eval_samples_per_second": 151.929, | |
| "eval_steps_per_second": 19.772, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 40.1, | |
| "learning_rate": 1.6792233009708738e-05, | |
| "loss": 0.0466, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 40.19, | |
| "learning_rate": 1.6784466019417477e-05, | |
| "loss": 0.0499, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 40.29, | |
| "learning_rate": 1.6776699029126216e-05, | |
| "loss": 0.1867, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 40.39, | |
| "learning_rate": 1.6768932038834952e-05, | |
| "loss": 0.0198, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 40.49, | |
| "learning_rate": 1.676116504854369e-05, | |
| "loss": 0.0074, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 40.58, | |
| "learning_rate": 1.6753398058252427e-05, | |
| "loss": 0.0624, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 40.68, | |
| "learning_rate": 1.6745631067961166e-05, | |
| "loss": 0.0607, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 40.78, | |
| "learning_rate": 1.6737864077669905e-05, | |
| "loss": 0.0749, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 40.87, | |
| "learning_rate": 1.6730097087378644e-05, | |
| "loss": 0.0066, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 40.97, | |
| "learning_rate": 1.672233009708738e-05, | |
| "loss": 0.0578, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 41.0, | |
| "eval_accuracy": 0.9794520547945206, | |
| "eval_loss": 0.08583226799964905, | |
| "eval_runtime": 0.9345, | |
| "eval_samples_per_second": 156.24, | |
| "eval_steps_per_second": 20.333, | |
| "step": 4223 | |
| }, | |
| { | |
| "epoch": 41.07, | |
| "learning_rate": 1.671456310679612e-05, | |
| "loss": 0.1032, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 41.17, | |
| "learning_rate": 1.6706796116504855e-05, | |
| "loss": 0.0544, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 41.26, | |
| "learning_rate": 1.6699029126213594e-05, | |
| "loss": 0.1737, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 41.36, | |
| "learning_rate": 1.669126213592233e-05, | |
| "loss": 0.0807, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 41.46, | |
| "learning_rate": 1.668349514563107e-05, | |
| "loss": 0.0775, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 41.55, | |
| "learning_rate": 1.667572815533981e-05, | |
| "loss": 0.0672, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 41.65, | |
| "learning_rate": 1.6667961165048544e-05, | |
| "loss": 0.049, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 41.75, | |
| "learning_rate": 1.6660194174757283e-05, | |
| "loss": 0.1505, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 41.84, | |
| "learning_rate": 1.665242718446602e-05, | |
| "loss": 0.1309, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 41.94, | |
| "learning_rate": 1.664466019417476e-05, | |
| "loss": 0.0528, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 42.0, | |
| "eval_accuracy": 0.9794520547945206, | |
| "eval_loss": 0.09928789734840393, | |
| "eval_runtime": 0.9576, | |
| "eval_samples_per_second": 152.464, | |
| "eval_steps_per_second": 19.841, | |
| "step": 4326 | |
| }, | |
| { | |
| "epoch": 42.04, | |
| "learning_rate": 1.6636893203883498e-05, | |
| "loss": 0.1204, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 42.14, | |
| "learning_rate": 1.6629126213592233e-05, | |
| "loss": 0.0681, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 42.23, | |
| "learning_rate": 1.6621359223300973e-05, | |
| "loss": 0.1064, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 42.33, | |
| "learning_rate": 1.661359223300971e-05, | |
| "loss": 0.0672, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 42.43, | |
| "learning_rate": 1.6605825242718448e-05, | |
| "loss": 0.0494, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 42.52, | |
| "learning_rate": 1.6598058252427183e-05, | |
| "loss": 0.0866, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 42.62, | |
| "learning_rate": 1.6590291262135926e-05, | |
| "loss": 0.0385, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 42.72, | |
| "learning_rate": 1.6582524271844662e-05, | |
| "loss": 0.0143, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 42.82, | |
| "learning_rate": 1.65747572815534e-05, | |
| "loss": 0.014, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 42.91, | |
| "learning_rate": 1.6566990291262137e-05, | |
| "loss": 0.0886, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 43.0, | |
| "eval_accuracy": 0.9931506849315068, | |
| "eval_loss": 0.03256027027964592, | |
| "eval_runtime": 0.9428, | |
| "eval_samples_per_second": 154.855, | |
| "eval_steps_per_second": 20.152, | |
| "step": 4429 | |
| }, | |
| { | |
| "epoch": 43.01, | |
| "learning_rate": 1.6559223300970876e-05, | |
| "loss": 0.0291, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 43.11, | |
| "learning_rate": 1.6551456310679612e-05, | |
| "loss": 0.0058, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 43.2, | |
| "learning_rate": 1.654368932038835e-05, | |
| "loss": 0.0331, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 43.3, | |
| "learning_rate": 1.653592233009709e-05, | |
| "loss": 0.2086, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 43.4, | |
| "learning_rate": 1.6528155339805826e-05, | |
| "loss": 0.0081, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 43.5, | |
| "learning_rate": 1.6520388349514565e-05, | |
| "loss": 0.0051, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 43.59, | |
| "learning_rate": 1.65126213592233e-05, | |
| "loss": 0.2006, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 43.69, | |
| "learning_rate": 1.650485436893204e-05, | |
| "loss": 0.0789, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 43.79, | |
| "learning_rate": 1.6497087378640776e-05, | |
| "loss": 0.0571, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 43.88, | |
| "learning_rate": 1.6489320388349515e-05, | |
| "loss": 0.0509, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 43.98, | |
| "learning_rate": 1.6481553398058254e-05, | |
| "loss": 0.0254, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "eval_accuracy": 0.9931506849315068, | |
| "eval_loss": 0.03951861709356308, | |
| "eval_runtime": 0.9204, | |
| "eval_samples_per_second": 158.634, | |
| "eval_steps_per_second": 20.644, | |
| "step": 4532 | |
| }, | |
| { | |
| "epoch": 44.08, | |
| "learning_rate": 1.647378640776699e-05, | |
| "loss": 0.0554, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 44.17, | |
| "learning_rate": 1.646601941747573e-05, | |
| "loss": 0.1147, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 44.27, | |
| "learning_rate": 1.645825242718447e-05, | |
| "loss": 0.0991, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 44.37, | |
| "learning_rate": 1.6450485436893204e-05, | |
| "loss": 0.0387, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 44.47, | |
| "learning_rate": 1.6442718446601943e-05, | |
| "loss": 0.0065, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 44.56, | |
| "learning_rate": 1.6434951456310683e-05, | |
| "loss": 0.0062, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 44.66, | |
| "learning_rate": 1.642718446601942e-05, | |
| "loss": 0.0337, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 44.76, | |
| "learning_rate": 1.6419417475728158e-05, | |
| "loss": 0.0792, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 44.85, | |
| "learning_rate": 1.6411650485436893e-05, | |
| "loss": 0.1339, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 44.95, | |
| "learning_rate": 1.6403883495145633e-05, | |
| "loss": 0.0087, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 45.0, | |
| "eval_accuracy": 0.958904109589041, | |
| "eval_loss": 0.1797976791858673, | |
| "eval_runtime": 0.9137, | |
| "eval_samples_per_second": 159.792, | |
| "eval_steps_per_second": 20.795, | |
| "step": 4635 | |
| }, | |
| { | |
| "epoch": 45.05, | |
| "learning_rate": 1.6396116504854372e-05, | |
| "loss": 0.0471, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 45.15, | |
| "learning_rate": 1.6388349514563108e-05, | |
| "loss": 0.2771, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 45.24, | |
| "learning_rate": 1.6380582524271847e-05, | |
| "loss": 0.0076, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 45.34, | |
| "learning_rate": 1.6372815533980583e-05, | |
| "loss": 0.0712, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 45.44, | |
| "learning_rate": 1.6365048543689322e-05, | |
| "loss": 0.0652, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 45.53, | |
| "learning_rate": 1.6357281553398058e-05, | |
| "loss": 0.0174, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 45.63, | |
| "learning_rate": 1.6349514563106797e-05, | |
| "loss": 0.0869, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 45.73, | |
| "learning_rate": 1.6341747572815536e-05, | |
| "loss": 0.0263, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 45.83, | |
| "learning_rate": 1.6333980582524275e-05, | |
| "loss": 0.1847, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 45.92, | |
| "learning_rate": 1.632621359223301e-05, | |
| "loss": 0.0736, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 46.0, | |
| "eval_accuracy": 0.9931506849315068, | |
| "eval_loss": 0.0322740375995636, | |
| "eval_runtime": 0.9527, | |
| "eval_samples_per_second": 153.246, | |
| "eval_steps_per_second": 19.943, | |
| "step": 4738 | |
| }, | |
| { | |
| "epoch": 46.02, | |
| "learning_rate": 1.631844660194175e-05, | |
| "loss": 0.0529, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 46.12, | |
| "learning_rate": 1.6310679611650486e-05, | |
| "loss": 0.0063, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 46.21, | |
| "learning_rate": 1.6302912621359225e-05, | |
| "loss": 0.0516, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 46.31, | |
| "learning_rate": 1.6295145631067964e-05, | |
| "loss": 0.0302, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 46.41, | |
| "learning_rate": 1.62873786407767e-05, | |
| "loss": 0.0122, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 46.5, | |
| "learning_rate": 1.627961165048544e-05, | |
| "loss": 0.1734, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 46.6, | |
| "learning_rate": 1.6271844660194175e-05, | |
| "loss": 0.0983, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 46.7, | |
| "learning_rate": 1.6264077669902914e-05, | |
| "loss": 0.1147, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 46.8, | |
| "learning_rate": 1.625631067961165e-05, | |
| "loss": 0.0177, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 46.89, | |
| "learning_rate": 1.624854368932039e-05, | |
| "loss": 0.0644, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 46.99, | |
| "learning_rate": 1.624077669902913e-05, | |
| "loss": 0.0427, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 47.0, | |
| "eval_accuracy": 0.9931506849315068, | |
| "eval_loss": 0.03598255664110184, | |
| "eval_runtime": 0.9233, | |
| "eval_samples_per_second": 158.128, | |
| "eval_steps_per_second": 20.578, | |
| "step": 4841 | |
| }, | |
| { | |
| "epoch": 47.09, | |
| "learning_rate": 1.6233009708737864e-05, | |
| "loss": 0.0146, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 47.18, | |
| "learning_rate": 1.6225242718446603e-05, | |
| "loss": 0.0297, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 47.28, | |
| "learning_rate": 1.621747572815534e-05, | |
| "loss": 0.0042, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 47.38, | |
| "learning_rate": 1.620970873786408e-05, | |
| "loss": 0.0157, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 47.48, | |
| "learning_rate": 1.6201941747572814e-05, | |
| "loss": 0.0753, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 47.57, | |
| "learning_rate": 1.6194174757281557e-05, | |
| "loss": 0.1388, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 47.67, | |
| "learning_rate": 1.6186407766990293e-05, | |
| "loss": 0.0326, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 47.77, | |
| "learning_rate": 1.6178640776699032e-05, | |
| "loss": 0.1892, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 47.86, | |
| "learning_rate": 1.6170873786407768e-05, | |
| "loss": 0.0573, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 47.96, | |
| "learning_rate": 1.6163106796116507e-05, | |
| "loss": 0.0322, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "eval_accuracy": 0.9657534246575342, | |
| "eval_loss": 0.05355680733919144, | |
| "eval_runtime": 0.9463, | |
| "eval_samples_per_second": 154.291, | |
| "eval_steps_per_second": 20.079, | |
| "step": 4944 | |
| }, | |
| { | |
| "epoch": 48.06, | |
| "learning_rate": 1.6155339805825243e-05, | |
| "loss": 0.1167, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 48.16, | |
| "learning_rate": 1.6147572815533982e-05, | |
| "loss": 0.0333, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 48.25, | |
| "learning_rate": 1.613980582524272e-05, | |
| "loss": 0.2643, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 48.35, | |
| "learning_rate": 1.6132038834951457e-05, | |
| "loss": 0.0314, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 48.45, | |
| "learning_rate": 1.6124271844660196e-05, | |
| "loss": 0.0281, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 48.54, | |
| "learning_rate": 1.6116504854368932e-05, | |
| "loss": 0.0865, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 48.64, | |
| "learning_rate": 1.610873786407767e-05, | |
| "loss": 0.1104, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 48.74, | |
| "learning_rate": 1.610097087378641e-05, | |
| "loss": 0.0059, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 48.83, | |
| "learning_rate": 1.6093203883495146e-05, | |
| "loss": 0.0419, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 48.93, | |
| "learning_rate": 1.6085436893203885e-05, | |
| "loss": 0.0499, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 49.0, | |
| "eval_accuracy": 0.9863013698630136, | |
| "eval_loss": 0.035887837409973145, | |
| "eval_runtime": 0.9776, | |
| "eval_samples_per_second": 149.343, | |
| "eval_steps_per_second": 19.435, | |
| "step": 5047 | |
| }, | |
| { | |
| "epoch": 49.03, | |
| "learning_rate": 1.6077669902912624e-05, | |
| "loss": 0.062, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 49.13, | |
| "learning_rate": 1.606990291262136e-05, | |
| "loss": 0.0114, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 49.22, | |
| "learning_rate": 1.60621359223301e-05, | |
| "loss": 0.1428, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 49.32, | |
| "learning_rate": 1.6054368932038835e-05, | |
| "loss": 0.0259, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 49.42, | |
| "learning_rate": 1.6046601941747574e-05, | |
| "loss": 0.0047, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 49.51, | |
| "learning_rate": 1.6038834951456313e-05, | |
| "loss": 0.0825, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 49.61, | |
| "learning_rate": 1.603106796116505e-05, | |
| "loss": 0.1709, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 49.71, | |
| "learning_rate": 1.602330097087379e-05, | |
| "loss": 0.0343, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 49.81, | |
| "learning_rate": 1.6015533980582524e-05, | |
| "loss": 0.0675, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 49.9, | |
| "learning_rate": 1.6007766990291263e-05, | |
| "loss": 0.0356, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "learning_rate": 1.6000000000000003e-05, | |
| "loss": 0.0234, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.01597742550075054, | |
| "eval_runtime": 0.9767, | |
| "eval_samples_per_second": 149.48, | |
| "eval_steps_per_second": 19.453, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 50.1, | |
| "learning_rate": 1.599223300970874e-05, | |
| "loss": 0.1435, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 50.19, | |
| "learning_rate": 1.5984466019417478e-05, | |
| "loss": 0.1082, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 50.29, | |
| "learning_rate": 1.5976699029126213e-05, | |
| "loss": 0.0888, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 50.39, | |
| "learning_rate": 1.5968932038834953e-05, | |
| "loss": 0.0825, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 50.49, | |
| "learning_rate": 1.596116504854369e-05, | |
| "loss": 0.1063, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 50.58, | |
| "learning_rate": 1.595339805825243e-05, | |
| "loss": 0.0379, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 50.68, | |
| "learning_rate": 1.5945631067961167e-05, | |
| "loss": 0.0053, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 50.78, | |
| "learning_rate": 1.5937864077669906e-05, | |
| "loss": 0.0691, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 50.87, | |
| "learning_rate": 1.5930097087378642e-05, | |
| "loss": 0.0763, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 50.97, | |
| "learning_rate": 1.592233009708738e-05, | |
| "loss": 0.0896, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 51.0, | |
| "eval_accuracy": 0.9863013698630136, | |
| "eval_loss": 0.054553717374801636, | |
| "eval_runtime": 0.9405, | |
| "eval_samples_per_second": 155.229, | |
| "eval_steps_per_second": 20.201, | |
| "step": 5253 | |
| }, | |
| { | |
| "epoch": 51.07, | |
| "learning_rate": 1.5914563106796117e-05, | |
| "loss": 0.0095, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 51.17, | |
| "learning_rate": 1.5906796116504856e-05, | |
| "loss": 0.0832, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 51.26, | |
| "learning_rate": 1.5899029126213595e-05, | |
| "loss": 0.0357, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 51.36, | |
| "learning_rate": 1.589126213592233e-05, | |
| "loss": 0.0705, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 51.46, | |
| "learning_rate": 1.588349514563107e-05, | |
| "loss": 0.0749, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 51.55, | |
| "learning_rate": 1.5875728155339806e-05, | |
| "loss": 0.0785, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 51.65, | |
| "learning_rate": 1.5867961165048545e-05, | |
| "loss": 0.0107, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 51.75, | |
| "learning_rate": 1.586019417475728e-05, | |
| "loss": 0.0292, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 51.84, | |
| "learning_rate": 1.585242718446602e-05, | |
| "loss": 0.0349, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 51.94, | |
| "learning_rate": 1.584466019417476e-05, | |
| "loss": 0.1089, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 52.0, | |
| "eval_accuracy": 0.9863013698630136, | |
| "eval_loss": 0.06735321879386902, | |
| "eval_runtime": 0.9363, | |
| "eval_samples_per_second": 155.931, | |
| "eval_steps_per_second": 20.292, | |
| "step": 5356 | |
| }, | |
| { | |
| "epoch": 52.04, | |
| "learning_rate": 1.5836893203883495e-05, | |
| "loss": 0.0729, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 52.14, | |
| "learning_rate": 1.5829126213592234e-05, | |
| "loss": 0.226, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 52.23, | |
| "learning_rate": 1.582135922330097e-05, | |
| "loss": 0.1495, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 52.33, | |
| "learning_rate": 1.581359223300971e-05, | |
| "loss": 0.1037, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 52.43, | |
| "learning_rate": 1.580582524271845e-05, | |
| "loss": 0.0258, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 52.52, | |
| "learning_rate": 1.5798058252427188e-05, | |
| "loss": 0.0822, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 52.62, | |
| "learning_rate": 1.5790291262135923e-05, | |
| "loss": 0.0039, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 52.72, | |
| "learning_rate": 1.5782524271844663e-05, | |
| "loss": 0.1656, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 52.82, | |
| "learning_rate": 1.57747572815534e-05, | |
| "loss": 0.0786, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 52.91, | |
| "learning_rate": 1.5766990291262138e-05, | |
| "loss": 0.1473, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 53.0, | |
| "eval_accuracy": 0.9931506849315068, | |
| "eval_loss": 0.03693979233503342, | |
| "eval_runtime": 0.9352, | |
| "eval_samples_per_second": 156.119, | |
| "eval_steps_per_second": 20.317, | |
| "step": 5459 | |
| }, | |
| { | |
| "epoch": 53.01, | |
| "learning_rate": 1.5759223300970877e-05, | |
| "loss": 0.1319, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 53.11, | |
| "learning_rate": 1.5751456310679613e-05, | |
| "loss": 0.0548, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 53.2, | |
| "learning_rate": 1.5743689320388352e-05, | |
| "loss": 0.1589, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 53.3, | |
| "learning_rate": 1.5735922330097088e-05, | |
| "loss": 0.0182, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 53.4, | |
| "learning_rate": 1.5728155339805827e-05, | |
| "loss": 0.0687, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 53.5, | |
| "learning_rate": 1.5720388349514563e-05, | |
| "loss": 0.0477, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 53.59, | |
| "learning_rate": 1.5712621359223302e-05, | |
| "loss": 0.0341, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 53.69, | |
| "learning_rate": 1.570485436893204e-05, | |
| "loss": 0.0338, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 53.79, | |
| "learning_rate": 1.5697087378640777e-05, | |
| "loss": 0.0927, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 53.88, | |
| "learning_rate": 1.5689320388349516e-05, | |
| "loss": 0.119, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 53.98, | |
| "learning_rate": 1.5681553398058255e-05, | |
| "loss": 0.0346, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 54.0, | |
| "eval_accuracy": 0.9794520547945206, | |
| "eval_loss": 0.03953592851758003, | |
| "eval_runtime": 0.9234, | |
| "eval_samples_per_second": 158.116, | |
| "eval_steps_per_second": 20.577, | |
| "step": 5562 | |
| }, | |
| { | |
| "epoch": 54.08, | |
| "learning_rate": 1.567378640776699e-05, | |
| "loss": 0.0527, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 54.17, | |
| "learning_rate": 1.566601941747573e-05, | |
| "loss": 0.17, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 54.27, | |
| "learning_rate": 1.565825242718447e-05, | |
| "loss": 0.0102, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 54.37, | |
| "learning_rate": 1.5650485436893205e-05, | |
| "loss": 0.0073, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 54.47, | |
| "learning_rate": 1.5642718446601944e-05, | |
| "loss": 0.0288, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 54.56, | |
| "learning_rate": 1.563495145631068e-05, | |
| "loss": 0.0455, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 54.66, | |
| "learning_rate": 1.562718446601942e-05, | |
| "loss": 0.0343, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 54.76, | |
| "learning_rate": 1.5619417475728155e-05, | |
| "loss": 0.0179, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 54.85, | |
| "learning_rate": 1.5611650485436894e-05, | |
| "loss": 0.0116, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 54.95, | |
| "learning_rate": 1.5603883495145634e-05, | |
| "loss": 0.1742, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 55.0, | |
| "eval_accuracy": 0.9931506849315068, | |
| "eval_loss": 0.03901122882962227, | |
| "eval_runtime": 0.9237, | |
| "eval_samples_per_second": 158.058, | |
| "eval_steps_per_second": 20.569, | |
| "step": 5665 | |
| }, | |
| { | |
| "epoch": 55.05, | |
| "learning_rate": 1.559611650485437e-05, | |
| "loss": 0.0043, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 55.15, | |
| "learning_rate": 1.558834951456311e-05, | |
| "loss": 0.0422, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 55.24, | |
| "learning_rate": 1.5580582524271844e-05, | |
| "loss": 0.0032, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 55.34, | |
| "learning_rate": 1.5572815533980583e-05, | |
| "loss": 0.0043, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 55.44, | |
| "learning_rate": 1.556504854368932e-05, | |
| "loss": 0.099, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 55.53, | |
| "learning_rate": 1.5557281553398062e-05, | |
| "loss": 0.0195, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 55.63, | |
| "learning_rate": 1.5549514563106798e-05, | |
| "loss": 0.1299, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 55.73, | |
| "learning_rate": 1.5541747572815537e-05, | |
| "loss": 0.0038, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 55.83, | |
| "learning_rate": 1.5533980582524273e-05, | |
| "loss": 0.0479, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 55.92, | |
| "learning_rate": 1.5526213592233012e-05, | |
| "loss": 0.0788, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 56.0, | |
| "eval_accuracy": 0.9931506849315068, | |
| "eval_loss": 0.055711355060338974, | |
| "eval_runtime": 0.9412, | |
| "eval_samples_per_second": 155.121, | |
| "eval_steps_per_second": 20.187, | |
| "step": 5768 | |
| }, | |
| { | |
| "epoch": 56.02, | |
| "learning_rate": 1.5518446601941748e-05, | |
| "loss": 0.1459, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 56.12, | |
| "learning_rate": 1.5510679611650487e-05, | |
| "loss": 0.067, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 56.21, | |
| "learning_rate": 1.5502912621359226e-05, | |
| "loss": 0.056, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 56.31, | |
| "learning_rate": 1.5495145631067962e-05, | |
| "loss": 0.0233, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 56.41, | |
| "learning_rate": 1.54873786407767e-05, | |
| "loss": 0.1362, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 56.5, | |
| "learning_rate": 1.5479611650485437e-05, | |
| "loss": 0.092, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 56.6, | |
| "learning_rate": 1.5471844660194176e-05, | |
| "loss": 0.0779, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 56.7, | |
| "learning_rate": 1.5464077669902915e-05, | |
| "loss": 0.0401, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 56.8, | |
| "learning_rate": 1.545631067961165e-05, | |
| "loss": 0.0743, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 56.89, | |
| "learning_rate": 1.544854368932039e-05, | |
| "loss": 0.0751, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 56.99, | |
| "learning_rate": 1.5440776699029126e-05, | |
| "loss": 0.12, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 57.0, | |
| "eval_accuracy": 0.9794520547945206, | |
| "eval_loss": 0.04083726927638054, | |
| "eval_runtime": 0.9215, | |
| "eval_samples_per_second": 158.439, | |
| "eval_steps_per_second": 20.619, | |
| "step": 5871 | |
| }, | |
| { | |
| "epoch": 57.09, | |
| "learning_rate": 1.5433009708737865e-05, | |
| "loss": 0.0554, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 57.18, | |
| "learning_rate": 1.54252427184466e-05, | |
| "loss": 0.0277, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 57.28, | |
| "learning_rate": 1.541747572815534e-05, | |
| "loss": 0.0297, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 57.38, | |
| "learning_rate": 1.540970873786408e-05, | |
| "loss": 0.0304, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 57.48, | |
| "learning_rate": 1.540194174757282e-05, | |
| "loss": 0.1162, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 57.57, | |
| "learning_rate": 1.5394174757281554e-05, | |
| "loss": 0.0031, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 57.67, | |
| "learning_rate": 1.5386407766990294e-05, | |
| "loss": 0.0263, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 57.77, | |
| "learning_rate": 1.537864077669903e-05, | |
| "loss": 0.0255, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 57.86, | |
| "learning_rate": 1.537087378640777e-05, | |
| "loss": 0.1066, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 57.96, | |
| "learning_rate": 1.5363106796116508e-05, | |
| "loss": 0.0952, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 58.0, | |
| "eval_accuracy": 0.9863013698630136, | |
| "eval_loss": 0.04665667563676834, | |
| "eval_runtime": 0.9277, | |
| "eval_samples_per_second": 157.377, | |
| "eval_steps_per_second": 20.481, | |
| "step": 5974 | |
| }, | |
| { | |
| "epoch": 58.06, | |
| "learning_rate": 1.5355339805825243e-05, | |
| "loss": 0.0915, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 58.16, | |
| "learning_rate": 1.5347572815533983e-05, | |
| "loss": 0.0818, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 58.25, | |
| "learning_rate": 1.533980582524272e-05, | |
| "loss": 0.1942, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 58.35, | |
| "learning_rate": 1.5332038834951458e-05, | |
| "loss": 0.0257, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 58.45, | |
| "learning_rate": 1.5324271844660193e-05, | |
| "loss": 0.2663, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 58.54, | |
| "learning_rate": 1.5316504854368933e-05, | |
| "loss": 0.1124, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 58.64, | |
| "learning_rate": 1.5308737864077672e-05, | |
| "loss": 0.1751, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 58.74, | |
| "learning_rate": 1.5300970873786408e-05, | |
| "loss": 0.06, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 58.83, | |
| "learning_rate": 1.5293203883495147e-05, | |
| "loss": 0.069, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 58.93, | |
| "learning_rate": 1.5285436893203886e-05, | |
| "loss": 0.0119, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 59.0, | |
| "eval_accuracy": 0.9794520547945206, | |
| "eval_loss": 0.09355130046606064, | |
| "eval_runtime": 0.9076, | |
| "eval_samples_per_second": 160.86, | |
| "eval_steps_per_second": 20.934, | |
| "step": 6077 | |
| }, | |
| { | |
| "epoch": 59.03, | |
| "learning_rate": 1.5277669902912622e-05, | |
| "loss": 0.0037, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 59.13, | |
| "learning_rate": 1.526990291262136e-05, | |
| "loss": 0.0265, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 59.22, | |
| "learning_rate": 1.52621359223301e-05, | |
| "loss": 0.0376, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 59.32, | |
| "learning_rate": 1.5254368932038836e-05, | |
| "loss": 0.0088, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 59.42, | |
| "learning_rate": 1.5246601941747573e-05, | |
| "loss": 0.0406, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 59.51, | |
| "learning_rate": 1.5238834951456311e-05, | |
| "loss": 0.2535, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 59.61, | |
| "learning_rate": 1.5231067961165048e-05, | |
| "loss": 0.0168, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 59.71, | |
| "learning_rate": 1.5223300970873786e-05, | |
| "loss": 0.0041, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 59.81, | |
| "learning_rate": 1.5215533980582527e-05, | |
| "loss": 0.0073, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 59.9, | |
| "learning_rate": 1.5207766990291264e-05, | |
| "loss": 0.0921, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "learning_rate": 1.5200000000000002e-05, | |
| "loss": 0.0381, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "eval_accuracy": 0.9863013698630136, | |
| "eval_loss": 0.058126527816057205, | |
| "eval_runtime": 0.9237, | |
| "eval_samples_per_second": 158.052, | |
| "eval_steps_per_second": 20.568, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 60.1, | |
| "learning_rate": 1.519223300970874e-05, | |
| "loss": 0.0028, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 60.19, | |
| "learning_rate": 1.5184466019417477e-05, | |
| "loss": 0.0349, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 60.29, | |
| "learning_rate": 1.5176699029126214e-05, | |
| "loss": 0.0037, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 60.39, | |
| "learning_rate": 1.5168932038834954e-05, | |
| "loss": 0.0028, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 60.49, | |
| "learning_rate": 1.5161165048543691e-05, | |
| "loss": 0.0764, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 60.58, | |
| "learning_rate": 1.5153398058252429e-05, | |
| "loss": 0.1693, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 60.68, | |
| "learning_rate": 1.5145631067961166e-05, | |
| "loss": 0.0965, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 60.78, | |
| "learning_rate": 1.5137864077669904e-05, | |
| "loss": 0.0026, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 60.87, | |
| "learning_rate": 1.5130097087378641e-05, | |
| "loss": 0.0157, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 60.97, | |
| "learning_rate": 1.512233009708738e-05, | |
| "loss": 0.0147, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 61.0, | |
| "eval_accuracy": 0.9863013698630136, | |
| "eval_loss": 0.08475180715322495, | |
| "eval_runtime": 0.9312, | |
| "eval_samples_per_second": 156.787, | |
| "eval_steps_per_second": 20.404, | |
| "step": 6283 | |
| }, | |
| { | |
| "epoch": 61.07, | |
| "learning_rate": 1.5114563106796118e-05, | |
| "loss": 0.0033, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 61.17, | |
| "learning_rate": 1.5106796116504855e-05, | |
| "loss": 0.0149, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 61.26, | |
| "learning_rate": 1.5099029126213593e-05, | |
| "loss": 0.0366, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 61.36, | |
| "learning_rate": 1.5091262135922332e-05, | |
| "loss": 0.0854, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 61.46, | |
| "learning_rate": 1.508349514563107e-05, | |
| "loss": 0.0727, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 61.55, | |
| "learning_rate": 1.5075728155339807e-05, | |
| "loss": 0.0502, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 61.65, | |
| "learning_rate": 1.5067961165048546e-05, | |
| "loss": 0.0878, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 61.75, | |
| "learning_rate": 1.5060194174757284e-05, | |
| "loss": 0.0988, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 61.84, | |
| "learning_rate": 1.5052427184466021e-05, | |
| "loss": 0.0054, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 61.94, | |
| "learning_rate": 1.5044660194174759e-05, | |
| "loss": 0.028, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 62.0, | |
| "eval_accuracy": 0.9863013698630136, | |
| "eval_loss": 0.0554812066257, | |
| "eval_runtime": 0.9252, | |
| "eval_samples_per_second": 157.804, | |
| "eval_steps_per_second": 20.536, | |
| "step": 6386 | |
| }, | |
| { | |
| "epoch": 62.04, | |
| "learning_rate": 1.5036893203883496e-05, | |
| "loss": 0.0038, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 62.14, | |
| "learning_rate": 1.5029126213592234e-05, | |
| "loss": 0.1136, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 62.23, | |
| "learning_rate": 1.5021359223300973e-05, | |
| "loss": 0.0329, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 62.33, | |
| "learning_rate": 1.501359223300971e-05, | |
| "loss": 0.2072, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 62.43, | |
| "learning_rate": 1.5005825242718448e-05, | |
| "loss": 0.028, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 62.52, | |
| "learning_rate": 1.4998058252427185e-05, | |
| "loss": 0.2947, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 62.62, | |
| "learning_rate": 1.4990291262135923e-05, | |
| "loss": 0.0525, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 62.72, | |
| "learning_rate": 1.498252427184466e-05, | |
| "loss": 0.0433, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 62.82, | |
| "learning_rate": 1.4974757281553401e-05, | |
| "loss": 0.0054, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 62.91, | |
| "learning_rate": 1.4966990291262139e-05, | |
| "loss": 0.0108, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 63.0, | |
| "eval_accuracy": 0.9863013698630136, | |
| "eval_loss": 0.02103378064930439, | |
| "eval_runtime": 0.9293, | |
| "eval_samples_per_second": 157.104, | |
| "eval_steps_per_second": 20.445, | |
| "step": 6489 | |
| }, | |
| { | |
| "epoch": 63.01, | |
| "learning_rate": 1.4959223300970876e-05, | |
| "loss": 0.0574, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 63.11, | |
| "learning_rate": 1.4951456310679614e-05, | |
| "loss": 0.1288, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 63.2, | |
| "learning_rate": 1.4943689320388351e-05, | |
| "loss": 0.0043, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 63.3, | |
| "learning_rate": 1.4935922330097089e-05, | |
| "loss": 0.0602, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 63.4, | |
| "learning_rate": 1.4928155339805826e-05, | |
| "loss": 0.087, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 63.5, | |
| "learning_rate": 1.4920388349514565e-05, | |
| "loss": 0.0581, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 63.59, | |
| "learning_rate": 1.4912621359223303e-05, | |
| "loss": 0.0797, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 63.69, | |
| "learning_rate": 1.490485436893204e-05, | |
| "loss": 0.1079, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 63.79, | |
| "learning_rate": 1.4897087378640778e-05, | |
| "loss": 0.0026, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 63.88, | |
| "learning_rate": 1.4889320388349515e-05, | |
| "loss": 0.004, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 63.98, | |
| "learning_rate": 1.4881553398058253e-05, | |
| "loss": 0.0845, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 64.0, | |
| "eval_accuracy": 0.9931506849315068, | |
| "eval_loss": 0.01815168187022209, | |
| "eval_runtime": 0.957, | |
| "eval_samples_per_second": 152.56, | |
| "eval_steps_per_second": 19.854, | |
| "step": 6592 | |
| }, | |
| { | |
| "epoch": 64.08, | |
| "learning_rate": 1.4873786407766992e-05, | |
| "loss": 0.0624, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 64.17, | |
| "learning_rate": 1.486601941747573e-05, | |
| "loss": 0.0056, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 64.27, | |
| "learning_rate": 1.4858252427184467e-05, | |
| "loss": 0.0028, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 64.37, | |
| "learning_rate": 1.4850485436893204e-05, | |
| "loss": 0.0151, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 64.47, | |
| "learning_rate": 1.4842718446601942e-05, | |
| "loss": 0.0057, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 64.56, | |
| "learning_rate": 1.483495145631068e-05, | |
| "loss": 0.0037, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 64.66, | |
| "learning_rate": 1.482718446601942e-05, | |
| "loss": 0.0025, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 64.76, | |
| "learning_rate": 1.4819417475728158e-05, | |
| "loss": 0.0439, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 64.85, | |
| "learning_rate": 1.4811650485436895e-05, | |
| "loss": 0.0049, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 64.95, | |
| "learning_rate": 1.4803883495145633e-05, | |
| "loss": 0.0027, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 65.0, | |
| "eval_accuracy": 0.9931506849315068, | |
| "eval_loss": 0.021542595699429512, | |
| "eval_runtime": 0.9233, | |
| "eval_samples_per_second": 158.125, | |
| "eval_steps_per_second": 20.578, | |
| "step": 6695 | |
| }, | |
| { | |
| "epoch": 65.05, | |
| "learning_rate": 1.479611650485437e-05, | |
| "loss": 0.0547, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 65.15, | |
| "learning_rate": 1.4788349514563108e-05, | |
| "loss": 0.0319, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 65.24, | |
| "learning_rate": 1.4780582524271845e-05, | |
| "loss": 0.0903, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 65.34, | |
| "learning_rate": 1.4772815533980584e-05, | |
| "loss": 0.1014, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 65.44, | |
| "learning_rate": 1.4765048543689322e-05, | |
| "loss": 0.0021, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 65.53, | |
| "learning_rate": 1.475728155339806e-05, | |
| "loss": 0.0146, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 65.63, | |
| "learning_rate": 1.4749514563106797e-05, | |
| "loss": 0.0188, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 65.73, | |
| "learning_rate": 1.4741747572815534e-05, | |
| "loss": 0.1414, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 65.83, | |
| "learning_rate": 1.4733980582524272e-05, | |
| "loss": 0.0898, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 65.92, | |
| "learning_rate": 1.4726213592233011e-05, | |
| "loss": 0.0852, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 66.0, | |
| "eval_accuracy": 0.9863013698630136, | |
| "eval_loss": 0.03675241023302078, | |
| "eval_runtime": 0.9838, | |
| "eval_samples_per_second": 148.411, | |
| "eval_steps_per_second": 19.314, | |
| "step": 6798 | |
| }, | |
| { | |
| "epoch": 66.02, | |
| "learning_rate": 1.4718446601941749e-05, | |
| "loss": 0.0604, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 66.12, | |
| "learning_rate": 1.4710679611650486e-05, | |
| "loss": 0.0943, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 66.21, | |
| "learning_rate": 1.4702912621359225e-05, | |
| "loss": 0.1165, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 66.31, | |
| "learning_rate": 1.4695145631067963e-05, | |
| "loss": 0.0357, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 66.41, | |
| "learning_rate": 1.46873786407767e-05, | |
| "loss": 0.0481, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 66.5, | |
| "learning_rate": 1.467961165048544e-05, | |
| "loss": 0.0041, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 66.6, | |
| "learning_rate": 1.4671844660194177e-05, | |
| "loss": 0.019, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 66.7, | |
| "learning_rate": 1.4664077669902914e-05, | |
| "loss": 0.0022, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 66.8, | |
| "learning_rate": 1.4656310679611652e-05, | |
| "loss": 0.0822, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 66.89, | |
| "learning_rate": 1.464854368932039e-05, | |
| "loss": 0.0585, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 66.99, | |
| "learning_rate": 1.4640776699029127e-05, | |
| "loss": 0.0022, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 67.0, | |
| "eval_accuracy": 0.958904109589041, | |
| "eval_loss": 0.16548927128314972, | |
| "eval_runtime": 0.927, | |
| "eval_samples_per_second": 157.494, | |
| "eval_steps_per_second": 20.496, | |
| "step": 6901 | |
| }, | |
| { | |
| "epoch": 67.09, | |
| "learning_rate": 1.4633009708737864e-05, | |
| "loss": 0.0252, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 67.18, | |
| "learning_rate": 1.4625242718446604e-05, | |
| "loss": 0.1552, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 67.28, | |
| "learning_rate": 1.4617475728155341e-05, | |
| "loss": 0.0023, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 67.38, | |
| "learning_rate": 1.4609708737864079e-05, | |
| "loss": 0.0022, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 67.48, | |
| "learning_rate": 1.4601941747572816e-05, | |
| "loss": 0.0952, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 67.57, | |
| "learning_rate": 1.4594174757281554e-05, | |
| "loss": 0.0205, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 67.67, | |
| "learning_rate": 1.4586407766990291e-05, | |
| "loss": 0.0487, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 67.77, | |
| "learning_rate": 1.4578640776699032e-05, | |
| "loss": 0.002, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 67.86, | |
| "learning_rate": 1.457087378640777e-05, | |
| "loss": 0.0832, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 67.96, | |
| "learning_rate": 1.4563106796116507e-05, | |
| "loss": 0.0757, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 68.0, | |
| "eval_accuracy": 0.9863013698630136, | |
| "eval_loss": 0.034192681312561035, | |
| "eval_runtime": 0.8958, | |
| "eval_samples_per_second": 162.982, | |
| "eval_steps_per_second": 21.21, | |
| "step": 7004 | |
| }, | |
| { | |
| "epoch": 68.06, | |
| "learning_rate": 1.4555339805825244e-05, | |
| "loss": 0.0993, | |
| "step": 7010 | |
| }, | |
| { | |
| "epoch": 68.16, | |
| "learning_rate": 1.4547572815533982e-05, | |
| "loss": 0.0503, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 68.25, | |
| "learning_rate": 1.453980582524272e-05, | |
| "loss": 0.0605, | |
| "step": 7030 | |
| }, | |
| { | |
| "epoch": 68.35, | |
| "learning_rate": 1.4532038834951459e-05, | |
| "loss": 0.1539, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 68.45, | |
| "learning_rate": 1.4524271844660196e-05, | |
| "loss": 0.0486, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 68.54, | |
| "learning_rate": 1.4516504854368934e-05, | |
| "loss": 0.0159, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 68.64, | |
| "learning_rate": 1.4508737864077671e-05, | |
| "loss": 0.0608, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 68.74, | |
| "learning_rate": 1.4500970873786409e-05, | |
| "loss": 0.1876, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 68.83, | |
| "learning_rate": 1.4493203883495146e-05, | |
| "loss": 0.0214, | |
| "step": 7090 | |
| }, | |
| { | |
| "epoch": 68.93, | |
| "learning_rate": 1.4485436893203884e-05, | |
| "loss": 0.0823, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 69.0, | |
| "eval_accuracy": 0.9863013698630136, | |
| "eval_loss": 0.02801397815346718, | |
| "eval_runtime": 0.9051, | |
| "eval_samples_per_second": 161.3, | |
| "eval_steps_per_second": 20.991, | |
| "step": 7107 | |
| }, | |
| { | |
| "epoch": 69.03, | |
| "learning_rate": 1.4477669902912623e-05, | |
| "loss": 0.0045, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 69.13, | |
| "learning_rate": 1.446990291262136e-05, | |
| "loss": 0.0027, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 69.22, | |
| "learning_rate": 1.4462135922330098e-05, | |
| "loss": 0.08, | |
| "step": 7130 | |
| }, | |
| { | |
| "epoch": 69.32, | |
| "learning_rate": 1.4454368932038835e-05, | |
| "loss": 0.1202, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 69.42, | |
| "learning_rate": 1.4446601941747573e-05, | |
| "loss": 0.0255, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 69.51, | |
| "learning_rate": 1.443883495145631e-05, | |
| "loss": 0.0071, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 69.61, | |
| "learning_rate": 1.4431067961165051e-05, | |
| "loss": 0.0873, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 69.71, | |
| "learning_rate": 1.4423300970873789e-05, | |
| "loss": 0.0846, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 69.81, | |
| "learning_rate": 1.4415533980582526e-05, | |
| "loss": 0.1357, | |
| "step": 7190 | |
| }, | |
| { | |
| "epoch": 69.9, | |
| "learning_rate": 1.4407766990291264e-05, | |
| "loss": 0.082, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 70.0, | |
| "learning_rate": 1.4400000000000001e-05, | |
| "loss": 0.1071, | |
| "step": 7210 | |
| }, | |
| { | |
| "epoch": 70.0, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.014005626551806927, | |
| "eval_runtime": 0.9022, | |
| "eval_samples_per_second": 161.82, | |
| "eval_steps_per_second": 21.059, | |
| "step": 7210 | |
| }, | |
| { | |
| "epoch": 70.1, | |
| "learning_rate": 1.4392233009708739e-05, | |
| "loss": 0.0503, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 70.19, | |
| "learning_rate": 1.4384466019417478e-05, | |
| "loss": 0.1342, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 70.29, | |
| "learning_rate": 1.4376699029126215e-05, | |
| "loss": 0.0553, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 70.39, | |
| "learning_rate": 1.4368932038834953e-05, | |
| "loss": 0.0535, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 70.49, | |
| "learning_rate": 1.436116504854369e-05, | |
| "loss": 0.0376, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 70.58, | |
| "learning_rate": 1.4353398058252428e-05, | |
| "loss": 0.0154, | |
| "step": 7270 | |
| }, | |
| { | |
| "epoch": 70.68, | |
| "learning_rate": 1.4345631067961165e-05, | |
| "loss": 0.0047, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 70.78, | |
| "learning_rate": 1.4337864077669904e-05, | |
| "loss": 0.0018, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 70.87, | |
| "learning_rate": 1.4330097087378642e-05, | |
| "loss": 0.0908, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 70.97, | |
| "learning_rate": 1.432233009708738e-05, | |
| "loss": 0.0832, | |
| "step": 7310 | |
| }, | |
| { | |
| "epoch": 71.0, | |
| "eval_accuracy": 0.9931506849315068, | |
| "eval_loss": 0.03868861868977547, | |
| "eval_runtime": 0.9162, | |
| "eval_samples_per_second": 159.35, | |
| "eval_steps_per_second": 20.737, | |
| "step": 7313 | |
| }, | |
| { | |
| "epoch": 71.07, | |
| "learning_rate": 1.4314563106796117e-05, | |
| "loss": 0.054, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 71.17, | |
| "learning_rate": 1.4306796116504856e-05, | |
| "loss": 0.0967, | |
| "step": 7330 | |
| }, | |
| { | |
| "epoch": 71.26, | |
| "learning_rate": 1.4299029126213594e-05, | |
| "loss": 0.0186, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 71.36, | |
| "learning_rate": 1.4291262135922331e-05, | |
| "loss": 0.0503, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 71.46, | |
| "learning_rate": 1.428349514563107e-05, | |
| "loss": 0.1007, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 71.55, | |
| "learning_rate": 1.4275728155339808e-05, | |
| "loss": 0.0525, | |
| "step": 7370 | |
| }, | |
| { | |
| "epoch": 71.65, | |
| "learning_rate": 1.4267961165048545e-05, | |
| "loss": 0.0239, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 71.75, | |
| "learning_rate": 1.4260194174757283e-05, | |
| "loss": 0.0964, | |
| "step": 7390 | |
| }, | |
| { | |
| "epoch": 71.84, | |
| "learning_rate": 1.425242718446602e-05, | |
| "loss": 0.0359, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 71.94, | |
| "learning_rate": 1.4244660194174758e-05, | |
| "loss": 0.0417, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 72.0, | |
| "eval_accuracy": 0.9863013698630136, | |
| "eval_loss": 0.0697227343916893, | |
| "eval_runtime": 0.9023, | |
| "eval_samples_per_second": 161.815, | |
| "eval_steps_per_second": 21.058, | |
| "step": 7416 | |
| }, | |
| { | |
| "epoch": 72.04, | |
| "learning_rate": 1.4236893203883497e-05, | |
| "loss": 0.0933, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 72.14, | |
| "learning_rate": 1.4229126213592234e-05, | |
| "loss": 0.1749, | |
| "step": 7430 | |
| }, | |
| { | |
| "epoch": 72.23, | |
| "learning_rate": 1.4221359223300972e-05, | |
| "loss": 0.0145, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 72.33, | |
| "learning_rate": 1.421359223300971e-05, | |
| "loss": 0.0321, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 72.43, | |
| "learning_rate": 1.4205825242718447e-05, | |
| "loss": 0.0879, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 72.52, | |
| "learning_rate": 1.4198058252427184e-05, | |
| "loss": 0.0227, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 72.62, | |
| "learning_rate": 1.4190291262135925e-05, | |
| "loss": 0.1572, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 72.72, | |
| "learning_rate": 1.4182524271844663e-05, | |
| "loss": 0.0728, | |
| "step": 7490 | |
| }, | |
| { | |
| "epoch": 72.82, | |
| "learning_rate": 1.41747572815534e-05, | |
| "loss": 0.0045, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 72.91, | |
| "learning_rate": 1.4166990291262138e-05, | |
| "loss": 0.1208, | |
| "step": 7510 | |
| }, | |
| { | |
| "epoch": 73.0, | |
| "eval_accuracy": 0.9863013698630136, | |
| "eval_loss": 0.07754257321357727, | |
| "eval_runtime": 0.8991, | |
| "eval_samples_per_second": 162.384, | |
| "eval_steps_per_second": 21.132, | |
| "step": 7519 | |
| }, | |
| { | |
| "epoch": 73.01, | |
| "learning_rate": 1.4159223300970875e-05, | |
| "loss": 0.0482, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 73.11, | |
| "learning_rate": 1.4151456310679613e-05, | |
| "loss": 0.0655, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 73.2, | |
| "learning_rate": 1.414368932038835e-05, | |
| "loss": 0.0787, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 73.3, | |
| "learning_rate": 1.413592233009709e-05, | |
| "loss": 0.0483, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 73.4, | |
| "learning_rate": 1.4128155339805827e-05, | |
| "loss": 0.068, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 73.5, | |
| "learning_rate": 1.4120388349514564e-05, | |
| "loss": 0.0395, | |
| "step": 7570 | |
| }, | |
| { | |
| "epoch": 73.59, | |
| "learning_rate": 1.4112621359223302e-05, | |
| "loss": 0.0644, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 73.69, | |
| "learning_rate": 1.410485436893204e-05, | |
| "loss": 0.0126, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 73.79, | |
| "learning_rate": 1.4097087378640777e-05, | |
| "loss": 0.0531, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 73.88, | |
| "learning_rate": 1.4089320388349516e-05, | |
| "loss": 0.0767, | |
| "step": 7610 | |
| }, | |
| { | |
| "epoch": 73.98, | |
| "learning_rate": 1.4081553398058254e-05, | |
| "loss": 0.0083, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 74.0, | |
| "eval_accuracy": 0.9863013698630136, | |
| "eval_loss": 0.052464041858911514, | |
| "eval_runtime": 0.9146, | |
| "eval_samples_per_second": 159.624, | |
| "eval_steps_per_second": 20.773, | |
| "step": 7622 | |
| }, | |
| { | |
| "epoch": 74.08, | |
| "learning_rate": 1.4073786407766991e-05, | |
| "loss": 0.0115, | |
| "step": 7630 | |
| }, | |
| { | |
| "epoch": 74.17, | |
| "learning_rate": 1.4066019417475729e-05, | |
| "loss": 0.0252, | |
| "step": 7640 | |
| }, | |
| { | |
| "epoch": 74.27, | |
| "learning_rate": 1.4058252427184466e-05, | |
| "loss": 0.0033, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 74.37, | |
| "learning_rate": 1.4050485436893204e-05, | |
| "loss": 0.0883, | |
| "step": 7660 | |
| }, | |
| { | |
| "epoch": 74.47, | |
| "learning_rate": 1.4042718446601944e-05, | |
| "loss": 0.0103, | |
| "step": 7670 | |
| }, | |
| { | |
| "epoch": 74.56, | |
| "learning_rate": 1.4034951456310682e-05, | |
| "loss": 0.1416, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 74.66, | |
| "learning_rate": 1.402718446601942e-05, | |
| "loss": 0.0526, | |
| "step": 7690 | |
| }, | |
| { | |
| "epoch": 74.76, | |
| "learning_rate": 1.4019417475728157e-05, | |
| "loss": 0.0104, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 74.85, | |
| "learning_rate": 1.4011650485436894e-05, | |
| "loss": 0.1323, | |
| "step": 7710 | |
| }, | |
| { | |
| "epoch": 74.95, | |
| "learning_rate": 1.4003883495145632e-05, | |
| "loss": 0.0017, | |
| "step": 7720 | |
| }, | |
| { | |
| "epoch": 75.0, | |
| "eval_accuracy": 0.9931506849315068, | |
| "eval_loss": 0.04071873798966408, | |
| "eval_runtime": 0.9191, | |
| "eval_samples_per_second": 158.853, | |
| "eval_steps_per_second": 20.673, | |
| "step": 7725 | |
| }, | |
| { | |
| "epoch": 75.05, | |
| "learning_rate": 1.399611650485437e-05, | |
| "loss": 0.0016, | |
| "step": 7730 | |
| }, | |
| { | |
| "epoch": 75.15, | |
| "learning_rate": 1.3988349514563109e-05, | |
| "loss": 0.0279, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 75.24, | |
| "learning_rate": 1.3980582524271846e-05, | |
| "loss": 0.0353, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 75.34, | |
| "learning_rate": 1.3972815533980584e-05, | |
| "loss": 0.0031, | |
| "step": 7760 | |
| }, | |
| { | |
| "epoch": 75.44, | |
| "learning_rate": 1.3965048543689321e-05, | |
| "loss": 0.0166, | |
| "step": 7770 | |
| }, | |
| { | |
| "epoch": 75.53, | |
| "learning_rate": 1.3957281553398059e-05, | |
| "loss": 0.0515, | |
| "step": 7780 | |
| }, | |
| { | |
| "epoch": 75.63, | |
| "learning_rate": 1.3949514563106796e-05, | |
| "loss": 0.0705, | |
| "step": 7790 | |
| }, | |
| { | |
| "epoch": 75.73, | |
| "learning_rate": 1.3941747572815535e-05, | |
| "loss": 0.1237, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 75.83, | |
| "learning_rate": 1.3933980582524273e-05, | |
| "loss": 0.087, | |
| "step": 7810 | |
| }, | |
| { | |
| "epoch": 75.92, | |
| "learning_rate": 1.392621359223301e-05, | |
| "loss": 0.012, | |
| "step": 7820 | |
| }, | |
| { | |
| "epoch": 76.0, | |
| "eval_accuracy": 0.9931506849315068, | |
| "eval_loss": 0.0362648107111454, | |
| "eval_runtime": 0.9186, | |
| "eval_samples_per_second": 158.943, | |
| "eval_steps_per_second": 20.684, | |
| "step": 7828 | |
| }, | |
| { | |
| "epoch": 76.02, | |
| "learning_rate": 1.391844660194175e-05, | |
| "loss": 0.1469, | |
| "step": 7830 | |
| }, | |
| { | |
| "epoch": 76.12, | |
| "learning_rate": 1.3910679611650487e-05, | |
| "loss": 0.0019, | |
| "step": 7840 | |
| }, | |
| { | |
| "epoch": 76.21, | |
| "learning_rate": 1.3902912621359224e-05, | |
| "loss": 0.2058, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 76.31, | |
| "learning_rate": 1.3895145631067964e-05, | |
| "loss": 0.0152, | |
| "step": 7860 | |
| }, | |
| { | |
| "epoch": 76.41, | |
| "learning_rate": 1.3887378640776701e-05, | |
| "loss": 0.0499, | |
| "step": 7870 | |
| }, | |
| { | |
| "epoch": 76.5, | |
| "learning_rate": 1.3879611650485439e-05, | |
| "loss": 0.0561, | |
| "step": 7880 | |
| }, | |
| { | |
| "epoch": 76.6, | |
| "learning_rate": 1.3871844660194176e-05, | |
| "loss": 0.1247, | |
| "step": 7890 | |
| }, | |
| { | |
| "epoch": 76.7, | |
| "learning_rate": 1.3864077669902914e-05, | |
| "loss": 0.0401, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 76.8, | |
| "learning_rate": 1.3856310679611651e-05, | |
| "loss": 0.0112, | |
| "step": 7910 | |
| }, | |
| { | |
| "epoch": 76.89, | |
| "learning_rate": 1.3848543689320389e-05, | |
| "loss": 0.0092, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 76.99, | |
| "learning_rate": 1.3840776699029128e-05, | |
| "loss": 0.0215, | |
| "step": 7930 | |
| }, | |
| { | |
| "epoch": 77.0, | |
| "eval_accuracy": 0.9931506849315068, | |
| "eval_loss": 0.030757028609514236, | |
| "eval_runtime": 0.9146, | |
| "eval_samples_per_second": 159.637, | |
| "eval_steps_per_second": 20.775, | |
| "step": 7931 | |
| }, | |
| { | |
| "epoch": 77.09, | |
| "learning_rate": 1.3833009708737865e-05, | |
| "loss": 0.1653, | |
| "step": 7940 | |
| }, | |
| { | |
| "epoch": 77.18, | |
| "learning_rate": 1.3825242718446603e-05, | |
| "loss": 0.0577, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 77.28, | |
| "learning_rate": 1.381747572815534e-05, | |
| "loss": 0.0645, | |
| "step": 7960 | |
| }, | |
| { | |
| "epoch": 77.38, | |
| "learning_rate": 1.3809708737864078e-05, | |
| "loss": 0.0362, | |
| "step": 7970 | |
| }, | |
| { | |
| "epoch": 77.48, | |
| "learning_rate": 1.3801941747572815e-05, | |
| "loss": 0.0155, | |
| "step": 7980 | |
| }, | |
| { | |
| "epoch": 77.57, | |
| "learning_rate": 1.3794174757281556e-05, | |
| "loss": 0.0902, | |
| "step": 7990 | |
| }, | |
| { | |
| "epoch": 77.67, | |
| "learning_rate": 1.3786407766990294e-05, | |
| "loss": 0.059, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 77.77, | |
| "learning_rate": 1.3778640776699031e-05, | |
| "loss": 0.0036, | |
| "step": 8010 | |
| }, | |
| { | |
| "epoch": 77.86, | |
| "learning_rate": 1.3770873786407769e-05, | |
| "loss": 0.0471, | |
| "step": 8020 | |
| }, | |
| { | |
| "epoch": 77.96, | |
| "learning_rate": 1.3763106796116506e-05, | |
| "loss": 0.0319, | |
| "step": 8030 | |
| }, | |
| { | |
| "epoch": 78.0, | |
| "eval_accuracy": 0.9726027397260274, | |
| "eval_loss": 0.05245841667056084, | |
| "eval_runtime": 0.9259, | |
| "eval_samples_per_second": 157.685, | |
| "eval_steps_per_second": 20.521, | |
| "step": 8034 | |
| }, | |
| { | |
| "epoch": 78.06, | |
| "learning_rate": 1.3755339805825244e-05, | |
| "loss": 0.0681, | |
| "step": 8040 | |
| }, | |
| { | |
| "epoch": 78.16, | |
| "learning_rate": 1.3747572815533983e-05, | |
| "loss": 0.0086, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 78.25, | |
| "learning_rate": 1.373980582524272e-05, | |
| "loss": 0.0723, | |
| "step": 8060 | |
| }, | |
| { | |
| "epoch": 78.35, | |
| "learning_rate": 1.3732038834951458e-05, | |
| "loss": 0.1384, | |
| "step": 8070 | |
| }, | |
| { | |
| "epoch": 78.45, | |
| "learning_rate": 1.3724271844660195e-05, | |
| "loss": 0.1407, | |
| "step": 8080 | |
| }, | |
| { | |
| "epoch": 78.54, | |
| "learning_rate": 1.3716504854368933e-05, | |
| "loss": 0.007, | |
| "step": 8090 | |
| }, | |
| { | |
| "epoch": 78.64, | |
| "learning_rate": 1.370873786407767e-05, | |
| "loss": 0.0528, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 78.74, | |
| "learning_rate": 1.370097087378641e-05, | |
| "loss": 0.0091, | |
| "step": 8110 | |
| }, | |
| { | |
| "epoch": 78.83, | |
| "learning_rate": 1.3693203883495147e-05, | |
| "loss": 0.033, | |
| "step": 8120 | |
| }, | |
| { | |
| "epoch": 78.93, | |
| "learning_rate": 1.3685436893203884e-05, | |
| "loss": 0.093, | |
| "step": 8130 | |
| }, | |
| { | |
| "epoch": 79.0, | |
| "eval_accuracy": 0.9931506849315068, | |
| "eval_loss": 0.03234969452023506, | |
| "eval_runtime": 0.8975, | |
| "eval_samples_per_second": 162.673, | |
| "eval_steps_per_second": 21.17, | |
| "step": 8137 | |
| }, | |
| { | |
| "epoch": 79.03, | |
| "learning_rate": 1.3677669902912622e-05, | |
| "loss": 0.0629, | |
| "step": 8140 | |
| }, | |
| { | |
| "epoch": 79.13, | |
| "learning_rate": 1.366990291262136e-05, | |
| "loss": 0.0159, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 79.22, | |
| "learning_rate": 1.3662135922330097e-05, | |
| "loss": 0.011, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 79.32, | |
| "learning_rate": 1.3654368932038834e-05, | |
| "loss": 0.1088, | |
| "step": 8170 | |
| }, | |
| { | |
| "epoch": 79.42, | |
| "learning_rate": 1.3646601941747575e-05, | |
| "loss": 0.1116, | |
| "step": 8180 | |
| }, | |
| { | |
| "epoch": 79.51, | |
| "learning_rate": 1.3638834951456313e-05, | |
| "loss": 0.0462, | |
| "step": 8190 | |
| }, | |
| { | |
| "epoch": 79.61, | |
| "learning_rate": 1.363106796116505e-05, | |
| "loss": 0.0378, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 79.71, | |
| "learning_rate": 1.3623300970873788e-05, | |
| "loss": 0.0016, | |
| "step": 8210 | |
| }, | |
| { | |
| "epoch": 79.81, | |
| "learning_rate": 1.3615533980582525e-05, | |
| "loss": 0.0658, | |
| "step": 8220 | |
| }, | |
| { | |
| "epoch": 79.9, | |
| "learning_rate": 1.3607766990291263e-05, | |
| "loss": 0.1224, | |
| "step": 8230 | |
| }, | |
| { | |
| "epoch": 80.0, | |
| "learning_rate": 1.3600000000000002e-05, | |
| "loss": 0.0813, | |
| "step": 8240 | |
| }, | |
| { | |
| "epoch": 80.0, | |
| "eval_accuracy": 0.9931506849315068, | |
| "eval_loss": 0.04363853111863136, | |
| "eval_runtime": 0.9212, | |
| "eval_samples_per_second": 158.487, | |
| "eval_steps_per_second": 20.625, | |
| "step": 8240 | |
| }, | |
| { | |
| "epoch": 80.1, | |
| "learning_rate": 1.359223300970874e-05, | |
| "loss": 0.1413, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 80.19, | |
| "learning_rate": 1.3584466019417477e-05, | |
| "loss": 0.0308, | |
| "step": 8260 | |
| }, | |
| { | |
| "epoch": 80.29, | |
| "learning_rate": 1.3576699029126214e-05, | |
| "loss": 0.0483, | |
| "step": 8270 | |
| }, | |
| { | |
| "epoch": 80.39, | |
| "learning_rate": 1.3568932038834952e-05, | |
| "loss": 0.0162, | |
| "step": 8280 | |
| }, | |
| { | |
| "epoch": 80.49, | |
| "learning_rate": 1.356116504854369e-05, | |
| "loss": 0.1146, | |
| "step": 8290 | |
| }, | |
| { | |
| "epoch": 80.58, | |
| "learning_rate": 1.3553398058252429e-05, | |
| "loss": 0.0095, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 80.68, | |
| "learning_rate": 1.3545631067961166e-05, | |
| "loss": 0.0334, | |
| "step": 8310 | |
| }, | |
| { | |
| "epoch": 80.78, | |
| "learning_rate": 1.3537864077669904e-05, | |
| "loss": 0.0178, | |
| "step": 8320 | |
| }, | |
| { | |
| "epoch": 80.87, | |
| "learning_rate": 1.3530097087378641e-05, | |
| "loss": 0.0963, | |
| "step": 8330 | |
| }, | |
| { | |
| "epoch": 80.97, | |
| "learning_rate": 1.352233009708738e-05, | |
| "loss": 0.0014, | |
| "step": 8340 | |
| }, | |
| { | |
| "epoch": 81.0, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.0034924051724374294, | |
| "eval_runtime": 0.9228, | |
| "eval_samples_per_second": 158.21, | |
| "eval_steps_per_second": 20.589, | |
| "step": 8343 | |
| }, | |
| { | |
| "epoch": 81.07, | |
| "learning_rate": 1.3514563106796118e-05, | |
| "loss": 0.0019, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 81.17, | |
| "learning_rate": 1.3506796116504855e-05, | |
| "loss": 0.0013, | |
| "step": 8360 | |
| }, | |
| { | |
| "epoch": 81.26, | |
| "learning_rate": 1.3499029126213594e-05, | |
| "loss": 0.0016, | |
| "step": 8370 | |
| }, | |
| { | |
| "epoch": 81.36, | |
| "learning_rate": 1.3491262135922332e-05, | |
| "loss": 0.0363, | |
| "step": 8380 | |
| }, | |
| { | |
| "epoch": 81.46, | |
| "learning_rate": 1.348349514563107e-05, | |
| "loss": 0.0769, | |
| "step": 8390 | |
| }, | |
| { | |
| "epoch": 81.55, | |
| "learning_rate": 1.3475728155339807e-05, | |
| "loss": 0.0014, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 81.65, | |
| "learning_rate": 1.3467961165048544e-05, | |
| "loss": 0.0019, | |
| "step": 8410 | |
| }, | |
| { | |
| "epoch": 81.75, | |
| "learning_rate": 1.3460194174757282e-05, | |
| "loss": 0.0999, | |
| "step": 8420 | |
| }, | |
| { | |
| "epoch": 81.84, | |
| "learning_rate": 1.3452427184466021e-05, | |
| "loss": 0.0025, | |
| "step": 8430 | |
| }, | |
| { | |
| "epoch": 81.94, | |
| "learning_rate": 1.3444660194174759e-05, | |
| "loss": 0.0774, | |
| "step": 8440 | |
| }, | |
| { | |
| "epoch": 82.0, | |
| "eval_accuracy": 0.9726027397260274, | |
| "eval_loss": 0.10503670573234558, | |
| "eval_runtime": 0.9067, | |
| "eval_samples_per_second": 161.03, | |
| "eval_steps_per_second": 20.956, | |
| "step": 8446 | |
| }, | |
| { | |
| "epoch": 82.04, | |
| "learning_rate": 1.3436893203883496e-05, | |
| "loss": 0.0666, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 82.14, | |
| "learning_rate": 1.3429126213592234e-05, | |
| "loss": 0.0457, | |
| "step": 8460 | |
| }, | |
| { | |
| "epoch": 82.23, | |
| "learning_rate": 1.3421359223300971e-05, | |
| "loss": 0.0802, | |
| "step": 8470 | |
| }, | |
| { | |
| "epoch": 82.33, | |
| "learning_rate": 1.3413592233009709e-05, | |
| "loss": 0.0304, | |
| "step": 8480 | |
| }, | |
| { | |
| "epoch": 82.43, | |
| "learning_rate": 1.340582524271845e-05, | |
| "loss": 0.0012, | |
| "step": 8490 | |
| }, | |
| { | |
| "epoch": 82.52, | |
| "learning_rate": 1.3398058252427187e-05, | |
| "loss": 0.0039, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 82.62, | |
| "learning_rate": 1.3390291262135924e-05, | |
| "loss": 0.0734, | |
| "step": 8510 | |
| }, | |
| { | |
| "epoch": 82.72, | |
| "learning_rate": 1.3382524271844662e-05, | |
| "loss": 0.0466, | |
| "step": 8520 | |
| }, | |
| { | |
| "epoch": 82.82, | |
| "learning_rate": 1.33747572815534e-05, | |
| "loss": 0.0051, | |
| "step": 8530 | |
| }, | |
| { | |
| "epoch": 82.91, | |
| "learning_rate": 1.3366990291262137e-05, | |
| "loss": 0.0393, | |
| "step": 8540 | |
| }, | |
| { | |
| "epoch": 83.0, | |
| "eval_accuracy": 0.9794520547945206, | |
| "eval_loss": 0.08058138936758041, | |
| "eval_runtime": 0.8979, | |
| "eval_samples_per_second": 162.604, | |
| "eval_steps_per_second": 21.161, | |
| "step": 8549 | |
| }, | |
| { | |
| "epoch": 83.01, | |
| "learning_rate": 1.3359223300970874e-05, | |
| "loss": 0.1685, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 83.11, | |
| "learning_rate": 1.3351456310679614e-05, | |
| "loss": 0.0016, | |
| "step": 8560 | |
| }, | |
| { | |
| "epoch": 83.2, | |
| "learning_rate": 1.3343689320388351e-05, | |
| "loss": 0.0839, | |
| "step": 8570 | |
| }, | |
| { | |
| "epoch": 83.3, | |
| "learning_rate": 1.3335922330097089e-05, | |
| "loss": 0.0922, | |
| "step": 8580 | |
| }, | |
| { | |
| "epoch": 83.4, | |
| "learning_rate": 1.3328155339805826e-05, | |
| "loss": 0.0146, | |
| "step": 8590 | |
| }, | |
| { | |
| "epoch": 83.5, | |
| "learning_rate": 1.3320388349514564e-05, | |
| "loss": 0.0014, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 83.59, | |
| "learning_rate": 1.3312621359223301e-05, | |
| "loss": 0.0269, | |
| "step": 8610 | |
| }, | |
| { | |
| "epoch": 83.69, | |
| "learning_rate": 1.330485436893204e-05, | |
| "loss": 0.0333, | |
| "step": 8620 | |
| }, | |
| { | |
| "epoch": 83.79, | |
| "learning_rate": 1.3297087378640778e-05, | |
| "loss": 0.0037, | |
| "step": 8630 | |
| }, | |
| { | |
| "epoch": 83.88, | |
| "learning_rate": 1.3289320388349515e-05, | |
| "loss": 0.0935, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 83.98, | |
| "learning_rate": 1.3281553398058253e-05, | |
| "loss": 0.0537, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 84.0, | |
| "eval_accuracy": 0.9794520547945206, | |
| "eval_loss": 0.07363705337047577, | |
| "eval_runtime": 0.8825, | |
| "eval_samples_per_second": 165.435, | |
| "eval_steps_per_second": 21.529, | |
| "step": 8652 | |
| }, | |
| { | |
| "epoch": 84.08, | |
| "learning_rate": 1.327378640776699e-05, | |
| "loss": 0.1431, | |
| "step": 8660 | |
| }, | |
| { | |
| "epoch": 84.17, | |
| "learning_rate": 1.3266019417475728e-05, | |
| "loss": 0.1869, | |
| "step": 8670 | |
| }, | |
| { | |
| "epoch": 84.27, | |
| "learning_rate": 1.3258252427184469e-05, | |
| "loss": 0.041, | |
| "step": 8680 | |
| }, | |
| { | |
| "epoch": 84.37, | |
| "learning_rate": 1.3250485436893206e-05, | |
| "loss": 0.0108, | |
| "step": 8690 | |
| }, | |
| { | |
| "epoch": 84.47, | |
| "learning_rate": 1.3242718446601944e-05, | |
| "loss": 0.0105, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 84.56, | |
| "learning_rate": 1.3234951456310681e-05, | |
| "loss": 0.0507, | |
| "step": 8710 | |
| }, | |
| { | |
| "epoch": 84.66, | |
| "learning_rate": 1.3227184466019419e-05, | |
| "loss": 0.0333, | |
| "step": 8720 | |
| }, | |
| { | |
| "epoch": 84.76, | |
| "learning_rate": 1.3219417475728156e-05, | |
| "loss": 0.0421, | |
| "step": 8730 | |
| }, | |
| { | |
| "epoch": 84.85, | |
| "learning_rate": 1.3211650485436894e-05, | |
| "loss": 0.0567, | |
| "step": 8740 | |
| }, | |
| { | |
| "epoch": 84.95, | |
| "learning_rate": 1.3203883495145633e-05, | |
| "loss": 0.016, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 85.0, | |
| "eval_accuracy": 0.9863013698630136, | |
| "eval_loss": 0.06812305748462677, | |
| "eval_runtime": 0.9225, | |
| "eval_samples_per_second": 158.264, | |
| "eval_steps_per_second": 20.596, | |
| "step": 8755 | |
| }, | |
| { | |
| "epoch": 85.05, | |
| "learning_rate": 1.319611650485437e-05, | |
| "loss": 0.0198, | |
| "step": 8760 | |
| }, | |
| { | |
| "epoch": 85.15, | |
| "learning_rate": 1.3188349514563108e-05, | |
| "loss": 0.1216, | |
| "step": 8770 | |
| }, | |
| { | |
| "epoch": 85.24, | |
| "learning_rate": 1.3180582524271845e-05, | |
| "loss": 0.0408, | |
| "step": 8780 | |
| }, | |
| { | |
| "epoch": 85.34, | |
| "learning_rate": 1.3172815533980583e-05, | |
| "loss": 0.0033, | |
| "step": 8790 | |
| }, | |
| { | |
| "epoch": 85.44, | |
| "learning_rate": 1.316504854368932e-05, | |
| "loss": 0.0379, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 85.53, | |
| "learning_rate": 1.315728155339806e-05, | |
| "loss": 0.0153, | |
| "step": 8810 | |
| }, | |
| { | |
| "epoch": 85.63, | |
| "learning_rate": 1.3149514563106797e-05, | |
| "loss": 0.0569, | |
| "step": 8820 | |
| }, | |
| { | |
| "epoch": 85.73, | |
| "learning_rate": 1.3141747572815534e-05, | |
| "loss": 0.0773, | |
| "step": 8830 | |
| }, | |
| { | |
| "epoch": 85.83, | |
| "learning_rate": 1.3133980582524274e-05, | |
| "loss": 0.0036, | |
| "step": 8840 | |
| }, | |
| { | |
| "epoch": 85.92, | |
| "learning_rate": 1.3126213592233011e-05, | |
| "loss": 0.0562, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 86.0, | |
| "eval_accuracy": 0.9726027397260274, | |
| "eval_loss": 0.13558551669120789, | |
| "eval_runtime": 0.9401, | |
| "eval_samples_per_second": 155.297, | |
| "eval_steps_per_second": 20.21, | |
| "step": 8858 | |
| }, | |
| { | |
| "epoch": 86.02, | |
| "learning_rate": 1.3118446601941749e-05, | |
| "loss": 0.0014, | |
| "step": 8860 | |
| }, | |
| { | |
| "epoch": 86.12, | |
| "learning_rate": 1.3110679611650488e-05, | |
| "loss": 0.0025, | |
| "step": 8870 | |
| }, | |
| { | |
| "epoch": 86.21, | |
| "learning_rate": 1.3102912621359225e-05, | |
| "loss": 0.0016, | |
| "step": 8880 | |
| }, | |
| { | |
| "epoch": 86.31, | |
| "learning_rate": 1.3095145631067963e-05, | |
| "loss": 0.0182, | |
| "step": 8890 | |
| }, | |
| { | |
| "epoch": 86.41, | |
| "learning_rate": 1.30873786407767e-05, | |
| "loss": 0.0062, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 86.5, | |
| "learning_rate": 1.3079611650485438e-05, | |
| "loss": 0.0416, | |
| "step": 8910 | |
| }, | |
| { | |
| "epoch": 86.6, | |
| "learning_rate": 1.3071844660194175e-05, | |
| "loss": 0.0296, | |
| "step": 8920 | |
| }, | |
| { | |
| "epoch": 86.7, | |
| "learning_rate": 1.3064077669902913e-05, | |
| "loss": 0.1365, | |
| "step": 8930 | |
| }, | |
| { | |
| "epoch": 86.8, | |
| "learning_rate": 1.3056310679611652e-05, | |
| "loss": 0.0012, | |
| "step": 8940 | |
| }, | |
| { | |
| "epoch": 86.89, | |
| "learning_rate": 1.304854368932039e-05, | |
| "loss": 0.1132, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 86.99, | |
| "learning_rate": 1.3040776699029127e-05, | |
| "loss": 0.0133, | |
| "step": 8960 | |
| }, | |
| { | |
| "epoch": 87.0, | |
| "eval_accuracy": 0.9794520547945206, | |
| "eval_loss": 0.09805306047201157, | |
| "eval_runtime": 0.9311, | |
| "eval_samples_per_second": 156.804, | |
| "eval_steps_per_second": 20.406, | |
| "step": 8961 | |
| }, | |
| { | |
| "epoch": 87.09, | |
| "learning_rate": 1.3033009708737864e-05, | |
| "loss": 0.0237, | |
| "step": 8970 | |
| }, | |
| { | |
| "epoch": 87.18, | |
| "learning_rate": 1.3025242718446602e-05, | |
| "loss": 0.0895, | |
| "step": 8980 | |
| }, | |
| { | |
| "epoch": 87.28, | |
| "learning_rate": 1.301747572815534e-05, | |
| "loss": 0.043, | |
| "step": 8990 | |
| }, | |
| { | |
| "epoch": 87.38, | |
| "learning_rate": 1.300970873786408e-05, | |
| "loss": 0.0013, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 87.48, | |
| "learning_rate": 1.3001941747572818e-05, | |
| "loss": 0.1008, | |
| "step": 9010 | |
| }, | |
| { | |
| "epoch": 87.57, | |
| "learning_rate": 1.2994174757281555e-05, | |
| "loss": 0.1435, | |
| "step": 9020 | |
| }, | |
| { | |
| "epoch": 87.67, | |
| "learning_rate": 1.2986407766990293e-05, | |
| "loss": 0.0573, | |
| "step": 9030 | |
| }, | |
| { | |
| "epoch": 87.77, | |
| "learning_rate": 1.297864077669903e-05, | |
| "loss": 0.092, | |
| "step": 9040 | |
| }, | |
| { | |
| "epoch": 87.86, | |
| "learning_rate": 1.2970873786407768e-05, | |
| "loss": 0.1068, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 87.96, | |
| "learning_rate": 1.2963106796116507e-05, | |
| "loss": 0.0682, | |
| "step": 9060 | |
| }, | |
| { | |
| "epoch": 88.0, | |
| "eval_accuracy": 0.9863013698630136, | |
| "eval_loss": 0.07209344208240509, | |
| "eval_runtime": 0.9453, | |
| "eval_samples_per_second": 154.455, | |
| "eval_steps_per_second": 20.1, | |
| "step": 9064 | |
| }, | |
| { | |
| "epoch": 88.06, | |
| "learning_rate": 1.2955339805825244e-05, | |
| "loss": 0.0018, | |
| "step": 9070 | |
| }, | |
| { | |
| "epoch": 88.16, | |
| "learning_rate": 1.2947572815533982e-05, | |
| "loss": 0.1112, | |
| "step": 9080 | |
| }, | |
| { | |
| "epoch": 88.25, | |
| "learning_rate": 1.293980582524272e-05, | |
| "loss": 0.0071, | |
| "step": 9090 | |
| }, | |
| { | |
| "epoch": 88.35, | |
| "learning_rate": 1.2932038834951457e-05, | |
| "loss": 0.0061, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 88.45, | |
| "learning_rate": 1.2924271844660194e-05, | |
| "loss": 0.1292, | |
| "step": 9110 | |
| }, | |
| { | |
| "epoch": 88.54, | |
| "learning_rate": 1.2916504854368934e-05, | |
| "loss": 0.0175, | |
| "step": 9120 | |
| }, | |
| { | |
| "epoch": 88.64, | |
| "learning_rate": 1.2908737864077671e-05, | |
| "loss": 0.04, | |
| "step": 9130 | |
| }, | |
| { | |
| "epoch": 88.74, | |
| "learning_rate": 1.2900970873786409e-05, | |
| "loss": 0.0015, | |
| "step": 9140 | |
| }, | |
| { | |
| "epoch": 88.83, | |
| "learning_rate": 1.2893203883495146e-05, | |
| "loss": 0.0422, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 88.93, | |
| "learning_rate": 1.2885436893203884e-05, | |
| "loss": 0.0514, | |
| "step": 9160 | |
| }, | |
| { | |
| "epoch": 89.0, | |
| "eval_accuracy": 0.9863013698630136, | |
| "eval_loss": 0.060059912502765656, | |
| "eval_runtime": 0.9117, | |
| "eval_samples_per_second": 160.14, | |
| "eval_steps_per_second": 20.84, | |
| "step": 9167 | |
| }, | |
| { | |
| "epoch": 89.03, | |
| "learning_rate": 1.2877669902912621e-05, | |
| "loss": 0.0129, | |
| "step": 9170 | |
| }, | |
| { | |
| "epoch": 89.13, | |
| "learning_rate": 1.2869902912621359e-05, | |
| "loss": 0.0148, | |
| "step": 9180 | |
| }, | |
| { | |
| "epoch": 89.22, | |
| "learning_rate": 1.28621359223301e-05, | |
| "loss": 0.0098, | |
| "step": 9190 | |
| }, | |
| { | |
| "epoch": 89.32, | |
| "learning_rate": 1.2854368932038837e-05, | |
| "loss": 0.0552, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 89.42, | |
| "learning_rate": 1.2846601941747574e-05, | |
| "loss": 0.0969, | |
| "step": 9210 | |
| }, | |
| { | |
| "epoch": 89.51, | |
| "learning_rate": 1.2838834951456312e-05, | |
| "loss": 0.0018, | |
| "step": 9220 | |
| }, | |
| { | |
| "epoch": 89.61, | |
| "learning_rate": 1.283106796116505e-05, | |
| "loss": 0.0014, | |
| "step": 9230 | |
| }, | |
| { | |
| "epoch": 89.71, | |
| "learning_rate": 1.2823300970873787e-05, | |
| "loss": 0.0012, | |
| "step": 9240 | |
| }, | |
| { | |
| "epoch": 89.81, | |
| "learning_rate": 1.2815533980582526e-05, | |
| "loss": 0.0018, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 89.9, | |
| "learning_rate": 1.2807766990291264e-05, | |
| "loss": 0.0017, | |
| "step": 9260 | |
| }, | |
| { | |
| "epoch": 90.0, | |
| "learning_rate": 1.2800000000000001e-05, | |
| "loss": 0.0043, | |
| "step": 9270 | |
| }, | |
| { | |
| "epoch": 90.0, | |
| "eval_accuracy": 0.9657534246575342, | |
| "eval_loss": 0.16303785145282745, | |
| "eval_runtime": 0.8933, | |
| "eval_samples_per_second": 163.434, | |
| "eval_steps_per_second": 21.269, | |
| "step": 9270 | |
| }, | |
| { | |
| "epoch": 90.1, | |
| "learning_rate": 1.2792233009708739e-05, | |
| "loss": 0.1021, | |
| "step": 9280 | |
| }, | |
| { | |
| "epoch": 90.19, | |
| "learning_rate": 1.2784466019417476e-05, | |
| "loss": 0.0296, | |
| "step": 9290 | |
| }, | |
| { | |
| "epoch": 90.29, | |
| "learning_rate": 1.2776699029126214e-05, | |
| "loss": 0.0811, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 90.39, | |
| "learning_rate": 1.2768932038834953e-05, | |
| "loss": 0.0438, | |
| "step": 9310 | |
| }, | |
| { | |
| "epoch": 90.49, | |
| "learning_rate": 1.276116504854369e-05, | |
| "loss": 0.0245, | |
| "step": 9320 | |
| }, | |
| { | |
| "epoch": 90.58, | |
| "learning_rate": 1.2753398058252428e-05, | |
| "loss": 0.0102, | |
| "step": 9330 | |
| }, | |
| { | |
| "epoch": 90.68, | |
| "learning_rate": 1.2745631067961165e-05, | |
| "loss": 0.0845, | |
| "step": 9340 | |
| }, | |
| { | |
| "epoch": 90.78, | |
| "learning_rate": 1.2737864077669904e-05, | |
| "loss": 0.0013, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 90.87, | |
| "learning_rate": 1.2730097087378642e-05, | |
| "loss": 0.0722, | |
| "step": 9360 | |
| }, | |
| { | |
| "epoch": 90.97, | |
| "learning_rate": 1.272233009708738e-05, | |
| "loss": 0.0011, | |
| "step": 9370 | |
| }, | |
| { | |
| "epoch": 91.0, | |
| "eval_accuracy": 0.9863013698630136, | |
| "eval_loss": 0.029909975826740265, | |
| "eval_runtime": 0.8937, | |
| "eval_samples_per_second": 163.37, | |
| "eval_steps_per_second": 21.26, | |
| "step": 9373 | |
| }, | |
| { | |
| "epoch": 91.07, | |
| "learning_rate": 1.2714563106796119e-05, | |
| "loss": 0.0259, | |
| "step": 9380 | |
| }, | |
| { | |
| "epoch": 91.17, | |
| "learning_rate": 1.2706796116504856e-05, | |
| "loss": 0.0221, | |
| "step": 9390 | |
| }, | |
| { | |
| "epoch": 91.26, | |
| "learning_rate": 1.2699029126213594e-05, | |
| "loss": 0.1292, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 91.36, | |
| "learning_rate": 1.2691262135922331e-05, | |
| "loss": 0.0398, | |
| "step": 9410 | |
| }, | |
| { | |
| "epoch": 91.46, | |
| "learning_rate": 1.2683495145631069e-05, | |
| "loss": 0.0971, | |
| "step": 9420 | |
| }, | |
| { | |
| "epoch": 91.55, | |
| "learning_rate": 1.2675728155339806e-05, | |
| "loss": 0.0973, | |
| "step": 9430 | |
| }, | |
| { | |
| "epoch": 91.65, | |
| "learning_rate": 1.2667961165048545e-05, | |
| "loss": 0.0636, | |
| "step": 9440 | |
| }, | |
| { | |
| "epoch": 91.75, | |
| "learning_rate": 1.2660194174757283e-05, | |
| "loss": 0.0216, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 91.84, | |
| "learning_rate": 1.265242718446602e-05, | |
| "loss": 0.1983, | |
| "step": 9460 | |
| }, | |
| { | |
| "epoch": 91.94, | |
| "learning_rate": 1.2644660194174758e-05, | |
| "loss": 0.0707, | |
| "step": 9470 | |
| }, | |
| { | |
| "epoch": 92.0, | |
| "eval_accuracy": 0.9657534246575342, | |
| "eval_loss": 0.100075863301754, | |
| "eval_runtime": 0.8859, | |
| "eval_samples_per_second": 164.8, | |
| "eval_steps_per_second": 21.447, | |
| "step": 9476 | |
| }, | |
| { | |
| "epoch": 92.04, | |
| "learning_rate": 1.2636893203883495e-05, | |
| "loss": 0.0351, | |
| "step": 9480 | |
| }, | |
| { | |
| "epoch": 92.14, | |
| "learning_rate": 1.2629126213592233e-05, | |
| "loss": 0.0242, | |
| "step": 9490 | |
| }, | |
| { | |
| "epoch": 92.23, | |
| "learning_rate": 1.2621359223300974e-05, | |
| "loss": 0.0617, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 92.33, | |
| "learning_rate": 1.2613592233009711e-05, | |
| "loss": 0.1866, | |
| "step": 9510 | |
| }, | |
| { | |
| "epoch": 92.43, | |
| "learning_rate": 1.2605825242718449e-05, | |
| "loss": 0.0045, | |
| "step": 9520 | |
| }, | |
| { | |
| "epoch": 92.52, | |
| "learning_rate": 1.2598058252427186e-05, | |
| "loss": 0.1162, | |
| "step": 9530 | |
| }, | |
| { | |
| "epoch": 92.62, | |
| "learning_rate": 1.2590291262135924e-05, | |
| "loss": 0.034, | |
| "step": 9540 | |
| }, | |
| { | |
| "epoch": 92.72, | |
| "learning_rate": 1.2582524271844661e-05, | |
| "loss": 0.1544, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 92.82, | |
| "learning_rate": 1.2574757281553399e-05, | |
| "loss": 0.0101, | |
| "step": 9560 | |
| }, | |
| { | |
| "epoch": 92.91, | |
| "learning_rate": 1.2566990291262138e-05, | |
| "loss": 0.0026, | |
| "step": 9570 | |
| }, | |
| { | |
| "epoch": 93.0, | |
| "eval_accuracy": 0.9931506849315068, | |
| "eval_loss": 0.014391203410923481, | |
| "eval_runtime": 0.8967, | |
| "eval_samples_per_second": 162.823, | |
| "eval_steps_per_second": 21.189, | |
| "step": 9579 | |
| }, | |
| { | |
| "epoch": 93.01, | |
| "learning_rate": 1.2559223300970875e-05, | |
| "loss": 0.0848, | |
| "step": 9580 | |
| }, | |
| { | |
| "epoch": 93.11, | |
| "learning_rate": 1.2551456310679613e-05, | |
| "loss": 0.0198, | |
| "step": 9590 | |
| }, | |
| { | |
| "epoch": 93.2, | |
| "learning_rate": 1.254368932038835e-05, | |
| "loss": 0.033, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 93.3, | |
| "learning_rate": 1.2535922330097088e-05, | |
| "loss": 0.0032, | |
| "step": 9610 | |
| }, | |
| { | |
| "epoch": 93.4, | |
| "learning_rate": 1.2528155339805825e-05, | |
| "loss": 0.1336, | |
| "step": 9620 | |
| }, | |
| { | |
| "epoch": 93.5, | |
| "learning_rate": 1.2520388349514564e-05, | |
| "loss": 0.0281, | |
| "step": 9630 | |
| }, | |
| { | |
| "epoch": 93.59, | |
| "learning_rate": 1.2512621359223302e-05, | |
| "loss": 0.0436, | |
| "step": 9640 | |
| }, | |
| { | |
| "epoch": 93.69, | |
| "learning_rate": 1.250485436893204e-05, | |
| "loss": 0.193, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 93.79, | |
| "learning_rate": 1.2497087378640777e-05, | |
| "loss": 0.0018, | |
| "step": 9660 | |
| }, | |
| { | |
| "epoch": 93.88, | |
| "learning_rate": 1.2489320388349514e-05, | |
| "loss": 0.0509, | |
| "step": 9670 | |
| }, | |
| { | |
| "epoch": 93.98, | |
| "learning_rate": 1.2481553398058252e-05, | |
| "loss": 0.1578, | |
| "step": 9680 | |
| }, | |
| { | |
| "epoch": 94.0, | |
| "eval_accuracy": 0.9931506849315068, | |
| "eval_loss": 0.012599923647940159, | |
| "eval_runtime": 0.8826, | |
| "eval_samples_per_second": 165.42, | |
| "eval_steps_per_second": 21.527, | |
| "step": 9682 | |
| }, | |
| { | |
| "epoch": 94.08, | |
| "learning_rate": 1.2473786407766993e-05, | |
| "loss": 0.0837, | |
| "step": 9690 | |
| }, | |
| { | |
| "epoch": 94.17, | |
| "learning_rate": 1.246601941747573e-05, | |
| "loss": 0.0786, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 94.27, | |
| "learning_rate": 1.2458252427184468e-05, | |
| "loss": 0.0012, | |
| "step": 9710 | |
| }, | |
| { | |
| "epoch": 94.37, | |
| "learning_rate": 1.2450485436893205e-05, | |
| "loss": 0.0656, | |
| "step": 9720 | |
| }, | |
| { | |
| "epoch": 94.47, | |
| "learning_rate": 1.2442718446601943e-05, | |
| "loss": 0.022, | |
| "step": 9730 | |
| }, | |
| { | |
| "epoch": 94.56, | |
| "learning_rate": 1.243495145631068e-05, | |
| "loss": 0.1272, | |
| "step": 9740 | |
| }, | |
| { | |
| "epoch": 94.66, | |
| "learning_rate": 1.2427184466019418e-05, | |
| "loss": 0.0013, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 94.76, | |
| "learning_rate": 1.2419417475728157e-05, | |
| "loss": 0.0519, | |
| "step": 9760 | |
| }, | |
| { | |
| "epoch": 94.85, | |
| "learning_rate": 1.2411650485436894e-05, | |
| "loss": 0.084, | |
| "step": 9770 | |
| }, | |
| { | |
| "epoch": 94.95, | |
| "learning_rate": 1.2403883495145632e-05, | |
| "loss": 0.0431, | |
| "step": 9780 | |
| }, | |
| { | |
| "epoch": 95.0, | |
| "eval_accuracy": 0.9931506849315068, | |
| "eval_loss": 0.04091314598917961, | |
| "eval_runtime": 0.8882, | |
| "eval_samples_per_second": 164.379, | |
| "eval_steps_per_second": 21.392, | |
| "step": 9785 | |
| }, | |
| { | |
| "epoch": 95.05, | |
| "learning_rate": 1.239611650485437e-05, | |
| "loss": 0.1725, | |
| "step": 9790 | |
| }, | |
| { | |
| "epoch": 95.15, | |
| "learning_rate": 1.2388349514563107e-05, | |
| "loss": 0.0395, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 95.24, | |
| "learning_rate": 1.2380582524271844e-05, | |
| "loss": 0.1174, | |
| "step": 9810 | |
| }, | |
| { | |
| "epoch": 95.34, | |
| "learning_rate": 1.2372815533980584e-05, | |
| "loss": 0.0012, | |
| "step": 9820 | |
| }, | |
| { | |
| "epoch": 95.44, | |
| "learning_rate": 1.2365048543689321e-05, | |
| "loss": 0.0623, | |
| "step": 9830 | |
| }, | |
| { | |
| "epoch": 95.53, | |
| "learning_rate": 1.2357281553398059e-05, | |
| "loss": 0.0653, | |
| "step": 9840 | |
| }, | |
| { | |
| "epoch": 95.63, | |
| "learning_rate": 1.2349514563106798e-05, | |
| "loss": 0.0259, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 95.73, | |
| "learning_rate": 1.2341747572815535e-05, | |
| "loss": 0.0147, | |
| "step": 9860 | |
| }, | |
| { | |
| "epoch": 95.83, | |
| "learning_rate": 1.2333980582524273e-05, | |
| "loss": 0.1148, | |
| "step": 9870 | |
| }, | |
| { | |
| "epoch": 95.92, | |
| "learning_rate": 1.2326213592233012e-05, | |
| "loss": 0.1357, | |
| "step": 9880 | |
| }, | |
| { | |
| "epoch": 96.0, | |
| "eval_accuracy": 0.9863013698630136, | |
| "eval_loss": 0.05305100977420807, | |
| "eval_runtime": 0.8941, | |
| "eval_samples_per_second": 163.297, | |
| "eval_steps_per_second": 21.251, | |
| "step": 9888 | |
| }, | |
| { | |
| "epoch": 96.02, | |
| "learning_rate": 1.231844660194175e-05, | |
| "loss": 0.0041, | |
| "step": 9890 | |
| }, | |
| { | |
| "epoch": 96.12, | |
| "learning_rate": 1.2310679611650487e-05, | |
| "loss": 0.0897, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 96.21, | |
| "learning_rate": 1.2302912621359224e-05, | |
| "loss": 0.0901, | |
| "step": 9910 | |
| }, | |
| { | |
| "epoch": 96.31, | |
| "learning_rate": 1.2295145631067962e-05, | |
| "loss": 0.0013, | |
| "step": 9920 | |
| }, | |
| { | |
| "epoch": 96.41, | |
| "learning_rate": 1.22873786407767e-05, | |
| "loss": 0.1067, | |
| "step": 9930 | |
| }, | |
| { | |
| "epoch": 96.5, | |
| "learning_rate": 1.2279611650485437e-05, | |
| "loss": 0.0072, | |
| "step": 9940 | |
| }, | |
| { | |
| "epoch": 96.6, | |
| "learning_rate": 1.2271844660194176e-05, | |
| "loss": 0.0098, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 96.7, | |
| "learning_rate": 1.2264077669902914e-05, | |
| "loss": 0.0127, | |
| "step": 9960 | |
| }, | |
| { | |
| "epoch": 96.8, | |
| "learning_rate": 1.2256310679611651e-05, | |
| "loss": 0.0276, | |
| "step": 9970 | |
| }, | |
| { | |
| "epoch": 96.89, | |
| "learning_rate": 1.2248543689320389e-05, | |
| "loss": 0.093, | |
| "step": 9980 | |
| }, | |
| { | |
| "epoch": 96.99, | |
| "learning_rate": 1.2240776699029126e-05, | |
| "loss": 0.0476, | |
| "step": 9990 | |
| }, | |
| { | |
| "epoch": 97.0, | |
| "eval_accuracy": 0.9931506849315068, | |
| "eval_loss": 0.008646626956760883, | |
| "eval_runtime": 0.9314, | |
| "eval_samples_per_second": 156.762, | |
| "eval_steps_per_second": 20.4, | |
| "step": 9991 | |
| }, | |
| { | |
| "epoch": 97.09, | |
| "learning_rate": 1.2233009708737864e-05, | |
| "loss": 0.0041, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 97.18, | |
| "learning_rate": 1.2225242718446604e-05, | |
| "loss": 0.0017, | |
| "step": 10010 | |
| }, | |
| { | |
| "epoch": 97.28, | |
| "learning_rate": 1.2217475728155342e-05, | |
| "loss": 0.0313, | |
| "step": 10020 | |
| }, | |
| { | |
| "epoch": 97.38, | |
| "learning_rate": 1.220970873786408e-05, | |
| "loss": 0.0098, | |
| "step": 10030 | |
| }, | |
| { | |
| "epoch": 97.48, | |
| "learning_rate": 1.2201941747572817e-05, | |
| "loss": 0.0856, | |
| "step": 10040 | |
| }, | |
| { | |
| "epoch": 97.57, | |
| "learning_rate": 1.2194174757281554e-05, | |
| "loss": 0.1682, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 97.67, | |
| "learning_rate": 1.2186407766990292e-05, | |
| "loss": 0.1116, | |
| "step": 10060 | |
| }, | |
| { | |
| "epoch": 97.77, | |
| "learning_rate": 1.2178640776699031e-05, | |
| "loss": 0.0018, | |
| "step": 10070 | |
| }, | |
| { | |
| "epoch": 97.86, | |
| "learning_rate": 1.2170873786407769e-05, | |
| "loss": 0.002, | |
| "step": 10080 | |
| }, | |
| { | |
| "epoch": 97.96, | |
| "learning_rate": 1.2163106796116506e-05, | |
| "loss": 0.0315, | |
| "step": 10090 | |
| }, | |
| { | |
| "epoch": 98.0, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.0015310003655031323, | |
| "eval_runtime": 0.9234, | |
| "eval_samples_per_second": 158.106, | |
| "eval_steps_per_second": 20.575, | |
| "step": 10094 | |
| }, | |
| { | |
| "epoch": 98.06, | |
| "learning_rate": 1.2155339805825244e-05, | |
| "loss": 0.0884, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 98.16, | |
| "learning_rate": 1.2147572815533981e-05, | |
| "loss": 0.0014, | |
| "step": 10110 | |
| }, | |
| { | |
| "epoch": 98.25, | |
| "learning_rate": 1.2139805825242719e-05, | |
| "loss": 0.0161, | |
| "step": 10120 | |
| }, | |
| { | |
| "epoch": 98.35, | |
| "learning_rate": 1.2132038834951458e-05, | |
| "loss": 0.0423, | |
| "step": 10130 | |
| }, | |
| { | |
| "epoch": 98.45, | |
| "learning_rate": 1.2124271844660195e-05, | |
| "loss": 0.0393, | |
| "step": 10140 | |
| }, | |
| { | |
| "epoch": 98.54, | |
| "learning_rate": 1.2116504854368933e-05, | |
| "loss": 0.2191, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 98.64, | |
| "learning_rate": 1.210873786407767e-05, | |
| "loss": 0.0192, | |
| "step": 10160 | |
| }, | |
| { | |
| "epoch": 98.74, | |
| "learning_rate": 1.2100970873786408e-05, | |
| "loss": 0.0264, | |
| "step": 10170 | |
| }, | |
| { | |
| "epoch": 98.83, | |
| "learning_rate": 1.2093203883495145e-05, | |
| "loss": 0.1027, | |
| "step": 10180 | |
| }, | |
| { | |
| "epoch": 98.93, | |
| "learning_rate": 1.2085436893203883e-05, | |
| "loss": 0.0171, | |
| "step": 10190 | |
| }, | |
| { | |
| "epoch": 99.0, | |
| "eval_accuracy": 0.9931506849315068, | |
| "eval_loss": 0.0362049825489521, | |
| "eval_runtime": 0.9372, | |
| "eval_samples_per_second": 155.788, | |
| "eval_steps_per_second": 20.274, | |
| "step": 10197 | |
| }, | |
| { | |
| "epoch": 99.03, | |
| "learning_rate": 1.2077669902912624e-05, | |
| "loss": 0.1313, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 99.13, | |
| "learning_rate": 1.2069902912621361e-05, | |
| "loss": 0.0012, | |
| "step": 10210 | |
| }, | |
| { | |
| "epoch": 99.22, | |
| "learning_rate": 1.2062135922330099e-05, | |
| "loss": 0.0546, | |
| "step": 10220 | |
| }, | |
| { | |
| "epoch": 99.32, | |
| "learning_rate": 1.2054368932038836e-05, | |
| "loss": 0.0046, | |
| "step": 10230 | |
| }, | |
| { | |
| "epoch": 99.42, | |
| "learning_rate": 1.2046601941747574e-05, | |
| "loss": 0.0537, | |
| "step": 10240 | |
| }, | |
| { | |
| "epoch": 99.51, | |
| "learning_rate": 1.2038834951456311e-05, | |
| "loss": 0.0711, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 99.61, | |
| "learning_rate": 1.203106796116505e-05, | |
| "loss": 0.012, | |
| "step": 10260 | |
| }, | |
| { | |
| "epoch": 99.71, | |
| "learning_rate": 1.2023300970873788e-05, | |
| "loss": 0.0375, | |
| "step": 10270 | |
| }, | |
| { | |
| "epoch": 99.81, | |
| "learning_rate": 1.2015533980582525e-05, | |
| "loss": 0.0139, | |
| "step": 10280 | |
| }, | |
| { | |
| "epoch": 99.9, | |
| "learning_rate": 1.2007766990291263e-05, | |
| "loss": 0.0962, | |
| "step": 10290 | |
| }, | |
| { | |
| "epoch": 100.0, | |
| "learning_rate": 1.2e-05, | |
| "loss": 0.0014, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 100.0, | |
| "eval_accuracy": 0.9931506849315068, | |
| "eval_loss": 0.02322803996503353, | |
| "eval_runtime": 0.9475, | |
| "eval_samples_per_second": 154.09, | |
| "eval_steps_per_second": 20.053, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 100.1, | |
| "learning_rate": 1.1992233009708738e-05, | |
| "loss": 0.0051, | |
| "step": 10310 | |
| }, | |
| { | |
| "epoch": 100.19, | |
| "learning_rate": 1.1984466019417477e-05, | |
| "loss": 0.042, | |
| "step": 10320 | |
| }, | |
| { | |
| "epoch": 100.29, | |
| "learning_rate": 1.1976699029126214e-05, | |
| "loss": 0.001, | |
| "step": 10330 | |
| }, | |
| { | |
| "epoch": 100.39, | |
| "learning_rate": 1.1968932038834952e-05, | |
| "loss": 0.0835, | |
| "step": 10340 | |
| }, | |
| { | |
| "epoch": 100.49, | |
| "learning_rate": 1.196116504854369e-05, | |
| "loss": 0.0298, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 100.58, | |
| "learning_rate": 1.1953398058252429e-05, | |
| "loss": 0.001, | |
| "step": 10360 | |
| }, | |
| { | |
| "epoch": 100.68, | |
| "learning_rate": 1.1945631067961166e-05, | |
| "loss": 0.2664, | |
| "step": 10370 | |
| }, | |
| { | |
| "epoch": 100.78, | |
| "learning_rate": 1.1937864077669904e-05, | |
| "loss": 0.0031, | |
| "step": 10380 | |
| }, | |
| { | |
| "epoch": 100.87, | |
| "learning_rate": 1.1930097087378643e-05, | |
| "loss": 0.0008, | |
| "step": 10390 | |
| }, | |
| { | |
| "epoch": 100.97, | |
| "learning_rate": 1.192233009708738e-05, | |
| "loss": 0.1161, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 101.0, | |
| "eval_accuracy": 0.9863013698630136, | |
| "eval_loss": 0.043042704463005066, | |
| "eval_runtime": 0.9222, | |
| "eval_samples_per_second": 158.317, | |
| "eval_steps_per_second": 20.603, | |
| "step": 10403 | |
| }, | |
| { | |
| "epoch": 101.07, | |
| "learning_rate": 1.1914563106796118e-05, | |
| "loss": 0.0009, | |
| "step": 10410 | |
| }, | |
| { | |
| "epoch": 101.17, | |
| "learning_rate": 1.1906796116504855e-05, | |
| "loss": 0.0432, | |
| "step": 10420 | |
| }, | |
| { | |
| "epoch": 101.26, | |
| "learning_rate": 1.1899029126213593e-05, | |
| "loss": 0.0441, | |
| "step": 10430 | |
| }, | |
| { | |
| "epoch": 101.36, | |
| "learning_rate": 1.189126213592233e-05, | |
| "loss": 0.0617, | |
| "step": 10440 | |
| }, | |
| { | |
| "epoch": 101.46, | |
| "learning_rate": 1.188349514563107e-05, | |
| "loss": 0.0993, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 101.55, | |
| "learning_rate": 1.1875728155339807e-05, | |
| "loss": 0.1322, | |
| "step": 10460 | |
| }, | |
| { | |
| "epoch": 101.65, | |
| "learning_rate": 1.1867961165048544e-05, | |
| "loss": 0.0435, | |
| "step": 10470 | |
| }, | |
| { | |
| "epoch": 101.75, | |
| "learning_rate": 1.1860194174757282e-05, | |
| "loss": 0.013, | |
| "step": 10480 | |
| }, | |
| { | |
| "epoch": 101.84, | |
| "learning_rate": 1.185242718446602e-05, | |
| "loss": 0.0214, | |
| "step": 10490 | |
| }, | |
| { | |
| "epoch": 101.94, | |
| "learning_rate": 1.1844660194174757e-05, | |
| "loss": 0.0839, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 102.0, | |
| "eval_accuracy": 0.9794520547945206, | |
| "eval_loss": 0.10051363706588745, | |
| "eval_runtime": 0.9025, | |
| "eval_samples_per_second": 161.775, | |
| "eval_steps_per_second": 21.053, | |
| "step": 10506 | |
| }, | |
| { | |
| "epoch": 102.04, | |
| "learning_rate": 1.1836893203883498e-05, | |
| "loss": 0.0018, | |
| "step": 10510 | |
| }, | |
| { | |
| "epoch": 102.14, | |
| "learning_rate": 1.1829126213592235e-05, | |
| "loss": 0.0037, | |
| "step": 10520 | |
| }, | |
| { | |
| "epoch": 102.23, | |
| "learning_rate": 1.1821359223300973e-05, | |
| "loss": 0.0258, | |
| "step": 10530 | |
| }, | |
| { | |
| "epoch": 102.33, | |
| "learning_rate": 1.181359223300971e-05, | |
| "loss": 0.1846, | |
| "step": 10540 | |
| }, | |
| { | |
| "epoch": 102.43, | |
| "learning_rate": 1.1805825242718448e-05, | |
| "loss": 0.0964, | |
| "step": 10550 | |
| }, | |
| { | |
| "epoch": 102.52, | |
| "learning_rate": 1.1798058252427185e-05, | |
| "loss": 0.0063, | |
| "step": 10560 | |
| }, | |
| { | |
| "epoch": 102.62, | |
| "learning_rate": 1.1790291262135923e-05, | |
| "loss": 0.0415, | |
| "step": 10570 | |
| }, | |
| { | |
| "epoch": 102.72, | |
| "learning_rate": 1.1782524271844662e-05, | |
| "loss": 0.0045, | |
| "step": 10580 | |
| }, | |
| { | |
| "epoch": 102.82, | |
| "learning_rate": 1.17747572815534e-05, | |
| "loss": 0.075, | |
| "step": 10590 | |
| }, | |
| { | |
| "epoch": 102.91, | |
| "learning_rate": 1.1766990291262137e-05, | |
| "loss": 0.0428, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 103.0, | |
| "eval_accuracy": 0.9863013698630136, | |
| "eval_loss": 0.08688335865736008, | |
| "eval_runtime": 0.8958, | |
| "eval_samples_per_second": 162.976, | |
| "eval_steps_per_second": 21.209, | |
| "step": 10609 | |
| }, | |
| { | |
| "epoch": 103.01, | |
| "learning_rate": 1.1759223300970874e-05, | |
| "loss": 0.0012, | |
| "step": 10610 | |
| }, | |
| { | |
| "epoch": 103.11, | |
| "learning_rate": 1.1751456310679612e-05, | |
| "loss": 0.0385, | |
| "step": 10620 | |
| }, | |
| { | |
| "epoch": 103.2, | |
| "learning_rate": 1.174368932038835e-05, | |
| "loss": 0.0017, | |
| "step": 10630 | |
| }, | |
| { | |
| "epoch": 103.3, | |
| "learning_rate": 1.1735922330097089e-05, | |
| "loss": 0.001, | |
| "step": 10640 | |
| }, | |
| { | |
| "epoch": 103.4, | |
| "learning_rate": 1.1728155339805826e-05, | |
| "loss": 0.0486, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 103.5, | |
| "learning_rate": 1.1720388349514564e-05, | |
| "loss": 0.001, | |
| "step": 10660 | |
| }, | |
| { | |
| "epoch": 103.59, | |
| "learning_rate": 1.1712621359223301e-05, | |
| "loss": 0.0012, | |
| "step": 10670 | |
| }, | |
| { | |
| "epoch": 103.69, | |
| "learning_rate": 1.1704854368932039e-05, | |
| "loss": 0.0026, | |
| "step": 10680 | |
| }, | |
| { | |
| "epoch": 103.79, | |
| "learning_rate": 1.1697087378640776e-05, | |
| "loss": 0.1129, | |
| "step": 10690 | |
| }, | |
| { | |
| "epoch": 103.88, | |
| "learning_rate": 1.1689320388349517e-05, | |
| "loss": 0.0281, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 103.98, | |
| "learning_rate": 1.1681553398058255e-05, | |
| "loss": 0.0058, | |
| "step": 10710 | |
| }, | |
| { | |
| "epoch": 104.0, | |
| "eval_accuracy": 0.9863013698630136, | |
| "eval_loss": 0.02182828262448311, | |
| "eval_runtime": 0.9139, | |
| "eval_samples_per_second": 159.746, | |
| "eval_steps_per_second": 20.789, | |
| "step": 10712 | |
| }, | |
| { | |
| "epoch": 104.08, | |
| "learning_rate": 1.1673786407766992e-05, | |
| "loss": 0.0081, | |
| "step": 10720 | |
| }, | |
| { | |
| "epoch": 104.17, | |
| "learning_rate": 1.166601941747573e-05, | |
| "loss": 0.0008, | |
| "step": 10730 | |
| }, | |
| { | |
| "epoch": 104.27, | |
| "learning_rate": 1.1658252427184467e-05, | |
| "loss": 0.0018, | |
| "step": 10740 | |
| }, | |
| { | |
| "epoch": 104.37, | |
| "learning_rate": 1.1650485436893204e-05, | |
| "loss": 0.003, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 104.47, | |
| "learning_rate": 1.1642718446601942e-05, | |
| "loss": 0.0578, | |
| "step": 10760 | |
| }, | |
| { | |
| "epoch": 104.56, | |
| "learning_rate": 1.1634951456310681e-05, | |
| "loss": 0.0679, | |
| "step": 10770 | |
| }, | |
| { | |
| "epoch": 104.66, | |
| "learning_rate": 1.1627184466019419e-05, | |
| "loss": 0.0371, | |
| "step": 10780 | |
| }, | |
| { | |
| "epoch": 104.76, | |
| "learning_rate": 1.1619417475728156e-05, | |
| "loss": 0.0815, | |
| "step": 10790 | |
| }, | |
| { | |
| "epoch": 104.85, | |
| "learning_rate": 1.1611650485436894e-05, | |
| "loss": 0.0277, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 104.95, | |
| "learning_rate": 1.1603883495145631e-05, | |
| "loss": 0.0657, | |
| "step": 10810 | |
| }, | |
| { | |
| "epoch": 105.0, | |
| "eval_accuracy": 0.9931506849315068, | |
| "eval_loss": 0.012804172933101654, | |
| "eval_runtime": 0.8936, | |
| "eval_samples_per_second": 163.381, | |
| "eval_steps_per_second": 21.262, | |
| "step": 10815 | |
| }, | |
| { | |
| "epoch": 105.05, | |
| "learning_rate": 1.1596116504854369e-05, | |
| "loss": 0.0131, | |
| "step": 10820 | |
| }, | |
| { | |
| "epoch": 105.15, | |
| "learning_rate": 1.1588349514563108e-05, | |
| "loss": 0.0791, | |
| "step": 10830 | |
| }, | |
| { | |
| "epoch": 105.24, | |
| "learning_rate": 1.1580582524271845e-05, | |
| "loss": 0.1427, | |
| "step": 10840 | |
| }, | |
| { | |
| "epoch": 105.34, | |
| "learning_rate": 1.1572815533980583e-05, | |
| "loss": 0.0344, | |
| "step": 10850 | |
| }, | |
| { | |
| "epoch": 105.44, | |
| "learning_rate": 1.1565048543689322e-05, | |
| "loss": 0.0012, | |
| "step": 10860 | |
| }, | |
| { | |
| "epoch": 105.53, | |
| "learning_rate": 1.155728155339806e-05, | |
| "loss": 0.1199, | |
| "step": 10870 | |
| }, | |
| { | |
| "epoch": 105.63, | |
| "learning_rate": 1.1549514563106797e-05, | |
| "loss": 0.0055, | |
| "step": 10880 | |
| }, | |
| { | |
| "epoch": 105.73, | |
| "learning_rate": 1.1541747572815536e-05, | |
| "loss": 0.1326, | |
| "step": 10890 | |
| }, | |
| { | |
| "epoch": 105.83, | |
| "learning_rate": 1.1533980582524274e-05, | |
| "loss": 0.0034, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 105.92, | |
| "learning_rate": 1.1526213592233011e-05, | |
| "loss": 0.0032, | |
| "step": 10910 | |
| }, | |
| { | |
| "epoch": 106.0, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.0011723055504262447, | |
| "eval_runtime": 0.8979, | |
| "eval_samples_per_second": 162.605, | |
| "eval_steps_per_second": 21.161, | |
| "step": 10918 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 25750, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 250, | |
| "save_steps": 500, | |
| "total_flos": 6.768824322311848e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |