| { | |
| "best_metric": 0.3000366985797882, | |
| "best_model_checkpoint": "./new_exper3/checkpoint-4200", | |
| "epoch": 8.0, | |
| "global_step": 5112, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 9.980438184663537e-05, | |
| "loss": 4.9498, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 9.960876369327074e-05, | |
| "loss": 4.8556, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 9.94131455399061e-05, | |
| "loss": 4.7389, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 9.921752738654147e-05, | |
| "loss": 4.6112, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 9.902190923317684e-05, | |
| "loss": 4.54, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 9.882629107981222e-05, | |
| "loss": 4.448, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 9.863067292644758e-05, | |
| "loss": 4.3441, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 9.843505477308295e-05, | |
| "loss": 4.2899, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 9.823943661971832e-05, | |
| "loss": 4.2878, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 9.804381846635369e-05, | |
| "loss": 4.093, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_accuracy": 0.18851195065535853, | |
| "eval_loss": 4.104459285736084, | |
| "eval_runtime": 43.0936, | |
| "eval_samples_per_second": 60.195, | |
| "eval_steps_per_second": 7.542, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 9.784820031298904e-05, | |
| "loss": 4.1228, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 9.765258215962441e-05, | |
| "loss": 4.0244, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 9.745696400625978e-05, | |
| "loss": 3.9376, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 9.726134585289515e-05, | |
| "loss": 3.7948, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 9.706572769953052e-05, | |
| "loss": 3.8001, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 9.687010954616589e-05, | |
| "loss": 3.7539, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 9.667449139280126e-05, | |
| "loss": 3.6066, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 9.647887323943663e-05, | |
| "loss": 3.5595, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 9.628325508607199e-05, | |
| "loss": 3.4972, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 9.608763693270736e-05, | |
| "loss": 3.5057, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "eval_accuracy": 0.323053199691596, | |
| "eval_loss": 3.444770336151123, | |
| "eval_runtime": 43.0641, | |
| "eval_samples_per_second": 60.236, | |
| "eval_steps_per_second": 7.547, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 9.589201877934273e-05, | |
| "loss": 3.3105, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 9.56964006259781e-05, | |
| "loss": 3.4108, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 9.550078247261345e-05, | |
| "loss": 3.4285, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 9.530516431924882e-05, | |
| "loss": 3.213, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 9.510954616588421e-05, | |
| "loss": 3.1473, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 9.491392801251958e-05, | |
| "loss": 3.1898, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 9.471830985915493e-05, | |
| "loss": 3.1523, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 9.45226917057903e-05, | |
| "loss": 3.186, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 9.432707355242567e-05, | |
| "loss": 2.9603, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 9.413145539906104e-05, | |
| "loss": 2.9116, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "eval_accuracy": 0.45373939861218193, | |
| "eval_loss": 2.9483001232147217, | |
| "eval_runtime": 42.4599, | |
| "eval_samples_per_second": 61.093, | |
| "eval_steps_per_second": 7.654, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 9.39358372456964e-05, | |
| "loss": 3.0387, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 9.374021909233177e-05, | |
| "loss": 2.8818, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 9.354460093896714e-05, | |
| "loss": 2.9362, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 9.334898278560251e-05, | |
| "loss": 2.9021, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 9.315336463223788e-05, | |
| "loss": 2.7243, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 9.295774647887325e-05, | |
| "loss": 2.6124, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 9.276212832550862e-05, | |
| "loss": 2.6288, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 9.256651017214399e-05, | |
| "loss": 2.6455, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 9.237089201877934e-05, | |
| "loss": 2.6136, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 9.217527386541471e-05, | |
| "loss": 2.561, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "eval_accuracy": 0.525828835774865, | |
| "eval_loss": 2.5700132846832275, | |
| "eval_runtime": 42.7904, | |
| "eval_samples_per_second": 60.621, | |
| "eval_steps_per_second": 7.595, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 9.197965571205008e-05, | |
| "loss": 2.5745, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 9.178403755868545e-05, | |
| "loss": 2.5422, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 9.158841940532081e-05, | |
| "loss": 2.4631, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 9.139280125195618e-05, | |
| "loss": 2.3095, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 9.119718309859156e-05, | |
| "loss": 2.3756, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 9.100156494522693e-05, | |
| "loss": 2.485, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 9.080594679186229e-05, | |
| "loss": 2.3036, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 9.061032863849766e-05, | |
| "loss": 2.2608, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 9.041471048513303e-05, | |
| "loss": 2.296, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 9.02190923317684e-05, | |
| "loss": 2.1611, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "eval_accuracy": 0.6144949884348496, | |
| "eval_loss": 2.172065258026123, | |
| "eval_runtime": 42.9818, | |
| "eval_samples_per_second": 60.351, | |
| "eval_steps_per_second": 7.561, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 9.002347417840375e-05, | |
| "loss": 2.0372, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 8.982785602503912e-05, | |
| "loss": 2.0652, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 8.963223787167449e-05, | |
| "loss": 2.1529, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 8.943661971830986e-05, | |
| "loss": 1.9779, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 8.924100156494523e-05, | |
| "loss": 1.9492, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 8.90453834115806e-05, | |
| "loss": 1.9624, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 8.884976525821597e-05, | |
| "loss": 1.8139, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 8.865414710485134e-05, | |
| "loss": 1.9124, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 8.845852895148671e-05, | |
| "loss": 1.7514, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 8.826291079812207e-05, | |
| "loss": 1.715, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "eval_accuracy": 0.6407093292212799, | |
| "eval_loss": 1.8254655599594116, | |
| "eval_runtime": 42.9275, | |
| "eval_samples_per_second": 60.428, | |
| "eval_steps_per_second": 7.571, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 8.806729264475744e-05, | |
| "loss": 1.864, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 8.78716744913928e-05, | |
| "loss": 1.8157, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 8.767605633802817e-05, | |
| "loss": 1.8333, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 8.748043818466354e-05, | |
| "loss": 1.6364, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 8.728482003129891e-05, | |
| "loss": 1.5205, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 8.708920187793428e-05, | |
| "loss": 1.5193, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 8.689358372456965e-05, | |
| "loss": 1.5602, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 8.669796557120501e-05, | |
| "loss": 1.3509, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 8.650234741784038e-05, | |
| "loss": 1.408, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 8.630672926447575e-05, | |
| "loss": 1.2752, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "eval_accuracy": 0.70508866615266, | |
| "eval_loss": 1.5340100526809692, | |
| "eval_runtime": 42.6015, | |
| "eval_samples_per_second": 60.89, | |
| "eval_steps_per_second": 7.629, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 8.611111111111112e-05, | |
| "loss": 1.184, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 8.591549295774647e-05, | |
| "loss": 1.284, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 8.571987480438184e-05, | |
| "loss": 1.2331, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 8.552425665101721e-05, | |
| "loss": 1.2059, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 8.53286384976526e-05, | |
| "loss": 1.3036, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 8.513302034428795e-05, | |
| "loss": 1.2848, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 8.493740219092332e-05, | |
| "loss": 1.073, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 8.474178403755869e-05, | |
| "loss": 1.4211, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 8.454616588419406e-05, | |
| "loss": 1.067, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 8.435054773082942e-05, | |
| "loss": 1.2487, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "eval_accuracy": 0.7201233616037008, | |
| "eval_loss": 1.353263258934021, | |
| "eval_runtime": 42.5111, | |
| "eval_samples_per_second": 61.019, | |
| "eval_steps_per_second": 7.645, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 8.415492957746479e-05, | |
| "loss": 1.2377, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 8.395931142410016e-05, | |
| "loss": 1.1797, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 8.376369327073553e-05, | |
| "loss": 1.2482, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 8.35680751173709e-05, | |
| "loss": 1.0641, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 8.337245696400627e-05, | |
| "loss": 1.14, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 8.317683881064164e-05, | |
| "loss": 1.1587, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 8.298122065727701e-05, | |
| "loss": 1.1029, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 8.278560250391236e-05, | |
| "loss": 0.9988, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 8.258998435054773e-05, | |
| "loss": 1.0692, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 8.23943661971831e-05, | |
| "loss": 1.0333, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "eval_accuracy": 0.7825751734772552, | |
| "eval_loss": 1.1474497318267822, | |
| "eval_runtime": 43.2872, | |
| "eval_samples_per_second": 59.925, | |
| "eval_steps_per_second": 7.508, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 8.219874804381847e-05, | |
| "loss": 1.0357, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 8.200312989045383e-05, | |
| "loss": 0.8625, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 8.18075117370892e-05, | |
| "loss": 1.0712, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 8.161189358372458e-05, | |
| "loss": 1.1329, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 8.141627543035995e-05, | |
| "loss": 1.0017, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 8.122065727699531e-05, | |
| "loss": 1.1086, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 8.102503912363068e-05, | |
| "loss": 0.8891, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 8.082942097026605e-05, | |
| "loss": 0.8059, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 8.063380281690142e-05, | |
| "loss": 1.0192, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 8.043818466353677e-05, | |
| "loss": 0.8856, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "eval_accuracy": 0.7644564379336931, | |
| "eval_loss": 1.0914219617843628, | |
| "eval_runtime": 42.8447, | |
| "eval_samples_per_second": 60.544, | |
| "eval_steps_per_second": 7.586, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 8.024256651017214e-05, | |
| "loss": 0.911, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 8.004694835680751e-05, | |
| "loss": 0.8939, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 7.985133020344288e-05, | |
| "loss": 0.7816, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 7.965571205007825e-05, | |
| "loss": 0.8397, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 7.946009389671362e-05, | |
| "loss": 0.8172, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 7.926447574334899e-05, | |
| "loss": 0.7408, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 7.906885758998436e-05, | |
| "loss": 0.6926, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 7.887323943661972e-05, | |
| "loss": 0.8984, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 7.867762128325509e-05, | |
| "loss": 0.7221, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 7.848200312989046e-05, | |
| "loss": 0.7512, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "eval_accuracy": 0.8118735543562067, | |
| "eval_loss": 0.8893365263938904, | |
| "eval_runtime": 42.2991, | |
| "eval_samples_per_second": 61.325, | |
| "eval_steps_per_second": 7.683, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 7.828638497652583e-05, | |
| "loss": 0.7297, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 7.809076682316118e-05, | |
| "loss": 0.6638, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 7.789514866979655e-05, | |
| "loss": 0.7419, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 7.769953051643193e-05, | |
| "loss": 0.7263, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 7.75039123630673e-05, | |
| "loss": 0.6707, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 7.730829420970266e-05, | |
| "loss": 0.7675, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 7.711267605633803e-05, | |
| "loss": 0.9189, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 7.69170579029734e-05, | |
| "loss": 0.5663, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 7.672143974960877e-05, | |
| "loss": 0.679, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 7.652582159624414e-05, | |
| "loss": 0.747, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "eval_accuracy": 0.8303777949113339, | |
| "eval_loss": 0.8370148539543152, | |
| "eval_runtime": 42.5695, | |
| "eval_samples_per_second": 60.936, | |
| "eval_steps_per_second": 7.635, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 7.63302034428795e-05, | |
| "loss": 0.8146, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 7.613458528951487e-05, | |
| "loss": 0.715, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 7.593896713615024e-05, | |
| "loss": 0.8337, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 7.57433489827856e-05, | |
| "loss": 0.6553, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 7.554773082942097e-05, | |
| "loss": 0.7805, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 7.535211267605634e-05, | |
| "loss": 0.6115, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 7.515649452269171e-05, | |
| "loss": 0.7046, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 7.496087636932708e-05, | |
| "loss": 0.551, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 7.476525821596244e-05, | |
| "loss": 0.4497, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 7.456964006259781e-05, | |
| "loss": 0.5082, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "eval_accuracy": 0.856592135697764, | |
| "eval_loss": 0.7130723595619202, | |
| "eval_runtime": 43.188, | |
| "eval_samples_per_second": 60.063, | |
| "eval_steps_per_second": 7.525, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 7.437402190923318e-05, | |
| "loss": 0.347, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 7.417840375586855e-05, | |
| "loss": 0.463, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 7.398278560250392e-05, | |
| "loss": 0.4014, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 7.378716744913929e-05, | |
| "loss": 0.488, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 7.359154929577466e-05, | |
| "loss": 0.4239, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 7.339593114241003e-05, | |
| "loss": 0.4246, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 7.320031298904538e-05, | |
| "loss": 0.3698, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 7.300469483568075e-05, | |
| "loss": 0.4264, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 7.280907668231612e-05, | |
| "loss": 0.3167, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 7.261345852895149e-05, | |
| "loss": 0.4449, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "eval_accuracy": 0.8546646106399384, | |
| "eval_loss": 0.6572707295417786, | |
| "eval_runtime": 43.2163, | |
| "eval_samples_per_second": 60.024, | |
| "eval_steps_per_second": 7.52, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 7.241784037558685e-05, | |
| "loss": 0.3504, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 7.222222222222222e-05, | |
| "loss": 0.397, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 7.202660406885759e-05, | |
| "loss": 0.4599, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 7.183098591549297e-05, | |
| "loss": 0.4151, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 7.163536776212833e-05, | |
| "loss": 0.3605, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 7.14397496087637e-05, | |
| "loss": 0.3248, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 7.124413145539907e-05, | |
| "loss": 0.32, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 7.104851330203444e-05, | |
| "loss": 0.4012, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 7.08528951486698e-05, | |
| "loss": 0.3526, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 7.065727699530516e-05, | |
| "loss": 0.2912, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "eval_accuracy": 0.8596761757902853, | |
| "eval_loss": 0.6183947324752808, | |
| "eval_runtime": 43.0374, | |
| "eval_samples_per_second": 60.273, | |
| "eval_steps_per_second": 7.552, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 7.046165884194053e-05, | |
| "loss": 0.427, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 7.02660406885759e-05, | |
| "loss": 0.4321, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 7.007042253521127e-05, | |
| "loss": 0.363, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 6.987480438184664e-05, | |
| "loss": 0.2761, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 6.967918622848201e-05, | |
| "loss": 0.3189, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 6.948356807511738e-05, | |
| "loss": 0.3227, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 6.928794992175274e-05, | |
| "loss": 0.2792, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 6.909233176838811e-05, | |
| "loss": 0.1959, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 6.889671361502348e-05, | |
| "loss": 0.2785, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 6.870109546165885e-05, | |
| "loss": 0.285, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "eval_accuracy": 0.8569776407093292, | |
| "eval_loss": 0.5973872542381287, | |
| "eval_runtime": 42.9634, | |
| "eval_samples_per_second": 60.377, | |
| "eval_steps_per_second": 7.565, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 6.85054773082942e-05, | |
| "loss": 0.2649, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 6.830985915492957e-05, | |
| "loss": 0.3259, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 6.811424100156496e-05, | |
| "loss": 0.3085, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 6.791862284820033e-05, | |
| "loss": 0.3485, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 6.772300469483568e-05, | |
| "loss": 0.2735, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 6.752738654147105e-05, | |
| "loss": 0.3112, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 6.733176838810642e-05, | |
| "loss": 0.3373, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 6.713615023474179e-05, | |
| "loss": 0.1682, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 6.694053208137715e-05, | |
| "loss": 0.2515, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 6.674491392801252e-05, | |
| "loss": 0.2267, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "eval_accuracy": 0.8646877409406323, | |
| "eval_loss": 0.5621365904808044, | |
| "eval_runtime": 43.0986, | |
| "eval_samples_per_second": 60.188, | |
| "eval_steps_per_second": 7.541, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 6.654929577464789e-05, | |
| "loss": 0.1861, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 6.635367762128326e-05, | |
| "loss": 0.3325, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 6.615805946791863e-05, | |
| "loss": 0.3621, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 6.5962441314554e-05, | |
| "loss": 0.3046, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 6.576682316118937e-05, | |
| "loss": 0.347, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 6.557120500782473e-05, | |
| "loss": 0.3916, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 6.53755868544601e-05, | |
| "loss": 0.3221, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 6.517996870109546e-05, | |
| "loss": 0.1877, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 6.498435054773083e-05, | |
| "loss": 0.3638, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 6.47887323943662e-05, | |
| "loss": 0.2553, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "eval_accuracy": 0.8816499614494988, | |
| "eval_loss": 0.5043683052062988, | |
| "eval_runtime": 42.7859, | |
| "eval_samples_per_second": 60.628, | |
| "eval_steps_per_second": 7.596, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 6.459311424100157e-05, | |
| "loss": 0.3439, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 6.439749608763693e-05, | |
| "loss": 0.2463, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 6.420187793427231e-05, | |
| "loss": 0.2192, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 6.400625978090768e-05, | |
| "loss": 0.236, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 6.381064162754305e-05, | |
| "loss": 0.2441, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 6.36150234741784e-05, | |
| "loss": 0.2125, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 6.341940532081377e-05, | |
| "loss": 0.2112, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 6.322378716744914e-05, | |
| "loss": 0.2905, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 6.302816901408451e-05, | |
| "loss": 0.3244, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 6.283255086071987e-05, | |
| "loss": 0.2029, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "eval_accuracy": 0.8955281418658443, | |
| "eval_loss": 0.43422141671180725, | |
| "eval_runtime": 42.7572, | |
| "eval_samples_per_second": 60.668, | |
| "eval_steps_per_second": 7.601, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 6.263693270735524e-05, | |
| "loss": 0.2284, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 6.244131455399061e-05, | |
| "loss": 0.2399, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "learning_rate": 6.224569640062598e-05, | |
| "loss": 0.1875, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "learning_rate": 6.205007824726135e-05, | |
| "loss": 0.1721, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 3.05, | |
| "learning_rate": 6.185446009389672e-05, | |
| "loss": 0.2115, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 3.07, | |
| "learning_rate": 6.165884194053209e-05, | |
| "loss": 0.1698, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "learning_rate": 6.146322378716746e-05, | |
| "loss": 0.1321, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "learning_rate": 6.126760563380281e-05, | |
| "loss": 0.1165, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "learning_rate": 6.107198748043818e-05, | |
| "loss": 0.1121, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 3.13, | |
| "learning_rate": 6.0876369327073554e-05, | |
| "loss": 0.1763, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 3.13, | |
| "eval_accuracy": 0.8905165767154973, | |
| "eval_loss": 0.44871243834495544, | |
| "eval_runtime": 42.6933, | |
| "eval_samples_per_second": 60.759, | |
| "eval_steps_per_second": 7.612, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "learning_rate": 6.068075117370893e-05, | |
| "loss": 0.1473, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "learning_rate": 6.0485133020344286e-05, | |
| "loss": 0.1697, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 3.18, | |
| "learning_rate": 6.0289514866979656e-05, | |
| "loss": 0.1712, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 3.19, | |
| "learning_rate": 6.0093896713615026e-05, | |
| "loss": 0.1468, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 3.21, | |
| "learning_rate": 5.9898278560250395e-05, | |
| "loss": 0.1395, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 3.22, | |
| "learning_rate": 5.970266040688576e-05, | |
| "loss": 0.1122, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "learning_rate": 5.950704225352113e-05, | |
| "loss": 0.1426, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "learning_rate": 5.93114241001565e-05, | |
| "loss": 0.111, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 3.27, | |
| "learning_rate": 5.911580594679187e-05, | |
| "loss": 0.141, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "learning_rate": 5.892018779342723e-05, | |
| "loss": 0.1418, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "eval_accuracy": 0.9005397070161912, | |
| "eval_loss": 0.41731029748916626, | |
| "eval_runtime": 42.6624, | |
| "eval_samples_per_second": 60.803, | |
| "eval_steps_per_second": 7.618, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 3.3, | |
| "learning_rate": 5.87245696400626e-05, | |
| "loss": 0.1462, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "learning_rate": 5.852895148669797e-05, | |
| "loss": 0.0985, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 3.33, | |
| "learning_rate": 5.833333333333334e-05, | |
| "loss": 0.133, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 3.35, | |
| "learning_rate": 5.81377151799687e-05, | |
| "loss": 0.1414, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "learning_rate": 5.794209702660407e-05, | |
| "loss": 0.1297, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "learning_rate": 5.774647887323944e-05, | |
| "loss": 0.1346, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "learning_rate": 5.755086071987481e-05, | |
| "loss": 0.1237, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 3.41, | |
| "learning_rate": 5.735524256651017e-05, | |
| "loss": 0.1664, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 3.43, | |
| "learning_rate": 5.715962441314554e-05, | |
| "loss": 0.0701, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "learning_rate": 5.6964006259780914e-05, | |
| "loss": 0.0563, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "eval_accuracy": 0.9047802621434079, | |
| "eval_loss": 0.387023389339447, | |
| "eval_runtime": 42.2972, | |
| "eval_samples_per_second": 61.328, | |
| "eval_steps_per_second": 7.684, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "learning_rate": 5.6768388106416284e-05, | |
| "loss": 0.0832, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 3.47, | |
| "learning_rate": 5.657276995305164e-05, | |
| "loss": 0.0966, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 3.49, | |
| "learning_rate": 5.637715179968701e-05, | |
| "loss": 0.0926, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 3.51, | |
| "learning_rate": 5.618153364632238e-05, | |
| "loss": 0.1285, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "learning_rate": 5.598591549295775e-05, | |
| "loss": 0.0809, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 3.54, | |
| "learning_rate": 5.579029733959311e-05, | |
| "loss": 0.1119, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 3.55, | |
| "learning_rate": 5.559467918622848e-05, | |
| "loss": 0.067, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 3.57, | |
| "learning_rate": 5.539906103286385e-05, | |
| "loss": 0.1022, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 3.58, | |
| "learning_rate": 5.520344287949922e-05, | |
| "loss": 0.1318, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "learning_rate": 5.5007824726134584e-05, | |
| "loss": 0.0579, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "eval_accuracy": 0.9036237471087124, | |
| "eval_loss": 0.38491636514663696, | |
| "eval_runtime": 42.9889, | |
| "eval_samples_per_second": 60.341, | |
| "eval_steps_per_second": 7.56, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 3.62, | |
| "learning_rate": 5.4812206572769954e-05, | |
| "loss": 0.1294, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 3.63, | |
| "learning_rate": 5.461658841940532e-05, | |
| "loss": 0.0777, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "learning_rate": 5.442097026604069e-05, | |
| "loss": 0.0754, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 3.66, | |
| "learning_rate": 5.422535211267606e-05, | |
| "loss": 0.1463, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 3.68, | |
| "learning_rate": 5.4029733959311426e-05, | |
| "loss": 0.0578, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 3.69, | |
| "learning_rate": 5.3834115805946795e-05, | |
| "loss": 0.1084, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 3.71, | |
| "learning_rate": 5.3638497652582165e-05, | |
| "loss": 0.0534, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 3.72, | |
| "learning_rate": 5.3442879499217535e-05, | |
| "loss": 0.0598, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "learning_rate": 5.32472613458529e-05, | |
| "loss": 0.1353, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "learning_rate": 5.305164319248827e-05, | |
| "loss": 0.166, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "eval_accuracy": 0.9024672320740169, | |
| "eval_loss": 0.3933294415473938, | |
| "eval_runtime": 42.6729, | |
| "eval_samples_per_second": 60.788, | |
| "eval_steps_per_second": 7.616, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 3.77, | |
| "learning_rate": 5.285602503912364e-05, | |
| "loss": 0.0961, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "learning_rate": 5.266040688575901e-05, | |
| "loss": 0.0647, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "learning_rate": 5.246478873239436e-05, | |
| "loss": 0.0744, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 3.82, | |
| "learning_rate": 5.226917057902973e-05, | |
| "loss": 0.1046, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 3.83, | |
| "learning_rate": 5.207355242566511e-05, | |
| "loss": 0.0925, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 3.85, | |
| "learning_rate": 5.187793427230048e-05, | |
| "loss": 0.1343, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 3.87, | |
| "learning_rate": 5.1682316118935835e-05, | |
| "loss": 0.0721, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "learning_rate": 5.1486697965571205e-05, | |
| "loss": 0.1446, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 3.9, | |
| "learning_rate": 5.1291079812206575e-05, | |
| "loss": 0.0807, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 3.91, | |
| "learning_rate": 5.109546165884195e-05, | |
| "loss": 0.11, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 3.91, | |
| "eval_accuracy": 0.9055512721665382, | |
| "eval_loss": 0.39182865619659424, | |
| "eval_runtime": 42.4672, | |
| "eval_samples_per_second": 61.082, | |
| "eval_steps_per_second": 7.653, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 3.93, | |
| "learning_rate": 5.089984350547731e-05, | |
| "loss": 0.1331, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 3.94, | |
| "learning_rate": 5.070422535211268e-05, | |
| "loss": 0.0506, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 3.96, | |
| "learning_rate": 5.0508607198748047e-05, | |
| "loss": 0.1025, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 3.97, | |
| "learning_rate": 5.0312989045383416e-05, | |
| "loss": 0.0792, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 3.99, | |
| "learning_rate": 5.011737089201878e-05, | |
| "loss": 0.099, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 4.01, | |
| "learning_rate": 4.992175273865415e-05, | |
| "loss": 0.0861, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 4.02, | |
| "learning_rate": 4.972613458528952e-05, | |
| "loss": 0.0467, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "learning_rate": 4.953051643192488e-05, | |
| "loss": 0.0587, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 4.05, | |
| "learning_rate": 4.933489827856025e-05, | |
| "loss": 0.064, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 4.07, | |
| "learning_rate": 4.913928012519562e-05, | |
| "loss": 0.0356, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 4.07, | |
| "eval_accuracy": 0.9202004626060138, | |
| "eval_loss": 0.3298385739326477, | |
| "eval_runtime": 42.9604, | |
| "eval_samples_per_second": 60.381, | |
| "eval_steps_per_second": 7.565, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "learning_rate": 4.894366197183099e-05, | |
| "loss": 0.0376, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 4.1, | |
| "learning_rate": 4.8748043818466354e-05, | |
| "loss": 0.0275, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 4.12, | |
| "learning_rate": 4.855242566510172e-05, | |
| "loss": 0.0297, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 4.13, | |
| "learning_rate": 4.835680751173709e-05, | |
| "loss": 0.0323, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 4.15, | |
| "learning_rate": 4.816118935837246e-05, | |
| "loss": 0.0561, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "learning_rate": 4.7965571205007826e-05, | |
| "loss": 0.0489, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 4.18, | |
| "learning_rate": 4.7769953051643195e-05, | |
| "loss": 0.0448, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 4.19, | |
| "learning_rate": 4.757433489827856e-05, | |
| "loss": 0.0398, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 4.21, | |
| "learning_rate": 4.737871674491393e-05, | |
| "loss": 0.0588, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 4.23, | |
| "learning_rate": 4.71830985915493e-05, | |
| "loss": 0.0513, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 4.23, | |
| "eval_accuracy": 0.9209714726291441, | |
| "eval_loss": 0.337054580450058, | |
| "eval_runtime": 42.6677, | |
| "eval_samples_per_second": 60.795, | |
| "eval_steps_per_second": 7.617, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 4.24, | |
| "learning_rate": 4.698748043818467e-05, | |
| "loss": 0.0396, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 4.26, | |
| "learning_rate": 4.679186228482003e-05, | |
| "loss": 0.0352, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 4.27, | |
| "learning_rate": 4.65962441314554e-05, | |
| "loss": 0.0695, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 4.29, | |
| "learning_rate": 4.640062597809077e-05, | |
| "loss": 0.0614, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 4.3, | |
| "learning_rate": 4.620500782472614e-05, | |
| "loss": 0.0702, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 4.32, | |
| "learning_rate": 4.60093896713615e-05, | |
| "loss": 0.037, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 4.33, | |
| "learning_rate": 4.581377151799687e-05, | |
| "loss": 0.0567, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 4.35, | |
| "learning_rate": 4.5618153364632235e-05, | |
| "loss": 0.0327, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 4.37, | |
| "learning_rate": 4.542253521126761e-05, | |
| "loss": 0.0358, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 4.38, | |
| "learning_rate": 4.5226917057902975e-05, | |
| "loss": 0.0762, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 4.38, | |
| "eval_accuracy": 0.9225134926754048, | |
| "eval_loss": 0.32532238960266113, | |
| "eval_runtime": 42.6755, | |
| "eval_samples_per_second": 60.784, | |
| "eval_steps_per_second": 7.616, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "learning_rate": 4.5031298904538344e-05, | |
| "loss": 0.0223, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 4.41, | |
| "learning_rate": 4.483568075117371e-05, | |
| "loss": 0.028, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 4.43, | |
| "learning_rate": 4.464006259780908e-05, | |
| "loss": 0.0572, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "learning_rate": 4.4444444444444447e-05, | |
| "loss": 0.0487, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 4.46, | |
| "learning_rate": 4.4248826291079816e-05, | |
| "loss": 0.0711, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 4.48, | |
| "learning_rate": 4.405320813771518e-05, | |
| "loss": 0.0334, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 4.49, | |
| "learning_rate": 4.385758998435055e-05, | |
| "loss": 0.0219, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 4.51, | |
| "learning_rate": 4.366197183098591e-05, | |
| "loss": 0.0405, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "learning_rate": 4.346635367762129e-05, | |
| "loss": 0.0329, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 4.54, | |
| "learning_rate": 4.327073552425665e-05, | |
| "loss": 0.018, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 4.54, | |
| "eval_accuracy": 0.9148033924441018, | |
| "eval_loss": 0.34668266773223877, | |
| "eval_runtime": 42.9733, | |
| "eval_samples_per_second": 60.363, | |
| "eval_steps_per_second": 7.563, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 4.55, | |
| "learning_rate": 4.307511737089202e-05, | |
| "loss": 0.0413, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 4.57, | |
| "learning_rate": 4.287949921752739e-05, | |
| "loss": 0.018, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 4.59, | |
| "learning_rate": 4.2683881064162754e-05, | |
| "loss": 0.0417, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 4.6, | |
| "learning_rate": 4.248826291079812e-05, | |
| "loss": 0.0254, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 4.62, | |
| "learning_rate": 4.229264475743349e-05, | |
| "loss": 0.0411, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 4.63, | |
| "learning_rate": 4.209702660406886e-05, | |
| "loss": 0.0625, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 4.65, | |
| "learning_rate": 4.1901408450704226e-05, | |
| "loss": 0.0416, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 4.66, | |
| "learning_rate": 4.1705790297339595e-05, | |
| "loss": 0.0486, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "learning_rate": 4.1510172143974965e-05, | |
| "loss": 0.0378, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 4.69, | |
| "learning_rate": 4.1314553990610335e-05, | |
| "loss": 0.0263, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 4.69, | |
| "eval_accuracy": 0.9144178874325366, | |
| "eval_loss": 0.3544096052646637, | |
| "eval_runtime": 43.2558, | |
| "eval_samples_per_second": 59.969, | |
| "eval_steps_per_second": 7.513, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 4.71, | |
| "learning_rate": 4.11189358372457e-05, | |
| "loss": 0.0449, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 4.73, | |
| "learning_rate": 4.092331768388107e-05, | |
| "loss": 0.0758, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 4.74, | |
| "learning_rate": 4.072769953051643e-05, | |
| "loss": 0.0418, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 4.76, | |
| "learning_rate": 4.053208137715181e-05, | |
| "loss": 0.0392, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 4.77, | |
| "learning_rate": 4.033646322378717e-05, | |
| "loss": 0.0195, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 4.79, | |
| "learning_rate": 4.014084507042254e-05, | |
| "loss": 0.0317, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "learning_rate": 3.99452269170579e-05, | |
| "loss": 0.0162, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 4.82, | |
| "learning_rate": 3.974960876369327e-05, | |
| "loss": 0.0256, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "learning_rate": 3.955399061032864e-05, | |
| "loss": 0.017, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 4.85, | |
| "learning_rate": 3.935837245696401e-05, | |
| "loss": 0.0205, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 4.85, | |
| "eval_accuracy": 0.9221279876638396, | |
| "eval_loss": 0.33404412865638733, | |
| "eval_runtime": 43.1306, | |
| "eval_samples_per_second": 60.143, | |
| "eval_steps_per_second": 7.535, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 4.87, | |
| "learning_rate": 3.9162754303599375e-05, | |
| "loss": 0.0549, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 4.88, | |
| "learning_rate": 3.8967136150234744e-05, | |
| "loss": 0.066, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 4.9, | |
| "learning_rate": 3.877151799687011e-05, | |
| "loss": 0.0307, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 4.91, | |
| "learning_rate": 3.8575899843505484e-05, | |
| "loss": 0.0212, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 4.93, | |
| "learning_rate": 3.8380281690140847e-05, | |
| "loss": 0.0311, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 4.95, | |
| "learning_rate": 3.8184663536776216e-05, | |
| "loss": 0.0146, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 4.96, | |
| "learning_rate": 3.798904538341158e-05, | |
| "loss": 0.0173, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 4.98, | |
| "learning_rate": 3.779342723004695e-05, | |
| "loss": 0.0336, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "learning_rate": 3.759780907668232e-05, | |
| "loss": 0.0295, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 5.01, | |
| "learning_rate": 3.740219092331769e-05, | |
| "loss": 0.0237, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 5.01, | |
| "eval_accuracy": 0.9144178874325366, | |
| "eval_loss": 0.33526894450187683, | |
| "eval_runtime": 42.9414, | |
| "eval_samples_per_second": 60.408, | |
| "eval_steps_per_second": 7.568, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 5.02, | |
| "learning_rate": 3.720657276995305e-05, | |
| "loss": 0.0137, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 5.04, | |
| "learning_rate": 3.701095461658842e-05, | |
| "loss": 0.0161, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 5.05, | |
| "learning_rate": 3.681533646322379e-05, | |
| "loss": 0.0217, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 5.07, | |
| "learning_rate": 3.661971830985916e-05, | |
| "loss": 0.0184, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 5.09, | |
| "learning_rate": 3.642410015649452e-05, | |
| "loss": 0.0177, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 5.1, | |
| "learning_rate": 3.622848200312989e-05, | |
| "loss": 0.051, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 5.12, | |
| "learning_rate": 3.6032863849765256e-05, | |
| "loss": 0.012, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 5.13, | |
| "learning_rate": 3.5837245696400626e-05, | |
| "loss": 0.0156, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 5.15, | |
| "learning_rate": 3.5641627543035995e-05, | |
| "loss": 0.0129, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 5.16, | |
| "learning_rate": 3.5446009389671365e-05, | |
| "loss": 0.013, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 5.16, | |
| "eval_accuracy": 0.9228989976869699, | |
| "eval_loss": 0.3218042850494385, | |
| "eval_runtime": 42.5041, | |
| "eval_samples_per_second": 61.029, | |
| "eval_steps_per_second": 7.646, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 5.18, | |
| "learning_rate": 3.525039123630673e-05, | |
| "loss": 0.0106, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 5.2, | |
| "learning_rate": 3.50547730829421e-05, | |
| "loss": 0.0132, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 5.21, | |
| "learning_rate": 3.485915492957747e-05, | |
| "loss": 0.0212, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 5.23, | |
| "learning_rate": 3.466353677621284e-05, | |
| "loss": 0.0124, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 5.24, | |
| "learning_rate": 3.44679186228482e-05, | |
| "loss": 0.0135, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 5.26, | |
| "learning_rate": 3.427230046948357e-05, | |
| "loss": 0.0112, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 5.27, | |
| "learning_rate": 3.407668231611893e-05, | |
| "loss": 0.0175, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 5.29, | |
| "learning_rate": 3.38810641627543e-05, | |
| "loss": 0.0234, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 5.31, | |
| "learning_rate": 3.368544600938967e-05, | |
| "loss": 0.0133, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 5.32, | |
| "learning_rate": 3.348982785602504e-05, | |
| "loss": 0.0116, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 5.32, | |
| "eval_accuracy": 0.9290670778720124, | |
| "eval_loss": 0.308786541223526, | |
| "eval_runtime": 43.1692, | |
| "eval_samples_per_second": 60.089, | |
| "eval_steps_per_second": 7.529, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 5.34, | |
| "learning_rate": 3.3294209702660405e-05, | |
| "loss": 0.011, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 5.35, | |
| "learning_rate": 3.3098591549295775e-05, | |
| "loss": 0.0119, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 5.37, | |
| "learning_rate": 3.2902973395931144e-05, | |
| "loss": 0.0175, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 5.38, | |
| "learning_rate": 3.2707355242566514e-05, | |
| "loss": 0.0116, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 5.4, | |
| "learning_rate": 3.251173708920188e-05, | |
| "loss": 0.0113, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 5.41, | |
| "learning_rate": 3.2316118935837247e-05, | |
| "loss": 0.0152, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 5.43, | |
| "learning_rate": 3.212050078247261e-05, | |
| "loss": 0.012, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 5.45, | |
| "learning_rate": 3.1924882629107986e-05, | |
| "loss": 0.0096, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 5.46, | |
| "learning_rate": 3.1729264475743356e-05, | |
| "loss": 0.0109, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 5.48, | |
| "learning_rate": 3.153364632237872e-05, | |
| "loss": 0.0119, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 5.48, | |
| "eval_accuracy": 0.9279105628373169, | |
| "eval_loss": 0.3046722412109375, | |
| "eval_runtime": 42.5452, | |
| "eval_samples_per_second": 60.97, | |
| "eval_steps_per_second": 7.639, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 5.49, | |
| "learning_rate": 3.133802816901409e-05, | |
| "loss": 0.0091, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 5.51, | |
| "learning_rate": 3.114241001564945e-05, | |
| "loss": 0.0089, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 5.52, | |
| "learning_rate": 3.094679186228482e-05, | |
| "loss": 0.0113, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 5.54, | |
| "learning_rate": 3.075117370892019e-05, | |
| "loss": 0.0162, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 5.56, | |
| "learning_rate": 3.055555555555556e-05, | |
| "loss": 0.0089, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 5.57, | |
| "learning_rate": 3.0359937402190923e-05, | |
| "loss": 0.0105, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 5.59, | |
| "learning_rate": 3.0164319248826296e-05, | |
| "loss": 0.037, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 5.6, | |
| "learning_rate": 2.996870109546166e-05, | |
| "loss": 0.0097, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 5.62, | |
| "learning_rate": 2.977308294209703e-05, | |
| "loss": 0.0087, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 5.63, | |
| "learning_rate": 2.9577464788732395e-05, | |
| "loss": 0.0098, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 5.63, | |
| "eval_accuracy": 0.9282960678488821, | |
| "eval_loss": 0.30633866786956787, | |
| "eval_runtime": 42.8022, | |
| "eval_samples_per_second": 60.604, | |
| "eval_steps_per_second": 7.593, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 5.65, | |
| "learning_rate": 2.9381846635367765e-05, | |
| "loss": 0.009, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 5.67, | |
| "learning_rate": 2.918622848200313e-05, | |
| "loss": 0.0263, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 5.68, | |
| "learning_rate": 2.89906103286385e-05, | |
| "loss": 0.0112, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 5.7, | |
| "learning_rate": 2.8794992175273867e-05, | |
| "loss": 0.0104, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 5.71, | |
| "learning_rate": 2.8599374021909237e-05, | |
| "loss": 0.0082, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 5.73, | |
| "learning_rate": 2.84037558685446e-05, | |
| "loss": 0.0091, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 5.74, | |
| "learning_rate": 2.8208137715179973e-05, | |
| "loss": 0.0099, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 5.76, | |
| "learning_rate": 2.8012519561815336e-05, | |
| "loss": 0.0355, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 5.77, | |
| "learning_rate": 2.7816901408450706e-05, | |
| "loss": 0.0088, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 5.79, | |
| "learning_rate": 2.7621283255086072e-05, | |
| "loss": 0.0086, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 5.79, | |
| "eval_accuracy": 0.9267540478026214, | |
| "eval_loss": 0.30737537145614624, | |
| "eval_runtime": 42.6258, | |
| "eval_samples_per_second": 60.855, | |
| "eval_steps_per_second": 7.624, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 5.81, | |
| "learning_rate": 2.7425665101721442e-05, | |
| "loss": 0.0097, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 5.82, | |
| "learning_rate": 2.7230046948356808e-05, | |
| "loss": 0.0092, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 5.84, | |
| "learning_rate": 2.7034428794992178e-05, | |
| "loss": 0.0085, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 5.85, | |
| "learning_rate": 2.6838810641627544e-05, | |
| "loss": 0.0089, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 5.87, | |
| "learning_rate": 2.6643192488262914e-05, | |
| "loss": 0.0341, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 5.88, | |
| "learning_rate": 2.6447574334898277e-05, | |
| "loss": 0.0357, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 5.9, | |
| "learning_rate": 2.625195618153365e-05, | |
| "loss": 0.0088, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 5.92, | |
| "learning_rate": 2.6056338028169013e-05, | |
| "loss": 0.0101, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 5.93, | |
| "learning_rate": 2.5860719874804386e-05, | |
| "loss": 0.0124, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 5.95, | |
| "learning_rate": 2.566510172143975e-05, | |
| "loss": 0.0081, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 5.95, | |
| "eval_accuracy": 0.9236700077101002, | |
| "eval_loss": 0.32199642062187195, | |
| "eval_runtime": 43.3628, | |
| "eval_samples_per_second": 59.821, | |
| "eval_steps_per_second": 7.495, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 5.96, | |
| "learning_rate": 2.546948356807512e-05, | |
| "loss": 0.0096, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 5.98, | |
| "learning_rate": 2.5273865414710485e-05, | |
| "loss": 0.0217, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 5.99, | |
| "learning_rate": 2.5078247261345855e-05, | |
| "loss": 0.0112, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 6.01, | |
| "learning_rate": 2.4882629107981224e-05, | |
| "loss": 0.0115, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 6.03, | |
| "learning_rate": 2.468701095461659e-05, | |
| "loss": 0.0081, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 6.04, | |
| "learning_rate": 2.4491392801251957e-05, | |
| "loss": 0.0076, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 6.06, | |
| "learning_rate": 2.4295774647887327e-05, | |
| "loss": 0.0078, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 6.07, | |
| "learning_rate": 2.4100156494522693e-05, | |
| "loss": 0.0082, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 6.09, | |
| "learning_rate": 2.3904538341158063e-05, | |
| "loss": 0.0079, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 6.1, | |
| "learning_rate": 2.370892018779343e-05, | |
| "loss": 0.0078, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 6.1, | |
| "eval_accuracy": 0.9267540478026214, | |
| "eval_loss": 0.30635374784469604, | |
| "eval_runtime": 43.3159, | |
| "eval_samples_per_second": 59.886, | |
| "eval_steps_per_second": 7.503, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 6.12, | |
| "learning_rate": 2.3513302034428795e-05, | |
| "loss": 0.008, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 6.13, | |
| "learning_rate": 2.3317683881064165e-05, | |
| "loss": 0.0081, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 6.15, | |
| "learning_rate": 2.312206572769953e-05, | |
| "loss": 0.0074, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 6.17, | |
| "learning_rate": 2.29264475743349e-05, | |
| "loss": 0.0075, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 6.18, | |
| "learning_rate": 2.2730829420970267e-05, | |
| "loss": 0.0082, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 6.2, | |
| "learning_rate": 2.2535211267605634e-05, | |
| "loss": 0.0079, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 6.21, | |
| "learning_rate": 2.2339593114241003e-05, | |
| "loss": 0.0075, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 6.23, | |
| "learning_rate": 2.214397496087637e-05, | |
| "loss": 0.0074, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 6.24, | |
| "learning_rate": 2.194835680751174e-05, | |
| "loss": 0.0067, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 6.26, | |
| "learning_rate": 2.1752738654147106e-05, | |
| "loss": 0.0074, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 6.26, | |
| "eval_accuracy": 0.9279105628373169, | |
| "eval_loss": 0.30622774362564087, | |
| "eval_runtime": 42.6237, | |
| "eval_samples_per_second": 60.858, | |
| "eval_steps_per_second": 7.625, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 6.28, | |
| "learning_rate": 2.1557120500782476e-05, | |
| "loss": 0.0077, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 6.29, | |
| "learning_rate": 2.1361502347417842e-05, | |
| "loss": 0.0074, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 6.31, | |
| "learning_rate": 2.1165884194053208e-05, | |
| "loss": 0.0076, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 6.32, | |
| "learning_rate": 2.0970266040688578e-05, | |
| "loss": 0.0077, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 6.34, | |
| "learning_rate": 2.0774647887323944e-05, | |
| "loss": 0.0075, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 6.35, | |
| "learning_rate": 2.0579029733959314e-05, | |
| "loss": 0.0066, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 6.37, | |
| "learning_rate": 2.038341158059468e-05, | |
| "loss": 0.0077, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 6.38, | |
| "learning_rate": 2.0187793427230047e-05, | |
| "loss": 0.0076, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "learning_rate": 1.9992175273865416e-05, | |
| "loss": 0.0073, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 6.42, | |
| "learning_rate": 1.9796557120500783e-05, | |
| "loss": 0.0068, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 6.42, | |
| "eval_accuracy": 0.9290670778720124, | |
| "eval_loss": 0.3051210343837738, | |
| "eval_runtime": 43.4448, | |
| "eval_samples_per_second": 59.708, | |
| "eval_steps_per_second": 7.481, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 6.43, | |
| "learning_rate": 1.9600938967136152e-05, | |
| "loss": 0.0067, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 6.45, | |
| "learning_rate": 1.940532081377152e-05, | |
| "loss": 0.0382, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 6.46, | |
| "learning_rate": 1.9209702660406885e-05, | |
| "loss": 0.0071, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 6.48, | |
| "learning_rate": 1.9014084507042255e-05, | |
| "loss": 0.0155, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 6.49, | |
| "learning_rate": 1.881846635367762e-05, | |
| "loss": 0.0075, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 6.51, | |
| "learning_rate": 1.862284820031299e-05, | |
| "loss": 0.0073, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 6.53, | |
| "learning_rate": 1.8427230046948357e-05, | |
| "loss": 0.0076, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 6.54, | |
| "learning_rate": 1.8231611893583723e-05, | |
| "loss": 0.0064, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 6.56, | |
| "learning_rate": 1.8035993740219093e-05, | |
| "loss": 0.0097, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 6.57, | |
| "learning_rate": 1.784037558685446e-05, | |
| "loss": 0.006, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 6.57, | |
| "eval_accuracy": 0.9298380878951427, | |
| "eval_loss": 0.3000366985797882, | |
| "eval_runtime": 42.6162, | |
| "eval_samples_per_second": 60.869, | |
| "eval_steps_per_second": 7.626, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 6.59, | |
| "learning_rate": 1.764475743348983e-05, | |
| "loss": 0.0077, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 6.6, | |
| "learning_rate": 1.7449139280125195e-05, | |
| "loss": 0.0076, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 6.62, | |
| "learning_rate": 1.7253521126760565e-05, | |
| "loss": 0.0069, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 6.64, | |
| "learning_rate": 1.705790297339593e-05, | |
| "loss": 0.0067, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 6.65, | |
| "learning_rate": 1.6862284820031298e-05, | |
| "loss": 0.0075, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 6.67, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 0.0068, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 6.68, | |
| "learning_rate": 1.6471048513302034e-05, | |
| "loss": 0.0062, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 6.7, | |
| "learning_rate": 1.6275430359937403e-05, | |
| "loss": 0.0066, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 6.71, | |
| "learning_rate": 1.607981220657277e-05, | |
| "loss": 0.0072, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 6.73, | |
| "learning_rate": 1.5884194053208136e-05, | |
| "loss": 0.0075, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 6.73, | |
| "eval_accuracy": 0.930994602929838, | |
| "eval_loss": 0.30098453164100647, | |
| "eval_runtime": 42.908, | |
| "eval_samples_per_second": 60.455, | |
| "eval_steps_per_second": 7.574, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 6.74, | |
| "learning_rate": 1.5688575899843506e-05, | |
| "loss": 0.0066, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 6.76, | |
| "learning_rate": 1.5492957746478872e-05, | |
| "loss": 0.0061, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 6.78, | |
| "learning_rate": 1.5297339593114242e-05, | |
| "loss": 0.0073, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 6.79, | |
| "learning_rate": 1.5101721439749608e-05, | |
| "loss": 0.013, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 6.81, | |
| "learning_rate": 1.4906103286384976e-05, | |
| "loss": 0.0071, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 6.82, | |
| "learning_rate": 1.4710485133020346e-05, | |
| "loss": 0.0063, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 6.84, | |
| "learning_rate": 1.4514866979655714e-05, | |
| "loss": 0.0071, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 6.85, | |
| "learning_rate": 1.4319248826291082e-05, | |
| "loss": 0.0067, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 6.87, | |
| "learning_rate": 1.412363067292645e-05, | |
| "loss": 0.0475, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 6.89, | |
| "learning_rate": 1.3928012519561818e-05, | |
| "loss": 0.0057, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 6.89, | |
| "eval_accuracy": 0.9298380878951427, | |
| "eval_loss": 0.3036852180957794, | |
| "eval_runtime": 42.308, | |
| "eval_samples_per_second": 61.312, | |
| "eval_steps_per_second": 7.682, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 6.9, | |
| "learning_rate": 1.3732394366197184e-05, | |
| "loss": 0.0058, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 6.92, | |
| "learning_rate": 1.3536776212832552e-05, | |
| "loss": 0.0068, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 6.93, | |
| "learning_rate": 1.334115805946792e-05, | |
| "loss": 0.0061, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 6.95, | |
| "learning_rate": 1.3145539906103288e-05, | |
| "loss": 0.0245, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 6.96, | |
| "learning_rate": 1.2949921752738656e-05, | |
| "loss": 0.0304, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 6.98, | |
| "learning_rate": 1.2754303599374023e-05, | |
| "loss": 0.0068, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "learning_rate": 1.255868544600939e-05, | |
| "loss": 0.0066, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 7.01, | |
| "learning_rate": 1.2363067292644757e-05, | |
| "loss": 0.0071, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 7.03, | |
| "learning_rate": 1.2167449139280125e-05, | |
| "loss": 0.0053, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 7.04, | |
| "learning_rate": 1.1971830985915493e-05, | |
| "loss": 0.0058, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 7.04, | |
| "eval_accuracy": 0.9279105628373169, | |
| "eval_loss": 0.30713140964508057, | |
| "eval_runtime": 42.5583, | |
| "eval_samples_per_second": 60.952, | |
| "eval_steps_per_second": 7.637, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 7.06, | |
| "learning_rate": 1.1776212832550863e-05, | |
| "loss": 0.0061, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 7.07, | |
| "learning_rate": 1.1580594679186229e-05, | |
| "loss": 0.0064, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 7.09, | |
| "learning_rate": 1.1384976525821597e-05, | |
| "loss": 0.0069, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 7.1, | |
| "learning_rate": 1.1189358372456965e-05, | |
| "loss": 0.0057, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 7.12, | |
| "learning_rate": 1.0993740219092333e-05, | |
| "loss": 0.0061, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 7.14, | |
| "learning_rate": 1.0798122065727701e-05, | |
| "loss": 0.0062, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 7.15, | |
| "learning_rate": 1.0602503912363067e-05, | |
| "loss": 0.0062, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 7.17, | |
| "learning_rate": 1.0406885758998435e-05, | |
| "loss": 0.0058, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 7.18, | |
| "learning_rate": 1.0211267605633803e-05, | |
| "loss": 0.0073, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "learning_rate": 1.0015649452269172e-05, | |
| "loss": 0.0075, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "eval_accuracy": 0.9282960678488821, | |
| "eval_loss": 0.307522177696228, | |
| "eval_runtime": 43.0156, | |
| "eval_samples_per_second": 60.304, | |
| "eval_steps_per_second": 7.555, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 7.21, | |
| "learning_rate": 9.82003129890454e-06, | |
| "loss": 0.0059, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 7.23, | |
| "learning_rate": 9.624413145539906e-06, | |
| "loss": 0.0064, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 7.25, | |
| "learning_rate": 9.428794992175274e-06, | |
| "loss": 0.0063, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 7.26, | |
| "learning_rate": 9.233176838810642e-06, | |
| "loss": 0.0052, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 7.28, | |
| "learning_rate": 9.03755868544601e-06, | |
| "loss": 0.0053, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 7.29, | |
| "learning_rate": 8.841940532081378e-06, | |
| "loss": 0.0059, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 7.31, | |
| "learning_rate": 8.646322378716746e-06, | |
| "loss": 0.0059, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 7.32, | |
| "learning_rate": 8.450704225352112e-06, | |
| "loss": 0.0058, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 7.34, | |
| "learning_rate": 8.25508607198748e-06, | |
| "loss": 0.0054, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 7.36, | |
| "learning_rate": 8.059467918622848e-06, | |
| "loss": 0.0066, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 7.36, | |
| "eval_accuracy": 0.9294525828835775, | |
| "eval_loss": 0.30765867233276367, | |
| "eval_runtime": 43.157, | |
| "eval_samples_per_second": 60.106, | |
| "eval_steps_per_second": 7.531, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 7.37, | |
| "learning_rate": 7.863849765258216e-06, | |
| "loss": 0.0054, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 7.39, | |
| "learning_rate": 7.668231611893584e-06, | |
| "loss": 0.0064, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 7.4, | |
| "learning_rate": 7.4726134585289515e-06, | |
| "loss": 0.0059, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 7.42, | |
| "learning_rate": 7.2769953051643195e-06, | |
| "loss": 0.0057, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 7.43, | |
| "learning_rate": 7.081377151799687e-06, | |
| "loss": 0.0066, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 7.45, | |
| "learning_rate": 6.885758998435055e-06, | |
| "loss": 0.0113, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 7.46, | |
| "learning_rate": 6.690140845070423e-06, | |
| "loss": 0.0059, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 7.48, | |
| "learning_rate": 6.49452269170579e-06, | |
| "loss": 0.0061, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 7.5, | |
| "learning_rate": 6.298904538341158e-06, | |
| "loss": 0.0069, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 7.51, | |
| "learning_rate": 6.103286384976526e-06, | |
| "loss": 0.0056, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 7.51, | |
| "eval_accuracy": 0.9294525828835775, | |
| "eval_loss": 0.30838659405708313, | |
| "eval_runtime": 43.0465, | |
| "eval_samples_per_second": 60.26, | |
| "eval_steps_per_second": 7.55, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 7.53, | |
| "learning_rate": 5.907668231611894e-06, | |
| "loss": 0.0059, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 7.54, | |
| "learning_rate": 5.712050078247261e-06, | |
| "loss": 0.0054, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 7.56, | |
| "learning_rate": 5.516431924882629e-06, | |
| "loss": 0.0054, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 7.57, | |
| "learning_rate": 5.320813771517997e-06, | |
| "loss": 0.0059, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 7.59, | |
| "learning_rate": 5.125195618153364e-06, | |
| "loss": 0.0061, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 7.61, | |
| "learning_rate": 4.929577464788732e-06, | |
| "loss": 0.0062, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 7.62, | |
| "learning_rate": 4.7339593114241e-06, | |
| "loss": 0.0215, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 7.64, | |
| "learning_rate": 4.538341158059468e-06, | |
| "loss": 0.0057, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 7.65, | |
| "learning_rate": 4.342723004694836e-06, | |
| "loss": 0.0061, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 7.67, | |
| "learning_rate": 4.1471048513302035e-06, | |
| "loss": 0.0053, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 7.67, | |
| "eval_accuracy": 0.930994602929838, | |
| "eval_loss": 0.3063901364803314, | |
| "eval_runtime": 42.9974, | |
| "eval_samples_per_second": 60.329, | |
| "eval_steps_per_second": 7.559, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 7.68, | |
| "learning_rate": 3.9514866979655715e-06, | |
| "loss": 0.006, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 7.7, | |
| "learning_rate": 3.755868544600939e-06, | |
| "loss": 0.0053, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 7.72, | |
| "learning_rate": 3.560250391236307e-06, | |
| "loss": 0.0051, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 7.73, | |
| "learning_rate": 3.3646322378716747e-06, | |
| "loss": 0.0057, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 7.75, | |
| "learning_rate": 3.1690140845070423e-06, | |
| "loss": 0.0259, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 7.76, | |
| "learning_rate": 2.97339593114241e-06, | |
| "loss": 0.0055, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 7.78, | |
| "learning_rate": 2.777777777777778e-06, | |
| "loss": 0.0051, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 7.79, | |
| "learning_rate": 2.582159624413146e-06, | |
| "loss": 0.006, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 7.81, | |
| "learning_rate": 2.3865414710485135e-06, | |
| "loss": 0.0062, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 7.82, | |
| "learning_rate": 2.190923317683881e-06, | |
| "loss": 0.0057, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 7.82, | |
| "eval_accuracy": 0.9317656129529683, | |
| "eval_loss": 0.3068486750125885, | |
| "eval_runtime": 42.7527, | |
| "eval_samples_per_second": 60.675, | |
| "eval_steps_per_second": 7.602, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 7.84, | |
| "learning_rate": 1.995305164319249e-06, | |
| "loss": 0.0057, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 7.86, | |
| "learning_rate": 1.7996870109546167e-06, | |
| "loss": 0.0063, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 7.87, | |
| "learning_rate": 1.6040688575899843e-06, | |
| "loss": 0.0063, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 7.89, | |
| "learning_rate": 1.4084507042253521e-06, | |
| "loss": 0.0061, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 7.9, | |
| "learning_rate": 1.21283255086072e-06, | |
| "loss": 0.0051, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 7.92, | |
| "learning_rate": 1.0172143974960877e-06, | |
| "loss": 0.0061, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 7.93, | |
| "learning_rate": 8.215962441314555e-07, | |
| "loss": 0.0061, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 7.95, | |
| "learning_rate": 6.259780907668232e-07, | |
| "loss": 0.0049, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 7.97, | |
| "learning_rate": 4.303599374021909e-07, | |
| "loss": 0.0049, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 7.98, | |
| "learning_rate": 2.347417840375587e-07, | |
| "loss": 0.0055, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 7.98, | |
| "eval_accuracy": 0.9317656129529683, | |
| "eval_loss": 0.30683887004852295, | |
| "eval_runtime": 43.5191, | |
| "eval_samples_per_second": 59.606, | |
| "eval_steps_per_second": 7.468, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 3.912363067292645e-08, | |
| "loss": 0.0062, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "step": 5112, | |
| "total_flos": 6.337884979995771e+18, | |
| "train_loss": 0.5634685837152139, | |
| "train_runtime": 5666.239, | |
| "train_samples_per_second": 14.415, | |
| "train_steps_per_second": 0.902 | |
| } | |
| ], | |
| "max_steps": 5112, | |
| "num_train_epochs": 8, | |
| "total_flos": 6.337884979995771e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |