| { | |
| "best_metric": 1.0, | |
| "best_model_checkpoint": "vit-base-patch16-224-in21k-face-recognition/checkpoint-1488", | |
| "epoch": 8.0, | |
| "global_step": 2976, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.026845637583892e-06, | |
| "loss": 2.214, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 8.053691275167785e-06, | |
| "loss": 2.1532, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.2080536912751678e-05, | |
| "loss": 2.0195, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 1.610738255033557e-05, | |
| "loss": 1.7918, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 2.0134228187919465e-05, | |
| "loss": 1.4794, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 2.4161073825503356e-05, | |
| "loss": 1.1516, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 2.8187919463087248e-05, | |
| "loss": 0.8457, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.221476510067114e-05, | |
| "loss": 0.6145, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 3.6241610738255034e-05, | |
| "loss": 0.446, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.026845637583893e-05, | |
| "loss": 0.3573, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.4295302013422824e-05, | |
| "loss": 0.289, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.832214765100671e-05, | |
| "loss": 0.2559, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 5.23489932885906e-05, | |
| "loss": 0.2186, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 5.6375838926174495e-05, | |
| "loss": 0.1936, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 6.04026845637584e-05, | |
| "loss": 0.181, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 6.442953020134228e-05, | |
| "loss": 0.1621, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 6.845637583892617e-05, | |
| "loss": 0.1518, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 7.248322147651007e-05, | |
| "loss": 0.1361, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 7.651006711409396e-05, | |
| "loss": 0.1268, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 8.053691275167786e-05, | |
| "loss": 0.1159, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 8.456375838926175e-05, | |
| "loss": 0.1088, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 8.859060402684565e-05, | |
| "loss": 0.1015, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 9.261744966442954e-05, | |
| "loss": 0.0947, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 9.664429530201342e-05, | |
| "loss": 0.087, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.00010067114093959731, | |
| "loss": 0.0823, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.0001046979865771812, | |
| "loss": 0.0772, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.0001087248322147651, | |
| "loss": 0.0694, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 0.00011275167785234899, | |
| "loss": 0.0642, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.00011677852348993289, | |
| "loss": 0.0592, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.00011991038088125467, | |
| "loss": 0.0556, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 0.000119462285287528, | |
| "loss": 0.0516, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.00011901418969380135, | |
| "loss": 0.0679, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00011856609410007469, | |
| "loss": 0.0582, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.00011811799850634802, | |
| "loss": 0.0487, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.00011766990291262137, | |
| "loss": 0.0462, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.0001172218073188947, | |
| "loss": 0.0411, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.00011677371172516803, | |
| "loss": 0.0368, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.999957997311828, | |
| "eval_loss": 0.034596964716911316, | |
| "eval_runtime": 326.4848, | |
| "eval_samples_per_second": 72.922, | |
| "eval_steps_per_second": 1.139, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 0.00011632561613144138, | |
| "loss": 0.0335, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 0.00011587752053771471, | |
| "loss": 0.0316, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 0.00011542942494398806, | |
| "loss": 0.0303, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 0.00011498132935026138, | |
| "loss": 0.0285, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 0.00011453323375653473, | |
| "loss": 0.0273, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 0.00011408513816280807, | |
| "loss": 0.0258, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 0.00011363704256908141, | |
| "loss": 0.0248, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 0.00011318894697535474, | |
| "loss": 0.0257, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 0.00011274085138162807, | |
| "loss": 0.0251, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 0.00011229275578790142, | |
| "loss": 0.0225, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 0.00011184466019417477, | |
| "loss": 0.0212, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 0.00011139656460044809, | |
| "loss": 0.0214, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 0.00011094846900672144, | |
| "loss": 0.0197, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 0.00011050037341299477, | |
| "loss": 0.0203, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 0.00011005227781926812, | |
| "loss": 0.0183, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 0.00010960418222554145, | |
| "loss": 0.0202, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 0.00010915608663181478, | |
| "loss": 0.0258, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 0.00010870799103808813, | |
| "loss": 0.0198, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 0.00010825989544436148, | |
| "loss": 0.0194, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 0.00010781179985063481, | |
| "loss": 0.0171, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 0.00010736370425690814, | |
| "loss": 0.0152, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 0.00010691560866318148, | |
| "loss": 0.0162, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 0.00010646751306945482, | |
| "loss": 0.0143, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 0.00010601941747572817, | |
| "loss": 0.014, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 0.00010557132188200149, | |
| "loss": 0.0132, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 0.00010512322628827484, | |
| "loss": 0.0127, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 0.00010467513069454817, | |
| "loss": 0.0123, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 0.00010422703510082152, | |
| "loss": 0.0124, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 0.00010377893950709485, | |
| "loss": 0.0118, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 0.00010333084391336818, | |
| "loss": 0.0128, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 0.00010288274831964153, | |
| "loss": 0.0139, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 0.00010243465272591486, | |
| "loss": 0.0158, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 0.0001019865571321882, | |
| "loss": 0.0112, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 0.00010153846153846155, | |
| "loss": 0.0104, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 0.00010109036594473488, | |
| "loss": 0.0099, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 0.00010064227035100823, | |
| "loss": 0.0095, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 0.00010019417475728155, | |
| "loss": 0.0094, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.999957997311828, | |
| "eval_loss": 0.009242160245776176, | |
| "eval_runtime": 315.5591, | |
| "eval_samples_per_second": 75.447, | |
| "eval_steps_per_second": 1.179, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 9.974607916355489e-05, | |
| "loss": 0.009, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 9.929798356982824e-05, | |
| "loss": 0.0089, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 9.884988797610157e-05, | |
| "loss": 0.0086, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 9.84017923823749e-05, | |
| "loss": 0.0086, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 9.795369678864824e-05, | |
| "loss": 0.0082, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 9.750560119492159e-05, | |
| "loss": 0.0081, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 9.705750560119493e-05, | |
| "loss": 0.0081, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 9.660941000746825e-05, | |
| "loss": 0.0078, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 9.61613144137416e-05, | |
| "loss": 0.0077, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 9.571321882001495e-05, | |
| "loss": 0.0074, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 9.526512322628828e-05, | |
| "loss": 0.0073, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 9.481702763256161e-05, | |
| "loss": 0.0096, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 9.436893203883495e-05, | |
| "loss": 0.0078, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 9.39208364451083e-05, | |
| "loss": 0.0073, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 9.347274085138164e-05, | |
| "loss": 0.0069, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 9.302464525765496e-05, | |
| "loss": 0.0067, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 9.257654966392831e-05, | |
| "loss": 0.0066, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 9.212845407020164e-05, | |
| "loss": 0.0064, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 9.168035847647499e-05, | |
| "loss": 0.0063, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 9.123226288274832e-05, | |
| "loss": 0.0061, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 9.078416728902166e-05, | |
| "loss": 0.006, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 9.0336071695295e-05, | |
| "loss": 0.0059, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 8.988797610156834e-05, | |
| "loss": 0.0058, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 8.943988050784167e-05, | |
| "loss": 0.0057, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 8.899178491411502e-05, | |
| "loss": 0.0055, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 8.854368932038835e-05, | |
| "loss": 0.0056, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 8.80955937266617e-05, | |
| "loss": 0.0055, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 8.764749813293502e-05, | |
| "loss": 0.0053, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 8.719940253920836e-05, | |
| "loss": 0.0052, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 8.675130694548171e-05, | |
| "loss": 0.0052, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 8.630321135175504e-05, | |
| "loss": 0.0051, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 8.585511575802838e-05, | |
| "loss": 0.0051, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 8.540702016430171e-05, | |
| "loss": 0.005, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 8.495892457057506e-05, | |
| "loss": 0.0052, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 8.45108289768484e-05, | |
| "loss": 0.0049, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 8.406273338312172e-05, | |
| "loss": 0.0048, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 8.361463778939507e-05, | |
| "loss": 0.0046, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.999957997311828, | |
| "eval_loss": 0.004723448771983385, | |
| "eval_runtime": 316.511, | |
| "eval_samples_per_second": 75.22, | |
| "eval_steps_per_second": 1.175, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "learning_rate": 8.316654219566842e-05, | |
| "loss": 0.0046, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "learning_rate": 8.271844660194175e-05, | |
| "loss": 0.0046, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "learning_rate": 8.22703510082151e-05, | |
| "loss": 0.0045, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 3.09, | |
| "learning_rate": 8.182225541448842e-05, | |
| "loss": 0.0044, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "learning_rate": 8.137415982076177e-05, | |
| "loss": 0.0044, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "learning_rate": 8.092606422703511e-05, | |
| "loss": 0.0043, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 3.17, | |
| "learning_rate": 8.047796863330845e-05, | |
| "loss": 0.0042, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "learning_rate": 8.002987303958178e-05, | |
| "loss": 0.0041, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "learning_rate": 7.958177744585511e-05, | |
| "loss": 0.0041, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 3.25, | |
| "learning_rate": 7.913368185212846e-05, | |
| "loss": 0.004, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 3.28, | |
| "learning_rate": 7.86855862584018e-05, | |
| "loss": 0.004, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 3.31, | |
| "learning_rate": 7.823749066467513e-05, | |
| "loss": 0.004, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 3.33, | |
| "learning_rate": 7.778939507094847e-05, | |
| "loss": 0.0039, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "learning_rate": 7.73412994772218e-05, | |
| "loss": 0.0039, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 3.39, | |
| "learning_rate": 7.689320388349515e-05, | |
| "loss": 0.0038, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 3.41, | |
| "learning_rate": 7.644510828976849e-05, | |
| "loss": 0.0037, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "learning_rate": 7.599701269604182e-05, | |
| "loss": 0.0036, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 3.47, | |
| "learning_rate": 7.554891710231517e-05, | |
| "loss": 0.0036, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 3.49, | |
| "learning_rate": 7.51008215085885e-05, | |
| "loss": 0.0036, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "learning_rate": 7.465272591486183e-05, | |
| "loss": 0.0036, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 3.55, | |
| "learning_rate": 7.420463032113518e-05, | |
| "loss": 0.0035, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 3.58, | |
| "learning_rate": 7.375653472740851e-05, | |
| "loss": 0.0034, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "learning_rate": 7.330843913368186e-05, | |
| "loss": 0.0034, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 3.63, | |
| "learning_rate": 7.286034353995518e-05, | |
| "loss": 0.0036, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 3.66, | |
| "learning_rate": 7.241224794622853e-05, | |
| "loss": 0.0035, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 3.68, | |
| "learning_rate": 7.196415235250188e-05, | |
| "loss": 0.0034, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 3.71, | |
| "learning_rate": 7.151605675877521e-05, | |
| "loss": 0.0033, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "learning_rate": 7.106796116504854e-05, | |
| "loss": 0.0033, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "learning_rate": 7.061986557132189e-05, | |
| "loss": 0.0032, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "learning_rate": 7.017176997759522e-05, | |
| "loss": 0.0032, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 3.82, | |
| "learning_rate": 6.972367438386857e-05, | |
| "loss": 0.0031, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "learning_rate": 6.927557879014189e-05, | |
| "loss": 0.0031, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 3.87, | |
| "learning_rate": 6.882748319641524e-05, | |
| "loss": 0.0031, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 3.9, | |
| "learning_rate": 6.837938760268858e-05, | |
| "loss": 0.003, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "learning_rate": 6.793129200896192e-05, | |
| "loss": 0.0031, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 3.95, | |
| "learning_rate": 6.748319641523525e-05, | |
| "loss": 0.003, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 3.98, | |
| "learning_rate": 6.703510082150858e-05, | |
| "loss": 0.0029, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.002898953389376402, | |
| "eval_runtime": 315.5752, | |
| "eval_samples_per_second": 75.443, | |
| "eval_steps_per_second": 1.179, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 4.01, | |
| "learning_rate": 6.658700522778193e-05, | |
| "loss": 0.003, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 4.03, | |
| "learning_rate": 6.613890963405528e-05, | |
| "loss": 0.0029, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 4.06, | |
| "learning_rate": 6.56908140403286e-05, | |
| "loss": 0.0029, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 4.09, | |
| "learning_rate": 6.524271844660194e-05, | |
| "loss": 0.0028, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 4.11, | |
| "learning_rate": 6.479462285287528e-05, | |
| "loss": 0.0028, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 4.14, | |
| "learning_rate": 6.434652725914862e-05, | |
| "loss": 0.0028, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 4.17, | |
| "learning_rate": 6.389843166542196e-05, | |
| "loss": 0.0028, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 4.19, | |
| "learning_rate": 6.345033607169529e-05, | |
| "loss": 0.0027, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 4.22, | |
| "learning_rate": 6.300224047796864e-05, | |
| "loss": 0.0027, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 4.25, | |
| "learning_rate": 6.255414488424197e-05, | |
| "loss": 0.0027, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 4.27, | |
| "learning_rate": 6.21060492905153e-05, | |
| "loss": 0.0027, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 4.3, | |
| "learning_rate": 6.165795369678865e-05, | |
| "loss": 0.0026, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 4.33, | |
| "learning_rate": 6.120985810306199e-05, | |
| "loss": 0.0026, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 4.35, | |
| "learning_rate": 6.076176250933533e-05, | |
| "loss": 0.0026, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 4.38, | |
| "learning_rate": 6.031366691560866e-05, | |
| "loss": 0.0026, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 4.41, | |
| "learning_rate": 5.9865571321882e-05, | |
| "loss": 0.0025, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "learning_rate": 5.941747572815534e-05, | |
| "loss": 0.0025, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 4.46, | |
| "learning_rate": 5.896938013442868e-05, | |
| "loss": 0.0025, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 4.49, | |
| "learning_rate": 5.852128454070202e-05, | |
| "loss": 0.0025, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "learning_rate": 5.807318894697535e-05, | |
| "loss": 0.0024, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 4.54, | |
| "learning_rate": 5.762509335324869e-05, | |
| "loss": 0.0024, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 4.57, | |
| "learning_rate": 5.717699775952203e-05, | |
| "loss": 0.0032, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 4.6, | |
| "learning_rate": 5.672890216579537e-05, | |
| "loss": 0.003, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 4.62, | |
| "learning_rate": 5.6280806572068713e-05, | |
| "loss": 0.0028, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 4.65, | |
| "learning_rate": 5.583271097834205e-05, | |
| "loss": 0.0055, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "learning_rate": 5.538461538461539e-05, | |
| "loss": 0.0036, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 4.7, | |
| "learning_rate": 5.493651979088873e-05, | |
| "loss": 0.0029, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 4.73, | |
| "learning_rate": 5.448842419716207e-05, | |
| "loss": 0.0059, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 4.76, | |
| "learning_rate": 5.40403286034354e-05, | |
| "loss": 0.0042, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 4.78, | |
| "learning_rate": 5.359223300970874e-05, | |
| "loss": 0.0031, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 4.81, | |
| "learning_rate": 5.3144137415982074e-05, | |
| "loss": 0.0026, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "learning_rate": 5.269604182225542e-05, | |
| "loss": 0.0025, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 4.87, | |
| "learning_rate": 5.2247946228528755e-05, | |
| "loss": 0.0025, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 4.89, | |
| "learning_rate": 5.1799850634802095e-05, | |
| "loss": 0.0023, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "learning_rate": 5.135175504107543e-05, | |
| "loss": 0.0024, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 4.95, | |
| "learning_rate": 5.090365944734877e-05, | |
| "loss": 0.0023, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 4.97, | |
| "learning_rate": 5.045556385362211e-05, | |
| "loss": 0.0022, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 5.000746825989545e-05, | |
| "loss": 0.0022, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.9999159946236559, | |
| "eval_loss": 0.0023123060818761587, | |
| "eval_runtime": 314.9106, | |
| "eval_samples_per_second": 75.602, | |
| "eval_steps_per_second": 1.181, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 5.03, | |
| "learning_rate": 4.955937266616878e-05, | |
| "loss": 0.0023, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 5.05, | |
| "learning_rate": 4.911127707244212e-05, | |
| "loss": 0.0021, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 5.08, | |
| "learning_rate": 4.866318147871546e-05, | |
| "loss": 0.0044, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 5.11, | |
| "learning_rate": 4.82150858849888e-05, | |
| "loss": 0.0021, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 5.13, | |
| "learning_rate": 4.7766990291262136e-05, | |
| "loss": 0.0022, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 5.16, | |
| "learning_rate": 4.7318894697535476e-05, | |
| "loss": 0.0021, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 5.19, | |
| "learning_rate": 4.687079910380881e-05, | |
| "loss": 0.0021, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 5.22, | |
| "learning_rate": 4.642270351008216e-05, | |
| "loss": 0.002, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 5.24, | |
| "learning_rate": 4.597460791635549e-05, | |
| "loss": 0.0023, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 5.27, | |
| "learning_rate": 4.552651232262883e-05, | |
| "loss": 0.002, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 5.3, | |
| "learning_rate": 4.5078416728902164e-05, | |
| "loss": 0.002, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 5.32, | |
| "learning_rate": 4.4630321135175504e-05, | |
| "loss": 0.0019, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 5.35, | |
| "learning_rate": 4.4182225541448844e-05, | |
| "loss": 0.0019, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 5.38, | |
| "learning_rate": 4.3734129947722184e-05, | |
| "loss": 0.0019, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 5.4, | |
| "learning_rate": 4.328603435399552e-05, | |
| "loss": 0.0019, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 5.43, | |
| "learning_rate": 4.283793876026886e-05, | |
| "loss": 0.0019, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 5.46, | |
| "learning_rate": 4.23898431665422e-05, | |
| "loss": 0.0019, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 5.48, | |
| "learning_rate": 4.194174757281554e-05, | |
| "loss": 0.0019, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 5.51, | |
| "learning_rate": 4.149365197908887e-05, | |
| "loss": 0.0019, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 5.54, | |
| "learning_rate": 4.104555638536221e-05, | |
| "loss": 0.0019, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 5.56, | |
| "learning_rate": 4.0597460791635545e-05, | |
| "loss": 0.0019, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 5.59, | |
| "learning_rate": 4.014936519790889e-05, | |
| "loss": 0.0019, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 5.62, | |
| "learning_rate": 3.9701269604182226e-05, | |
| "loss": 0.0018, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 5.65, | |
| "learning_rate": 3.9253174010455566e-05, | |
| "loss": 0.0018, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 5.67, | |
| "learning_rate": 3.88050784167289e-05, | |
| "loss": 0.0018, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 5.7, | |
| "learning_rate": 3.835698282300224e-05, | |
| "loss": 0.0018, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 5.73, | |
| "learning_rate": 3.790888722927558e-05, | |
| "loss": 0.0018, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 5.75, | |
| "learning_rate": 3.746079163554892e-05, | |
| "loss": 0.0018, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 5.78, | |
| "learning_rate": 3.701269604182225e-05, | |
| "loss": 0.0018, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 5.81, | |
| "learning_rate": 3.656460044809559e-05, | |
| "loss": 0.0017, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 5.83, | |
| "learning_rate": 3.6116504854368933e-05, | |
| "loss": 0.0018, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 5.86, | |
| "learning_rate": 3.5668409260642274e-05, | |
| "loss": 0.0017, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 5.89, | |
| "learning_rate": 3.522031366691561e-05, | |
| "loss": 0.0017, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 5.91, | |
| "learning_rate": 3.477221807318895e-05, | |
| "loss": 0.0017, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 5.94, | |
| "learning_rate": 3.432412247946228e-05, | |
| "loss": 0.0017, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 5.97, | |
| "learning_rate": 3.387602688573563e-05, | |
| "loss": 0.0017, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 5.99, | |
| "learning_rate": 3.342793129200896e-05, | |
| "loss": 0.0017, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.0016880695475265384, | |
| "eval_runtime": 314.9291, | |
| "eval_samples_per_second": 75.598, | |
| "eval_steps_per_second": 1.181, | |
| "step": 2232 | |
| }, | |
| { | |
| "epoch": 6.02, | |
| "learning_rate": 3.29798356982823e-05, | |
| "loss": 0.0017, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 6.05, | |
| "learning_rate": 3.2531740104555635e-05, | |
| "loss": 0.0017, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 6.08, | |
| "learning_rate": 3.2083644510828975e-05, | |
| "loss": 0.0016, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 6.1, | |
| "learning_rate": 3.1635548917102315e-05, | |
| "loss": 0.0017, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 6.13, | |
| "learning_rate": 3.1187453323375655e-05, | |
| "loss": 0.0016, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 6.16, | |
| "learning_rate": 3.0739357729648995e-05, | |
| "loss": 0.0016, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 6.18, | |
| "learning_rate": 3.0291262135922332e-05, | |
| "loss": 0.0016, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 6.21, | |
| "learning_rate": 2.984316654219567e-05, | |
| "loss": 0.0016, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 6.24, | |
| "learning_rate": 2.9395070948469006e-05, | |
| "loss": 0.0016, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 6.26, | |
| "learning_rate": 2.8946975354742346e-05, | |
| "loss": 0.0016, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 6.29, | |
| "learning_rate": 2.8498879761015683e-05, | |
| "loss": 0.0016, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 6.32, | |
| "learning_rate": 2.8050784167289023e-05, | |
| "loss": 0.0016, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 6.34, | |
| "learning_rate": 2.760268857356236e-05, | |
| "loss": 0.0016, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 6.37, | |
| "learning_rate": 2.71545929798357e-05, | |
| "loss": 0.0016, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "learning_rate": 2.6706497386109037e-05, | |
| "loss": 0.0016, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 6.42, | |
| "learning_rate": 2.6258401792382373e-05, | |
| "loss": 0.0016, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 6.45, | |
| "learning_rate": 2.5810306198655713e-05, | |
| "loss": 0.0016, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 6.48, | |
| "learning_rate": 2.536221060492905e-05, | |
| "loss": 0.0016, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 6.51, | |
| "learning_rate": 2.491411501120239e-05, | |
| "loss": 0.0016, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 6.53, | |
| "learning_rate": 2.4466019417475727e-05, | |
| "loss": 0.0015, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 6.56, | |
| "learning_rate": 2.4017923823749067e-05, | |
| "loss": 0.0015, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 6.59, | |
| "learning_rate": 2.3569828230022404e-05, | |
| "loss": 0.0015, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 6.61, | |
| "learning_rate": 2.312173263629574e-05, | |
| "loss": 0.0015, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 6.64, | |
| "learning_rate": 2.267363704256908e-05, | |
| "loss": 0.0015, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 6.67, | |
| "learning_rate": 2.2225541448842418e-05, | |
| "loss": 0.0015, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 6.69, | |
| "learning_rate": 2.1777445855115758e-05, | |
| "loss": 0.0015, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 6.72, | |
| "learning_rate": 2.1329350261389095e-05, | |
| "loss": 0.0015, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 6.75, | |
| "learning_rate": 2.0881254667662435e-05, | |
| "loss": 0.0015, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 6.77, | |
| "learning_rate": 2.0433159073935772e-05, | |
| "loss": 0.0015, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 6.8, | |
| "learning_rate": 1.998506348020911e-05, | |
| "loss": 0.0015, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 6.83, | |
| "learning_rate": 1.953696788648245e-05, | |
| "loss": 0.0015, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 6.85, | |
| "learning_rate": 1.9088872292755786e-05, | |
| "loss": 0.0015, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 6.88, | |
| "learning_rate": 1.8640776699029126e-05, | |
| "loss": 0.0015, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 6.91, | |
| "learning_rate": 1.8192681105302466e-05, | |
| "loss": 0.0015, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 6.94, | |
| "learning_rate": 1.7744585511575806e-05, | |
| "loss": 0.0015, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 6.96, | |
| "learning_rate": 1.7296489917849143e-05, | |
| "loss": 0.0015, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 6.99, | |
| "learning_rate": 1.684839432412248e-05, | |
| "loss": 0.0015, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.001458011451177299, | |
| "eval_runtime": 317.9295, | |
| "eval_samples_per_second": 74.885, | |
| "eval_steps_per_second": 1.17, | |
| "step": 2604 | |
| }, | |
| { | |
| "epoch": 7.02, | |
| "learning_rate": 1.640029873039582e-05, | |
| "loss": 0.0015, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 7.04, | |
| "learning_rate": 1.5952203136669157e-05, | |
| "loss": 0.0015, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 7.07, | |
| "learning_rate": 1.5504107542942497e-05, | |
| "loss": 0.0014, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 7.1, | |
| "learning_rate": 1.5056011949215834e-05, | |
| "loss": 0.0014, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 7.12, | |
| "learning_rate": 1.460791635548917e-05, | |
| "loss": 0.0014, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 7.15, | |
| "learning_rate": 1.4159820761762509e-05, | |
| "loss": 0.0014, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 7.18, | |
| "learning_rate": 1.3711725168035847e-05, | |
| "loss": 0.0014, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "learning_rate": 1.3263629574309186e-05, | |
| "loss": 0.0014, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 7.23, | |
| "learning_rate": 1.2815533980582524e-05, | |
| "loss": 0.0014, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 7.26, | |
| "learning_rate": 1.2367438386855863e-05, | |
| "loss": 0.0014, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 7.28, | |
| "learning_rate": 1.19193427931292e-05, | |
| "loss": 0.0014, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 7.31, | |
| "learning_rate": 1.147124719940254e-05, | |
| "loss": 0.0014, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 7.34, | |
| "learning_rate": 1.1023151605675878e-05, | |
| "loss": 0.0014, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 7.37, | |
| "learning_rate": 1.0575056011949217e-05, | |
| "loss": 0.0014, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 7.39, | |
| "learning_rate": 1.0126960418222555e-05, | |
| "loss": 0.0014, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 7.42, | |
| "learning_rate": 9.678864824495894e-06, | |
| "loss": 0.0014, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 7.45, | |
| "learning_rate": 9.230769230769232e-06, | |
| "loss": 0.0014, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 7.47, | |
| "learning_rate": 8.782673637042569e-06, | |
| "loss": 0.0014, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 7.5, | |
| "learning_rate": 8.334578043315908e-06, | |
| "loss": 0.0014, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 7.53, | |
| "learning_rate": 7.886482449589246e-06, | |
| "loss": 0.0014, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 7.55, | |
| "learning_rate": 7.4383868558625845e-06, | |
| "loss": 0.0014, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 7.58, | |
| "learning_rate": 6.990291262135923e-06, | |
| "loss": 0.0014, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 7.61, | |
| "learning_rate": 6.542195668409261e-06, | |
| "loss": 0.0014, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 7.63, | |
| "learning_rate": 6.094100074682599e-06, | |
| "loss": 0.0014, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 7.66, | |
| "learning_rate": 5.646004480955938e-06, | |
| "loss": 0.0014, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 7.69, | |
| "learning_rate": 5.197908887229276e-06, | |
| "loss": 0.0014, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 7.72, | |
| "learning_rate": 4.749813293502614e-06, | |
| "loss": 0.0014, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 7.74, | |
| "learning_rate": 4.301717699775952e-06, | |
| "loss": 0.0014, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 7.77, | |
| "learning_rate": 3.853622106049291e-06, | |
| "loss": 0.0014, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 7.8, | |
| "learning_rate": 3.4055265123226292e-06, | |
| "loss": 0.0014, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 7.82, | |
| "learning_rate": 2.9574309185959673e-06, | |
| "loss": 0.0014, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 7.85, | |
| "learning_rate": 2.5093353248693058e-06, | |
| "loss": 0.0014, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 7.88, | |
| "learning_rate": 2.061239731142644e-06, | |
| "loss": 0.0014, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 7.9, | |
| "learning_rate": 1.613144137415982e-06, | |
| "loss": 0.0014, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 7.93, | |
| "learning_rate": 1.1650485436893204e-06, | |
| "loss": 0.0014, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 7.96, | |
| "learning_rate": 7.169529499626587e-07, | |
| "loss": 0.0014, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 7.98, | |
| "learning_rate": 2.68857356235997e-07, | |
| "loss": 0.0014, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.999957997311828, | |
| "eval_loss": 0.0014714967692270875, | |
| "eval_runtime": 317.4979, | |
| "eval_samples_per_second": 74.986, | |
| "eval_steps_per_second": 1.172, | |
| "step": 2976 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "step": 2976, | |
| "total_flos": 5.9038961296526475e+19, | |
| "train_loss": 0.05853422665912207, | |
| "train_runtime": 16155.4674, | |
| "train_samples_per_second": 47.156, | |
| "train_steps_per_second": 0.184 | |
| } | |
| ], | |
| "max_steps": 2976, | |
| "num_train_epochs": 8, | |
| "total_flos": 5.9038961296526475e+19, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |