{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 5000,
  "global_step": 2250,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0044444444444444444,
      "grad_norm": 152.64707946777344,
      "learning_rate": 5.000000000000001e-07,
      "loss": 10.2908,
      "step": 10
    },
    {
      "epoch": 0.008888888888888889,
      "grad_norm": 211.05294799804688,
      "learning_rate": 1.4000000000000001e-06,
      "loss": 10.0664,
      "step": 20
    },
    {
      "epoch": 0.013333333333333334,
      "grad_norm": 2954.0419921875,
      "learning_rate": 2.4000000000000003e-06,
      "loss": 10.0624,
      "step": 30
    },
    {
      "epoch": 0.017777777777777778,
      "grad_norm": 1075.460205078125,
      "learning_rate": 3.4000000000000005e-06,
      "loss": 8.5938,
      "step": 40
    },
    {
      "epoch": 0.022222222222222223,
      "grad_norm": 7704.75048828125,
      "learning_rate": 4.4e-06,
      "loss": 7.8263,
      "step": 50
    },
    {
      "epoch": 0.02666666666666667,
      "grad_norm": 6002.2890625,
      "learning_rate": 5.4e-06,
      "loss": 6.2896,
      "step": 60
    },
    {
      "epoch": 0.03111111111111111,
      "grad_norm": 14311.9560546875,
      "learning_rate": 6.4000000000000006e-06,
      "loss": 5.4298,
      "step": 70
    },
    {
      "epoch": 0.035555555555555556,
      "grad_norm": 5250.40869140625,
      "learning_rate": 7.4e-06,
      "loss": 5.2735,
      "step": 80
    },
    {
      "epoch": 0.04,
      "grad_norm": 470.0221862792969,
      "learning_rate": 8.400000000000001e-06,
      "loss": 4.9127,
      "step": 90
    },
    {
      "epoch": 0.044444444444444446,
      "grad_norm": 19534.701171875,
      "learning_rate": 9.4e-06,
      "loss": 4.6077,
      "step": 100
    },
    {
      "epoch": 0.04888888888888889,
      "grad_norm": 492.7581481933594,
      "learning_rate": 1.04e-05,
      "loss": 4.5512,
      "step": 110
    },
    {
      "epoch": 0.05333333333333334,
      "grad_norm": 9192.6767578125,
      "learning_rate": 1.13e-05,
      "loss": 5.1424,
      "step": 120
    },
    {
      "epoch": 0.057777777777777775,
      "grad_norm": 108.0229263305664,
      "learning_rate": 1.23e-05,
      "loss": 3.7802,
      "step": 130
    },
    {
      "epoch": 0.06222222222222222,
      "grad_norm": 1061.415771484375,
      "learning_rate": 1.3300000000000001e-05,
      "loss": 3.3019,
      "step": 140
    },
    {
      "epoch": 0.06666666666666667,
      "grad_norm": 128.2921600341797,
      "learning_rate": 1.43e-05,
      "loss": 2.9124,
      "step": 150
    },
    {
      "epoch": 0.07111111111111111,
      "grad_norm": 29.611740112304688,
      "learning_rate": 1.53e-05,
      "loss": 2.6086,
      "step": 160
    },
    {
      "epoch": 0.07555555555555556,
      "grad_norm": 43.61715316772461,
      "learning_rate": 1.63e-05,
      "loss": 1.8848,
      "step": 170
    },
    {
      "epoch": 0.08,
      "grad_norm": 11403.16015625,
      "learning_rate": 1.73e-05,
      "loss": 1.9176,
      "step": 180
    },
    {
      "epoch": 0.08444444444444445,
      "grad_norm": 381.3476867675781,
      "learning_rate": 1.83e-05,
      "loss": 1.734,
      "step": 190
    },
    {
      "epoch": 0.08888888888888889,
      "grad_norm": 4.966193199157715,
      "learning_rate": 1.93e-05,
      "loss": 1.3579,
      "step": 200
    },
    {
      "epoch": 0.09333333333333334,
      "grad_norm": 4.789448261260986,
      "learning_rate": 2.0300000000000002e-05,
      "loss": 1.0905,
      "step": 210
    },
    {
      "epoch": 0.09777777777777778,
      "grad_norm": 2.323220729827881,
      "learning_rate": 2.13e-05,
      "loss": 1.0379,
      "step": 220
    },
    {
      "epoch": 0.10222222222222223,
      "grad_norm": 1.8111121654510498,
      "learning_rate": 2.23e-05,
      "loss": 0.9473,
      "step": 230
    },
    {
      "epoch": 0.10666666666666667,
      "grad_norm": 4.103352069854736,
      "learning_rate": 2.3300000000000004e-05,
      "loss": 1.0941,
      "step": 240
    },
    {
      "epoch": 0.1111111111111111,
      "grad_norm": 2.7268946170806885,
      "learning_rate": 2.43e-05,
      "loss": 1.1331,
      "step": 250
    },
    {
      "epoch": 0.11555555555555555,
      "grad_norm": 36.0265998840332,
      "learning_rate": 2.5300000000000002e-05,
      "loss": 1.3104,
      "step": 260
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8667157888412476,
      "learning_rate": 2.6300000000000002e-05,
      "loss": 1.0259,
      "step": 270
    },
    {
      "epoch": 0.12444444444444444,
      "grad_norm": 5.474687099456787,
      "learning_rate": 2.7300000000000003e-05,
      "loss": 1.0916,
      "step": 280
    },
    {
      "epoch": 0.1288888888888889,
      "grad_norm": 2.022836208343506,
      "learning_rate": 2.83e-05,
      "loss": 0.9872,
      "step": 290
    },
    {
      "epoch": 0.13333333333333333,
      "grad_norm": 1.9781649112701416,
      "learning_rate": 2.93e-05,
      "loss": 0.8844,
      "step": 300
    },
    {
      "epoch": 0.13777777777777778,
      "grad_norm": 1.9537264108657837,
      "learning_rate": 3.03e-05,
      "loss": 1.0466,
      "step": 310
    },
    {
      "epoch": 0.14222222222222222,
      "grad_norm": 2.3547990322113037,
      "learning_rate": 3.13e-05,
      "loss": 1.0016,
      "step": 320
    },
    {
      "epoch": 0.14666666666666667,
      "grad_norm": 2.5301690101623535,
      "learning_rate": 3.2300000000000006e-05,
      "loss": 1.015,
      "step": 330
    },
    {
      "epoch": 0.1511111111111111,
      "grad_norm": 1.698588252067566,
      "learning_rate": 3.33e-05,
      "loss": 0.994,
      "step": 340
    },
    {
      "epoch": 0.15555555555555556,
      "grad_norm": 2.17368221282959,
      "learning_rate": 3.430000000000001e-05,
      "loss": 1.0743,
      "step": 350
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.106858968734741,
      "learning_rate": 3.53e-05,
      "loss": 1.161,
      "step": 360
    },
    {
      "epoch": 0.16444444444444445,
      "grad_norm": 2.4343013763427734,
      "learning_rate": 3.63e-05,
      "loss": 1.0599,
      "step": 370
    },
    {
      "epoch": 0.1688888888888889,
      "grad_norm": 2.284984588623047,
      "learning_rate": 3.73e-05,
      "loss": 0.9959,
      "step": 380
    },
    {
      "epoch": 0.17333333333333334,
      "grad_norm": 1.8927963972091675,
      "learning_rate": 3.83e-05,
      "loss": 1.0063,
      "step": 390
    },
    {
      "epoch": 0.17777777777777778,
      "grad_norm": 2.731109142303467,
      "learning_rate": 3.9300000000000007e-05,
      "loss": 1.0467,
      "step": 400
    },
    {
      "epoch": 0.18222222222222223,
      "grad_norm": 9.546753883361816,
      "learning_rate": 4.0300000000000004e-05,
      "loss": 1.0892,
      "step": 410
    },
    {
      "epoch": 0.18666666666666668,
      "grad_norm": 1.986333966255188,
      "learning_rate": 4.13e-05,
      "loss": 1.0233,
      "step": 420
    },
    {
      "epoch": 0.19111111111111112,
      "grad_norm": 2.203075408935547,
      "learning_rate": 4.23e-05,
      "loss": 1.268,
      "step": 430
    },
    {
      "epoch": 0.19555555555555557,
      "grad_norm": 2.235809564590454,
      "learning_rate": 4.33e-05,
      "loss": 0.9937,
      "step": 440
    },
    {
      "epoch": 0.2,
      "grad_norm": 3.598283290863037,
      "learning_rate": 4.43e-05,
      "loss": 0.8837,
      "step": 450
    },
    {
      "epoch": 0.20444444444444446,
      "grad_norm": 1.872710108757019,
      "learning_rate": 4.53e-05,
      "loss": 1.1982,
      "step": 460
    },
    {
      "epoch": 0.2088888888888889,
      "grad_norm": 3.0466055870056152,
      "learning_rate": 4.630000000000001e-05,
      "loss": 1.1995,
      "step": 470
    },
    {
      "epoch": 0.21333333333333335,
      "grad_norm": 2.7732715606689453,
      "learning_rate": 4.73e-05,
      "loss": 1.0711,
      "step": 480
    },
    {
      "epoch": 0.21777777777777776,
      "grad_norm": 2.3050129413604736,
      "learning_rate": 4.83e-05,
      "loss": 1.2792,
      "step": 490
    },
    {
      "epoch": 0.2222222222222222,
      "grad_norm": 2.128685235977173,
      "learning_rate": 4.93e-05,
      "loss": 1.1137,
      "step": 500
    },
    {
      "epoch": 0.22666666666666666,
      "grad_norm": 2.2330660820007324,
      "learning_rate": 4.9914285714285717e-05,
      "loss": 1.0332,
      "step": 510
    },
    {
      "epoch": 0.2311111111111111,
      "grad_norm": 2.049591541290283,
      "learning_rate": 4.962857142857143e-05,
      "loss": 1.1467,
      "step": 520
    },
    {
      "epoch": 0.23555555555555555,
      "grad_norm": 2.388408660888672,
      "learning_rate": 4.934285714285715e-05,
      "loss": 1.1017,
      "step": 530
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.9390537738800049,
      "learning_rate": 4.905714285714286e-05,
      "loss": 1.0874,
      "step": 540
    },
    {
      "epoch": 0.24444444444444444,
      "grad_norm": 1.530515432357788,
      "learning_rate": 4.8771428571428574e-05,
      "loss": 1.0236,
      "step": 550
    },
    {
      "epoch": 0.24888888888888888,
      "grad_norm": 2.425351142883301,
      "learning_rate": 4.848571428571429e-05,
      "loss": 1.1392,
      "step": 560
    },
    {
      "epoch": 0.25333333333333335,
      "grad_norm": 2.0615339279174805,
      "learning_rate": 4.82e-05,
      "loss": 1.035,
      "step": 570
    },
    {
      "epoch": 0.2577777777777778,
      "grad_norm": 1.8026305437088013,
      "learning_rate": 4.7914285714285715e-05,
      "loss": 0.9894,
      "step": 580
    },
    {
      "epoch": 0.26222222222222225,
      "grad_norm": 1.7569513320922852,
      "learning_rate": 4.762857142857143e-05,
      "loss": 1.1085,
      "step": 590
    },
    {
      "epoch": 0.26666666666666666,
      "grad_norm": 2.374699354171753,
      "learning_rate": 4.734285714285715e-05,
      "loss": 1.0259,
      "step": 600
    },
    {
      "epoch": 0.27111111111111114,
      "grad_norm": 2.0250742435455322,
      "learning_rate": 4.7057142857142864e-05,
      "loss": 1.0345,
      "step": 610
    },
    {
      "epoch": 0.27555555555555555,
      "grad_norm": 2.330720901489258,
      "learning_rate": 4.677142857142857e-05,
      "loss": 1.1081,
      "step": 620
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5041579008102417,
      "learning_rate": 4.648571428571429e-05,
      "loss": 0.8348,
      "step": 630
    },
    {
      "epoch": 0.28444444444444444,
      "grad_norm": 2.397007703781128,
      "learning_rate": 4.6200000000000005e-05,
      "loss": 1.2422,
      "step": 640
    },
    {
      "epoch": 0.28888888888888886,
      "grad_norm": 1.4800223112106323,
      "learning_rate": 4.5914285714285714e-05,
      "loss": 0.995,
      "step": 650
    },
    {
      "epoch": 0.29333333333333333,
      "grad_norm": 1.6215323209762573,
      "learning_rate": 4.562857142857143e-05,
      "loss": 1.0207,
      "step": 660
    },
    {
      "epoch": 0.29777777777777775,
      "grad_norm": 4.243315696716309,
      "learning_rate": 4.534285714285714e-05,
      "loss": 1.0753,
      "step": 670
    },
    {
      "epoch": 0.3022222222222222,
      "grad_norm": 1.8909763097763062,
      "learning_rate": 4.5057142857142856e-05,
      "loss": 0.9546,
      "step": 680
    },
    {
      "epoch": 0.30666666666666664,
      "grad_norm": 1.5152394771575928,
      "learning_rate": 4.477142857142858e-05,
      "loss": 1.0375,
      "step": 690
    },
    {
      "epoch": 0.3111111111111111,
      "grad_norm": 1.9466438293457031,
      "learning_rate": 4.448571428571429e-05,
      "loss": 0.942,
      "step": 700
    },
    {
      "epoch": 0.31555555555555553,
      "grad_norm": 2.293703317642212,
      "learning_rate": 4.4200000000000004e-05,
      "loss": 1.1798,
      "step": 710
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.8559818267822266,
      "learning_rate": 4.391428571428572e-05,
      "loss": 0.9743,
      "step": 720
    },
    {
      "epoch": 0.3244444444444444,
      "grad_norm": 2.2498509883880615,
      "learning_rate": 4.362857142857143e-05,
      "loss": 1.0288,
      "step": 730
    },
    {
      "epoch": 0.3288888888888889,
      "grad_norm": 2.1050989627838135,
      "learning_rate": 4.3342857142857145e-05,
      "loss": 1.0123,
      "step": 740
    },
    {
      "epoch": 0.3333333333333333,
      "grad_norm": 1.5077266693115234,
      "learning_rate": 4.3057142857142854e-05,
      "loss": 0.9108,
      "step": 750
    },
    {
      "epoch": 0.3377777777777778,
      "grad_norm": 1.7779529094696045,
      "learning_rate": 4.277142857142857e-05,
      "loss": 1.1093,
      "step": 760
    },
    {
      "epoch": 0.3422222222222222,
      "grad_norm": 1.9781780242919922,
      "learning_rate": 4.2485714285714286e-05,
      "loss": 1.0721,
      "step": 770
    },
    {
      "epoch": 0.3466666666666667,
      "grad_norm": 1.94735848903656,
      "learning_rate": 4.22e-05,
      "loss": 1.1273,
      "step": 780
    },
    {
      "epoch": 0.3511111111111111,
      "grad_norm": 1.4685845375061035,
      "learning_rate": 4.191428571428572e-05,
      "loss": 0.9747,
      "step": 790
    },
    {
      "epoch": 0.35555555555555557,
      "grad_norm": 1.094089150428772,
      "learning_rate": 4.162857142857143e-05,
      "loss": 1.0063,
      "step": 800
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.0618069171905518,
      "learning_rate": 4.1342857142857144e-05,
      "loss": 0.9391,
      "step": 810
    },
    {
      "epoch": 0.36444444444444446,
      "grad_norm": 1.430254578590393,
      "learning_rate": 4.105714285714286e-05,
      "loss": 0.842,
      "step": 820
    },
    {
      "epoch": 0.3688888888888889,
      "grad_norm": 2.3631067276000977,
      "learning_rate": 4.077142857142857e-05,
      "loss": 0.9372,
      "step": 830
    },
    {
      "epoch": 0.37333333333333335,
      "grad_norm": 2.09013032913208,
      "learning_rate": 4.0485714285714285e-05,
      "loss": 0.9428,
      "step": 840
    },
    {
      "epoch": 0.37777777777777777,
      "grad_norm": 2.336822748184204,
      "learning_rate": 4.02e-05,
      "loss": 1.0985,
      "step": 850
    },
    {
      "epoch": 0.38222222222222224,
      "grad_norm": 2.335042953491211,
      "learning_rate": 3.991428571428572e-05,
      "loss": 1.2887,
      "step": 860
    },
    {
      "epoch": 0.38666666666666666,
      "grad_norm": 2.4629454612731934,
      "learning_rate": 3.9628571428571433e-05,
      "loss": 1.1101,
      "step": 870
    },
    {
      "epoch": 0.39111111111111113,
      "grad_norm": 2.509438991546631,
      "learning_rate": 3.934285714285714e-05,
      "loss": 1.0036,
      "step": 880
    },
    {
      "epoch": 0.39555555555555555,
      "grad_norm": 16.282512664794922,
      "learning_rate": 3.905714285714286e-05,
      "loss": 1.0398,
      "step": 890
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.0597307682037354,
      "learning_rate": 3.8771428571428575e-05,
      "loss": 0.9606,
      "step": 900
    },
    {
      "epoch": 0.40444444444444444,
      "grad_norm": 1.9231626987457275,
      "learning_rate": 3.8485714285714284e-05,
      "loss": 0.9271,
      "step": 910
    },
    {
      "epoch": 0.4088888888888889,
      "grad_norm": 2.7093663215637207,
      "learning_rate": 3.82e-05,
      "loss": 1.0634,
      "step": 920
    },
    {
      "epoch": 0.41333333333333333,
      "grad_norm": 1.8224252462387085,
      "learning_rate": 3.7914285714285716e-05,
      "loss": 0.9241,
      "step": 930
    },
    {
      "epoch": 0.4177777777777778,
      "grad_norm": 1.1034265756607056,
      "learning_rate": 3.762857142857143e-05,
      "loss": 0.9692,
      "step": 940
    },
    {
      "epoch": 0.4222222222222222,
      "grad_norm": 1.8517080545425415,
      "learning_rate": 3.734285714285715e-05,
      "loss": 0.93,
      "step": 950
    },
    {
      "epoch": 0.4266666666666667,
      "grad_norm": 2.2137563228607178,
      "learning_rate": 3.705714285714286e-05,
      "loss": 0.9953,
      "step": 960
    },
    {
      "epoch": 0.4311111111111111,
      "grad_norm": 1.9600673913955688,
      "learning_rate": 3.6771428571428574e-05,
      "loss": 1.2618,
      "step": 970
    },
    {
      "epoch": 0.43555555555555553,
      "grad_norm": 2.1263670921325684,
      "learning_rate": 3.648571428571429e-05,
      "loss": 1.139,
      "step": 980
    },
    {
      "epoch": 0.44,
      "grad_norm": 15.621545791625977,
      "learning_rate": 3.62e-05,
      "loss": 1.1132,
      "step": 990
    },
    {
      "epoch": 0.4444444444444444,
      "grad_norm": 1.1507309675216675,
      "learning_rate": 3.5914285714285715e-05,
      "loss": 0.8175,
      "step": 1000
    },
    {
      "epoch": 0.4488888888888889,
      "grad_norm": 1.6997977495193481,
      "learning_rate": 3.562857142857143e-05,
      "loss": 0.9233,
      "step": 1010
    },
    {
      "epoch": 0.4533333333333333,
      "grad_norm": 4.499351501464844,
      "learning_rate": 3.534285714285715e-05,
      "loss": 1.2277,
      "step": 1020
    },
    {
      "epoch": 0.4577777777777778,
      "grad_norm": 2.199875593185425,
      "learning_rate": 3.505714285714286e-05,
      "loss": 1.122,
      "step": 1030
    },
    {
      "epoch": 0.4622222222222222,
      "grad_norm": 1.630294919013977,
      "learning_rate": 3.477142857142857e-05,
      "loss": 1.0278,
      "step": 1040
    },
    {
      "epoch": 0.4666666666666667,
      "grad_norm": 1.3660622835159302,
      "learning_rate": 3.448571428571429e-05,
      "loss": 0.8593,
      "step": 1050
    },
    {
      "epoch": 0.4711111111111111,
      "grad_norm": 1.1493386030197144,
      "learning_rate": 3.4200000000000005e-05,
      "loss": 1.1425,
      "step": 1060
    },
    {
      "epoch": 0.47555555555555556,
      "grad_norm": 1.2984066009521484,
      "learning_rate": 3.3914285714285714e-05,
      "loss": 1.0536,
      "step": 1070
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.9474918842315674,
      "learning_rate": 3.362857142857143e-05,
      "loss": 1.0463,
      "step": 1080
    },
    {
      "epoch": 0.48444444444444446,
      "grad_norm": 1.580550193786621,
      "learning_rate": 3.334285714285714e-05,
      "loss": 1.0603,
      "step": 1090
    },
    {
      "epoch": 0.4888888888888889,
      "grad_norm": 2.1200408935546875,
      "learning_rate": 3.305714285714286e-05,
      "loss": 1.0465,
      "step": 1100
    },
    {
      "epoch": 0.49333333333333335,
      "grad_norm": 1.9244203567504883,
      "learning_rate": 3.277142857142858e-05,
      "loss": 1.1546,
      "step": 1110
    },
    {
      "epoch": 0.49777777777777776,
      "grad_norm": 2.738420248031616,
      "learning_rate": 3.248571428571429e-05,
      "loss": 1.0352,
      "step": 1120
    },
    {
      "epoch": 0.5022222222222222,
      "grad_norm": 1.3462022542953491,
      "learning_rate": 3.2200000000000003e-05,
      "loss": 1.0471,
      "step": 1130
    },
    {
      "epoch": 0.5066666666666667,
      "grad_norm": 2.3860256671905518,
      "learning_rate": 3.191428571428571e-05,
      "loss": 1.2251,
      "step": 1140
    },
    {
      "epoch": 0.5111111111111111,
      "grad_norm": 2.3215584754943848,
      "learning_rate": 3.162857142857143e-05,
      "loss": 1.0449,
      "step": 1150
    },
    {
      "epoch": 0.5155555555555555,
      "grad_norm": 2.4864187240600586,
      "learning_rate": 3.1342857142857145e-05,
      "loss": 1.0363,
      "step": 1160
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.4286997318267822,
      "learning_rate": 3.1057142857142854e-05,
      "loss": 1.0117,
      "step": 1170
    },
    {
      "epoch": 0.5244444444444445,
      "grad_norm": 1.4346647262573242,
      "learning_rate": 3.077142857142857e-05,
      "loss": 1.0378,
      "step": 1180
    },
    {
      "epoch": 0.5288888888888889,
      "grad_norm": 1.2675151824951172,
      "learning_rate": 3.048571428571429e-05,
      "loss": 1.2185,
      "step": 1190
    },
    {
      "epoch": 0.5333333333333333,
      "grad_norm": 1.820166826248169,
      "learning_rate": 3.02e-05,
      "loss": 0.9562,
      "step": 1200
    },
    {
      "epoch": 0.5377777777777778,
      "grad_norm": 2.047520875930786,
      "learning_rate": 2.9914285714285718e-05,
      "loss": 1.1071,
      "step": 1210
    },
    {
      "epoch": 0.5422222222222223,
      "grad_norm": 1.5641695261001587,
      "learning_rate": 2.9628571428571428e-05,
      "loss": 1.0808,
      "step": 1220
    },
    {
      "epoch": 0.5466666666666666,
      "grad_norm": 1.394386887550354,
      "learning_rate": 2.9342857142857144e-05,
      "loss": 1.1786,
      "step": 1230
    },
    {
      "epoch": 0.5511111111111111,
      "grad_norm": 1.5711551904678345,
      "learning_rate": 2.905714285714286e-05,
      "loss": 1.0592,
      "step": 1240
    },
    {
      "epoch": 0.5555555555555556,
      "grad_norm": 4.389777660369873,
      "learning_rate": 2.8771428571428572e-05,
      "loss": 0.9317,
      "step": 1250
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.3850152492523193,
      "learning_rate": 2.848571428571429e-05,
      "loss": 1.2062,
      "step": 1260
    },
    {
      "epoch": 0.5644444444444444,
      "grad_norm": 1.9837779998779297,
      "learning_rate": 2.8199999999999998e-05,
      "loss": 1.0094,
      "step": 1270
    },
    {
      "epoch": 0.5688888888888889,
      "grad_norm": 1.7174725532531738,
      "learning_rate": 2.7914285714285714e-05,
      "loss": 0.8973,
      "step": 1280
    },
    {
      "epoch": 0.5733333333333334,
      "grad_norm": 3.0591206550598145,
      "learning_rate": 2.762857142857143e-05,
      "loss": 0.9418,
      "step": 1290
    },
    {
      "epoch": 0.5777777777777777,
      "grad_norm": 2.102701187133789,
      "learning_rate": 2.7342857142857142e-05,
      "loss": 0.8937,
      "step": 1300
    },
    {
      "epoch": 0.5822222222222222,
      "grad_norm": 1.4529622793197632,
      "learning_rate": 2.705714285714286e-05,
      "loss": 0.8687,
      "step": 1310
    },
    {
      "epoch": 0.5866666666666667,
      "grad_norm": 2.070000410079956,
      "learning_rate": 2.6771428571428575e-05,
      "loss": 0.9817,
      "step": 1320
    },
    {
      "epoch": 0.5911111111111111,
      "grad_norm": 1.9277245998382568,
      "learning_rate": 2.6485714285714287e-05,
      "loss": 1.0032,
      "step": 1330
    },
    {
      "epoch": 0.5955555555555555,
      "grad_norm": 1.843050241470337,
      "learning_rate": 2.6200000000000003e-05,
      "loss": 1.1426,
      "step": 1340
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0302870273590088,
      "learning_rate": 2.5914285714285713e-05,
      "loss": 0.9906,
      "step": 1350
    },
    {
      "epoch": 0.6044444444444445,
      "grad_norm": 2.2761287689208984,
      "learning_rate": 2.562857142857143e-05,
      "loss": 1.1229,
      "step": 1360
    },
    {
      "epoch": 0.6088888888888889,
      "grad_norm": 3.804614782333374,
      "learning_rate": 2.5342857142857145e-05,
      "loss": 0.9621,
      "step": 1370
    },
    {
      "epoch": 0.6133333333333333,
      "grad_norm": 2.139857053756714,
      "learning_rate": 2.5057142857142857e-05,
      "loss": 0.9236,
      "step": 1380
    },
    {
      "epoch": 0.6177777777777778,
      "grad_norm": 2.6473701000213623,
      "learning_rate": 2.4771428571428573e-05,
      "loss": 0.9378,
      "step": 1390
    },
    {
      "epoch": 0.6222222222222222,
      "grad_norm": 2.084102153778076,
      "learning_rate": 2.4485714285714286e-05,
      "loss": 1.1248,
      "step": 1400
    },
    {
      "epoch": 0.6266666666666667,
      "grad_norm": 1.9559253454208374,
      "learning_rate": 2.4200000000000002e-05,
      "loss": 0.9016,
      "step": 1410
    },
    {
      "epoch": 0.6311111111111111,
      "grad_norm": 2.2711124420166016,
      "learning_rate": 2.3914285714285715e-05,
      "loss": 0.9147,
      "step": 1420
    },
    {
      "epoch": 0.6355555555555555,
      "grad_norm": 1.9618175029754639,
      "learning_rate": 2.362857142857143e-05,
      "loss": 1.0164,
      "step": 1430
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.52959144115448,
      "learning_rate": 2.3342857142857143e-05,
      "loss": 0.9068,
      "step": 1440
    },
    {
      "epoch": 0.6444444444444445,
      "grad_norm": 1.666641354560852,
      "learning_rate": 2.3057142857142856e-05,
      "loss": 0.9525,
      "step": 1450
    },
    {
      "epoch": 0.6488888888888888,
      "grad_norm": 2.2008984088897705,
      "learning_rate": 2.2771428571428572e-05,
      "loss": 1.2026,
      "step": 1460
    },
    {
      "epoch": 0.6533333333333333,
      "grad_norm": 1.7555994987487793,
      "learning_rate": 2.2485714285714288e-05,
      "loss": 1.0945,
      "step": 1470
    },
    {
      "epoch": 0.6577777777777778,
      "grad_norm": 2.01448392868042,
      "learning_rate": 2.22e-05,
      "loss": 1.0728,
      "step": 1480
    },
    {
      "epoch": 0.6622222222222223,
      "grad_norm": 3.834198474884033,
      "learning_rate": 2.1914285714285714e-05,
      "loss": 1.1607,
      "step": 1490
    },
    {
      "epoch": 0.6666666666666666,
      "grad_norm": 1.5447688102722168,
      "learning_rate": 2.162857142857143e-05,
      "loss": 0.9704,
      "step": 1500
    },
    {
      "epoch": 0.6711111111111111,
      "grad_norm": 1.6624338626861572,
      "learning_rate": 2.1342857142857146e-05,
      "loss": 0.8271,
      "step": 1510
    },
    {
      "epoch": 0.6755555555555556,
      "grad_norm": 1.5587396621704102,
      "learning_rate": 2.105714285714286e-05,
      "loss": 1.0568,
      "step": 1520
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.042724370956421,
      "learning_rate": 2.077142857142857e-05,
      "loss": 1.1686,
      "step": 1530
    },
    {
      "epoch": 0.6844444444444444,
      "grad_norm": 1.855294942855835,
      "learning_rate": 2.0485714285714287e-05,
      "loss": 0.9368,
      "step": 1540
    },
    {
      "epoch": 0.6888888888888889,
      "grad_norm": 1.5107423067092896,
      "learning_rate": 2.0200000000000003e-05,
      "loss": 1.0277,
      "step": 1550
    },
    {
      "epoch": 0.6933333333333334,
      "grad_norm": 1.0451265573501587,
      "learning_rate": 1.9914285714285716e-05,
      "loss": 1.0994,
      "step": 1560
    },
    {
      "epoch": 0.6977777777777778,
      "grad_norm": 2.220353126525879,
      "learning_rate": 1.962857142857143e-05,
      "loss": 1.1676,
      "step": 1570
    },
    {
      "epoch": 0.7022222222222222,
      "grad_norm": 4.320748805999756,
      "learning_rate": 1.9342857142857144e-05,
      "loss": 1.0167,
      "step": 1580
    },
    {
      "epoch": 0.7066666666666667,
      "grad_norm": 1.5187314748764038,
      "learning_rate": 1.9057142857142857e-05,
      "loss": 1.0061,
      "step": 1590
    },
    {
      "epoch": 0.7111111111111111,
      "grad_norm": 2.62479305267334,
      "learning_rate": 1.8771428571428573e-05,
      "loss": 1.0231,
      "step": 1600
    },
    {
      "epoch": 0.7155555555555555,
      "grad_norm": 37.25562286376953,
      "learning_rate": 1.8485714285714286e-05,
      "loss": 1.0258,
      "step": 1610
    },
    {
      "epoch": 0.72,
      "grad_norm": 7.708355903625488,
      "learning_rate": 1.8200000000000002e-05,
      "loss": 0.8673,
      "step": 1620
    },
    {
      "epoch": 0.7244444444444444,
      "grad_norm": 7.80335807800293,
      "learning_rate": 1.7914285714285715e-05,
      "loss": 1.1722,
      "step": 1630
    },
    {
      "epoch": 0.7288888888888889,
      "grad_norm": 4.959846496582031,
      "learning_rate": 1.762857142857143e-05,
      "loss": 1.0197,
      "step": 1640
    },
    {
      "epoch": 0.7333333333333333,
      "grad_norm": 1.8894150257110596,
      "learning_rate": 1.7342857142857143e-05,
      "loss": 0.9571,
      "step": 1650
    },
    {
      "epoch": 0.7377777777777778,
      "grad_norm": 5.9880828857421875,
      "learning_rate": 1.7057142857142856e-05,
      "loss": 1.1879,
      "step": 1660
    },
    {
      "epoch": 0.7422222222222222,
      "grad_norm": 1.9994230270385742,
      "learning_rate": 1.6771428571428572e-05,
      "loss": 1.01,
      "step": 1670
    },
    {
      "epoch": 0.7466666666666667,
      "grad_norm": 1.1010164022445679,
      "learning_rate": 1.6485714285714288e-05,
      "loss": 0.8837,
      "step": 1680
    },
    {
      "epoch": 0.7511111111111111,
      "grad_norm": 4.9511399269104,
      "learning_rate": 1.62e-05,
      "loss": 0.9419,
      "step": 1690
    },
    {
      "epoch": 0.7555555555555555,
      "grad_norm": 1.8997151851654053,
      "learning_rate": 1.5914285714285713e-05,
      "loss": 0.9052,
      "step": 1700
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6055902242660522,
      "learning_rate": 1.562857142857143e-05,
      "loss": 0.9279,
      "step": 1710
    },
    {
      "epoch": 0.7644444444444445,
      "grad_norm": 1.4079903364181519,
      "learning_rate": 1.5342857142857146e-05,
      "loss": 1.0294,
      "step": 1720
    },
    {
      "epoch": 0.7688888888888888,
      "grad_norm": 1.1559503078460693,
      "learning_rate": 1.5057142857142858e-05,
      "loss": 0.9437,
      "step": 1730
    },
    {
      "epoch": 0.7733333333333333,
      "grad_norm": 2.20170259475708,
      "learning_rate": 1.4771428571428573e-05,
      "loss": 1.1114,
      "step": 1740
    },
    {
      "epoch": 0.7777777777777778,
      "grad_norm": 1.4884487390518188,
      "learning_rate": 1.4485714285714285e-05,
      "loss": 1.0683,
      "step": 1750
    },
    {
      "epoch": 0.7822222222222223,
      "grad_norm": 1.7694205045700073,
      "learning_rate": 1.42e-05,
      "loss": 0.683,
      "step": 1760
    },
    {
      "epoch": 0.7866666666666666,
      "grad_norm": 7.573609352111816,
      "learning_rate": 1.3914285714285716e-05,
      "loss": 0.9765,
      "step": 1770
    },
    {
      "epoch": 0.7911111111111111,
      "grad_norm": 1.1830403804779053,
      "learning_rate": 1.362857142857143e-05,
      "loss": 1.0293,
      "step": 1780
    },
    {
      "epoch": 0.7955555555555556,
      "grad_norm": 2.407702922821045,
      "learning_rate": 1.3342857142857143e-05,
      "loss": 1.092,
      "step": 1790
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5534087419509888,
      "learning_rate": 1.3057142857142857e-05,
      "loss": 0.9226,
      "step": 1800
    },
    {
      "epoch": 0.8044444444444444,
      "grad_norm": 1.5992074012756348,
      "learning_rate": 1.2771428571428573e-05,
      "loss": 1.024,
      "step": 1810
    },
    {
      "epoch": 0.8088888888888889,
      "grad_norm": 4.057394027709961,
      "learning_rate": 1.2485714285714287e-05,
      "loss": 1.0755,
      "step": 1820
    },
    {
      "epoch": 0.8133333333333334,
      "grad_norm": 1.3395154476165771,
      "learning_rate": 1.22e-05,
      "loss": 0.979,
      "step": 1830
    },
    {
      "epoch": 0.8177777777777778,
      "grad_norm": 8.083459854125977,
      "learning_rate": 1.1914285714285716e-05,
      "loss": 0.9691,
      "step": 1840
    },
    {
      "epoch": 0.8222222222222222,
      "grad_norm": 1.3631497621536255,
      "learning_rate": 1.1628571428571429e-05,
      "loss": 0.9148,
      "step": 1850
    },
    {
      "epoch": 0.8266666666666667,
      "grad_norm": 1.3387725353240967,
      "learning_rate": 1.1342857142857143e-05,
      "loss": 1.058,
      "step": 1860
    },
    {
      "epoch": 0.8311111111111111,
      "grad_norm": 5.362998008728027,
      "learning_rate": 1.1057142857142858e-05,
      "loss": 1.1844,
      "step": 1870
    },
    {
      "epoch": 0.8355555555555556,
      "grad_norm": 1.5291681289672852,
      "learning_rate": 1.0771428571428572e-05,
      "loss": 1.0351,
      "step": 1880
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.036616563796997,
      "learning_rate": 1.0485714285714286e-05,
      "loss": 1.1685,
      "step": 1890
    },
    {
      "epoch": 0.8444444444444444,
      "grad_norm": 10.672110557556152,
      "learning_rate": 1.02e-05,
      "loss": 1.186,
      "step": 1900
    },
    {
      "epoch": 0.8488888888888889,
      "grad_norm": 7.35708475112915,
      "learning_rate": 9.914285714285715e-06,
      "loss": 1.0015,
      "step": 1910
    },
    {
      "epoch": 0.8533333333333334,
      "grad_norm": 3.0570969581604004,
      "learning_rate": 9.628571428571428e-06,
      "loss": 1.0048,
      "step": 1920
    },
    {
      "epoch": 0.8577777777777778,
      "grad_norm": 2.548383951187134,
      "learning_rate": 9.342857142857144e-06,
      "loss": 0.9308,
      "step": 1930
    },
    {
      "epoch": 0.8622222222222222,
      "grad_norm": 2.9634547233581543,
      "learning_rate": 9.057142857142856e-06,
      "loss": 1.108,
      "step": 1940
    },
    {
      "epoch": 0.8666666666666667,
      "grad_norm": 1.768025279045105,
      "learning_rate": 8.771428571428572e-06,
      "loss": 1.1125,
      "step": 1950
    },
    {
      "epoch": 0.8711111111111111,
      "grad_norm": 1.4923690557479858,
      "learning_rate": 8.485714285714285e-06,
      "loss": 1.0878,
      "step": 1960
    },
    {
      "epoch": 0.8755555555555555,
      "grad_norm": 1.8677984476089478,
      "learning_rate": 8.200000000000001e-06,
      "loss": 0.8959,
      "step": 1970
    },
    {
      "epoch": 0.88,
      "grad_norm": 4.373391151428223,
      "learning_rate": 7.914285714285714e-06,
      "loss": 0.9732,
      "step": 1980
    },
    {
      "epoch": 0.8844444444444445,
      "grad_norm": 1.9039726257324219,
      "learning_rate": 7.628571428571429e-06,
      "loss": 0.9692,
      "step": 1990
    },
    {
      "epoch": 0.8888888888888888,
      "grad_norm": 4.483780384063721,
      "learning_rate": 7.342857142857143e-06,
      "loss": 0.9734,
      "step": 2000
    },
    {
      "epoch": 0.8933333333333333,
      "grad_norm": 2.355618476867676,
      "learning_rate": 7.057142857142858e-06,
      "loss": 0.7923,
      "step": 2010
    },
    {
      "epoch": 0.8977777777777778,
      "grad_norm": 1.768234133720398,
      "learning_rate": 6.771428571428571e-06,
      "loss": 1.0397,
      "step": 2020
    },
    {
      "epoch": 0.9022222222222223,
      "grad_norm": 1.9736918210983276,
      "learning_rate": 6.485714285714286e-06,
      "loss": 0.9889,
      "step": 2030
    },
    {
      "epoch": 0.9066666666666666,
      "grad_norm": 2.7325940132141113,
      "learning_rate": 6.2e-06,
      "loss": 0.9604,
      "step": 2040
    },
    {
      "epoch": 0.9111111111111111,
      "grad_norm": 37.42255401611328,
      "learning_rate": 5.914285714285714e-06,
      "loss": 1.06,
      "step": 2050
    },
    {
      "epoch": 0.9155555555555556,
      "grad_norm": 3.5316126346588135,
      "learning_rate": 5.628571428571429e-06,
      "loss": 1.0478,
      "step": 2060
    },
    {
      "epoch": 0.92,
      "grad_norm": 4.869263648986816,
      "learning_rate": 5.342857142857143e-06,
      "loss": 0.8253,
      "step": 2070
    },
    {
      "epoch": 0.9244444444444444,
      "grad_norm": 71.07227325439453,
      "learning_rate": 5.057142857142857e-06,
      "loss": 1.1752,
      "step": 2080
    },
    {
      "epoch": 0.9288888888888889,
      "grad_norm": 5.730470657348633,
      "learning_rate": 4.771428571428572e-06,
      "loss": 0.9383,
      "step": 2090
    },
    {
      "epoch": 0.9333333333333333,
      "grad_norm": 24.144546508789062,
      "learning_rate": 4.485714285714286e-06,
      "loss": 0.9968,
      "step": 2100
    },
    {
      "epoch": 0.9377777777777778,
      "grad_norm": 3.403139352798462,
      "learning_rate": 4.2000000000000004e-06,
      "loss": 0.8893,
      "step": 2110
    },
    {
      "epoch": 0.9422222222222222,
      "grad_norm": 3.723447322845459,
      "learning_rate": 3.914285714285715e-06,
      "loss": 1.0404,
      "step": 2120
    },
    {
      "epoch": 0.9466666666666667,
      "grad_norm": 9.551548957824707,
      "learning_rate": 3.6285714285714283e-06,
      "loss": 0.9837,
      "step": 2130
    },
    {
      "epoch": 0.9511111111111111,
      "grad_norm": 6.35592794418335,
      "learning_rate": 3.3428571428571427e-06,
      "loss": 0.8563,
      "step": 2140
    },
    {
      "epoch": 0.9555555555555556,
      "grad_norm": 3.69284987449646,
      "learning_rate": 3.0571428571428575e-06,
      "loss": 1.0082,
      "step": 2150
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.6728179454803467,
      "learning_rate": 2.771428571428572e-06,
      "loss": 0.9613,
      "step": 2160
    },
    {
      "epoch": 0.9644444444444444,
      "grad_norm": 6.068182945251465,
      "learning_rate": 2.4857142857142858e-06,
      "loss": 0.9627,
      "step": 2170
    },
    {
      "epoch": 0.9688888888888889,
      "grad_norm": 28.534027099609375,
      "learning_rate": 2.2e-06,
      "loss": 0.9503,
      "step": 2180
    },
    {
      "epoch": 0.9733333333333334,
      "grad_norm": 7.36533260345459,
      "learning_rate": 1.9142857142857145e-06,
      "loss": 1.0315,
      "step": 2190
    },
    {
      "epoch": 0.9777777777777777,
      "grad_norm": 126.33111572265625,
      "learning_rate": 1.6285714285714286e-06,
      "loss": 1.015,
      "step": 2200
    },
    {
      "epoch": 0.9822222222222222,
      "grad_norm": 3.259016990661621,
      "learning_rate": 1.342857142857143e-06,
      "loss": 0.8513,
      "step": 2210
    },
    {
      "epoch": 0.9866666666666667,
      "grad_norm": 3.191985607147217,
      "learning_rate": 1.0571428571428573e-06,
      "loss": 1.0846,
      "step": 2220
    },
    {
      "epoch": 0.9911111111111112,
      "grad_norm": 3.515030860900879,
      "learning_rate": 7.714285714285715e-07,
      "loss": 0.9536,
      "step": 2230
    },
    {
      "epoch": 0.9955555555555555,
      "grad_norm": 3.0338504314422607,
      "learning_rate": 4.857142857142857e-07,
      "loss": 1.0406,
      "step": 2240
    },
    {
      "epoch": 1.0,
      "grad_norm": 6.6893110275268555,
      "learning_rate": 2.285714285714286e-07,
      "loss": 1.0769,
      "step": 2250
    }
  ],
  "logging_steps": 10,
  "max_steps": 2250,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 5000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 8.0067730341888e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}