| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 531.25, |
| "eval_steps": 500, |
| "global_step": 8500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.625, |
| "grad_norm": 0.9679650664329529, |
| "learning_rate": 1.8e-07, |
| "loss": 1.4147, |
| "step": 10 |
| }, |
| { |
| "epoch": 1.25, |
| "grad_norm": 0.9715470671653748, |
| "learning_rate": 3.8e-07, |
| "loss": 1.4102, |
| "step": 20 |
| }, |
| { |
| "epoch": 1.875, |
| "grad_norm": 0.9312183260917664, |
| "learning_rate": 5.8e-07, |
| "loss": 1.4117, |
| "step": 30 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 0.9487130641937256, |
| "learning_rate": 7.8e-07, |
| "loss": 1.4086, |
| "step": 40 |
| }, |
| { |
| "epoch": 3.125, |
| "grad_norm": 0.8825913071632385, |
| "learning_rate": 9.8e-07, |
| "loss": 1.3962, |
| "step": 50 |
| }, |
| { |
| "epoch": 3.75, |
| "grad_norm": 0.8918140530586243, |
| "learning_rate": 1.18e-06, |
| "loss": 1.3851, |
| "step": 60 |
| }, |
| { |
| "epoch": 4.375, |
| "grad_norm": 0.8738917708396912, |
| "learning_rate": 1.3800000000000001e-06, |
| "loss": 1.3686, |
| "step": 70 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.9906870126724243, |
| "learning_rate": 1.5800000000000003e-06, |
| "loss": 1.3532, |
| "step": 80 |
| }, |
| { |
| "epoch": 5.625, |
| "grad_norm": 1.1455429792404175, |
| "learning_rate": 1.7800000000000001e-06, |
| "loss": 1.3182, |
| "step": 90 |
| }, |
| { |
| "epoch": 6.25, |
| "grad_norm": 1.236258864402771, |
| "learning_rate": 1.98e-06, |
| "loss": 1.2925, |
| "step": 100 |
| }, |
| { |
| "epoch": 6.875, |
| "grad_norm": 1.327654242515564, |
| "learning_rate": 2.1800000000000003e-06, |
| "loss": 1.258, |
| "step": 110 |
| }, |
| { |
| "epoch": 7.5, |
| "grad_norm": 1.2924314737319946, |
| "learning_rate": 2.38e-06, |
| "loss": 1.2277, |
| "step": 120 |
| }, |
| { |
| "epoch": 8.125, |
| "grad_norm": 1.167385220527649, |
| "learning_rate": 2.5800000000000003e-06, |
| "loss": 1.1915, |
| "step": 130 |
| }, |
| { |
| "epoch": 8.75, |
| "grad_norm": 0.9448131322860718, |
| "learning_rate": 2.78e-06, |
| "loss": 1.1615, |
| "step": 140 |
| }, |
| { |
| "epoch": 9.375, |
| "grad_norm": 0.6841535568237305, |
| "learning_rate": 2.9800000000000003e-06, |
| "loss": 1.1314, |
| "step": 150 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 0.41633787751197815, |
| "learning_rate": 3.1800000000000005e-06, |
| "loss": 1.1138, |
| "step": 160 |
| }, |
| { |
| "epoch": 10.625, |
| "grad_norm": 0.3072815537452698, |
| "learning_rate": 3.38e-06, |
| "loss": 1.1009, |
| "step": 170 |
| }, |
| { |
| "epoch": 11.25, |
| "grad_norm": 0.28385525941848755, |
| "learning_rate": 3.58e-06, |
| "loss": 1.0909, |
| "step": 180 |
| }, |
| { |
| "epoch": 11.875, |
| "grad_norm": 0.26593610644340515, |
| "learning_rate": 3.7800000000000002e-06, |
| "loss": 1.0844, |
| "step": 190 |
| }, |
| { |
| "epoch": 12.5, |
| "grad_norm": 0.2532358169555664, |
| "learning_rate": 3.98e-06, |
| "loss": 1.0767, |
| "step": 200 |
| }, |
| { |
| "epoch": 13.125, |
| "grad_norm": 0.2549743354320526, |
| "learning_rate": 4.18e-06, |
| "loss": 1.0693, |
| "step": 210 |
| }, |
| { |
| "epoch": 13.75, |
| "grad_norm": 0.22734442353248596, |
| "learning_rate": 4.38e-06, |
| "loss": 1.0659, |
| "step": 220 |
| }, |
| { |
| "epoch": 14.375, |
| "grad_norm": 0.22863343358039856, |
| "learning_rate": 4.58e-06, |
| "loss": 1.0584, |
| "step": 230 |
| }, |
| { |
| "epoch": 15.0, |
| "grad_norm": 0.22417008876800537, |
| "learning_rate": 4.780000000000001e-06, |
| "loss": 1.0538, |
| "step": 240 |
| }, |
| { |
| "epoch": 15.625, |
| "grad_norm": 0.20044955611228943, |
| "learning_rate": 4.98e-06, |
| "loss": 1.0501, |
| "step": 250 |
| }, |
| { |
| "epoch": 16.25, |
| "grad_norm": 0.20292679965496063, |
| "learning_rate": 5.18e-06, |
| "loss": 1.0471, |
| "step": 260 |
| }, |
| { |
| "epoch": 16.875, |
| "grad_norm": 0.18847720324993134, |
| "learning_rate": 5.38e-06, |
| "loss": 1.0408, |
| "step": 270 |
| }, |
| { |
| "epoch": 17.5, |
| "grad_norm": 0.16966596245765686, |
| "learning_rate": 5.580000000000001e-06, |
| "loss": 1.0372, |
| "step": 280 |
| }, |
| { |
| "epoch": 18.125, |
| "grad_norm": 0.189141184091568, |
| "learning_rate": 5.78e-06, |
| "loss": 1.0352, |
| "step": 290 |
| }, |
| { |
| "epoch": 18.75, |
| "grad_norm": 0.18442021310329437, |
| "learning_rate": 5.98e-06, |
| "loss": 1.0313, |
| "step": 300 |
| }, |
| { |
| "epoch": 19.375, |
| "grad_norm": 0.1687787026166916, |
| "learning_rate": 6.18e-06, |
| "loss": 1.0297, |
| "step": 310 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 0.20706340670585632, |
| "learning_rate": 6.38e-06, |
| "loss": 1.0266, |
| "step": 320 |
| }, |
| { |
| "epoch": 20.625, |
| "grad_norm": 0.17096994817256927, |
| "learning_rate": 6.58e-06, |
| "loss": 1.0222, |
| "step": 330 |
| }, |
| { |
| "epoch": 21.25, |
| "grad_norm": 0.18136513233184814, |
| "learning_rate": 6.78e-06, |
| "loss": 1.0181, |
| "step": 340 |
| }, |
| { |
| "epoch": 21.875, |
| "grad_norm": 0.18053822219371796, |
| "learning_rate": 6.98e-06, |
| "loss": 1.0154, |
| "step": 350 |
| }, |
| { |
| "epoch": 22.5, |
| "grad_norm": 0.2154332548379898, |
| "learning_rate": 7.180000000000001e-06, |
| "loss": 1.0116, |
| "step": 360 |
| }, |
| { |
| "epoch": 23.125, |
| "grad_norm": 0.23920832574367523, |
| "learning_rate": 7.3800000000000005e-06, |
| "loss": 1.0056, |
| "step": 370 |
| }, |
| { |
| "epoch": 23.75, |
| "grad_norm": 0.24692080914974213, |
| "learning_rate": 7.580000000000001e-06, |
| "loss": 0.9982, |
| "step": 380 |
| }, |
| { |
| "epoch": 24.375, |
| "grad_norm": 0.32462307810783386, |
| "learning_rate": 7.78e-06, |
| "loss": 0.988, |
| "step": 390 |
| }, |
| { |
| "epoch": 25.0, |
| "grad_norm": 0.3690284788608551, |
| "learning_rate": 7.98e-06, |
| "loss": 0.9721, |
| "step": 400 |
| }, |
| { |
| "epoch": 25.625, |
| "grad_norm": 0.44107159972190857, |
| "learning_rate": 8.18e-06, |
| "loss": 0.9429, |
| "step": 410 |
| }, |
| { |
| "epoch": 26.25, |
| "grad_norm": 0.5854696035385132, |
| "learning_rate": 8.380000000000001e-06, |
| "loss": 0.8986, |
| "step": 420 |
| }, |
| { |
| "epoch": 26.875, |
| "grad_norm": 0.7961719632148743, |
| "learning_rate": 8.580000000000001e-06, |
| "loss": 0.8538, |
| "step": 430 |
| }, |
| { |
| "epoch": 27.5, |
| "grad_norm": 0.9634988307952881, |
| "learning_rate": 8.78e-06, |
| "loss": 0.8043, |
| "step": 440 |
| }, |
| { |
| "epoch": 28.125, |
| "grad_norm": 0.8321980834007263, |
| "learning_rate": 8.98e-06, |
| "loss": 0.7597, |
| "step": 450 |
| }, |
| { |
| "epoch": 28.75, |
| "grad_norm": 0.784283459186554, |
| "learning_rate": 9.180000000000002e-06, |
| "loss": 0.7166, |
| "step": 460 |
| }, |
| { |
| "epoch": 29.375, |
| "grad_norm": 0.8551204800605774, |
| "learning_rate": 9.38e-06, |
| "loss": 0.6809, |
| "step": 470 |
| }, |
| { |
| "epoch": 30.0, |
| "grad_norm": 0.774193525314331, |
| "learning_rate": 9.58e-06, |
| "loss": 0.6511, |
| "step": 480 |
| }, |
| { |
| "epoch": 30.625, |
| "grad_norm": 0.8994619846343994, |
| "learning_rate": 9.78e-06, |
| "loss": 0.6267, |
| "step": 490 |
| }, |
| { |
| "epoch": 31.25, |
| "grad_norm": 1.0884357690811157, |
| "learning_rate": 9.980000000000001e-06, |
| "loss": 0.6061, |
| "step": 500 |
| }, |
| { |
| "epoch": 31.875, |
| "grad_norm": 1.0175591707229614, |
| "learning_rate": 1.018e-05, |
| "loss": 0.5858, |
| "step": 510 |
| }, |
| { |
| "epoch": 32.5, |
| "grad_norm": 1.1407521963119507, |
| "learning_rate": 1.038e-05, |
| "loss": 0.5714, |
| "step": 520 |
| }, |
| { |
| "epoch": 33.125, |
| "grad_norm": 1.042667269706726, |
| "learning_rate": 1.058e-05, |
| "loss": 0.5586, |
| "step": 530 |
| }, |
| { |
| "epoch": 33.75, |
| "grad_norm": 0.9843167662620544, |
| "learning_rate": 1.0780000000000002e-05, |
| "loss": 0.5492, |
| "step": 540 |
| }, |
| { |
| "epoch": 34.375, |
| "grad_norm": 1.0415880680084229, |
| "learning_rate": 1.098e-05, |
| "loss": 0.5431, |
| "step": 550 |
| }, |
| { |
| "epoch": 35.0, |
| "grad_norm": 1.1198561191558838, |
| "learning_rate": 1.118e-05, |
| "loss": 0.5375, |
| "step": 560 |
| }, |
| { |
| "epoch": 35.625, |
| "grad_norm": 1.2082597017288208, |
| "learning_rate": 1.1380000000000001e-05, |
| "loss": 0.5338, |
| "step": 570 |
| }, |
| { |
| "epoch": 36.25, |
| "grad_norm": 1.2071870565414429, |
| "learning_rate": 1.1580000000000001e-05, |
| "loss": 0.5298, |
| "step": 580 |
| }, |
| { |
| "epoch": 36.875, |
| "grad_norm": 1.3737869262695312, |
| "learning_rate": 1.178e-05, |
| "loss": 0.5287, |
| "step": 590 |
| }, |
| { |
| "epoch": 37.5, |
| "grad_norm": 1.0583442449569702, |
| "learning_rate": 1.198e-05, |
| "loss": 0.5243, |
| "step": 600 |
| }, |
| { |
| "epoch": 38.125, |
| "grad_norm": 1.0685936212539673, |
| "learning_rate": 1.2180000000000002e-05, |
| "loss": 0.5229, |
| "step": 610 |
| }, |
| { |
| "epoch": 38.75, |
| "grad_norm": 1.2425378561019897, |
| "learning_rate": 1.238e-05, |
| "loss": 0.5215, |
| "step": 620 |
| }, |
| { |
| "epoch": 39.375, |
| "grad_norm": 1.1828131675720215, |
| "learning_rate": 1.258e-05, |
| "loss": 0.5194, |
| "step": 630 |
| }, |
| { |
| "epoch": 40.0, |
| "grad_norm": 1.0766721963882446, |
| "learning_rate": 1.278e-05, |
| "loss": 0.519, |
| "step": 640 |
| }, |
| { |
| "epoch": 40.625, |
| "grad_norm": 1.0481817722320557, |
| "learning_rate": 1.2980000000000001e-05, |
| "loss": 0.5154, |
| "step": 650 |
| }, |
| { |
| "epoch": 41.25, |
| "grad_norm": 1.0572658777236938, |
| "learning_rate": 1.3180000000000001e-05, |
| "loss": 0.5159, |
| "step": 660 |
| }, |
| { |
| "epoch": 41.875, |
| "grad_norm": 1.203803539276123, |
| "learning_rate": 1.338e-05, |
| "loss": 0.5121, |
| "step": 670 |
| }, |
| { |
| "epoch": 42.5, |
| "grad_norm": 0.872924268245697, |
| "learning_rate": 1.358e-05, |
| "loss": 0.512, |
| "step": 680 |
| }, |
| { |
| "epoch": 43.125, |
| "grad_norm": 1.162695050239563, |
| "learning_rate": 1.3780000000000002e-05, |
| "loss": 0.5064, |
| "step": 690 |
| }, |
| { |
| "epoch": 43.75, |
| "grad_norm": 1.1628501415252686, |
| "learning_rate": 1.3980000000000002e-05, |
| "loss": 0.5082, |
| "step": 700 |
| }, |
| { |
| "epoch": 44.375, |
| "grad_norm": 1.217319369316101, |
| "learning_rate": 1.4180000000000001e-05, |
| "loss": 0.5067, |
| "step": 710 |
| }, |
| { |
| "epoch": 45.0, |
| "grad_norm": 1.1384787559509277, |
| "learning_rate": 1.4380000000000001e-05, |
| "loss": 0.506, |
| "step": 720 |
| }, |
| { |
| "epoch": 45.625, |
| "grad_norm": 1.2870819568634033, |
| "learning_rate": 1.4580000000000003e-05, |
| "loss": 0.505, |
| "step": 730 |
| }, |
| { |
| "epoch": 46.25, |
| "grad_norm": 1.1514242887496948, |
| "learning_rate": 1.4779999999999999e-05, |
| "loss": 0.5064, |
| "step": 740 |
| }, |
| { |
| "epoch": 46.875, |
| "grad_norm": 1.144319772720337, |
| "learning_rate": 1.4979999999999999e-05, |
| "loss": 0.5017, |
| "step": 750 |
| }, |
| { |
| "epoch": 47.5, |
| "grad_norm": 1.22275710105896, |
| "learning_rate": 1.518e-05, |
| "loss": 0.5013, |
| "step": 760 |
| }, |
| { |
| "epoch": 48.125, |
| "grad_norm": 1.118972659111023, |
| "learning_rate": 1.538e-05, |
| "loss": 0.4997, |
| "step": 770 |
| }, |
| { |
| "epoch": 48.75, |
| "grad_norm": 1.051975965499878, |
| "learning_rate": 1.558e-05, |
| "loss": 0.5008, |
| "step": 780 |
| }, |
| { |
| "epoch": 49.375, |
| "grad_norm": 1.4525421857833862, |
| "learning_rate": 1.578e-05, |
| "loss": 0.4996, |
| "step": 790 |
| }, |
| { |
| "epoch": 50.0, |
| "grad_norm": 1.183190941810608, |
| "learning_rate": 1.598e-05, |
| "loss": 0.4963, |
| "step": 800 |
| }, |
| { |
| "epoch": 50.625, |
| "grad_norm": 1.2612169981002808, |
| "learning_rate": 1.618e-05, |
| "loss": 0.4974, |
| "step": 810 |
| }, |
| { |
| "epoch": 51.25, |
| "grad_norm": 0.9744483828544617, |
| "learning_rate": 1.6380000000000002e-05, |
| "loss": 0.4954, |
| "step": 820 |
| }, |
| { |
| "epoch": 51.875, |
| "grad_norm": 1.1227869987487793, |
| "learning_rate": 1.658e-05, |
| "loss": 0.4959, |
| "step": 830 |
| }, |
| { |
| "epoch": 52.5, |
| "grad_norm": 1.043039321899414, |
| "learning_rate": 1.6780000000000002e-05, |
| "loss": 0.4927, |
| "step": 840 |
| }, |
| { |
| "epoch": 53.125, |
| "grad_norm": 1.2920984029769897, |
| "learning_rate": 1.698e-05, |
| "loss": 0.4918, |
| "step": 850 |
| }, |
| { |
| "epoch": 53.75, |
| "grad_norm": 1.2774907350540161, |
| "learning_rate": 1.718e-05, |
| "loss": 0.4907, |
| "step": 860 |
| }, |
| { |
| "epoch": 54.375, |
| "grad_norm": 1.1010966300964355, |
| "learning_rate": 1.7380000000000003e-05, |
| "loss": 0.4914, |
| "step": 870 |
| }, |
| { |
| "epoch": 55.0, |
| "grad_norm": 1.0284886360168457, |
| "learning_rate": 1.758e-05, |
| "loss": 0.4873, |
| "step": 880 |
| }, |
| { |
| "epoch": 55.625, |
| "grad_norm": 1.3238409757614136, |
| "learning_rate": 1.7780000000000003e-05, |
| "loss": 0.4883, |
| "step": 890 |
| }, |
| { |
| "epoch": 56.25, |
| "grad_norm": 1.127139687538147, |
| "learning_rate": 1.798e-05, |
| "loss": 0.4871, |
| "step": 900 |
| }, |
| { |
| "epoch": 56.875, |
| "grad_norm": 1.3855187892913818, |
| "learning_rate": 1.818e-05, |
| "loss": 0.4828, |
| "step": 910 |
| }, |
| { |
| "epoch": 57.5, |
| "grad_norm": 1.536786437034607, |
| "learning_rate": 1.838e-05, |
| "loss": 0.4812, |
| "step": 920 |
| }, |
| { |
| "epoch": 58.125, |
| "grad_norm": 2.120702028274536, |
| "learning_rate": 1.858e-05, |
| "loss": 0.4776, |
| "step": 930 |
| }, |
| { |
| "epoch": 58.75, |
| "grad_norm": 1.6191856861114502, |
| "learning_rate": 1.878e-05, |
| "loss": 0.4761, |
| "step": 940 |
| }, |
| { |
| "epoch": 59.375, |
| "grad_norm": 1.6021475791931152, |
| "learning_rate": 1.898e-05, |
| "loss": 0.4743, |
| "step": 950 |
| }, |
| { |
| "epoch": 60.0, |
| "grad_norm": 1.604246735572815, |
| "learning_rate": 1.918e-05, |
| "loss": 0.4705, |
| "step": 960 |
| }, |
| { |
| "epoch": 60.625, |
| "grad_norm": 3.4168691635131836, |
| "learning_rate": 1.938e-05, |
| "loss": 0.4732, |
| "step": 970 |
| }, |
| { |
| "epoch": 61.25, |
| "grad_norm": 1.997071623802185, |
| "learning_rate": 1.9580000000000002e-05, |
| "loss": 0.4709, |
| "step": 980 |
| }, |
| { |
| "epoch": 61.875, |
| "grad_norm": 2.351092576980591, |
| "learning_rate": 1.978e-05, |
| "loss": 0.4678, |
| "step": 990 |
| }, |
| { |
| "epoch": 62.5, |
| "grad_norm": 1.9072397947311401, |
| "learning_rate": 1.9980000000000002e-05, |
| "loss": 0.469, |
| "step": 1000 |
| }, |
| { |
| "epoch": 63.125, |
| "grad_norm": 1.777255892753601, |
| "learning_rate": 2.0180000000000003e-05, |
| "loss": 0.4642, |
| "step": 1010 |
| }, |
| { |
| "epoch": 63.75, |
| "grad_norm": 1.3945808410644531, |
| "learning_rate": 2.038e-05, |
| "loss": 0.4613, |
| "step": 1020 |
| }, |
| { |
| "epoch": 64.375, |
| "grad_norm": 1.5424234867095947, |
| "learning_rate": 2.0580000000000003e-05, |
| "loss": 0.4621, |
| "step": 1030 |
| }, |
| { |
| "epoch": 65.0, |
| "grad_norm": 1.4399698972702026, |
| "learning_rate": 2.078e-05, |
| "loss": 0.4596, |
| "step": 1040 |
| }, |
| { |
| "epoch": 65.625, |
| "grad_norm": 1.5211741924285889, |
| "learning_rate": 2.098e-05, |
| "loss": 0.4571, |
| "step": 1050 |
| }, |
| { |
| "epoch": 66.25, |
| "grad_norm": 1.6879644393920898, |
| "learning_rate": 2.118e-05, |
| "loss": 0.4535, |
| "step": 1060 |
| }, |
| { |
| "epoch": 66.875, |
| "grad_norm": 1.7169924974441528, |
| "learning_rate": 2.138e-05, |
| "loss": 0.4506, |
| "step": 1070 |
| }, |
| { |
| "epoch": 67.5, |
| "grad_norm": 2.5419564247131348, |
| "learning_rate": 2.158e-05, |
| "loss": 0.4497, |
| "step": 1080 |
| }, |
| { |
| "epoch": 68.125, |
| "grad_norm": 1.9424971342086792, |
| "learning_rate": 2.178e-05, |
| "loss": 0.446, |
| "step": 1090 |
| }, |
| { |
| "epoch": 68.75, |
| "grad_norm": 4.030938148498535, |
| "learning_rate": 2.198e-05, |
| "loss": 0.4459, |
| "step": 1100 |
| }, |
| { |
| "epoch": 69.375, |
| "grad_norm": 2.3670308589935303, |
| "learning_rate": 2.218e-05, |
| "loss": 0.4454, |
| "step": 1110 |
| }, |
| { |
| "epoch": 70.0, |
| "grad_norm": 2.562795400619507, |
| "learning_rate": 2.2380000000000003e-05, |
| "loss": 0.4397, |
| "step": 1120 |
| }, |
| { |
| "epoch": 70.625, |
| "grad_norm": 2.4600791931152344, |
| "learning_rate": 2.258e-05, |
| "loss": 0.4359, |
| "step": 1130 |
| }, |
| { |
| "epoch": 71.25, |
| "grad_norm": 2.77167010307312, |
| "learning_rate": 2.2780000000000002e-05, |
| "loss": 0.4325, |
| "step": 1140 |
| }, |
| { |
| "epoch": 71.875, |
| "grad_norm": 3.150618314743042, |
| "learning_rate": 2.298e-05, |
| "loss": 0.4285, |
| "step": 1150 |
| }, |
| { |
| "epoch": 72.5, |
| "grad_norm": 2.5932984352111816, |
| "learning_rate": 2.318e-05, |
| "loss": 0.4247, |
| "step": 1160 |
| }, |
| { |
| "epoch": 73.125, |
| "grad_norm": 2.8484175205230713, |
| "learning_rate": 2.3380000000000003e-05, |
| "loss": 0.4199, |
| "step": 1170 |
| }, |
| { |
| "epoch": 73.75, |
| "grad_norm": 3.6509993076324463, |
| "learning_rate": 2.358e-05, |
| "loss": 0.4183, |
| "step": 1180 |
| }, |
| { |
| "epoch": 74.375, |
| "grad_norm": 5.74982213973999, |
| "learning_rate": 2.3780000000000003e-05, |
| "loss": 0.4146, |
| "step": 1190 |
| }, |
| { |
| "epoch": 75.0, |
| "grad_norm": 5.700360298156738, |
| "learning_rate": 2.398e-05, |
| "loss": 0.4322, |
| "step": 1200 |
| }, |
| { |
| "epoch": 75.625, |
| "grad_norm": 3.3000857830047607, |
| "learning_rate": 2.418e-05, |
| "loss": 0.4228, |
| "step": 1210 |
| }, |
| { |
| "epoch": 76.25, |
| "grad_norm": 2.5206165313720703, |
| "learning_rate": 2.438e-05, |
| "loss": 0.4072, |
| "step": 1220 |
| }, |
| { |
| "epoch": 76.875, |
| "grad_norm": 4.3755083084106445, |
| "learning_rate": 2.4580000000000002e-05, |
| "loss": 0.4025, |
| "step": 1230 |
| }, |
| { |
| "epoch": 77.5, |
| "grad_norm": 3.973033905029297, |
| "learning_rate": 2.478e-05, |
| "loss": 0.3953, |
| "step": 1240 |
| }, |
| { |
| "epoch": 78.125, |
| "grad_norm": 3.5819246768951416, |
| "learning_rate": 2.498e-05, |
| "loss": 0.3909, |
| "step": 1250 |
| }, |
| { |
| "epoch": 78.75, |
| "grad_norm": 3.183445930480957, |
| "learning_rate": 2.5180000000000003e-05, |
| "loss": 0.3852, |
| "step": 1260 |
| }, |
| { |
| "epoch": 79.375, |
| "grad_norm": 3.4347057342529297, |
| "learning_rate": 2.5380000000000004e-05, |
| "loss": 0.3785, |
| "step": 1270 |
| }, |
| { |
| "epoch": 80.0, |
| "grad_norm": 5.1768388748168945, |
| "learning_rate": 2.5580000000000002e-05, |
| "loss": 0.3717, |
| "step": 1280 |
| }, |
| { |
| "epoch": 80.625, |
| "grad_norm": 3.9438490867614746, |
| "learning_rate": 2.5779999999999997e-05, |
| "loss": 0.3681, |
| "step": 1290 |
| }, |
| { |
| "epoch": 81.25, |
| "grad_norm": 4.442440986633301, |
| "learning_rate": 2.598e-05, |
| "loss": 0.3571, |
| "step": 1300 |
| }, |
| { |
| "epoch": 81.875, |
| "grad_norm": 5.6028900146484375, |
| "learning_rate": 2.618e-05, |
| "loss": 0.3553, |
| "step": 1310 |
| }, |
| { |
| "epoch": 82.5, |
| "grad_norm": 5.537353038787842, |
| "learning_rate": 2.6379999999999998e-05, |
| "loss": 0.3515, |
| "step": 1320 |
| }, |
| { |
| "epoch": 83.125, |
| "grad_norm": 6.435239315032959, |
| "learning_rate": 2.658e-05, |
| "loss": 0.3486, |
| "step": 1330 |
| }, |
| { |
| "epoch": 83.75, |
| "grad_norm": 3.413828134536743, |
| "learning_rate": 2.678e-05, |
| "loss": 0.3373, |
| "step": 1340 |
| }, |
| { |
| "epoch": 84.375, |
| "grad_norm": 3.34212327003479, |
| "learning_rate": 2.698e-05, |
| "loss": 0.3297, |
| "step": 1350 |
| }, |
| { |
| "epoch": 85.0, |
| "grad_norm": 3.8223774433135986, |
| "learning_rate": 2.718e-05, |
| "loss": 0.322, |
| "step": 1360 |
| }, |
| { |
| "epoch": 85.625, |
| "grad_norm": 4.211275577545166, |
| "learning_rate": 2.738e-05, |
| "loss": 0.314, |
| "step": 1370 |
| }, |
| { |
| "epoch": 86.25, |
| "grad_norm": 5.035346984863281, |
| "learning_rate": 2.758e-05, |
| "loss": 0.3089, |
| "step": 1380 |
| }, |
| { |
| "epoch": 86.875, |
| "grad_norm": 5.702706813812256, |
| "learning_rate": 2.778e-05, |
| "loss": 0.3017, |
| "step": 1390 |
| }, |
| { |
| "epoch": 87.5, |
| "grad_norm": 3.9658496379852295, |
| "learning_rate": 2.798e-05, |
| "loss": 0.3002, |
| "step": 1400 |
| }, |
| { |
| "epoch": 88.125, |
| "grad_norm": 3.6658170223236084, |
| "learning_rate": 2.818e-05, |
| "loss": 0.2877, |
| "step": 1410 |
| }, |
| { |
| "epoch": 88.75, |
| "grad_norm": 4.639245986938477, |
| "learning_rate": 2.8380000000000003e-05, |
| "loss": 0.2773, |
| "step": 1420 |
| }, |
| { |
| "epoch": 89.375, |
| "grad_norm": 4.7095947265625, |
| "learning_rate": 2.858e-05, |
| "loss": 0.2739, |
| "step": 1430 |
| }, |
| { |
| "epoch": 90.0, |
| "grad_norm": 5.2461700439453125, |
| "learning_rate": 2.8780000000000002e-05, |
| "loss": 0.267, |
| "step": 1440 |
| }, |
| { |
| "epoch": 90.625, |
| "grad_norm": 9.186333656311035, |
| "learning_rate": 2.898e-05, |
| "loss": 0.2638, |
| "step": 1450 |
| }, |
| { |
| "epoch": 91.25, |
| "grad_norm": 5.92840051651001, |
| "learning_rate": 2.9180000000000002e-05, |
| "loss": 0.2639, |
| "step": 1460 |
| }, |
| { |
| "epoch": 91.875, |
| "grad_norm": 4.675635814666748, |
| "learning_rate": 2.9380000000000003e-05, |
| "loss": 0.2485, |
| "step": 1470 |
| }, |
| { |
| "epoch": 92.5, |
| "grad_norm": 4.761613845825195, |
| "learning_rate": 2.958e-05, |
| "loss": 0.2392, |
| "step": 1480 |
| }, |
| { |
| "epoch": 93.125, |
| "grad_norm": 5.566701889038086, |
| "learning_rate": 2.9780000000000003e-05, |
| "loss": 0.2341, |
| "step": 1490 |
| }, |
| { |
| "epoch": 93.75, |
| "grad_norm": 4.253500461578369, |
| "learning_rate": 2.998e-05, |
| "loss": 0.2222, |
| "step": 1500 |
| }, |
| { |
| "epoch": 94.375, |
| "grad_norm": 4.602695465087891, |
| "learning_rate": 3.0180000000000002e-05, |
| "loss": 0.2183, |
| "step": 1510 |
| }, |
| { |
| "epoch": 95.0, |
| "grad_norm": 5.938004493713379, |
| "learning_rate": 3.0380000000000004e-05, |
| "loss": 0.213, |
| "step": 1520 |
| }, |
| { |
| "epoch": 95.625, |
| "grad_norm": 6.689606189727783, |
| "learning_rate": 3.058e-05, |
| "loss": 0.2123, |
| "step": 1530 |
| }, |
| { |
| "epoch": 96.25, |
| "grad_norm": 5.87599515914917, |
| "learning_rate": 3.078e-05, |
| "loss": 0.2011, |
| "step": 1540 |
| }, |
| { |
| "epoch": 96.875, |
| "grad_norm": 5.971210956573486, |
| "learning_rate": 3.0980000000000005e-05, |
| "loss": 0.1966, |
| "step": 1550 |
| }, |
| { |
| "epoch": 97.5, |
| "grad_norm": 5.859025001525879, |
| "learning_rate": 3.118e-05, |
| "loss": 0.1946, |
| "step": 1560 |
| }, |
| { |
| "epoch": 98.125, |
| "grad_norm": 5.950936317443848, |
| "learning_rate": 3.138e-05, |
| "loss": 0.1814, |
| "step": 1570 |
| }, |
| { |
| "epoch": 98.75, |
| "grad_norm": 4.768659591674805, |
| "learning_rate": 3.1580000000000006e-05, |
| "loss": 0.1768, |
| "step": 1580 |
| }, |
| { |
| "epoch": 99.375, |
| "grad_norm": 5.677441596984863, |
| "learning_rate": 3.1780000000000004e-05, |
| "loss": 0.1727, |
| "step": 1590 |
| }, |
| { |
| "epoch": 100.0, |
| "grad_norm": 4.38816499710083, |
| "learning_rate": 3.198e-05, |
| "loss": 0.162, |
| "step": 1600 |
| }, |
| { |
| "epoch": 100.625, |
| "grad_norm": 4.571051120758057, |
| "learning_rate": 3.218e-05, |
| "loss": 0.1606, |
| "step": 1610 |
| }, |
| { |
| "epoch": 101.25, |
| "grad_norm": 6.406070709228516, |
| "learning_rate": 3.238e-05, |
| "loss": 0.1494, |
| "step": 1620 |
| }, |
| { |
| "epoch": 101.875, |
| "grad_norm": 4.923046588897705, |
| "learning_rate": 3.2579999999999996e-05, |
| "loss": 0.1479, |
| "step": 1630 |
| }, |
| { |
| "epoch": 102.5, |
| "grad_norm": 6.735141277313232, |
| "learning_rate": 3.278e-05, |
| "loss": 0.1468, |
| "step": 1640 |
| }, |
| { |
| "epoch": 103.125, |
| "grad_norm": 5.201277732849121, |
| "learning_rate": 3.298e-05, |
| "loss": 0.137, |
| "step": 1650 |
| }, |
| { |
| "epoch": 103.75, |
| "grad_norm": 5.127175331115723, |
| "learning_rate": 3.318e-05, |
| "loss": 0.1377, |
| "step": 1660 |
| }, |
| { |
| "epoch": 104.375, |
| "grad_norm": 5.3002471923828125, |
| "learning_rate": 3.338e-05, |
| "loss": 0.1277, |
| "step": 1670 |
| }, |
| { |
| "epoch": 105.0, |
| "grad_norm": 5.290287017822266, |
| "learning_rate": 3.358e-05, |
| "loss": 0.1224, |
| "step": 1680 |
| }, |
| { |
| "epoch": 105.625, |
| "grad_norm": 6.6339311599731445, |
| "learning_rate": 3.378e-05, |
| "loss": 0.121, |
| "step": 1690 |
| }, |
| { |
| "epoch": 106.25, |
| "grad_norm": 6.3351826667785645, |
| "learning_rate": 3.398e-05, |
| "loss": 0.1202, |
| "step": 1700 |
| }, |
| { |
| "epoch": 106.875, |
| "grad_norm": 6.30771017074585, |
| "learning_rate": 3.418e-05, |
| "loss": 0.1182, |
| "step": 1710 |
| }, |
| { |
| "epoch": 107.5, |
| "grad_norm": 5.454580307006836, |
| "learning_rate": 3.438e-05, |
| "loss": 0.1132, |
| "step": 1720 |
| }, |
| { |
| "epoch": 108.125, |
| "grad_norm": 5.418821811676025, |
| "learning_rate": 3.4580000000000004e-05, |
| "loss": 0.1064, |
| "step": 1730 |
| }, |
| { |
| "epoch": 108.75, |
| "grad_norm": 4.996350288391113, |
| "learning_rate": 3.478e-05, |
| "loss": 0.0993, |
| "step": 1740 |
| }, |
| { |
| "epoch": 109.375, |
| "grad_norm": 6.961830139160156, |
| "learning_rate": 3.498e-05, |
| "loss": 0.0994, |
| "step": 1750 |
| }, |
| { |
| "epoch": 110.0, |
| "grad_norm": 6.240096569061279, |
| "learning_rate": 3.518e-05, |
| "loss": 0.1032, |
| "step": 1760 |
| }, |
| { |
| "epoch": 110.625, |
| "grad_norm": 5.9896111488342285, |
| "learning_rate": 3.5380000000000003e-05, |
| "loss": 0.0931, |
| "step": 1770 |
| }, |
| { |
| "epoch": 111.25, |
| "grad_norm": 4.381375312805176, |
| "learning_rate": 3.558e-05, |
| "loss": 0.0899, |
| "step": 1780 |
| }, |
| { |
| "epoch": 111.875, |
| "grad_norm": 4.847299098968506, |
| "learning_rate": 3.578e-05, |
| "loss": 0.0845, |
| "step": 1790 |
| }, |
| { |
| "epoch": 112.5, |
| "grad_norm": 6.442471981048584, |
| "learning_rate": 3.5980000000000004e-05, |
| "loss": 0.0903, |
| "step": 1800 |
| }, |
| { |
| "epoch": 113.125, |
| "grad_norm": 6.36176872253418, |
| "learning_rate": 3.618e-05, |
| "loss": 0.0871, |
| "step": 1810 |
| }, |
| { |
| "epoch": 113.75, |
| "grad_norm": 5.581753730773926, |
| "learning_rate": 3.638e-05, |
| "loss": 0.0832, |
| "step": 1820 |
| }, |
| { |
| "epoch": 114.375, |
| "grad_norm": 5.413025379180908, |
| "learning_rate": 3.6580000000000006e-05, |
| "loss": 0.0894, |
| "step": 1830 |
| }, |
| { |
| "epoch": 115.0, |
| "grad_norm": 3.545975685119629, |
| "learning_rate": 3.6780000000000004e-05, |
| "loss": 0.0816, |
| "step": 1840 |
| }, |
| { |
| "epoch": 115.625, |
| "grad_norm": 5.415703773498535, |
| "learning_rate": 3.698e-05, |
| "loss": 0.0847, |
| "step": 1850 |
| }, |
| { |
| "epoch": 116.25, |
| "grad_norm": 5.82867956161499, |
| "learning_rate": 3.7180000000000007e-05, |
| "loss": 0.0781, |
| "step": 1860 |
| }, |
| { |
| "epoch": 116.875, |
| "grad_norm": 6.405238151550293, |
| "learning_rate": 3.7380000000000005e-05, |
| "loss": 0.082, |
| "step": 1870 |
| }, |
| { |
| "epoch": 117.5, |
| "grad_norm": 3.8289811611175537, |
| "learning_rate": 3.758e-05, |
| "loss": 0.0766, |
| "step": 1880 |
| }, |
| { |
| "epoch": 118.125, |
| "grad_norm": 4.225410461425781, |
| "learning_rate": 3.778000000000001e-05, |
| "loss": 0.0753, |
| "step": 1890 |
| }, |
| { |
| "epoch": 118.75, |
| "grad_norm": 3.565117120742798, |
| "learning_rate": 3.7980000000000006e-05, |
| "loss": 0.0714, |
| "step": 1900 |
| }, |
| { |
| "epoch": 119.375, |
| "grad_norm": 4.679031848907471, |
| "learning_rate": 3.818e-05, |
| "loss": 0.0727, |
| "step": 1910 |
| }, |
| { |
| "epoch": 120.0, |
| "grad_norm": 3.9762325286865234, |
| "learning_rate": 3.838e-05, |
| "loss": 0.0746, |
| "step": 1920 |
| }, |
| { |
| "epoch": 120.625, |
| "grad_norm": 5.354043960571289, |
| "learning_rate": 3.858e-05, |
| "loss": 0.0715, |
| "step": 1930 |
| }, |
| { |
| "epoch": 121.25, |
| "grad_norm": 4.858035564422607, |
| "learning_rate": 3.878e-05, |
| "loss": 0.0736, |
| "step": 1940 |
| }, |
| { |
| "epoch": 121.875, |
| "grad_norm": 5.547657012939453, |
| "learning_rate": 3.898e-05, |
| "loss": 0.0747, |
| "step": 1950 |
| }, |
| { |
| "epoch": 122.5, |
| "grad_norm": 4.109276294708252, |
| "learning_rate": 3.918e-05, |
| "loss": 0.0671, |
| "step": 1960 |
| }, |
| { |
| "epoch": 123.125, |
| "grad_norm": 2.984168291091919, |
| "learning_rate": 3.938e-05, |
| "loss": 0.0675, |
| "step": 1970 |
| }, |
| { |
| "epoch": 123.75, |
| "grad_norm": 3.5227620601654053, |
| "learning_rate": 3.958e-05, |
| "loss": 0.0704, |
| "step": 1980 |
| }, |
| { |
| "epoch": 124.375, |
| "grad_norm": 4.113119125366211, |
| "learning_rate": 3.978e-05, |
| "loss": 0.0684, |
| "step": 1990 |
| }, |
| { |
| "epoch": 125.0, |
| "grad_norm": 4.130417823791504, |
| "learning_rate": 3.998e-05, |
| "loss": 0.0696, |
| "step": 2000 |
| }, |
| { |
| "epoch": 125.625, |
| "grad_norm": 4.076992034912109, |
| "learning_rate": 4.018e-05, |
| "loss": 0.0648, |
| "step": 2010 |
| }, |
| { |
| "epoch": 126.25, |
| "grad_norm": 3.623624801635742, |
| "learning_rate": 4.038e-05, |
| "loss": 0.0634, |
| "step": 2020 |
| }, |
| { |
| "epoch": 126.875, |
| "grad_norm": 3.0136911869049072, |
| "learning_rate": 4.058e-05, |
| "loss": 0.0619, |
| "step": 2030 |
| }, |
| { |
| "epoch": 127.5, |
| "grad_norm": 2.3207767009735107, |
| "learning_rate": 4.078e-05, |
| "loss": 0.0602, |
| "step": 2040 |
| }, |
| { |
| "epoch": 128.125, |
| "grad_norm": 6.006433963775635, |
| "learning_rate": 4.0980000000000004e-05, |
| "loss": 0.0618, |
| "step": 2050 |
| }, |
| { |
| "epoch": 128.75, |
| "grad_norm": 4.211705684661865, |
| "learning_rate": 4.118e-05, |
| "loss": 0.0614, |
| "step": 2060 |
| }, |
| { |
| "epoch": 129.375, |
| "grad_norm": 3.0991692543029785, |
| "learning_rate": 4.138e-05, |
| "loss": 0.0596, |
| "step": 2070 |
| }, |
| { |
| "epoch": 130.0, |
| "grad_norm": 3.5333359241485596, |
| "learning_rate": 4.1580000000000005e-05, |
| "loss": 0.0594, |
| "step": 2080 |
| }, |
| { |
| "epoch": 130.625, |
| "grad_norm": 2.464125394821167, |
| "learning_rate": 4.178e-05, |
| "loss": 0.0579, |
| "step": 2090 |
| }, |
| { |
| "epoch": 131.25, |
| "grad_norm": 3.499553680419922, |
| "learning_rate": 4.198e-05, |
| "loss": 0.058, |
| "step": 2100 |
| }, |
| { |
| "epoch": 131.875, |
| "grad_norm": 4.001912593841553, |
| "learning_rate": 4.2180000000000006e-05, |
| "loss": 0.0585, |
| "step": 2110 |
| }, |
| { |
| "epoch": 132.5, |
| "grad_norm": 3.2040934562683105, |
| "learning_rate": 4.2380000000000004e-05, |
| "loss": 0.0597, |
| "step": 2120 |
| }, |
| { |
| "epoch": 133.125, |
| "grad_norm": 3.650881767272949, |
| "learning_rate": 4.258e-05, |
| "loss": 0.0594, |
| "step": 2130 |
| }, |
| { |
| "epoch": 133.75, |
| "grad_norm": 3.5435853004455566, |
| "learning_rate": 4.278e-05, |
| "loss": 0.0577, |
| "step": 2140 |
| }, |
| { |
| "epoch": 134.375, |
| "grad_norm": 3.881361484527588, |
| "learning_rate": 4.2980000000000005e-05, |
| "loss": 0.0586, |
| "step": 2150 |
| }, |
| { |
| "epoch": 135.0, |
| "grad_norm": 3.0621840953826904, |
| "learning_rate": 4.318e-05, |
| "loss": 0.0562, |
| "step": 2160 |
| }, |
| { |
| "epoch": 135.625, |
| "grad_norm": 3.5643880367279053, |
| "learning_rate": 4.338e-05, |
| "loss": 0.0573, |
| "step": 2170 |
| }, |
| { |
| "epoch": 136.25, |
| "grad_norm": 3.4029245376586914, |
| "learning_rate": 4.3580000000000006e-05, |
| "loss": 0.0538, |
| "step": 2180 |
| }, |
| { |
| "epoch": 136.875, |
| "grad_norm": 2.941638469696045, |
| "learning_rate": 4.3780000000000004e-05, |
| "loss": 0.0518, |
| "step": 2190 |
| }, |
| { |
| "epoch": 137.5, |
| "grad_norm": 3.19802188873291, |
| "learning_rate": 4.398e-05, |
| "loss": 0.0578, |
| "step": 2200 |
| }, |
| { |
| "epoch": 138.125, |
| "grad_norm": 3.2176332473754883, |
| "learning_rate": 4.418000000000001e-05, |
| "loss": 0.0553, |
| "step": 2210 |
| }, |
| { |
| "epoch": 138.75, |
| "grad_norm": 3.1325228214263916, |
| "learning_rate": 4.438e-05, |
| "loss": 0.0544, |
| "step": 2220 |
| }, |
| { |
| "epoch": 139.375, |
| "grad_norm": 2.721820116043091, |
| "learning_rate": 4.458e-05, |
| "loss": 0.0537, |
| "step": 2230 |
| }, |
| { |
| "epoch": 140.0, |
| "grad_norm": 2.2297821044921875, |
| "learning_rate": 4.478e-05, |
| "loss": 0.0536, |
| "step": 2240 |
| }, |
| { |
| "epoch": 140.625, |
| "grad_norm": 3.736509323120117, |
| "learning_rate": 4.498e-05, |
| "loss": 0.0568, |
| "step": 2250 |
| }, |
| { |
| "epoch": 141.25, |
| "grad_norm": 3.414687156677246, |
| "learning_rate": 4.518e-05, |
| "loss": 0.0535, |
| "step": 2260 |
| }, |
| { |
| "epoch": 141.875, |
| "grad_norm": 3.533870220184326, |
| "learning_rate": 4.538e-05, |
| "loss": 0.0528, |
| "step": 2270 |
| }, |
| { |
| "epoch": 142.5, |
| "grad_norm": 2.922818422317505, |
| "learning_rate": 4.558e-05, |
| "loss": 0.0509, |
| "step": 2280 |
| }, |
| { |
| "epoch": 143.125, |
| "grad_norm": 3.248502731323242, |
| "learning_rate": 4.578e-05, |
| "loss": 0.0499, |
| "step": 2290 |
| }, |
| { |
| "epoch": 143.75, |
| "grad_norm": 2.737330913543701, |
| "learning_rate": 4.5980000000000004e-05, |
| "loss": 0.0504, |
| "step": 2300 |
| }, |
| { |
| "epoch": 144.375, |
| "grad_norm": 2.7490787506103516, |
| "learning_rate": 4.618e-05, |
| "loss": 0.0494, |
| "step": 2310 |
| }, |
| { |
| "epoch": 145.0, |
| "grad_norm": 3.3917601108551025, |
| "learning_rate": 4.638e-05, |
| "loss": 0.0529, |
| "step": 2320 |
| }, |
| { |
| "epoch": 145.625, |
| "grad_norm": 3.115227699279785, |
| "learning_rate": 4.6580000000000005e-05, |
| "loss": 0.0487, |
| "step": 2330 |
| }, |
| { |
| "epoch": 146.25, |
| "grad_norm": 3.6642770767211914, |
| "learning_rate": 4.678e-05, |
| "loss": 0.0511, |
| "step": 2340 |
| }, |
| { |
| "epoch": 146.875, |
| "grad_norm": 3.4796688556671143, |
| "learning_rate": 4.698e-05, |
| "loss": 0.048, |
| "step": 2350 |
| }, |
| { |
| "epoch": 147.5, |
| "grad_norm": 2.7523436546325684, |
| "learning_rate": 4.718e-05, |
| "loss": 0.0478, |
| "step": 2360 |
| }, |
| { |
| "epoch": 148.125, |
| "grad_norm": 3.309631824493408, |
| "learning_rate": 4.7380000000000004e-05, |
| "loss": 0.0489, |
| "step": 2370 |
| }, |
| { |
| "epoch": 148.75, |
| "grad_norm": 3.5280392169952393, |
| "learning_rate": 4.758e-05, |
| "loss": 0.045, |
| "step": 2380 |
| }, |
| { |
| "epoch": 149.375, |
| "grad_norm": 3.055738925933838, |
| "learning_rate": 4.778e-05, |
| "loss": 0.0443, |
| "step": 2390 |
| }, |
| { |
| "epoch": 150.0, |
| "grad_norm": 2.935150146484375, |
| "learning_rate": 4.7980000000000005e-05, |
| "loss": 0.047, |
| "step": 2400 |
| }, |
| { |
| "epoch": 150.625, |
| "grad_norm": 3.540233612060547, |
| "learning_rate": 4.818e-05, |
| "loss": 0.0483, |
| "step": 2410 |
| }, |
| { |
| "epoch": 151.25, |
| "grad_norm": 3.3195087909698486, |
| "learning_rate": 4.838e-05, |
| "loss": 0.0461, |
| "step": 2420 |
| }, |
| { |
| "epoch": 151.875, |
| "grad_norm": 3.5009474754333496, |
| "learning_rate": 4.8580000000000006e-05, |
| "loss": 0.0478, |
| "step": 2430 |
| }, |
| { |
| "epoch": 152.5, |
| "grad_norm": 3.110968589782715, |
| "learning_rate": 4.8780000000000004e-05, |
| "loss": 0.0476, |
| "step": 2440 |
| }, |
| { |
| "epoch": 153.125, |
| "grad_norm": 2.5114879608154297, |
| "learning_rate": 4.898e-05, |
| "loss": 0.0457, |
| "step": 2450 |
| }, |
| { |
| "epoch": 153.75, |
| "grad_norm": 2.591670513153076, |
| "learning_rate": 4.918000000000001e-05, |
| "loss": 0.0425, |
| "step": 2460 |
| }, |
| { |
| "epoch": 154.375, |
| "grad_norm": 2.149576187133789, |
| "learning_rate": 4.9380000000000005e-05, |
| "loss": 0.0432, |
| "step": 2470 |
| }, |
| { |
| "epoch": 155.0, |
| "grad_norm": 2.866494655609131, |
| "learning_rate": 4.958e-05, |
| "loss": 0.047, |
| "step": 2480 |
| }, |
| { |
| "epoch": 155.625, |
| "grad_norm": 3.465266227722168, |
| "learning_rate": 4.978e-05, |
| "loss": 0.0458, |
| "step": 2490 |
| }, |
| { |
| "epoch": 156.25, |
| "grad_norm": 2.855782985687256, |
| "learning_rate": 4.9980000000000006e-05, |
| "loss": 0.043, |
| "step": 2500 |
| }, |
| { |
| "epoch": 156.875, |
| "grad_norm": 2.906052350997925, |
| "learning_rate": 5.0180000000000004e-05, |
| "loss": 0.042, |
| "step": 2510 |
| }, |
| { |
| "epoch": 157.5, |
| "grad_norm": 3.16371488571167, |
| "learning_rate": 5.038e-05, |
| "loss": 0.0427, |
| "step": 2520 |
| }, |
| { |
| "epoch": 158.125, |
| "grad_norm": 2.54278826713562, |
| "learning_rate": 5.058000000000001e-05, |
| "loss": 0.0433, |
| "step": 2530 |
| }, |
| { |
| "epoch": 158.75, |
| "grad_norm": 2.1072380542755127, |
| "learning_rate": 5.0780000000000005e-05, |
| "loss": 0.0378, |
| "step": 2540 |
| }, |
| { |
| "epoch": 159.375, |
| "grad_norm": 2.749347448348999, |
| "learning_rate": 5.098e-05, |
| "loss": 0.0412, |
| "step": 2550 |
| }, |
| { |
| "epoch": 160.0, |
| "grad_norm": 3.022982120513916, |
| "learning_rate": 5.118000000000001e-05, |
| "loss": 0.0413, |
| "step": 2560 |
| }, |
| { |
| "epoch": 160.625, |
| "grad_norm": 3.038039207458496, |
| "learning_rate": 5.1380000000000006e-05, |
| "loss": 0.0418, |
| "step": 2570 |
| }, |
| { |
| "epoch": 161.25, |
| "grad_norm": 2.538886070251465, |
| "learning_rate": 5.1580000000000004e-05, |
| "loss": 0.0407, |
| "step": 2580 |
| }, |
| { |
| "epoch": 161.875, |
| "grad_norm": 2.79771089553833, |
| "learning_rate": 5.178000000000001e-05, |
| "loss": 0.0421, |
| "step": 2590 |
| }, |
| { |
| "epoch": 162.5, |
| "grad_norm": 2.6892521381378174, |
| "learning_rate": 5.198000000000001e-05, |
| "loss": 0.0408, |
| "step": 2600 |
| }, |
| { |
| "epoch": 163.125, |
| "grad_norm": 2.829843759536743, |
| "learning_rate": 5.2180000000000005e-05, |
| "loss": 0.0386, |
| "step": 2610 |
| }, |
| { |
| "epoch": 163.75, |
| "grad_norm": 2.272169828414917, |
| "learning_rate": 5.238000000000001e-05, |
| "loss": 0.0392, |
| "step": 2620 |
| }, |
| { |
| "epoch": 164.375, |
| "grad_norm": 2.683228015899658, |
| "learning_rate": 5.258000000000001e-05, |
| "loss": 0.0403, |
| "step": 2630 |
| }, |
| { |
| "epoch": 165.0, |
| "grad_norm": 2.4979324340820312, |
| "learning_rate": 5.2780000000000006e-05, |
| "loss": 0.0412, |
| "step": 2640 |
| }, |
| { |
| "epoch": 165.625, |
| "grad_norm": 2.7030258178710938, |
| "learning_rate": 5.2980000000000004e-05, |
| "loss": 0.0386, |
| "step": 2650 |
| }, |
| { |
| "epoch": 166.25, |
| "grad_norm": 2.9168074131011963, |
| "learning_rate": 5.318000000000001e-05, |
| "loss": 0.041, |
| "step": 2660 |
| }, |
| { |
| "epoch": 166.875, |
| "grad_norm": 2.2548749446868896, |
| "learning_rate": 5.338000000000001e-05, |
| "loss": 0.0386, |
| "step": 2670 |
| }, |
| { |
| "epoch": 167.5, |
| "grad_norm": 2.6179001331329346, |
| "learning_rate": 5.3580000000000005e-05, |
| "loss": 0.0399, |
| "step": 2680 |
| }, |
| { |
| "epoch": 168.125, |
| "grad_norm": 3.1817469596862793, |
| "learning_rate": 5.378e-05, |
| "loss": 0.0391, |
| "step": 2690 |
| }, |
| { |
| "epoch": 168.75, |
| "grad_norm": 2.606260061264038, |
| "learning_rate": 5.3979999999999995e-05, |
| "loss": 0.0358, |
| "step": 2700 |
| }, |
| { |
| "epoch": 169.375, |
| "grad_norm": 2.6046321392059326, |
| "learning_rate": 5.418e-05, |
| "loss": 0.0366, |
| "step": 2710 |
| }, |
| { |
| "epoch": 170.0, |
| "grad_norm": 2.150594711303711, |
| "learning_rate": 5.438e-05, |
| "loss": 0.0366, |
| "step": 2720 |
| }, |
| { |
| "epoch": 170.625, |
| "grad_norm": 1.9119679927825928, |
| "learning_rate": 5.4579999999999996e-05, |
| "loss": 0.0359, |
| "step": 2730 |
| }, |
| { |
| "epoch": 171.25, |
| "grad_norm": 2.6968297958374023, |
| "learning_rate": 5.478e-05, |
| "loss": 0.0358, |
| "step": 2740 |
| }, |
| { |
| "epoch": 171.875, |
| "grad_norm": 2.433364152908325, |
| "learning_rate": 5.498e-05, |
| "loss": 0.0396, |
| "step": 2750 |
| }, |
| { |
| "epoch": 172.5, |
| "grad_norm": 2.7723114490509033, |
| "learning_rate": 5.518e-05, |
| "loss": 0.0369, |
| "step": 2760 |
| }, |
| { |
| "epoch": 173.125, |
| "grad_norm": 1.9324524402618408, |
| "learning_rate": 5.538e-05, |
| "loss": 0.0366, |
| "step": 2770 |
| }, |
| { |
| "epoch": 173.75, |
| "grad_norm": 2.4898505210876465, |
| "learning_rate": 5.558e-05, |
| "loss": 0.0357, |
| "step": 2780 |
| }, |
| { |
| "epoch": 174.375, |
| "grad_norm": 3.377042293548584, |
| "learning_rate": 5.578e-05, |
| "loss": 0.0356, |
| "step": 2790 |
| }, |
| { |
| "epoch": 175.0, |
| "grad_norm": 2.3189809322357178, |
| "learning_rate": 5.5979999999999996e-05, |
| "loss": 0.0383, |
| "step": 2800 |
| }, |
| { |
| "epoch": 175.625, |
| "grad_norm": 2.4106035232543945, |
| "learning_rate": 5.618e-05, |
| "loss": 0.0377, |
| "step": 2810 |
| }, |
| { |
| "epoch": 176.25, |
| "grad_norm": 2.3675427436828613, |
| "learning_rate": 5.638e-05, |
| "loss": 0.034, |
| "step": 2820 |
| }, |
| { |
| "epoch": 176.875, |
| "grad_norm": 2.3263936042785645, |
| "learning_rate": 5.658e-05, |
| "loss": 0.0329, |
| "step": 2830 |
| }, |
| { |
| "epoch": 177.5, |
| "grad_norm": 2.6326184272766113, |
| "learning_rate": 5.678e-05, |
| "loss": 0.0372, |
| "step": 2840 |
| }, |
| { |
| "epoch": 178.125, |
| "grad_norm": 2.5026683807373047, |
| "learning_rate": 5.698e-05, |
| "loss": 0.0384, |
| "step": 2850 |
| }, |
| { |
| "epoch": 178.75, |
| "grad_norm": 2.7007641792297363, |
| "learning_rate": 5.718e-05, |
| "loss": 0.0345, |
| "step": 2860 |
| }, |
| { |
| "epoch": 179.375, |
| "grad_norm": 2.948171854019165, |
| "learning_rate": 5.738e-05, |
| "loss": 0.0371, |
| "step": 2870 |
| }, |
| { |
| "epoch": 180.0, |
| "grad_norm": 2.368053674697876, |
| "learning_rate": 5.758e-05, |
| "loss": 0.0358, |
| "step": 2880 |
| }, |
| { |
| "epoch": 180.625, |
| "grad_norm": 2.625312328338623, |
| "learning_rate": 5.778e-05, |
| "loss": 0.0382, |
| "step": 2890 |
| }, |
| { |
| "epoch": 181.25, |
| "grad_norm": 2.2241172790527344, |
| "learning_rate": 5.7980000000000004e-05, |
| "loss": 0.0346, |
| "step": 2900 |
| }, |
| { |
| "epoch": 181.875, |
| "grad_norm": 2.2202515602111816, |
| "learning_rate": 5.818e-05, |
| "loss": 0.0391, |
| "step": 2910 |
| }, |
| { |
| "epoch": 182.5, |
| "grad_norm": 2.5838396549224854, |
| "learning_rate": 5.838e-05, |
| "loss": 0.0332, |
| "step": 2920 |
| }, |
| { |
| "epoch": 183.125, |
| "grad_norm": 2.4340357780456543, |
| "learning_rate": 5.858e-05, |
| "loss": 0.0341, |
| "step": 2930 |
| }, |
| { |
| "epoch": 183.75, |
| "grad_norm": 3.3191001415252686, |
| "learning_rate": 5.878e-05, |
| "loss": 0.0372, |
| "step": 2940 |
| }, |
| { |
| "epoch": 184.375, |
| "grad_norm": 2.798825263977051, |
| "learning_rate": 5.898e-05, |
| "loss": 0.0344, |
| "step": 2950 |
| }, |
| { |
| "epoch": 185.0, |
| "grad_norm": 2.0992839336395264, |
| "learning_rate": 5.918e-05, |
| "loss": 0.0364, |
| "step": 2960 |
| }, |
| { |
| "epoch": 185.625, |
| "grad_norm": 2.3140695095062256, |
| "learning_rate": 5.9380000000000004e-05, |
| "loss": 0.0345, |
| "step": 2970 |
| }, |
| { |
| "epoch": 186.25, |
| "grad_norm": 2.1252496242523193, |
| "learning_rate": 5.958e-05, |
| "loss": 0.0341, |
| "step": 2980 |
| }, |
| { |
| "epoch": 186.875, |
| "grad_norm": 1.9925975799560547, |
| "learning_rate": 5.978e-05, |
| "loss": 0.0371, |
| "step": 2990 |
| }, |
| { |
| "epoch": 187.5, |
| "grad_norm": 1.8534867763519287, |
| "learning_rate": 5.9980000000000005e-05, |
| "loss": 0.0324, |
| "step": 3000 |
| }, |
| { |
| "epoch": 188.125, |
| "grad_norm": 1.8940081596374512, |
| "learning_rate": 6.018e-05, |
| "loss": 0.0313, |
| "step": 3010 |
| }, |
| { |
| "epoch": 188.75, |
| "grad_norm": 3.098815679550171, |
| "learning_rate": 6.038e-05, |
| "loss": 0.0316, |
| "step": 3020 |
| }, |
| { |
| "epoch": 189.375, |
| "grad_norm": 2.562849521636963, |
| "learning_rate": 6.0580000000000006e-05, |
| "loss": 0.034, |
| "step": 3030 |
| }, |
| { |
| "epoch": 190.0, |
| "grad_norm": 2.3118202686309814, |
| "learning_rate": 6.0780000000000004e-05, |
| "loss": 0.0324, |
| "step": 3040 |
| }, |
| { |
| "epoch": 190.625, |
| "grad_norm": 1.8349565267562866, |
| "learning_rate": 6.098e-05, |
| "loss": 0.0316, |
| "step": 3050 |
| }, |
| { |
| "epoch": 191.25, |
| "grad_norm": 2.3919525146484375, |
| "learning_rate": 6.118000000000001e-05, |
| "loss": 0.0341, |
| "step": 3060 |
| }, |
| { |
| "epoch": 191.875, |
| "grad_norm": 2.795734405517578, |
| "learning_rate": 6.138e-05, |
| "loss": 0.0321, |
| "step": 3070 |
| }, |
| { |
| "epoch": 192.5, |
| "grad_norm": 2.4285318851470947, |
| "learning_rate": 6.158e-05, |
| "loss": 0.0338, |
| "step": 3080 |
| }, |
| { |
| "epoch": 193.125, |
| "grad_norm": 2.724107265472412, |
| "learning_rate": 6.178000000000001e-05, |
| "loss": 0.0325, |
| "step": 3090 |
| }, |
| { |
| "epoch": 193.75, |
| "grad_norm": 2.212014675140381, |
| "learning_rate": 6.198e-05, |
| "loss": 0.0297, |
| "step": 3100 |
| }, |
| { |
| "epoch": 194.375, |
| "grad_norm": 1.8803651332855225, |
| "learning_rate": 6.218e-05, |
| "loss": 0.0298, |
| "step": 3110 |
| }, |
| { |
| "epoch": 195.0, |
| "grad_norm": 1.7469961643218994, |
| "learning_rate": 6.238000000000001e-05, |
| "loss": 0.0291, |
| "step": 3120 |
| }, |
| { |
| "epoch": 195.625, |
| "grad_norm": 2.5273945331573486, |
| "learning_rate": 6.258e-05, |
| "loss": 0.0309, |
| "step": 3130 |
| }, |
| { |
| "epoch": 196.25, |
| "grad_norm": 2.398287773132324, |
| "learning_rate": 6.278e-05, |
| "loss": 0.0317, |
| "step": 3140 |
| }, |
| { |
| "epoch": 196.875, |
| "grad_norm": 1.9407683610916138, |
| "learning_rate": 6.298000000000001e-05, |
| "loss": 0.0299, |
| "step": 3150 |
| }, |
| { |
| "epoch": 197.5, |
| "grad_norm": 1.6159769296646118, |
| "learning_rate": 6.318e-05, |
| "loss": 0.0286, |
| "step": 3160 |
| }, |
| { |
| "epoch": 198.125, |
| "grad_norm": 2.744300603866577, |
| "learning_rate": 6.338e-05, |
| "loss": 0.0303, |
| "step": 3170 |
| }, |
| { |
| "epoch": 198.75, |
| "grad_norm": 2.6293482780456543, |
| "learning_rate": 6.358000000000001e-05, |
| "loss": 0.0328, |
| "step": 3180 |
| }, |
| { |
| "epoch": 199.375, |
| "grad_norm": 2.2811481952667236, |
| "learning_rate": 6.378e-05, |
| "loss": 0.0328, |
| "step": 3190 |
| }, |
| { |
| "epoch": 200.0, |
| "grad_norm": 2.951794385910034, |
| "learning_rate": 6.398000000000001e-05, |
| "loss": 0.033, |
| "step": 3200 |
| }, |
| { |
| "epoch": 200.625, |
| "grad_norm": 2.3573927879333496, |
| "learning_rate": 6.418000000000001e-05, |
| "loss": 0.0312, |
| "step": 3210 |
| }, |
| { |
| "epoch": 201.25, |
| "grad_norm": 2.088592529296875, |
| "learning_rate": 6.438e-05, |
| "loss": 0.0314, |
| "step": 3220 |
| }, |
| { |
| "epoch": 201.875, |
| "grad_norm": 2.646054983139038, |
| "learning_rate": 6.458000000000001e-05, |
| "loss": 0.0295, |
| "step": 3230 |
| }, |
| { |
| "epoch": 202.5, |
| "grad_norm": 2.5917739868164062, |
| "learning_rate": 6.478000000000001e-05, |
| "loss": 0.032, |
| "step": 3240 |
| }, |
| { |
| "epoch": 203.125, |
| "grad_norm": 2.122236490249634, |
| "learning_rate": 6.498e-05, |
| "loss": 0.0306, |
| "step": 3250 |
| }, |
| { |
| "epoch": 203.75, |
| "grad_norm": 2.2258174419403076, |
| "learning_rate": 6.518000000000001e-05, |
| "loss": 0.0289, |
| "step": 3260 |
| }, |
| { |
| "epoch": 204.375, |
| "grad_norm": 2.1164627075195312, |
| "learning_rate": 6.538000000000001e-05, |
| "loss": 0.0298, |
| "step": 3270 |
| }, |
| { |
| "epoch": 205.0, |
| "grad_norm": 2.397019386291504, |
| "learning_rate": 6.558e-05, |
| "loss": 0.0298, |
| "step": 3280 |
| }, |
| { |
| "epoch": 205.625, |
| "grad_norm": 2.260453701019287, |
| "learning_rate": 6.578000000000001e-05, |
| "loss": 0.0279, |
| "step": 3290 |
| }, |
| { |
| "epoch": 206.25, |
| "grad_norm": 2.1338107585906982, |
| "learning_rate": 6.598e-05, |
| "loss": 0.0292, |
| "step": 3300 |
| }, |
| { |
| "epoch": 206.875, |
| "grad_norm": 1.875387191772461, |
| "learning_rate": 6.618e-05, |
| "loss": 0.0276, |
| "step": 3310 |
| }, |
| { |
| "epoch": 207.5, |
| "grad_norm": 1.619683027267456, |
| "learning_rate": 6.638e-05, |
| "loss": 0.0286, |
| "step": 3320 |
| }, |
| { |
| "epoch": 208.125, |
| "grad_norm": 2.5062685012817383, |
| "learning_rate": 6.658e-05, |
| "loss": 0.031, |
| "step": 3330 |
| }, |
| { |
| "epoch": 208.75, |
| "grad_norm": 2.3004539012908936, |
| "learning_rate": 6.678e-05, |
| "loss": 0.0305, |
| "step": 3340 |
| }, |
| { |
| "epoch": 209.375, |
| "grad_norm": 2.2835469245910645, |
| "learning_rate": 6.698e-05, |
| "loss": 0.0281, |
| "step": 3350 |
| }, |
| { |
| "epoch": 210.0, |
| "grad_norm": 2.0576257705688477, |
| "learning_rate": 6.718e-05, |
| "loss": 0.0318, |
| "step": 3360 |
| }, |
| { |
| "epoch": 210.625, |
| "grad_norm": 2.0494043827056885, |
| "learning_rate": 6.738e-05, |
| "loss": 0.0284, |
| "step": 3370 |
| }, |
| { |
| "epoch": 211.25, |
| "grad_norm": 1.5460221767425537, |
| "learning_rate": 6.758e-05, |
| "loss": 0.0274, |
| "step": 3380 |
| }, |
| { |
| "epoch": 211.875, |
| "grad_norm": 2.422177791595459, |
| "learning_rate": 6.778e-05, |
| "loss": 0.0287, |
| "step": 3390 |
| }, |
| { |
| "epoch": 212.5, |
| "grad_norm": 2.38964581489563, |
| "learning_rate": 6.798e-05, |
| "loss": 0.0329, |
| "step": 3400 |
| }, |
| { |
| "epoch": 213.125, |
| "grad_norm": 2.0634000301361084, |
| "learning_rate": 6.818e-05, |
| "loss": 0.0273, |
| "step": 3410 |
| }, |
| { |
| "epoch": 213.75, |
| "grad_norm": 2.5334651470184326, |
| "learning_rate": 6.838e-05, |
| "loss": 0.0312, |
| "step": 3420 |
| }, |
| { |
| "epoch": 214.375, |
| "grad_norm": 2.527052402496338, |
| "learning_rate": 6.858e-05, |
| "loss": 0.0297, |
| "step": 3430 |
| }, |
| { |
| "epoch": 215.0, |
| "grad_norm": 2.3704299926757812, |
| "learning_rate": 6.878e-05, |
| "loss": 0.0313, |
| "step": 3440 |
| }, |
| { |
| "epoch": 215.625, |
| "grad_norm": 1.926483154296875, |
| "learning_rate": 6.898e-05, |
| "loss": 0.0256, |
| "step": 3450 |
| }, |
| { |
| "epoch": 216.25, |
| "grad_norm": 1.56046724319458, |
| "learning_rate": 6.918e-05, |
| "loss": 0.0278, |
| "step": 3460 |
| }, |
| { |
| "epoch": 216.875, |
| "grad_norm": 1.8307677507400513, |
| "learning_rate": 6.938e-05, |
| "loss": 0.0269, |
| "step": 3470 |
| }, |
| { |
| "epoch": 217.5, |
| "grad_norm": 1.9908180236816406, |
| "learning_rate": 6.958e-05, |
| "loss": 0.0276, |
| "step": 3480 |
| }, |
| { |
| "epoch": 218.125, |
| "grad_norm": 2.067988395690918, |
| "learning_rate": 6.978e-05, |
| "loss": 0.0268, |
| "step": 3490 |
| }, |
| { |
| "epoch": 218.75, |
| "grad_norm": 1.8545929193496704, |
| "learning_rate": 6.998e-05, |
| "loss": 0.0268, |
| "step": 3500 |
| }, |
| { |
| "epoch": 219.375, |
| "grad_norm": 2.052927017211914, |
| "learning_rate": 7.018e-05, |
| "loss": 0.0253, |
| "step": 3510 |
| }, |
| { |
| "epoch": 220.0, |
| "grad_norm": 2.1113545894622803, |
| "learning_rate": 7.038e-05, |
| "loss": 0.0245, |
| "step": 3520 |
| }, |
| { |
| "epoch": 220.625, |
| "grad_norm": 1.541675329208374, |
| "learning_rate": 7.058e-05, |
| "loss": 0.0253, |
| "step": 3530 |
| }, |
| { |
| "epoch": 221.25, |
| "grad_norm": 1.7272151708602905, |
| "learning_rate": 7.078e-05, |
| "loss": 0.0251, |
| "step": 3540 |
| }, |
| { |
| "epoch": 221.875, |
| "grad_norm": 1.7178980112075806, |
| "learning_rate": 7.098e-05, |
| "loss": 0.026, |
| "step": 3550 |
| }, |
| { |
| "epoch": 222.5, |
| "grad_norm": 2.246424913406372, |
| "learning_rate": 7.118e-05, |
| "loss": 0.0267, |
| "step": 3560 |
| }, |
| { |
| "epoch": 223.125, |
| "grad_norm": 1.9230071306228638, |
| "learning_rate": 7.138e-05, |
| "loss": 0.0268, |
| "step": 3570 |
| }, |
| { |
| "epoch": 223.75, |
| "grad_norm": 1.9361920356750488, |
| "learning_rate": 7.158e-05, |
| "loss": 0.0268, |
| "step": 3580 |
| }, |
| { |
| "epoch": 224.375, |
| "grad_norm": 1.6865476369857788, |
| "learning_rate": 7.178000000000001e-05, |
| "loss": 0.0248, |
| "step": 3590 |
| }, |
| { |
| "epoch": 225.0, |
| "grad_norm": 2.019584894180298, |
| "learning_rate": 7.198e-05, |
| "loss": 0.0258, |
| "step": 3600 |
| }, |
| { |
| "epoch": 225.625, |
| "grad_norm": 1.8740990161895752, |
| "learning_rate": 7.218e-05, |
| "loss": 0.0243, |
| "step": 3610 |
| }, |
| { |
| "epoch": 226.25, |
| "grad_norm": 2.088883399963379, |
| "learning_rate": 7.238000000000001e-05, |
| "loss": 0.0253, |
| "step": 3620 |
| }, |
| { |
| "epoch": 226.875, |
| "grad_norm": 2.107874870300293, |
| "learning_rate": 7.258e-05, |
| "loss": 0.0265, |
| "step": 3630 |
| }, |
| { |
| "epoch": 227.5, |
| "grad_norm": 1.690873622894287, |
| "learning_rate": 7.278e-05, |
| "loss": 0.0262, |
| "step": 3640 |
| }, |
| { |
| "epoch": 228.125, |
| "grad_norm": 2.7033252716064453, |
| "learning_rate": 7.298000000000001e-05, |
| "loss": 0.025, |
| "step": 3650 |
| }, |
| { |
| "epoch": 228.75, |
| "grad_norm": 1.91816246509552, |
| "learning_rate": 7.318e-05, |
| "loss": 0.0265, |
| "step": 3660 |
| }, |
| { |
| "epoch": 229.375, |
| "grad_norm": 1.9548629522323608, |
| "learning_rate": 7.338e-05, |
| "loss": 0.0251, |
| "step": 3670 |
| }, |
| { |
| "epoch": 230.0, |
| "grad_norm": 1.911120891571045, |
| "learning_rate": 7.358000000000001e-05, |
| "loss": 0.0245, |
| "step": 3680 |
| }, |
| { |
| "epoch": 230.625, |
| "grad_norm": 1.6720895767211914, |
| "learning_rate": 7.378e-05, |
| "loss": 0.0252, |
| "step": 3690 |
| }, |
| { |
| "epoch": 231.25, |
| "grad_norm": 1.9147329330444336, |
| "learning_rate": 7.398e-05, |
| "loss": 0.0247, |
| "step": 3700 |
| }, |
| { |
| "epoch": 231.875, |
| "grad_norm": 2.1456077098846436, |
| "learning_rate": 7.418000000000001e-05, |
| "loss": 0.0252, |
| "step": 3710 |
| }, |
| { |
| "epoch": 232.5, |
| "grad_norm": 1.9418590068817139, |
| "learning_rate": 7.438e-05, |
| "loss": 0.0257, |
| "step": 3720 |
| }, |
| { |
| "epoch": 233.125, |
| "grad_norm": 1.9458227157592773, |
| "learning_rate": 7.458000000000001e-05, |
| "loss": 0.0271, |
| "step": 3730 |
| }, |
| { |
| "epoch": 233.75, |
| "grad_norm": 1.9564207792282104, |
| "learning_rate": 7.478e-05, |
| "loss": 0.0262, |
| "step": 3740 |
| }, |
| { |
| "epoch": 234.375, |
| "grad_norm": 1.4478167295455933, |
| "learning_rate": 7.498e-05, |
| "loss": 0.0254, |
| "step": 3750 |
| }, |
| { |
| "epoch": 235.0, |
| "grad_norm": 2.14218807220459, |
| "learning_rate": 7.518000000000001e-05, |
| "loss": 0.0251, |
| "step": 3760 |
| }, |
| { |
| "epoch": 235.625, |
| "grad_norm": 2.029665946960449, |
| "learning_rate": 7.538e-05, |
| "loss": 0.0276, |
| "step": 3770 |
| }, |
| { |
| "epoch": 236.25, |
| "grad_norm": 1.8243962526321411, |
| "learning_rate": 7.558e-05, |
| "loss": 0.0267, |
| "step": 3780 |
| }, |
| { |
| "epoch": 236.875, |
| "grad_norm": 1.6162742376327515, |
| "learning_rate": 7.578000000000001e-05, |
| "loss": 0.0224, |
| "step": 3790 |
| }, |
| { |
| "epoch": 237.5, |
| "grad_norm": 2.0405139923095703, |
| "learning_rate": 7.598e-05, |
| "loss": 0.0248, |
| "step": 3800 |
| }, |
| { |
| "epoch": 238.125, |
| "grad_norm": 1.9894390106201172, |
| "learning_rate": 7.618e-05, |
| "loss": 0.0239, |
| "step": 3810 |
| }, |
| { |
| "epoch": 238.75, |
| "grad_norm": 1.7805562019348145, |
| "learning_rate": 7.638000000000001e-05, |
| "loss": 0.0245, |
| "step": 3820 |
| }, |
| { |
| "epoch": 239.375, |
| "grad_norm": 2.0249173641204834, |
| "learning_rate": 7.658e-05, |
| "loss": 0.0221, |
| "step": 3830 |
| }, |
| { |
| "epoch": 240.0, |
| "grad_norm": 1.8023134469985962, |
| "learning_rate": 7.678000000000001e-05, |
| "loss": 0.0234, |
| "step": 3840 |
| }, |
| { |
| "epoch": 240.625, |
| "grad_norm": 1.5592528581619263, |
| "learning_rate": 7.698000000000001e-05, |
| "loss": 0.0245, |
| "step": 3850 |
| }, |
| { |
| "epoch": 241.25, |
| "grad_norm": 2.1557257175445557, |
| "learning_rate": 7.718e-05, |
| "loss": 0.0243, |
| "step": 3860 |
| }, |
| { |
| "epoch": 241.875, |
| "grad_norm": 1.9655349254608154, |
| "learning_rate": 7.738000000000001e-05, |
| "loss": 0.0223, |
| "step": 3870 |
| }, |
| { |
| "epoch": 242.5, |
| "grad_norm": 1.616184115409851, |
| "learning_rate": 7.758000000000001e-05, |
| "loss": 0.0249, |
| "step": 3880 |
| }, |
| { |
| "epoch": 243.125, |
| "grad_norm": 2.146557331085205, |
| "learning_rate": 7.778e-05, |
| "loss": 0.0243, |
| "step": 3890 |
| }, |
| { |
| "epoch": 243.75, |
| "grad_norm": 1.6077772378921509, |
| "learning_rate": 7.798000000000001e-05, |
| "loss": 0.0237, |
| "step": 3900 |
| }, |
| { |
| "epoch": 244.375, |
| "grad_norm": 2.073211431503296, |
| "learning_rate": 7.818000000000001e-05, |
| "loss": 0.0211, |
| "step": 3910 |
| }, |
| { |
| "epoch": 245.0, |
| "grad_norm": 1.7445831298828125, |
| "learning_rate": 7.838e-05, |
| "loss": 0.0225, |
| "step": 3920 |
| }, |
| { |
| "epoch": 245.625, |
| "grad_norm": 1.5558561086654663, |
| "learning_rate": 7.858000000000001e-05, |
| "loss": 0.0215, |
| "step": 3930 |
| }, |
| { |
| "epoch": 246.25, |
| "grad_norm": 1.4040555953979492, |
| "learning_rate": 7.878e-05, |
| "loss": 0.0219, |
| "step": 3940 |
| }, |
| { |
| "epoch": 246.875, |
| "grad_norm": 1.6972527503967285, |
| "learning_rate": 7.897999999999999e-05, |
| "loss": 0.0247, |
| "step": 3950 |
| }, |
| { |
| "epoch": 247.5, |
| "grad_norm": 1.862613320350647, |
| "learning_rate": 7.918e-05, |
| "loss": 0.0235, |
| "step": 3960 |
| }, |
| { |
| "epoch": 248.125, |
| "grad_norm": 1.9567930698394775, |
| "learning_rate": 7.938e-05, |
| "loss": 0.0243, |
| "step": 3970 |
| }, |
| { |
| "epoch": 248.75, |
| "grad_norm": 1.8193110227584839, |
| "learning_rate": 7.958e-05, |
| "loss": 0.0232, |
| "step": 3980 |
| }, |
| { |
| "epoch": 249.375, |
| "grad_norm": 1.8279744386672974, |
| "learning_rate": 7.978e-05, |
| "loss": 0.022, |
| "step": 3990 |
| }, |
| { |
| "epoch": 250.0, |
| "grad_norm": 1.9170351028442383, |
| "learning_rate": 7.998e-05, |
| "loss": 0.0238, |
| "step": 4000 |
| }, |
| { |
| "epoch": 250.625, |
| "grad_norm": 1.7806050777435303, |
| "learning_rate": 8.018e-05, |
| "loss": 0.0224, |
| "step": 4010 |
| }, |
| { |
| "epoch": 251.25, |
| "grad_norm": 1.618657112121582, |
| "learning_rate": 8.038e-05, |
| "loss": 0.0228, |
| "step": 4020 |
| }, |
| { |
| "epoch": 251.875, |
| "grad_norm": 1.3448606729507446, |
| "learning_rate": 8.058e-05, |
| "loss": 0.0213, |
| "step": 4030 |
| }, |
| { |
| "epoch": 252.5, |
| "grad_norm": 2.1564993858337402, |
| "learning_rate": 8.078e-05, |
| "loss": 0.0224, |
| "step": 4040 |
| }, |
| { |
| "epoch": 253.125, |
| "grad_norm": 1.9321818351745605, |
| "learning_rate": 8.098e-05, |
| "loss": 0.0258, |
| "step": 4050 |
| }, |
| { |
| "epoch": 253.75, |
| "grad_norm": 1.6877397298812866, |
| "learning_rate": 8.118e-05, |
| "loss": 0.0235, |
| "step": 4060 |
| }, |
| { |
| "epoch": 254.375, |
| "grad_norm": 1.899335265159607, |
| "learning_rate": 8.138e-05, |
| "loss": 0.0243, |
| "step": 4070 |
| }, |
| { |
| "epoch": 255.0, |
| "grad_norm": 1.6680128574371338, |
| "learning_rate": 8.158e-05, |
| "loss": 0.0247, |
| "step": 4080 |
| }, |
| { |
| "epoch": 255.625, |
| "grad_norm": 1.4403914213180542, |
| "learning_rate": 8.178e-05, |
| "loss": 0.0224, |
| "step": 4090 |
| }, |
| { |
| "epoch": 256.25, |
| "grad_norm": 1.8112647533416748, |
| "learning_rate": 8.198e-05, |
| "loss": 0.022, |
| "step": 4100 |
| }, |
| { |
| "epoch": 256.875, |
| "grad_norm": 1.6451849937438965, |
| "learning_rate": 8.218e-05, |
| "loss": 0.0216, |
| "step": 4110 |
| }, |
| { |
| "epoch": 257.5, |
| "grad_norm": 1.4328521490097046, |
| "learning_rate": 8.238000000000001e-05, |
| "loss": 0.0217, |
| "step": 4120 |
| }, |
| { |
| "epoch": 258.125, |
| "grad_norm": 1.8865714073181152, |
| "learning_rate": 8.258e-05, |
| "loss": 0.0218, |
| "step": 4130 |
| }, |
| { |
| "epoch": 258.75, |
| "grad_norm": 1.6151604652404785, |
| "learning_rate": 8.278e-05, |
| "loss": 0.0207, |
| "step": 4140 |
| }, |
| { |
| "epoch": 259.375, |
| "grad_norm": 1.576856017112732, |
| "learning_rate": 8.298000000000001e-05, |
| "loss": 0.0227, |
| "step": 4150 |
| }, |
| { |
| "epoch": 260.0, |
| "grad_norm": 1.9383561611175537, |
| "learning_rate": 8.318e-05, |
| "loss": 0.0211, |
| "step": 4160 |
| }, |
| { |
| "epoch": 260.625, |
| "grad_norm": 1.417213797569275, |
| "learning_rate": 8.338e-05, |
| "loss": 0.0218, |
| "step": 4170 |
| }, |
| { |
| "epoch": 261.25, |
| "grad_norm": 1.4880584478378296, |
| "learning_rate": 8.358e-05, |
| "loss": 0.0215, |
| "step": 4180 |
| }, |
| { |
| "epoch": 261.875, |
| "grad_norm": 1.7698973417282104, |
| "learning_rate": 8.378e-05, |
| "loss": 0.0209, |
| "step": 4190 |
| }, |
| { |
| "epoch": 262.5, |
| "grad_norm": 1.4688743352890015, |
| "learning_rate": 8.398e-05, |
| "loss": 0.022, |
| "step": 4200 |
| }, |
| { |
| "epoch": 263.125, |
| "grad_norm": 1.563480019569397, |
| "learning_rate": 8.418e-05, |
| "loss": 0.021, |
| "step": 4210 |
| }, |
| { |
| "epoch": 263.75, |
| "grad_norm": 1.6026536226272583, |
| "learning_rate": 8.438e-05, |
| "loss": 0.0196, |
| "step": 4220 |
| }, |
| { |
| "epoch": 264.375, |
| "grad_norm": 1.390167236328125, |
| "learning_rate": 8.458e-05, |
| "loss": 0.0203, |
| "step": 4230 |
| }, |
| { |
| "epoch": 265.0, |
| "grad_norm": 1.3945834636688232, |
| "learning_rate": 8.478e-05, |
| "loss": 0.0187, |
| "step": 4240 |
| }, |
| { |
| "epoch": 265.625, |
| "grad_norm": 1.6028813123703003, |
| "learning_rate": 8.498e-05, |
| "loss": 0.0211, |
| "step": 4250 |
| }, |
| { |
| "epoch": 266.25, |
| "grad_norm": 1.5985839366912842, |
| "learning_rate": 8.518000000000001e-05, |
| "loss": 0.021, |
| "step": 4260 |
| }, |
| { |
| "epoch": 266.875, |
| "grad_norm": 1.3894219398498535, |
| "learning_rate": 8.538e-05, |
| "loss": 0.0203, |
| "step": 4270 |
| }, |
| { |
| "epoch": 267.5, |
| "grad_norm": 1.9198909997940063, |
| "learning_rate": 8.558e-05, |
| "loss": 0.0217, |
| "step": 4280 |
| }, |
| { |
| "epoch": 268.125, |
| "grad_norm": 1.6992826461791992, |
| "learning_rate": 8.578000000000001e-05, |
| "loss": 0.0218, |
| "step": 4290 |
| }, |
| { |
| "epoch": 268.75, |
| "grad_norm": 1.5295377969741821, |
| "learning_rate": 8.598e-05, |
| "loss": 0.0209, |
| "step": 4300 |
| }, |
| { |
| "epoch": 269.375, |
| "grad_norm": 1.9647233486175537, |
| "learning_rate": 8.618e-05, |
| "loss": 0.0208, |
| "step": 4310 |
| }, |
| { |
| "epoch": 270.0, |
| "grad_norm": 1.6796159744262695, |
| "learning_rate": 8.638000000000001e-05, |
| "loss": 0.0207, |
| "step": 4320 |
| }, |
| { |
| "epoch": 270.625, |
| "grad_norm": 1.7937408685684204, |
| "learning_rate": 8.658e-05, |
| "loss": 0.0212, |
| "step": 4330 |
| }, |
| { |
| "epoch": 271.25, |
| "grad_norm": 1.944583535194397, |
| "learning_rate": 8.678e-05, |
| "loss": 0.0214, |
| "step": 4340 |
| }, |
| { |
| "epoch": 271.875, |
| "grad_norm": 1.501273512840271, |
| "learning_rate": 8.698000000000001e-05, |
| "loss": 0.0227, |
| "step": 4350 |
| }, |
| { |
| "epoch": 272.5, |
| "grad_norm": 1.630289077758789, |
| "learning_rate": 8.718e-05, |
| "loss": 0.0187, |
| "step": 4360 |
| }, |
| { |
| "epoch": 273.125, |
| "grad_norm": 1.558972716331482, |
| "learning_rate": 8.738000000000001e-05, |
| "loss": 0.0201, |
| "step": 4370 |
| }, |
| { |
| "epoch": 273.75, |
| "grad_norm": 1.319100260734558, |
| "learning_rate": 8.758000000000001e-05, |
| "loss": 0.0199, |
| "step": 4380 |
| }, |
| { |
| "epoch": 274.375, |
| "grad_norm": 1.687119722366333, |
| "learning_rate": 8.778e-05, |
| "loss": 0.0222, |
| "step": 4390 |
| }, |
| { |
| "epoch": 275.0, |
| "grad_norm": 1.5666712522506714, |
| "learning_rate": 8.798000000000001e-05, |
| "loss": 0.0218, |
| "step": 4400 |
| }, |
| { |
| "epoch": 275.625, |
| "grad_norm": 1.374186396598816, |
| "learning_rate": 8.818000000000001e-05, |
| "loss": 0.0201, |
| "step": 4410 |
| }, |
| { |
| "epoch": 276.25, |
| "grad_norm": 1.4911551475524902, |
| "learning_rate": 8.838e-05, |
| "loss": 0.0197, |
| "step": 4420 |
| }, |
| { |
| "epoch": 276.875, |
| "grad_norm": 1.391093134880066, |
| "learning_rate": 8.858000000000001e-05, |
| "loss": 0.019, |
| "step": 4430 |
| }, |
| { |
| "epoch": 277.5, |
| "grad_norm": 1.5734379291534424, |
| "learning_rate": 8.878000000000001e-05, |
| "loss": 0.0182, |
| "step": 4440 |
| }, |
| { |
| "epoch": 278.125, |
| "grad_norm": 1.5925443172454834, |
| "learning_rate": 8.898e-05, |
| "loss": 0.0196, |
| "step": 4450 |
| }, |
| { |
| "epoch": 278.75, |
| "grad_norm": 1.6269075870513916, |
| "learning_rate": 8.918000000000001e-05, |
| "loss": 0.0211, |
| "step": 4460 |
| }, |
| { |
| "epoch": 279.375, |
| "grad_norm": 1.5029900074005127, |
| "learning_rate": 8.938e-05, |
| "loss": 0.0208, |
| "step": 4470 |
| }, |
| { |
| "epoch": 280.0, |
| "grad_norm": 1.56442391872406, |
| "learning_rate": 8.958e-05, |
| "loss": 0.0196, |
| "step": 4480 |
| }, |
| { |
| "epoch": 280.625, |
| "grad_norm": 1.3483182191848755, |
| "learning_rate": 8.978000000000001e-05, |
| "loss": 0.0185, |
| "step": 4490 |
| }, |
| { |
| "epoch": 281.25, |
| "grad_norm": 1.2249255180358887, |
| "learning_rate": 8.998e-05, |
| "loss": 0.0182, |
| "step": 4500 |
| }, |
| { |
| "epoch": 281.875, |
| "grad_norm": 1.206023097038269, |
| "learning_rate": 9.018000000000001e-05, |
| "loss": 0.0191, |
| "step": 4510 |
| }, |
| { |
| "epoch": 282.5, |
| "grad_norm": 1.9158329963684082, |
| "learning_rate": 9.038000000000001e-05, |
| "loss": 0.0202, |
| "step": 4520 |
| }, |
| { |
| "epoch": 283.125, |
| "grad_norm": 1.6515963077545166, |
| "learning_rate": 9.058e-05, |
| "loss": 0.0199, |
| "step": 4530 |
| }, |
| { |
| "epoch": 283.75, |
| "grad_norm": 1.7891855239868164, |
| "learning_rate": 9.078000000000001e-05, |
| "loss": 0.0213, |
| "step": 4540 |
| }, |
| { |
| "epoch": 284.375, |
| "grad_norm": 1.5916194915771484, |
| "learning_rate": 9.098000000000001e-05, |
| "loss": 0.0204, |
| "step": 4550 |
| }, |
| { |
| "epoch": 285.0, |
| "grad_norm": 1.6548500061035156, |
| "learning_rate": 9.118e-05, |
| "loss": 0.0206, |
| "step": 4560 |
| }, |
| { |
| "epoch": 285.625, |
| "grad_norm": 1.7890138626098633, |
| "learning_rate": 9.138e-05, |
| "loss": 0.0216, |
| "step": 4570 |
| }, |
| { |
| "epoch": 286.25, |
| "grad_norm": 1.3698619604110718, |
| "learning_rate": 9.158e-05, |
| "loss": 0.021, |
| "step": 4580 |
| }, |
| { |
| "epoch": 286.875, |
| "grad_norm": 1.3164348602294922, |
| "learning_rate": 9.178e-05, |
| "loss": 0.0201, |
| "step": 4590 |
| }, |
| { |
| "epoch": 287.5, |
| "grad_norm": 1.2602595090866089, |
| "learning_rate": 9.198e-05, |
| "loss": 0.0206, |
| "step": 4600 |
| }, |
| { |
| "epoch": 288.125, |
| "grad_norm": 1.6356364488601685, |
| "learning_rate": 9.218e-05, |
| "loss": 0.0206, |
| "step": 4610 |
| }, |
| { |
| "epoch": 288.75, |
| "grad_norm": 1.339037299156189, |
| "learning_rate": 9.238e-05, |
| "loss": 0.0194, |
| "step": 4620 |
| }, |
| { |
| "epoch": 289.375, |
| "grad_norm": 1.5343581438064575, |
| "learning_rate": 9.258e-05, |
| "loss": 0.0214, |
| "step": 4630 |
| }, |
| { |
| "epoch": 290.0, |
| "grad_norm": 1.7950295209884644, |
| "learning_rate": 9.278e-05, |
| "loss": 0.02, |
| "step": 4640 |
| }, |
| { |
| "epoch": 290.625, |
| "grad_norm": 1.346240758895874, |
| "learning_rate": 9.298e-05, |
| "loss": 0.0202, |
| "step": 4650 |
| }, |
| { |
| "epoch": 291.25, |
| "grad_norm": 1.1901124715805054, |
| "learning_rate": 9.318e-05, |
| "loss": 0.0176, |
| "step": 4660 |
| }, |
| { |
| "epoch": 291.875, |
| "grad_norm": 1.3559141159057617, |
| "learning_rate": 9.338e-05, |
| "loss": 0.0165, |
| "step": 4670 |
| }, |
| { |
| "epoch": 292.5, |
| "grad_norm": 1.342185139656067, |
| "learning_rate": 9.358e-05, |
| "loss": 0.0191, |
| "step": 4680 |
| }, |
| { |
| "epoch": 293.125, |
| "grad_norm": 1.5401999950408936, |
| "learning_rate": 9.378e-05, |
| "loss": 0.0193, |
| "step": 4690 |
| }, |
| { |
| "epoch": 293.75, |
| "grad_norm": 1.4412999153137207, |
| "learning_rate": 9.398e-05, |
| "loss": 0.0191, |
| "step": 4700 |
| }, |
| { |
| "epoch": 294.375, |
| "grad_norm": 1.2340666055679321, |
| "learning_rate": 9.418e-05, |
| "loss": 0.0182, |
| "step": 4710 |
| }, |
| { |
| "epoch": 295.0, |
| "grad_norm": 1.1816933155059814, |
| "learning_rate": 9.438e-05, |
| "loss": 0.0175, |
| "step": 4720 |
| }, |
| { |
| "epoch": 295.625, |
| "grad_norm": 1.2440204620361328, |
| "learning_rate": 9.458e-05, |
| "loss": 0.0178, |
| "step": 4730 |
| }, |
| { |
| "epoch": 296.25, |
| "grad_norm": 1.4980961084365845, |
| "learning_rate": 9.478e-05, |
| "loss": 0.0173, |
| "step": 4740 |
| }, |
| { |
| "epoch": 296.875, |
| "grad_norm": 1.4015268087387085, |
| "learning_rate": 9.498e-05, |
| "loss": 0.0198, |
| "step": 4750 |
| }, |
| { |
| "epoch": 297.5, |
| "grad_norm": 1.420882225036621, |
| "learning_rate": 9.518000000000001e-05, |
| "loss": 0.019, |
| "step": 4760 |
| }, |
| { |
| "epoch": 298.125, |
| "grad_norm": 1.2662218809127808, |
| "learning_rate": 9.538e-05, |
| "loss": 0.0195, |
| "step": 4770 |
| }, |
| { |
| "epoch": 298.75, |
| "grad_norm": 1.528330683708191, |
| "learning_rate": 9.558e-05, |
| "loss": 0.0213, |
| "step": 4780 |
| }, |
| { |
| "epoch": 299.375, |
| "grad_norm": 1.3324357271194458, |
| "learning_rate": 9.578000000000001e-05, |
| "loss": 0.0194, |
| "step": 4790 |
| }, |
| { |
| "epoch": 300.0, |
| "grad_norm": 1.3170146942138672, |
| "learning_rate": 9.598e-05, |
| "loss": 0.0186, |
| "step": 4800 |
| }, |
| { |
| "epoch": 300.625, |
| "grad_norm": 1.4495036602020264, |
| "learning_rate": 9.618e-05, |
| "loss": 0.0178, |
| "step": 4810 |
| }, |
| { |
| "epoch": 301.25, |
| "grad_norm": 1.6242793798446655, |
| "learning_rate": 9.638000000000001e-05, |
| "loss": 0.0204, |
| "step": 4820 |
| }, |
| { |
| "epoch": 301.875, |
| "grad_norm": 1.4832464456558228, |
| "learning_rate": 9.658e-05, |
| "loss": 0.0203, |
| "step": 4830 |
| }, |
| { |
| "epoch": 302.5, |
| "grad_norm": 1.3549563884735107, |
| "learning_rate": 9.678e-05, |
| "loss": 0.0177, |
| "step": 4840 |
| }, |
| { |
| "epoch": 303.125, |
| "grad_norm": 1.804412841796875, |
| "learning_rate": 9.698000000000001e-05, |
| "loss": 0.021, |
| "step": 4850 |
| }, |
| { |
| "epoch": 303.75, |
| "grad_norm": 1.5907257795333862, |
| "learning_rate": 9.718e-05, |
| "loss": 0.0209, |
| "step": 4860 |
| }, |
| { |
| "epoch": 304.375, |
| "grad_norm": 1.4540935754776, |
| "learning_rate": 9.738e-05, |
| "loss": 0.017, |
| "step": 4870 |
| }, |
| { |
| "epoch": 305.0, |
| "grad_norm": 1.223158597946167, |
| "learning_rate": 9.758000000000001e-05, |
| "loss": 0.018, |
| "step": 4880 |
| }, |
| { |
| "epoch": 305.625, |
| "grad_norm": 1.2038943767547607, |
| "learning_rate": 9.778e-05, |
| "loss": 0.0176, |
| "step": 4890 |
| }, |
| { |
| "epoch": 306.25, |
| "grad_norm": 1.110867977142334, |
| "learning_rate": 9.798000000000001e-05, |
| "loss": 0.0173, |
| "step": 4900 |
| }, |
| { |
| "epoch": 306.875, |
| "grad_norm": 1.414939522743225, |
| "learning_rate": 9.818000000000001e-05, |
| "loss": 0.017, |
| "step": 4910 |
| }, |
| { |
| "epoch": 307.5, |
| "grad_norm": 1.3866313695907593, |
| "learning_rate": 9.838e-05, |
| "loss": 0.02, |
| "step": 4920 |
| }, |
| { |
| "epoch": 308.125, |
| "grad_norm": 1.5799922943115234, |
| "learning_rate": 9.858000000000001e-05, |
| "loss": 0.0162, |
| "step": 4930 |
| }, |
| { |
| "epoch": 308.75, |
| "grad_norm": 1.261763334274292, |
| "learning_rate": 9.878e-05, |
| "loss": 0.0189, |
| "step": 4940 |
| }, |
| { |
| "epoch": 309.375, |
| "grad_norm": 1.474787950515747, |
| "learning_rate": 9.898e-05, |
| "loss": 0.0181, |
| "step": 4950 |
| }, |
| { |
| "epoch": 310.0, |
| "grad_norm": 1.287822961807251, |
| "learning_rate": 9.918000000000001e-05, |
| "loss": 0.0184, |
| "step": 4960 |
| }, |
| { |
| "epoch": 310.625, |
| "grad_norm": 1.0713199377059937, |
| "learning_rate": 9.938e-05, |
| "loss": 0.0179, |
| "step": 4970 |
| }, |
| { |
| "epoch": 311.25, |
| "grad_norm": 1.2200391292572021, |
| "learning_rate": 9.958e-05, |
| "loss": 0.018, |
| "step": 4980 |
| }, |
| { |
| "epoch": 311.875, |
| "grad_norm": 1.5587009191513062, |
| "learning_rate": 9.978000000000001e-05, |
| "loss": 0.0199, |
| "step": 4990 |
| }, |
| { |
| "epoch": 312.5, |
| "grad_norm": 1.4640460014343262, |
| "learning_rate": 9.998e-05, |
| "loss": 0.017, |
| "step": 5000 |
| }, |
| { |
| "epoch": 313.125, |
| "grad_norm": 1.4215519428253174, |
| "learning_rate": 9.999999778549045e-05, |
| "loss": 0.0171, |
| "step": 5010 |
| }, |
| { |
| "epoch": 313.75, |
| "grad_norm": 1.1879425048828125, |
| "learning_rate": 9.999999013039593e-05, |
| "loss": 0.016, |
| "step": 5020 |
| }, |
| { |
| "epoch": 314.375, |
| "grad_norm": 1.231829047203064, |
| "learning_rate": 9.999997700737766e-05, |
| "loss": 0.0158, |
| "step": 5030 |
| }, |
| { |
| "epoch": 315.0, |
| "grad_norm": 1.224221110343933, |
| "learning_rate": 9.999995841643709e-05, |
| "loss": 0.0164, |
| "step": 5040 |
| }, |
| { |
| "epoch": 315.625, |
| "grad_norm": 1.491013765335083, |
| "learning_rate": 9.999993435757623e-05, |
| "loss": 0.0166, |
| "step": 5050 |
| }, |
| { |
| "epoch": 316.25, |
| "grad_norm": 1.2551881074905396, |
| "learning_rate": 9.999990483079773e-05, |
| "loss": 0.0187, |
| "step": 5060 |
| }, |
| { |
| "epoch": 316.875, |
| "grad_norm": 1.3919192552566528, |
| "learning_rate": 9.999986983610481e-05, |
| "loss": 0.0167, |
| "step": 5070 |
| }, |
| { |
| "epoch": 317.5, |
| "grad_norm": 1.145408272743225, |
| "learning_rate": 9.99998293735013e-05, |
| "loss": 0.0169, |
| "step": 5080 |
| }, |
| { |
| "epoch": 318.125, |
| "grad_norm": 1.5774271488189697, |
| "learning_rate": 9.999978344299161e-05, |
| "loss": 0.0171, |
| "step": 5090 |
| }, |
| { |
| "epoch": 318.75, |
| "grad_norm": 1.4125555753707886, |
| "learning_rate": 9.99997320445808e-05, |
| "loss": 0.0191, |
| "step": 5100 |
| }, |
| { |
| "epoch": 319.375, |
| "grad_norm": 1.110128402709961, |
| "learning_rate": 9.999967517827444e-05, |
| "loss": 0.0159, |
| "step": 5110 |
| }, |
| { |
| "epoch": 320.0, |
| "grad_norm": 1.3442533016204834, |
| "learning_rate": 9.999961284407879e-05, |
| "loss": 0.0177, |
| "step": 5120 |
| }, |
| { |
| "epoch": 320.625, |
| "grad_norm": 1.3384839296340942, |
| "learning_rate": 9.999954504200067e-05, |
| "loss": 0.0154, |
| "step": 5130 |
| }, |
| { |
| "epoch": 321.25, |
| "grad_norm": 1.1482480764389038, |
| "learning_rate": 9.999947177204744e-05, |
| "loss": 0.0166, |
| "step": 5140 |
| }, |
| { |
| "epoch": 321.875, |
| "grad_norm": 1.2519944906234741, |
| "learning_rate": 9.999939303422718e-05, |
| "loss": 0.0172, |
| "step": 5150 |
| }, |
| { |
| "epoch": 322.5, |
| "grad_norm": 1.3870333433151245, |
| "learning_rate": 9.999930882854847e-05, |
| "loss": 0.0168, |
| "step": 5160 |
| }, |
| { |
| "epoch": 323.125, |
| "grad_norm": 1.366909146308899, |
| "learning_rate": 9.999921915502051e-05, |
| "loss": 0.016, |
| "step": 5170 |
| }, |
| { |
| "epoch": 323.75, |
| "grad_norm": 1.1931958198547363, |
| "learning_rate": 9.99991240136531e-05, |
| "loss": 0.0186, |
| "step": 5180 |
| }, |
| { |
| "epoch": 324.375, |
| "grad_norm": 1.1246201992034912, |
| "learning_rate": 9.999902340445668e-05, |
| "loss": 0.0151, |
| "step": 5190 |
| }, |
| { |
| "epoch": 325.0, |
| "grad_norm": 1.2969485521316528, |
| "learning_rate": 9.999891732744224e-05, |
| "loss": 0.0154, |
| "step": 5200 |
| }, |
| { |
| "epoch": 325.625, |
| "grad_norm": 1.1869677305221558, |
| "learning_rate": 9.999880578262135e-05, |
| "loss": 0.0167, |
| "step": 5210 |
| }, |
| { |
| "epoch": 326.25, |
| "grad_norm": 1.221058964729309, |
| "learning_rate": 9.999868877000624e-05, |
| "loss": 0.0164, |
| "step": 5220 |
| }, |
| { |
| "epoch": 326.875, |
| "grad_norm": 1.2182931900024414, |
| "learning_rate": 9.99985662896097e-05, |
| "loss": 0.0175, |
| "step": 5230 |
| }, |
| { |
| "epoch": 327.5, |
| "grad_norm": 1.2568279504776, |
| "learning_rate": 9.999843834144513e-05, |
| "loss": 0.0159, |
| "step": 5240 |
| }, |
| { |
| "epoch": 328.125, |
| "grad_norm": 1.254540205001831, |
| "learning_rate": 9.99983049255265e-05, |
| "loss": 0.0161, |
| "step": 5250 |
| }, |
| { |
| "epoch": 328.75, |
| "grad_norm": 1.2322643995285034, |
| "learning_rate": 9.999816604186843e-05, |
| "loss": 0.0168, |
| "step": 5260 |
| }, |
| { |
| "epoch": 329.375, |
| "grad_norm": 0.9582310914993286, |
| "learning_rate": 9.999802169048609e-05, |
| "loss": 0.0149, |
| "step": 5270 |
| }, |
| { |
| "epoch": 330.0, |
| "grad_norm": 0.900672197341919, |
| "learning_rate": 9.999787187139527e-05, |
| "loss": 0.0141, |
| "step": 5280 |
| }, |
| { |
| "epoch": 330.625, |
| "grad_norm": 1.049651026725769, |
| "learning_rate": 9.999771658461234e-05, |
| "loss": 0.0153, |
| "step": 5290 |
| }, |
| { |
| "epoch": 331.25, |
| "grad_norm": 1.0110572576522827, |
| "learning_rate": 9.999755583015431e-05, |
| "loss": 0.0145, |
| "step": 5300 |
| }, |
| { |
| "epoch": 331.875, |
| "grad_norm": 1.1884170770645142, |
| "learning_rate": 9.999738960803874e-05, |
| "loss": 0.0152, |
| "step": 5310 |
| }, |
| { |
| "epoch": 332.5, |
| "grad_norm": 1.4686788320541382, |
| "learning_rate": 9.99972179182838e-05, |
| "loss": 0.0136, |
| "step": 5320 |
| }, |
| { |
| "epoch": 333.125, |
| "grad_norm": 1.0699830055236816, |
| "learning_rate": 9.99970407609083e-05, |
| "loss": 0.0161, |
| "step": 5330 |
| }, |
| { |
| "epoch": 333.75, |
| "grad_norm": 1.8003672361373901, |
| "learning_rate": 9.999685813593159e-05, |
| "loss": 0.0177, |
| "step": 5340 |
| }, |
| { |
| "epoch": 334.375, |
| "grad_norm": 1.38191556930542, |
| "learning_rate": 9.999667004337362e-05, |
| "loss": 0.0161, |
| "step": 5350 |
| }, |
| { |
| "epoch": 335.0, |
| "grad_norm": 1.199036717414856, |
| "learning_rate": 9.9996476483255e-05, |
| "loss": 0.0164, |
| "step": 5360 |
| }, |
| { |
| "epoch": 335.625, |
| "grad_norm": 1.1064685583114624, |
| "learning_rate": 9.999627745559688e-05, |
| "loss": 0.0153, |
| "step": 5370 |
| }, |
| { |
| "epoch": 336.25, |
| "grad_norm": 0.968438982963562, |
| "learning_rate": 9.999607296042101e-05, |
| "loss": 0.015, |
| "step": 5380 |
| }, |
| { |
| "epoch": 336.875, |
| "grad_norm": 1.3204340934753418, |
| "learning_rate": 9.99958629977498e-05, |
| "loss": 0.0144, |
| "step": 5390 |
| }, |
| { |
| "epoch": 337.5, |
| "grad_norm": 1.0026376247406006, |
| "learning_rate": 9.999564756760615e-05, |
| "loss": 0.0144, |
| "step": 5400 |
| }, |
| { |
| "epoch": 338.125, |
| "grad_norm": 1.094014048576355, |
| "learning_rate": 9.999542667001366e-05, |
| "loss": 0.0143, |
| "step": 5410 |
| }, |
| { |
| "epoch": 338.75, |
| "grad_norm": 1.0915470123291016, |
| "learning_rate": 9.999520030499647e-05, |
| "loss": 0.0138, |
| "step": 5420 |
| }, |
| { |
| "epoch": 339.375, |
| "grad_norm": 1.0048651695251465, |
| "learning_rate": 9.999496847257936e-05, |
| "loss": 0.0146, |
| "step": 5430 |
| }, |
| { |
| "epoch": 340.0, |
| "grad_norm": 1.138767123222351, |
| "learning_rate": 9.999473117278764e-05, |
| "loss": 0.0162, |
| "step": 5440 |
| }, |
| { |
| "epoch": 340.625, |
| "grad_norm": 1.3121551275253296, |
| "learning_rate": 9.999448840564731e-05, |
| "loss": 0.0144, |
| "step": 5450 |
| }, |
| { |
| "epoch": 341.25, |
| "grad_norm": 1.2357908487319946, |
| "learning_rate": 9.999424017118488e-05, |
| "loss": 0.0155, |
| "step": 5460 |
| }, |
| { |
| "epoch": 341.875, |
| "grad_norm": 1.4110485315322876, |
| "learning_rate": 9.999398646942751e-05, |
| "loss": 0.0171, |
| "step": 5470 |
| }, |
| { |
| "epoch": 342.5, |
| "grad_norm": 1.231876015663147, |
| "learning_rate": 9.999372730040296e-05, |
| "loss": 0.0148, |
| "step": 5480 |
| }, |
| { |
| "epoch": 343.125, |
| "grad_norm": 1.1513409614562988, |
| "learning_rate": 9.999346266413953e-05, |
| "loss": 0.0155, |
| "step": 5490 |
| }, |
| { |
| "epoch": 343.75, |
| "grad_norm": 1.0324758291244507, |
| "learning_rate": 9.99931925606662e-05, |
| "loss": 0.0155, |
| "step": 5500 |
| }, |
| { |
| "epoch": 344.375, |
| "grad_norm": 1.2001458406448364, |
| "learning_rate": 9.99929169900125e-05, |
| "loss": 0.0142, |
| "step": 5510 |
| }, |
| { |
| "epoch": 345.0, |
| "grad_norm": 0.9090719819068909, |
| "learning_rate": 9.999263595220855e-05, |
| "loss": 0.0133, |
| "step": 5520 |
| }, |
| { |
| "epoch": 345.625, |
| "grad_norm": 0.9517356157302856, |
| "learning_rate": 9.99923494472851e-05, |
| "loss": 0.0131, |
| "step": 5530 |
| }, |
| { |
| "epoch": 346.25, |
| "grad_norm": 0.9557884931564331, |
| "learning_rate": 9.999205747527348e-05, |
| "loss": 0.0153, |
| "step": 5540 |
| }, |
| { |
| "epoch": 346.875, |
| "grad_norm": 1.039165735244751, |
| "learning_rate": 9.999176003620561e-05, |
| "loss": 0.0141, |
| "step": 5550 |
| }, |
| { |
| "epoch": 347.5, |
| "grad_norm": 0.930853545665741, |
| "learning_rate": 9.999145713011405e-05, |
| "loss": 0.0143, |
| "step": 5560 |
| }, |
| { |
| "epoch": 348.125, |
| "grad_norm": 0.956095278263092, |
| "learning_rate": 9.999114875703186e-05, |
| "loss": 0.0141, |
| "step": 5570 |
| }, |
| { |
| "epoch": 348.75, |
| "grad_norm": 0.771486222743988, |
| "learning_rate": 9.999083491699281e-05, |
| "loss": 0.0143, |
| "step": 5580 |
| }, |
| { |
| "epoch": 349.375, |
| "grad_norm": 0.6893032193183899, |
| "learning_rate": 9.999051561003123e-05, |
| "loss": 0.0144, |
| "step": 5590 |
| }, |
| { |
| "epoch": 350.0, |
| "grad_norm": 1.0121644735336304, |
| "learning_rate": 9.999019083618202e-05, |
| "loss": 0.0151, |
| "step": 5600 |
| }, |
| { |
| "epoch": 350.625, |
| "grad_norm": 1.1058743000030518, |
| "learning_rate": 9.99898605954807e-05, |
| "loss": 0.0162, |
| "step": 5610 |
| }, |
| { |
| "epoch": 351.25, |
| "grad_norm": 1.0109678506851196, |
| "learning_rate": 9.998952488796338e-05, |
| "loss": 0.015, |
| "step": 5620 |
| }, |
| { |
| "epoch": 351.875, |
| "grad_norm": 0.8328022360801697, |
| "learning_rate": 9.998918371366676e-05, |
| "loss": 0.0142, |
| "step": 5630 |
| }, |
| { |
| "epoch": 352.5, |
| "grad_norm": 0.836746096611023, |
| "learning_rate": 9.99888370726282e-05, |
| "loss": 0.0137, |
| "step": 5640 |
| }, |
| { |
| "epoch": 353.125, |
| "grad_norm": 0.9082058072090149, |
| "learning_rate": 9.998848496488556e-05, |
| "loss": 0.0141, |
| "step": 5650 |
| }, |
| { |
| "epoch": 353.75, |
| "grad_norm": 0.9380905628204346, |
| "learning_rate": 9.998812739047736e-05, |
| "loss": 0.0149, |
| "step": 5660 |
| }, |
| { |
| "epoch": 354.375, |
| "grad_norm": 0.9345435500144958, |
| "learning_rate": 9.99877643494427e-05, |
| "loss": 0.0144, |
| "step": 5670 |
| }, |
| { |
| "epoch": 355.0, |
| "grad_norm": 0.8377882242202759, |
| "learning_rate": 9.998739584182128e-05, |
| "loss": 0.0151, |
| "step": 5680 |
| }, |
| { |
| "epoch": 355.625, |
| "grad_norm": 1.1241296529769897, |
| "learning_rate": 9.998702186765342e-05, |
| "loss": 0.0145, |
| "step": 5690 |
| }, |
| { |
| "epoch": 356.25, |
| "grad_norm": 1.0022445917129517, |
| "learning_rate": 9.998664242698e-05, |
| "loss": 0.0137, |
| "step": 5700 |
| }, |
| { |
| "epoch": 356.875, |
| "grad_norm": 1.14398992061615, |
| "learning_rate": 9.998625751984251e-05, |
| "loss": 0.0122, |
| "step": 5710 |
| }, |
| { |
| "epoch": 357.5, |
| "grad_norm": 1.511240839958191, |
| "learning_rate": 9.998586714628307e-05, |
| "loss": 0.0141, |
| "step": 5720 |
| }, |
| { |
| "epoch": 358.125, |
| "grad_norm": 1.257946252822876, |
| "learning_rate": 9.998547130634432e-05, |
| "loss": 0.0157, |
| "step": 5730 |
| }, |
| { |
| "epoch": 358.75, |
| "grad_norm": 1.1702454090118408, |
| "learning_rate": 9.99850700000696e-05, |
| "loss": 0.0144, |
| "step": 5740 |
| }, |
| { |
| "epoch": 359.375, |
| "grad_norm": 0.8067399859428406, |
| "learning_rate": 9.998466322750278e-05, |
| "loss": 0.0136, |
| "step": 5750 |
| }, |
| { |
| "epoch": 360.0, |
| "grad_norm": 0.8550326228141785, |
| "learning_rate": 9.998425098868834e-05, |
| "loss": 0.0129, |
| "step": 5760 |
| }, |
| { |
| "epoch": 360.625, |
| "grad_norm": 0.9919332265853882, |
| "learning_rate": 9.998383328367136e-05, |
| "loss": 0.013, |
| "step": 5770 |
| }, |
| { |
| "epoch": 361.25, |
| "grad_norm": 0.9598110914230347, |
| "learning_rate": 9.99834101124975e-05, |
| "loss": 0.0136, |
| "step": 5780 |
| }, |
| { |
| "epoch": 361.875, |
| "grad_norm": 0.8677031397819519, |
| "learning_rate": 9.998298147521309e-05, |
| "loss": 0.0137, |
| "step": 5790 |
| }, |
| { |
| "epoch": 362.5, |
| "grad_norm": 0.9038897156715393, |
| "learning_rate": 9.998254737186496e-05, |
| "loss": 0.0124, |
| "step": 5800 |
| }, |
| { |
| "epoch": 363.125, |
| "grad_norm": 0.9390170574188232, |
| "learning_rate": 9.99821078025006e-05, |
| "loss": 0.0119, |
| "step": 5810 |
| }, |
| { |
| "epoch": 363.75, |
| "grad_norm": 1.011299967765808, |
| "learning_rate": 9.998166276716807e-05, |
| "loss": 0.0131, |
| "step": 5820 |
| }, |
| { |
| "epoch": 364.375, |
| "grad_norm": 0.7727632522583008, |
| "learning_rate": 9.998121226591606e-05, |
| "loss": 0.0124, |
| "step": 5830 |
| }, |
| { |
| "epoch": 365.0, |
| "grad_norm": 0.9111457467079163, |
| "learning_rate": 9.998075629879382e-05, |
| "loss": 0.0122, |
| "step": 5840 |
| }, |
| { |
| "epoch": 365.625, |
| "grad_norm": 0.8254387378692627, |
| "learning_rate": 9.99802948658512e-05, |
| "loss": 0.0122, |
| "step": 5850 |
| }, |
| { |
| "epoch": 366.25, |
| "grad_norm": 0.8419124484062195, |
| "learning_rate": 9.99798279671387e-05, |
| "loss": 0.0136, |
| "step": 5860 |
| }, |
| { |
| "epoch": 366.875, |
| "grad_norm": 0.9950329661369324, |
| "learning_rate": 9.997935560270734e-05, |
| "loss": 0.0139, |
| "step": 5870 |
| }, |
| { |
| "epoch": 367.5, |
| "grad_norm": 0.8446523547172546, |
| "learning_rate": 9.997887777260879e-05, |
| "loss": 0.0128, |
| "step": 5880 |
| }, |
| { |
| "epoch": 368.125, |
| "grad_norm": 0.8795507550239563, |
| "learning_rate": 9.997839447689532e-05, |
| "loss": 0.0142, |
| "step": 5890 |
| }, |
| { |
| "epoch": 368.75, |
| "grad_norm": 0.9794557094573975, |
| "learning_rate": 9.997790571561978e-05, |
| "loss": 0.0134, |
| "step": 5900 |
| }, |
| { |
| "epoch": 369.375, |
| "grad_norm": 0.9027246236801147, |
| "learning_rate": 9.99774114888356e-05, |
| "loss": 0.0126, |
| "step": 5910 |
| }, |
| { |
| "epoch": 370.0, |
| "grad_norm": 0.8756938576698303, |
| "learning_rate": 9.997691179659684e-05, |
| "loss": 0.014, |
| "step": 5920 |
| }, |
| { |
| "epoch": 370.625, |
| "grad_norm": 1.2023380994796753, |
| "learning_rate": 9.997640663895815e-05, |
| "loss": 0.0131, |
| "step": 5930 |
| }, |
| { |
| "epoch": 371.25, |
| "grad_norm": 1.141804814338684, |
| "learning_rate": 9.997589601597477e-05, |
| "loss": 0.015, |
| "step": 5940 |
| }, |
| { |
| "epoch": 371.875, |
| "grad_norm": 0.9179847836494446, |
| "learning_rate": 9.997537992770252e-05, |
| "loss": 0.0126, |
| "step": 5950 |
| }, |
| { |
| "epoch": 372.5, |
| "grad_norm": 0.8151926398277283, |
| "learning_rate": 9.997485837419788e-05, |
| "loss": 0.013, |
| "step": 5960 |
| }, |
| { |
| "epoch": 373.125, |
| "grad_norm": 0.6601715683937073, |
| "learning_rate": 9.997433135551786e-05, |
| "loss": 0.0123, |
| "step": 5970 |
| }, |
| { |
| "epoch": 373.75, |
| "grad_norm": 0.8281500935554504, |
| "learning_rate": 9.997379887172009e-05, |
| "loss": 0.0115, |
| "step": 5980 |
| }, |
| { |
| "epoch": 374.375, |
| "grad_norm": 0.8727806806564331, |
| "learning_rate": 9.997326092286281e-05, |
| "loss": 0.0128, |
| "step": 5990 |
| }, |
| { |
| "epoch": 375.0, |
| "grad_norm": 0.8489688038825989, |
| "learning_rate": 9.997271750900486e-05, |
| "loss": 0.0129, |
| "step": 6000 |
| }, |
| { |
| "epoch": 375.625, |
| "grad_norm": 0.6510198712348938, |
| "learning_rate": 9.997216863020565e-05, |
| "loss": 0.0117, |
| "step": 6010 |
| }, |
| { |
| "epoch": 376.25, |
| "grad_norm": 0.8793591856956482, |
| "learning_rate": 9.99716142865252e-05, |
| "loss": 0.012, |
| "step": 6020 |
| }, |
| { |
| "epoch": 376.875, |
| "grad_norm": 0.7070950269699097, |
| "learning_rate": 9.997105447802415e-05, |
| "loss": 0.0118, |
| "step": 6030 |
| }, |
| { |
| "epoch": 377.5, |
| "grad_norm": 0.8314371109008789, |
| "learning_rate": 9.997048920476373e-05, |
| "loss": 0.0118, |
| "step": 6040 |
| }, |
| { |
| "epoch": 378.125, |
| "grad_norm": 0.761350154876709, |
| "learning_rate": 9.996991846680572e-05, |
| "loss": 0.0127, |
| "step": 6050 |
| }, |
| { |
| "epoch": 378.75, |
| "grad_norm": 0.7484061121940613, |
| "learning_rate": 9.996934226421257e-05, |
| "loss": 0.0119, |
| "step": 6060 |
| }, |
| { |
| "epoch": 379.375, |
| "grad_norm": 0.7929844260215759, |
| "learning_rate": 9.996876059704726e-05, |
| "loss": 0.012, |
| "step": 6070 |
| }, |
| { |
| "epoch": 380.0, |
| "grad_norm": 0.8181713819503784, |
| "learning_rate": 9.996817346537343e-05, |
| "loss": 0.0142, |
| "step": 6080 |
| }, |
| { |
| "epoch": 380.625, |
| "grad_norm": 0.9369438290596008, |
| "learning_rate": 9.996758086925526e-05, |
| "loss": 0.0132, |
| "step": 6090 |
| }, |
| { |
| "epoch": 381.25, |
| "grad_norm": 0.8046433925628662, |
| "learning_rate": 9.996698280875759e-05, |
| "loss": 0.012, |
| "step": 6100 |
| }, |
| { |
| "epoch": 381.875, |
| "grad_norm": 0.7803655862808228, |
| "learning_rate": 9.99663792839458e-05, |
| "loss": 0.0134, |
| "step": 6110 |
| }, |
| { |
| "epoch": 382.5, |
| "grad_norm": 0.7660366296768188, |
| "learning_rate": 9.99657702948859e-05, |
| "loss": 0.0124, |
| "step": 6120 |
| }, |
| { |
| "epoch": 383.125, |
| "grad_norm": 0.6417670845985413, |
| "learning_rate": 9.996515584164448e-05, |
| "loss": 0.012, |
| "step": 6130 |
| }, |
| { |
| "epoch": 383.75, |
| "grad_norm": 0.8960108160972595, |
| "learning_rate": 9.996453592428873e-05, |
| "loss": 0.0117, |
| "step": 6140 |
| }, |
| { |
| "epoch": 384.375, |
| "grad_norm": 0.8871966600418091, |
| "learning_rate": 9.996391054288646e-05, |
| "loss": 0.0116, |
| "step": 6150 |
| }, |
| { |
| "epoch": 385.0, |
| "grad_norm": 0.8760678172111511, |
| "learning_rate": 9.996327969750605e-05, |
| "loss": 0.0117, |
| "step": 6160 |
| }, |
| { |
| "epoch": 385.625, |
| "grad_norm": 0.865280032157898, |
| "learning_rate": 9.996264338821649e-05, |
| "loss": 0.011, |
| "step": 6170 |
| }, |
| { |
| "epoch": 386.25, |
| "grad_norm": 1.1085981130599976, |
| "learning_rate": 9.996200161508735e-05, |
| "loss": 0.0128, |
| "step": 6180 |
| }, |
| { |
| "epoch": 386.875, |
| "grad_norm": 1.0455905199050903, |
| "learning_rate": 9.996135437818885e-05, |
| "loss": 0.0121, |
| "step": 6190 |
| }, |
| { |
| "epoch": 387.5, |
| "grad_norm": 0.8136721253395081, |
| "learning_rate": 9.996070167759175e-05, |
| "loss": 0.013, |
| "step": 6200 |
| }, |
| { |
| "epoch": 388.125, |
| "grad_norm": 0.7488872408866882, |
| "learning_rate": 9.996004351336743e-05, |
| "loss": 0.0126, |
| "step": 6210 |
| }, |
| { |
| "epoch": 388.75, |
| "grad_norm": 0.8310092091560364, |
| "learning_rate": 9.995937988558785e-05, |
| "loss": 0.0136, |
| "step": 6220 |
| }, |
| { |
| "epoch": 389.375, |
| "grad_norm": 0.8811050653457642, |
| "learning_rate": 9.995871079432561e-05, |
| "loss": 0.0132, |
| "step": 6230 |
| }, |
| { |
| "epoch": 390.0, |
| "grad_norm": 0.9369884133338928, |
| "learning_rate": 9.995803623965389e-05, |
| "loss": 0.0133, |
| "step": 6240 |
| }, |
| { |
| "epoch": 390.625, |
| "grad_norm": 0.9472755193710327, |
| "learning_rate": 9.995735622164641e-05, |
| "loss": 0.0132, |
| "step": 6250 |
| }, |
| { |
| "epoch": 391.25, |
| "grad_norm": 1.1913206577301025, |
| "learning_rate": 9.995667074037758e-05, |
| "loss": 0.0134, |
| "step": 6260 |
| }, |
| { |
| "epoch": 391.875, |
| "grad_norm": 0.8896439075469971, |
| "learning_rate": 9.995597979592232e-05, |
| "loss": 0.0134, |
| "step": 6270 |
| }, |
| { |
| "epoch": 392.5, |
| "grad_norm": 0.8965170383453369, |
| "learning_rate": 9.995528338835625e-05, |
| "loss": 0.0124, |
| "step": 6280 |
| }, |
| { |
| "epoch": 393.125, |
| "grad_norm": 0.8789317011833191, |
| "learning_rate": 9.995458151775547e-05, |
| "loss": 0.0126, |
| "step": 6290 |
| }, |
| { |
| "epoch": 393.75, |
| "grad_norm": 0.7865223288536072, |
| "learning_rate": 9.995387418419677e-05, |
| "loss": 0.0119, |
| "step": 6300 |
| }, |
| { |
| "epoch": 394.375, |
| "grad_norm": 0.7527452111244202, |
| "learning_rate": 9.99531613877575e-05, |
| "loss": 0.0118, |
| "step": 6310 |
| }, |
| { |
| "epoch": 395.0, |
| "grad_norm": 0.7900567650794983, |
| "learning_rate": 9.995244312851559e-05, |
| "loss": 0.0116, |
| "step": 6320 |
| }, |
| { |
| "epoch": 395.625, |
| "grad_norm": 0.7366781234741211, |
| "learning_rate": 9.995171940654961e-05, |
| "loss": 0.0112, |
| "step": 6330 |
| }, |
| { |
| "epoch": 396.25, |
| "grad_norm": 0.8073196411132812, |
| "learning_rate": 9.995099022193871e-05, |
| "loss": 0.0116, |
| "step": 6340 |
| }, |
| { |
| "epoch": 396.875, |
| "grad_norm": 0.924555242061615, |
| "learning_rate": 9.995025557476261e-05, |
| "loss": 0.0109, |
| "step": 6350 |
| }, |
| { |
| "epoch": 397.5, |
| "grad_norm": 0.8284614682197571, |
| "learning_rate": 9.994951546510165e-05, |
| "loss": 0.0117, |
| "step": 6360 |
| }, |
| { |
| "epoch": 398.125, |
| "grad_norm": 0.8100062012672424, |
| "learning_rate": 9.994876989303679e-05, |
| "loss": 0.0127, |
| "step": 6370 |
| }, |
| { |
| "epoch": 398.75, |
| "grad_norm": 0.9377039670944214, |
| "learning_rate": 9.994801885864955e-05, |
| "loss": 0.0122, |
| "step": 6380 |
| }, |
| { |
| "epoch": 399.375, |
| "grad_norm": 0.9842908978462219, |
| "learning_rate": 9.994726236202205e-05, |
| "loss": 0.013, |
| "step": 6390 |
| }, |
| { |
| "epoch": 400.0, |
| "grad_norm": 1.1019262075424194, |
| "learning_rate": 9.994650040323704e-05, |
| "loss": 0.0134, |
| "step": 6400 |
| }, |
| { |
| "epoch": 400.625, |
| "grad_norm": 1.0751221179962158, |
| "learning_rate": 9.994573298237784e-05, |
| "loss": 0.0118, |
| "step": 6410 |
| }, |
| { |
| "epoch": 401.25, |
| "grad_norm": 0.898923933506012, |
| "learning_rate": 9.994496009952837e-05, |
| "loss": 0.012, |
| "step": 6420 |
| }, |
| { |
| "epoch": 401.875, |
| "grad_norm": 0.8281941413879395, |
| "learning_rate": 9.994418175477316e-05, |
| "loss": 0.0124, |
| "step": 6430 |
| }, |
| { |
| "epoch": 402.5, |
| "grad_norm": 0.692079484462738, |
| "learning_rate": 9.994339794819733e-05, |
| "loss": 0.011, |
| "step": 6440 |
| }, |
| { |
| "epoch": 403.125, |
| "grad_norm": 0.7526706457138062, |
| "learning_rate": 9.994260867988658e-05, |
| "loss": 0.0121, |
| "step": 6450 |
| }, |
| { |
| "epoch": 403.75, |
| "grad_norm": 0.8704769015312195, |
| "learning_rate": 9.994181394992723e-05, |
| "loss": 0.0109, |
| "step": 6460 |
| }, |
| { |
| "epoch": 404.375, |
| "grad_norm": 0.8282954096794128, |
| "learning_rate": 9.994101375840618e-05, |
| "loss": 0.0107, |
| "step": 6470 |
| }, |
| { |
| "epoch": 405.0, |
| "grad_norm": 0.7742241621017456, |
| "learning_rate": 9.994020810541098e-05, |
| "loss": 0.0115, |
| "step": 6480 |
| }, |
| { |
| "epoch": 405.625, |
| "grad_norm": 0.7262750267982483, |
| "learning_rate": 9.99393969910297e-05, |
| "loss": 0.011, |
| "step": 6490 |
| }, |
| { |
| "epoch": 406.25, |
| "grad_norm": 0.8099271655082703, |
| "learning_rate": 9.993858041535104e-05, |
| "loss": 0.0126, |
| "step": 6500 |
| }, |
| { |
| "epoch": 406.875, |
| "grad_norm": 0.8308644890785217, |
| "learning_rate": 9.99377583784643e-05, |
| "loss": 0.0119, |
| "step": 6510 |
| }, |
| { |
| "epoch": 407.5, |
| "grad_norm": 0.900124728679657, |
| "learning_rate": 9.993693088045939e-05, |
| "loss": 0.0112, |
| "step": 6520 |
| }, |
| { |
| "epoch": 408.125, |
| "grad_norm": 0.8921932578086853, |
| "learning_rate": 9.99360979214268e-05, |
| "loss": 0.0112, |
| "step": 6530 |
| }, |
| { |
| "epoch": 408.75, |
| "grad_norm": 0.9405972361564636, |
| "learning_rate": 9.99352595014576e-05, |
| "loss": 0.0107, |
| "step": 6540 |
| }, |
| { |
| "epoch": 409.375, |
| "grad_norm": 0.8436768651008606, |
| "learning_rate": 9.993441562064354e-05, |
| "loss": 0.0113, |
| "step": 6550 |
| }, |
| { |
| "epoch": 410.0, |
| "grad_norm": 0.804934024810791, |
| "learning_rate": 9.993356627907685e-05, |
| "loss": 0.0117, |
| "step": 6560 |
| }, |
| { |
| "epoch": 410.625, |
| "grad_norm": 0.945950984954834, |
| "learning_rate": 9.99327114768504e-05, |
| "loss": 0.0125, |
| "step": 6570 |
| }, |
| { |
| "epoch": 411.25, |
| "grad_norm": 0.925611674785614, |
| "learning_rate": 9.99318512140577e-05, |
| "loss": 0.0121, |
| "step": 6580 |
| }, |
| { |
| "epoch": 411.875, |
| "grad_norm": 0.9319164156913757, |
| "learning_rate": 9.993098549079284e-05, |
| "loss": 0.012, |
| "step": 6590 |
| }, |
| { |
| "epoch": 412.5, |
| "grad_norm": 1.0740889310836792, |
| "learning_rate": 9.993011430715047e-05, |
| "loss": 0.0137, |
| "step": 6600 |
| }, |
| { |
| "epoch": 413.125, |
| "grad_norm": 1.1442779302597046, |
| "learning_rate": 9.992923766322586e-05, |
| "loss": 0.0125, |
| "step": 6610 |
| }, |
| { |
| "epoch": 413.75, |
| "grad_norm": 0.8353562355041504, |
| "learning_rate": 9.99283555591149e-05, |
| "loss": 0.0119, |
| "step": 6620 |
| }, |
| { |
| "epoch": 414.375, |
| "grad_norm": 0.720020592212677, |
| "learning_rate": 9.992746799491404e-05, |
| "loss": 0.012, |
| "step": 6630 |
| }, |
| { |
| "epoch": 415.0, |
| "grad_norm": 0.7117792367935181, |
| "learning_rate": 9.992657497072033e-05, |
| "loss": 0.0118, |
| "step": 6640 |
| }, |
| { |
| "epoch": 415.625, |
| "grad_norm": 0.8013281226158142, |
| "learning_rate": 9.992567648663147e-05, |
| "loss": 0.0134, |
| "step": 6650 |
| }, |
| { |
| "epoch": 416.25, |
| "grad_norm": 0.8130918145179749, |
| "learning_rate": 9.992477254274568e-05, |
| "loss": 0.0118, |
| "step": 6660 |
| }, |
| { |
| "epoch": 416.875, |
| "grad_norm": 0.7213727235794067, |
| "learning_rate": 9.992386313916183e-05, |
| "loss": 0.0111, |
| "step": 6670 |
| }, |
| { |
| "epoch": 417.5, |
| "grad_norm": 0.6564821004867554, |
| "learning_rate": 9.992294827597934e-05, |
| "loss": 0.0113, |
| "step": 6680 |
| }, |
| { |
| "epoch": 418.125, |
| "grad_norm": 0.6905478239059448, |
| "learning_rate": 9.992202795329831e-05, |
| "loss": 0.012, |
| "step": 6690 |
| }, |
| { |
| "epoch": 418.75, |
| "grad_norm": 0.821371853351593, |
| "learning_rate": 9.992110217121936e-05, |
| "loss": 0.0128, |
| "step": 6700 |
| }, |
| { |
| "epoch": 419.375, |
| "grad_norm": 0.8834856152534485, |
| "learning_rate": 9.992017092984372e-05, |
| "loss": 0.0106, |
| "step": 6710 |
| }, |
| { |
| "epoch": 420.0, |
| "grad_norm": 0.8281375765800476, |
| "learning_rate": 9.991923422927326e-05, |
| "loss": 0.0115, |
| "step": 6720 |
| }, |
| { |
| "epoch": 420.625, |
| "grad_norm": 0.7310401797294617, |
| "learning_rate": 9.991829206961037e-05, |
| "loss": 0.0101, |
| "step": 6730 |
| }, |
| { |
| "epoch": 421.25, |
| "grad_norm": 0.7845788598060608, |
| "learning_rate": 9.991734445095813e-05, |
| "loss": 0.0105, |
| "step": 6740 |
| }, |
| { |
| "epoch": 421.875, |
| "grad_norm": 0.8412182331085205, |
| "learning_rate": 9.991639137342015e-05, |
| "loss": 0.0111, |
| "step": 6750 |
| }, |
| { |
| "epoch": 422.5, |
| "grad_norm": 0.7537260055541992, |
| "learning_rate": 9.991543283710064e-05, |
| "loss": 0.0113, |
| "step": 6760 |
| }, |
| { |
| "epoch": 423.125, |
| "grad_norm": 0.6647925972938538, |
| "learning_rate": 9.991446884210445e-05, |
| "loss": 0.0119, |
| "step": 6770 |
| }, |
| { |
| "epoch": 423.75, |
| "grad_norm": 0.7035212516784668, |
| "learning_rate": 9.9913499388537e-05, |
| "loss": 0.0097, |
| "step": 6780 |
| }, |
| { |
| "epoch": 424.375, |
| "grad_norm": 0.7553647756576538, |
| "learning_rate": 9.99125244765043e-05, |
| "loss": 0.01, |
| "step": 6790 |
| }, |
| { |
| "epoch": 425.0, |
| "grad_norm": 0.7420441508293152, |
| "learning_rate": 9.991154410611296e-05, |
| "loss": 0.0114, |
| "step": 6800 |
| }, |
| { |
| "epoch": 425.625, |
| "grad_norm": 0.6657722592353821, |
| "learning_rate": 9.99105582774702e-05, |
| "loss": 0.0109, |
| "step": 6810 |
| }, |
| { |
| "epoch": 426.25, |
| "grad_norm": 0.7254708409309387, |
| "learning_rate": 9.990956699068384e-05, |
| "loss": 0.0116, |
| "step": 6820 |
| }, |
| { |
| "epoch": 426.875, |
| "grad_norm": 0.8595172166824341, |
| "learning_rate": 9.990857024586224e-05, |
| "loss": 0.0113, |
| "step": 6830 |
| }, |
| { |
| "epoch": 427.5, |
| "grad_norm": 0.9384058117866516, |
| "learning_rate": 9.990756804311446e-05, |
| "loss": 0.0112, |
| "step": 6840 |
| }, |
| { |
| "epoch": 428.125, |
| "grad_norm": 0.8805230855941772, |
| "learning_rate": 9.990656038255006e-05, |
| "loss": 0.0097, |
| "step": 6850 |
| }, |
| { |
| "epoch": 428.75, |
| "grad_norm": 0.8175788521766663, |
| "learning_rate": 9.990554726427926e-05, |
| "loss": 0.0111, |
| "step": 6860 |
| }, |
| { |
| "epoch": 429.375, |
| "grad_norm": 0.8853816390037537, |
| "learning_rate": 9.990452868841284e-05, |
| "loss": 0.0119, |
| "step": 6870 |
| }, |
| { |
| "epoch": 430.0, |
| "grad_norm": 0.8857107758522034, |
| "learning_rate": 9.99035046550622e-05, |
| "loss": 0.0111, |
| "step": 6880 |
| }, |
| { |
| "epoch": 430.625, |
| "grad_norm": 0.7299500107765198, |
| "learning_rate": 9.99024751643393e-05, |
| "loss": 0.0113, |
| "step": 6890 |
| }, |
| { |
| "epoch": 431.25, |
| "grad_norm": 0.6400433778762817, |
| "learning_rate": 9.990144021635677e-05, |
| "loss": 0.0106, |
| "step": 6900 |
| }, |
| { |
| "epoch": 431.875, |
| "grad_norm": 0.6998341083526611, |
| "learning_rate": 9.990039981122775e-05, |
| "loss": 0.0117, |
| "step": 6910 |
| }, |
| { |
| "epoch": 432.5, |
| "grad_norm": 0.6614553928375244, |
| "learning_rate": 9.989935394906602e-05, |
| "loss": 0.0108, |
| "step": 6920 |
| }, |
| { |
| "epoch": 433.125, |
| "grad_norm": 0.8393372893333435, |
| "learning_rate": 9.989830262998598e-05, |
| "loss": 0.013, |
| "step": 6930 |
| }, |
| { |
| "epoch": 433.75, |
| "grad_norm": 0.7657507061958313, |
| "learning_rate": 9.989724585410259e-05, |
| "loss": 0.0115, |
| "step": 6940 |
| }, |
| { |
| "epoch": 434.375, |
| "grad_norm": 0.6534095406532288, |
| "learning_rate": 9.989618362153139e-05, |
| "loss": 0.0116, |
| "step": 6950 |
| }, |
| { |
| "epoch": 435.0, |
| "grad_norm": 0.5554938316345215, |
| "learning_rate": 9.989511593238859e-05, |
| "loss": 0.0101, |
| "step": 6960 |
| }, |
| { |
| "epoch": 435.625, |
| "grad_norm": 0.633482813835144, |
| "learning_rate": 9.98940427867909e-05, |
| "loss": 0.0105, |
| "step": 6970 |
| }, |
| { |
| "epoch": 436.25, |
| "grad_norm": 0.5705388784408569, |
| "learning_rate": 9.989296418485573e-05, |
| "loss": 0.0127, |
| "step": 6980 |
| }, |
| { |
| "epoch": 436.875, |
| "grad_norm": 0.560118556022644, |
| "learning_rate": 9.989188012670101e-05, |
| "loss": 0.0102, |
| "step": 6990 |
| }, |
| { |
| "epoch": 437.5, |
| "grad_norm": 0.5680054426193237, |
| "learning_rate": 9.989079061244528e-05, |
| "loss": 0.0108, |
| "step": 7000 |
| }, |
| { |
| "epoch": 438.125, |
| "grad_norm": 0.6862987875938416, |
| "learning_rate": 9.988969564220769e-05, |
| "loss": 0.011, |
| "step": 7010 |
| }, |
| { |
| "epoch": 438.75, |
| "grad_norm": 0.6537038683891296, |
| "learning_rate": 9.988859521610801e-05, |
| "loss": 0.011, |
| "step": 7020 |
| }, |
| { |
| "epoch": 439.375, |
| "grad_norm": 0.7102747559547424, |
| "learning_rate": 9.988748933426656e-05, |
| "loss": 0.0114, |
| "step": 7030 |
| }, |
| { |
| "epoch": 440.0, |
| "grad_norm": 0.7743424773216248, |
| "learning_rate": 9.988637799680428e-05, |
| "loss": 0.0114, |
| "step": 7040 |
| }, |
| { |
| "epoch": 440.625, |
| "grad_norm": 0.7385320663452148, |
| "learning_rate": 9.98852612038427e-05, |
| "loss": 0.0102, |
| "step": 7050 |
| }, |
| { |
| "epoch": 441.25, |
| "grad_norm": 0.7324809432029724, |
| "learning_rate": 9.988413895550397e-05, |
| "loss": 0.0095, |
| "step": 7060 |
| }, |
| { |
| "epoch": 441.875, |
| "grad_norm": 0.6916730999946594, |
| "learning_rate": 9.98830112519108e-05, |
| "loss": 0.012, |
| "step": 7070 |
| }, |
| { |
| "epoch": 442.5, |
| "grad_norm": 0.5611207485198975, |
| "learning_rate": 9.98818780931865e-05, |
| "loss": 0.0099, |
| "step": 7080 |
| }, |
| { |
| "epoch": 443.125, |
| "grad_norm": 0.6533907055854797, |
| "learning_rate": 9.988073947945502e-05, |
| "loss": 0.0097, |
| "step": 7090 |
| }, |
| { |
| "epoch": 443.75, |
| "grad_norm": 0.8114432096481323, |
| "learning_rate": 9.987959541084087e-05, |
| "loss": 0.0096, |
| "step": 7100 |
| }, |
| { |
| "epoch": 444.375, |
| "grad_norm": 0.5615887641906738, |
| "learning_rate": 9.987844588746915e-05, |
| "loss": 0.0085, |
| "step": 7110 |
| }, |
| { |
| "epoch": 445.0, |
| "grad_norm": 0.6930294632911682, |
| "learning_rate": 9.987729090946558e-05, |
| "loss": 0.0096, |
| "step": 7120 |
| }, |
| { |
| "epoch": 445.625, |
| "grad_norm": 0.7661396265029907, |
| "learning_rate": 9.987613047695647e-05, |
| "loss": 0.0099, |
| "step": 7130 |
| }, |
| { |
| "epoch": 446.25, |
| "grad_norm": 0.7148370146751404, |
| "learning_rate": 9.987496459006871e-05, |
| "loss": 0.0092, |
| "step": 7140 |
| }, |
| { |
| "epoch": 446.875, |
| "grad_norm": 0.9166419506072998, |
| "learning_rate": 9.987379324892982e-05, |
| "loss": 0.0113, |
| "step": 7150 |
| }, |
| { |
| "epoch": 447.5, |
| "grad_norm": 0.8479866981506348, |
| "learning_rate": 9.987261645366788e-05, |
| "loss": 0.0101, |
| "step": 7160 |
| }, |
| { |
| "epoch": 448.125, |
| "grad_norm": 0.6642943620681763, |
| "learning_rate": 9.987143420441158e-05, |
| "loss": 0.01, |
| "step": 7170 |
| }, |
| { |
| "epoch": 448.75, |
| "grad_norm": 0.6536929607391357, |
| "learning_rate": 9.987024650129022e-05, |
| "loss": 0.0102, |
| "step": 7180 |
| }, |
| { |
| "epoch": 449.375, |
| "grad_norm": 0.8466352820396423, |
| "learning_rate": 9.986905334443368e-05, |
| "loss": 0.0117, |
| "step": 7190 |
| }, |
| { |
| "epoch": 450.0, |
| "grad_norm": 0.7626696228981018, |
| "learning_rate": 9.986785473397245e-05, |
| "loss": 0.0103, |
| "step": 7200 |
| }, |
| { |
| "epoch": 450.625, |
| "grad_norm": 0.7776815891265869, |
| "learning_rate": 9.98666506700376e-05, |
| "loss": 0.0115, |
| "step": 7210 |
| }, |
| { |
| "epoch": 451.25, |
| "grad_norm": 1.0069994926452637, |
| "learning_rate": 9.986544115276081e-05, |
| "loss": 0.0128, |
| "step": 7220 |
| }, |
| { |
| "epoch": 451.875, |
| "grad_norm": 0.8917898535728455, |
| "learning_rate": 9.986422618227433e-05, |
| "loss": 0.0109, |
| "step": 7230 |
| }, |
| { |
| "epoch": 452.5, |
| "grad_norm": 0.7967373728752136, |
| "learning_rate": 9.986300575871106e-05, |
| "loss": 0.0116, |
| "step": 7240 |
| }, |
| { |
| "epoch": 453.125, |
| "grad_norm": 0.6768915057182312, |
| "learning_rate": 9.986177988220444e-05, |
| "loss": 0.0099, |
| "step": 7250 |
| }, |
| { |
| "epoch": 453.75, |
| "grad_norm": 0.7261281609535217, |
| "learning_rate": 9.986054855288856e-05, |
| "loss": 0.0103, |
| "step": 7260 |
| }, |
| { |
| "epoch": 454.375, |
| "grad_norm": 0.7023577094078064, |
| "learning_rate": 9.985931177089802e-05, |
| "loss": 0.0112, |
| "step": 7270 |
| }, |
| { |
| "epoch": 455.0, |
| "grad_norm": 0.5902547836303711, |
| "learning_rate": 9.985806953636814e-05, |
| "loss": 0.0098, |
| "step": 7280 |
| }, |
| { |
| "epoch": 455.625, |
| "grad_norm": 0.6153225302696228, |
| "learning_rate": 9.985682184943471e-05, |
| "loss": 0.0111, |
| "step": 7290 |
| }, |
| { |
| "epoch": 456.25, |
| "grad_norm": 0.6180372834205627, |
| "learning_rate": 9.98555687102342e-05, |
| "loss": 0.0096, |
| "step": 7300 |
| }, |
| { |
| "epoch": 456.875, |
| "grad_norm": 0.7004512548446655, |
| "learning_rate": 9.985431011890367e-05, |
| "loss": 0.0107, |
| "step": 7310 |
| }, |
| { |
| "epoch": 457.5, |
| "grad_norm": 0.8018707036972046, |
| "learning_rate": 9.985304607558075e-05, |
| "loss": 0.0104, |
| "step": 7320 |
| }, |
| { |
| "epoch": 458.125, |
| "grad_norm": 0.6335276365280151, |
| "learning_rate": 9.985177658040364e-05, |
| "loss": 0.0102, |
| "step": 7330 |
| }, |
| { |
| "epoch": 458.75, |
| "grad_norm": 0.8146379590034485, |
| "learning_rate": 9.985050163351119e-05, |
| "loss": 0.0106, |
| "step": 7340 |
| }, |
| { |
| "epoch": 459.375, |
| "grad_norm": 0.7131094336509705, |
| "learning_rate": 9.984922123504286e-05, |
| "loss": 0.0093, |
| "step": 7350 |
| }, |
| { |
| "epoch": 460.0, |
| "grad_norm": 0.647261381149292, |
| "learning_rate": 9.984793538513862e-05, |
| "loss": 0.0103, |
| "step": 7360 |
| }, |
| { |
| "epoch": 460.625, |
| "grad_norm": 0.6319265961647034, |
| "learning_rate": 9.984664408393912e-05, |
| "loss": 0.01, |
| "step": 7370 |
| }, |
| { |
| "epoch": 461.25, |
| "grad_norm": 0.5086030960083008, |
| "learning_rate": 9.984534733158556e-05, |
| "loss": 0.0105, |
| "step": 7380 |
| }, |
| { |
| "epoch": 461.875, |
| "grad_norm": 0.6072356104850769, |
| "learning_rate": 9.984404512821977e-05, |
| "loss": 0.0089, |
| "step": 7390 |
| }, |
| { |
| "epoch": 462.5, |
| "grad_norm": 0.6429985165596008, |
| "learning_rate": 9.984273747398411e-05, |
| "loss": 0.0102, |
| "step": 7400 |
| }, |
| { |
| "epoch": 463.125, |
| "grad_norm": 0.5790389776229858, |
| "learning_rate": 9.984142436902165e-05, |
| "loss": 0.0104, |
| "step": 7410 |
| }, |
| { |
| "epoch": 463.75, |
| "grad_norm": 0.701302170753479, |
| "learning_rate": 9.984010581347596e-05, |
| "loss": 0.0089, |
| "step": 7420 |
| }, |
| { |
| "epoch": 464.375, |
| "grad_norm": 0.6150535941123962, |
| "learning_rate": 9.983878180749121e-05, |
| "loss": 0.0098, |
| "step": 7430 |
| }, |
| { |
| "epoch": 465.0, |
| "grad_norm": 0.6264737248420715, |
| "learning_rate": 9.983745235121222e-05, |
| "loss": 0.0093, |
| "step": 7440 |
| }, |
| { |
| "epoch": 465.625, |
| "grad_norm": 0.5422685146331787, |
| "learning_rate": 9.983611744478438e-05, |
| "loss": 0.0104, |
| "step": 7450 |
| }, |
| { |
| "epoch": 466.25, |
| "grad_norm": 0.6225709915161133, |
| "learning_rate": 9.983477708835365e-05, |
| "loss": 0.0101, |
| "step": 7460 |
| }, |
| { |
| "epoch": 466.875, |
| "grad_norm": 0.5819153785705566, |
| "learning_rate": 9.983343128206664e-05, |
| "loss": 0.0106, |
| "step": 7470 |
| }, |
| { |
| "epoch": 467.5, |
| "grad_norm": 0.7224307060241699, |
| "learning_rate": 9.983208002607049e-05, |
| "loss": 0.0107, |
| "step": 7480 |
| }, |
| { |
| "epoch": 468.125, |
| "grad_norm": 0.7039912939071655, |
| "learning_rate": 9.9830723320513e-05, |
| "loss": 0.0103, |
| "step": 7490 |
| }, |
| { |
| "epoch": 468.75, |
| "grad_norm": 0.6855049133300781, |
| "learning_rate": 9.982936116554254e-05, |
| "loss": 0.0088, |
| "step": 7500 |
| }, |
| { |
| "epoch": 469.375, |
| "grad_norm": 0.6290692687034607, |
| "learning_rate": 9.982799356130803e-05, |
| "loss": 0.0106, |
| "step": 7510 |
| }, |
| { |
| "epoch": 470.0, |
| "grad_norm": 0.5659773945808411, |
| "learning_rate": 9.982662050795908e-05, |
| "loss": 0.0106, |
| "step": 7520 |
| }, |
| { |
| "epoch": 470.625, |
| "grad_norm": 0.5781753063201904, |
| "learning_rate": 9.982524200564583e-05, |
| "loss": 0.0104, |
| "step": 7530 |
| }, |
| { |
| "epoch": 471.25, |
| "grad_norm": 0.6644128561019897, |
| "learning_rate": 9.982385805451901e-05, |
| "loss": 0.0103, |
| "step": 7540 |
| }, |
| { |
| "epoch": 471.875, |
| "grad_norm": 0.7858973145484924, |
| "learning_rate": 9.982246865472998e-05, |
| "loss": 0.0093, |
| "step": 7550 |
| }, |
| { |
| "epoch": 472.5, |
| "grad_norm": 0.7751241326332092, |
| "learning_rate": 9.982107380643069e-05, |
| "loss": 0.0101, |
| "step": 7560 |
| }, |
| { |
| "epoch": 473.125, |
| "grad_norm": 0.8384363055229187, |
| "learning_rate": 9.981967350977368e-05, |
| "loss": 0.0107, |
| "step": 7570 |
| }, |
| { |
| "epoch": 473.75, |
| "grad_norm": 0.8584528565406799, |
| "learning_rate": 9.981826776491208e-05, |
| "loss": 0.0095, |
| "step": 7580 |
| }, |
| { |
| "epoch": 474.375, |
| "grad_norm": 0.995509922504425, |
| "learning_rate": 9.98168565719996e-05, |
| "loss": 0.0115, |
| "step": 7590 |
| }, |
| { |
| "epoch": 475.0, |
| "grad_norm": 0.8218001127243042, |
| "learning_rate": 9.98154399311906e-05, |
| "loss": 0.011, |
| "step": 7600 |
| }, |
| { |
| "epoch": 475.625, |
| "grad_norm": 0.7269605994224548, |
| "learning_rate": 9.981401784263997e-05, |
| "loss": 0.0103, |
| "step": 7610 |
| }, |
| { |
| "epoch": 476.25, |
| "grad_norm": 0.6630864143371582, |
| "learning_rate": 9.981259030650326e-05, |
| "loss": 0.0092, |
| "step": 7620 |
| }, |
| { |
| "epoch": 476.875, |
| "grad_norm": 0.7081972360610962, |
| "learning_rate": 9.981115732293655e-05, |
| "loss": 0.0084, |
| "step": 7630 |
| }, |
| { |
| "epoch": 477.5, |
| "grad_norm": 0.6908837556838989, |
| "learning_rate": 9.980971889209659e-05, |
| "loss": 0.0096, |
| "step": 7640 |
| }, |
| { |
| "epoch": 478.125, |
| "grad_norm": 0.6863625645637512, |
| "learning_rate": 9.980827501414064e-05, |
| "loss": 0.0094, |
| "step": 7650 |
| }, |
| { |
| "epoch": 478.75, |
| "grad_norm": 0.628754734992981, |
| "learning_rate": 9.980682568922663e-05, |
| "loss": 0.0087, |
| "step": 7660 |
| }, |
| { |
| "epoch": 479.375, |
| "grad_norm": 0.6461851000785828, |
| "learning_rate": 9.980537091751304e-05, |
| "loss": 0.0091, |
| "step": 7670 |
| }, |
| { |
| "epoch": 480.0, |
| "grad_norm": 0.6353027820587158, |
| "learning_rate": 9.980391069915897e-05, |
| "loss": 0.009, |
| "step": 7680 |
| }, |
| { |
| "epoch": 480.625, |
| "grad_norm": 0.5868967175483704, |
| "learning_rate": 9.98024450343241e-05, |
| "loss": 0.0101, |
| "step": 7690 |
| }, |
| { |
| "epoch": 481.25, |
| "grad_norm": 0.6688029766082764, |
| "learning_rate": 9.980097392316872e-05, |
| "loss": 0.0083, |
| "step": 7700 |
| }, |
| { |
| "epoch": 481.875, |
| "grad_norm": 0.5620129108428955, |
| "learning_rate": 9.97994973658537e-05, |
| "loss": 0.0088, |
| "step": 7710 |
| }, |
| { |
| "epoch": 482.5, |
| "grad_norm": 0.6990760564804077, |
| "learning_rate": 9.979801536254054e-05, |
| "loss": 0.008, |
| "step": 7720 |
| }, |
| { |
| "epoch": 483.125, |
| "grad_norm": 0.5271959900856018, |
| "learning_rate": 9.979652791339127e-05, |
| "loss": 0.01, |
| "step": 7730 |
| }, |
| { |
| "epoch": 483.75, |
| "grad_norm": 0.717219352722168, |
| "learning_rate": 9.97950350185686e-05, |
| "loss": 0.0104, |
| "step": 7740 |
| }, |
| { |
| "epoch": 484.375, |
| "grad_norm": 0.5886634588241577, |
| "learning_rate": 9.979353667823574e-05, |
| "loss": 0.0086, |
| "step": 7750 |
| }, |
| { |
| "epoch": 485.0, |
| "grad_norm": 0.7227773070335388, |
| "learning_rate": 9.979203289255658e-05, |
| "loss": 0.0094, |
| "step": 7760 |
| }, |
| { |
| "epoch": 485.625, |
| "grad_norm": 0.6355369687080383, |
| "learning_rate": 9.979052366169557e-05, |
| "loss": 0.0098, |
| "step": 7770 |
| }, |
| { |
| "epoch": 486.25, |
| "grad_norm": 0.6813123226165771, |
| "learning_rate": 9.978900898581775e-05, |
| "loss": 0.01, |
| "step": 7780 |
| }, |
| { |
| "epoch": 486.875, |
| "grad_norm": 0.659970223903656, |
| "learning_rate": 9.978748886508875e-05, |
| "loss": 0.0088, |
| "step": 7790 |
| }, |
| { |
| "epoch": 487.5, |
| "grad_norm": 0.7737880349159241, |
| "learning_rate": 9.978596329967484e-05, |
| "loss": 0.0106, |
| "step": 7800 |
| }, |
| { |
| "epoch": 488.125, |
| "grad_norm": 0.7581619024276733, |
| "learning_rate": 9.978443228974284e-05, |
| "loss": 0.0087, |
| "step": 7810 |
| }, |
| { |
| "epoch": 488.75, |
| "grad_norm": 0.7430512309074402, |
| "learning_rate": 9.978289583546015e-05, |
| "loss": 0.0093, |
| "step": 7820 |
| }, |
| { |
| "epoch": 489.375, |
| "grad_norm": 0.6579586863517761, |
| "learning_rate": 9.978135393699484e-05, |
| "loss": 0.0092, |
| "step": 7830 |
| }, |
| { |
| "epoch": 490.0, |
| "grad_norm": 0.6156346797943115, |
| "learning_rate": 9.977980659451548e-05, |
| "loss": 0.0099, |
| "step": 7840 |
| }, |
| { |
| "epoch": 490.625, |
| "grad_norm": 0.6920315623283386, |
| "learning_rate": 9.977825380819135e-05, |
| "loss": 0.0101, |
| "step": 7850 |
| }, |
| { |
| "epoch": 491.25, |
| "grad_norm": 0.7143272161483765, |
| "learning_rate": 9.97766955781922e-05, |
| "loss": 0.0102, |
| "step": 7860 |
| }, |
| { |
| "epoch": 491.875, |
| "grad_norm": 0.6715136170387268, |
| "learning_rate": 9.977513190468848e-05, |
| "loss": 0.0092, |
| "step": 7870 |
| }, |
| { |
| "epoch": 492.5, |
| "grad_norm": 0.792335569858551, |
| "learning_rate": 9.977356278785116e-05, |
| "loss": 0.0094, |
| "step": 7880 |
| }, |
| { |
| "epoch": 493.125, |
| "grad_norm": 0.8089608550071716, |
| "learning_rate": 9.977198822785184e-05, |
| "loss": 0.0099, |
| "step": 7890 |
| }, |
| { |
| "epoch": 493.75, |
| "grad_norm": 0.727393627166748, |
| "learning_rate": 9.977040822486273e-05, |
| "loss": 0.0093, |
| "step": 7900 |
| }, |
| { |
| "epoch": 494.375, |
| "grad_norm": 0.7314863204956055, |
| "learning_rate": 9.97688227790566e-05, |
| "loss": 0.01, |
| "step": 7910 |
| }, |
| { |
| "epoch": 495.0, |
| "grad_norm": 0.6197735667228699, |
| "learning_rate": 9.976723189060684e-05, |
| "loss": 0.0093, |
| "step": 7920 |
| }, |
| { |
| "epoch": 495.625, |
| "grad_norm": 0.6258811950683594, |
| "learning_rate": 9.976563555968742e-05, |
| "loss": 0.0089, |
| "step": 7930 |
| }, |
| { |
| "epoch": 496.25, |
| "grad_norm": 0.6613799929618835, |
| "learning_rate": 9.976403378647292e-05, |
| "loss": 0.0099, |
| "step": 7940 |
| }, |
| { |
| "epoch": 496.875, |
| "grad_norm": 0.5219643115997314, |
| "learning_rate": 9.97624265711385e-05, |
| "loss": 0.0102, |
| "step": 7950 |
| }, |
| { |
| "epoch": 497.5, |
| "grad_norm": 0.5938867330551147, |
| "learning_rate": 9.976081391385993e-05, |
| "loss": 0.0101, |
| "step": 7960 |
| }, |
| { |
| "epoch": 498.125, |
| "grad_norm": 0.5493279099464417, |
| "learning_rate": 9.975919581481356e-05, |
| "loss": 0.01, |
| "step": 7970 |
| }, |
| { |
| "epoch": 498.75, |
| "grad_norm": 0.5064048767089844, |
| "learning_rate": 9.975757227417634e-05, |
| "loss": 0.0092, |
| "step": 7980 |
| }, |
| { |
| "epoch": 499.375, |
| "grad_norm": 0.5940008163452148, |
| "learning_rate": 9.975594329212586e-05, |
| "loss": 0.0097, |
| "step": 7990 |
| }, |
| { |
| "epoch": 500.0, |
| "grad_norm": 0.5561034083366394, |
| "learning_rate": 9.97543088688402e-05, |
| "loss": 0.0092, |
| "step": 8000 |
| }, |
| { |
| "epoch": 500.625, |
| "grad_norm": 0.587040901184082, |
| "learning_rate": 9.975266900449814e-05, |
| "loss": 0.0105, |
| "step": 8010 |
| }, |
| { |
| "epoch": 501.25, |
| "grad_norm": 0.6578340530395508, |
| "learning_rate": 9.975102369927898e-05, |
| "loss": 0.0088, |
| "step": 8020 |
| }, |
| { |
| "epoch": 501.875, |
| "grad_norm": 0.6301031708717346, |
| "learning_rate": 9.974937295336269e-05, |
| "loss": 0.0096, |
| "step": 8030 |
| }, |
| { |
| "epoch": 502.5, |
| "grad_norm": 0.49646562337875366, |
| "learning_rate": 9.974771676692975e-05, |
| "loss": 0.0094, |
| "step": 8040 |
| }, |
| { |
| "epoch": 503.125, |
| "grad_norm": 0.5952965021133423, |
| "learning_rate": 9.974605514016131e-05, |
| "loss": 0.0088, |
| "step": 8050 |
| }, |
| { |
| "epoch": 503.75, |
| "grad_norm": 0.6772691607475281, |
| "learning_rate": 9.974438807323907e-05, |
| "loss": 0.0093, |
| "step": 8060 |
| }, |
| { |
| "epoch": 504.375, |
| "grad_norm": 0.5597459673881531, |
| "learning_rate": 9.974271556634535e-05, |
| "loss": 0.0088, |
| "step": 8070 |
| }, |
| { |
| "epoch": 505.0, |
| "grad_norm": 0.8469547033309937, |
| "learning_rate": 9.974103761966302e-05, |
| "loss": 0.0106, |
| "step": 8080 |
| }, |
| { |
| "epoch": 505.625, |
| "grad_norm": 0.775303065776825, |
| "learning_rate": 9.973935423337563e-05, |
| "loss": 0.0097, |
| "step": 8090 |
| }, |
| { |
| "epoch": 506.25, |
| "grad_norm": 0.7015887498855591, |
| "learning_rate": 9.973766540766722e-05, |
| "loss": 0.0095, |
| "step": 8100 |
| }, |
| { |
| "epoch": 506.875, |
| "grad_norm": 0.6640006303787231, |
| "learning_rate": 9.97359711427225e-05, |
| "loss": 0.0111, |
| "step": 8110 |
| }, |
| { |
| "epoch": 507.5, |
| "grad_norm": 0.6578481793403625, |
| "learning_rate": 9.973427143872677e-05, |
| "loss": 0.0088, |
| "step": 8120 |
| }, |
| { |
| "epoch": 508.125, |
| "grad_norm": 0.6807109713554382, |
| "learning_rate": 9.973256629586589e-05, |
| "loss": 0.0102, |
| "step": 8130 |
| }, |
| { |
| "epoch": 508.75, |
| "grad_norm": 0.5422506332397461, |
| "learning_rate": 9.973085571432632e-05, |
| "loss": 0.0101, |
| "step": 8140 |
| }, |
| { |
| "epoch": 509.375, |
| "grad_norm": 0.5136811137199402, |
| "learning_rate": 9.972913969429513e-05, |
| "loss": 0.0097, |
| "step": 8150 |
| }, |
| { |
| "epoch": 510.0, |
| "grad_norm": 0.693134069442749, |
| "learning_rate": 9.972741823596e-05, |
| "loss": 0.0094, |
| "step": 8160 |
| }, |
| { |
| "epoch": 510.625, |
| "grad_norm": 0.611960232257843, |
| "learning_rate": 9.972569133950917e-05, |
| "loss": 0.0089, |
| "step": 8170 |
| }, |
| { |
| "epoch": 511.25, |
| "grad_norm": 0.617396354675293, |
| "learning_rate": 9.972395900513151e-05, |
| "loss": 0.0088, |
| "step": 8180 |
| }, |
| { |
| "epoch": 511.875, |
| "grad_norm": 0.6016327738761902, |
| "learning_rate": 9.972222123301645e-05, |
| "loss": 0.0095, |
| "step": 8190 |
| }, |
| { |
| "epoch": 512.5, |
| "grad_norm": 0.5470365881919861, |
| "learning_rate": 9.972047802335403e-05, |
| "loss": 0.0096, |
| "step": 8200 |
| }, |
| { |
| "epoch": 513.125, |
| "grad_norm": 0.6275759935379028, |
| "learning_rate": 9.971872937633488e-05, |
| "loss": 0.0085, |
| "step": 8210 |
| }, |
| { |
| "epoch": 513.75, |
| "grad_norm": 0.5876614451408386, |
| "learning_rate": 9.971697529215024e-05, |
| "loss": 0.0093, |
| "step": 8220 |
| }, |
| { |
| "epoch": 514.375, |
| "grad_norm": 0.57300865650177, |
| "learning_rate": 9.971521577099192e-05, |
| "loss": 0.0091, |
| "step": 8230 |
| }, |
| { |
| "epoch": 515.0, |
| "grad_norm": 0.6590330600738525, |
| "learning_rate": 9.971345081305236e-05, |
| "loss": 0.0094, |
| "step": 8240 |
| }, |
| { |
| "epoch": 515.625, |
| "grad_norm": 0.7168742418289185, |
| "learning_rate": 9.971168041852456e-05, |
| "loss": 0.0091, |
| "step": 8250 |
| }, |
| { |
| "epoch": 516.25, |
| "grad_norm": 0.7002500295639038, |
| "learning_rate": 9.970990458760215e-05, |
| "loss": 0.0082, |
| "step": 8260 |
| }, |
| { |
| "epoch": 516.875, |
| "grad_norm": 0.5979912877082825, |
| "learning_rate": 9.970812332047929e-05, |
| "loss": 0.0083, |
| "step": 8270 |
| }, |
| { |
| "epoch": 517.5, |
| "grad_norm": 0.6995880603790283, |
| "learning_rate": 9.97063366173508e-05, |
| "loss": 0.0083, |
| "step": 8280 |
| }, |
| { |
| "epoch": 518.125, |
| "grad_norm": 0.6054606437683105, |
| "learning_rate": 9.970454447841207e-05, |
| "loss": 0.0086, |
| "step": 8290 |
| }, |
| { |
| "epoch": 518.75, |
| "grad_norm": 0.6761727333068848, |
| "learning_rate": 9.970274690385909e-05, |
| "loss": 0.0091, |
| "step": 8300 |
| }, |
| { |
| "epoch": 519.375, |
| "grad_norm": 0.7297013401985168, |
| "learning_rate": 9.970094389388844e-05, |
| "loss": 0.0101, |
| "step": 8310 |
| }, |
| { |
| "epoch": 520.0, |
| "grad_norm": 0.6933302879333496, |
| "learning_rate": 9.969913544869728e-05, |
| "loss": 0.009, |
| "step": 8320 |
| }, |
| { |
| "epoch": 520.625, |
| "grad_norm": 0.632068932056427, |
| "learning_rate": 9.96973215684834e-05, |
| "loss": 0.0092, |
| "step": 8330 |
| }, |
| { |
| "epoch": 521.25, |
| "grad_norm": 0.5213248133659363, |
| "learning_rate": 9.969550225344513e-05, |
| "loss": 0.0095, |
| "step": 8340 |
| }, |
| { |
| "epoch": 521.875, |
| "grad_norm": 0.5387685298919678, |
| "learning_rate": 9.969367750378147e-05, |
| "loss": 0.0072, |
| "step": 8350 |
| }, |
| { |
| "epoch": 522.5, |
| "grad_norm": 0.5790697336196899, |
| "learning_rate": 9.969184731969194e-05, |
| "loss": 0.0098, |
| "step": 8360 |
| }, |
| { |
| "epoch": 523.125, |
| "grad_norm": 0.6181520819664001, |
| "learning_rate": 9.96900117013767e-05, |
| "loss": 0.0094, |
| "step": 8370 |
| }, |
| { |
| "epoch": 523.75, |
| "grad_norm": 0.6647499799728394, |
| "learning_rate": 9.96881706490365e-05, |
| "loss": 0.0092, |
| "step": 8380 |
| }, |
| { |
| "epoch": 524.375, |
| "grad_norm": 0.5274850726127625, |
| "learning_rate": 9.968632416287265e-05, |
| "loss": 0.0092, |
| "step": 8390 |
| }, |
| { |
| "epoch": 525.0, |
| "grad_norm": 0.5954369902610779, |
| "learning_rate": 9.96844722430871e-05, |
| "loss": 0.0083, |
| "step": 8400 |
| }, |
| { |
| "epoch": 525.625, |
| "grad_norm": 0.5637514591217041, |
| "learning_rate": 9.968261488988235e-05, |
| "loss": 0.0096, |
| "step": 8410 |
| }, |
| { |
| "epoch": 526.25, |
| "grad_norm": 0.5467987656593323, |
| "learning_rate": 9.968075210346155e-05, |
| "loss": 0.0087, |
| "step": 8420 |
| }, |
| { |
| "epoch": 526.875, |
| "grad_norm": 0.6766216158866882, |
| "learning_rate": 9.967888388402839e-05, |
| "loss": 0.0098, |
| "step": 8430 |
| }, |
| { |
| "epoch": 527.5, |
| "grad_norm": 0.689804196357727, |
| "learning_rate": 9.967701023178717e-05, |
| "loss": 0.0094, |
| "step": 8440 |
| }, |
| { |
| "epoch": 528.125, |
| "grad_norm": 0.6711739301681519, |
| "learning_rate": 9.967513114694282e-05, |
| "loss": 0.0098, |
| "step": 8450 |
| }, |
| { |
| "epoch": 528.75, |
| "grad_norm": 0.7609061002731323, |
| "learning_rate": 9.967324662970079e-05, |
| "loss": 0.0091, |
| "step": 8460 |
| }, |
| { |
| "epoch": 529.375, |
| "grad_norm": 0.6599430441856384, |
| "learning_rate": 9.96713566802672e-05, |
| "loss": 0.0096, |
| "step": 8470 |
| }, |
| { |
| "epoch": 530.0, |
| "grad_norm": 0.6817207932472229, |
| "learning_rate": 9.966946129884873e-05, |
| "loss": 0.0093, |
| "step": 8480 |
| }, |
| { |
| "epoch": 530.625, |
| "grad_norm": 0.8081104755401611, |
| "learning_rate": 9.966756048565265e-05, |
| "loss": 0.01, |
| "step": 8490 |
| }, |
| { |
| "epoch": 531.25, |
| "grad_norm": 0.6982617378234863, |
| "learning_rate": 9.966565424088681e-05, |
| "loss": 0.0088, |
| "step": 8500 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 100000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 6250, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 256, |
| "trial_name": null, |
| "trial_params": null |
| } |