{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 44.96402877697842, "eval_steps": 500, "global_step": 60000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.008992805755395683, "grad_norm": 12.169702529907227, "learning_rate": 3.0000000000000004e-07, "loss": 1.7876, "step": 10 }, { "epoch": 0.017985611510791366, "grad_norm": 31.994789123535156, "learning_rate": 6.333333333333333e-07, "loss": 1.8836, "step": 20 }, { "epoch": 0.02697841726618705, "grad_norm": 11.541727066040039, "learning_rate": 9.666666666666668e-07, "loss": 1.6674, "step": 30 }, { "epoch": 0.03597122302158273, "grad_norm": 10.795075416564941, "learning_rate": 1.3e-06, "loss": 1.3167, "step": 40 }, { "epoch": 0.044964028776978415, "grad_norm": 6.521556377410889, "learning_rate": 1.6333333333333333e-06, "loss": 0.9712, "step": 50 }, { "epoch": 0.0539568345323741, "grad_norm": 3.1484618186950684, "learning_rate": 1.9666666666666668e-06, "loss": 0.6134, "step": 60 }, { "epoch": 0.06294964028776978, "grad_norm": 1.6573251485824585, "learning_rate": 2.3e-06, "loss": 0.3729, "step": 70 }, { "epoch": 0.07194244604316546, "grad_norm": 1.3628209829330444, "learning_rate": 2.6333333333333337e-06, "loss": 0.3045, "step": 80 }, { "epoch": 0.08093525179856115, "grad_norm": 0.9302336573600769, "learning_rate": 2.966666666666667e-06, "loss": 0.235, "step": 90 }, { "epoch": 0.08992805755395683, "grad_norm": 1.042898178100586, "learning_rate": 3.3e-06, "loss": 0.1895, "step": 100 }, { "epoch": 0.09892086330935251, "grad_norm": 0.9959041476249695, "learning_rate": 3.633333333333334e-06, "loss": 0.1525, "step": 110 }, { "epoch": 0.1079136690647482, "grad_norm": 1.173815131187439, "learning_rate": 3.966666666666667e-06, "loss": 0.1417, "step": 120 }, { "epoch": 0.11690647482014388, "grad_norm": 0.5417676568031311, "learning_rate": 4.2999999999999995e-06, "loss": 0.107, "step": 130 }, { "epoch": 0.12589928057553956, "grad_norm": 1.062164545059204, "learning_rate": 4.633333333333334e-06, "loss": 0.0938, "step": 140 }, { "epoch": 0.13489208633093525, "grad_norm": 1.0245305299758911, "learning_rate": 4.966666666666667e-06, "loss": 0.102, "step": 150 }, { "epoch": 0.14388489208633093, "grad_norm": 0.5010718107223511, "learning_rate": 5.3e-06, "loss": 0.0883, "step": 160 }, { "epoch": 0.1528776978417266, "grad_norm": 0.8752794861793518, "learning_rate": 5.633333333333333e-06, "loss": 0.0844, "step": 170 }, { "epoch": 0.1618705035971223, "grad_norm": 0.613207221031189, "learning_rate": 5.9666666666666666e-06, "loss": 0.0717, "step": 180 }, { "epoch": 0.17086330935251798, "grad_norm": 0.7363653779029846, "learning_rate": 6.300000000000001e-06, "loss": 0.0804, "step": 190 }, { "epoch": 0.17985611510791366, "grad_norm": 0.788270890712738, "learning_rate": 6.633333333333333e-06, "loss": 0.0767, "step": 200 }, { "epoch": 0.18884892086330934, "grad_norm": 0.7177644968032837, "learning_rate": 6.966666666666667e-06, "loss": 0.0705, "step": 210 }, { "epoch": 0.19784172661870503, "grad_norm": 0.7640016674995422, "learning_rate": 7.2999999999999996e-06, "loss": 0.0703, "step": 220 }, { "epoch": 0.2068345323741007, "grad_norm": 0.6877925395965576, "learning_rate": 7.633333333333334e-06, "loss": 0.0645, "step": 230 }, { "epoch": 0.2158273381294964, "grad_norm": 0.6750697493553162, "learning_rate": 7.966666666666666e-06, "loss": 0.0673, "step": 240 }, { "epoch": 0.22482014388489208, "grad_norm": 0.7784804105758667, "learning_rate": 8.3e-06, "loss": 0.0602, "step": 250 }, { "epoch": 0.23381294964028776, "grad_norm": 0.6669899225234985, "learning_rate": 8.633333333333334e-06, "loss": 0.0591, "step": 260 }, { "epoch": 0.24280575539568344, "grad_norm": 0.6916087865829468, "learning_rate": 8.966666666666668e-06, "loss": 0.0563, "step": 270 }, { "epoch": 0.2517985611510791, "grad_norm": 0.8705959916114807, "learning_rate": 9.3e-06, "loss": 0.06, "step": 280 }, { "epoch": 0.2607913669064748, "grad_norm": 0.5803012847900391, "learning_rate": 9.633333333333335e-06, "loss": 0.0534, "step": 290 }, { "epoch": 0.2697841726618705, "grad_norm": 0.8907013535499573, "learning_rate": 9.966666666666667e-06, "loss": 0.0613, "step": 300 }, { "epoch": 0.2787769784172662, "grad_norm": 0.684184193611145, "learning_rate": 1.03e-05, "loss": 0.0574, "step": 310 }, { "epoch": 0.28776978417266186, "grad_norm": 0.9376711249351501, "learning_rate": 1.0633333333333334e-05, "loss": 0.0633, "step": 320 }, { "epoch": 0.29676258992805754, "grad_norm": 0.7373517155647278, "learning_rate": 1.0966666666666666e-05, "loss": 0.0538, "step": 330 }, { "epoch": 0.3057553956834532, "grad_norm": 0.8107944130897522, "learning_rate": 1.13e-05, "loss": 0.0502, "step": 340 }, { "epoch": 0.3147482014388489, "grad_norm": 0.589606761932373, "learning_rate": 1.1633333333333334e-05, "loss": 0.0559, "step": 350 }, { "epoch": 0.3237410071942446, "grad_norm": 0.8796077966690063, "learning_rate": 1.1966666666666668e-05, "loss": 0.057, "step": 360 }, { "epoch": 0.3327338129496403, "grad_norm": 0.8390140533447266, "learning_rate": 1.23e-05, "loss": 0.0458, "step": 370 }, { "epoch": 0.34172661870503596, "grad_norm": 0.6230068802833557, "learning_rate": 1.2633333333333333e-05, "loss": 0.0494, "step": 380 }, { "epoch": 0.35071942446043164, "grad_norm": 0.7000166773796082, "learning_rate": 1.2966666666666669e-05, "loss": 0.0535, "step": 390 }, { "epoch": 0.3597122302158273, "grad_norm": 0.6203688383102417, "learning_rate": 1.3300000000000001e-05, "loss": 0.0442, "step": 400 }, { "epoch": 0.368705035971223, "grad_norm": 0.6010410189628601, "learning_rate": 1.3633333333333334e-05, "loss": 0.0485, "step": 410 }, { "epoch": 0.3776978417266187, "grad_norm": 0.594441831111908, "learning_rate": 1.3966666666666666e-05, "loss": 0.0508, "step": 420 }, { "epoch": 0.38669064748201437, "grad_norm": 0.7176153063774109, "learning_rate": 1.43e-05, "loss": 0.0493, "step": 430 }, { "epoch": 0.39568345323741005, "grad_norm": 0.5175239443778992, "learning_rate": 1.4633333333333334e-05, "loss": 0.0504, "step": 440 }, { "epoch": 0.40467625899280574, "grad_norm": 0.804223895072937, "learning_rate": 1.4966666666666668e-05, "loss": 0.0482, "step": 450 }, { "epoch": 0.4136690647482014, "grad_norm": 0.5307528972625732, "learning_rate": 1.53e-05, "loss": 0.0423, "step": 460 }, { "epoch": 0.4226618705035971, "grad_norm": 0.8359518647193909, "learning_rate": 1.563333333333333e-05, "loss": 0.0489, "step": 470 }, { "epoch": 0.4316546762589928, "grad_norm": 0.7311592102050781, "learning_rate": 1.5966666666666667e-05, "loss": 0.0381, "step": 480 }, { "epoch": 0.44064748201438847, "grad_norm": 0.6870697736740112, "learning_rate": 1.63e-05, "loss": 0.045, "step": 490 }, { "epoch": 0.44964028776978415, "grad_norm": 0.4180646538734436, "learning_rate": 1.6633333333333336e-05, "loss": 0.0379, "step": 500 }, { "epoch": 0.45863309352517984, "grad_norm": 1.303950309753418, "learning_rate": 1.6966666666666668e-05, "loss": 0.0451, "step": 510 }, { "epoch": 0.4676258992805755, "grad_norm": 0.6716282963752747, "learning_rate": 1.73e-05, "loss": 0.0389, "step": 520 }, { "epoch": 0.4766187050359712, "grad_norm": 0.7494242191314697, "learning_rate": 1.7633333333333336e-05, "loss": 0.0501, "step": 530 }, { "epoch": 0.4856115107913669, "grad_norm": 0.6588238477706909, "learning_rate": 1.796666666666667e-05, "loss": 0.0513, "step": 540 }, { "epoch": 0.49460431654676257, "grad_norm": 0.6895037293434143, "learning_rate": 1.83e-05, "loss": 0.0386, "step": 550 }, { "epoch": 0.5035971223021583, "grad_norm": 0.693341076374054, "learning_rate": 1.8633333333333333e-05, "loss": 0.0403, "step": 560 }, { "epoch": 0.512589928057554, "grad_norm": 0.6730033755302429, "learning_rate": 1.896666666666667e-05, "loss": 0.0463, "step": 570 }, { "epoch": 0.5215827338129496, "grad_norm": 0.8265236616134644, "learning_rate": 1.93e-05, "loss": 0.042, "step": 580 }, { "epoch": 0.5305755395683454, "grad_norm": 0.5746334195137024, "learning_rate": 1.9633333333333334e-05, "loss": 0.0356, "step": 590 }, { "epoch": 0.539568345323741, "grad_norm": 0.7619888186454773, "learning_rate": 1.9966666666666666e-05, "loss": 0.0415, "step": 600 }, { "epoch": 0.5485611510791367, "grad_norm": 0.6619823575019836, "learning_rate": 2.0300000000000002e-05, "loss": 0.0477, "step": 610 }, { "epoch": 0.5575539568345323, "grad_norm": 1.006982684135437, "learning_rate": 2.0633333333333335e-05, "loss": 0.0463, "step": 620 }, { "epoch": 0.5665467625899281, "grad_norm": 0.6192636489868164, "learning_rate": 2.0966666666666667e-05, "loss": 0.0423, "step": 630 }, { "epoch": 0.5755395683453237, "grad_norm": 0.6061815023422241, "learning_rate": 2.13e-05, "loss": 0.0408, "step": 640 }, { "epoch": 0.5845323741007195, "grad_norm": 0.6457885503768921, "learning_rate": 2.1633333333333332e-05, "loss": 0.0386, "step": 650 }, { "epoch": 0.5935251798561151, "grad_norm": 0.6767024397850037, "learning_rate": 2.1966666666666668e-05, "loss": 0.0398, "step": 660 }, { "epoch": 0.6025179856115108, "grad_norm": 0.5835772156715393, "learning_rate": 2.23e-05, "loss": 0.0293, "step": 670 }, { "epoch": 0.6115107913669064, "grad_norm": 0.8425363302230835, "learning_rate": 2.2633333333333336e-05, "loss": 0.0461, "step": 680 }, { "epoch": 0.6205035971223022, "grad_norm": 0.6132856011390686, "learning_rate": 2.2966666666666668e-05, "loss": 0.0341, "step": 690 }, { "epoch": 0.6294964028776978, "grad_norm": 0.5517874360084534, "learning_rate": 2.3300000000000004e-05, "loss": 0.0358, "step": 700 }, { "epoch": 0.6384892086330936, "grad_norm": 0.5572258830070496, "learning_rate": 2.3633333333333336e-05, "loss": 0.0449, "step": 710 }, { "epoch": 0.6474820143884892, "grad_norm": 0.7345280051231384, "learning_rate": 2.396666666666667e-05, "loss": 0.0365, "step": 720 }, { "epoch": 0.6564748201438849, "grad_norm": 0.6350587010383606, "learning_rate": 2.43e-05, "loss": 0.0418, "step": 730 }, { "epoch": 0.6654676258992805, "grad_norm": 0.5544281601905823, "learning_rate": 2.4633333333333334e-05, "loss": 0.0353, "step": 740 }, { "epoch": 0.6744604316546763, "grad_norm": 0.5243659615516663, "learning_rate": 2.496666666666667e-05, "loss": 0.0363, "step": 750 }, { "epoch": 0.6834532374100719, "grad_norm": 0.5307471752166748, "learning_rate": 2.5300000000000002e-05, "loss": 0.0326, "step": 760 }, { "epoch": 0.6924460431654677, "grad_norm": 0.6827713251113892, "learning_rate": 2.5633333333333338e-05, "loss": 0.0336, "step": 770 }, { "epoch": 0.7014388489208633, "grad_norm": 0.4750767648220062, "learning_rate": 2.5966666666666667e-05, "loss": 0.0394, "step": 780 }, { "epoch": 0.710431654676259, "grad_norm": 0.534871518611908, "learning_rate": 2.6300000000000002e-05, "loss": 0.0281, "step": 790 }, { "epoch": 0.7194244604316546, "grad_norm": 0.4586481750011444, "learning_rate": 2.663333333333333e-05, "loss": 0.036, "step": 800 }, { "epoch": 0.7284172661870504, "grad_norm": 0.5596522092819214, "learning_rate": 2.6966666666666667e-05, "loss": 0.0314, "step": 810 }, { "epoch": 0.737410071942446, "grad_norm": 0.6704754829406738, "learning_rate": 2.7300000000000003e-05, "loss": 0.0495, "step": 820 }, { "epoch": 0.7464028776978417, "grad_norm": 0.4135896563529968, "learning_rate": 2.7633333333333332e-05, "loss": 0.0358, "step": 830 }, { "epoch": 0.7553956834532374, "grad_norm": 0.7131924629211426, "learning_rate": 2.7966666666666668e-05, "loss": 0.0313, "step": 840 }, { "epoch": 0.7643884892086331, "grad_norm": 0.4585304856300354, "learning_rate": 2.83e-05, "loss": 0.0341, "step": 850 }, { "epoch": 0.7733812949640287, "grad_norm": 0.6051360368728638, "learning_rate": 2.8633333333333336e-05, "loss": 0.0368, "step": 860 }, { "epoch": 0.7823741007194245, "grad_norm": 0.6827794909477234, "learning_rate": 2.8966666666666668e-05, "loss": 0.0307, "step": 870 }, { "epoch": 0.7913669064748201, "grad_norm": 0.7061790823936462, "learning_rate": 2.93e-05, "loss": 0.0335, "step": 880 }, { "epoch": 0.8003597122302158, "grad_norm": 0.5370869040489197, "learning_rate": 2.9633333333333336e-05, "loss": 0.0294, "step": 890 }, { "epoch": 0.8093525179856115, "grad_norm": 0.46536925435066223, "learning_rate": 2.9966666666666672e-05, "loss": 0.0317, "step": 900 }, { "epoch": 0.8183453237410072, "grad_norm": 0.5787712335586548, "learning_rate": 3.03e-05, "loss": 0.0314, "step": 910 }, { "epoch": 0.8273381294964028, "grad_norm": 0.5565289855003357, "learning_rate": 3.063333333333334e-05, "loss": 0.0375, "step": 920 }, { "epoch": 0.8363309352517986, "grad_norm": 0.8085096478462219, "learning_rate": 3.096666666666666e-05, "loss": 0.0424, "step": 930 }, { "epoch": 0.8453237410071942, "grad_norm": 0.5221720337867737, "learning_rate": 3.13e-05, "loss": 0.0253, "step": 940 }, { "epoch": 0.85431654676259, "grad_norm": 0.3817703127861023, "learning_rate": 3.1633333333333334e-05, "loss": 0.026, "step": 950 }, { "epoch": 0.8633093525179856, "grad_norm": 0.6362994909286499, "learning_rate": 3.196666666666667e-05, "loss": 0.0333, "step": 960 }, { "epoch": 0.8723021582733813, "grad_norm": 0.6375539302825928, "learning_rate": 3.2300000000000006e-05, "loss": 0.0258, "step": 970 }, { "epoch": 0.8812949640287769, "grad_norm": 0.7257735729217529, "learning_rate": 3.263333333333333e-05, "loss": 0.0288, "step": 980 }, { "epoch": 0.8902877697841727, "grad_norm": 0.6481053233146667, "learning_rate": 3.296666666666667e-05, "loss": 0.0315, "step": 990 }, { "epoch": 0.8992805755395683, "grad_norm": 0.5899021625518799, "learning_rate": 3.33e-05, "loss": 0.0273, "step": 1000 }, { "epoch": 0.908273381294964, "grad_norm": 0.45100849866867065, "learning_rate": 3.3633333333333335e-05, "loss": 0.0338, "step": 1010 }, { "epoch": 0.9172661870503597, "grad_norm": 0.5714636445045471, "learning_rate": 3.396666666666667e-05, "loss": 0.028, "step": 1020 }, { "epoch": 0.9262589928057554, "grad_norm": 0.49513566493988037, "learning_rate": 3.430000000000001e-05, "loss": 0.0273, "step": 1030 }, { "epoch": 0.935251798561151, "grad_norm": 0.5994616150856018, "learning_rate": 3.463333333333333e-05, "loss": 0.0246, "step": 1040 }, { "epoch": 0.9442446043165468, "grad_norm": 0.4341832101345062, "learning_rate": 3.496666666666667e-05, "loss": 0.0274, "step": 1050 }, { "epoch": 0.9532374100719424, "grad_norm": 0.39078640937805176, "learning_rate": 3.53e-05, "loss": 0.0339, "step": 1060 }, { "epoch": 0.9622302158273381, "grad_norm": 0.3605891466140747, "learning_rate": 3.563333333333334e-05, "loss": 0.0236, "step": 1070 }, { "epoch": 0.9712230215827338, "grad_norm": 0.45421382784843445, "learning_rate": 3.596666666666667e-05, "loss": 0.0322, "step": 1080 }, { "epoch": 0.9802158273381295, "grad_norm": 0.5937280654907227, "learning_rate": 3.63e-05, "loss": 0.031, "step": 1090 }, { "epoch": 0.9892086330935251, "grad_norm": 0.3874131143093109, "learning_rate": 3.6633333333333334e-05, "loss": 0.0264, "step": 1100 }, { "epoch": 0.9982014388489209, "grad_norm": 0.3879619538784027, "learning_rate": 3.6966666666666666e-05, "loss": 0.0243, "step": 1110 }, { "epoch": 1.0071942446043165, "grad_norm": 0.5671226382255554, "learning_rate": 3.73e-05, "loss": 0.0244, "step": 1120 }, { "epoch": 1.0161870503597121, "grad_norm": 0.4576553404331207, "learning_rate": 3.763333333333334e-05, "loss": 0.0286, "step": 1130 }, { "epoch": 1.025179856115108, "grad_norm": 0.5765942335128784, "learning_rate": 3.796666666666667e-05, "loss": 0.025, "step": 1140 }, { "epoch": 1.0341726618705036, "grad_norm": 0.5159966945648193, "learning_rate": 3.83e-05, "loss": 0.0261, "step": 1150 }, { "epoch": 1.0431654676258992, "grad_norm": 0.29970479011535645, "learning_rate": 3.8633333333333335e-05, "loss": 0.0223, "step": 1160 }, { "epoch": 1.0521582733812949, "grad_norm": 0.44197365641593933, "learning_rate": 3.896666666666667e-05, "loss": 0.0303, "step": 1170 }, { "epoch": 1.0611510791366907, "grad_norm": 0.45418810844421387, "learning_rate": 3.9300000000000007e-05, "loss": 0.029, "step": 1180 }, { "epoch": 1.0701438848920863, "grad_norm": 0.3709140121936798, "learning_rate": 3.963333333333333e-05, "loss": 0.0259, "step": 1190 }, { "epoch": 1.079136690647482, "grad_norm": 0.33311301469802856, "learning_rate": 3.996666666666667e-05, "loss": 0.0278, "step": 1200 }, { "epoch": 1.0881294964028776, "grad_norm": 0.3058193027973175, "learning_rate": 4.0300000000000004e-05, "loss": 0.0324, "step": 1210 }, { "epoch": 1.0971223021582734, "grad_norm": 0.6065534949302673, "learning_rate": 4.0633333333333336e-05, "loss": 0.0296, "step": 1220 }, { "epoch": 1.106115107913669, "grad_norm": 0.36303427815437317, "learning_rate": 4.096666666666667e-05, "loss": 0.0237, "step": 1230 }, { "epoch": 1.1151079136690647, "grad_norm": 0.5038174986839294, "learning_rate": 4.13e-05, "loss": 0.0268, "step": 1240 }, { "epoch": 1.1241007194244603, "grad_norm": 0.3655582070350647, "learning_rate": 4.1633333333333333e-05, "loss": 0.023, "step": 1250 }, { "epoch": 1.1330935251798562, "grad_norm": 0.46073421835899353, "learning_rate": 4.196666666666667e-05, "loss": 0.0189, "step": 1260 }, { "epoch": 1.1420863309352518, "grad_norm": 0.4446021020412445, "learning_rate": 4.23e-05, "loss": 0.0187, "step": 1270 }, { "epoch": 1.1510791366906474, "grad_norm": 0.47919580340385437, "learning_rate": 4.263333333333334e-05, "loss": 0.0247, "step": 1280 }, { "epoch": 1.1600719424460433, "grad_norm": 0.4152076244354248, "learning_rate": 4.296666666666666e-05, "loss": 0.0216, "step": 1290 }, { "epoch": 1.169064748201439, "grad_norm": 0.4630049765110016, "learning_rate": 4.33e-05, "loss": 0.0251, "step": 1300 }, { "epoch": 1.1780575539568345, "grad_norm": 0.3760194182395935, "learning_rate": 4.3633333333333335e-05, "loss": 0.0209, "step": 1310 }, { "epoch": 1.1870503597122302, "grad_norm": 0.7935547828674316, "learning_rate": 4.396666666666667e-05, "loss": 0.0263, "step": 1320 }, { "epoch": 1.1960431654676258, "grad_norm": 0.6143286228179932, "learning_rate": 4.43e-05, "loss": 0.0288, "step": 1330 }, { "epoch": 1.2050359712230216, "grad_norm": 0.5034136176109314, "learning_rate": 4.463333333333334e-05, "loss": 0.0318, "step": 1340 }, { "epoch": 1.2140287769784173, "grad_norm": 0.40933260321617126, "learning_rate": 4.496666666666667e-05, "loss": 0.0203, "step": 1350 }, { "epoch": 1.223021582733813, "grad_norm": 0.4762915372848511, "learning_rate": 4.53e-05, "loss": 0.0322, "step": 1360 }, { "epoch": 1.2320143884892087, "grad_norm": 0.7031678557395935, "learning_rate": 4.5633333333333336e-05, "loss": 0.0245, "step": 1370 }, { "epoch": 1.2410071942446044, "grad_norm": 0.5315391421318054, "learning_rate": 4.596666666666667e-05, "loss": 0.0248, "step": 1380 }, { "epoch": 1.25, "grad_norm": 0.5610358715057373, "learning_rate": 4.630000000000001e-05, "loss": 0.026, "step": 1390 }, { "epoch": 1.2589928057553956, "grad_norm": 0.513306200504303, "learning_rate": 4.663333333333333e-05, "loss": 0.023, "step": 1400 }, { "epoch": 1.2679856115107913, "grad_norm": 0.449634850025177, "learning_rate": 4.696666666666667e-05, "loss": 0.0324, "step": 1410 }, { "epoch": 1.276978417266187, "grad_norm": 0.3906124532222748, "learning_rate": 4.73e-05, "loss": 0.0262, "step": 1420 }, { "epoch": 1.2859712230215827, "grad_norm": 0.3555466830730438, "learning_rate": 4.763333333333334e-05, "loss": 0.0274, "step": 1430 }, { "epoch": 1.2949640287769784, "grad_norm": 0.4193325340747833, "learning_rate": 4.796666666666667e-05, "loss": 0.02, "step": 1440 }, { "epoch": 1.3039568345323742, "grad_norm": 0.4070984721183777, "learning_rate": 4.83e-05, "loss": 0.0291, "step": 1450 }, { "epoch": 1.3129496402877698, "grad_norm": 0.2832242250442505, "learning_rate": 4.8633333333333334e-05, "loss": 0.0231, "step": 1460 }, { "epoch": 1.3219424460431655, "grad_norm": 0.6140715479850769, "learning_rate": 4.8966666666666667e-05, "loss": 0.0174, "step": 1470 }, { "epoch": 1.330935251798561, "grad_norm": 0.45311352610588074, "learning_rate": 4.93e-05, "loss": 0.024, "step": 1480 }, { "epoch": 1.3399280575539567, "grad_norm": 0.6416414380073547, "learning_rate": 4.963333333333334e-05, "loss": 0.0255, "step": 1490 }, { "epoch": 1.3489208633093526, "grad_norm": 0.5135415196418762, "learning_rate": 4.996666666666667e-05, "loss": 0.0293, "step": 1500 }, { "epoch": 1.3579136690647482, "grad_norm": 0.4779556095600128, "learning_rate": 5.03e-05, "loss": 0.0201, "step": 1510 }, { "epoch": 1.3669064748201438, "grad_norm": 0.5223605632781982, "learning_rate": 5.0633333333333335e-05, "loss": 0.0219, "step": 1520 }, { "epoch": 1.3758992805755397, "grad_norm": 0.5957224369049072, "learning_rate": 5.0966666666666674e-05, "loss": 0.0231, "step": 1530 }, { "epoch": 1.3848920863309353, "grad_norm": 0.5442118644714355, "learning_rate": 5.130000000000001e-05, "loss": 0.0194, "step": 1540 }, { "epoch": 1.393884892086331, "grad_norm": 0.3539281189441681, "learning_rate": 5.163333333333333e-05, "loss": 0.0275, "step": 1550 }, { "epoch": 1.4028776978417266, "grad_norm": 0.33964499831199646, "learning_rate": 5.196666666666667e-05, "loss": 0.0182, "step": 1560 }, { "epoch": 1.4118705035971222, "grad_norm": 0.503248929977417, "learning_rate": 5.2300000000000004e-05, "loss": 0.0194, "step": 1570 }, { "epoch": 1.420863309352518, "grad_norm": 0.38262856006622314, "learning_rate": 5.2633333333333336e-05, "loss": 0.0191, "step": 1580 }, { "epoch": 1.4298561151079137, "grad_norm": 0.36083510518074036, "learning_rate": 5.296666666666666e-05, "loss": 0.0221, "step": 1590 }, { "epoch": 1.4388489208633093, "grad_norm": 0.32721957564353943, "learning_rate": 5.330000000000001e-05, "loss": 0.0207, "step": 1600 }, { "epoch": 1.4478417266187051, "grad_norm": 0.615338146686554, "learning_rate": 5.3633333333333334e-05, "loss": 0.025, "step": 1610 }, { "epoch": 1.4568345323741008, "grad_norm": 0.49179086089134216, "learning_rate": 5.3966666666666666e-05, "loss": 0.0234, "step": 1620 }, { "epoch": 1.4658273381294964, "grad_norm": 0.6005310416221619, "learning_rate": 5.4300000000000005e-05, "loss": 0.0282, "step": 1630 }, { "epoch": 1.474820143884892, "grad_norm": 0.37723806500434875, "learning_rate": 5.463333333333334e-05, "loss": 0.0226, "step": 1640 }, { "epoch": 1.4838129496402876, "grad_norm": 0.3596477508544922, "learning_rate": 5.496666666666666e-05, "loss": 0.0192, "step": 1650 }, { "epoch": 1.4928057553956835, "grad_norm": 0.41189396381378174, "learning_rate": 5.530000000000001e-05, "loss": 0.0307, "step": 1660 }, { "epoch": 1.5017985611510791, "grad_norm": 0.5236110687255859, "learning_rate": 5.5633333333333335e-05, "loss": 0.022, "step": 1670 }, { "epoch": 1.5107913669064748, "grad_norm": 0.4981384575366974, "learning_rate": 5.596666666666667e-05, "loss": 0.0213, "step": 1680 }, { "epoch": 1.5197841726618706, "grad_norm": 0.47838637232780457, "learning_rate": 5.63e-05, "loss": 0.0318, "step": 1690 }, { "epoch": 1.5287769784172662, "grad_norm": 0.496573805809021, "learning_rate": 5.663333333333334e-05, "loss": 0.0255, "step": 1700 }, { "epoch": 1.5377697841726619, "grad_norm": 0.5643261075019836, "learning_rate": 5.696666666666667e-05, "loss": 0.0234, "step": 1710 }, { "epoch": 1.5467625899280577, "grad_norm": 0.40274539589881897, "learning_rate": 5.73e-05, "loss": 0.0263, "step": 1720 }, { "epoch": 1.5557553956834531, "grad_norm": 0.37294310331344604, "learning_rate": 5.7633333333333336e-05, "loss": 0.0216, "step": 1730 }, { "epoch": 1.564748201438849, "grad_norm": 0.4680860638618469, "learning_rate": 5.796666666666667e-05, "loss": 0.028, "step": 1740 }, { "epoch": 1.5737410071942446, "grad_norm": 0.39273378252983093, "learning_rate": 5.83e-05, "loss": 0.0204, "step": 1750 }, { "epoch": 1.5827338129496402, "grad_norm": 0.4621880352497101, "learning_rate": 5.863333333333334e-05, "loss": 0.0234, "step": 1760 }, { "epoch": 1.591726618705036, "grad_norm": 0.41845959424972534, "learning_rate": 5.896666666666667e-05, "loss": 0.0208, "step": 1770 }, { "epoch": 1.6007194244604317, "grad_norm": 0.344439297914505, "learning_rate": 5.93e-05, "loss": 0.0178, "step": 1780 }, { "epoch": 1.6097122302158273, "grad_norm": 0.38512709736824036, "learning_rate": 5.9633333333333344e-05, "loss": 0.0235, "step": 1790 }, { "epoch": 1.6187050359712232, "grad_norm": 0.3003654181957245, "learning_rate": 5.996666666666667e-05, "loss": 0.0257, "step": 1800 }, { "epoch": 1.6276978417266186, "grad_norm": 0.32357653975486755, "learning_rate": 6.03e-05, "loss": 0.0185, "step": 1810 }, { "epoch": 1.6366906474820144, "grad_norm": 0.31201982498168945, "learning_rate": 6.063333333333333e-05, "loss": 0.0157, "step": 1820 }, { "epoch": 1.64568345323741, "grad_norm": 0.48603180050849915, "learning_rate": 6.0966666666666674e-05, "loss": 0.0202, "step": 1830 }, { "epoch": 1.6546762589928057, "grad_norm": 0.44929057359695435, "learning_rate": 6.13e-05, "loss": 0.024, "step": 1840 }, { "epoch": 1.6636690647482015, "grad_norm": 0.33624380826950073, "learning_rate": 6.163333333333333e-05, "loss": 0.0183, "step": 1850 }, { "epoch": 1.6726618705035972, "grad_norm": 0.3135296106338501, "learning_rate": 6.196666666666668e-05, "loss": 0.0197, "step": 1860 }, { "epoch": 1.6816546762589928, "grad_norm": 0.32382723689079285, "learning_rate": 6.23e-05, "loss": 0.0152, "step": 1870 }, { "epoch": 1.6906474820143886, "grad_norm": 0.24591860175132751, "learning_rate": 6.263333333333333e-05, "loss": 0.0199, "step": 1880 }, { "epoch": 1.699640287769784, "grad_norm": 0.2572685182094574, "learning_rate": 6.296666666666667e-05, "loss": 0.023, "step": 1890 }, { "epoch": 1.70863309352518, "grad_norm": 0.2963395118713379, "learning_rate": 6.330000000000001e-05, "loss": 0.027, "step": 1900 }, { "epoch": 1.7176258992805755, "grad_norm": 0.3914031982421875, "learning_rate": 6.363333333333334e-05, "loss": 0.0193, "step": 1910 }, { "epoch": 1.7266187050359711, "grad_norm": 0.37751758098602295, "learning_rate": 6.396666666666667e-05, "loss": 0.0155, "step": 1920 }, { "epoch": 1.735611510791367, "grad_norm": 0.48819205164909363, "learning_rate": 6.43e-05, "loss": 0.0235, "step": 1930 }, { "epoch": 1.7446043165467626, "grad_norm": 0.4641638994216919, "learning_rate": 6.463333333333334e-05, "loss": 0.018, "step": 1940 }, { "epoch": 1.7535971223021583, "grad_norm": 0.26353082060813904, "learning_rate": 6.496666666666667e-05, "loss": 0.0221, "step": 1950 }, { "epoch": 1.762589928057554, "grad_norm": 0.5137572884559631, "learning_rate": 6.53e-05, "loss": 0.0236, "step": 1960 }, { "epoch": 1.7715827338129495, "grad_norm": 0.3412526845932007, "learning_rate": 6.563333333333333e-05, "loss": 0.0212, "step": 1970 }, { "epoch": 1.7805755395683454, "grad_norm": 0.37288081645965576, "learning_rate": 6.596666666666667e-05, "loss": 0.0258, "step": 1980 }, { "epoch": 1.789568345323741, "grad_norm": 0.3389125466346741, "learning_rate": 6.630000000000001e-05, "loss": 0.0201, "step": 1990 }, { "epoch": 1.7985611510791366, "grad_norm": 0.3326701521873474, "learning_rate": 6.663333333333333e-05, "loss": 0.0199, "step": 2000 }, { "epoch": 1.8075539568345325, "grad_norm": 0.39092501997947693, "learning_rate": 6.696666666666666e-05, "loss": 0.0178, "step": 2010 }, { "epoch": 1.816546762589928, "grad_norm": 0.24465425312519073, "learning_rate": 6.730000000000001e-05, "loss": 0.017, "step": 2020 }, { "epoch": 1.8255395683453237, "grad_norm": 0.4638669490814209, "learning_rate": 6.763333333333334e-05, "loss": 0.0194, "step": 2030 }, { "epoch": 1.8345323741007196, "grad_norm": 0.3515363931655884, "learning_rate": 6.796666666666666e-05, "loss": 0.019, "step": 2040 }, { "epoch": 1.843525179856115, "grad_norm": 0.37603479623794556, "learning_rate": 6.83e-05, "loss": 0.0181, "step": 2050 }, { "epoch": 1.8525179856115108, "grad_norm": 0.3387431502342224, "learning_rate": 6.863333333333334e-05, "loss": 0.0202, "step": 2060 }, { "epoch": 1.8615107913669064, "grad_norm": 0.3482540249824524, "learning_rate": 6.896666666666667e-05, "loss": 0.0191, "step": 2070 }, { "epoch": 1.870503597122302, "grad_norm": 0.3204061985015869, "learning_rate": 6.93e-05, "loss": 0.0215, "step": 2080 }, { "epoch": 1.879496402877698, "grad_norm": 0.42274248600006104, "learning_rate": 6.963333333333334e-05, "loss": 0.0182, "step": 2090 }, { "epoch": 1.8884892086330936, "grad_norm": 0.3649995028972626, "learning_rate": 6.996666666666667e-05, "loss": 0.0185, "step": 2100 }, { "epoch": 1.8974820143884892, "grad_norm": 0.31374239921569824, "learning_rate": 7.03e-05, "loss": 0.0235, "step": 2110 }, { "epoch": 1.906474820143885, "grad_norm": 0.4202151596546173, "learning_rate": 7.063333333333333e-05, "loss": 0.0257, "step": 2120 }, { "epoch": 1.9154676258992804, "grad_norm": 0.357906699180603, "learning_rate": 7.096666666666667e-05, "loss": 0.0271, "step": 2130 }, { "epoch": 1.9244604316546763, "grad_norm": 0.41871264576911926, "learning_rate": 7.13e-05, "loss": 0.0222, "step": 2140 }, { "epoch": 1.933453237410072, "grad_norm": 0.3728399872779846, "learning_rate": 7.163333333333334e-05, "loss": 0.0181, "step": 2150 }, { "epoch": 1.9424460431654675, "grad_norm": 0.41087913513183594, "learning_rate": 7.196666666666668e-05, "loss": 0.0281, "step": 2160 }, { "epoch": 1.9514388489208634, "grad_norm": 0.4281725585460663, "learning_rate": 7.23e-05, "loss": 0.0163, "step": 2170 }, { "epoch": 1.960431654676259, "grad_norm": 0.3197653591632843, "learning_rate": 7.263333333333334e-05, "loss": 0.0134, "step": 2180 }, { "epoch": 1.9694244604316546, "grad_norm": 0.4316820502281189, "learning_rate": 7.296666666666667e-05, "loss": 0.0203, "step": 2190 }, { "epoch": 1.9784172661870505, "grad_norm": 0.3656926453113556, "learning_rate": 7.33e-05, "loss": 0.0217, "step": 2200 }, { "epoch": 1.987410071942446, "grad_norm": 0.4207707643508911, "learning_rate": 7.363333333333334e-05, "loss": 0.0238, "step": 2210 }, { "epoch": 1.9964028776978417, "grad_norm": 0.4236559271812439, "learning_rate": 7.396666666666667e-05, "loss": 0.019, "step": 2220 }, { "epoch": 2.0053956834532376, "grad_norm": 0.404480904340744, "learning_rate": 7.43e-05, "loss": 0.0221, "step": 2230 }, { "epoch": 2.014388489208633, "grad_norm": 0.28360095620155334, "learning_rate": 7.463333333333334e-05, "loss": 0.0188, "step": 2240 }, { "epoch": 2.023381294964029, "grad_norm": 0.39416590332984924, "learning_rate": 7.496666666666667e-05, "loss": 0.0237, "step": 2250 }, { "epoch": 2.0323741007194243, "grad_norm": 0.36612486839294434, "learning_rate": 7.53e-05, "loss": 0.0282, "step": 2260 }, { "epoch": 2.04136690647482, "grad_norm": 0.3753468692302704, "learning_rate": 7.563333333333333e-05, "loss": 0.0239, "step": 2270 }, { "epoch": 2.050359712230216, "grad_norm": 0.38467639684677124, "learning_rate": 7.596666666666668e-05, "loss": 0.0191, "step": 2280 }, { "epoch": 2.0593525179856114, "grad_norm": 0.3249014914035797, "learning_rate": 7.630000000000001e-05, "loss": 0.0235, "step": 2290 }, { "epoch": 2.068345323741007, "grad_norm": 0.29532772302627563, "learning_rate": 7.663333333333333e-05, "loss": 0.0192, "step": 2300 }, { "epoch": 2.077338129496403, "grad_norm": 0.3942927122116089, "learning_rate": 7.696666666666668e-05, "loss": 0.0171, "step": 2310 }, { "epoch": 2.0863309352517985, "grad_norm": 0.46342945098876953, "learning_rate": 7.730000000000001e-05, "loss": 0.0215, "step": 2320 }, { "epoch": 2.0953237410071943, "grad_norm": 0.3227763772010803, "learning_rate": 7.763333333333334e-05, "loss": 0.0186, "step": 2330 }, { "epoch": 2.1043165467625897, "grad_norm": 0.3496449291706085, "learning_rate": 7.796666666666666e-05, "loss": 0.0189, "step": 2340 }, { "epoch": 2.1133093525179856, "grad_norm": 0.6242960691452026, "learning_rate": 7.83e-05, "loss": 0.0226, "step": 2350 }, { "epoch": 2.1223021582733814, "grad_norm": 0.36715495586395264, "learning_rate": 7.863333333333334e-05, "loss": 0.02, "step": 2360 }, { "epoch": 2.131294964028777, "grad_norm": 0.5717832446098328, "learning_rate": 7.896666666666667e-05, "loss": 0.0224, "step": 2370 }, { "epoch": 2.1402877697841727, "grad_norm": 0.40052375197410583, "learning_rate": 7.93e-05, "loss": 0.0211, "step": 2380 }, { "epoch": 2.1492805755395685, "grad_norm": 0.28874796628952026, "learning_rate": 7.963333333333334e-05, "loss": 0.0271, "step": 2390 }, { "epoch": 2.158273381294964, "grad_norm": 0.3861978352069855, "learning_rate": 7.996666666666667e-05, "loss": 0.0177, "step": 2400 }, { "epoch": 2.16726618705036, "grad_norm": 0.4737129211425781, "learning_rate": 8.030000000000001e-05, "loss": 0.0244, "step": 2410 }, { "epoch": 2.176258992805755, "grad_norm": 0.41579470038414, "learning_rate": 8.063333333333333e-05, "loss": 0.0248, "step": 2420 }, { "epoch": 2.185251798561151, "grad_norm": 0.48143866658210754, "learning_rate": 8.096666666666667e-05, "loss": 0.0213, "step": 2430 }, { "epoch": 2.194244604316547, "grad_norm": 0.5678229331970215, "learning_rate": 8.13e-05, "loss": 0.0221, "step": 2440 }, { "epoch": 2.2032374100719423, "grad_norm": 0.5354724526405334, "learning_rate": 8.163333333333334e-05, "loss": 0.027, "step": 2450 }, { "epoch": 2.212230215827338, "grad_norm": 0.5312917828559875, "learning_rate": 8.196666666666668e-05, "loss": 0.0239, "step": 2460 }, { "epoch": 2.221223021582734, "grad_norm": 0.43779459595680237, "learning_rate": 8.23e-05, "loss": 0.0203, "step": 2470 }, { "epoch": 2.2302158273381294, "grad_norm": 0.3978138267993927, "learning_rate": 8.263333333333334e-05, "loss": 0.0214, "step": 2480 }, { "epoch": 2.2392086330935252, "grad_norm": 0.4071294963359833, "learning_rate": 8.296666666666667e-05, "loss": 0.0261, "step": 2490 }, { "epoch": 2.2482014388489207, "grad_norm": 0.432433158159256, "learning_rate": 8.33e-05, "loss": 0.0264, "step": 2500 }, { "epoch": 2.2571942446043165, "grad_norm": 0.4422755837440491, "learning_rate": 8.363333333333334e-05, "loss": 0.0217, "step": 2510 }, { "epoch": 2.2661870503597124, "grad_norm": 0.3350636065006256, "learning_rate": 8.396666666666667e-05, "loss": 0.02, "step": 2520 }, { "epoch": 2.2751798561151078, "grad_norm": 0.4476014971733093, "learning_rate": 8.43e-05, "loss": 0.0197, "step": 2530 }, { "epoch": 2.2841726618705036, "grad_norm": 0.3110078275203705, "learning_rate": 8.463333333333335e-05, "loss": 0.0279, "step": 2540 }, { "epoch": 2.2931654676258995, "grad_norm": 0.5398663878440857, "learning_rate": 8.496666666666667e-05, "loss": 0.0177, "step": 2550 }, { "epoch": 2.302158273381295, "grad_norm": 0.3977122902870178, "learning_rate": 8.53e-05, "loss": 0.0263, "step": 2560 }, { "epoch": 2.3111510791366907, "grad_norm": 0.43960878252983093, "learning_rate": 8.563333333333333e-05, "loss": 0.0227, "step": 2570 }, { "epoch": 2.3201438848920866, "grad_norm": 0.3769584894180298, "learning_rate": 8.596666666666668e-05, "loss": 0.0248, "step": 2580 }, { "epoch": 2.329136690647482, "grad_norm": 0.34481334686279297, "learning_rate": 8.63e-05, "loss": 0.0189, "step": 2590 }, { "epoch": 2.338129496402878, "grad_norm": 0.3963090777397156, "learning_rate": 8.663333333333333e-05, "loss": 0.0243, "step": 2600 }, { "epoch": 2.347122302158273, "grad_norm": 0.44480210542678833, "learning_rate": 8.696666666666668e-05, "loss": 0.0247, "step": 2610 }, { "epoch": 2.356115107913669, "grad_norm": 0.31026244163513184, "learning_rate": 8.730000000000001e-05, "loss": 0.0191, "step": 2620 }, { "epoch": 2.365107913669065, "grad_norm": 0.438397616147995, "learning_rate": 8.763333333333334e-05, "loss": 0.0237, "step": 2630 }, { "epoch": 2.3741007194244603, "grad_norm": 0.41911861300468445, "learning_rate": 8.796666666666667e-05, "loss": 0.0233, "step": 2640 }, { "epoch": 2.383093525179856, "grad_norm": 0.3531447649002075, "learning_rate": 8.83e-05, "loss": 0.0203, "step": 2650 }, { "epoch": 2.3920863309352516, "grad_norm": 0.3271470069885254, "learning_rate": 8.863333333333334e-05, "loss": 0.0193, "step": 2660 }, { "epoch": 2.4010791366906474, "grad_norm": 0.4113585650920868, "learning_rate": 8.896666666666667e-05, "loss": 0.025, "step": 2670 }, { "epoch": 2.4100719424460433, "grad_norm": 0.3433830738067627, "learning_rate": 8.93e-05, "loss": 0.019, "step": 2680 }, { "epoch": 2.4190647482014387, "grad_norm": 0.30605700612068176, "learning_rate": 8.963333333333333e-05, "loss": 0.0166, "step": 2690 }, { "epoch": 2.4280575539568345, "grad_norm": 0.3108651340007782, "learning_rate": 8.996666666666667e-05, "loss": 0.0186, "step": 2700 }, { "epoch": 2.4370503597122304, "grad_norm": 0.4370670020580292, "learning_rate": 9.030000000000001e-05, "loss": 0.0179, "step": 2710 }, { "epoch": 2.446043165467626, "grad_norm": 0.3707813620567322, "learning_rate": 9.063333333333333e-05, "loss": 0.0234, "step": 2720 }, { "epoch": 2.4550359712230216, "grad_norm": 0.3705277740955353, "learning_rate": 9.096666666666666e-05, "loss": 0.0155, "step": 2730 }, { "epoch": 2.4640287769784175, "grad_norm": 0.5345609784126282, "learning_rate": 9.130000000000001e-05, "loss": 0.0173, "step": 2740 }, { "epoch": 2.473021582733813, "grad_norm": 0.4133647084236145, "learning_rate": 9.163333333333334e-05, "loss": 0.02, "step": 2750 }, { "epoch": 2.4820143884892087, "grad_norm": 0.40498554706573486, "learning_rate": 9.196666666666666e-05, "loss": 0.0195, "step": 2760 }, { "epoch": 2.491007194244604, "grad_norm": 0.2834918200969696, "learning_rate": 9.230000000000001e-05, "loss": 0.0167, "step": 2770 }, { "epoch": 2.5, "grad_norm": 0.4070112109184265, "learning_rate": 9.263333333333334e-05, "loss": 0.0183, "step": 2780 }, { "epoch": 2.508992805755396, "grad_norm": 0.4774252474308014, "learning_rate": 9.296666666666667e-05, "loss": 0.0227, "step": 2790 }, { "epoch": 2.5179856115107913, "grad_norm": 0.41158509254455566, "learning_rate": 9.33e-05, "loss": 0.018, "step": 2800 }, { "epoch": 2.526978417266187, "grad_norm": 0.33233362436294556, "learning_rate": 9.363333333333334e-05, "loss": 0.0191, "step": 2810 }, { "epoch": 2.5359712230215825, "grad_norm": 0.38455528020858765, "learning_rate": 9.396666666666667e-05, "loss": 0.0144, "step": 2820 }, { "epoch": 2.5449640287769784, "grad_norm": 0.401472270488739, "learning_rate": 9.43e-05, "loss": 0.0256, "step": 2830 }, { "epoch": 2.553956834532374, "grad_norm": 0.39514586329460144, "learning_rate": 9.463333333333333e-05, "loss": 0.0191, "step": 2840 }, { "epoch": 2.56294964028777, "grad_norm": 0.25485000014305115, "learning_rate": 9.496666666666667e-05, "loss": 0.0175, "step": 2850 }, { "epoch": 2.5719424460431655, "grad_norm": 0.29962557554244995, "learning_rate": 9.53e-05, "loss": 0.0226, "step": 2860 }, { "epoch": 2.5809352517985613, "grad_norm": 0.4036204516887665, "learning_rate": 9.563333333333334e-05, "loss": 0.0218, "step": 2870 }, { "epoch": 2.5899280575539567, "grad_norm": 0.4251576066017151, "learning_rate": 9.596666666666668e-05, "loss": 0.0177, "step": 2880 }, { "epoch": 2.5989208633093526, "grad_norm": 0.2543228268623352, "learning_rate": 9.63e-05, "loss": 0.0206, "step": 2890 }, { "epoch": 2.6079136690647484, "grad_norm": 0.4487517774105072, "learning_rate": 9.663333333333334e-05, "loss": 0.0199, "step": 2900 }, { "epoch": 2.616906474820144, "grad_norm": 0.2969162166118622, "learning_rate": 9.696666666666667e-05, "loss": 0.0181, "step": 2910 }, { "epoch": 2.6258992805755397, "grad_norm": 0.3699372112751007, "learning_rate": 9.730000000000001e-05, "loss": 0.0193, "step": 2920 }, { "epoch": 2.634892086330935, "grad_norm": 0.2719968855381012, "learning_rate": 9.763333333333334e-05, "loss": 0.0191, "step": 2930 }, { "epoch": 2.643884892086331, "grad_norm": 0.35260897874832153, "learning_rate": 9.796666666666667e-05, "loss": 0.0219, "step": 2940 }, { "epoch": 2.652877697841727, "grad_norm": 0.2568923830986023, "learning_rate": 9.83e-05, "loss": 0.0304, "step": 2950 }, { "epoch": 2.661870503597122, "grad_norm": 0.44168323278427124, "learning_rate": 9.863333333333334e-05, "loss": 0.0227, "step": 2960 }, { "epoch": 2.670863309352518, "grad_norm": 0.3863123655319214, "learning_rate": 9.896666666666667e-05, "loss": 0.0208, "step": 2970 }, { "epoch": 2.6798561151079134, "grad_norm": 0.33462947607040405, "learning_rate": 9.93e-05, "loss": 0.0174, "step": 2980 }, { "epoch": 2.6888489208633093, "grad_norm": 0.4328790307044983, "learning_rate": 9.963333333333333e-05, "loss": 0.0213, "step": 2990 }, { "epoch": 2.697841726618705, "grad_norm": 0.24513651430606842, "learning_rate": 9.996666666666668e-05, "loss": 0.0289, "step": 3000 }, { "epoch": 2.706834532374101, "grad_norm": 0.39992040395736694, "learning_rate": 9.999999384858465e-05, "loss": 0.021, "step": 3010 }, { "epoch": 2.7158273381294964, "grad_norm": 0.42899590730667114, "learning_rate": 9.999997258443473e-05, "loss": 0.0234, "step": 3020 }, { "epoch": 2.7248201438848922, "grad_norm": 0.3563728630542755, "learning_rate": 9.999993613161331e-05, "loss": 0.0204, "step": 3030 }, { "epoch": 2.7338129496402876, "grad_norm": 0.4074245095252991, "learning_rate": 9.999988449013146e-05, "loss": 0.0222, "step": 3040 }, { "epoch": 2.7428057553956835, "grad_norm": 0.5093651413917542, "learning_rate": 9.99998176600049e-05, "loss": 0.0241, "step": 3050 }, { "epoch": 2.7517985611510793, "grad_norm": 0.4087010622024536, "learning_rate": 9.999973564125389e-05, "loss": 0.0186, "step": 3060 }, { "epoch": 2.7607913669064748, "grad_norm": 0.42344292998313904, "learning_rate": 9.999963843390335e-05, "loss": 0.0187, "step": 3070 }, { "epoch": 2.7697841726618706, "grad_norm": 0.49534285068511963, "learning_rate": 9.999952603798282e-05, "loss": 0.0318, "step": 3080 }, { "epoch": 2.778776978417266, "grad_norm": 0.36145350337028503, "learning_rate": 9.999939845352646e-05, "loss": 0.0205, "step": 3090 }, { "epoch": 2.787769784172662, "grad_norm": 0.29189175367355347, "learning_rate": 9.999925568057298e-05, "loss": 0.0209, "step": 3100 }, { "epoch": 2.7967625899280577, "grad_norm": 0.35105445981025696, "learning_rate": 9.999909771916578e-05, "loss": 0.0188, "step": 3110 }, { "epoch": 2.805755395683453, "grad_norm": 0.4221089780330658, "learning_rate": 9.999892456935285e-05, "loss": 0.0215, "step": 3120 }, { "epoch": 2.814748201438849, "grad_norm": 0.34114399552345276, "learning_rate": 9.999873623118679e-05, "loss": 0.0195, "step": 3130 }, { "epoch": 2.8237410071942444, "grad_norm": 0.3204825222492218, "learning_rate": 9.999853270472479e-05, "loss": 0.0193, "step": 3140 }, { "epoch": 2.83273381294964, "grad_norm": 0.3350900411605835, "learning_rate": 9.999831399002871e-05, "loss": 0.0221, "step": 3150 }, { "epoch": 2.841726618705036, "grad_norm": 0.5247554183006287, "learning_rate": 9.999808008716494e-05, "loss": 0.0175, "step": 3160 }, { "epoch": 2.850719424460432, "grad_norm": 0.29641133546829224, "learning_rate": 9.999783099620459e-05, "loss": 0.0234, "step": 3170 }, { "epoch": 2.8597122302158273, "grad_norm": 0.342242032289505, "learning_rate": 9.999756671722328e-05, "loss": 0.0192, "step": 3180 }, { "epoch": 2.868705035971223, "grad_norm": 0.34149256348609924, "learning_rate": 9.99972872503013e-05, "loss": 0.0177, "step": 3190 }, { "epoch": 2.8776978417266186, "grad_norm": 0.369344025850296, "learning_rate": 9.999699259552359e-05, "loss": 0.0221, "step": 3200 }, { "epoch": 2.8866906474820144, "grad_norm": 0.34744712710380554, "learning_rate": 9.99966827529796e-05, "loss": 0.0155, "step": 3210 }, { "epoch": 2.8956834532374103, "grad_norm": 0.25269949436187744, "learning_rate": 9.999635772276348e-05, "loss": 0.0179, "step": 3220 }, { "epoch": 2.9046762589928057, "grad_norm": 0.2777753174304962, "learning_rate": 9.999601750497396e-05, "loss": 0.0164, "step": 3230 }, { "epoch": 2.9136690647482015, "grad_norm": 0.45754489302635193, "learning_rate": 9.99956620997144e-05, "loss": 0.0176, "step": 3240 }, { "epoch": 2.922661870503597, "grad_norm": 0.37405431270599365, "learning_rate": 9.999529150709275e-05, "loss": 0.0208, "step": 3250 }, { "epoch": 2.931654676258993, "grad_norm": 0.3707926869392395, "learning_rate": 9.999490572722158e-05, "loss": 0.0167, "step": 3260 }, { "epoch": 2.9406474820143886, "grad_norm": 0.2539052963256836, "learning_rate": 9.99945047602181e-05, "loss": 0.0188, "step": 3270 }, { "epoch": 2.949640287769784, "grad_norm": 0.284555047750473, "learning_rate": 9.99940886062041e-05, "loss": 0.0179, "step": 3280 }, { "epoch": 2.95863309352518, "grad_norm": 0.3673015832901001, "learning_rate": 9.999365726530599e-05, "loss": 0.0209, "step": 3290 }, { "epoch": 2.9676258992805753, "grad_norm": 0.31273362040519714, "learning_rate": 9.999321073765481e-05, "loss": 0.0257, "step": 3300 }, { "epoch": 2.976618705035971, "grad_norm": 0.3554779589176178, "learning_rate": 9.99927490233862e-05, "loss": 0.0167, "step": 3310 }, { "epoch": 2.985611510791367, "grad_norm": 0.3408820331096649, "learning_rate": 9.999227212264043e-05, "loss": 0.0163, "step": 3320 }, { "epoch": 2.994604316546763, "grad_norm": 0.48193520307540894, "learning_rate": 9.999178003556236e-05, "loss": 0.0214, "step": 3330 }, { "epoch": 3.0035971223021583, "grad_norm": 0.3307094871997833, "learning_rate": 9.999127276230146e-05, "loss": 0.0193, "step": 3340 }, { "epoch": 3.012589928057554, "grad_norm": 0.42831170558929443, "learning_rate": 9.999075030301184e-05, "loss": 0.0222, "step": 3350 }, { "epoch": 3.0215827338129495, "grad_norm": 0.35916566848754883, "learning_rate": 9.999021265785221e-05, "loss": 0.0212, "step": 3360 }, { "epoch": 3.0305755395683454, "grad_norm": 0.40848976373672485, "learning_rate": 9.998965982698589e-05, "loss": 0.0213, "step": 3370 }, { "epoch": 3.039568345323741, "grad_norm": 0.4339492917060852, "learning_rate": 9.998909181058082e-05, "loss": 0.0178, "step": 3380 }, { "epoch": 3.0485611510791366, "grad_norm": 0.41806095838546753, "learning_rate": 9.998850860880953e-05, "loss": 0.018, "step": 3390 }, { "epoch": 3.0575539568345325, "grad_norm": 0.30249956250190735, "learning_rate": 9.998791022184922e-05, "loss": 0.0193, "step": 3400 }, { "epoch": 3.066546762589928, "grad_norm": 0.30758947134017944, "learning_rate": 9.99872966498816e-05, "loss": 0.0157, "step": 3410 }, { "epoch": 3.0755395683453237, "grad_norm": 0.3837072551250458, "learning_rate": 9.998666789309313e-05, "loss": 0.0194, "step": 3420 }, { "epoch": 3.0845323741007196, "grad_norm": 0.4145548939704895, "learning_rate": 9.998602395167475e-05, "loss": 0.0169, "step": 3430 }, { "epoch": 3.093525179856115, "grad_norm": 0.1886853128671646, "learning_rate": 9.998536482582213e-05, "loss": 0.0254, "step": 3440 }, { "epoch": 3.102517985611511, "grad_norm": 0.34564444422721863, "learning_rate": 9.998469051573544e-05, "loss": 0.0216, "step": 3450 }, { "epoch": 3.1115107913669067, "grad_norm": 0.23312653601169586, "learning_rate": 9.998400102161954e-05, "loss": 0.0185, "step": 3460 }, { "epoch": 3.120503597122302, "grad_norm": 0.39035874605178833, "learning_rate": 9.998329634368388e-05, "loss": 0.0173, "step": 3470 }, { "epoch": 3.129496402877698, "grad_norm": 0.41416892409324646, "learning_rate": 9.998257648214253e-05, "loss": 0.0153, "step": 3480 }, { "epoch": 3.1384892086330933, "grad_norm": 0.37070775032043457, "learning_rate": 9.998184143721417e-05, "loss": 0.0226, "step": 3490 }, { "epoch": 3.147482014388489, "grad_norm": 0.4114771783351898, "learning_rate": 9.998109120912206e-05, "loss": 0.0163, "step": 3500 }, { "epoch": 3.156474820143885, "grad_norm": 0.42899394035339355, "learning_rate": 9.998032579809411e-05, "loss": 0.021, "step": 3510 }, { "epoch": 3.1654676258992804, "grad_norm": 0.2824059724807739, "learning_rate": 9.997954520436286e-05, "loss": 0.0214, "step": 3520 }, { "epoch": 3.1744604316546763, "grad_norm": 0.4154525399208069, "learning_rate": 9.997874942816538e-05, "loss": 0.0175, "step": 3530 }, { "epoch": 3.183453237410072, "grad_norm": 0.4785711467266083, "learning_rate": 9.997793846974345e-05, "loss": 0.0268, "step": 3540 }, { "epoch": 3.1924460431654675, "grad_norm": 0.3470432162284851, "learning_rate": 9.997711232934341e-05, "loss": 0.0192, "step": 3550 }, { "epoch": 3.2014388489208634, "grad_norm": 0.4067460000514984, "learning_rate": 9.99762710072162e-05, "loss": 0.0203, "step": 3560 }, { "epoch": 3.210431654676259, "grad_norm": 0.31621047854423523, "learning_rate": 9.997541450361743e-05, "loss": 0.026, "step": 3570 }, { "epoch": 3.2194244604316546, "grad_norm": 0.4418244957923889, "learning_rate": 9.997454281880723e-05, "loss": 0.0219, "step": 3580 }, { "epoch": 3.2284172661870505, "grad_norm": 0.3750530183315277, "learning_rate": 9.997365595305044e-05, "loss": 0.0169, "step": 3590 }, { "epoch": 3.237410071942446, "grad_norm": 0.35913240909576416, "learning_rate": 9.997275390661644e-05, "loss": 0.0203, "step": 3600 }, { "epoch": 3.2464028776978417, "grad_norm": 0.4074920117855072, "learning_rate": 9.997183667977926e-05, "loss": 0.0204, "step": 3610 }, { "epoch": 3.2553956834532376, "grad_norm": 0.5431424379348755, "learning_rate": 9.997090427281752e-05, "loss": 0.0205, "step": 3620 }, { "epoch": 3.264388489208633, "grad_norm": 0.2403244823217392, "learning_rate": 9.996995668601448e-05, "loss": 0.0216, "step": 3630 }, { "epoch": 3.273381294964029, "grad_norm": 0.3878364562988281, "learning_rate": 9.996899391965798e-05, "loss": 0.0166, "step": 3640 }, { "epoch": 3.2823741007194247, "grad_norm": 0.3513074517250061, "learning_rate": 9.996801597404048e-05, "loss": 0.0186, "step": 3650 }, { "epoch": 3.29136690647482, "grad_norm": 0.2708936631679535, "learning_rate": 9.996702284945905e-05, "loss": 0.0155, "step": 3660 }, { "epoch": 3.300359712230216, "grad_norm": 0.28691914677619934, "learning_rate": 9.996601454621539e-05, "loss": 0.0171, "step": 3670 }, { "epoch": 3.3093525179856114, "grad_norm": 0.28921982645988464, "learning_rate": 9.996499106461577e-05, "loss": 0.0138, "step": 3680 }, { "epoch": 3.318345323741007, "grad_norm": 0.2512739896774292, "learning_rate": 9.996395240497112e-05, "loss": 0.0186, "step": 3690 }, { "epoch": 3.327338129496403, "grad_norm": 0.3186863660812378, "learning_rate": 9.996289856759696e-05, "loss": 0.0186, "step": 3700 }, { "epoch": 3.3363309352517985, "grad_norm": 0.4980567991733551, "learning_rate": 9.996182955281342e-05, "loss": 0.0186, "step": 3710 }, { "epoch": 3.3453237410071943, "grad_norm": 0.3482001721858978, "learning_rate": 9.996074536094519e-05, "loss": 0.0202, "step": 3720 }, { "epoch": 3.3543165467625897, "grad_norm": 0.2925301492214203, "learning_rate": 9.995964599232168e-05, "loss": 0.0181, "step": 3730 }, { "epoch": 3.3633093525179856, "grad_norm": 0.441699743270874, "learning_rate": 9.995853144727683e-05, "loss": 0.0213, "step": 3740 }, { "epoch": 3.3723021582733814, "grad_norm": 0.27267730236053467, "learning_rate": 9.99574017261492e-05, "loss": 0.0171, "step": 3750 }, { "epoch": 3.381294964028777, "grad_norm": 0.21165937185287476, "learning_rate": 9.995625682928198e-05, "loss": 0.0127, "step": 3760 }, { "epoch": 3.3902877697841727, "grad_norm": 0.27886325120925903, "learning_rate": 9.995509675702295e-05, "loss": 0.0132, "step": 3770 }, { "epoch": 3.3992805755395685, "grad_norm": 0.25027182698249817, "learning_rate": 9.995392150972451e-05, "loss": 0.0153, "step": 3780 }, { "epoch": 3.408273381294964, "grad_norm": 0.30424273014068604, "learning_rate": 9.995273108774366e-05, "loss": 0.0206, "step": 3790 }, { "epoch": 3.41726618705036, "grad_norm": 0.28445613384246826, "learning_rate": 9.995152549144205e-05, "loss": 0.0203, "step": 3800 }, { "epoch": 3.4262589928057556, "grad_norm": 0.40371137857437134, "learning_rate": 9.995030472118587e-05, "loss": 0.0195, "step": 3810 }, { "epoch": 3.435251798561151, "grad_norm": 0.4864778220653534, "learning_rate": 9.9949068777346e-05, "loss": 0.0205, "step": 3820 }, { "epoch": 3.444244604316547, "grad_norm": 0.4368647038936615, "learning_rate": 9.994781766029786e-05, "loss": 0.0155, "step": 3830 }, { "epoch": 3.4532374100719423, "grad_norm": 0.4187399446964264, "learning_rate": 9.994655137042151e-05, "loss": 0.0221, "step": 3840 }, { "epoch": 3.462230215827338, "grad_norm": 0.41272619366645813, "learning_rate": 9.99452699081016e-05, "loss": 0.0154, "step": 3850 }, { "epoch": 3.471223021582734, "grad_norm": 0.2915782928466797, "learning_rate": 9.994397327372743e-05, "loss": 0.0208, "step": 3860 }, { "epoch": 3.4802158273381294, "grad_norm": 0.4616684317588806, "learning_rate": 9.994266146769286e-05, "loss": 0.0198, "step": 3870 }, { "epoch": 3.4892086330935252, "grad_norm": 0.3081507086753845, "learning_rate": 9.994133449039642e-05, "loss": 0.0174, "step": 3880 }, { "epoch": 3.4982014388489207, "grad_norm": 0.4339585602283478, "learning_rate": 9.993999234224118e-05, "loss": 0.0206, "step": 3890 }, { "epoch": 3.5071942446043165, "grad_norm": 0.5222663879394531, "learning_rate": 9.993863502363485e-05, "loss": 0.0185, "step": 3900 }, { "epoch": 3.5161870503597124, "grad_norm": 0.28304409980773926, "learning_rate": 9.993726253498976e-05, "loss": 0.0154, "step": 3910 }, { "epoch": 3.5251798561151078, "grad_norm": 0.3721869885921478, "learning_rate": 9.993587487672282e-05, "loss": 0.0153, "step": 3920 }, { "epoch": 3.5341726618705036, "grad_norm": 0.25050657987594604, "learning_rate": 9.993447204925558e-05, "loss": 0.0179, "step": 3930 }, { "epoch": 3.543165467625899, "grad_norm": 0.3916977047920227, "learning_rate": 9.993305405301416e-05, "loss": 0.021, "step": 3940 }, { "epoch": 3.552158273381295, "grad_norm": 0.33341512084007263, "learning_rate": 9.993162088842935e-05, "loss": 0.0194, "step": 3950 }, { "epoch": 3.5611510791366907, "grad_norm": 0.49867746233940125, "learning_rate": 9.993017255593646e-05, "loss": 0.0249, "step": 3960 }, { "epoch": 3.5701438848920866, "grad_norm": 0.4738839268684387, "learning_rate": 9.992870905597548e-05, "loss": 0.0205, "step": 3970 }, { "epoch": 3.579136690647482, "grad_norm": 0.4244374632835388, "learning_rate": 9.9927230388991e-05, "loss": 0.0171, "step": 3980 }, { "epoch": 3.588129496402878, "grad_norm": 0.3377975821495056, "learning_rate": 9.992573655543215e-05, "loss": 0.0174, "step": 3990 }, { "epoch": 3.597122302158273, "grad_norm": 0.21853744983673096, "learning_rate": 9.992422755575277e-05, "loss": 0.0145, "step": 4000 }, { "epoch": 3.606115107913669, "grad_norm": 0.40220266580581665, "learning_rate": 9.992270339041123e-05, "loss": 0.0155, "step": 4010 }, { "epoch": 3.615107913669065, "grad_norm": 0.48645374178886414, "learning_rate": 9.992116405987053e-05, "loss": 0.0168, "step": 4020 }, { "epoch": 3.6241007194244603, "grad_norm": 0.38072672486305237, "learning_rate": 9.991960956459828e-05, "loss": 0.0167, "step": 4030 }, { "epoch": 3.633093525179856, "grad_norm": 0.30559614300727844, "learning_rate": 9.991803990506669e-05, "loss": 0.0191, "step": 4040 }, { "epoch": 3.6420863309352516, "grad_norm": 0.2684467136859894, "learning_rate": 9.991645508175258e-05, "loss": 0.0185, "step": 4050 }, { "epoch": 3.6510791366906474, "grad_norm": 0.28349390625953674, "learning_rate": 9.99148550951374e-05, "loss": 0.0202, "step": 4060 }, { "epoch": 3.6600719424460433, "grad_norm": 0.2857445180416107, "learning_rate": 9.991323994570716e-05, "loss": 0.0257, "step": 4070 }, { "epoch": 3.6690647482014387, "grad_norm": 0.257369726896286, "learning_rate": 9.99116096339525e-05, "loss": 0.0176, "step": 4080 }, { "epoch": 3.6780575539568345, "grad_norm": 0.34864547848701477, "learning_rate": 9.990996416036869e-05, "loss": 0.0187, "step": 4090 }, { "epoch": 3.68705035971223, "grad_norm": 0.4678668975830078, "learning_rate": 9.990830352545555e-05, "loss": 0.0179, "step": 4100 }, { "epoch": 3.696043165467626, "grad_norm": 0.3813004195690155, "learning_rate": 9.990662772971756e-05, "loss": 0.0162, "step": 4110 }, { "epoch": 3.7050359712230216, "grad_norm": 0.34655916690826416, "learning_rate": 9.990493677366376e-05, "loss": 0.0182, "step": 4120 }, { "epoch": 3.7140287769784175, "grad_norm": 0.24595551192760468, "learning_rate": 9.990323065780786e-05, "loss": 0.0139, "step": 4130 }, { "epoch": 3.723021582733813, "grad_norm": 0.3527373671531677, "learning_rate": 9.990150938266808e-05, "loss": 0.0201, "step": 4140 }, { "epoch": 3.7320143884892087, "grad_norm": 0.481990247964859, "learning_rate": 9.989977294876733e-05, "loss": 0.0164, "step": 4150 }, { "epoch": 3.741007194244604, "grad_norm": 0.3119540214538574, "learning_rate": 9.989802135663308e-05, "loss": 0.0152, "step": 4160 }, { "epoch": 3.75, "grad_norm": 0.31459271907806396, "learning_rate": 9.989625460679743e-05, "loss": 0.0163, "step": 4170 }, { "epoch": 3.758992805755396, "grad_norm": 0.27530065178871155, "learning_rate": 9.989447269979706e-05, "loss": 0.0176, "step": 4180 }, { "epoch": 3.7679856115107913, "grad_norm": 0.3450658619403839, "learning_rate": 9.989267563617328e-05, "loss": 0.0172, "step": 4190 }, { "epoch": 3.776978417266187, "grad_norm": 0.2566367983818054, "learning_rate": 9.989086341647198e-05, "loss": 0.0138, "step": 4200 }, { "epoch": 3.7859712230215825, "grad_norm": 0.2781045436859131, "learning_rate": 9.988903604124366e-05, "loss": 0.0141, "step": 4210 }, { "epoch": 3.7949640287769784, "grad_norm": 0.26834020018577576, "learning_rate": 9.988719351104343e-05, "loss": 0.0149, "step": 4220 }, { "epoch": 3.803956834532374, "grad_norm": 0.26571953296661377, "learning_rate": 9.9885335826431e-05, "loss": 0.0131, "step": 4230 }, { "epoch": 3.81294964028777, "grad_norm": 0.29608744382858276, "learning_rate": 9.988346298797071e-05, "loss": 0.013, "step": 4240 }, { "epoch": 3.8219424460431655, "grad_norm": 0.2827228307723999, "learning_rate": 9.988157499623146e-05, "loss": 0.0197, "step": 4250 }, { "epoch": 3.8309352517985613, "grad_norm": 0.33070477843284607, "learning_rate": 9.987967185178677e-05, "loss": 0.0133, "step": 4260 }, { "epoch": 3.8399280575539567, "grad_norm": 0.41404035687446594, "learning_rate": 9.987775355521476e-05, "loss": 0.0178, "step": 4270 }, { "epoch": 3.8489208633093526, "grad_norm": 0.4200587868690491, "learning_rate": 9.987582010709817e-05, "loss": 0.016, "step": 4280 }, { "epoch": 3.8579136690647484, "grad_norm": 0.47995296120643616, "learning_rate": 9.987387150802431e-05, "loss": 0.0284, "step": 4290 }, { "epoch": 3.866906474820144, "grad_norm": 0.35772573947906494, "learning_rate": 9.987190775858517e-05, "loss": 0.015, "step": 4300 }, { "epoch": 3.8758992805755397, "grad_norm": 0.35705965757369995, "learning_rate": 9.98699288593772e-05, "loss": 0.0182, "step": 4310 }, { "epoch": 3.884892086330935, "grad_norm": 0.4290000796318054, "learning_rate": 9.986793481100161e-05, "loss": 0.0166, "step": 4320 }, { "epoch": 3.893884892086331, "grad_norm": 0.4575938880443573, "learning_rate": 9.986592561406412e-05, "loss": 0.0178, "step": 4330 }, { "epoch": 3.902877697841727, "grad_norm": 0.21518723666667938, "learning_rate": 9.986390126917503e-05, "loss": 0.0119, "step": 4340 }, { "epoch": 3.911870503597122, "grad_norm": 0.21544478833675385, "learning_rate": 9.986186177694933e-05, "loss": 0.0132, "step": 4350 }, { "epoch": 3.920863309352518, "grad_norm": 0.48843058943748474, "learning_rate": 9.985980713800656e-05, "loss": 0.0214, "step": 4360 }, { "epoch": 3.9298561151079134, "grad_norm": 0.3120597004890442, "learning_rate": 9.985773735297084e-05, "loss": 0.0129, "step": 4370 }, { "epoch": 3.9388489208633093, "grad_norm": 0.21859347820281982, "learning_rate": 9.985565242247092e-05, "loss": 0.0173, "step": 4380 }, { "epoch": 3.947841726618705, "grad_norm": 0.30413389205932617, "learning_rate": 9.985355234714016e-05, "loss": 0.0197, "step": 4390 }, { "epoch": 3.956834532374101, "grad_norm": 0.20715487003326416, "learning_rate": 9.985143712761652e-05, "loss": 0.0114, "step": 4400 }, { "epoch": 3.9658273381294964, "grad_norm": 0.30584314465522766, "learning_rate": 9.984930676454252e-05, "loss": 0.0133, "step": 4410 }, { "epoch": 3.9748201438848922, "grad_norm": 0.36583197116851807, "learning_rate": 9.984716125856532e-05, "loss": 0.0163, "step": 4420 }, { "epoch": 3.9838129496402876, "grad_norm": 0.3249223530292511, "learning_rate": 9.984500061033667e-05, "loss": 0.0116, "step": 4430 }, { "epoch": 3.9928057553956835, "grad_norm": 0.32920512557029724, "learning_rate": 9.984282482051293e-05, "loss": 0.0146, "step": 4440 }, { "epoch": 4.001798561151079, "grad_norm": 0.22731710970401764, "learning_rate": 9.9840633889755e-05, "loss": 0.0143, "step": 4450 }, { "epoch": 4.010791366906475, "grad_norm": 0.3556356132030487, "learning_rate": 9.983842781872848e-05, "loss": 0.0172, "step": 4460 }, { "epoch": 4.01978417266187, "grad_norm": 0.270275741815567, "learning_rate": 9.98362066081035e-05, "loss": 0.016, "step": 4470 }, { "epoch": 4.028776978417266, "grad_norm": 0.22374063730239868, "learning_rate": 9.983397025855479e-05, "loss": 0.0162, "step": 4480 }, { "epoch": 4.037769784172662, "grad_norm": 0.35581550002098083, "learning_rate": 9.983171877076171e-05, "loss": 0.0151, "step": 4490 }, { "epoch": 4.046762589928058, "grad_norm": 0.2807891368865967, "learning_rate": 9.98294521454082e-05, "loss": 0.0239, "step": 4500 }, { "epoch": 4.055755395683454, "grad_norm": 0.28638121485710144, "learning_rate": 9.98271703831828e-05, "loss": 0.0155, "step": 4510 }, { "epoch": 4.0647482014388485, "grad_norm": 0.32039472460746765, "learning_rate": 9.982487348477865e-05, "loss": 0.0211, "step": 4520 }, { "epoch": 4.073741007194244, "grad_norm": 0.35244858264923096, "learning_rate": 9.982256145089347e-05, "loss": 0.0199, "step": 4530 }, { "epoch": 4.08273381294964, "grad_norm": 0.3320842385292053, "learning_rate": 9.982023428222962e-05, "loss": 0.0129, "step": 4540 }, { "epoch": 4.091726618705036, "grad_norm": 0.3011675775051117, "learning_rate": 9.981789197949403e-05, "loss": 0.023, "step": 4550 }, { "epoch": 4.100719424460432, "grad_norm": 0.30485567450523376, "learning_rate": 9.98155345433982e-05, "loss": 0.0162, "step": 4560 }, { "epoch": 4.109712230215828, "grad_norm": 0.3041466176509857, "learning_rate": 9.981316197465831e-05, "loss": 0.0148, "step": 4570 }, { "epoch": 4.118705035971223, "grad_norm": 0.17092546820640564, "learning_rate": 9.981077427399504e-05, "loss": 0.0155, "step": 4580 }, { "epoch": 4.127697841726619, "grad_norm": 0.2478470653295517, "learning_rate": 9.980837144213371e-05, "loss": 0.0143, "step": 4590 }, { "epoch": 4.136690647482014, "grad_norm": 0.24949736893177032, "learning_rate": 9.980595347980426e-05, "loss": 0.0128, "step": 4600 }, { "epoch": 4.14568345323741, "grad_norm": 0.25499460101127625, "learning_rate": 9.980352038774119e-05, "loss": 0.0199, "step": 4610 }, { "epoch": 4.154676258992806, "grad_norm": 0.2638578712940216, "learning_rate": 9.98010721666836e-05, "loss": 0.0203, "step": 4620 }, { "epoch": 4.163669064748201, "grad_norm": 0.27315986156463623, "learning_rate": 9.979860881737523e-05, "loss": 0.0122, "step": 4630 }, { "epoch": 4.172661870503597, "grad_norm": 0.4591233730316162, "learning_rate": 9.979613034056434e-05, "loss": 0.0141, "step": 4640 }, { "epoch": 4.181654676258993, "grad_norm": 0.33830222487449646, "learning_rate": 9.979363673700386e-05, "loss": 0.0242, "step": 4650 }, { "epoch": 4.190647482014389, "grad_norm": 0.35123541951179504, "learning_rate": 9.979112800745124e-05, "loss": 0.0233, "step": 4660 }, { "epoch": 4.1996402877697845, "grad_norm": 0.29958948493003845, "learning_rate": 9.978860415266861e-05, "loss": 0.0163, "step": 4670 }, { "epoch": 4.2086330935251794, "grad_norm": 0.35076019167900085, "learning_rate": 9.978606517342262e-05, "loss": 0.0161, "step": 4680 }, { "epoch": 4.217625899280575, "grad_norm": 0.3666781783103943, "learning_rate": 9.978351107048456e-05, "loss": 0.0198, "step": 4690 }, { "epoch": 4.226618705035971, "grad_norm": 0.3636649250984192, "learning_rate": 9.978094184463029e-05, "loss": 0.0175, "step": 4700 }, { "epoch": 4.235611510791367, "grad_norm": 0.3594069182872772, "learning_rate": 9.977835749664029e-05, "loss": 0.0168, "step": 4710 }, { "epoch": 4.244604316546763, "grad_norm": 0.407676637172699, "learning_rate": 9.97757580272996e-05, "loss": 0.0136, "step": 4720 }, { "epoch": 4.253597122302159, "grad_norm": 0.2667829394340515, "learning_rate": 9.977314343739786e-05, "loss": 0.0118, "step": 4730 }, { "epoch": 4.262589928057554, "grad_norm": 0.22562985122203827, "learning_rate": 9.977051372772934e-05, "loss": 0.0188, "step": 4740 }, { "epoch": 4.2715827338129495, "grad_norm": 0.2573854923248291, "learning_rate": 9.976786889909286e-05, "loss": 0.0125, "step": 4750 }, { "epoch": 4.280575539568345, "grad_norm": 0.3158126175403595, "learning_rate": 9.976520895229185e-05, "loss": 0.0148, "step": 4760 }, { "epoch": 4.289568345323741, "grad_norm": 0.3731728494167328, "learning_rate": 9.976253388813433e-05, "loss": 0.0173, "step": 4770 }, { "epoch": 4.298561151079137, "grad_norm": 0.2954322099685669, "learning_rate": 9.975984370743293e-05, "loss": 0.0149, "step": 4780 }, { "epoch": 4.307553956834532, "grad_norm": 0.31987428665161133, "learning_rate": 9.975713841100485e-05, "loss": 0.0164, "step": 4790 }, { "epoch": 4.316546762589928, "grad_norm": 0.4146029055118561, "learning_rate": 9.975441799967187e-05, "loss": 0.0153, "step": 4800 }, { "epoch": 4.325539568345324, "grad_norm": 0.2233443558216095, "learning_rate": 9.975168247426039e-05, "loss": 0.015, "step": 4810 }, { "epoch": 4.33453237410072, "grad_norm": 0.38692668080329895, "learning_rate": 9.974893183560139e-05, "loss": 0.0138, "step": 4820 }, { "epoch": 4.343525179856115, "grad_norm": 0.41272515058517456, "learning_rate": 9.974616608453045e-05, "loss": 0.0164, "step": 4830 }, { "epoch": 4.35251798561151, "grad_norm": 0.17670974135398865, "learning_rate": 9.974338522188772e-05, "loss": 0.015, "step": 4840 }, { "epoch": 4.361510791366906, "grad_norm": 0.32849738001823425, "learning_rate": 9.974058924851797e-05, "loss": 0.0137, "step": 4850 }, { "epoch": 4.370503597122302, "grad_norm": 0.38976436853408813, "learning_rate": 9.973777816527051e-05, "loss": 0.0153, "step": 4860 }, { "epoch": 4.379496402877698, "grad_norm": 0.40809497237205505, "learning_rate": 9.973495197299931e-05, "loss": 0.0184, "step": 4870 }, { "epoch": 4.388489208633094, "grad_norm": 0.23467299342155457, "learning_rate": 9.973211067256287e-05, "loss": 0.0151, "step": 4880 }, { "epoch": 4.39748201438849, "grad_norm": 0.28100264072418213, "learning_rate": 9.97292542648243e-05, "loss": 0.0157, "step": 4890 }, { "epoch": 4.406474820143885, "grad_norm": 0.47568050026893616, "learning_rate": 9.972638275065131e-05, "loss": 0.015, "step": 4900 }, { "epoch": 4.41546762589928, "grad_norm": 0.26278895139694214, "learning_rate": 9.972349613091621e-05, "loss": 0.0175, "step": 4910 }, { "epoch": 4.424460431654676, "grad_norm": 0.29977259039878845, "learning_rate": 9.972059440649584e-05, "loss": 0.0196, "step": 4920 }, { "epoch": 4.433453237410072, "grad_norm": 0.29593220353126526, "learning_rate": 9.971767757827168e-05, "loss": 0.0158, "step": 4930 }, { "epoch": 4.442446043165468, "grad_norm": 0.3824442923069, "learning_rate": 9.971474564712982e-05, "loss": 0.0149, "step": 4940 }, { "epoch": 4.451438848920863, "grad_norm": 0.2949729859828949, "learning_rate": 9.971179861396084e-05, "loss": 0.0157, "step": 4950 }, { "epoch": 4.460431654676259, "grad_norm": 0.34956473112106323, "learning_rate": 9.970883647966003e-05, "loss": 0.0193, "step": 4960 }, { "epoch": 4.469424460431655, "grad_norm": 0.3333277106285095, "learning_rate": 9.970585924512717e-05, "loss": 0.0139, "step": 4970 }, { "epoch": 4.4784172661870505, "grad_norm": 0.3530625104904175, "learning_rate": 9.970286691126669e-05, "loss": 0.0134, "step": 4980 }, { "epoch": 4.487410071942446, "grad_norm": 0.2993045747280121, "learning_rate": 9.969985947898756e-05, "loss": 0.0171, "step": 4990 }, { "epoch": 4.496402877697841, "grad_norm": 0.3196657598018646, "learning_rate": 9.969683694920337e-05, "loss": 0.019, "step": 5000 }, { "epoch": 4.505395683453237, "grad_norm": 0.3569885194301605, "learning_rate": 9.969379932283228e-05, "loss": 0.0161, "step": 5010 }, { "epoch": 4.514388489208633, "grad_norm": 0.3296067714691162, "learning_rate": 9.969074660079704e-05, "loss": 0.016, "step": 5020 }, { "epoch": 4.523381294964029, "grad_norm": 0.2657872438430786, "learning_rate": 9.968767878402501e-05, "loss": 0.0163, "step": 5030 }, { "epoch": 4.532374100719425, "grad_norm": 0.2189215123653412, "learning_rate": 9.968459587344808e-05, "loss": 0.0126, "step": 5040 }, { "epoch": 4.5413669064748206, "grad_norm": 0.3377128541469574, "learning_rate": 9.968149787000278e-05, "loss": 0.0133, "step": 5050 }, { "epoch": 4.5503597122302155, "grad_norm": 0.23673276603221893, "learning_rate": 9.967838477463018e-05, "loss": 0.0132, "step": 5060 }, { "epoch": 4.559352517985611, "grad_norm": 0.2276393473148346, "learning_rate": 9.967525658827597e-05, "loss": 0.015, "step": 5070 }, { "epoch": 4.568345323741007, "grad_norm": 0.30233755707740784, "learning_rate": 9.967211331189042e-05, "loss": 0.018, "step": 5080 }, { "epoch": 4.577338129496403, "grad_norm": 0.4672054648399353, "learning_rate": 9.966895494642834e-05, "loss": 0.0208, "step": 5090 }, { "epoch": 4.586330935251799, "grad_norm": 0.4220753610134125, "learning_rate": 9.96657814928492e-05, "loss": 0.0205, "step": 5100 }, { "epoch": 4.595323741007194, "grad_norm": 0.4177705645561218, "learning_rate": 9.966259295211697e-05, "loss": 0.0183, "step": 5110 }, { "epoch": 4.60431654676259, "grad_norm": 0.2821277976036072, "learning_rate": 9.965938932520028e-05, "loss": 0.0181, "step": 5120 }, { "epoch": 4.613309352517986, "grad_norm": 0.2919083535671234, "learning_rate": 9.965617061307229e-05, "loss": 0.0155, "step": 5130 }, { "epoch": 4.622302158273381, "grad_norm": 0.313682496547699, "learning_rate": 9.965293681671077e-05, "loss": 0.0174, "step": 5140 }, { "epoch": 4.631294964028777, "grad_norm": 0.4117647409439087, "learning_rate": 9.964968793709804e-05, "loss": 0.0184, "step": 5150 }, { "epoch": 4.640287769784173, "grad_norm": 0.3668295443058014, "learning_rate": 9.964642397522106e-05, "loss": 0.0161, "step": 5160 }, { "epoch": 4.649280575539568, "grad_norm": 0.5203273296356201, "learning_rate": 9.96431449320713e-05, "loss": 0.0172, "step": 5170 }, { "epoch": 4.658273381294964, "grad_norm": 0.2918151617050171, "learning_rate": 9.963985080864486e-05, "loss": 0.0195, "step": 5180 }, { "epoch": 4.66726618705036, "grad_norm": 0.29905205965042114, "learning_rate": 9.96365416059424e-05, "loss": 0.0165, "step": 5190 }, { "epoch": 4.676258992805756, "grad_norm": 0.22724175453186035, "learning_rate": 9.963321732496919e-05, "loss": 0.0159, "step": 5200 }, { "epoch": 4.685251798561151, "grad_norm": 0.28898900747299194, "learning_rate": 9.962987796673506e-05, "loss": 0.0182, "step": 5210 }, { "epoch": 4.694244604316546, "grad_norm": 0.3117143511772156, "learning_rate": 9.962652353225438e-05, "loss": 0.0128, "step": 5220 }, { "epoch": 4.703237410071942, "grad_norm": 0.3360442817211151, "learning_rate": 9.962315402254619e-05, "loss": 0.0176, "step": 5230 }, { "epoch": 4.712230215827338, "grad_norm": 0.2654707431793213, "learning_rate": 9.9619769438634e-05, "loss": 0.0145, "step": 5240 }, { "epoch": 4.721223021582734, "grad_norm": 0.3031374216079712, "learning_rate": 9.9616369781546e-05, "loss": 0.0127, "step": 5250 }, { "epoch": 4.73021582733813, "grad_norm": 0.4004557132720947, "learning_rate": 9.961295505231491e-05, "loss": 0.0166, "step": 5260 }, { "epoch": 4.739208633093525, "grad_norm": 0.2702541649341583, "learning_rate": 9.960952525197804e-05, "loss": 0.0156, "step": 5270 }, { "epoch": 4.748201438848921, "grad_norm": 0.270107626914978, "learning_rate": 9.960608038157724e-05, "loss": 0.0099, "step": 5280 }, { "epoch": 4.7571942446043165, "grad_norm": 0.3300579786300659, "learning_rate": 9.960262044215901e-05, "loss": 0.0147, "step": 5290 }, { "epoch": 4.766187050359712, "grad_norm": 0.17105938494205475, "learning_rate": 9.959914543477435e-05, "loss": 0.0214, "step": 5300 }, { "epoch": 4.775179856115108, "grad_norm": 0.34566453099250793, "learning_rate": 9.959565536047892e-05, "loss": 0.0275, "step": 5310 }, { "epoch": 4.784172661870503, "grad_norm": 0.3799647092819214, "learning_rate": 9.959215022033288e-05, "loss": 0.0168, "step": 5320 }, { "epoch": 4.793165467625899, "grad_norm": 0.40797892212867737, "learning_rate": 9.9588630015401e-05, "loss": 0.0196, "step": 5330 }, { "epoch": 4.802158273381295, "grad_norm": 0.28156957030296326, "learning_rate": 9.958509474675264e-05, "loss": 0.0146, "step": 5340 }, { "epoch": 4.811151079136691, "grad_norm": 0.29070454835891724, "learning_rate": 9.958154441546171e-05, "loss": 0.0253, "step": 5350 }, { "epoch": 4.820143884892087, "grad_norm": 0.2083570659160614, "learning_rate": 9.957797902260673e-05, "loss": 0.0171, "step": 5360 }, { "epoch": 4.829136690647482, "grad_norm": 0.22842298448085785, "learning_rate": 9.957439856927073e-05, "loss": 0.0124, "step": 5370 }, { "epoch": 4.838129496402877, "grad_norm": 0.33997705578804016, "learning_rate": 9.957080305654139e-05, "loss": 0.0192, "step": 5380 }, { "epoch": 4.847122302158273, "grad_norm": 0.2762313485145569, "learning_rate": 9.956719248551092e-05, "loss": 0.02, "step": 5390 }, { "epoch": 4.856115107913669, "grad_norm": 0.2805772125720978, "learning_rate": 9.956356685727612e-05, "loss": 0.0162, "step": 5400 }, { "epoch": 4.865107913669065, "grad_norm": 0.29270699620246887, "learning_rate": 9.955992617293836e-05, "loss": 0.0173, "step": 5410 }, { "epoch": 4.874100719424461, "grad_norm": 0.2857111096382141, "learning_rate": 9.955627043360358e-05, "loss": 0.0223, "step": 5420 }, { "epoch": 4.883093525179856, "grad_norm": 0.2402123361825943, "learning_rate": 9.955259964038231e-05, "loss": 0.0153, "step": 5430 }, { "epoch": 4.892086330935252, "grad_norm": 0.38970062136650085, "learning_rate": 9.954891379438962e-05, "loss": 0.017, "step": 5440 }, { "epoch": 4.901079136690647, "grad_norm": 0.2723695933818817, "learning_rate": 9.954521289674519e-05, "loss": 0.0208, "step": 5450 }, { "epoch": 4.910071942446043, "grad_norm": 0.35185614228248596, "learning_rate": 9.954149694857325e-05, "loss": 0.015, "step": 5460 }, { "epoch": 4.919064748201439, "grad_norm": 0.32593879103660583, "learning_rate": 9.953776595100258e-05, "loss": 0.0207, "step": 5470 }, { "epoch": 4.928057553956835, "grad_norm": 0.2828299105167389, "learning_rate": 9.95340199051666e-05, "loss": 0.0128, "step": 5480 }, { "epoch": 4.93705035971223, "grad_norm": 0.24299389123916626, "learning_rate": 9.953025881220325e-05, "loss": 0.0122, "step": 5490 }, { "epoch": 4.946043165467626, "grad_norm": 0.30867213010787964, "learning_rate": 9.952648267325504e-05, "loss": 0.0117, "step": 5500 }, { "epoch": 4.955035971223022, "grad_norm": 0.2717340886592865, "learning_rate": 9.952269148946905e-05, "loss": 0.0137, "step": 5510 }, { "epoch": 4.9640287769784175, "grad_norm": 0.3203513026237488, "learning_rate": 9.951888526199697e-05, "loss": 0.0133, "step": 5520 }, { "epoch": 4.9730215827338125, "grad_norm": 0.275937020778656, "learning_rate": 9.951506399199501e-05, "loss": 0.0144, "step": 5530 }, { "epoch": 4.982014388489208, "grad_norm": 0.28414297103881836, "learning_rate": 9.951122768062399e-05, "loss": 0.0133, "step": 5540 }, { "epoch": 4.991007194244604, "grad_norm": 0.3182006776332855, "learning_rate": 9.950737632904927e-05, "loss": 0.0155, "step": 5550 }, { "epoch": 5.0, "grad_norm": 0.330781489610672, "learning_rate": 9.950350993844077e-05, "loss": 0.016, "step": 5560 }, { "epoch": 5.008992805755396, "grad_norm": 0.29106175899505615, "learning_rate": 9.949962850997303e-05, "loss": 0.0154, "step": 5570 }, { "epoch": 5.017985611510792, "grad_norm": 0.33059847354888916, "learning_rate": 9.949573204482512e-05, "loss": 0.0121, "step": 5580 }, { "epoch": 5.026978417266187, "grad_norm": 0.29943257570266724, "learning_rate": 9.949182054418064e-05, "loss": 0.0128, "step": 5590 }, { "epoch": 5.0359712230215825, "grad_norm": 0.3379148840904236, "learning_rate": 9.948789400922787e-05, "loss": 0.0166, "step": 5600 }, { "epoch": 5.044964028776978, "grad_norm": 0.371024489402771, "learning_rate": 9.948395244115953e-05, "loss": 0.0168, "step": 5610 }, { "epoch": 5.053956834532374, "grad_norm": 0.42009082436561584, "learning_rate": 9.9479995841173e-05, "loss": 0.0181, "step": 5620 }, { "epoch": 5.06294964028777, "grad_norm": 0.27764180302619934, "learning_rate": 9.947602421047017e-05, "loss": 0.0109, "step": 5630 }, { "epoch": 5.071942446043165, "grad_norm": 0.37249016761779785, "learning_rate": 9.947203755025753e-05, "loss": 0.0192, "step": 5640 }, { "epoch": 5.080935251798561, "grad_norm": 0.30154329538345337, "learning_rate": 9.946803586174611e-05, "loss": 0.0181, "step": 5650 }, { "epoch": 5.089928057553957, "grad_norm": 0.29023951292037964, "learning_rate": 9.946401914615151e-05, "loss": 0.0191, "step": 5660 }, { "epoch": 5.098920863309353, "grad_norm": 0.43264949321746826, "learning_rate": 9.945998740469394e-05, "loss": 0.0146, "step": 5670 }, { "epoch": 5.107913669064748, "grad_norm": 0.29489028453826904, "learning_rate": 9.945594063859809e-05, "loss": 0.0215, "step": 5680 }, { "epoch": 5.116906474820144, "grad_norm": 0.45546281337738037, "learning_rate": 9.94518788490933e-05, "loss": 0.0141, "step": 5690 }, { "epoch": 5.125899280575539, "grad_norm": 0.32769784331321716, "learning_rate": 9.944780203741341e-05, "loss": 0.0192, "step": 5700 }, { "epoch": 5.134892086330935, "grad_norm": 0.31723305583000183, "learning_rate": 9.944371020479686e-05, "loss": 0.0172, "step": 5710 }, { "epoch": 5.143884892086331, "grad_norm": 0.4582144021987915, "learning_rate": 9.943960335248662e-05, "loss": 0.0133, "step": 5720 }, { "epoch": 5.152877697841727, "grad_norm": 0.3292665481567383, "learning_rate": 9.943548148173027e-05, "loss": 0.0166, "step": 5730 }, { "epoch": 5.161870503597123, "grad_norm": 0.26829245686531067, "learning_rate": 9.943134459377992e-05, "loss": 0.013, "step": 5740 }, { "epoch": 5.170863309352518, "grad_norm": 0.31403735280036926, "learning_rate": 9.942719268989222e-05, "loss": 0.0166, "step": 5750 }, { "epoch": 5.179856115107913, "grad_norm": 0.274330735206604, "learning_rate": 9.942302577132844e-05, "loss": 0.0139, "step": 5760 }, { "epoch": 5.188848920863309, "grad_norm": 0.2830810546875, "learning_rate": 9.941884383935438e-05, "loss": 0.0123, "step": 5770 }, { "epoch": 5.197841726618705, "grad_norm": 0.2755391299724579, "learning_rate": 9.941464689524039e-05, "loss": 0.0155, "step": 5780 }, { "epoch": 5.206834532374101, "grad_norm": 0.40319228172302246, "learning_rate": 9.941043494026139e-05, "loss": 0.0174, "step": 5790 }, { "epoch": 5.215827338129497, "grad_norm": 0.5020081996917725, "learning_rate": 9.940620797569685e-05, "loss": 0.0148, "step": 5800 }, { "epoch": 5.224820143884892, "grad_norm": 0.3886682689189911, "learning_rate": 9.940196600283082e-05, "loss": 0.0244, "step": 5810 }, { "epoch": 5.233812949640288, "grad_norm": 0.25251033902168274, "learning_rate": 9.939770902295192e-05, "loss": 0.0228, "step": 5820 }, { "epoch": 5.2428057553956835, "grad_norm": 0.3926614224910736, "learning_rate": 9.939343703735329e-05, "loss": 0.0186, "step": 5830 }, { "epoch": 5.251798561151079, "grad_norm": 0.3923702836036682, "learning_rate": 9.938915004733264e-05, "loss": 0.014, "step": 5840 }, { "epoch": 5.260791366906475, "grad_norm": 0.27648958563804626, "learning_rate": 9.938484805419224e-05, "loss": 0.0153, "step": 5850 }, { "epoch": 5.26978417266187, "grad_norm": 0.374968945980072, "learning_rate": 9.938053105923894e-05, "loss": 0.0141, "step": 5860 }, { "epoch": 5.278776978417266, "grad_norm": 0.2973593473434448, "learning_rate": 9.937619906378413e-05, "loss": 0.0127, "step": 5870 }, { "epoch": 5.287769784172662, "grad_norm": 0.307830274105072, "learning_rate": 9.937185206914374e-05, "loss": 0.0176, "step": 5880 }, { "epoch": 5.296762589928058, "grad_norm": 0.34166666865348816, "learning_rate": 9.936749007663829e-05, "loss": 0.0133, "step": 5890 }, { "epoch": 5.305755395683454, "grad_norm": 0.3090035319328308, "learning_rate": 9.93631130875928e-05, "loss": 0.0136, "step": 5900 }, { "epoch": 5.3147482014388485, "grad_norm": 0.42530161142349243, "learning_rate": 9.935872110333692e-05, "loss": 0.0189, "step": 5910 }, { "epoch": 5.323741007194244, "grad_norm": 0.4084072411060333, "learning_rate": 9.935431412520484e-05, "loss": 0.0181, "step": 5920 }, { "epoch": 5.33273381294964, "grad_norm": 0.43914633989334106, "learning_rate": 9.934989215453523e-05, "loss": 0.0165, "step": 5930 }, { "epoch": 5.341726618705036, "grad_norm": 0.3290022611618042, "learning_rate": 9.934545519267139e-05, "loss": 0.0193, "step": 5940 }, { "epoch": 5.350719424460432, "grad_norm": 0.3558102548122406, "learning_rate": 9.934100324096117e-05, "loss": 0.0128, "step": 5950 }, { "epoch": 5.359712230215827, "grad_norm": 0.23731359839439392, "learning_rate": 9.933653630075692e-05, "loss": 0.0123, "step": 5960 }, { "epoch": 5.368705035971223, "grad_norm": 0.32419222593307495, "learning_rate": 9.93320543734156e-05, "loss": 0.013, "step": 5970 }, { "epoch": 5.377697841726619, "grad_norm": 0.28937995433807373, "learning_rate": 9.932755746029871e-05, "loss": 0.0103, "step": 5980 }, { "epoch": 5.386690647482014, "grad_norm": 0.38369467854499817, "learning_rate": 9.932304556277228e-05, "loss": 0.014, "step": 5990 }, { "epoch": 5.39568345323741, "grad_norm": 0.3781099319458008, "learning_rate": 9.93185186822069e-05, "loss": 0.0204, "step": 6000 }, { "epoch": 5.404676258992806, "grad_norm": 0.23766596615314484, "learning_rate": 9.931397681997773e-05, "loss": 0.0153, "step": 6010 }, { "epoch": 5.413669064748201, "grad_norm": 0.424989253282547, "learning_rate": 9.930941997746446e-05, "loss": 0.0149, "step": 6020 }, { "epoch": 5.422661870503597, "grad_norm": 0.18539957702159882, "learning_rate": 9.930484815605134e-05, "loss": 0.0158, "step": 6030 }, { "epoch": 5.431654676258993, "grad_norm": 0.3632741868495941, "learning_rate": 9.930026135712717e-05, "loss": 0.0206, "step": 6040 }, { "epoch": 5.440647482014389, "grad_norm": 0.49324485659599304, "learning_rate": 9.92956595820853e-05, "loss": 0.0153, "step": 6050 }, { "epoch": 5.4496402877697845, "grad_norm": 0.24174678325653076, "learning_rate": 9.929104283232362e-05, "loss": 0.0134, "step": 6060 }, { "epoch": 5.4586330935251794, "grad_norm": 0.3373875319957733, "learning_rate": 9.92864111092446e-05, "loss": 0.0143, "step": 6070 }, { "epoch": 5.467625899280575, "grad_norm": 0.27146610617637634, "learning_rate": 9.92817644142552e-05, "loss": 0.0119, "step": 6080 }, { "epoch": 5.476618705035971, "grad_norm": 0.24645498394966125, "learning_rate": 9.927710274876698e-05, "loss": 0.0152, "step": 6090 }, { "epoch": 5.485611510791367, "grad_norm": 0.22288772463798523, "learning_rate": 9.927242611419603e-05, "loss": 0.0161, "step": 6100 }, { "epoch": 5.494604316546763, "grad_norm": 0.25083184242248535, "learning_rate": 9.926773451196301e-05, "loss": 0.0187, "step": 6110 }, { "epoch": 5.503597122302159, "grad_norm": 0.3066023886203766, "learning_rate": 9.926302794349306e-05, "loss": 0.0139, "step": 6120 }, { "epoch": 5.512589928057554, "grad_norm": 0.30430710315704346, "learning_rate": 9.925830641021594e-05, "loss": 0.0202, "step": 6130 }, { "epoch": 5.5215827338129495, "grad_norm": 0.24677523970603943, "learning_rate": 9.925356991356593e-05, "loss": 0.0149, "step": 6140 }, { "epoch": 5.530575539568345, "grad_norm": 0.27841925621032715, "learning_rate": 9.924881845498184e-05, "loss": 0.0141, "step": 6150 }, { "epoch": 5.539568345323741, "grad_norm": 0.31841206550598145, "learning_rate": 9.924405203590705e-05, "loss": 0.0155, "step": 6160 }, { "epoch": 5.548561151079137, "grad_norm": 0.3492887616157532, "learning_rate": 9.923927065778946e-05, "loss": 0.0145, "step": 6170 }, { "epoch": 5.557553956834532, "grad_norm": 0.3511433005332947, "learning_rate": 9.923447432208154e-05, "loss": 0.0157, "step": 6180 }, { "epoch": 5.566546762589928, "grad_norm": 0.24985907971858978, "learning_rate": 9.922966303024027e-05, "loss": 0.0135, "step": 6190 }, { "epoch": 5.575539568345324, "grad_norm": 0.3522159159183502, "learning_rate": 9.922483678372721e-05, "loss": 0.0187, "step": 6200 }, { "epoch": 5.58453237410072, "grad_norm": 0.23592357337474823, "learning_rate": 9.921999558400845e-05, "loss": 0.0148, "step": 6210 }, { "epoch": 5.593525179856115, "grad_norm": 0.2224595546722412, "learning_rate": 9.92151394325546e-05, "loss": 0.0216, "step": 6220 }, { "epoch": 5.602517985611511, "grad_norm": 0.3880715072154999, "learning_rate": 9.921026833084084e-05, "loss": 0.0158, "step": 6230 }, { "epoch": 5.611510791366906, "grad_norm": 0.25925925374031067, "learning_rate": 9.920538228034689e-05, "loss": 0.0124, "step": 6240 }, { "epoch": 5.620503597122302, "grad_norm": 0.21878887712955475, "learning_rate": 9.920048128255699e-05, "loss": 0.0137, "step": 6250 }, { "epoch": 5.629496402877698, "grad_norm": 0.3869539201259613, "learning_rate": 9.919556533895995e-05, "loss": 0.018, "step": 6260 }, { "epoch": 5.638489208633094, "grad_norm": 0.30304771661758423, "learning_rate": 9.919063445104907e-05, "loss": 0.0166, "step": 6270 }, { "epoch": 5.647482014388489, "grad_norm": 0.33399254083633423, "learning_rate": 9.918568862032227e-05, "loss": 0.0163, "step": 6280 }, { "epoch": 5.656474820143885, "grad_norm": 0.25217851996421814, "learning_rate": 9.918072784828194e-05, "loss": 0.014, "step": 6290 }, { "epoch": 5.66546762589928, "grad_norm": 0.2882864773273468, "learning_rate": 9.917575213643501e-05, "loss": 0.0178, "step": 6300 }, { "epoch": 5.674460431654676, "grad_norm": 0.27046069502830505, "learning_rate": 9.917076148629302e-05, "loss": 0.0127, "step": 6310 }, { "epoch": 5.683453237410072, "grad_norm": 0.2888891398906708, "learning_rate": 9.916575589937196e-05, "loss": 0.0163, "step": 6320 }, { "epoch": 5.692446043165468, "grad_norm": 0.33862489461898804, "learning_rate": 9.916073537719239e-05, "loss": 0.0166, "step": 6330 }, { "epoch": 5.701438848920863, "grad_norm": 0.19077305495738983, "learning_rate": 9.915569992127944e-05, "loss": 0.0151, "step": 6340 }, { "epoch": 5.710431654676259, "grad_norm": 0.2787266969680786, "learning_rate": 9.915064953316273e-05, "loss": 0.0133, "step": 6350 }, { "epoch": 5.719424460431655, "grad_norm": 0.22872456908226013, "learning_rate": 9.914558421437645e-05, "loss": 0.0125, "step": 6360 }, { "epoch": 5.7284172661870505, "grad_norm": 0.3104342520236969, "learning_rate": 9.914050396645929e-05, "loss": 0.0148, "step": 6370 }, { "epoch": 5.737410071942446, "grad_norm": 0.3088624179363251, "learning_rate": 9.913540879095452e-05, "loss": 0.0166, "step": 6380 }, { "epoch": 5.746402877697841, "grad_norm": 0.4277992248535156, "learning_rate": 9.913029868940987e-05, "loss": 0.0185, "step": 6390 }, { "epoch": 5.755395683453237, "grad_norm": 0.3210202753543854, "learning_rate": 9.912517366337772e-05, "loss": 0.016, "step": 6400 }, { "epoch": 5.764388489208633, "grad_norm": 0.2318948209285736, "learning_rate": 9.912003371441487e-05, "loss": 0.0161, "step": 6410 }, { "epoch": 5.773381294964029, "grad_norm": 0.21347112953662872, "learning_rate": 9.911487884408271e-05, "loss": 0.0116, "step": 6420 }, { "epoch": 5.782374100719425, "grad_norm": 0.26153287291526794, "learning_rate": 9.910970905394719e-05, "loss": 0.0127, "step": 6430 }, { "epoch": 5.7913669064748206, "grad_norm": 0.23307397961616516, "learning_rate": 9.91045243455787e-05, "loss": 0.0094, "step": 6440 }, { "epoch": 5.8003597122302155, "grad_norm": 0.173900306224823, "learning_rate": 9.909932472055225e-05, "loss": 0.0148, "step": 6450 }, { "epoch": 5.809352517985611, "grad_norm": 0.1930505633354187, "learning_rate": 9.909411018044734e-05, "loss": 0.0106, "step": 6460 }, { "epoch": 5.818345323741007, "grad_norm": 0.45154842734336853, "learning_rate": 9.908888072684802e-05, "loss": 0.0152, "step": 6470 }, { "epoch": 5.827338129496403, "grad_norm": 0.35664990544319153, "learning_rate": 9.908363636134285e-05, "loss": 0.0125, "step": 6480 }, { "epoch": 5.836330935251799, "grad_norm": 0.30570393800735474, "learning_rate": 9.907837708552493e-05, "loss": 0.0112, "step": 6490 }, { "epoch": 5.845323741007194, "grad_norm": 0.27667075395584106, "learning_rate": 9.90731029009919e-05, "loss": 0.0128, "step": 6500 }, { "epoch": 5.85431654676259, "grad_norm": 0.42370617389678955, "learning_rate": 9.906781380934589e-05, "loss": 0.0237, "step": 6510 }, { "epoch": 5.863309352517986, "grad_norm": 0.42119744420051575, "learning_rate": 9.906250981219362e-05, "loss": 0.0171, "step": 6520 }, { "epoch": 5.872302158273381, "grad_norm": 0.45917198061943054, "learning_rate": 9.905719091114628e-05, "loss": 0.0133, "step": 6530 }, { "epoch": 5.881294964028777, "grad_norm": 0.4611833393573761, "learning_rate": 9.905185710781964e-05, "loss": 0.0186, "step": 6540 }, { "epoch": 5.890287769784173, "grad_norm": 0.19112852215766907, "learning_rate": 9.904650840383392e-05, "loss": 0.0166, "step": 6550 }, { "epoch": 5.899280575539568, "grad_norm": 0.320197731256485, "learning_rate": 9.904114480081397e-05, "loss": 0.0124, "step": 6560 }, { "epoch": 5.908273381294964, "grad_norm": 0.4434064030647278, "learning_rate": 9.903576630038906e-05, "loss": 0.0208, "step": 6570 }, { "epoch": 5.91726618705036, "grad_norm": 0.28785908222198486, "learning_rate": 9.903037290419309e-05, "loss": 0.0155, "step": 6580 }, { "epoch": 5.926258992805756, "grad_norm": 0.1989692747592926, "learning_rate": 9.902496461386439e-05, "loss": 0.0145, "step": 6590 }, { "epoch": 5.935251798561151, "grad_norm": 0.3544127643108368, "learning_rate": 9.901954143104588e-05, "loss": 0.0146, "step": 6600 }, { "epoch": 5.944244604316546, "grad_norm": 0.291659951210022, "learning_rate": 9.901410335738496e-05, "loss": 0.0114, "step": 6610 }, { "epoch": 5.953237410071942, "grad_norm": 0.4676528573036194, "learning_rate": 9.900865039453358e-05, "loss": 0.0197, "step": 6620 }, { "epoch": 5.962230215827338, "grad_norm": 0.4087923765182495, "learning_rate": 9.900318254414821e-05, "loss": 0.016, "step": 6630 }, { "epoch": 5.971223021582734, "grad_norm": 0.32116150856018066, "learning_rate": 9.899769980788985e-05, "loss": 0.0159, "step": 6640 }, { "epoch": 5.98021582733813, "grad_norm": 0.3633772134780884, "learning_rate": 9.899220218742398e-05, "loss": 0.0153, "step": 6650 }, { "epoch": 5.989208633093525, "grad_norm": 0.4792759120464325, "learning_rate": 9.898668968442066e-05, "loss": 0.0196, "step": 6660 }, { "epoch": 5.998201438848921, "grad_norm": 0.32364293932914734, "learning_rate": 9.898116230055443e-05, "loss": 0.0156, "step": 6670 }, { "epoch": 6.0071942446043165, "grad_norm": 0.3009689450263977, "learning_rate": 9.897562003750437e-05, "loss": 0.0134, "step": 6680 }, { "epoch": 6.016187050359712, "grad_norm": 0.30460378527641296, "learning_rate": 9.897006289695407e-05, "loss": 0.0184, "step": 6690 }, { "epoch": 6.025179856115108, "grad_norm": 0.36655426025390625, "learning_rate": 9.896449088059164e-05, "loss": 0.0213, "step": 6700 }, { "epoch": 6.034172661870503, "grad_norm": 0.36697837710380554, "learning_rate": 9.89589039901097e-05, "loss": 0.0158, "step": 6710 }, { "epoch": 6.043165467625899, "grad_norm": 0.3533744812011719, "learning_rate": 9.895330222720542e-05, "loss": 0.0152, "step": 6720 }, { "epoch": 6.052158273381295, "grad_norm": 0.2771022915840149, "learning_rate": 9.894768559358047e-05, "loss": 0.0244, "step": 6730 }, { "epoch": 6.061151079136691, "grad_norm": 0.4142623245716095, "learning_rate": 9.894205409094101e-05, "loss": 0.0236, "step": 6740 }, { "epoch": 6.070143884892087, "grad_norm": 0.29286596179008484, "learning_rate": 9.893640772099777e-05, "loss": 0.0173, "step": 6750 }, { "epoch": 6.079136690647482, "grad_norm": 0.38816964626312256, "learning_rate": 9.893074648546595e-05, "loss": 0.0237, "step": 6760 }, { "epoch": 6.088129496402877, "grad_norm": 0.2403395026922226, "learning_rate": 9.892507038606528e-05, "loss": 0.0201, "step": 6770 }, { "epoch": 6.097122302158273, "grad_norm": 0.3101983070373535, "learning_rate": 9.891937942452003e-05, "loss": 0.0169, "step": 6780 }, { "epoch": 6.106115107913669, "grad_norm": 0.22705335915088654, "learning_rate": 9.891367360255895e-05, "loss": 0.0134, "step": 6790 }, { "epoch": 6.115107913669065, "grad_norm": 0.2799359858036041, "learning_rate": 9.890795292191532e-05, "loss": 0.0219, "step": 6800 }, { "epoch": 6.124100719424461, "grad_norm": 0.3121403455734253, "learning_rate": 9.890221738432694e-05, "loss": 0.0172, "step": 6810 }, { "epoch": 6.133093525179856, "grad_norm": 0.3237219452857971, "learning_rate": 9.88964669915361e-05, "loss": 0.0142, "step": 6820 }, { "epoch": 6.142086330935252, "grad_norm": 0.3976234495639801, "learning_rate": 9.889070174528963e-05, "loss": 0.0194, "step": 6830 }, { "epoch": 6.151079136690647, "grad_norm": 0.24991706013679504, "learning_rate": 9.888492164733883e-05, "loss": 0.0137, "step": 6840 }, { "epoch": 6.160071942446043, "grad_norm": 0.3595796823501587, "learning_rate": 9.88791266994396e-05, "loss": 0.0157, "step": 6850 }, { "epoch": 6.169064748201439, "grad_norm": 0.24214684963226318, "learning_rate": 9.887331690335223e-05, "loss": 0.013, "step": 6860 }, { "epoch": 6.178057553956835, "grad_norm": 0.4026492238044739, "learning_rate": 9.886749226084163e-05, "loss": 0.0135, "step": 6870 }, { "epoch": 6.18705035971223, "grad_norm": 0.3170332908630371, "learning_rate": 9.886165277367714e-05, "loss": 0.0221, "step": 6880 }, { "epoch": 6.196043165467626, "grad_norm": 0.3472321033477783, "learning_rate": 9.885579844363265e-05, "loss": 0.0126, "step": 6890 }, { "epoch": 6.205035971223022, "grad_norm": 0.29172077775001526, "learning_rate": 9.884992927248656e-05, "loss": 0.0149, "step": 6900 }, { "epoch": 6.2140287769784175, "grad_norm": 0.3213501274585724, "learning_rate": 9.884404526202178e-05, "loss": 0.0197, "step": 6910 }, { "epoch": 6.223021582733813, "grad_norm": 0.3424447774887085, "learning_rate": 9.883814641402568e-05, "loss": 0.0171, "step": 6920 }, { "epoch": 6.232014388489208, "grad_norm": 0.22430939972400665, "learning_rate": 9.88322327302902e-05, "loss": 0.0125, "step": 6930 }, { "epoch": 6.241007194244604, "grad_norm": 0.10095209628343582, "learning_rate": 9.882630421261176e-05, "loss": 0.012, "step": 6940 }, { "epoch": 6.25, "grad_norm": 0.1595180630683899, "learning_rate": 9.88203608627913e-05, "loss": 0.0132, "step": 6950 }, { "epoch": 6.258992805755396, "grad_norm": 0.2889959216117859, "learning_rate": 9.881440268263422e-05, "loss": 0.0139, "step": 6960 }, { "epoch": 6.267985611510792, "grad_norm": 0.3449006676673889, "learning_rate": 9.880842967395048e-05, "loss": 0.014, "step": 6970 }, { "epoch": 6.276978417266187, "grad_norm": 0.33164921402931213, "learning_rate": 9.880244183855452e-05, "loss": 0.0157, "step": 6980 }, { "epoch": 6.2859712230215825, "grad_norm": 0.2966235876083374, "learning_rate": 9.879643917826527e-05, "loss": 0.0234, "step": 6990 }, { "epoch": 6.294964028776978, "grad_norm": 0.23807089030742645, "learning_rate": 9.87904216949062e-05, "loss": 0.0146, "step": 7000 }, { "epoch": 6.303956834532374, "grad_norm": 0.3713900148868561, "learning_rate": 9.878438939030526e-05, "loss": 0.0163, "step": 7010 }, { "epoch": 6.31294964028777, "grad_norm": 0.24591763317584991, "learning_rate": 9.877834226629489e-05, "loss": 0.0141, "step": 7020 }, { "epoch": 6.321942446043165, "grad_norm": 0.19404545426368713, "learning_rate": 9.877228032471206e-05, "loss": 0.0123, "step": 7030 }, { "epoch": 6.330935251798561, "grad_norm": 0.21122264862060547, "learning_rate": 9.876620356739823e-05, "loss": 0.0121, "step": 7040 }, { "epoch": 6.339928057553957, "grad_norm": 0.1931176781654358, "learning_rate": 9.876011199619935e-05, "loss": 0.0136, "step": 7050 }, { "epoch": 6.348920863309353, "grad_norm": 0.3102763891220093, "learning_rate": 9.875400561296589e-05, "loss": 0.0123, "step": 7060 }, { "epoch": 6.357913669064748, "grad_norm": 0.23333987593650818, "learning_rate": 9.874788441955278e-05, "loss": 0.0158, "step": 7070 }, { "epoch": 6.366906474820144, "grad_norm": 0.35731762647628784, "learning_rate": 9.874174841781951e-05, "loss": 0.0172, "step": 7080 }, { "epoch": 6.375899280575539, "grad_norm": 0.4169794023036957, "learning_rate": 9.873559760963003e-05, "loss": 0.0146, "step": 7090 }, { "epoch": 6.384892086330935, "grad_norm": 0.3385731279850006, "learning_rate": 9.872943199685278e-05, "loss": 0.0147, "step": 7100 }, { "epoch": 6.393884892086331, "grad_norm": 0.3438441753387451, "learning_rate": 9.872325158136071e-05, "loss": 0.0185, "step": 7110 }, { "epoch": 6.402877697841727, "grad_norm": 0.4320373833179474, "learning_rate": 9.871705636503128e-05, "loss": 0.0132, "step": 7120 }, { "epoch": 6.411870503597123, "grad_norm": 0.315799742937088, "learning_rate": 9.871084634974641e-05, "loss": 0.016, "step": 7130 }, { "epoch": 6.420863309352518, "grad_norm": 0.39828556776046753, "learning_rate": 9.870462153739257e-05, "loss": 0.019, "step": 7140 }, { "epoch": 6.429856115107913, "grad_norm": 0.19940787553787231, "learning_rate": 9.869838192986067e-05, "loss": 0.0136, "step": 7150 }, { "epoch": 6.438848920863309, "grad_norm": 0.285359650850296, "learning_rate": 9.869212752904616e-05, "loss": 0.0186, "step": 7160 }, { "epoch": 6.447841726618705, "grad_norm": 0.3437132239341736, "learning_rate": 9.868585833684894e-05, "loss": 0.0137, "step": 7170 }, { "epoch": 6.456834532374101, "grad_norm": 0.27626630663871765, "learning_rate": 9.867957435517342e-05, "loss": 0.0116, "step": 7180 }, { "epoch": 6.465827338129497, "grad_norm": 0.3694213628768921, "learning_rate": 9.867327558592854e-05, "loss": 0.0145, "step": 7190 }, { "epoch": 6.474820143884892, "grad_norm": 0.4571789801120758, "learning_rate": 9.866696203102766e-05, "loss": 0.0185, "step": 7200 }, { "epoch": 6.483812949640288, "grad_norm": 0.28595098853111267, "learning_rate": 9.86606336923887e-05, "loss": 0.0126, "step": 7210 }, { "epoch": 6.4928057553956835, "grad_norm": 0.3039836585521698, "learning_rate": 9.865429057193403e-05, "loss": 0.0122, "step": 7220 }, { "epoch": 6.501798561151079, "grad_norm": 0.3013346791267395, "learning_rate": 9.864793267159053e-05, "loss": 0.0138, "step": 7230 }, { "epoch": 6.510791366906475, "grad_norm": 0.2693064212799072, "learning_rate": 9.864155999328957e-05, "loss": 0.013, "step": 7240 }, { "epoch": 6.51978417266187, "grad_norm": 0.3500765860080719, "learning_rate": 9.8635172538967e-05, "loss": 0.0156, "step": 7250 }, { "epoch": 6.528776978417266, "grad_norm": 0.28568246960639954, "learning_rate": 9.862877031056312e-05, "loss": 0.0139, "step": 7260 }, { "epoch": 6.537769784172662, "grad_norm": 0.32591512799263, "learning_rate": 9.862235331002279e-05, "loss": 0.0204, "step": 7270 }, { "epoch": 6.546762589928058, "grad_norm": 0.538131833076477, "learning_rate": 9.861592153929533e-05, "loss": 0.0134, "step": 7280 }, { "epoch": 6.555755395683454, "grad_norm": 0.316620409488678, "learning_rate": 9.860947500033455e-05, "loss": 0.0142, "step": 7290 }, { "epoch": 6.564748201438849, "grad_norm": 0.37656036019325256, "learning_rate": 9.86030136950987e-05, "loss": 0.0197, "step": 7300 }, { "epoch": 6.573741007194244, "grad_norm": 0.3357205390930176, "learning_rate": 9.85965376255506e-05, "loss": 0.0126, "step": 7310 }, { "epoch": 6.58273381294964, "grad_norm": 0.3696017861366272, "learning_rate": 9.859004679365747e-05, "loss": 0.0144, "step": 7320 }, { "epoch": 6.591726618705036, "grad_norm": 0.28822624683380127, "learning_rate": 9.858354120139108e-05, "loss": 0.0135, "step": 7330 }, { "epoch": 6.600719424460432, "grad_norm": 0.3708309531211853, "learning_rate": 9.857702085072764e-05, "loss": 0.0154, "step": 7340 }, { "epoch": 6.609712230215827, "grad_norm": 0.36960479617118835, "learning_rate": 9.857048574364787e-05, "loss": 0.0162, "step": 7350 }, { "epoch": 6.618705035971223, "grad_norm": 0.13417215645313263, "learning_rate": 9.856393588213698e-05, "loss": 0.0173, "step": 7360 }, { "epoch": 6.627697841726619, "grad_norm": 0.361968994140625, "learning_rate": 9.855737126818458e-05, "loss": 0.0166, "step": 7370 }, { "epoch": 6.636690647482014, "grad_norm": 0.2264057993888855, "learning_rate": 9.855079190378491e-05, "loss": 0.0146, "step": 7380 }, { "epoch": 6.64568345323741, "grad_norm": 0.31936004757881165, "learning_rate": 9.854419779093655e-05, "loss": 0.0145, "step": 7390 }, { "epoch": 6.654676258992806, "grad_norm": 0.3049984574317932, "learning_rate": 9.853758893164264e-05, "loss": 0.0116, "step": 7400 }, { "epoch": 6.663669064748201, "grad_norm": 0.276628315448761, "learning_rate": 9.853096532791078e-05, "loss": 0.0139, "step": 7410 }, { "epoch": 6.672661870503597, "grad_norm": 0.251189649105072, "learning_rate": 9.852432698175304e-05, "loss": 0.0219, "step": 7420 }, { "epoch": 6.681654676258993, "grad_norm": 0.3853735029697418, "learning_rate": 9.851767389518597e-05, "loss": 0.016, "step": 7430 }, { "epoch": 6.690647482014389, "grad_norm": 0.32619646191596985, "learning_rate": 9.85110060702306e-05, "loss": 0.0139, "step": 7440 }, { "epoch": 6.6996402877697845, "grad_norm": 0.27305883169174194, "learning_rate": 9.850432350891245e-05, "loss": 0.0111, "step": 7450 }, { "epoch": 6.7086330935251794, "grad_norm": 0.23285508155822754, "learning_rate": 9.84976262132615e-05, "loss": 0.0135, "step": 7460 }, { "epoch": 6.717625899280575, "grad_norm": 0.2801623046398163, "learning_rate": 9.849091418531222e-05, "loss": 0.0143, "step": 7470 }, { "epoch": 6.726618705035971, "grad_norm": 0.2926919460296631, "learning_rate": 9.848418742710353e-05, "loss": 0.02, "step": 7480 }, { "epoch": 6.735611510791367, "grad_norm": 0.2273934930562973, "learning_rate": 9.847744594067885e-05, "loss": 0.0133, "step": 7490 }, { "epoch": 6.744604316546763, "grad_norm": 0.21343384683132172, "learning_rate": 9.847068972808607e-05, "loss": 0.0143, "step": 7500 }, { "epoch": 6.753597122302159, "grad_norm": 0.3092547059059143, "learning_rate": 9.846391879137756e-05, "loss": 0.0109, "step": 7510 }, { "epoch": 6.762589928057554, "grad_norm": 0.18325723707675934, "learning_rate": 9.845713313261012e-05, "loss": 0.018, "step": 7520 }, { "epoch": 6.7715827338129495, "grad_norm": 0.3062940239906311, "learning_rate": 9.845033275384505e-05, "loss": 0.0214, "step": 7530 }, { "epoch": 6.780575539568345, "grad_norm": 0.3281556963920593, "learning_rate": 9.844351765714818e-05, "loss": 0.0147, "step": 7540 }, { "epoch": 6.789568345323741, "grad_norm": 0.24128341674804688, "learning_rate": 9.843668784458971e-05, "loss": 0.0166, "step": 7550 }, { "epoch": 6.798561151079137, "grad_norm": 0.30157041549682617, "learning_rate": 9.842984331824437e-05, "loss": 0.0125, "step": 7560 }, { "epoch": 6.807553956834532, "grad_norm": 0.21583667397499084, "learning_rate": 9.842298408019133e-05, "loss": 0.0109, "step": 7570 }, { "epoch": 6.816546762589928, "grad_norm": 0.28148478269577026, "learning_rate": 9.841611013251429e-05, "loss": 0.0147, "step": 7580 }, { "epoch": 6.825539568345324, "grad_norm": 0.17182746529579163, "learning_rate": 9.840922147730133e-05, "loss": 0.0159, "step": 7590 }, { "epoch": 6.83453237410072, "grad_norm": 0.30041787028312683, "learning_rate": 9.840231811664506e-05, "loss": 0.0145, "step": 7600 }, { "epoch": 6.843525179856115, "grad_norm": 0.2676701247692108, "learning_rate": 9.839540005264252e-05, "loss": 0.013, "step": 7610 }, { "epoch": 6.852517985611511, "grad_norm": 0.2847594916820526, "learning_rate": 9.838846728739527e-05, "loss": 0.0105, "step": 7620 }, { "epoch": 6.861510791366906, "grad_norm": 0.27255842089653015, "learning_rate": 9.838151982300927e-05, "loss": 0.0207, "step": 7630 }, { "epoch": 6.870503597122302, "grad_norm": 0.16523227095603943, "learning_rate": 9.8374557661595e-05, "loss": 0.011, "step": 7640 }, { "epoch": 6.879496402877698, "grad_norm": 0.2820015251636505, "learning_rate": 9.836758080526735e-05, "loss": 0.018, "step": 7650 }, { "epoch": 6.888489208633094, "grad_norm": 0.1901496946811676, "learning_rate": 9.836058925614575e-05, "loss": 0.0146, "step": 7660 }, { "epoch": 6.897482014388489, "grad_norm": 0.24775324761867523, "learning_rate": 9.8353583016354e-05, "loss": 0.014, "step": 7670 }, { "epoch": 6.906474820143885, "grad_norm": 0.275926798582077, "learning_rate": 9.834656208802044e-05, "loss": 0.0151, "step": 7680 }, { "epoch": 6.91546762589928, "grad_norm": 0.3123582601547241, "learning_rate": 9.833952647327784e-05, "loss": 0.0152, "step": 7690 }, { "epoch": 6.924460431654676, "grad_norm": 0.368440181016922, "learning_rate": 9.833247617426342e-05, "loss": 0.0141, "step": 7700 }, { "epoch": 6.933453237410072, "grad_norm": 0.3985920548439026, "learning_rate": 9.832541119311889e-05, "loss": 0.0147, "step": 7710 }, { "epoch": 6.942446043165468, "grad_norm": 0.31502053141593933, "learning_rate": 9.83183315319904e-05, "loss": 0.0191, "step": 7720 }, { "epoch": 6.951438848920863, "grad_norm": 0.30114054679870605, "learning_rate": 9.831123719302855e-05, "loss": 0.0157, "step": 7730 }, { "epoch": 6.960431654676259, "grad_norm": 0.3541383147239685, "learning_rate": 9.830412817838842e-05, "loss": 0.0143, "step": 7740 }, { "epoch": 6.969424460431655, "grad_norm": 0.24623307585716248, "learning_rate": 9.829700449022956e-05, "loss": 0.0151, "step": 7750 }, { "epoch": 6.9784172661870505, "grad_norm": 0.26751166582107544, "learning_rate": 9.828986613071593e-05, "loss": 0.0121, "step": 7760 }, { "epoch": 6.987410071942446, "grad_norm": 0.24560123682022095, "learning_rate": 9.828271310201601e-05, "loss": 0.0113, "step": 7770 }, { "epoch": 6.996402877697841, "grad_norm": 0.22223839163780212, "learning_rate": 9.827554540630268e-05, "loss": 0.0113, "step": 7780 }, { "epoch": 7.005395683453237, "grad_norm": 0.22892551124095917, "learning_rate": 9.826836304575329e-05, "loss": 0.0165, "step": 7790 }, { "epoch": 7.014388489208633, "grad_norm": 0.1896945834159851, "learning_rate": 9.826116602254966e-05, "loss": 0.0101, "step": 7800 }, { "epoch": 7.023381294964029, "grad_norm": 0.1912059187889099, "learning_rate": 9.825395433887805e-05, "loss": 0.0128, "step": 7810 }, { "epoch": 7.032374100719425, "grad_norm": 0.24248164892196655, "learning_rate": 9.824672799692917e-05, "loss": 0.0125, "step": 7820 }, { "epoch": 7.0413669064748206, "grad_norm": 0.42050978541374207, "learning_rate": 9.823948699889823e-05, "loss": 0.0149, "step": 7830 }, { "epoch": 7.0503597122302155, "grad_norm": 0.24977348744869232, "learning_rate": 9.823223134698483e-05, "loss": 0.0116, "step": 7840 }, { "epoch": 7.059352517985611, "grad_norm": 0.4667723774909973, "learning_rate": 9.822496104339303e-05, "loss": 0.0141, "step": 7850 }, { "epoch": 7.068345323741007, "grad_norm": 0.4929187595844269, "learning_rate": 9.821767609033138e-05, "loss": 0.0138, "step": 7860 }, { "epoch": 7.077338129496403, "grad_norm": 0.44623827934265137, "learning_rate": 9.821037649001284e-05, "loss": 0.0135, "step": 7870 }, { "epoch": 7.086330935251799, "grad_norm": 0.353865385055542, "learning_rate": 9.820306224465486e-05, "loss": 0.0141, "step": 7880 }, { "epoch": 7.095323741007194, "grad_norm": 0.2270999550819397, "learning_rate": 9.819573335647928e-05, "loss": 0.0134, "step": 7890 }, { "epoch": 7.10431654676259, "grad_norm": 0.3132058084011078, "learning_rate": 9.818838982771246e-05, "loss": 0.0199, "step": 7900 }, { "epoch": 7.113309352517986, "grad_norm": 0.32158640027046204, "learning_rate": 9.818103166058514e-05, "loss": 0.0129, "step": 7910 }, { "epoch": 7.122302158273381, "grad_norm": 0.23491990566253662, "learning_rate": 9.817365885733254e-05, "loss": 0.0139, "step": 7920 }, { "epoch": 7.131294964028777, "grad_norm": 0.23369397222995758, "learning_rate": 9.816627142019434e-05, "loss": 0.0137, "step": 7930 }, { "epoch": 7.140287769784172, "grad_norm": 0.3525892198085785, "learning_rate": 9.815886935141463e-05, "loss": 0.0175, "step": 7940 }, { "epoch": 7.149280575539568, "grad_norm": 0.23105253279209137, "learning_rate": 9.8151452653242e-05, "loss": 0.0132, "step": 7950 }, { "epoch": 7.158273381294964, "grad_norm": 0.3227084279060364, "learning_rate": 9.814402132792939e-05, "loss": 0.0142, "step": 7960 }, { "epoch": 7.16726618705036, "grad_norm": 0.1596461981534958, "learning_rate": 9.813657537773428e-05, "loss": 0.0154, "step": 7970 }, { "epoch": 7.176258992805756, "grad_norm": 0.18190278112888336, "learning_rate": 9.812911480491854e-05, "loss": 0.0128, "step": 7980 }, { "epoch": 7.1852517985611515, "grad_norm": 0.19840726256370544, "learning_rate": 9.81216396117485e-05, "loss": 0.0132, "step": 7990 }, { "epoch": 7.194244604316546, "grad_norm": 0.31567707657814026, "learning_rate": 9.811414980049491e-05, "loss": 0.0152, "step": 8000 }, { "epoch": 7.203237410071942, "grad_norm": 0.307396799325943, "learning_rate": 9.810664537343301e-05, "loss": 0.0141, "step": 8010 }, { "epoch": 7.212230215827338, "grad_norm": 0.25621482729911804, "learning_rate": 9.809912633284243e-05, "loss": 0.0127, "step": 8020 }, { "epoch": 7.221223021582734, "grad_norm": 0.18184679746627808, "learning_rate": 9.809159268100725e-05, "loss": 0.0109, "step": 8030 }, { "epoch": 7.23021582733813, "grad_norm": 0.22988209128379822, "learning_rate": 9.808404442021599e-05, "loss": 0.0132, "step": 8040 }, { "epoch": 7.239208633093525, "grad_norm": 0.21264107525348663, "learning_rate": 9.807648155276163e-05, "loss": 0.014, "step": 8050 }, { "epoch": 7.248201438848921, "grad_norm": 0.28661543130874634, "learning_rate": 9.806890408094156e-05, "loss": 0.0147, "step": 8060 }, { "epoch": 7.2571942446043165, "grad_norm": 0.24012288451194763, "learning_rate": 9.806131200705761e-05, "loss": 0.0118, "step": 8070 }, { "epoch": 7.266187050359712, "grad_norm": 0.15828709304332733, "learning_rate": 9.805370533341605e-05, "loss": 0.0151, "step": 8080 }, { "epoch": 7.275179856115108, "grad_norm": 0.1906590312719345, "learning_rate": 9.804608406232762e-05, "loss": 0.0122, "step": 8090 }, { "epoch": 7.284172661870503, "grad_norm": 0.2992948889732361, "learning_rate": 9.803844819610741e-05, "loss": 0.0134, "step": 8100 }, { "epoch": 7.293165467625899, "grad_norm": 0.28639718890190125, "learning_rate": 9.803079773707504e-05, "loss": 0.0139, "step": 8110 }, { "epoch": 7.302158273381295, "grad_norm": 0.19264866411685944, "learning_rate": 9.802313268755447e-05, "loss": 0.0172, "step": 8120 }, { "epoch": 7.311151079136691, "grad_norm": 0.2826678156852722, "learning_rate": 9.801545304987419e-05, "loss": 0.0147, "step": 8130 }, { "epoch": 7.320143884892087, "grad_norm": 0.20837731659412384, "learning_rate": 9.800775882636704e-05, "loss": 0.0133, "step": 8140 }, { "epoch": 7.329136690647482, "grad_norm": 0.18511268496513367, "learning_rate": 9.800005001937034e-05, "loss": 0.0095, "step": 8150 }, { "epoch": 7.338129496402877, "grad_norm": 0.34261709451675415, "learning_rate": 9.79923266312258e-05, "loss": 0.0132, "step": 8160 }, { "epoch": 7.347122302158273, "grad_norm": 0.2577700912952423, "learning_rate": 9.79845886642796e-05, "loss": 0.0142, "step": 8170 }, { "epoch": 7.356115107913669, "grad_norm": 0.2687057852745056, "learning_rate": 9.797683612088233e-05, "loss": 0.0127, "step": 8180 }, { "epoch": 7.365107913669065, "grad_norm": 0.22334130108356476, "learning_rate": 9.796906900338898e-05, "loss": 0.0153, "step": 8190 }, { "epoch": 7.374100719424461, "grad_norm": 0.33839115500450134, "learning_rate": 9.796128731415903e-05, "loss": 0.0166, "step": 8200 }, { "epoch": 7.383093525179856, "grad_norm": 0.24766181409358978, "learning_rate": 9.795349105555634e-05, "loss": 0.0138, "step": 8210 }, { "epoch": 7.392086330935252, "grad_norm": 0.3434305489063263, "learning_rate": 9.794568022994922e-05, "loss": 0.0148, "step": 8220 }, { "epoch": 7.401079136690647, "grad_norm": 0.33678603172302246, "learning_rate": 9.793785483971034e-05, "loss": 0.0138, "step": 8230 }, { "epoch": 7.410071942446043, "grad_norm": 0.3088516891002655, "learning_rate": 9.793001488721691e-05, "loss": 0.0149, "step": 8240 }, { "epoch": 7.419064748201439, "grad_norm": 0.33419087529182434, "learning_rate": 9.792216037485047e-05, "loss": 0.0119, "step": 8250 }, { "epoch": 7.428057553956835, "grad_norm": 0.20412872731685638, "learning_rate": 9.791429130499704e-05, "loss": 0.0131, "step": 8260 }, { "epoch": 7.43705035971223, "grad_norm": 0.269997775554657, "learning_rate": 9.790640768004698e-05, "loss": 0.0143, "step": 8270 }, { "epoch": 7.446043165467626, "grad_norm": 0.2657114267349243, "learning_rate": 9.789850950239518e-05, "loss": 0.0149, "step": 8280 }, { "epoch": 7.455035971223022, "grad_norm": 0.1736127883195877, "learning_rate": 9.789059677444089e-05, "loss": 0.0164, "step": 8290 }, { "epoch": 7.4640287769784175, "grad_norm": 0.21796011924743652, "learning_rate": 9.788266949858776e-05, "loss": 0.0112, "step": 8300 }, { "epoch": 7.473021582733813, "grad_norm": 0.37243854999542236, "learning_rate": 9.787472767724392e-05, "loss": 0.01, "step": 8310 }, { "epoch": 7.482014388489208, "grad_norm": 0.35739970207214355, "learning_rate": 9.786677131282185e-05, "loss": 0.0145, "step": 8320 }, { "epoch": 7.491007194244604, "grad_norm": 0.19709472358226776, "learning_rate": 9.785880040773853e-05, "loss": 0.0142, "step": 8330 }, { "epoch": 7.5, "grad_norm": 0.2555290460586548, "learning_rate": 9.785081496441527e-05, "loss": 0.0198, "step": 8340 }, { "epoch": 7.508992805755396, "grad_norm": 0.3664574921131134, "learning_rate": 9.784281498527785e-05, "loss": 0.0122, "step": 8350 }, { "epoch": 7.517985611510792, "grad_norm": 0.307015061378479, "learning_rate": 9.783480047275646e-05, "loss": 0.0194, "step": 8360 }, { "epoch": 7.5269784172661875, "grad_norm": 0.2628996670246124, "learning_rate": 9.78267714292857e-05, "loss": 0.0182, "step": 8370 }, { "epoch": 7.5359712230215825, "grad_norm": 0.2691592574119568, "learning_rate": 9.781872785730454e-05, "loss": 0.0136, "step": 8380 }, { "epoch": 7.544964028776978, "grad_norm": 0.22525444626808167, "learning_rate": 9.781066975925646e-05, "loss": 0.013, "step": 8390 }, { "epoch": 7.553956834532374, "grad_norm": 0.2467961460351944, "learning_rate": 9.780259713758928e-05, "loss": 0.0137, "step": 8400 }, { "epoch": 7.56294964028777, "grad_norm": 0.3243885338306427, "learning_rate": 9.779450999475524e-05, "loss": 0.0127, "step": 8410 }, { "epoch": 7.571942446043165, "grad_norm": 0.24511153995990753, "learning_rate": 9.7786408333211e-05, "loss": 0.0151, "step": 8420 }, { "epoch": 7.580935251798561, "grad_norm": 0.22888602316379547, "learning_rate": 9.777829215541764e-05, "loss": 0.0156, "step": 8430 }, { "epoch": 7.589928057553957, "grad_norm": 0.1968606561422348, "learning_rate": 9.777016146384064e-05, "loss": 0.0113, "step": 8440 }, { "epoch": 7.598920863309353, "grad_norm": 0.24817785620689392, "learning_rate": 9.776201626094988e-05, "loss": 0.0125, "step": 8450 }, { "epoch": 7.607913669064748, "grad_norm": 0.2986956536769867, "learning_rate": 9.775385654921965e-05, "loss": 0.0123, "step": 8460 }, { "epoch": 7.616906474820144, "grad_norm": 0.27078360319137573, "learning_rate": 9.774568233112868e-05, "loss": 0.0155, "step": 8470 }, { "epoch": 7.625899280575539, "grad_norm": 0.35625869035720825, "learning_rate": 9.773749360916007e-05, "loss": 0.0161, "step": 8480 }, { "epoch": 7.634892086330935, "grad_norm": 0.26284265518188477, "learning_rate": 9.772929038580134e-05, "loss": 0.0149, "step": 8490 }, { "epoch": 7.643884892086331, "grad_norm": 0.47807517647743225, "learning_rate": 9.772107266354439e-05, "loss": 0.0173, "step": 8500 }, { "epoch": 7.652877697841727, "grad_norm": 0.3621053695678711, "learning_rate": 9.77128404448856e-05, "loss": 0.0204, "step": 8510 }, { "epoch": 7.661870503597123, "grad_norm": 0.2538863718509674, "learning_rate": 9.770459373232565e-05, "loss": 0.0117, "step": 8520 }, { "epoch": 7.670863309352518, "grad_norm": 0.35613712668418884, "learning_rate": 9.769633252836969e-05, "loss": 0.0165, "step": 8530 }, { "epoch": 7.679856115107913, "grad_norm": 0.28326377272605896, "learning_rate": 9.768805683552724e-05, "loss": 0.0107, "step": 8540 }, { "epoch": 7.688848920863309, "grad_norm": 0.23828333616256714, "learning_rate": 9.767976665631228e-05, "loss": 0.012, "step": 8550 }, { "epoch": 7.697841726618705, "grad_norm": 0.25042346119880676, "learning_rate": 9.767146199324311e-05, "loss": 0.0136, "step": 8560 }, { "epoch": 7.706834532374101, "grad_norm": 0.2950842082500458, "learning_rate": 9.766314284884249e-05, "loss": 0.0109, "step": 8570 }, { "epoch": 7.715827338129497, "grad_norm": 0.3001496493816376, "learning_rate": 9.765480922563752e-05, "loss": 0.0143, "step": 8580 }, { "epoch": 7.724820143884892, "grad_norm": 0.3203628957271576, "learning_rate": 9.764646112615978e-05, "loss": 0.0148, "step": 8590 }, { "epoch": 7.733812949640288, "grad_norm": 0.33481013774871826, "learning_rate": 9.763809855294517e-05, "loss": 0.0138, "step": 8600 }, { "epoch": 7.7428057553956835, "grad_norm": 0.24260449409484863, "learning_rate": 9.762972150853404e-05, "loss": 0.012, "step": 8610 }, { "epoch": 7.751798561151079, "grad_norm": 0.37393447756767273, "learning_rate": 9.762132999547111e-05, "loss": 0.012, "step": 8620 }, { "epoch": 7.760791366906475, "grad_norm": 0.2974550426006317, "learning_rate": 9.761292401630549e-05, "loss": 0.0179, "step": 8630 }, { "epoch": 7.76978417266187, "grad_norm": 0.18205220997333527, "learning_rate": 9.76045035735907e-05, "loss": 0.01, "step": 8640 }, { "epoch": 7.778776978417266, "grad_norm": 0.17864660918712616, "learning_rate": 9.759606866988464e-05, "loss": 0.0132, "step": 8650 }, { "epoch": 7.787769784172662, "grad_norm": 0.28401514887809753, "learning_rate": 9.758761930774963e-05, "loss": 0.0089, "step": 8660 }, { "epoch": 7.796762589928058, "grad_norm": 0.24392332136631012, "learning_rate": 9.757915548975235e-05, "loss": 0.0101, "step": 8670 }, { "epoch": 7.805755395683454, "grad_norm": 0.2712804675102234, "learning_rate": 9.757067721846389e-05, "loss": 0.0134, "step": 8680 }, { "epoch": 7.814748201438849, "grad_norm": 0.22601334750652313, "learning_rate": 9.756218449645971e-05, "loss": 0.0119, "step": 8690 }, { "epoch": 7.823741007194244, "grad_norm": 0.3393121063709259, "learning_rate": 9.75536773263197e-05, "loss": 0.0125, "step": 8700 }, { "epoch": 7.83273381294964, "grad_norm": 0.335697740316391, "learning_rate": 9.75451557106281e-05, "loss": 0.0101, "step": 8710 }, { "epoch": 7.841726618705036, "grad_norm": 0.29606300592422485, "learning_rate": 9.753661965197354e-05, "loss": 0.0118, "step": 8720 }, { "epoch": 7.850719424460432, "grad_norm": 0.20096386969089508, "learning_rate": 9.752806915294908e-05, "loss": 0.0122, "step": 8730 }, { "epoch": 7.859712230215827, "grad_norm": 0.3253609240055084, "learning_rate": 9.75195042161521e-05, "loss": 0.0121, "step": 8740 }, { "epoch": 7.868705035971223, "grad_norm": 0.2798093259334564, "learning_rate": 9.751092484418442e-05, "loss": 0.011, "step": 8750 }, { "epoch": 7.877697841726619, "grad_norm": 0.21442589163780212, "learning_rate": 9.750233103965224e-05, "loss": 0.0139, "step": 8760 }, { "epoch": 7.886690647482014, "grad_norm": 0.23417450487613678, "learning_rate": 9.749372280516611e-05, "loss": 0.0151, "step": 8770 }, { "epoch": 7.89568345323741, "grad_norm": 0.32738709449768066, "learning_rate": 9.748510014334097e-05, "loss": 0.0187, "step": 8780 }, { "epoch": 7.904676258992806, "grad_norm": 0.4163707196712494, "learning_rate": 9.747646305679621e-05, "loss": 0.0171, "step": 8790 }, { "epoch": 7.913669064748201, "grad_norm": 0.3658519685268402, "learning_rate": 9.74678115481555e-05, "loss": 0.0214, "step": 8800 }, { "epoch": 7.922661870503597, "grad_norm": 0.37025585770606995, "learning_rate": 9.745914562004696e-05, "loss": 0.0167, "step": 8810 }, { "epoch": 7.931654676258993, "grad_norm": 0.35384172201156616, "learning_rate": 9.745046527510307e-05, "loss": 0.0122, "step": 8820 }, { "epoch": 7.940647482014389, "grad_norm": 0.40574875473976135, "learning_rate": 9.744177051596068e-05, "loss": 0.0177, "step": 8830 }, { "epoch": 7.9496402877697845, "grad_norm": 0.37623071670532227, "learning_rate": 9.743306134526105e-05, "loss": 0.015, "step": 8840 }, { "epoch": 7.9586330935251794, "grad_norm": 0.23740990459918976, "learning_rate": 9.742433776564977e-05, "loss": 0.0139, "step": 8850 }, { "epoch": 7.967625899280575, "grad_norm": 0.26697230339050293, "learning_rate": 9.741559977977683e-05, "loss": 0.0139, "step": 8860 }, { "epoch": 7.976618705035971, "grad_norm": 0.3132588267326355, "learning_rate": 9.740684739029661e-05, "loss": 0.0145, "step": 8870 }, { "epoch": 7.985611510791367, "grad_norm": 0.33297693729400635, "learning_rate": 9.739808059986789e-05, "loss": 0.0209, "step": 8880 }, { "epoch": 7.994604316546763, "grad_norm": 0.25827813148498535, "learning_rate": 9.738929941115373e-05, "loss": 0.0147, "step": 8890 }, { "epoch": 8.003597122302159, "grad_norm": 0.2859897017478943, "learning_rate": 9.738050382682167e-05, "loss": 0.0093, "step": 8900 }, { "epoch": 8.012589928057555, "grad_norm": 0.30201950669288635, "learning_rate": 9.737169384954355e-05, "loss": 0.0161, "step": 8910 }, { "epoch": 8.02158273381295, "grad_norm": 0.1818581223487854, "learning_rate": 9.736286948199562e-05, "loss": 0.012, "step": 8920 }, { "epoch": 8.030575539568344, "grad_norm": 0.3638345003128052, "learning_rate": 9.735403072685848e-05, "loss": 0.0114, "step": 8930 }, { "epoch": 8.03956834532374, "grad_norm": 0.2599697709083557, "learning_rate": 9.734517758681712e-05, "loss": 0.0131, "step": 8940 }, { "epoch": 8.048561151079136, "grad_norm": 0.2865602672100067, "learning_rate": 9.733631006456088e-05, "loss": 0.0115, "step": 8950 }, { "epoch": 8.057553956834532, "grad_norm": 0.3866965174674988, "learning_rate": 9.732742816278348e-05, "loss": 0.0142, "step": 8960 }, { "epoch": 8.066546762589928, "grad_norm": 0.30373984575271606, "learning_rate": 9.731853188418302e-05, "loss": 0.0119, "step": 8970 }, { "epoch": 8.075539568345324, "grad_norm": 0.2848149538040161, "learning_rate": 9.730962123146194e-05, "loss": 0.0176, "step": 8980 }, { "epoch": 8.08453237410072, "grad_norm": 0.20121708512306213, "learning_rate": 9.730069620732709e-05, "loss": 0.0134, "step": 8990 }, { "epoch": 8.093525179856115, "grad_norm": 0.24035270512104034, "learning_rate": 9.72917568144896e-05, "loss": 0.0126, "step": 9000 }, { "epoch": 8.102517985611511, "grad_norm": 0.38574594259262085, "learning_rate": 9.728280305566509e-05, "loss": 0.0181, "step": 9010 }, { "epoch": 8.111510791366907, "grad_norm": 0.2913314998149872, "learning_rate": 9.727383493357343e-05, "loss": 0.0181, "step": 9020 }, { "epoch": 8.120503597122303, "grad_norm": 0.4293472468852997, "learning_rate": 9.726485245093891e-05, "loss": 0.0147, "step": 9030 }, { "epoch": 8.129496402877697, "grad_norm": 0.2602013945579529, "learning_rate": 9.725585561049018e-05, "loss": 0.0121, "step": 9040 }, { "epoch": 8.138489208633093, "grad_norm": 0.3968658745288849, "learning_rate": 9.724684441496022e-05, "loss": 0.0167, "step": 9050 }, { "epoch": 8.147482014388489, "grad_norm": 0.29755133390426636, "learning_rate": 9.72378188670864e-05, "loss": 0.0126, "step": 9060 }, { "epoch": 8.156474820143885, "grad_norm": 0.19019144773483276, "learning_rate": 9.722877896961047e-05, "loss": 0.0124, "step": 9070 }, { "epoch": 8.16546762589928, "grad_norm": 0.21659888327121735, "learning_rate": 9.721972472527848e-05, "loss": 0.012, "step": 9080 }, { "epoch": 8.174460431654676, "grad_norm": 0.25202253460884094, "learning_rate": 9.721065613684089e-05, "loss": 0.0116, "step": 9090 }, { "epoch": 8.183453237410072, "grad_norm": 0.25838571786880493, "learning_rate": 9.72015732070525e-05, "loss": 0.0113, "step": 9100 }, { "epoch": 8.192446043165468, "grad_norm": 0.2843961715698242, "learning_rate": 9.719247593867244e-05, "loss": 0.0114, "step": 9110 }, { "epoch": 8.201438848920864, "grad_norm": 0.22379620373249054, "learning_rate": 9.718336433446423e-05, "loss": 0.0121, "step": 9120 }, { "epoch": 8.21043165467626, "grad_norm": 0.2679446339607239, "learning_rate": 9.717423839719574e-05, "loss": 0.0152, "step": 9130 }, { "epoch": 8.219424460431656, "grad_norm": 0.38786107301712036, "learning_rate": 9.71650981296392e-05, "loss": 0.0127, "step": 9140 }, { "epoch": 8.22841726618705, "grad_norm": 0.41950666904449463, "learning_rate": 9.715594353457118e-05, "loss": 0.0124, "step": 9150 }, { "epoch": 8.237410071942445, "grad_norm": 0.244532972574234, "learning_rate": 9.714677461477257e-05, "loss": 0.0142, "step": 9160 }, { "epoch": 8.246402877697841, "grad_norm": 0.29029878973960876, "learning_rate": 9.713759137302869e-05, "loss": 0.0129, "step": 9170 }, { "epoch": 8.255395683453237, "grad_norm": 0.23624874651432037, "learning_rate": 9.712839381212914e-05, "loss": 0.0117, "step": 9180 }, { "epoch": 8.264388489208633, "grad_norm": 0.2991546094417572, "learning_rate": 9.71191819348679e-05, "loss": 0.0111, "step": 9190 }, { "epoch": 8.273381294964029, "grad_norm": 0.2459460198879242, "learning_rate": 9.710995574404331e-05, "loss": 0.0126, "step": 9200 }, { "epoch": 8.282374100719425, "grad_norm": 0.2870553135871887, "learning_rate": 9.710071524245802e-05, "loss": 0.0147, "step": 9210 }, { "epoch": 8.29136690647482, "grad_norm": 0.23649844527244568, "learning_rate": 9.709146043291906e-05, "loss": 0.0097, "step": 9220 }, { "epoch": 8.300359712230216, "grad_norm": 0.47159111499786377, "learning_rate": 9.70821913182378e-05, "loss": 0.0146, "step": 9230 }, { "epoch": 8.309352517985612, "grad_norm": 0.41146788001060486, "learning_rate": 9.707290790122995e-05, "loss": 0.0154, "step": 9240 }, { "epoch": 8.318345323741006, "grad_norm": 0.22113989293575287, "learning_rate": 9.706361018471557e-05, "loss": 0.0142, "step": 9250 }, { "epoch": 8.327338129496402, "grad_norm": 0.23865538835525513, "learning_rate": 9.705429817151906e-05, "loss": 0.0126, "step": 9260 }, { "epoch": 8.336330935251798, "grad_norm": 0.2610316276550293, "learning_rate": 9.704497186446917e-05, "loss": 0.0109, "step": 9270 }, { "epoch": 8.345323741007194, "grad_norm": 0.3161206543445587, "learning_rate": 9.703563126639896e-05, "loss": 0.0177, "step": 9280 }, { "epoch": 8.35431654676259, "grad_norm": 0.3687174320220947, "learning_rate": 9.70262763801459e-05, "loss": 0.0114, "step": 9290 }, { "epoch": 8.363309352517986, "grad_norm": 0.30190813541412354, "learning_rate": 9.701690720855171e-05, "loss": 0.0159, "step": 9300 }, { "epoch": 8.372302158273381, "grad_norm": 0.3401632606983185, "learning_rate": 9.700752375446253e-05, "loss": 0.0126, "step": 9310 }, { "epoch": 8.381294964028777, "grad_norm": 0.29253464937210083, "learning_rate": 9.69981260207288e-05, "loss": 0.0244, "step": 9320 }, { "epoch": 8.390287769784173, "grad_norm": 0.28587111830711365, "learning_rate": 9.698871401020529e-05, "loss": 0.0131, "step": 9330 }, { "epoch": 8.399280575539569, "grad_norm": 0.2856445610523224, "learning_rate": 9.697928772575112e-05, "loss": 0.0137, "step": 9340 }, { "epoch": 8.408273381294965, "grad_norm": 0.2629420757293701, "learning_rate": 9.696984717022976e-05, "loss": 0.0096, "step": 9350 }, { "epoch": 8.417266187050359, "grad_norm": 0.25624677538871765, "learning_rate": 9.6960392346509e-05, "loss": 0.0106, "step": 9360 }, { "epoch": 8.426258992805755, "grad_norm": 0.19367435574531555, "learning_rate": 9.695092325746097e-05, "loss": 0.0203, "step": 9370 }, { "epoch": 8.43525179856115, "grad_norm": 0.2369888871908188, "learning_rate": 9.694143990596211e-05, "loss": 0.0121, "step": 9380 }, { "epoch": 8.444244604316546, "grad_norm": 0.31392714381217957, "learning_rate": 9.693194229489325e-05, "loss": 0.0242, "step": 9390 }, { "epoch": 8.453237410071942, "grad_norm": 0.3594159781932831, "learning_rate": 9.692243042713944e-05, "loss": 0.0105, "step": 9400 }, { "epoch": 8.462230215827338, "grad_norm": 0.3227817416191101, "learning_rate": 9.691290430559022e-05, "loss": 0.0127, "step": 9410 }, { "epoch": 8.471223021582734, "grad_norm": 0.2665427625179291, "learning_rate": 9.690336393313932e-05, "loss": 0.0127, "step": 9420 }, { "epoch": 8.48021582733813, "grad_norm": 0.23605775833129883, "learning_rate": 9.689380931268487e-05, "loss": 0.0158, "step": 9430 }, { "epoch": 8.489208633093526, "grad_norm": 0.2466113418340683, "learning_rate": 9.688424044712932e-05, "loss": 0.0122, "step": 9440 }, { "epoch": 8.498201438848922, "grad_norm": 0.2527177631855011, "learning_rate": 9.687465733937942e-05, "loss": 0.0159, "step": 9450 }, { "epoch": 8.507194244604317, "grad_norm": 0.35510024428367615, "learning_rate": 9.686505999234627e-05, "loss": 0.01, "step": 9460 }, { "epoch": 8.516187050359711, "grad_norm": 0.24586661159992218, "learning_rate": 9.685544840894529e-05, "loss": 0.0115, "step": 9470 }, { "epoch": 8.525179856115107, "grad_norm": 0.24523335695266724, "learning_rate": 9.684582259209624e-05, "loss": 0.0111, "step": 9480 }, { "epoch": 8.534172661870503, "grad_norm": 0.22623993456363678, "learning_rate": 9.683618254472317e-05, "loss": 0.0138, "step": 9490 }, { "epoch": 8.543165467625899, "grad_norm": 0.2645338773727417, "learning_rate": 9.682652826975449e-05, "loss": 0.013, "step": 9500 }, { "epoch": 8.552158273381295, "grad_norm": 0.19939105212688446, "learning_rate": 9.681685977012291e-05, "loss": 0.0146, "step": 9510 }, { "epoch": 8.56115107913669, "grad_norm": 0.25064313411712646, "learning_rate": 9.680717704876546e-05, "loss": 0.0127, "step": 9520 }, { "epoch": 8.570143884892087, "grad_norm": 0.24782580137252808, "learning_rate": 9.679748010862349e-05, "loss": 0.0129, "step": 9530 }, { "epoch": 8.579136690647482, "grad_norm": 0.33917632699012756, "learning_rate": 9.678776895264267e-05, "loss": 0.0141, "step": 9540 }, { "epoch": 8.588129496402878, "grad_norm": 0.3072395920753479, "learning_rate": 9.6778043583773e-05, "loss": 0.0167, "step": 9550 }, { "epoch": 8.597122302158274, "grad_norm": 0.2172016054391861, "learning_rate": 9.67683040049688e-05, "loss": 0.0101, "step": 9560 }, { "epoch": 8.60611510791367, "grad_norm": 0.18217888474464417, "learning_rate": 9.675855021918869e-05, "loss": 0.0147, "step": 9570 }, { "epoch": 8.615107913669064, "grad_norm": 0.16000841557979584, "learning_rate": 9.674878222939561e-05, "loss": 0.0105, "step": 9580 }, { "epoch": 8.62410071942446, "grad_norm": 0.25596052408218384, "learning_rate": 9.673900003855681e-05, "loss": 0.0115, "step": 9590 }, { "epoch": 8.633093525179856, "grad_norm": 0.23004211485385895, "learning_rate": 9.672920364964389e-05, "loss": 0.0205, "step": 9600 }, { "epoch": 8.642086330935252, "grad_norm": 0.2949293851852417, "learning_rate": 9.671939306563269e-05, "loss": 0.0117, "step": 9610 }, { "epoch": 8.651079136690647, "grad_norm": 0.2059028595685959, "learning_rate": 9.670956828950345e-05, "loss": 0.0126, "step": 9620 }, { "epoch": 8.660071942446043, "grad_norm": 0.32228395342826843, "learning_rate": 9.669972932424065e-05, "loss": 0.0115, "step": 9630 }, { "epoch": 8.66906474820144, "grad_norm": 0.19533395767211914, "learning_rate": 9.668987617283312e-05, "loss": 0.0093, "step": 9640 }, { "epoch": 8.678057553956835, "grad_norm": 0.18511545658111572, "learning_rate": 9.668000883827397e-05, "loss": 0.0145, "step": 9650 }, { "epoch": 8.68705035971223, "grad_norm": 0.26224929094314575, "learning_rate": 9.667012732356067e-05, "loss": 0.0139, "step": 9660 }, { "epoch": 8.696043165467627, "grad_norm": 0.3949420750141144, "learning_rate": 9.666023163169493e-05, "loss": 0.0276, "step": 9670 }, { "epoch": 8.70503597122302, "grad_norm": 0.23025809228420258, "learning_rate": 9.665032176568281e-05, "loss": 0.0126, "step": 9680 }, { "epoch": 8.714028776978417, "grad_norm": 0.1936158388853073, "learning_rate": 9.664039772853469e-05, "loss": 0.0158, "step": 9690 }, { "epoch": 8.723021582733812, "grad_norm": 0.18519818782806396, "learning_rate": 9.663045952326518e-05, "loss": 0.0128, "step": 9700 }, { "epoch": 8.732014388489208, "grad_norm": 0.21682853996753693, "learning_rate": 9.662050715289328e-05, "loss": 0.011, "step": 9710 }, { "epoch": 8.741007194244604, "grad_norm": 0.2287682443857193, "learning_rate": 9.661054062044226e-05, "loss": 0.0188, "step": 9720 }, { "epoch": 8.75, "grad_norm": 0.15529678761959076, "learning_rate": 9.660055992893968e-05, "loss": 0.0117, "step": 9730 }, { "epoch": 8.758992805755396, "grad_norm": 0.18809610605239868, "learning_rate": 9.659056508141739e-05, "loss": 0.0122, "step": 9740 }, { "epoch": 8.767985611510792, "grad_norm": 0.26317691802978516, "learning_rate": 9.658055608091161e-05, "loss": 0.0167, "step": 9750 }, { "epoch": 8.776978417266188, "grad_norm": 0.17190980911254883, "learning_rate": 9.657053293046276e-05, "loss": 0.0144, "step": 9760 }, { "epoch": 8.785971223021583, "grad_norm": 0.30014216899871826, "learning_rate": 9.656049563311564e-05, "loss": 0.0102, "step": 9770 }, { "epoch": 8.79496402877698, "grad_norm": 0.3662255108356476, "learning_rate": 9.655044419191929e-05, "loss": 0.0165, "step": 9780 }, { "epoch": 8.803956834532373, "grad_norm": 0.2577444016933441, "learning_rate": 9.654037860992711e-05, "loss": 0.0144, "step": 9790 }, { "epoch": 8.81294964028777, "grad_norm": 0.39859217405319214, "learning_rate": 9.653029889019672e-05, "loss": 0.0168, "step": 9800 }, { "epoch": 8.821942446043165, "grad_norm": 0.232044517993927, "learning_rate": 9.65202050357901e-05, "loss": 0.0138, "step": 9810 }, { "epoch": 8.83093525179856, "grad_norm": 0.27907609939575195, "learning_rate": 9.651009704977347e-05, "loss": 0.0133, "step": 9820 }, { "epoch": 8.839928057553957, "grad_norm": 0.24844202399253845, "learning_rate": 9.649997493521738e-05, "loss": 0.0123, "step": 9830 }, { "epoch": 8.848920863309353, "grad_norm": 0.345768004655838, "learning_rate": 9.64898386951967e-05, "loss": 0.0119, "step": 9840 }, { "epoch": 8.857913669064748, "grad_norm": 0.40873631834983826, "learning_rate": 9.647968833279049e-05, "loss": 0.0162, "step": 9850 }, { "epoch": 8.866906474820144, "grad_norm": 0.39219334721565247, "learning_rate": 9.646952385108218e-05, "loss": 0.0163, "step": 9860 }, { "epoch": 8.87589928057554, "grad_norm": 0.3141116797924042, "learning_rate": 9.645934525315951e-05, "loss": 0.0119, "step": 9870 }, { "epoch": 8.884892086330936, "grad_norm": 0.31655648350715637, "learning_rate": 9.644915254211442e-05, "loss": 0.0131, "step": 9880 }, { "epoch": 8.89388489208633, "grad_norm": 0.24636052548885345, "learning_rate": 9.643894572104321e-05, "loss": 0.0116, "step": 9890 }, { "epoch": 8.902877697841726, "grad_norm": 0.21533334255218506, "learning_rate": 9.642872479304644e-05, "loss": 0.0134, "step": 9900 }, { "epoch": 8.911870503597122, "grad_norm": 0.2535178065299988, "learning_rate": 9.641848976122895e-05, "loss": 0.0113, "step": 9910 }, { "epoch": 8.920863309352518, "grad_norm": 0.2768927216529846, "learning_rate": 9.64082406286999e-05, "loss": 0.0084, "step": 9920 }, { "epoch": 8.929856115107913, "grad_norm": 0.42477551102638245, "learning_rate": 9.639797739857269e-05, "loss": 0.02, "step": 9930 }, { "epoch": 8.93884892086331, "grad_norm": 0.3222229480743408, "learning_rate": 9.638770007396498e-05, "loss": 0.0125, "step": 9940 }, { "epoch": 8.947841726618705, "grad_norm": 0.31956276297569275, "learning_rate": 9.63774086579988e-05, "loss": 0.0209, "step": 9950 }, { "epoch": 8.956834532374101, "grad_norm": 0.21721699833869934, "learning_rate": 9.63671031538004e-05, "loss": 0.0115, "step": 9960 }, { "epoch": 8.965827338129497, "grad_norm": 0.32669597864151, "learning_rate": 9.635678356450031e-05, "loss": 0.0143, "step": 9970 }, { "epoch": 8.974820143884893, "grad_norm": 0.20435692369937897, "learning_rate": 9.634644989323336e-05, "loss": 0.0121, "step": 9980 }, { "epoch": 8.983812949640289, "grad_norm": 0.2816207706928253, "learning_rate": 9.633610214313861e-05, "loss": 0.012, "step": 9990 }, { "epoch": 8.992805755395683, "grad_norm": 0.3126622140407562, "learning_rate": 9.632574031735951e-05, "loss": 0.0114, "step": 10000 }, { "epoch": 9.001798561151078, "grad_norm": 0.19034959375858307, "learning_rate": 9.631536441904364e-05, "loss": 0.0093, "step": 10010 }, { "epoch": 9.010791366906474, "grad_norm": 0.24008747935295105, "learning_rate": 9.630497445134293e-05, "loss": 0.0182, "step": 10020 }, { "epoch": 9.01978417266187, "grad_norm": 0.22581157088279724, "learning_rate": 9.62945704174136e-05, "loss": 0.0104, "step": 10030 }, { "epoch": 9.028776978417266, "grad_norm": 0.3023940324783325, "learning_rate": 9.628415232041612e-05, "loss": 0.0167, "step": 10040 }, { "epoch": 9.037769784172662, "grad_norm": 0.40769413113594055, "learning_rate": 9.627372016351524e-05, "loss": 0.0131, "step": 10050 }, { "epoch": 9.046762589928058, "grad_norm": 0.1920979768037796, "learning_rate": 9.626327394987995e-05, "loss": 0.0116, "step": 10060 }, { "epoch": 9.055755395683454, "grad_norm": 0.1582612842321396, "learning_rate": 9.625281368268355e-05, "loss": 0.0106, "step": 10070 }, { "epoch": 9.06474820143885, "grad_norm": 0.21145126223564148, "learning_rate": 9.624233936510357e-05, "loss": 0.0115, "step": 10080 }, { "epoch": 9.073741007194245, "grad_norm": 0.40961387753486633, "learning_rate": 9.623185100032187e-05, "loss": 0.0174, "step": 10090 }, { "epoch": 9.082733812949641, "grad_norm": 0.24653840065002441, "learning_rate": 9.62213485915245e-05, "loss": 0.0102, "step": 10100 }, { "epoch": 9.091726618705035, "grad_norm": 0.32400840520858765, "learning_rate": 9.621083214190186e-05, "loss": 0.011, "step": 10110 }, { "epoch": 9.100719424460431, "grad_norm": 0.3279179334640503, "learning_rate": 9.62003016546485e-05, "loss": 0.0166, "step": 10120 }, { "epoch": 9.109712230215827, "grad_norm": 0.308858186006546, "learning_rate": 9.618975713296339e-05, "loss": 0.0097, "step": 10130 }, { "epoch": 9.118705035971223, "grad_norm": 0.25354674458503723, "learning_rate": 9.61791985800496e-05, "loss": 0.0081, "step": 10140 }, { "epoch": 9.127697841726619, "grad_norm": 0.21132895350456238, "learning_rate": 9.616862599911458e-05, "loss": 0.013, "step": 10150 }, { "epoch": 9.136690647482014, "grad_norm": 0.2438725084066391, "learning_rate": 9.615803939337e-05, "loss": 0.013, "step": 10160 }, { "epoch": 9.14568345323741, "grad_norm": 0.25052791833877563, "learning_rate": 9.614743876603178e-05, "loss": 0.0131, "step": 10170 }, { "epoch": 9.154676258992806, "grad_norm": 0.2145964652299881, "learning_rate": 9.613682412032013e-05, "loss": 0.0145, "step": 10180 }, { "epoch": 9.163669064748202, "grad_norm": 0.25553616881370544, "learning_rate": 9.612619545945947e-05, "loss": 0.0192, "step": 10190 }, { "epoch": 9.172661870503598, "grad_norm": 0.2468167096376419, "learning_rate": 9.611555278667852e-05, "loss": 0.0126, "step": 10200 }, { "epoch": 9.181654676258994, "grad_norm": 0.2839725613594055, "learning_rate": 9.610489610521024e-05, "loss": 0.0125, "step": 10210 }, { "epoch": 9.190647482014388, "grad_norm": 0.30509504675865173, "learning_rate": 9.609422541829187e-05, "loss": 0.013, "step": 10220 }, { "epoch": 9.199640287769784, "grad_norm": 0.2354154735803604, "learning_rate": 9.608354072916486e-05, "loss": 0.0208, "step": 10230 }, { "epoch": 9.20863309352518, "grad_norm": 0.2649502456188202, "learning_rate": 9.607284204107493e-05, "loss": 0.0156, "step": 10240 }, { "epoch": 9.217625899280575, "grad_norm": 0.3492107689380646, "learning_rate": 9.606212935727208e-05, "loss": 0.0102, "step": 10250 }, { "epoch": 9.226618705035971, "grad_norm": 0.29217734932899475, "learning_rate": 9.605140268101052e-05, "loss": 0.012, "step": 10260 }, { "epoch": 9.235611510791367, "grad_norm": 0.2974001169204712, "learning_rate": 9.604066201554875e-05, "loss": 0.0107, "step": 10270 }, { "epoch": 9.244604316546763, "grad_norm": 0.34472495317459106, "learning_rate": 9.60299073641495e-05, "loss": 0.0117, "step": 10280 }, { "epoch": 9.253597122302159, "grad_norm": 0.19400005042552948, "learning_rate": 9.601913873007974e-05, "loss": 0.009, "step": 10290 }, { "epoch": 9.262589928057555, "grad_norm": 0.24884243309497833, "learning_rate": 9.60083561166107e-05, "loss": 0.0116, "step": 10300 }, { "epoch": 9.27158273381295, "grad_norm": 0.22204895317554474, "learning_rate": 9.599755952701783e-05, "loss": 0.0123, "step": 10310 }, { "epoch": 9.280575539568344, "grad_norm": 0.32717040181159973, "learning_rate": 9.598674896458089e-05, "loss": 0.0126, "step": 10320 }, { "epoch": 9.28956834532374, "grad_norm": 0.28308266401290894, "learning_rate": 9.597592443258383e-05, "loss": 0.01, "step": 10330 }, { "epoch": 9.298561151079136, "grad_norm": 0.31274834275245667, "learning_rate": 9.596508593431483e-05, "loss": 0.0084, "step": 10340 }, { "epoch": 9.307553956834532, "grad_norm": 0.19648519158363342, "learning_rate": 9.59542334730664e-05, "loss": 0.0119, "step": 10350 }, { "epoch": 9.316546762589928, "grad_norm": 0.34536653757095337, "learning_rate": 9.594336705213516e-05, "loss": 0.0111, "step": 10360 }, { "epoch": 9.325539568345324, "grad_norm": 0.2671663761138916, "learning_rate": 9.593248667482208e-05, "loss": 0.0139, "step": 10370 }, { "epoch": 9.33453237410072, "grad_norm": 0.45667144656181335, "learning_rate": 9.592159234443233e-05, "loss": 0.0147, "step": 10380 }, { "epoch": 9.343525179856115, "grad_norm": 0.2950770854949951, "learning_rate": 9.59106840642753e-05, "loss": 0.0195, "step": 10390 }, { "epoch": 9.352517985611511, "grad_norm": 0.3053819239139557, "learning_rate": 9.589976183766467e-05, "loss": 0.0134, "step": 10400 }, { "epoch": 9.361510791366907, "grad_norm": 0.20726048946380615, "learning_rate": 9.58888256679183e-05, "loss": 0.0137, "step": 10410 }, { "epoch": 9.370503597122303, "grad_norm": 0.3242001235485077, "learning_rate": 9.587787555835832e-05, "loss": 0.0123, "step": 10420 }, { "epoch": 9.379496402877697, "grad_norm": 0.22633592784404755, "learning_rate": 9.586691151231107e-05, "loss": 0.0124, "step": 10430 }, { "epoch": 9.388489208633093, "grad_norm": 0.301980584859848, "learning_rate": 9.585593353310715e-05, "loss": 0.0137, "step": 10440 }, { "epoch": 9.397482014388489, "grad_norm": 0.30210649967193604, "learning_rate": 9.58449416240814e-05, "loss": 0.015, "step": 10450 }, { "epoch": 9.406474820143885, "grad_norm": 0.33246153593063354, "learning_rate": 9.583393578857283e-05, "loss": 0.0131, "step": 10460 }, { "epoch": 9.41546762589928, "grad_norm": 0.2777871787548065, "learning_rate": 9.582291602992474e-05, "loss": 0.0114, "step": 10470 }, { "epoch": 9.424460431654676, "grad_norm": 0.4096430242061615, "learning_rate": 9.581188235148466e-05, "loss": 0.0146, "step": 10480 }, { "epoch": 9.433453237410072, "grad_norm": 0.34158241748809814, "learning_rate": 9.58008347566043e-05, "loss": 0.0104, "step": 10490 }, { "epoch": 9.442446043165468, "grad_norm": 0.28645965456962585, "learning_rate": 9.578977324863965e-05, "loss": 0.0118, "step": 10500 }, { "epoch": 9.451438848920864, "grad_norm": 0.27467358112335205, "learning_rate": 9.577869783095089e-05, "loss": 0.0131, "step": 10510 }, { "epoch": 9.46043165467626, "grad_norm": 0.15838980674743652, "learning_rate": 9.576760850690245e-05, "loss": 0.0138, "step": 10520 }, { "epoch": 9.469424460431656, "grad_norm": 0.26652923226356506, "learning_rate": 9.575650527986298e-05, "loss": 0.0142, "step": 10530 }, { "epoch": 9.47841726618705, "grad_norm": 0.16359597444534302, "learning_rate": 9.574538815320531e-05, "loss": 0.01, "step": 10540 }, { "epoch": 9.487410071942445, "grad_norm": 0.22446006536483765, "learning_rate": 9.573425713030656e-05, "loss": 0.0131, "step": 10550 }, { "epoch": 9.496402877697841, "grad_norm": 0.3119538426399231, "learning_rate": 9.572311221454806e-05, "loss": 0.0089, "step": 10560 }, { "epoch": 9.505395683453237, "grad_norm": 0.2509544789791107, "learning_rate": 9.57119534093153e-05, "loss": 0.0149, "step": 10570 }, { "epoch": 9.514388489208633, "grad_norm": 0.32391273975372314, "learning_rate": 9.570078071799806e-05, "loss": 0.011, "step": 10580 }, { "epoch": 9.523381294964029, "grad_norm": 0.35093075037002563, "learning_rate": 9.568959414399028e-05, "loss": 0.0113, "step": 10590 }, { "epoch": 9.532374100719425, "grad_norm": 0.2313140481710434, "learning_rate": 9.567839369069018e-05, "loss": 0.0131, "step": 10600 }, { "epoch": 9.54136690647482, "grad_norm": 0.26351481676101685, "learning_rate": 9.566717936150013e-05, "loss": 0.0126, "step": 10610 }, { "epoch": 9.550359712230216, "grad_norm": 0.26474708318710327, "learning_rate": 9.565595115982678e-05, "loss": 0.0099, "step": 10620 }, { "epoch": 9.559352517985612, "grad_norm": 0.26029616594314575, "learning_rate": 9.564470908908094e-05, "loss": 0.0127, "step": 10630 }, { "epoch": 9.568345323741006, "grad_norm": 0.18185864388942719, "learning_rate": 9.563345315267764e-05, "loss": 0.0139, "step": 10640 }, { "epoch": 9.577338129496402, "grad_norm": 0.1599336713552475, "learning_rate": 9.562218335403616e-05, "loss": 0.0103, "step": 10650 }, { "epoch": 9.586330935251798, "grad_norm": 0.25643280148506165, "learning_rate": 9.561089969657999e-05, "loss": 0.0097, "step": 10660 }, { "epoch": 9.595323741007194, "grad_norm": 0.22385159134864807, "learning_rate": 9.559960218373673e-05, "loss": 0.014, "step": 10670 }, { "epoch": 9.60431654676259, "grad_norm": 0.23609423637390137, "learning_rate": 9.558829081893836e-05, "loss": 0.0134, "step": 10680 }, { "epoch": 9.613309352517986, "grad_norm": 0.23086056113243103, "learning_rate": 9.55769656056209e-05, "loss": 0.0124, "step": 10690 }, { "epoch": 9.622302158273381, "grad_norm": 0.4727957546710968, "learning_rate": 9.556562654722469e-05, "loss": 0.0135, "step": 10700 }, { "epoch": 9.631294964028777, "grad_norm": 0.45255401730537415, "learning_rate": 9.555427364719422e-05, "loss": 0.0168, "step": 10710 }, { "epoch": 9.640287769784173, "grad_norm": 0.3938722610473633, "learning_rate": 9.55429069089782e-05, "loss": 0.0129, "step": 10720 }, { "epoch": 9.649280575539569, "grad_norm": 0.22394439578056335, "learning_rate": 9.553152633602956e-05, "loss": 0.0151, "step": 10730 }, { "epoch": 9.658273381294965, "grad_norm": 0.31391486525535583, "learning_rate": 9.552013193180543e-05, "loss": 0.0151, "step": 10740 }, { "epoch": 9.667266187050359, "grad_norm": 0.37763485312461853, "learning_rate": 9.550872369976707e-05, "loss": 0.0117, "step": 10750 }, { "epoch": 9.676258992805755, "grad_norm": 0.35183054208755493, "learning_rate": 9.549730164338007e-05, "loss": 0.0135, "step": 10760 }, { "epoch": 9.68525179856115, "grad_norm": 0.2140798568725586, "learning_rate": 9.548586576611408e-05, "loss": 0.0103, "step": 10770 }, { "epoch": 9.694244604316546, "grad_norm": 0.20838354527950287, "learning_rate": 9.54744160714431e-05, "loss": 0.0105, "step": 10780 }, { "epoch": 9.703237410071942, "grad_norm": 0.19017863273620605, "learning_rate": 9.546295256284516e-05, "loss": 0.0107, "step": 10790 }, { "epoch": 9.712230215827338, "grad_norm": 0.21263867616653442, "learning_rate": 9.545147524380265e-05, "loss": 0.0124, "step": 10800 }, { "epoch": 9.721223021582734, "grad_norm": 0.22467470169067383, "learning_rate": 9.543998411780201e-05, "loss": 0.013, "step": 10810 }, { "epoch": 9.73021582733813, "grad_norm": 0.3300039768218994, "learning_rate": 9.542847918833397e-05, "loss": 0.011, "step": 10820 }, { "epoch": 9.739208633093526, "grad_norm": 0.22539038956165314, "learning_rate": 9.541696045889343e-05, "loss": 0.0102, "step": 10830 }, { "epoch": 9.748201438848922, "grad_norm": 0.21904124319553375, "learning_rate": 9.540542793297947e-05, "loss": 0.0086, "step": 10840 }, { "epoch": 9.757194244604317, "grad_norm": 0.28066542744636536, "learning_rate": 9.539388161409537e-05, "loss": 0.0112, "step": 10850 }, { "epoch": 9.766187050359711, "grad_norm": 0.3801414966583252, "learning_rate": 9.538232150574857e-05, "loss": 0.0133, "step": 10860 }, { "epoch": 9.775179856115107, "grad_norm": 0.22066235542297363, "learning_rate": 9.537074761145076e-05, "loss": 0.0095, "step": 10870 }, { "epoch": 9.784172661870503, "grad_norm": 0.2904055416584015, "learning_rate": 9.535915993471778e-05, "loss": 0.014, "step": 10880 }, { "epoch": 9.793165467625899, "grad_norm": 0.30911701917648315, "learning_rate": 9.534755847906964e-05, "loss": 0.0193, "step": 10890 }, { "epoch": 9.802158273381295, "grad_norm": 0.3932766020298004, "learning_rate": 9.533594324803057e-05, "loss": 0.0109, "step": 10900 }, { "epoch": 9.81115107913669, "grad_norm": 0.19095289707183838, "learning_rate": 9.532431424512895e-05, "loss": 0.0089, "step": 10910 }, { "epoch": 9.820143884892087, "grad_norm": 0.3187963664531708, "learning_rate": 9.531267147389741e-05, "loss": 0.0172, "step": 10920 }, { "epoch": 9.829136690647482, "grad_norm": 0.23486916720867157, "learning_rate": 9.530101493787266e-05, "loss": 0.0164, "step": 10930 }, { "epoch": 9.838129496402878, "grad_norm": 0.27390721440315247, "learning_rate": 9.528934464059571e-05, "loss": 0.0136, "step": 10940 }, { "epoch": 9.847122302158274, "grad_norm": 0.1932508796453476, "learning_rate": 9.527766058561163e-05, "loss": 0.0108, "step": 10950 }, { "epoch": 9.85611510791367, "grad_norm": 0.4313051402568817, "learning_rate": 9.526596277646976e-05, "loss": 0.0142, "step": 10960 }, { "epoch": 9.865107913669064, "grad_norm": 0.3601059317588806, "learning_rate": 9.525425121672358e-05, "loss": 0.0106, "step": 10970 }, { "epoch": 9.87410071942446, "grad_norm": 0.27229222655296326, "learning_rate": 9.524252590993074e-05, "loss": 0.0106, "step": 10980 }, { "epoch": 9.883093525179856, "grad_norm": 0.21564243733882904, "learning_rate": 9.523078685965309e-05, "loss": 0.0119, "step": 10990 }, { "epoch": 9.892086330935252, "grad_norm": 0.23878902196884155, "learning_rate": 9.521903406945664e-05, "loss": 0.0126, "step": 11000 }, { "epoch": 9.901079136690647, "grad_norm": 0.17237769067287445, "learning_rate": 9.520726754291158e-05, "loss": 0.0132, "step": 11010 }, { "epoch": 9.910071942446043, "grad_norm": 0.20268508791923523, "learning_rate": 9.519548728359227e-05, "loss": 0.0129, "step": 11020 }, { "epoch": 9.91906474820144, "grad_norm": 0.1894335001707077, "learning_rate": 9.518369329507726e-05, "loss": 0.0102, "step": 11030 }, { "epoch": 9.928057553956835, "grad_norm": 0.25901928544044495, "learning_rate": 9.51718855809492e-05, "loss": 0.0105, "step": 11040 }, { "epoch": 9.93705035971223, "grad_norm": 0.30965280532836914, "learning_rate": 9.516006414479502e-05, "loss": 0.0135, "step": 11050 }, { "epoch": 9.946043165467627, "grad_norm": 0.2640356719493866, "learning_rate": 9.514822899020572e-05, "loss": 0.0213, "step": 11060 }, { "epoch": 9.95503597122302, "grad_norm": 0.27481815218925476, "learning_rate": 9.513638012077654e-05, "loss": 0.0135, "step": 11070 }, { "epoch": 9.964028776978417, "grad_norm": 0.2593681812286377, "learning_rate": 9.512451754010683e-05, "loss": 0.0107, "step": 11080 }, { "epoch": 9.973021582733812, "grad_norm": 0.3419885039329529, "learning_rate": 9.511264125180013e-05, "loss": 0.0094, "step": 11090 }, { "epoch": 9.982014388489208, "grad_norm": 0.15792831778526306, "learning_rate": 9.510075125946414e-05, "loss": 0.0111, "step": 11100 }, { "epoch": 9.991007194244604, "grad_norm": 0.16743841767311096, "learning_rate": 9.508884756671075e-05, "loss": 0.0123, "step": 11110 }, { "epoch": 10.0, "grad_norm": 0.4686104655265808, "learning_rate": 9.507693017715596e-05, "loss": 0.0124, "step": 11120 }, { "epoch": 10.008992805755396, "grad_norm": 0.3116721212863922, "learning_rate": 9.506499909441997e-05, "loss": 0.0106, "step": 11130 }, { "epoch": 10.017985611510792, "grad_norm": 0.34563809633255005, "learning_rate": 9.505305432212713e-05, "loss": 0.0109, "step": 11140 }, { "epoch": 10.026978417266188, "grad_norm": 0.3206530809402466, "learning_rate": 9.504109586390595e-05, "loss": 0.0112, "step": 11150 }, { "epoch": 10.035971223021583, "grad_norm": 0.28937166929244995, "learning_rate": 9.502912372338908e-05, "loss": 0.0145, "step": 11160 }, { "epoch": 10.04496402877698, "grad_norm": 0.35417550802230835, "learning_rate": 9.501713790421335e-05, "loss": 0.0152, "step": 11170 }, { "epoch": 10.053956834532373, "grad_norm": 0.2671443521976471, "learning_rate": 9.500513841001974e-05, "loss": 0.0125, "step": 11180 }, { "epoch": 10.06294964028777, "grad_norm": 0.24806608259677887, "learning_rate": 9.499312524445336e-05, "loss": 0.0143, "step": 11190 }, { "epoch": 10.071942446043165, "grad_norm": 0.313983291387558, "learning_rate": 9.498109841116351e-05, "loss": 0.0122, "step": 11200 }, { "epoch": 10.08093525179856, "grad_norm": 0.2532663643360138, "learning_rate": 9.496905791380363e-05, "loss": 0.0189, "step": 11210 }, { "epoch": 10.089928057553957, "grad_norm": 0.27411121129989624, "learning_rate": 9.495700375603129e-05, "loss": 0.0116, "step": 11220 }, { "epoch": 10.098920863309353, "grad_norm": 0.3661630153656006, "learning_rate": 9.494493594150822e-05, "loss": 0.0151, "step": 11230 }, { "epoch": 10.107913669064748, "grad_norm": 0.29447105526924133, "learning_rate": 9.493285447390032e-05, "loss": 0.0131, "step": 11240 }, { "epoch": 10.116906474820144, "grad_norm": 0.1929466277360916, "learning_rate": 9.492075935687761e-05, "loss": 0.0127, "step": 11250 }, { "epoch": 10.12589928057554, "grad_norm": 0.21024039387702942, "learning_rate": 9.490865059411427e-05, "loss": 0.0115, "step": 11260 }, { "epoch": 10.134892086330936, "grad_norm": 0.31434664130210876, "learning_rate": 9.489652818928863e-05, "loss": 0.0138, "step": 11270 }, { "epoch": 10.14388489208633, "grad_norm": 0.32669761776924133, "learning_rate": 9.488439214608315e-05, "loss": 0.0218, "step": 11280 }, { "epoch": 10.152877697841726, "grad_norm": 0.27024635672569275, "learning_rate": 9.487224246818444e-05, "loss": 0.011, "step": 11290 }, { "epoch": 10.161870503597122, "grad_norm": 0.19340011477470398, "learning_rate": 9.486007915928325e-05, "loss": 0.0105, "step": 11300 }, { "epoch": 10.170863309352518, "grad_norm": 0.2927538752555847, "learning_rate": 9.484790222307448e-05, "loss": 0.0129, "step": 11310 }, { "epoch": 10.179856115107913, "grad_norm": 0.20986570417881012, "learning_rate": 9.483571166325716e-05, "loss": 0.0154, "step": 11320 }, { "epoch": 10.18884892086331, "grad_norm": 0.3660582900047302, "learning_rate": 9.482350748353444e-05, "loss": 0.0129, "step": 11330 }, { "epoch": 10.197841726618705, "grad_norm": 0.33180972933769226, "learning_rate": 9.481128968761363e-05, "loss": 0.0117, "step": 11340 }, { "epoch": 10.206834532374101, "grad_norm": 0.2937066853046417, "learning_rate": 9.479905827920621e-05, "loss": 0.0111, "step": 11350 }, { "epoch": 10.215827338129497, "grad_norm": 0.36349955201148987, "learning_rate": 9.478681326202773e-05, "loss": 0.0128, "step": 11360 }, { "epoch": 10.224820143884893, "grad_norm": 0.26965323090553284, "learning_rate": 9.477455463979791e-05, "loss": 0.0106, "step": 11370 }, { "epoch": 10.233812949640289, "grad_norm": 0.3716491460800171, "learning_rate": 9.476228241624059e-05, "loss": 0.0173, "step": 11380 }, { "epoch": 10.242805755395683, "grad_norm": 0.24234138429164886, "learning_rate": 9.474999659508374e-05, "loss": 0.0115, "step": 11390 }, { "epoch": 10.251798561151078, "grad_norm": 0.2325478345155716, "learning_rate": 9.47376971800595e-05, "loss": 0.0085, "step": 11400 }, { "epoch": 10.260791366906474, "grad_norm": 0.22337712347507477, "learning_rate": 9.472538417490409e-05, "loss": 0.0155, "step": 11410 }, { "epoch": 10.26978417266187, "grad_norm": 0.32077154517173767, "learning_rate": 9.471305758335784e-05, "loss": 0.0135, "step": 11420 }, { "epoch": 10.278776978417266, "grad_norm": 0.3098817467689514, "learning_rate": 9.47007174091653e-05, "loss": 0.0189, "step": 11430 }, { "epoch": 10.287769784172662, "grad_norm": 0.2585045099258423, "learning_rate": 9.468836365607507e-05, "loss": 0.0162, "step": 11440 }, { "epoch": 10.296762589928058, "grad_norm": 0.35853296518325806, "learning_rate": 9.467599632783988e-05, "loss": 0.016, "step": 11450 }, { "epoch": 10.305755395683454, "grad_norm": 0.29752299189567566, "learning_rate": 9.466361542821662e-05, "loss": 0.0149, "step": 11460 }, { "epoch": 10.31474820143885, "grad_norm": 0.32388991117477417, "learning_rate": 9.465122096096625e-05, "loss": 0.0095, "step": 11470 }, { "epoch": 10.323741007194245, "grad_norm": 0.337380975484848, "learning_rate": 9.463881292985391e-05, "loss": 0.0169, "step": 11480 }, { "epoch": 10.332733812949641, "grad_norm": 0.355055034160614, "learning_rate": 9.462639133864881e-05, "loss": 0.0104, "step": 11490 }, { "epoch": 10.341726618705035, "grad_norm": 0.27670496702194214, "learning_rate": 9.461395619112432e-05, "loss": 0.0133, "step": 11500 }, { "epoch": 10.350719424460431, "grad_norm": 0.34794914722442627, "learning_rate": 9.460150749105791e-05, "loss": 0.0106, "step": 11510 }, { "epoch": 10.359712230215827, "grad_norm": 0.30898353457450867, "learning_rate": 9.458904524223116e-05, "loss": 0.0124, "step": 11520 }, { "epoch": 10.368705035971223, "grad_norm": 0.2835516929626465, "learning_rate": 9.457656944842976e-05, "loss": 0.0145, "step": 11530 }, { "epoch": 10.377697841726619, "grad_norm": 0.22424596548080444, "learning_rate": 9.456408011344353e-05, "loss": 0.0096, "step": 11540 }, { "epoch": 10.386690647482014, "grad_norm": 0.25043684244155884, "learning_rate": 9.455157724106643e-05, "loss": 0.0126, "step": 11550 }, { "epoch": 10.39568345323741, "grad_norm": 0.22768834233283997, "learning_rate": 9.453906083509647e-05, "loss": 0.0141, "step": 11560 }, { "epoch": 10.404676258992806, "grad_norm": 0.24087825417518616, "learning_rate": 9.45265308993358e-05, "loss": 0.0092, "step": 11570 }, { "epoch": 10.413669064748202, "grad_norm": 0.2917632758617401, "learning_rate": 9.451398743759071e-05, "loss": 0.0181, "step": 11580 }, { "epoch": 10.422661870503598, "grad_norm": 0.1654195487499237, "learning_rate": 9.450143045367156e-05, "loss": 0.0105, "step": 11590 }, { "epoch": 10.431654676258994, "grad_norm": 0.2274540662765503, "learning_rate": 9.448885995139283e-05, "loss": 0.0117, "step": 11600 }, { "epoch": 10.440647482014388, "grad_norm": 0.256679892539978, "learning_rate": 9.44762759345731e-05, "loss": 0.0105, "step": 11610 }, { "epoch": 10.449640287769784, "grad_norm": 0.2880480885505676, "learning_rate": 9.446367840703509e-05, "loss": 0.0124, "step": 11620 }, { "epoch": 10.45863309352518, "grad_norm": 0.24376118183135986, "learning_rate": 9.445106737260556e-05, "loss": 0.0095, "step": 11630 }, { "epoch": 10.467625899280575, "grad_norm": 0.4016568660736084, "learning_rate": 9.443844283511543e-05, "loss": 0.0109, "step": 11640 }, { "epoch": 10.476618705035971, "grad_norm": 0.3336844742298126, "learning_rate": 9.442580479839968e-05, "loss": 0.0152, "step": 11650 }, { "epoch": 10.485611510791367, "grad_norm": 0.327433705329895, "learning_rate": 9.441315326629745e-05, "loss": 0.0143, "step": 11660 }, { "epoch": 10.494604316546763, "grad_norm": 0.30363601446151733, "learning_rate": 9.44004882426519e-05, "loss": 0.0129, "step": 11670 }, { "epoch": 10.503597122302159, "grad_norm": 0.24772463738918304, "learning_rate": 9.438780973131037e-05, "loss": 0.0118, "step": 11680 }, { "epoch": 10.512589928057555, "grad_norm": 0.25337642431259155, "learning_rate": 9.437511773612423e-05, "loss": 0.0151, "step": 11690 }, { "epoch": 10.52158273381295, "grad_norm": 0.19112487137317657, "learning_rate": 9.436241226094896e-05, "loss": 0.0179, "step": 11700 }, { "epoch": 10.530575539568346, "grad_norm": 0.23782426118850708, "learning_rate": 9.434969330964418e-05, "loss": 0.0122, "step": 11710 }, { "epoch": 10.53956834532374, "grad_norm": 0.24616746604442596, "learning_rate": 9.433696088607356e-05, "loss": 0.012, "step": 11720 }, { "epoch": 10.548561151079136, "grad_norm": 0.2337513118982315, "learning_rate": 9.432421499410486e-05, "loss": 0.0114, "step": 11730 }, { "epoch": 10.557553956834532, "grad_norm": 0.26053690910339355, "learning_rate": 9.431145563760998e-05, "loss": 0.0106, "step": 11740 }, { "epoch": 10.566546762589928, "grad_norm": 0.24443534016609192, "learning_rate": 9.429868282046484e-05, "loss": 0.0145, "step": 11750 }, { "epoch": 10.575539568345324, "grad_norm": 0.30495378375053406, "learning_rate": 9.428589654654951e-05, "loss": 0.0201, "step": 11760 }, { "epoch": 10.58453237410072, "grad_norm": 0.3151768147945404, "learning_rate": 9.42730968197481e-05, "loss": 0.0113, "step": 11770 }, { "epoch": 10.593525179856115, "grad_norm": 0.277603417634964, "learning_rate": 9.426028364394883e-05, "loss": 0.0111, "step": 11780 }, { "epoch": 10.602517985611511, "grad_norm": 0.2669908404350281, "learning_rate": 9.424745702304402e-05, "loss": 0.0103, "step": 11790 }, { "epoch": 10.611510791366907, "grad_norm": 0.2368995100259781, "learning_rate": 9.423461696093006e-05, "loss": 0.0144, "step": 11800 }, { "epoch": 10.620503597122303, "grad_norm": 0.19827482104301453, "learning_rate": 9.422176346150741e-05, "loss": 0.0105, "step": 11810 }, { "epoch": 10.629496402877697, "grad_norm": 0.31111273169517517, "learning_rate": 9.420889652868063e-05, "loss": 0.0117, "step": 11820 }, { "epoch": 10.638489208633093, "grad_norm": 0.15614724159240723, "learning_rate": 9.419601616635836e-05, "loss": 0.0108, "step": 11830 }, { "epoch": 10.647482014388489, "grad_norm": 0.17434850335121155, "learning_rate": 9.418312237845331e-05, "loss": 0.0128, "step": 11840 }, { "epoch": 10.656474820143885, "grad_norm": 0.24739427864551544, "learning_rate": 9.417021516888225e-05, "loss": 0.01, "step": 11850 }, { "epoch": 10.66546762589928, "grad_norm": 0.23200677335262299, "learning_rate": 9.415729454156608e-05, "loss": 0.0096, "step": 11860 }, { "epoch": 10.674460431654676, "grad_norm": 0.21525153517723083, "learning_rate": 9.414436050042973e-05, "loss": 0.0115, "step": 11870 }, { "epoch": 10.683453237410072, "grad_norm": 0.24880154430866241, "learning_rate": 9.413141304940223e-05, "loss": 0.0101, "step": 11880 }, { "epoch": 10.692446043165468, "grad_norm": 0.3018129765987396, "learning_rate": 9.411845219241666e-05, "loss": 0.0101, "step": 11890 }, { "epoch": 10.701438848920864, "grad_norm": 0.31930965185165405, "learning_rate": 9.410547793341021e-05, "loss": 0.0113, "step": 11900 }, { "epoch": 10.71043165467626, "grad_norm": 0.21952296793460846, "learning_rate": 9.409249027632408e-05, "loss": 0.0102, "step": 11910 }, { "epoch": 10.719424460431654, "grad_norm": 0.2835155129432678, "learning_rate": 9.407948922510362e-05, "loss": 0.0112, "step": 11920 }, { "epoch": 10.72841726618705, "grad_norm": 0.32913118600845337, "learning_rate": 9.406647478369817e-05, "loss": 0.0092, "step": 11930 }, { "epoch": 10.737410071942445, "grad_norm": 0.19119049608707428, "learning_rate": 9.405344695606118e-05, "loss": 0.011, "step": 11940 }, { "epoch": 10.746402877697841, "grad_norm": 0.2581065595149994, "learning_rate": 9.404040574615018e-05, "loss": 0.0103, "step": 11950 }, { "epoch": 10.755395683453237, "grad_norm": 0.24610893428325653, "learning_rate": 9.402735115792674e-05, "loss": 0.0129, "step": 11960 }, { "epoch": 10.764388489208633, "grad_norm": 0.27313023805618286, "learning_rate": 9.401428319535649e-05, "loss": 0.0087, "step": 11970 }, { "epoch": 10.773381294964029, "grad_norm": 0.2912907302379608, "learning_rate": 9.400120186240912e-05, "loss": 0.0123, "step": 11980 }, { "epoch": 10.782374100719425, "grad_norm": 0.21502864360809326, "learning_rate": 9.398810716305844e-05, "loss": 0.0093, "step": 11990 }, { "epoch": 10.79136690647482, "grad_norm": 0.28582772612571716, "learning_rate": 9.397499910128222e-05, "loss": 0.0135, "step": 12000 }, { "epoch": 10.800359712230216, "grad_norm": 0.31094035506248474, "learning_rate": 9.396187768106237e-05, "loss": 0.0099, "step": 12010 }, { "epoch": 10.809352517985612, "grad_norm": 0.3290638029575348, "learning_rate": 9.394874290638482e-05, "loss": 0.0111, "step": 12020 }, { "epoch": 10.818345323741006, "grad_norm": 0.2619308531284332, "learning_rate": 9.393559478123959e-05, "loss": 0.0138, "step": 12030 }, { "epoch": 10.827338129496402, "grad_norm": 0.21446944773197174, "learning_rate": 9.39224333096207e-05, "loss": 0.012, "step": 12040 }, { "epoch": 10.836330935251798, "grad_norm": 0.3210412859916687, "learning_rate": 9.390925849552629e-05, "loss": 0.0121, "step": 12050 }, { "epoch": 10.845323741007194, "grad_norm": 0.2954634726047516, "learning_rate": 9.389607034295849e-05, "loss": 0.0112, "step": 12060 }, { "epoch": 10.85431654676259, "grad_norm": 0.27605006098747253, "learning_rate": 9.388286885592355e-05, "loss": 0.0163, "step": 12070 }, { "epoch": 10.863309352517986, "grad_norm": 0.21402856707572937, "learning_rate": 9.386965403843168e-05, "loss": 0.0117, "step": 12080 }, { "epoch": 10.872302158273381, "grad_norm": 0.16802725195884705, "learning_rate": 9.385642589449726e-05, "loss": 0.009, "step": 12090 }, { "epoch": 10.881294964028777, "grad_norm": 0.2844763994216919, "learning_rate": 9.38431844281386e-05, "loss": 0.0078, "step": 12100 }, { "epoch": 10.890287769784173, "grad_norm": 0.30320581793785095, "learning_rate": 9.38299296433781e-05, "loss": 0.01, "step": 12110 }, { "epoch": 10.899280575539569, "grad_norm": 0.19692742824554443, "learning_rate": 9.381666154424226e-05, "loss": 0.0113, "step": 12120 }, { "epoch": 10.908273381294965, "grad_norm": 0.3058730959892273, "learning_rate": 9.380338013476157e-05, "loss": 0.0105, "step": 12130 }, { "epoch": 10.917266187050359, "grad_norm": 0.2507074773311615, "learning_rate": 9.379008541897054e-05, "loss": 0.0114, "step": 12140 }, { "epoch": 10.926258992805755, "grad_norm": 0.2283502072095871, "learning_rate": 9.377677740090777e-05, "loss": 0.0134, "step": 12150 }, { "epoch": 10.93525179856115, "grad_norm": 0.2453027218580246, "learning_rate": 9.376345608461588e-05, "loss": 0.0111, "step": 12160 }, { "epoch": 10.944244604316546, "grad_norm": 0.28503167629241943, "learning_rate": 9.375012147414155e-05, "loss": 0.0108, "step": 12170 }, { "epoch": 10.953237410071942, "grad_norm": 0.25260382890701294, "learning_rate": 9.373677357353545e-05, "loss": 0.0119, "step": 12180 }, { "epoch": 10.962230215827338, "grad_norm": 0.20795956254005432, "learning_rate": 9.372341238685237e-05, "loss": 0.0095, "step": 12190 }, { "epoch": 10.971223021582734, "grad_norm": 0.24000532925128937, "learning_rate": 9.371003791815102e-05, "loss": 0.0097, "step": 12200 }, { "epoch": 10.98021582733813, "grad_norm": 0.23857001960277557, "learning_rate": 9.369665017149429e-05, "loss": 0.009, "step": 12210 }, { "epoch": 10.989208633093526, "grad_norm": 0.2037239670753479, "learning_rate": 9.368324915094895e-05, "loss": 0.0089, "step": 12220 }, { "epoch": 10.998201438848922, "grad_norm": 0.2323504090309143, "learning_rate": 9.366983486058591e-05, "loss": 0.0119, "step": 12230 }, { "epoch": 11.007194244604317, "grad_norm": 0.1898108571767807, "learning_rate": 9.365640730448009e-05, "loss": 0.0104, "step": 12240 }, { "epoch": 11.016187050359711, "grad_norm": 0.25988689064979553, "learning_rate": 9.36429664867104e-05, "loss": 0.0118, "step": 12250 }, { "epoch": 11.025179856115107, "grad_norm": 0.27933385968208313, "learning_rate": 9.362951241135982e-05, "loss": 0.0127, "step": 12260 }, { "epoch": 11.034172661870503, "grad_norm": 0.29200080037117004, "learning_rate": 9.361604508251534e-05, "loss": 0.0094, "step": 12270 }, { "epoch": 11.043165467625899, "grad_norm": 0.1773144006729126, "learning_rate": 9.360256450426799e-05, "loss": 0.0093, "step": 12280 }, { "epoch": 11.052158273381295, "grad_norm": 0.26117631793022156, "learning_rate": 9.358907068071279e-05, "loss": 0.0097, "step": 12290 }, { "epoch": 11.06115107913669, "grad_norm": 0.17513017356395721, "learning_rate": 9.357556361594882e-05, "loss": 0.0091, "step": 12300 }, { "epoch": 11.070143884892087, "grad_norm": 0.23793551325798035, "learning_rate": 9.356204331407917e-05, "loss": 0.0105, "step": 12310 }, { "epoch": 11.079136690647482, "grad_norm": 0.2207697033882141, "learning_rate": 9.354850977921094e-05, "loss": 0.017, "step": 12320 }, { "epoch": 11.088129496402878, "grad_norm": 0.31198832392692566, "learning_rate": 9.353496301545529e-05, "loss": 0.0123, "step": 12330 }, { "epoch": 11.097122302158274, "grad_norm": 0.32186269760131836, "learning_rate": 9.352140302692733e-05, "loss": 0.0129, "step": 12340 }, { "epoch": 11.10611510791367, "grad_norm": 0.26284506916999817, "learning_rate": 9.350782981774627e-05, "loss": 0.0126, "step": 12350 }, { "epoch": 11.115107913669064, "grad_norm": 0.24521109461784363, "learning_rate": 9.349424339203526e-05, "loss": 0.0122, "step": 12360 }, { "epoch": 11.12410071942446, "grad_norm": 0.3080753684043884, "learning_rate": 9.34806437539215e-05, "loss": 0.0123, "step": 12370 }, { "epoch": 11.133093525179856, "grad_norm": 0.297965407371521, "learning_rate": 9.346703090753622e-05, "loss": 0.0152, "step": 12380 }, { "epoch": 11.142086330935252, "grad_norm": 0.3192374110221863, "learning_rate": 9.345340485701461e-05, "loss": 0.015, "step": 12390 }, { "epoch": 11.151079136690647, "grad_norm": 0.24190212786197662, "learning_rate": 9.343976560649595e-05, "loss": 0.0105, "step": 12400 }, { "epoch": 11.160071942446043, "grad_norm": 0.2071496695280075, "learning_rate": 9.342611316012344e-05, "loss": 0.0112, "step": 12410 }, { "epoch": 11.16906474820144, "grad_norm": 0.31134024262428284, "learning_rate": 9.341244752204437e-05, "loss": 0.0135, "step": 12420 }, { "epoch": 11.178057553956835, "grad_norm": 0.18585632741451263, "learning_rate": 9.339876869640995e-05, "loss": 0.0105, "step": 12430 }, { "epoch": 11.18705035971223, "grad_norm": 0.29595234990119934, "learning_rate": 9.33850766873755e-05, "loss": 0.0143, "step": 12440 }, { "epoch": 11.196043165467627, "grad_norm": 0.32563674449920654, "learning_rate": 9.337137149910028e-05, "loss": 0.0148, "step": 12450 }, { "epoch": 11.20503597122302, "grad_norm": 0.37647899985313416, "learning_rate": 9.335765313574753e-05, "loss": 0.01, "step": 12460 }, { "epoch": 11.214028776978417, "grad_norm": 0.33283257484436035, "learning_rate": 9.334392160148457e-05, "loss": 0.0113, "step": 12470 }, { "epoch": 11.223021582733812, "grad_norm": 0.3157326877117157, "learning_rate": 9.333017690048264e-05, "loss": 0.0125, "step": 12480 }, { "epoch": 11.232014388489208, "grad_norm": 0.33587583899497986, "learning_rate": 9.331641903691706e-05, "loss": 0.0113, "step": 12490 }, { "epoch": 11.241007194244604, "grad_norm": 0.22489508986473083, "learning_rate": 9.330264801496707e-05, "loss": 0.0167, "step": 12500 }, { "epoch": 11.25, "grad_norm": 0.39643001556396484, "learning_rate": 9.328886383881594e-05, "loss": 0.0128, "step": 12510 }, { "epoch": 11.258992805755396, "grad_norm": 0.3053304851055145, "learning_rate": 9.327506651265095e-05, "loss": 0.0173, "step": 12520 }, { "epoch": 11.267985611510792, "grad_norm": 0.3682536482810974, "learning_rate": 9.326125604066338e-05, "loss": 0.0141, "step": 12530 }, { "epoch": 11.276978417266188, "grad_norm": 0.39529287815093994, "learning_rate": 9.324743242704847e-05, "loss": 0.0127, "step": 12540 }, { "epoch": 11.285971223021583, "grad_norm": 0.28252679109573364, "learning_rate": 9.323359567600546e-05, "loss": 0.0124, "step": 12550 }, { "epoch": 11.29496402877698, "grad_norm": 0.3636002242565155, "learning_rate": 9.321974579173761e-05, "loss": 0.0119, "step": 12560 }, { "epoch": 11.303956834532373, "grad_norm": 0.3012107014656067, "learning_rate": 9.320588277845213e-05, "loss": 0.0169, "step": 12570 }, { "epoch": 11.31294964028777, "grad_norm": 0.20072521269321442, "learning_rate": 9.319200664036026e-05, "loss": 0.0104, "step": 12580 }, { "epoch": 11.321942446043165, "grad_norm": 0.3015614151954651, "learning_rate": 9.31781173816772e-05, "loss": 0.0099, "step": 12590 }, { "epoch": 11.33093525179856, "grad_norm": 0.23644188046455383, "learning_rate": 9.316421500662212e-05, "loss": 0.0118, "step": 12600 }, { "epoch": 11.339928057553957, "grad_norm": 0.16920386254787445, "learning_rate": 9.31502995194182e-05, "loss": 0.0104, "step": 12610 }, { "epoch": 11.348920863309353, "grad_norm": 0.3346429467201233, "learning_rate": 9.31363709242926e-05, "loss": 0.0111, "step": 12620 }, { "epoch": 11.357913669064748, "grad_norm": 0.4135216772556305, "learning_rate": 9.312242922547647e-05, "loss": 0.0081, "step": 12630 }, { "epoch": 11.366906474820144, "grad_norm": 0.45017901062965393, "learning_rate": 9.310847442720492e-05, "loss": 0.0133, "step": 12640 }, { "epoch": 11.37589928057554, "grad_norm": 0.23184344172477722, "learning_rate": 9.309450653371706e-05, "loss": 0.0109, "step": 12650 }, { "epoch": 11.384892086330936, "grad_norm": 0.3771873116493225, "learning_rate": 9.308052554925595e-05, "loss": 0.0108, "step": 12660 }, { "epoch": 11.39388489208633, "grad_norm": 0.2714274525642395, "learning_rate": 9.306653147806867e-05, "loss": 0.0125, "step": 12670 }, { "epoch": 11.402877697841726, "grad_norm": 0.246804341673851, "learning_rate": 9.305252432440622e-05, "loss": 0.0104, "step": 12680 }, { "epoch": 11.411870503597122, "grad_norm": 0.25711458921432495, "learning_rate": 9.303850409252361e-05, "loss": 0.0106, "step": 12690 }, { "epoch": 11.420863309352518, "grad_norm": 0.33836284279823303, "learning_rate": 9.302447078667985e-05, "loss": 0.0093, "step": 12700 }, { "epoch": 11.429856115107913, "grad_norm": 0.3626708686351776, "learning_rate": 9.301042441113783e-05, "loss": 0.0104, "step": 12710 }, { "epoch": 11.43884892086331, "grad_norm": 0.2435702681541443, "learning_rate": 9.299636497016451e-05, "loss": 0.013, "step": 12720 }, { "epoch": 11.447841726618705, "grad_norm": 0.17190104722976685, "learning_rate": 9.298229246803076e-05, "loss": 0.0113, "step": 12730 }, { "epoch": 11.456834532374101, "grad_norm": 0.24551819264888763, "learning_rate": 9.296820690901144e-05, "loss": 0.0112, "step": 12740 }, { "epoch": 11.465827338129497, "grad_norm": 0.314388245344162, "learning_rate": 9.295410829738539e-05, "loss": 0.0104, "step": 12750 }, { "epoch": 11.474820143884893, "grad_norm": 0.35548990964889526, "learning_rate": 9.293999663743535e-05, "loss": 0.0102, "step": 12760 }, { "epoch": 11.483812949640289, "grad_norm": 0.27393218874931335, "learning_rate": 9.292587193344813e-05, "loss": 0.0129, "step": 12770 }, { "epoch": 11.492805755395683, "grad_norm": 0.31284913420677185, "learning_rate": 9.291173418971437e-05, "loss": 0.0116, "step": 12780 }, { "epoch": 11.501798561151078, "grad_norm": 0.19044901430606842, "learning_rate": 9.28975834105288e-05, "loss": 0.0111, "step": 12790 }, { "epoch": 11.510791366906474, "grad_norm": 0.3314090371131897, "learning_rate": 9.288341960019004e-05, "loss": 0.0111, "step": 12800 }, { "epoch": 11.51978417266187, "grad_norm": 0.35375353693962097, "learning_rate": 9.286924276300067e-05, "loss": 0.013, "step": 12810 }, { "epoch": 11.528776978417266, "grad_norm": 0.24853968620300293, "learning_rate": 9.285505290326726e-05, "loss": 0.0109, "step": 12820 }, { "epoch": 11.537769784172662, "grad_norm": 0.21913765370845795, "learning_rate": 9.284085002530027e-05, "loss": 0.0093, "step": 12830 }, { "epoch": 11.546762589928058, "grad_norm": 0.3071359395980835, "learning_rate": 9.282663413341422e-05, "loss": 0.0136, "step": 12840 }, { "epoch": 11.555755395683454, "grad_norm": 0.21924661099910736, "learning_rate": 9.281240523192747e-05, "loss": 0.0086, "step": 12850 }, { "epoch": 11.56474820143885, "grad_norm": 0.23317059874534607, "learning_rate": 9.279816332516242e-05, "loss": 0.0087, "step": 12860 }, { "epoch": 11.573741007194245, "grad_norm": 0.2833620309829712, "learning_rate": 9.278390841744536e-05, "loss": 0.012, "step": 12870 }, { "epoch": 11.582733812949641, "grad_norm": 0.2139812856912613, "learning_rate": 9.276964051310658e-05, "loss": 0.0097, "step": 12880 }, { "epoch": 11.591726618705035, "grad_norm": 0.15151986479759216, "learning_rate": 9.275535961648027e-05, "loss": 0.0086, "step": 12890 }, { "epoch": 11.600719424460431, "grad_norm": 0.19227783381938934, "learning_rate": 9.274106573190459e-05, "loss": 0.009, "step": 12900 }, { "epoch": 11.609712230215827, "grad_norm": 0.1431138813495636, "learning_rate": 9.272675886372168e-05, "loss": 0.0065, "step": 12910 }, { "epoch": 11.618705035971223, "grad_norm": 0.13629096746444702, "learning_rate": 9.271243901627754e-05, "loss": 0.0098, "step": 12920 }, { "epoch": 11.627697841726619, "grad_norm": 0.25266292691230774, "learning_rate": 9.269810619392219e-05, "loss": 0.0093, "step": 12930 }, { "epoch": 11.636690647482014, "grad_norm": 0.2840169370174408, "learning_rate": 9.268376040100955e-05, "loss": 0.016, "step": 12940 }, { "epoch": 11.64568345323741, "grad_norm": 0.2895908057689667, "learning_rate": 9.266940164189752e-05, "loss": 0.0101, "step": 12950 }, { "epoch": 11.654676258992806, "grad_norm": 0.3364458382129669, "learning_rate": 9.265502992094787e-05, "loss": 0.0109, "step": 12960 }, { "epoch": 11.663669064748202, "grad_norm": 0.22246134281158447, "learning_rate": 9.264064524252638e-05, "loss": 0.009, "step": 12970 }, { "epoch": 11.672661870503598, "grad_norm": 0.33445313572883606, "learning_rate": 9.262624761100271e-05, "loss": 0.0138, "step": 12980 }, { "epoch": 11.681654676258994, "grad_norm": 0.3012174069881439, "learning_rate": 9.261183703075051e-05, "loss": 0.0103, "step": 12990 }, { "epoch": 11.690647482014388, "grad_norm": 0.3368338346481323, "learning_rate": 9.259741350614733e-05, "loss": 0.0106, "step": 13000 }, { "epoch": 11.699640287769784, "grad_norm": 0.33622685074806213, "learning_rate": 9.258297704157464e-05, "loss": 0.0126, "step": 13010 }, { "epoch": 11.70863309352518, "grad_norm": 0.41708990931510925, "learning_rate": 9.256852764141786e-05, "loss": 0.0133, "step": 13020 }, { "epoch": 11.717625899280575, "grad_norm": 0.282265305519104, "learning_rate": 9.255406531006634e-05, "loss": 0.0094, "step": 13030 }, { "epoch": 11.726618705035971, "grad_norm": 0.3190333843231201, "learning_rate": 9.253959005191335e-05, "loss": 0.0157, "step": 13040 }, { "epoch": 11.735611510791367, "grad_norm": 0.2667068541049957, "learning_rate": 9.25251018713561e-05, "loss": 0.0143, "step": 13050 }, { "epoch": 11.744604316546763, "grad_norm": 0.25732487440109253, "learning_rate": 9.251060077279571e-05, "loss": 0.0091, "step": 13060 }, { "epoch": 11.753597122302159, "grad_norm": 0.28113222122192383, "learning_rate": 9.249608676063724e-05, "loss": 0.0114, "step": 13070 }, { "epoch": 11.762589928057555, "grad_norm": 0.26160579919815063, "learning_rate": 9.248155983928964e-05, "loss": 0.0146, "step": 13080 }, { "epoch": 11.77158273381295, "grad_norm": 0.33508169651031494, "learning_rate": 9.246702001316583e-05, "loss": 0.0121, "step": 13090 }, { "epoch": 11.780575539568346, "grad_norm": 0.22533823549747467, "learning_rate": 9.245246728668262e-05, "loss": 0.0121, "step": 13100 }, { "epoch": 11.78956834532374, "grad_norm": 0.3220086991786957, "learning_rate": 9.243790166426073e-05, "loss": 0.013, "step": 13110 }, { "epoch": 11.798561151079136, "grad_norm": 0.23271973431110382, "learning_rate": 9.242332315032484e-05, "loss": 0.01, "step": 13120 }, { "epoch": 11.807553956834532, "grad_norm": 0.21500380337238312, "learning_rate": 9.240873174930349e-05, "loss": 0.0159, "step": 13130 }, { "epoch": 11.816546762589928, "grad_norm": 0.27739930152893066, "learning_rate": 9.239412746562917e-05, "loss": 0.0141, "step": 13140 }, { "epoch": 11.825539568345324, "grad_norm": 0.26337817311286926, "learning_rate": 9.237951030373828e-05, "loss": 0.0173, "step": 13150 }, { "epoch": 11.83453237410072, "grad_norm": 0.3017718493938446, "learning_rate": 9.236488026807113e-05, "loss": 0.0171, "step": 13160 }, { "epoch": 11.843525179856115, "grad_norm": 0.42831382155418396, "learning_rate": 9.235023736307193e-05, "loss": 0.0123, "step": 13170 }, { "epoch": 11.852517985611511, "grad_norm": 0.3615286350250244, "learning_rate": 9.233558159318881e-05, "loss": 0.0123, "step": 13180 }, { "epoch": 11.861510791366907, "grad_norm": 0.2631225883960724, "learning_rate": 9.232091296287382e-05, "loss": 0.01, "step": 13190 }, { "epoch": 11.870503597122303, "grad_norm": 0.20997823774814606, "learning_rate": 9.230623147658288e-05, "loss": 0.0118, "step": 13200 }, { "epoch": 11.879496402877697, "grad_norm": 0.3187306821346283, "learning_rate": 9.229153713877586e-05, "loss": 0.0096, "step": 13210 }, { "epoch": 11.888489208633093, "grad_norm": 0.3426077365875244, "learning_rate": 9.227682995391649e-05, "loss": 0.0135, "step": 13220 }, { "epoch": 11.897482014388489, "grad_norm": 0.2792164087295532, "learning_rate": 9.226210992647243e-05, "loss": 0.0135, "step": 13230 }, { "epoch": 11.906474820143885, "grad_norm": 0.23161257803440094, "learning_rate": 9.224737706091525e-05, "loss": 0.0085, "step": 13240 }, { "epoch": 11.91546762589928, "grad_norm": 0.286446213722229, "learning_rate": 9.223263136172039e-05, "loss": 0.0122, "step": 13250 }, { "epoch": 11.924460431654676, "grad_norm": 0.35388582944869995, "learning_rate": 9.22178728333672e-05, "loss": 0.0143, "step": 13260 }, { "epoch": 11.933453237410072, "grad_norm": 0.2224084436893463, "learning_rate": 9.220310148033897e-05, "loss": 0.012, "step": 13270 }, { "epoch": 11.942446043165468, "grad_norm": 0.26409170031547546, "learning_rate": 9.21883173071228e-05, "loss": 0.0118, "step": 13280 }, { "epoch": 11.951438848920864, "grad_norm": 0.2579101324081421, "learning_rate": 9.217352031820976e-05, "loss": 0.0084, "step": 13290 }, { "epoch": 11.96043165467626, "grad_norm": 0.1572285145521164, "learning_rate": 9.215871051809477e-05, "loss": 0.0107, "step": 13300 }, { "epoch": 11.969424460431654, "grad_norm": 0.30348289012908936, "learning_rate": 9.214388791127666e-05, "loss": 0.0119, "step": 13310 }, { "epoch": 11.97841726618705, "grad_norm": 0.224385067820549, "learning_rate": 9.212905250225814e-05, "loss": 0.0108, "step": 13320 }, { "epoch": 11.987410071942445, "grad_norm": 0.2566951513290405, "learning_rate": 9.211420429554583e-05, "loss": 0.0108, "step": 13330 }, { "epoch": 11.996402877697841, "grad_norm": 0.25784942507743835, "learning_rate": 9.209934329565022e-05, "loss": 0.0086, "step": 13340 }, { "epoch": 12.005395683453237, "grad_norm": 0.3117499053478241, "learning_rate": 9.208446950708568e-05, "loss": 0.0136, "step": 13350 }, { "epoch": 12.014388489208633, "grad_norm": 0.2760503590106964, "learning_rate": 9.20695829343705e-05, "loss": 0.0104, "step": 13360 }, { "epoch": 12.023381294964029, "grad_norm": 0.18527504801750183, "learning_rate": 9.205468358202678e-05, "loss": 0.0098, "step": 13370 }, { "epoch": 12.032374100719425, "grad_norm": 0.3559057116508484, "learning_rate": 9.203977145458059e-05, "loss": 0.0133, "step": 13380 }, { "epoch": 12.04136690647482, "grad_norm": 0.20046019554138184, "learning_rate": 9.202484655656182e-05, "loss": 0.0104, "step": 13390 }, { "epoch": 12.050359712230216, "grad_norm": 0.2630917429924011, "learning_rate": 9.200990889250427e-05, "loss": 0.0128, "step": 13400 }, { "epoch": 12.059352517985612, "grad_norm": 0.305337518453598, "learning_rate": 9.19949584669456e-05, "loss": 0.01, "step": 13410 }, { "epoch": 12.068345323741006, "grad_norm": 0.2880862355232239, "learning_rate": 9.197999528442738e-05, "loss": 0.0123, "step": 13420 }, { "epoch": 12.077338129496402, "grad_norm": 0.2328377217054367, "learning_rate": 9.196501934949499e-05, "loss": 0.0108, "step": 13430 }, { "epoch": 12.086330935251798, "grad_norm": 0.2870248556137085, "learning_rate": 9.195003066669776e-05, "loss": 0.011, "step": 13440 }, { "epoch": 12.095323741007194, "grad_norm": 0.19087468087673187, "learning_rate": 9.193502924058884e-05, "loss": 0.0109, "step": 13450 }, { "epoch": 12.10431654676259, "grad_norm": 0.3854980766773224, "learning_rate": 9.192001507572526e-05, "loss": 0.0139, "step": 13460 }, { "epoch": 12.113309352517986, "grad_norm": 0.3473704755306244, "learning_rate": 9.190498817666793e-05, "loss": 0.0153, "step": 13470 }, { "epoch": 12.122302158273381, "grad_norm": 0.18961475789546967, "learning_rate": 9.188994854798163e-05, "loss": 0.0112, "step": 13480 }, { "epoch": 12.131294964028777, "grad_norm": 0.1754615306854248, "learning_rate": 9.187489619423499e-05, "loss": 0.0106, "step": 13490 }, { "epoch": 12.140287769784173, "grad_norm": 0.2505207359790802, "learning_rate": 9.185983112000056e-05, "loss": 0.0085, "step": 13500 }, { "epoch": 12.149280575539569, "grad_norm": 0.29082345962524414, "learning_rate": 9.184475332985464e-05, "loss": 0.0078, "step": 13510 }, { "epoch": 12.158273381294965, "grad_norm": 0.2682136595249176, "learning_rate": 9.182966282837754e-05, "loss": 0.0104, "step": 13520 }, { "epoch": 12.167266187050359, "grad_norm": 0.21248683333396912, "learning_rate": 9.18145596201533e-05, "loss": 0.0071, "step": 13530 }, { "epoch": 12.176258992805755, "grad_norm": 0.2628817558288574, "learning_rate": 9.179944370976991e-05, "loss": 0.0098, "step": 13540 }, { "epoch": 12.18525179856115, "grad_norm": 0.24359655380249023, "learning_rate": 9.178431510181918e-05, "loss": 0.0077, "step": 13550 }, { "epoch": 12.194244604316546, "grad_norm": 0.23162686824798584, "learning_rate": 9.176917380089675e-05, "loss": 0.0117, "step": 13560 }, { "epoch": 12.203237410071942, "grad_norm": 0.33795371651649475, "learning_rate": 9.175401981160219e-05, "loss": 0.0137, "step": 13570 }, { "epoch": 12.212230215827338, "grad_norm": 0.36170831322669983, "learning_rate": 9.173885313853885e-05, "loss": 0.0105, "step": 13580 }, { "epoch": 12.221223021582734, "grad_norm": 0.37192028760910034, "learning_rate": 9.172367378631398e-05, "loss": 0.0152, "step": 13590 }, { "epoch": 12.23021582733813, "grad_norm": 0.2804718315601349, "learning_rate": 9.170848175953866e-05, "loss": 0.0165, "step": 13600 }, { "epoch": 12.239208633093526, "grad_norm": 0.3750666677951813, "learning_rate": 9.169327706282784e-05, "loss": 0.0111, "step": 13610 }, { "epoch": 12.248201438848922, "grad_norm": 0.28287625312805176, "learning_rate": 9.167805970080029e-05, "loss": 0.0094, "step": 13620 }, { "epoch": 12.257194244604317, "grad_norm": 0.24645079672336578, "learning_rate": 9.166282967807864e-05, "loss": 0.0107, "step": 13630 }, { "epoch": 12.266187050359711, "grad_norm": 0.2636655271053314, "learning_rate": 9.16475869992894e-05, "loss": 0.0084, "step": 13640 }, { "epoch": 12.275179856115107, "grad_norm": 0.21057312190532684, "learning_rate": 9.163233166906284e-05, "loss": 0.0076, "step": 13650 }, { "epoch": 12.284172661870503, "grad_norm": 0.2044605314731598, "learning_rate": 9.161706369203317e-05, "loss": 0.0106, "step": 13660 }, { "epoch": 12.293165467625899, "grad_norm": 0.18185271322727203, "learning_rate": 9.16017830728384e-05, "loss": 0.007, "step": 13670 }, { "epoch": 12.302158273381295, "grad_norm": 0.19360226392745972, "learning_rate": 9.158648981612035e-05, "loss": 0.0109, "step": 13680 }, { "epoch": 12.31115107913669, "grad_norm": 0.31052419543266296, "learning_rate": 9.157118392652472e-05, "loss": 0.0102, "step": 13690 }, { "epoch": 12.320143884892087, "grad_norm": 0.1520543098449707, "learning_rate": 9.155586540870104e-05, "loss": 0.0076, "step": 13700 }, { "epoch": 12.329136690647482, "grad_norm": 0.29116037487983704, "learning_rate": 9.154053426730267e-05, "loss": 0.0095, "step": 13710 }, { "epoch": 12.338129496402878, "grad_norm": 0.19769594073295593, "learning_rate": 9.15251905069868e-05, "loss": 0.0116, "step": 13720 }, { "epoch": 12.347122302158274, "grad_norm": 0.1553727090358734, "learning_rate": 9.150983413241446e-05, "loss": 0.0096, "step": 13730 }, { "epoch": 12.35611510791367, "grad_norm": 0.18054333329200745, "learning_rate": 9.149446514825051e-05, "loss": 0.0115, "step": 13740 }, { "epoch": 12.365107913669064, "grad_norm": 0.27599233388900757, "learning_rate": 9.147908355916365e-05, "loss": 0.0126, "step": 13750 }, { "epoch": 12.37410071942446, "grad_norm": 0.14258936047554016, "learning_rate": 9.146368936982642e-05, "loss": 0.0121, "step": 13760 }, { "epoch": 12.383093525179856, "grad_norm": 0.16815675795078278, "learning_rate": 9.144828258491511e-05, "loss": 0.0098, "step": 13770 }, { "epoch": 12.392086330935252, "grad_norm": 0.16045396029949188, "learning_rate": 9.143286320910996e-05, "loss": 0.0095, "step": 13780 }, { "epoch": 12.401079136690647, "grad_norm": 0.23693057894706726, "learning_rate": 9.141743124709491e-05, "loss": 0.0086, "step": 13790 }, { "epoch": 12.410071942446043, "grad_norm": 0.32990503311157227, "learning_rate": 9.140198670355784e-05, "loss": 0.0105, "step": 13800 }, { "epoch": 12.41906474820144, "grad_norm": 0.18815839290618896, "learning_rate": 9.138652958319034e-05, "loss": 0.0128, "step": 13810 }, { "epoch": 12.428057553956835, "grad_norm": 0.2749486267566681, "learning_rate": 9.137105989068791e-05, "loss": 0.0089, "step": 13820 }, { "epoch": 12.43705035971223, "grad_norm": 0.27481767535209656, "learning_rate": 9.135557763074983e-05, "loss": 0.0159, "step": 13830 }, { "epoch": 12.446043165467627, "grad_norm": 0.29096826910972595, "learning_rate": 9.13400828080792e-05, "loss": 0.0134, "step": 13840 }, { "epoch": 12.45503597122302, "grad_norm": 0.4369473457336426, "learning_rate": 9.132457542738292e-05, "loss": 0.0097, "step": 13850 }, { "epoch": 12.464028776978417, "grad_norm": 0.21726003289222717, "learning_rate": 9.130905549337174e-05, "loss": 0.0085, "step": 13860 }, { "epoch": 12.473021582733812, "grad_norm": 0.24457351863384247, "learning_rate": 9.129352301076021e-05, "loss": 0.0094, "step": 13870 }, { "epoch": 12.482014388489208, "grad_norm": 0.1918737143278122, "learning_rate": 9.127797798426668e-05, "loss": 0.0084, "step": 13880 }, { "epoch": 12.491007194244604, "grad_norm": 0.29686737060546875, "learning_rate": 9.126242041861333e-05, "loss": 0.0135, "step": 13890 }, { "epoch": 12.5, "grad_norm": 0.20163454115390778, "learning_rate": 9.124685031852611e-05, "loss": 0.012, "step": 13900 }, { "epoch": 12.508992805755396, "grad_norm": 0.27067750692367554, "learning_rate": 9.123126768873482e-05, "loss": 0.0102, "step": 13910 }, { "epoch": 12.517985611510792, "grad_norm": 0.3164565861225128, "learning_rate": 9.121567253397308e-05, "loss": 0.0125, "step": 13920 }, { "epoch": 12.526978417266188, "grad_norm": 0.2647062838077545, "learning_rate": 9.120006485897824e-05, "loss": 0.0085, "step": 13930 }, { "epoch": 12.535971223021583, "grad_norm": 0.30503642559051514, "learning_rate": 9.118444466849152e-05, "loss": 0.0155, "step": 13940 }, { "epoch": 12.54496402877698, "grad_norm": 0.21309979259967804, "learning_rate": 9.116881196725793e-05, "loss": 0.0106, "step": 13950 }, { "epoch": 12.553956834532373, "grad_norm": 0.2041652947664261, "learning_rate": 9.115316676002627e-05, "loss": 0.0097, "step": 13960 }, { "epoch": 12.56294964028777, "grad_norm": 0.23564450442790985, "learning_rate": 9.113750905154911e-05, "loss": 0.0104, "step": 13970 }, { "epoch": 12.571942446043165, "grad_norm": 0.1929241269826889, "learning_rate": 9.112183884658289e-05, "loss": 0.0133, "step": 13980 }, { "epoch": 12.58093525179856, "grad_norm": 0.17238052189350128, "learning_rate": 9.11061561498878e-05, "loss": 0.0073, "step": 13990 }, { "epoch": 12.589928057553957, "grad_norm": 0.2703281342983246, "learning_rate": 9.109046096622779e-05, "loss": 0.012, "step": 14000 }, { "epoch": 12.598920863309353, "grad_norm": 0.39415988326072693, "learning_rate": 9.107475330037069e-05, "loss": 0.0097, "step": 14010 }, { "epoch": 12.607913669064748, "grad_norm": 0.28474584221839905, "learning_rate": 9.105903315708806e-05, "loss": 0.0108, "step": 14020 }, { "epoch": 12.616906474820144, "grad_norm": 0.2435186207294464, "learning_rate": 9.104330054115524e-05, "loss": 0.0085, "step": 14030 }, { "epoch": 12.62589928057554, "grad_norm": 0.2857704162597656, "learning_rate": 9.102755545735141e-05, "loss": 0.0082, "step": 14040 }, { "epoch": 12.634892086330936, "grad_norm": 0.23373746871948242, "learning_rate": 9.10117979104595e-05, "loss": 0.0105, "step": 14050 }, { "epoch": 12.64388489208633, "grad_norm": 0.2588748335838318, "learning_rate": 9.099602790526624e-05, "loss": 0.0117, "step": 14060 }, { "epoch": 12.652877697841726, "grad_norm": 0.241126149892807, "learning_rate": 9.098024544656212e-05, "loss": 0.0085, "step": 14070 }, { "epoch": 12.661870503597122, "grad_norm": 0.3402811884880066, "learning_rate": 9.096445053914148e-05, "loss": 0.0148, "step": 14080 }, { "epoch": 12.670863309352518, "grad_norm": 0.19609467685222626, "learning_rate": 9.094864318780236e-05, "loss": 0.012, "step": 14090 }, { "epoch": 12.679856115107913, "grad_norm": 0.38898661732673645, "learning_rate": 9.093282339734663e-05, "loss": 0.0188, "step": 14100 }, { "epoch": 12.68884892086331, "grad_norm": 0.2049136757850647, "learning_rate": 9.091699117257992e-05, "loss": 0.0104, "step": 14110 }, { "epoch": 12.697841726618705, "grad_norm": 0.3155696988105774, "learning_rate": 9.090114651831163e-05, "loss": 0.0097, "step": 14120 }, { "epoch": 12.706834532374101, "grad_norm": 0.30227550864219666, "learning_rate": 9.088528943935497e-05, "loss": 0.0141, "step": 14130 }, { "epoch": 12.715827338129497, "grad_norm": 0.25896155834198, "learning_rate": 9.086941994052689e-05, "loss": 0.009, "step": 14140 }, { "epoch": 12.724820143884893, "grad_norm": 0.28174546360969543, "learning_rate": 9.085353802664813e-05, "loss": 0.0092, "step": 14150 }, { "epoch": 12.733812949640289, "grad_norm": 0.3421269655227661, "learning_rate": 9.08376437025432e-05, "loss": 0.0096, "step": 14160 }, { "epoch": 12.742805755395683, "grad_norm": 0.2070046067237854, "learning_rate": 9.082173697304035e-05, "loss": 0.0102, "step": 14170 }, { "epoch": 12.751798561151078, "grad_norm": 0.23607303202152252, "learning_rate": 9.080581784297166e-05, "loss": 0.0071, "step": 14180 }, { "epoch": 12.760791366906474, "grad_norm": 0.3459584414958954, "learning_rate": 9.078988631717291e-05, "loss": 0.0111, "step": 14190 }, { "epoch": 12.76978417266187, "grad_norm": 0.20316506922245026, "learning_rate": 9.077394240048369e-05, "loss": 0.009, "step": 14200 }, { "epoch": 12.778776978417266, "grad_norm": 0.2850469946861267, "learning_rate": 9.075798609774736e-05, "loss": 0.0101, "step": 14210 }, { "epoch": 12.787769784172662, "grad_norm": 0.24583971500396729, "learning_rate": 9.0742017413811e-05, "loss": 0.0087, "step": 14220 }, { "epoch": 12.796762589928058, "grad_norm": 0.20220085978507996, "learning_rate": 9.072603635352548e-05, "loss": 0.0098, "step": 14230 }, { "epoch": 12.805755395683454, "grad_norm": 0.3006630837917328, "learning_rate": 9.071004292174541e-05, "loss": 0.0111, "step": 14240 }, { "epoch": 12.81474820143885, "grad_norm": 0.16808198392391205, "learning_rate": 9.06940371233292e-05, "loss": 0.0082, "step": 14250 }, { "epoch": 12.823741007194245, "grad_norm": 0.13336293399333954, "learning_rate": 9.067801896313898e-05, "loss": 0.0101, "step": 14260 }, { "epoch": 12.832733812949641, "grad_norm": 0.21232520043849945, "learning_rate": 9.066198844604064e-05, "loss": 0.0103, "step": 14270 }, { "epoch": 12.841726618705035, "grad_norm": 0.24588435888290405, "learning_rate": 9.06459455769038e-05, "loss": 0.0129, "step": 14280 }, { "epoch": 12.850719424460431, "grad_norm": 0.1370544731616974, "learning_rate": 9.062989036060193e-05, "loss": 0.0177, "step": 14290 }, { "epoch": 12.859712230215827, "grad_norm": 0.1558130830526352, "learning_rate": 9.061382280201212e-05, "loss": 0.0093, "step": 14300 }, { "epoch": 12.868705035971223, "grad_norm": 0.10084118694067001, "learning_rate": 9.059774290601528e-05, "loss": 0.0146, "step": 14310 }, { "epoch": 12.877697841726619, "grad_norm": 0.21640312671661377, "learning_rate": 9.058165067749606e-05, "loss": 0.0096, "step": 14320 }, { "epoch": 12.886690647482014, "grad_norm": 0.25056105852127075, "learning_rate": 9.056554612134288e-05, "loss": 0.012, "step": 14330 }, { "epoch": 12.89568345323741, "grad_norm": 0.2401287704706192, "learning_rate": 9.054942924244785e-05, "loss": 0.0092, "step": 14340 }, { "epoch": 12.904676258992806, "grad_norm": 0.19145110249519348, "learning_rate": 9.053330004570686e-05, "loss": 0.0117, "step": 14350 }, { "epoch": 12.913669064748202, "grad_norm": 0.17405930161476135, "learning_rate": 9.051715853601955e-05, "loss": 0.01, "step": 14360 }, { "epoch": 12.922661870503598, "grad_norm": 0.17510831356048584, "learning_rate": 9.050100471828926e-05, "loss": 0.0144, "step": 14370 }, { "epoch": 12.931654676258994, "grad_norm": 0.20305006206035614, "learning_rate": 9.048483859742311e-05, "loss": 0.0136, "step": 14380 }, { "epoch": 12.940647482014388, "grad_norm": 0.346632182598114, "learning_rate": 9.046866017833193e-05, "loss": 0.0097, "step": 14390 }, { "epoch": 12.949640287769784, "grad_norm": 0.24212735891342163, "learning_rate": 9.045246946593029e-05, "loss": 0.0107, "step": 14400 }, { "epoch": 12.95863309352518, "grad_norm": 0.2008792608976364, "learning_rate": 9.043626646513652e-05, "loss": 0.0092, "step": 14410 }, { "epoch": 12.967625899280575, "grad_norm": 0.22739380598068237, "learning_rate": 9.042005118087267e-05, "loss": 0.0074, "step": 14420 }, { "epoch": 12.976618705035971, "grad_norm": 0.2311946004629135, "learning_rate": 9.040382361806448e-05, "loss": 0.0122, "step": 14430 }, { "epoch": 12.985611510791367, "grad_norm": 0.10816383361816406, "learning_rate": 9.038758378164148e-05, "loss": 0.0122, "step": 14440 }, { "epoch": 12.994604316546763, "grad_norm": 0.22686362266540527, "learning_rate": 9.037133167653691e-05, "loss": 0.009, "step": 14450 }, { "epoch": 13.003597122302159, "grad_norm": 0.16753628849983215, "learning_rate": 9.035506730768771e-05, "loss": 0.0081, "step": 14460 }, { "epoch": 13.012589928057555, "grad_norm": 0.1826799064874649, "learning_rate": 9.033879068003458e-05, "loss": 0.0088, "step": 14470 }, { "epoch": 13.02158273381295, "grad_norm": 0.22235557436943054, "learning_rate": 9.032250179852193e-05, "loss": 0.0103, "step": 14480 }, { "epoch": 13.030575539568344, "grad_norm": 0.1648584008216858, "learning_rate": 9.030620066809787e-05, "loss": 0.0085, "step": 14490 }, { "epoch": 13.03956834532374, "grad_norm": 0.323970764875412, "learning_rate": 9.028988729371428e-05, "loss": 0.0093, "step": 14500 }, { "epoch": 13.048561151079136, "grad_norm": 0.14535276591777802, "learning_rate": 9.027356168032673e-05, "loss": 0.0087, "step": 14510 }, { "epoch": 13.057553956834532, "grad_norm": 0.27819716930389404, "learning_rate": 9.02572238328945e-05, "loss": 0.0116, "step": 14520 }, { "epoch": 13.066546762589928, "grad_norm": 0.32434654235839844, "learning_rate": 9.02408737563806e-05, "loss": 0.0105, "step": 14530 }, { "epoch": 13.075539568345324, "grad_norm": 0.24684856832027435, "learning_rate": 9.022451145575174e-05, "loss": 0.0101, "step": 14540 }, { "epoch": 13.08453237410072, "grad_norm": 0.2680242657661438, "learning_rate": 9.02081369359784e-05, "loss": 0.011, "step": 14550 }, { "epoch": 13.093525179856115, "grad_norm": 0.13902734220027924, "learning_rate": 9.019175020203465e-05, "loss": 0.0076, "step": 14560 }, { "epoch": 13.102517985611511, "grad_norm": 0.49047133326530457, "learning_rate": 9.017535125889842e-05, "loss": 0.0085, "step": 14570 }, { "epoch": 13.111510791366907, "grad_norm": 0.31220492720603943, "learning_rate": 9.015894011155124e-05, "loss": 0.0117, "step": 14580 }, { "epoch": 13.120503597122303, "grad_norm": 0.26327207684516907, "learning_rate": 9.014251676497838e-05, "loss": 0.0151, "step": 14590 }, { "epoch": 13.129496402877697, "grad_norm": 0.22892625629901886, "learning_rate": 9.012608122416884e-05, "loss": 0.0088, "step": 14600 }, { "epoch": 13.138489208633093, "grad_norm": 0.2166002243757248, "learning_rate": 9.010963349411529e-05, "loss": 0.0127, "step": 14610 }, { "epoch": 13.147482014388489, "grad_norm": 0.2246667444705963, "learning_rate": 9.00931735798141e-05, "loss": 0.0141, "step": 14620 }, { "epoch": 13.156474820143885, "grad_norm": 0.24801483750343323, "learning_rate": 9.00767014862654e-05, "loss": 0.0082, "step": 14630 }, { "epoch": 13.16546762589928, "grad_norm": 0.23890186846256256, "learning_rate": 9.006021721847295e-05, "loss": 0.0088, "step": 14640 }, { "epoch": 13.174460431654676, "grad_norm": 0.36600178480148315, "learning_rate": 9.004372078144423e-05, "loss": 0.0104, "step": 14650 }, { "epoch": 13.183453237410072, "grad_norm": 0.28168484568595886, "learning_rate": 9.002721218019043e-05, "loss": 0.0131, "step": 14660 }, { "epoch": 13.192446043165468, "grad_norm": 0.2854299545288086, "learning_rate": 9.001069141972642e-05, "loss": 0.0087, "step": 14670 }, { "epoch": 13.201438848920864, "grad_norm": 0.224836066365242, "learning_rate": 8.99941585050708e-05, "loss": 0.0112, "step": 14680 }, { "epoch": 13.21043165467626, "grad_norm": 0.24097320437431335, "learning_rate": 8.997761344124578e-05, "loss": 0.0091, "step": 14690 }, { "epoch": 13.219424460431656, "grad_norm": 0.19883672893047333, "learning_rate": 8.996105623327737e-05, "loss": 0.007, "step": 14700 }, { "epoch": 13.22841726618705, "grad_norm": 0.23801589012145996, "learning_rate": 8.994448688619517e-05, "loss": 0.008, "step": 14710 }, { "epoch": 13.237410071942445, "grad_norm": 0.23318731784820557, "learning_rate": 8.992790540503253e-05, "loss": 0.0099, "step": 14720 }, { "epoch": 13.246402877697841, "grad_norm": 0.19361697137355804, "learning_rate": 8.991131179482648e-05, "loss": 0.0072, "step": 14730 }, { "epoch": 13.255395683453237, "grad_norm": 0.17933930456638336, "learning_rate": 8.989470606061768e-05, "loss": 0.0098, "step": 14740 }, { "epoch": 13.264388489208633, "grad_norm": 0.2999407947063446, "learning_rate": 8.987808820745056e-05, "loss": 0.0125, "step": 14750 }, { "epoch": 13.273381294964029, "grad_norm": 0.36591336131095886, "learning_rate": 8.986145824037315e-05, "loss": 0.0092, "step": 14760 }, { "epoch": 13.282374100719425, "grad_norm": 0.2165016233921051, "learning_rate": 8.984481616443721e-05, "loss": 0.0147, "step": 14770 }, { "epoch": 13.29136690647482, "grad_norm": 0.21977642178535461, "learning_rate": 8.982816198469815e-05, "loss": 0.0081, "step": 14780 }, { "epoch": 13.300359712230216, "grad_norm": 0.22775013744831085, "learning_rate": 8.98114957062151e-05, "loss": 0.0111, "step": 14790 }, { "epoch": 13.309352517985612, "grad_norm": 0.2627594470977783, "learning_rate": 8.97948173340508e-05, "loss": 0.0084, "step": 14800 }, { "epoch": 13.318345323741006, "grad_norm": 0.21556589007377625, "learning_rate": 8.977812687327172e-05, "loss": 0.0072, "step": 14810 }, { "epoch": 13.327338129496402, "grad_norm": 0.22735264897346497, "learning_rate": 8.976142432894798e-05, "loss": 0.0092, "step": 14820 }, { "epoch": 13.336330935251798, "grad_norm": 0.12013321369886398, "learning_rate": 8.974470970615336e-05, "loss": 0.0117, "step": 14830 }, { "epoch": 13.345323741007194, "grad_norm": 0.2678423821926117, "learning_rate": 8.972798300996534e-05, "loss": 0.0097, "step": 14840 }, { "epoch": 13.35431654676259, "grad_norm": 0.1875368058681488, "learning_rate": 8.971124424546504e-05, "loss": 0.0071, "step": 14850 }, { "epoch": 13.363309352517986, "grad_norm": 0.21985968947410583, "learning_rate": 8.969449341773724e-05, "loss": 0.011, "step": 14860 }, { "epoch": 13.372302158273381, "grad_norm": 0.18170413374900818, "learning_rate": 8.967773053187042e-05, "loss": 0.0135, "step": 14870 }, { "epoch": 13.381294964028777, "grad_norm": 0.20118753612041473, "learning_rate": 8.966095559295668e-05, "loss": 0.0083, "step": 14880 }, { "epoch": 13.390287769784173, "grad_norm": 0.17952004075050354, "learning_rate": 8.964416860609184e-05, "loss": 0.0107, "step": 14890 }, { "epoch": 13.399280575539569, "grad_norm": 0.25507399439811707, "learning_rate": 8.962736957637532e-05, "loss": 0.0073, "step": 14900 }, { "epoch": 13.408273381294965, "grad_norm": 0.11578459292650223, "learning_rate": 8.96105585089102e-05, "loss": 0.0058, "step": 14910 }, { "epoch": 13.417266187050359, "grad_norm": 0.23176150023937225, "learning_rate": 8.959373540880329e-05, "loss": 0.0153, "step": 14920 }, { "epoch": 13.426258992805755, "grad_norm": 0.1885872185230255, "learning_rate": 8.957690028116495e-05, "loss": 0.0096, "step": 14930 }, { "epoch": 13.43525179856115, "grad_norm": 0.28859052062034607, "learning_rate": 8.956005313110928e-05, "loss": 0.0089, "step": 14940 }, { "epoch": 13.444244604316546, "grad_norm": 0.21117214858531952, "learning_rate": 8.9543193963754e-05, "loss": 0.0115, "step": 14950 }, { "epoch": 13.453237410071942, "grad_norm": 0.24410796165466309, "learning_rate": 8.952632278422048e-05, "loss": 0.0089, "step": 14960 }, { "epoch": 13.462230215827338, "grad_norm": 0.1855291873216629, "learning_rate": 8.95094395976337e-05, "loss": 0.0112, "step": 14970 }, { "epoch": 13.471223021582734, "grad_norm": 0.19723844528198242, "learning_rate": 8.949254440912239e-05, "loss": 0.0109, "step": 14980 }, { "epoch": 13.48021582733813, "grad_norm": 0.22044874727725983, "learning_rate": 8.94756372238188e-05, "loss": 0.0099, "step": 14990 }, { "epoch": 13.489208633093526, "grad_norm": 0.21296770870685577, "learning_rate": 8.945871804685892e-05, "loss": 0.0087, "step": 15000 }, { "epoch": 13.498201438848922, "grad_norm": 0.19711776077747345, "learning_rate": 8.944178688338236e-05, "loss": 0.0079, "step": 15010 }, { "epoch": 13.507194244604317, "grad_norm": 0.23346877098083496, "learning_rate": 8.942484373853233e-05, "loss": 0.0081, "step": 15020 }, { "epoch": 13.516187050359711, "grad_norm": 0.21997399628162384, "learning_rate": 8.940788861745572e-05, "loss": 0.0132, "step": 15030 }, { "epoch": 13.525179856115107, "grad_norm": 0.29945841431617737, "learning_rate": 8.939092152530308e-05, "loss": 0.0103, "step": 15040 }, { "epoch": 13.534172661870503, "grad_norm": 0.20732659101486206, "learning_rate": 8.937394246722853e-05, "loss": 0.0079, "step": 15050 }, { "epoch": 13.543165467625899, "grad_norm": 0.24984923005104065, "learning_rate": 8.935695144838984e-05, "loss": 0.013, "step": 15060 }, { "epoch": 13.552158273381295, "grad_norm": 0.24369576573371887, "learning_rate": 8.933994847394849e-05, "loss": 0.0103, "step": 15070 }, { "epoch": 13.56115107913669, "grad_norm": 0.32486504316329956, "learning_rate": 8.932293354906949e-05, "loss": 0.0084, "step": 15080 }, { "epoch": 13.570143884892087, "grad_norm": 0.2284381240606308, "learning_rate": 8.930590667892153e-05, "loss": 0.0107, "step": 15090 }, { "epoch": 13.579136690647482, "grad_norm": 0.23113864660263062, "learning_rate": 8.928886786867696e-05, "loss": 0.0092, "step": 15100 }, { "epoch": 13.588129496402878, "grad_norm": 0.18741704523563385, "learning_rate": 8.927181712351168e-05, "loss": 0.0081, "step": 15110 }, { "epoch": 13.597122302158274, "grad_norm": 0.33103638887405396, "learning_rate": 8.925475444860527e-05, "loss": 0.0106, "step": 15120 }, { "epoch": 13.60611510791367, "grad_norm": 0.21684114634990692, "learning_rate": 8.923767984914092e-05, "loss": 0.0071, "step": 15130 }, { "epoch": 13.615107913669064, "grad_norm": 0.32093897461891174, "learning_rate": 8.922059333030545e-05, "loss": 0.0144, "step": 15140 }, { "epoch": 13.62410071942446, "grad_norm": 0.2786828875541687, "learning_rate": 8.920349489728928e-05, "loss": 0.0096, "step": 15150 }, { "epoch": 13.633093525179856, "grad_norm": 0.18879196047782898, "learning_rate": 8.918638455528646e-05, "loss": 0.0135, "step": 15160 }, { "epoch": 13.642086330935252, "grad_norm": 0.21809358894824982, "learning_rate": 8.916926230949468e-05, "loss": 0.0112, "step": 15170 }, { "epoch": 13.651079136690647, "grad_norm": 0.26523900032043457, "learning_rate": 8.915212816511522e-05, "loss": 0.0103, "step": 15180 }, { "epoch": 13.660071942446043, "grad_norm": 0.2418147623538971, "learning_rate": 8.913498212735296e-05, "loss": 0.0121, "step": 15190 }, { "epoch": 13.66906474820144, "grad_norm": 0.28252798318862915, "learning_rate": 8.911782420141643e-05, "loss": 0.0108, "step": 15200 }, { "epoch": 13.678057553956835, "grad_norm": 0.3047530949115753, "learning_rate": 8.910065439251775e-05, "loss": 0.0084, "step": 15210 }, { "epoch": 13.68705035971223, "grad_norm": 0.3346084654331207, "learning_rate": 8.908347270587268e-05, "loss": 0.0094, "step": 15220 }, { "epoch": 13.696043165467627, "grad_norm": 0.37705302238464355, "learning_rate": 8.906627914670054e-05, "loss": 0.0092, "step": 15230 }, { "epoch": 13.70503597122302, "grad_norm": 0.2133893221616745, "learning_rate": 8.904907372022427e-05, "loss": 0.0087, "step": 15240 }, { "epoch": 13.714028776978417, "grad_norm": 0.39140135049819946, "learning_rate": 8.903185643167042e-05, "loss": 0.011, "step": 15250 }, { "epoch": 13.723021582733812, "grad_norm": 0.1597742885351181, "learning_rate": 8.901462728626919e-05, "loss": 0.0083, "step": 15260 }, { "epoch": 13.732014388489208, "grad_norm": 0.25233668088912964, "learning_rate": 8.899738628925429e-05, "loss": 0.0107, "step": 15270 }, { "epoch": 13.741007194244604, "grad_norm": 0.23387326300144196, "learning_rate": 8.898013344586312e-05, "loss": 0.0078, "step": 15280 }, { "epoch": 13.75, "grad_norm": 0.189239963889122, "learning_rate": 8.896286876133661e-05, "loss": 0.0157, "step": 15290 }, { "epoch": 13.758992805755396, "grad_norm": 0.3849700391292572, "learning_rate": 8.894559224091933e-05, "loss": 0.0103, "step": 15300 }, { "epoch": 13.767985611510792, "grad_norm": 0.2552902400493622, "learning_rate": 8.892830388985942e-05, "loss": 0.0121, "step": 15310 }, { "epoch": 13.776978417266188, "grad_norm": 0.23587624728679657, "learning_rate": 8.891100371340864e-05, "loss": 0.0083, "step": 15320 }, { "epoch": 13.785971223021583, "grad_norm": 0.29260411858558655, "learning_rate": 8.889369171682231e-05, "loss": 0.0139, "step": 15330 }, { "epoch": 13.79496402877698, "grad_norm": 0.2756161093711853, "learning_rate": 8.887636790535936e-05, "loss": 0.0085, "step": 15340 }, { "epoch": 13.803956834532373, "grad_norm": 0.14953972399234772, "learning_rate": 8.885903228428231e-05, "loss": 0.0076, "step": 15350 }, { "epoch": 13.81294964028777, "grad_norm": 0.27102598547935486, "learning_rate": 8.884168485885727e-05, "loss": 0.0098, "step": 15360 }, { "epoch": 13.821942446043165, "grad_norm": 0.15584927797317505, "learning_rate": 8.882432563435393e-05, "loss": 0.0099, "step": 15370 }, { "epoch": 13.83093525179856, "grad_norm": 0.20718149840831757, "learning_rate": 8.880695461604556e-05, "loss": 0.0078, "step": 15380 }, { "epoch": 13.839928057553957, "grad_norm": 0.24394866824150085, "learning_rate": 8.878957180920901e-05, "loss": 0.0095, "step": 15390 }, { "epoch": 13.848920863309353, "grad_norm": 0.28250232338905334, "learning_rate": 8.877217721912473e-05, "loss": 0.0106, "step": 15400 }, { "epoch": 13.857913669064748, "grad_norm": 0.1906736195087433, "learning_rate": 8.875477085107673e-05, "loss": 0.0103, "step": 15410 }, { "epoch": 13.866906474820144, "grad_norm": 0.29849734902381897, "learning_rate": 8.87373527103526e-05, "loss": 0.0119, "step": 15420 }, { "epoch": 13.87589928057554, "grad_norm": 0.15384213626384735, "learning_rate": 8.871992280224353e-05, "loss": 0.0077, "step": 15430 }, { "epoch": 13.884892086330936, "grad_norm": 0.23286916315555573, "learning_rate": 8.870248113204422e-05, "loss": 0.0084, "step": 15440 }, { "epoch": 13.89388489208633, "grad_norm": 0.21953396499156952, "learning_rate": 8.868502770505306e-05, "loss": 0.0111, "step": 15450 }, { "epoch": 13.902877697841726, "grad_norm": 0.16360275447368622, "learning_rate": 8.86675625265719e-05, "loss": 0.0122, "step": 15460 }, { "epoch": 13.911870503597122, "grad_norm": 0.16260723769664764, "learning_rate": 8.865008560190618e-05, "loss": 0.0081, "step": 15470 }, { "epoch": 13.920863309352518, "grad_norm": 0.16835299134254456, "learning_rate": 8.863259693636496e-05, "loss": 0.0109, "step": 15480 }, { "epoch": 13.929856115107913, "grad_norm": 0.3409527838230133, "learning_rate": 8.861509653526083e-05, "loss": 0.0095, "step": 15490 }, { "epoch": 13.93884892086331, "grad_norm": 0.18568190932273865, "learning_rate": 8.859758440390993e-05, "loss": 0.009, "step": 15500 }, { "epoch": 13.947841726618705, "grad_norm": 0.2534100413322449, "learning_rate": 8.858006054763202e-05, "loss": 0.0079, "step": 15510 }, { "epoch": 13.956834532374101, "grad_norm": 0.2773996591567993, "learning_rate": 8.856252497175035e-05, "loss": 0.0097, "step": 15520 }, { "epoch": 13.965827338129497, "grad_norm": 0.2251114845275879, "learning_rate": 8.854497768159178e-05, "loss": 0.006, "step": 15530 }, { "epoch": 13.974820143884893, "grad_norm": 0.27074968814849854, "learning_rate": 8.852741868248671e-05, "loss": 0.0087, "step": 15540 }, { "epoch": 13.983812949640289, "grad_norm": 0.27773186564445496, "learning_rate": 8.85098479797691e-05, "loss": 0.0102, "step": 15550 }, { "epoch": 13.992805755395683, "grad_norm": 0.3692442774772644, "learning_rate": 8.849226557877646e-05, "loss": 0.0104, "step": 15560 }, { "epoch": 14.001798561151078, "grad_norm": 0.36565524339675903, "learning_rate": 8.84746714848499e-05, "loss": 0.0101, "step": 15570 }, { "epoch": 14.010791366906474, "grad_norm": 0.19361039996147156, "learning_rate": 8.845706570333397e-05, "loss": 0.0089, "step": 15580 }, { "epoch": 14.01978417266187, "grad_norm": 0.2092127948999405, "learning_rate": 8.84394482395769e-05, "loss": 0.0118, "step": 15590 }, { "epoch": 14.028776978417266, "grad_norm": 0.23099902272224426, "learning_rate": 8.842181909893038e-05, "loss": 0.0071, "step": 15600 }, { "epoch": 14.037769784172662, "grad_norm": 0.13463933765888214, "learning_rate": 8.840417828674969e-05, "loss": 0.0084, "step": 15610 }, { "epoch": 14.046762589928058, "grad_norm": 0.23576462268829346, "learning_rate": 8.838652580839364e-05, "loss": 0.0124, "step": 15620 }, { "epoch": 14.055755395683454, "grad_norm": 0.3456405699253082, "learning_rate": 8.836886166922458e-05, "loss": 0.0179, "step": 15630 }, { "epoch": 14.06474820143885, "grad_norm": 0.3224973976612091, "learning_rate": 8.835118587460844e-05, "loss": 0.0102, "step": 15640 }, { "epoch": 14.073741007194245, "grad_norm": 0.27607667446136475, "learning_rate": 8.83334984299146e-05, "loss": 0.0143, "step": 15650 }, { "epoch": 14.082733812949641, "grad_norm": 0.284016877412796, "learning_rate": 8.83157993405161e-05, "loss": 0.0164, "step": 15660 }, { "epoch": 14.091726618705035, "grad_norm": 0.384288489818573, "learning_rate": 8.829808861178943e-05, "loss": 0.0114, "step": 15670 }, { "epoch": 14.100719424460431, "grad_norm": 0.30378687381744385, "learning_rate": 8.828036624911464e-05, "loss": 0.0135, "step": 15680 }, { "epoch": 14.109712230215827, "grad_norm": 0.29116079211235046, "learning_rate": 8.826263225787532e-05, "loss": 0.0114, "step": 15690 }, { "epoch": 14.118705035971223, "grad_norm": 0.21708661317825317, "learning_rate": 8.824488664345858e-05, "loss": 0.009, "step": 15700 }, { "epoch": 14.127697841726619, "grad_norm": 0.4630233347415924, "learning_rate": 8.822712941125508e-05, "loss": 0.0102, "step": 15710 }, { "epoch": 14.136690647482014, "grad_norm": 0.36720800399780273, "learning_rate": 8.820936056665898e-05, "loss": 0.0114, "step": 15720 }, { "epoch": 14.14568345323741, "grad_norm": 0.28893449902534485, "learning_rate": 8.819158011506801e-05, "loss": 0.01, "step": 15730 }, { "epoch": 14.154676258992806, "grad_norm": 0.18834009766578674, "learning_rate": 8.81737880618834e-05, "loss": 0.0086, "step": 15740 }, { "epoch": 14.163669064748202, "grad_norm": 0.2027776688337326, "learning_rate": 8.815598441250987e-05, "loss": 0.0125, "step": 15750 }, { "epoch": 14.172661870503598, "grad_norm": 0.20171527564525604, "learning_rate": 8.813816917235576e-05, "loss": 0.0088, "step": 15760 }, { "epoch": 14.181654676258994, "grad_norm": 0.24785448610782623, "learning_rate": 8.812034234683282e-05, "loss": 0.0106, "step": 15770 }, { "epoch": 14.190647482014388, "grad_norm": 0.25504693388938904, "learning_rate": 8.810250394135637e-05, "loss": 0.0088, "step": 15780 }, { "epoch": 14.199640287769784, "grad_norm": 0.20937517285346985, "learning_rate": 8.808465396134529e-05, "loss": 0.0106, "step": 15790 }, { "epoch": 14.20863309352518, "grad_norm": 0.2717573046684265, "learning_rate": 8.806679241222189e-05, "loss": 0.0083, "step": 15800 }, { "epoch": 14.217625899280575, "grad_norm": 0.4332013428211212, "learning_rate": 8.804891929941203e-05, "loss": 0.0136, "step": 15810 }, { "epoch": 14.226618705035971, "grad_norm": 0.2621344029903412, "learning_rate": 8.803103462834514e-05, "loss": 0.0113, "step": 15820 }, { "epoch": 14.235611510791367, "grad_norm": 0.28780099749565125, "learning_rate": 8.801313840445408e-05, "loss": 0.0083, "step": 15830 }, { "epoch": 14.244604316546763, "grad_norm": 0.24608999490737915, "learning_rate": 8.799523063317524e-05, "loss": 0.0095, "step": 15840 }, { "epoch": 14.253597122302159, "grad_norm": 0.2050783336162567, "learning_rate": 8.797731131994854e-05, "loss": 0.0076, "step": 15850 }, { "epoch": 14.262589928057555, "grad_norm": 0.36490964889526367, "learning_rate": 8.795938047021739e-05, "loss": 0.013, "step": 15860 }, { "epoch": 14.27158273381295, "grad_norm": 0.23230399191379547, "learning_rate": 8.794143808942872e-05, "loss": 0.0096, "step": 15870 }, { "epoch": 14.280575539568344, "grad_norm": 0.25461915135383606, "learning_rate": 8.792348418303296e-05, "loss": 0.0096, "step": 15880 }, { "epoch": 14.28956834532374, "grad_norm": 0.2631610929965973, "learning_rate": 8.790551875648398e-05, "loss": 0.0137, "step": 15890 }, { "epoch": 14.298561151079136, "grad_norm": 0.19819195568561554, "learning_rate": 8.788754181523926e-05, "loss": 0.0143, "step": 15900 }, { "epoch": 14.307553956834532, "grad_norm": 0.29641619324684143, "learning_rate": 8.78695533647597e-05, "loss": 0.0155, "step": 15910 }, { "epoch": 14.316546762589928, "grad_norm": 0.32720762491226196, "learning_rate": 8.785155341050972e-05, "loss": 0.017, "step": 15920 }, { "epoch": 14.325539568345324, "grad_norm": 0.20749834179878235, "learning_rate": 8.783354195795721e-05, "loss": 0.0114, "step": 15930 }, { "epoch": 14.33453237410072, "grad_norm": 0.3097092807292938, "learning_rate": 8.78155190125736e-05, "loss": 0.0115, "step": 15940 }, { "epoch": 14.343525179856115, "grad_norm": 0.2234681397676468, "learning_rate": 8.779748457983378e-05, "loss": 0.0153, "step": 15950 }, { "epoch": 14.352517985611511, "grad_norm": 0.18416546285152435, "learning_rate": 8.777943866521612e-05, "loss": 0.0184, "step": 15960 }, { "epoch": 14.361510791366907, "grad_norm": 0.24046848714351654, "learning_rate": 8.77613812742025e-05, "loss": 0.0075, "step": 15970 }, { "epoch": 14.370503597122303, "grad_norm": 0.25480911135673523, "learning_rate": 8.774331241227829e-05, "loss": 0.0127, "step": 15980 }, { "epoch": 14.379496402877697, "grad_norm": 0.22797228395938873, "learning_rate": 8.772523208493232e-05, "loss": 0.0076, "step": 15990 }, { "epoch": 14.388489208633093, "grad_norm": 0.1598351001739502, "learning_rate": 8.770714029765692e-05, "loss": 0.0116, "step": 16000 }, { "epoch": 14.397482014388489, "grad_norm": 0.3052712678909302, "learning_rate": 8.768903705594789e-05, "loss": 0.0116, "step": 16010 }, { "epoch": 14.406474820143885, "grad_norm": 0.2467009425163269, "learning_rate": 8.767092236530453e-05, "loss": 0.0135, "step": 16020 }, { "epoch": 14.41546762589928, "grad_norm": 0.31480106711387634, "learning_rate": 8.76527962312296e-05, "loss": 0.0134, "step": 16030 }, { "epoch": 14.424460431654676, "grad_norm": 0.1215977743268013, "learning_rate": 8.763465865922934e-05, "loss": 0.0075, "step": 16040 }, { "epoch": 14.433453237410072, "grad_norm": 0.23673006892204285, "learning_rate": 8.761650965481347e-05, "loss": 0.012, "step": 16050 }, { "epoch": 14.442446043165468, "grad_norm": 0.248163640499115, "learning_rate": 8.759834922349516e-05, "loss": 0.0086, "step": 16060 }, { "epoch": 14.451438848920864, "grad_norm": 0.18592208623886108, "learning_rate": 8.758017737079108e-05, "loss": 0.0107, "step": 16070 }, { "epoch": 14.46043165467626, "grad_norm": 0.19655850529670715, "learning_rate": 8.756199410222137e-05, "loss": 0.0072, "step": 16080 }, { "epoch": 14.469424460431656, "grad_norm": 0.1848985105752945, "learning_rate": 8.754379942330963e-05, "loss": 0.0085, "step": 16090 }, { "epoch": 14.47841726618705, "grad_norm": 0.1349622905254364, "learning_rate": 8.75255933395829e-05, "loss": 0.0077, "step": 16100 }, { "epoch": 14.487410071942445, "grad_norm": 0.212606281042099, "learning_rate": 8.750737585657171e-05, "loss": 0.0091, "step": 16110 }, { "epoch": 14.496402877697841, "grad_norm": 0.24046237766742706, "learning_rate": 8.748914697981008e-05, "loss": 0.0092, "step": 16120 }, { "epoch": 14.505395683453237, "grad_norm": 0.22921480238437653, "learning_rate": 8.747090671483542e-05, "loss": 0.0099, "step": 16130 }, { "epoch": 14.514388489208633, "grad_norm": 0.24745425581932068, "learning_rate": 8.745265506718869e-05, "loss": 0.0069, "step": 16140 }, { "epoch": 14.523381294964029, "grad_norm": 0.23718206584453583, "learning_rate": 8.74343920424142e-05, "loss": 0.0094, "step": 16150 }, { "epoch": 14.532374100719425, "grad_norm": 0.28338295221328735, "learning_rate": 8.741611764605982e-05, "loss": 0.0099, "step": 16160 }, { "epoch": 14.54136690647482, "grad_norm": 0.11529185622930527, "learning_rate": 8.739783188367682e-05, "loss": 0.0096, "step": 16170 }, { "epoch": 14.550359712230216, "grad_norm": 0.26860231161117554, "learning_rate": 8.737953476081991e-05, "loss": 0.008, "step": 16180 }, { "epoch": 14.559352517985612, "grad_norm": 0.25125572085380554, "learning_rate": 8.73612262830473e-05, "loss": 0.0085, "step": 16190 }, { "epoch": 14.568345323741006, "grad_norm": 0.17446546256542206, "learning_rate": 8.734290645592061e-05, "loss": 0.0073, "step": 16200 }, { "epoch": 14.577338129496402, "grad_norm": 0.25165683031082153, "learning_rate": 8.732457528500493e-05, "loss": 0.0111, "step": 16210 }, { "epoch": 14.586330935251798, "grad_norm": 0.24331501126289368, "learning_rate": 8.730623277586875e-05, "loss": 0.0099, "step": 16220 }, { "epoch": 14.595323741007194, "grad_norm": 0.30370935797691345, "learning_rate": 8.72878789340841e-05, "loss": 0.0088, "step": 16230 }, { "epoch": 14.60431654676259, "grad_norm": 0.2181074619293213, "learning_rate": 8.726951376522635e-05, "loss": 0.011, "step": 16240 }, { "epoch": 14.613309352517986, "grad_norm": 0.2654135525226593, "learning_rate": 8.725113727487435e-05, "loss": 0.0093, "step": 16250 }, { "epoch": 14.622302158273381, "grad_norm": 0.21956942975521088, "learning_rate": 8.723274946861042e-05, "loss": 0.008, "step": 16260 }, { "epoch": 14.631294964028777, "grad_norm": 0.2622988522052765, "learning_rate": 8.721435035202026e-05, "loss": 0.0075, "step": 16270 }, { "epoch": 14.640287769784173, "grad_norm": 0.16247163712978363, "learning_rate": 8.719593993069306e-05, "loss": 0.011, "step": 16280 }, { "epoch": 14.649280575539569, "grad_norm": 0.3344869017601013, "learning_rate": 8.717751821022139e-05, "loss": 0.0089, "step": 16290 }, { "epoch": 14.658273381294965, "grad_norm": 0.9387147426605225, "learning_rate": 8.715908519620134e-05, "loss": 0.0096, "step": 16300 }, { "epoch": 14.667266187050359, "grad_norm": 0.3790053129196167, "learning_rate": 8.71406408942323e-05, "loss": 0.0103, "step": 16310 }, { "epoch": 14.676258992805755, "grad_norm": 0.16015313565731049, "learning_rate": 8.712218530991723e-05, "loss": 0.0086, "step": 16320 }, { "epoch": 14.68525179856115, "grad_norm": 0.22222672402858734, "learning_rate": 8.710371844886241e-05, "loss": 0.0089, "step": 16330 }, { "epoch": 14.694244604316546, "grad_norm": 0.20094147324562073, "learning_rate": 8.708524031667758e-05, "loss": 0.0092, "step": 16340 }, { "epoch": 14.703237410071942, "grad_norm": 0.29420533776283264, "learning_rate": 8.706675091897592e-05, "loss": 0.0118, "step": 16350 }, { "epoch": 14.712230215827338, "grad_norm": 0.2286561280488968, "learning_rate": 8.704825026137404e-05, "loss": 0.0124, "step": 16360 }, { "epoch": 14.721223021582734, "grad_norm": 0.21900662779808044, "learning_rate": 8.702973834949192e-05, "loss": 0.0103, "step": 16370 }, { "epoch": 14.73021582733813, "grad_norm": 0.19122926890850067, "learning_rate": 8.701121518895301e-05, "loss": 0.0091, "step": 16380 }, { "epoch": 14.739208633093526, "grad_norm": 0.3055160343647003, "learning_rate": 8.699268078538414e-05, "loss": 0.0117, "step": 16390 }, { "epoch": 14.748201438848922, "grad_norm": 0.19232231378555298, "learning_rate": 8.69741351444156e-05, "loss": 0.0077, "step": 16400 }, { "epoch": 14.757194244604317, "grad_norm": 0.23684845864772797, "learning_rate": 8.695557827168101e-05, "loss": 0.0095, "step": 16410 }, { "epoch": 14.766187050359711, "grad_norm": 0.25088605284690857, "learning_rate": 8.693701017281753e-05, "loss": 0.0129, "step": 16420 }, { "epoch": 14.775179856115107, "grad_norm": 0.36254972219467163, "learning_rate": 8.691843085346563e-05, "loss": 0.0142, "step": 16430 }, { "epoch": 14.784172661870503, "grad_norm": 0.18660421669483185, "learning_rate": 8.689984031926919e-05, "loss": 0.01, "step": 16440 }, { "epoch": 14.793165467625899, "grad_norm": 0.1448613703250885, "learning_rate": 8.688123857587555e-05, "loss": 0.01, "step": 16450 }, { "epoch": 14.802158273381295, "grad_norm": 0.2939911484718323, "learning_rate": 8.686262562893544e-05, "loss": 0.0114, "step": 16460 }, { "epoch": 14.81115107913669, "grad_norm": 0.19714082777500153, "learning_rate": 8.684400148410294e-05, "loss": 0.0089, "step": 16470 }, { "epoch": 14.820143884892087, "grad_norm": 0.2376938909292221, "learning_rate": 8.682536614703562e-05, "loss": 0.0099, "step": 16480 }, { "epoch": 14.829136690647482, "grad_norm": 0.20830823481082916, "learning_rate": 8.680671962339437e-05, "loss": 0.0089, "step": 16490 }, { "epoch": 14.838129496402878, "grad_norm": 0.17519380152225494, "learning_rate": 8.678806191884352e-05, "loss": 0.01, "step": 16500 }, { "epoch": 14.847122302158274, "grad_norm": 0.15907299518585205, "learning_rate": 8.67693930390508e-05, "loss": 0.0089, "step": 16510 }, { "epoch": 14.85611510791367, "grad_norm": 0.17566095292568207, "learning_rate": 8.67507129896873e-05, "loss": 0.0074, "step": 16520 }, { "epoch": 14.865107913669064, "grad_norm": 0.2879117727279663, "learning_rate": 8.673202177642757e-05, "loss": 0.0143, "step": 16530 }, { "epoch": 14.87410071942446, "grad_norm": 0.2231319397687912, "learning_rate": 8.671331940494945e-05, "loss": 0.0166, "step": 16540 }, { "epoch": 14.883093525179856, "grad_norm": 0.30664315819740295, "learning_rate": 8.669460588093427e-05, "loss": 0.0113, "step": 16550 }, { "epoch": 14.892086330935252, "grad_norm": 0.1992950439453125, "learning_rate": 8.667588121006667e-05, "loss": 0.009, "step": 16560 }, { "epoch": 14.901079136690647, "grad_norm": 0.27568063139915466, "learning_rate": 8.665714539803475e-05, "loss": 0.0132, "step": 16570 }, { "epoch": 14.910071942446043, "grad_norm": 0.28151875734329224, "learning_rate": 8.663839845052993e-05, "loss": 0.008, "step": 16580 }, { "epoch": 14.91906474820144, "grad_norm": 0.28875333070755005, "learning_rate": 8.661964037324703e-05, "loss": 0.01, "step": 16590 }, { "epoch": 14.928057553956835, "grad_norm": 0.2500198483467102, "learning_rate": 8.660087117188427e-05, "loss": 0.0072, "step": 16600 }, { "epoch": 14.93705035971223, "grad_norm": 0.2570837140083313, "learning_rate": 8.658209085214325e-05, "loss": 0.0117, "step": 16610 }, { "epoch": 14.946043165467627, "grad_norm": 0.23440177738666534, "learning_rate": 8.656329941972891e-05, "loss": 0.0152, "step": 16620 }, { "epoch": 14.95503597122302, "grad_norm": 0.13475443422794342, "learning_rate": 8.654449688034963e-05, "loss": 0.0094, "step": 16630 }, { "epoch": 14.964028776978417, "grad_norm": 0.2867593467235565, "learning_rate": 8.652568323971706e-05, "loss": 0.0112, "step": 16640 }, { "epoch": 14.973021582733812, "grad_norm": 0.26289933919906616, "learning_rate": 8.650685850354636e-05, "loss": 0.009, "step": 16650 }, { "epoch": 14.982014388489208, "grad_norm": 0.2859683930873871, "learning_rate": 8.648802267755593e-05, "loss": 0.0112, "step": 16660 }, { "epoch": 14.991007194244604, "grad_norm": 0.24444670975208282, "learning_rate": 8.646917576746764e-05, "loss": 0.0094, "step": 16670 }, { "epoch": 15.0, "grad_norm": 0.22334103286266327, "learning_rate": 8.645031777900666e-05, "loss": 0.0078, "step": 16680 }, { "epoch": 15.008992805755396, "grad_norm": 0.3318159282207489, "learning_rate": 8.643144871790154e-05, "loss": 0.0118, "step": 16690 }, { "epoch": 15.017985611510792, "grad_norm": 0.2716572880744934, "learning_rate": 8.641256858988424e-05, "loss": 0.0101, "step": 16700 }, { "epoch": 15.026978417266188, "grad_norm": 0.2786824703216553, "learning_rate": 8.639367740069e-05, "loss": 0.0093, "step": 16710 }, { "epoch": 15.035971223021583, "grad_norm": 0.25238606333732605, "learning_rate": 8.63747751560575e-05, "loss": 0.01, "step": 16720 }, { "epoch": 15.04496402877698, "grad_norm": 0.1376042366027832, "learning_rate": 8.635586186172871e-05, "loss": 0.0074, "step": 16730 }, { "epoch": 15.053956834532373, "grad_norm": 0.20201601088047028, "learning_rate": 8.633693752344902e-05, "loss": 0.0075, "step": 16740 }, { "epoch": 15.06294964028777, "grad_norm": 0.20235979557037354, "learning_rate": 8.631800214696713e-05, "loss": 0.0091, "step": 16750 }, { "epoch": 15.071942446043165, "grad_norm": 0.22378218173980713, "learning_rate": 8.629905573803511e-05, "loss": 0.01, "step": 16760 }, { "epoch": 15.08093525179856, "grad_norm": 0.176409050822258, "learning_rate": 8.628009830240839e-05, "loss": 0.0093, "step": 16770 }, { "epoch": 15.089928057553957, "grad_norm": 0.179216206073761, "learning_rate": 8.626112984584571e-05, "loss": 0.0111, "step": 16780 }, { "epoch": 15.098920863309353, "grad_norm": 0.22382314503192902, "learning_rate": 8.62421503741092e-05, "loss": 0.0092, "step": 16790 }, { "epoch": 15.107913669064748, "grad_norm": 0.14575129747390747, "learning_rate": 8.622315989296432e-05, "loss": 0.0062, "step": 16800 }, { "epoch": 15.116906474820144, "grad_norm": 0.1707952469587326, "learning_rate": 8.62041584081799e-05, "loss": 0.0094, "step": 16810 }, { "epoch": 15.12589928057554, "grad_norm": 0.19743041694164276, "learning_rate": 8.618514592552807e-05, "loss": 0.008, "step": 16820 }, { "epoch": 15.134892086330936, "grad_norm": 0.14477184414863586, "learning_rate": 8.616612245078431e-05, "loss": 0.007, "step": 16830 }, { "epoch": 15.14388489208633, "grad_norm": 0.1631578803062439, "learning_rate": 8.614708798972746e-05, "loss": 0.0093, "step": 16840 }, { "epoch": 15.152877697841726, "grad_norm": 0.17137566208839417, "learning_rate": 8.61280425481397e-05, "loss": 0.0065, "step": 16850 }, { "epoch": 15.161870503597122, "grad_norm": 0.20287683606147766, "learning_rate": 8.61089861318065e-05, "loss": 0.0083, "step": 16860 }, { "epoch": 15.170863309352518, "grad_norm": 0.19125793874263763, "learning_rate": 8.608991874651673e-05, "loss": 0.0053, "step": 16870 }, { "epoch": 15.179856115107913, "grad_norm": 0.2212005853652954, "learning_rate": 8.607084039806255e-05, "loss": 0.009, "step": 16880 }, { "epoch": 15.18884892086331, "grad_norm": 0.22509285807609558, "learning_rate": 8.605175109223944e-05, "loss": 0.0085, "step": 16890 }, { "epoch": 15.197841726618705, "grad_norm": 0.3070281744003296, "learning_rate": 8.603265083484624e-05, "loss": 0.0087, "step": 16900 }, { "epoch": 15.206834532374101, "grad_norm": 0.15719740092754364, "learning_rate": 8.60135396316851e-05, "loss": 0.0069, "step": 16910 }, { "epoch": 15.215827338129497, "grad_norm": 0.15637461841106415, "learning_rate": 8.599441748856152e-05, "loss": 0.0078, "step": 16920 }, { "epoch": 15.224820143884893, "grad_norm": 0.16382500529289246, "learning_rate": 8.597528441128427e-05, "loss": 0.0091, "step": 16930 }, { "epoch": 15.233812949640289, "grad_norm": 0.1859724223613739, "learning_rate": 8.595614040566549e-05, "loss": 0.007, "step": 16940 }, { "epoch": 15.242805755395683, "grad_norm": 0.24146762490272522, "learning_rate": 8.593698547752063e-05, "loss": 0.0092, "step": 16950 }, { "epoch": 15.251798561151078, "grad_norm": 0.3097940981388092, "learning_rate": 8.591781963266843e-05, "loss": 0.0177, "step": 16960 }, { "epoch": 15.260791366906474, "grad_norm": 0.21166419982910156, "learning_rate": 8.5898642876931e-05, "loss": 0.0081, "step": 16970 }, { "epoch": 15.26978417266187, "grad_norm": 0.3047437369823456, "learning_rate": 8.587945521613369e-05, "loss": 0.0145, "step": 16980 }, { "epoch": 15.278776978417266, "grad_norm": 0.25072142481803894, "learning_rate": 8.586025665610524e-05, "loss": 0.0073, "step": 16990 }, { "epoch": 15.287769784172662, "grad_norm": 0.23246440291404724, "learning_rate": 8.584104720267765e-05, "loss": 0.009, "step": 17000 }, { "epoch": 15.296762589928058, "grad_norm": 0.213529571890831, "learning_rate": 8.582182686168625e-05, "loss": 0.0084, "step": 17010 }, { "epoch": 15.305755395683454, "grad_norm": 0.19532375037670135, "learning_rate": 8.580259563896967e-05, "loss": 0.0094, "step": 17020 }, { "epoch": 15.31474820143885, "grad_norm": 0.2593597173690796, "learning_rate": 8.578335354036983e-05, "loss": 0.0091, "step": 17030 }, { "epoch": 15.323741007194245, "grad_norm": 0.1851549744606018, "learning_rate": 8.576410057173201e-05, "loss": 0.0085, "step": 17040 }, { "epoch": 15.332733812949641, "grad_norm": 0.22984474897384644, "learning_rate": 8.574483673890474e-05, "loss": 0.0078, "step": 17050 }, { "epoch": 15.341726618705035, "grad_norm": 0.2870316803455353, "learning_rate": 8.572556204773983e-05, "loss": 0.0067, "step": 17060 }, { "epoch": 15.350719424460431, "grad_norm": 0.21007724106311798, "learning_rate": 8.570627650409246e-05, "loss": 0.0065, "step": 17070 }, { "epoch": 15.359712230215827, "grad_norm": 0.11393947899341583, "learning_rate": 8.568698011382107e-05, "loss": 0.0073, "step": 17080 }, { "epoch": 15.368705035971223, "grad_norm": 0.27040955424308777, "learning_rate": 8.566767288278738e-05, "loss": 0.0111, "step": 17090 }, { "epoch": 15.377697841726619, "grad_norm": 0.24968545138835907, "learning_rate": 8.56483548168564e-05, "loss": 0.0081, "step": 17100 }, { "epoch": 15.386690647482014, "grad_norm": 0.23530583083629608, "learning_rate": 8.562902592189648e-05, "loss": 0.0094, "step": 17110 }, { "epoch": 15.39568345323741, "grad_norm": 0.13635031878948212, "learning_rate": 8.560968620377921e-05, "loss": 0.0082, "step": 17120 }, { "epoch": 15.404676258992806, "grad_norm": 0.2722063958644867, "learning_rate": 8.559033566837951e-05, "loss": 0.0123, "step": 17130 }, { "epoch": 15.413669064748202, "grad_norm": 0.3143732249736786, "learning_rate": 8.557097432157551e-05, "loss": 0.0115, "step": 17140 }, { "epoch": 15.422661870503598, "grad_norm": 0.1853758990764618, "learning_rate": 8.555160216924872e-05, "loss": 0.0083, "step": 17150 }, { "epoch": 15.431654676258994, "grad_norm": 0.24368292093276978, "learning_rate": 8.55322192172839e-05, "loss": 0.0103, "step": 17160 }, { "epoch": 15.440647482014388, "grad_norm": 0.15103022754192352, "learning_rate": 8.551282547156902e-05, "loss": 0.0063, "step": 17170 }, { "epoch": 15.449640287769784, "grad_norm": 0.3238500952720642, "learning_rate": 8.549342093799544e-05, "loss": 0.0103, "step": 17180 }, { "epoch": 15.45863309352518, "grad_norm": 0.21256771683692932, "learning_rate": 8.547400562245773e-05, "loss": 0.0129, "step": 17190 }, { "epoch": 15.467625899280575, "grad_norm": 0.3163875341415405, "learning_rate": 8.545457953085374e-05, "loss": 0.0106, "step": 17200 }, { "epoch": 15.476618705035971, "grad_norm": 0.22162553668022156, "learning_rate": 8.543514266908463e-05, "loss": 0.0088, "step": 17210 }, { "epoch": 15.485611510791367, "grad_norm": 0.2394648939371109, "learning_rate": 8.541569504305478e-05, "loss": 0.0106, "step": 17220 }, { "epoch": 15.494604316546763, "grad_norm": 0.2796693742275238, "learning_rate": 8.539623665867187e-05, "loss": 0.0098, "step": 17230 }, { "epoch": 15.503597122302159, "grad_norm": 0.27977627515792847, "learning_rate": 8.537676752184685e-05, "loss": 0.0108, "step": 17240 }, { "epoch": 15.512589928057555, "grad_norm": 0.20496174693107605, "learning_rate": 8.53572876384939e-05, "loss": 0.0086, "step": 17250 }, { "epoch": 15.52158273381295, "grad_norm": 0.28595981001853943, "learning_rate": 8.533779701453056e-05, "loss": 0.0092, "step": 17260 }, { "epoch": 15.530575539568346, "grad_norm": 0.20230671763420105, "learning_rate": 8.53182956558775e-05, "loss": 0.0104, "step": 17270 }, { "epoch": 15.53956834532374, "grad_norm": 0.21797075867652893, "learning_rate": 8.529878356845877e-05, "loss": 0.008, "step": 17280 }, { "epoch": 15.548561151079136, "grad_norm": 0.3229576051235199, "learning_rate": 8.527926075820158e-05, "loss": 0.0106, "step": 17290 }, { "epoch": 15.557553956834532, "grad_norm": 0.25124892592430115, "learning_rate": 8.525972723103648e-05, "loss": 0.0126, "step": 17300 }, { "epoch": 15.566546762589928, "grad_norm": 0.1589513123035431, "learning_rate": 8.524018299289722e-05, "loss": 0.0098, "step": 17310 }, { "epoch": 15.575539568345324, "grad_norm": 0.29337769746780396, "learning_rate": 8.522062804972083e-05, "loss": 0.0105, "step": 17320 }, { "epoch": 15.58453237410072, "grad_norm": 0.2801873981952667, "learning_rate": 8.520106240744759e-05, "loss": 0.0095, "step": 17330 }, { "epoch": 15.593525179856115, "grad_norm": 0.2895070016384125, "learning_rate": 8.518148607202102e-05, "loss": 0.0118, "step": 17340 }, { "epoch": 15.602517985611511, "grad_norm": 0.34925225377082825, "learning_rate": 8.51618990493879e-05, "loss": 0.0106, "step": 17350 }, { "epoch": 15.611510791366907, "grad_norm": 0.26033055782318115, "learning_rate": 8.514230134549823e-05, "loss": 0.0131, "step": 17360 }, { "epoch": 15.620503597122303, "grad_norm": 0.26349323987960815, "learning_rate": 8.51226929663053e-05, "loss": 0.0096, "step": 17370 }, { "epoch": 15.629496402877697, "grad_norm": 0.3069261908531189, "learning_rate": 8.51030739177656e-05, "loss": 0.0102, "step": 17380 }, { "epoch": 15.638489208633093, "grad_norm": 0.27400925755500793, "learning_rate": 8.508344420583889e-05, "loss": 0.0125, "step": 17390 }, { "epoch": 15.647482014388489, "grad_norm": 0.1885857880115509, "learning_rate": 8.506380383648816e-05, "loss": 0.0082, "step": 17400 }, { "epoch": 15.656474820143885, "grad_norm": 0.28562527894973755, "learning_rate": 8.504415281567963e-05, "loss": 0.0085, "step": 17410 }, { "epoch": 15.66546762589928, "grad_norm": 0.20656371116638184, "learning_rate": 8.502449114938275e-05, "loss": 0.0074, "step": 17420 }, { "epoch": 15.674460431654676, "grad_norm": 0.20840801298618317, "learning_rate": 8.500481884357025e-05, "loss": 0.0096, "step": 17430 }, { "epoch": 15.683453237410072, "grad_norm": 0.2901526093482971, "learning_rate": 8.498513590421801e-05, "loss": 0.0085, "step": 17440 }, { "epoch": 15.692446043165468, "grad_norm": 0.22877535223960876, "learning_rate": 8.496544233730522e-05, "loss": 0.0099, "step": 17450 }, { "epoch": 15.701438848920864, "grad_norm": 0.24232502281665802, "learning_rate": 8.494573814881426e-05, "loss": 0.0106, "step": 17460 }, { "epoch": 15.71043165467626, "grad_norm": 0.19713279604911804, "learning_rate": 8.492602334473074e-05, "loss": 0.0072, "step": 17470 }, { "epoch": 15.719424460431654, "grad_norm": 0.1958431601524353, "learning_rate": 8.49062979310435e-05, "loss": 0.0115, "step": 17480 }, { "epoch": 15.72841726618705, "grad_norm": 0.21960638463497162, "learning_rate": 8.488656191374458e-05, "loss": 0.0126, "step": 17490 }, { "epoch": 15.737410071942445, "grad_norm": 0.2043512612581253, "learning_rate": 8.48668152988293e-05, "loss": 0.0088, "step": 17500 }, { "epoch": 15.746402877697841, "grad_norm": 0.33629098534584045, "learning_rate": 8.484705809229612e-05, "loss": 0.0092, "step": 17510 }, { "epoch": 15.755395683453237, "grad_norm": 0.33993422985076904, "learning_rate": 8.482729030014677e-05, "loss": 0.0114, "step": 17520 }, { "epoch": 15.764388489208633, "grad_norm": 0.15049827098846436, "learning_rate": 8.48075119283862e-05, "loss": 0.0083, "step": 17530 }, { "epoch": 15.773381294964029, "grad_norm": 0.32182738184928894, "learning_rate": 8.478772298302254e-05, "loss": 0.0089, "step": 17540 }, { "epoch": 15.782374100719425, "grad_norm": 0.27924254536628723, "learning_rate": 8.476792347006716e-05, "loss": 0.0117, "step": 17550 }, { "epoch": 15.79136690647482, "grad_norm": 0.34987133741378784, "learning_rate": 8.474811339553462e-05, "loss": 0.0078, "step": 17560 }, { "epoch": 15.800359712230216, "grad_norm": 0.3911101818084717, "learning_rate": 8.47282927654427e-05, "loss": 0.0149, "step": 17570 }, { "epoch": 15.809352517985612, "grad_norm": 0.23601990938186646, "learning_rate": 8.470846158581238e-05, "loss": 0.0068, "step": 17580 }, { "epoch": 15.818345323741006, "grad_norm": 0.1716727763414383, "learning_rate": 8.468861986266787e-05, "loss": 0.0088, "step": 17590 }, { "epoch": 15.827338129496402, "grad_norm": 0.15180669724941254, "learning_rate": 8.466876760203654e-05, "loss": 0.0089, "step": 17600 }, { "epoch": 15.836330935251798, "grad_norm": 0.2901901602745056, "learning_rate": 8.464890480994898e-05, "loss": 0.0141, "step": 17610 }, { "epoch": 15.845323741007194, "grad_norm": 0.2155124992132187, "learning_rate": 8.462903149243899e-05, "loss": 0.0079, "step": 17620 }, { "epoch": 15.85431654676259, "grad_norm": 0.304085910320282, "learning_rate": 8.460914765554357e-05, "loss": 0.0076, "step": 17630 }, { "epoch": 15.863309352517986, "grad_norm": 0.2990993559360504, "learning_rate": 8.458925330530288e-05, "loss": 0.0132, "step": 17640 }, { "epoch": 15.872302158273381, "grad_norm": 0.2501242160797119, "learning_rate": 8.456934844776032e-05, "loss": 0.0098, "step": 17650 }, { "epoch": 15.881294964028777, "grad_norm": 0.3371065855026245, "learning_rate": 8.454943308896246e-05, "loss": 0.0104, "step": 17660 }, { "epoch": 15.890287769784173, "grad_norm": 0.19983389973640442, "learning_rate": 8.452950723495905e-05, "loss": 0.008, "step": 17670 }, { "epoch": 15.899280575539569, "grad_norm": 0.19192014634609222, "learning_rate": 8.450957089180303e-05, "loss": 0.0089, "step": 17680 }, { "epoch": 15.908273381294965, "grad_norm": 0.3014160394668579, "learning_rate": 8.448962406555055e-05, "loss": 0.0093, "step": 17690 }, { "epoch": 15.917266187050359, "grad_norm": 0.21955770254135132, "learning_rate": 8.446966676226093e-05, "loss": 0.0131, "step": 17700 }, { "epoch": 15.926258992805755, "grad_norm": 0.3091218173503876, "learning_rate": 8.444969898799667e-05, "loss": 0.0106, "step": 17710 }, { "epoch": 15.93525179856115, "grad_norm": 0.2041618376970291, "learning_rate": 8.442972074882343e-05, "loss": 0.0097, "step": 17720 }, { "epoch": 15.944244604316546, "grad_norm": 0.2569500505924225, "learning_rate": 8.44097320508101e-05, "loss": 0.0087, "step": 17730 }, { "epoch": 15.953237410071942, "grad_norm": 0.17593495547771454, "learning_rate": 8.43897329000287e-05, "loss": 0.0061, "step": 17740 }, { "epoch": 15.962230215827338, "grad_norm": 0.368720680475235, "learning_rate": 8.436972330255448e-05, "loss": 0.0102, "step": 17750 }, { "epoch": 15.971223021582734, "grad_norm": 0.23966695368289948, "learning_rate": 8.434970326446579e-05, "loss": 0.0097, "step": 17760 }, { "epoch": 15.98021582733813, "grad_norm": 0.1923954337835312, "learning_rate": 8.432967279184418e-05, "loss": 0.008, "step": 17770 }, { "epoch": 15.989208633093526, "grad_norm": 0.19140617549419403, "learning_rate": 8.430963189077441e-05, "loss": 0.0069, "step": 17780 }, { "epoch": 15.998201438848922, "grad_norm": 0.23003077507019043, "learning_rate": 8.428958056734437e-05, "loss": 0.0072, "step": 17790 }, { "epoch": 16.007194244604317, "grad_norm": 0.24206319451332092, "learning_rate": 8.426951882764513e-05, "loss": 0.0152, "step": 17800 }, { "epoch": 16.01618705035971, "grad_norm": 0.23105688393115997, "learning_rate": 8.424944667777089e-05, "loss": 0.0075, "step": 17810 }, { "epoch": 16.02517985611511, "grad_norm": 0.4068743586540222, "learning_rate": 8.422936412381905e-05, "loss": 0.0093, "step": 17820 }, { "epoch": 16.034172661870503, "grad_norm": 0.14632472395896912, "learning_rate": 8.420927117189017e-05, "loss": 0.0082, "step": 17830 }, { "epoch": 16.0431654676259, "grad_norm": 0.2628095746040344, "learning_rate": 8.418916782808795e-05, "loss": 0.0092, "step": 17840 }, { "epoch": 16.052158273381295, "grad_norm": 0.31422939896583557, "learning_rate": 8.416905409851926e-05, "loss": 0.0086, "step": 17850 }, { "epoch": 16.06115107913669, "grad_norm": 0.16282443702220917, "learning_rate": 8.41489299892941e-05, "loss": 0.0067, "step": 17860 }, { "epoch": 16.070143884892087, "grad_norm": 0.15376216173171997, "learning_rate": 8.412879550652566e-05, "loss": 0.0076, "step": 17870 }, { "epoch": 16.07913669064748, "grad_norm": 0.263151079416275, "learning_rate": 8.410865065633029e-05, "loss": 0.0093, "step": 17880 }, { "epoch": 16.08812949640288, "grad_norm": 0.25727972388267517, "learning_rate": 8.408849544482742e-05, "loss": 0.0103, "step": 17890 }, { "epoch": 16.097122302158272, "grad_norm": 0.22839608788490295, "learning_rate": 8.406832987813968e-05, "loss": 0.0083, "step": 17900 }, { "epoch": 16.10611510791367, "grad_norm": 0.20842629671096802, "learning_rate": 8.404815396239286e-05, "loss": 0.0094, "step": 17910 }, { "epoch": 16.115107913669064, "grad_norm": 0.2536851465702057, "learning_rate": 8.402796770371587e-05, "loss": 0.0084, "step": 17920 }, { "epoch": 16.12410071942446, "grad_norm": 0.18131615221500397, "learning_rate": 8.400777110824071e-05, "loss": 0.0071, "step": 17930 }, { "epoch": 16.133093525179856, "grad_norm": 0.2681281268596649, "learning_rate": 8.398756418210263e-05, "loss": 0.0078, "step": 17940 }, { "epoch": 16.142086330935253, "grad_norm": 0.17098048329353333, "learning_rate": 8.396734693143993e-05, "loss": 0.0077, "step": 17950 }, { "epoch": 16.151079136690647, "grad_norm": 0.21443136036396027, "learning_rate": 8.39471193623941e-05, "loss": 0.0145, "step": 17960 }, { "epoch": 16.16007194244604, "grad_norm": 0.2813592851161957, "learning_rate": 8.392688148110974e-05, "loss": 0.01, "step": 17970 }, { "epoch": 16.16906474820144, "grad_norm": 0.18418170511722565, "learning_rate": 8.390663329373456e-05, "loss": 0.0082, "step": 17980 }, { "epoch": 16.178057553956833, "grad_norm": 0.33365705609321594, "learning_rate": 8.388637480641944e-05, "loss": 0.0119, "step": 17990 }, { "epoch": 16.18705035971223, "grad_norm": 0.24116331338882446, "learning_rate": 8.386610602531837e-05, "loss": 0.012, "step": 18000 }, { "epoch": 16.196043165467625, "grad_norm": 0.4123648405075073, "learning_rate": 8.384582695658847e-05, "loss": 0.0092, "step": 18010 }, { "epoch": 16.205035971223023, "grad_norm": 0.24853515625, "learning_rate": 8.382553760638999e-05, "loss": 0.0098, "step": 18020 }, { "epoch": 16.214028776978417, "grad_norm": 0.18764299154281616, "learning_rate": 8.380523798088631e-05, "loss": 0.0087, "step": 18030 }, { "epoch": 16.223021582733814, "grad_norm": 0.22980715334415436, "learning_rate": 8.378492808624389e-05, "loss": 0.0084, "step": 18040 }, { "epoch": 16.23201438848921, "grad_norm": 0.20650677382946014, "learning_rate": 8.376460792863237e-05, "loss": 0.0115, "step": 18050 }, { "epoch": 16.241007194244606, "grad_norm": 0.1982944905757904, "learning_rate": 8.374427751422444e-05, "loss": 0.0066, "step": 18060 }, { "epoch": 16.25, "grad_norm": 0.16916698217391968, "learning_rate": 8.3723936849196e-05, "loss": 0.0075, "step": 18070 }, { "epoch": 16.258992805755394, "grad_norm": 0.2712149918079376, "learning_rate": 8.370358593972595e-05, "loss": 0.008, "step": 18080 }, { "epoch": 16.26798561151079, "grad_norm": 0.21828274428844452, "learning_rate": 8.36832247919964e-05, "loss": 0.0082, "step": 18090 }, { "epoch": 16.276978417266186, "grad_norm": 0.20527851581573486, "learning_rate": 8.36628534121925e-05, "loss": 0.0104, "step": 18100 }, { "epoch": 16.285971223021583, "grad_norm": 0.29260897636413574, "learning_rate": 8.364247180650254e-05, "loss": 0.0117, "step": 18110 }, { "epoch": 16.294964028776977, "grad_norm": 0.17583633959293365, "learning_rate": 8.362207998111794e-05, "loss": 0.0065, "step": 18120 }, { "epoch": 16.303956834532375, "grad_norm": 0.14725525677204132, "learning_rate": 8.360167794223318e-05, "loss": 0.0084, "step": 18130 }, { "epoch": 16.31294964028777, "grad_norm": 0.24915750324726105, "learning_rate": 8.358126569604586e-05, "loss": 0.0088, "step": 18140 }, { "epoch": 16.321942446043167, "grad_norm": 0.1386277824640274, "learning_rate": 8.356084324875668e-05, "loss": 0.0114, "step": 18150 }, { "epoch": 16.33093525179856, "grad_norm": 0.18445317447185516, "learning_rate": 8.354041060656945e-05, "loss": 0.0072, "step": 18160 }, { "epoch": 16.33992805755396, "grad_norm": 0.202480286359787, "learning_rate": 8.351996777569106e-05, "loss": 0.0072, "step": 18170 }, { "epoch": 16.348920863309353, "grad_norm": 0.1550438404083252, "learning_rate": 8.349951476233148e-05, "loss": 0.0097, "step": 18180 }, { "epoch": 16.357913669064747, "grad_norm": 0.19174359738826752, "learning_rate": 8.347905157270386e-05, "loss": 0.0074, "step": 18190 }, { "epoch": 16.366906474820144, "grad_norm": 0.20056408643722534, "learning_rate": 8.345857821302432e-05, "loss": 0.0096, "step": 18200 }, { "epoch": 16.37589928057554, "grad_norm": 0.1672387719154358, "learning_rate": 8.343809468951213e-05, "loss": 0.0098, "step": 18210 }, { "epoch": 16.384892086330936, "grad_norm": 0.19726258516311646, "learning_rate": 8.341760100838965e-05, "loss": 0.0118, "step": 18220 }, { "epoch": 16.39388489208633, "grad_norm": 0.13894155621528625, "learning_rate": 8.339709717588233e-05, "loss": 0.0111, "step": 18230 }, { "epoch": 16.402877697841728, "grad_norm": 0.18094003200531006, "learning_rate": 8.33765831982187e-05, "loss": 0.0085, "step": 18240 }, { "epoch": 16.41187050359712, "grad_norm": 0.19376705586910248, "learning_rate": 8.335605908163035e-05, "loss": 0.0062, "step": 18250 }, { "epoch": 16.42086330935252, "grad_norm": 0.1857338547706604, "learning_rate": 8.333552483235196e-05, "loss": 0.0073, "step": 18260 }, { "epoch": 16.429856115107913, "grad_norm": 0.21819449961185455, "learning_rate": 8.33149804566213e-05, "loss": 0.0087, "step": 18270 }, { "epoch": 16.43884892086331, "grad_norm": 0.16726553440093994, "learning_rate": 8.329442596067921e-05, "loss": 0.0083, "step": 18280 }, { "epoch": 16.447841726618705, "grad_norm": 0.2573315501213074, "learning_rate": 8.32738613507696e-05, "loss": 0.0111, "step": 18290 }, { "epoch": 16.4568345323741, "grad_norm": 0.2195669710636139, "learning_rate": 8.325328663313946e-05, "loss": 0.0114, "step": 18300 }, { "epoch": 16.465827338129497, "grad_norm": 0.14182224869728088, "learning_rate": 8.323270181403884e-05, "loss": 0.0091, "step": 18310 }, { "epoch": 16.47482014388489, "grad_norm": 0.17377498745918274, "learning_rate": 8.321210689972086e-05, "loss": 0.0099, "step": 18320 }, { "epoch": 16.48381294964029, "grad_norm": 0.2185131013393402, "learning_rate": 8.319150189644174e-05, "loss": 0.0083, "step": 18330 }, { "epoch": 16.492805755395683, "grad_norm": 0.26788631081581116, "learning_rate": 8.31708868104607e-05, "loss": 0.0145, "step": 18340 }, { "epoch": 16.50179856115108, "grad_norm": 0.1717056930065155, "learning_rate": 8.315026164804007e-05, "loss": 0.0087, "step": 18350 }, { "epoch": 16.510791366906474, "grad_norm": 0.33499106764793396, "learning_rate": 8.312962641544524e-05, "loss": 0.0109, "step": 18360 }, { "epoch": 16.519784172661872, "grad_norm": 0.17425282299518585, "learning_rate": 8.310898111894465e-05, "loss": 0.0078, "step": 18370 }, { "epoch": 16.528776978417266, "grad_norm": 0.2599163353443146, "learning_rate": 8.308832576480977e-05, "loss": 0.0089, "step": 18380 }, { "epoch": 16.53776978417266, "grad_norm": 0.32272079586982727, "learning_rate": 8.306766035931519e-05, "loss": 0.0124, "step": 18390 }, { "epoch": 16.546762589928058, "grad_norm": 0.26730579137802124, "learning_rate": 8.304698490873847e-05, "loss": 0.0084, "step": 18400 }, { "epoch": 16.555755395683452, "grad_norm": 0.24488689005374908, "learning_rate": 8.30262994193603e-05, "loss": 0.0106, "step": 18410 }, { "epoch": 16.56474820143885, "grad_norm": 0.3149888813495636, "learning_rate": 8.300560389746438e-05, "loss": 0.0081, "step": 18420 }, { "epoch": 16.573741007194243, "grad_norm": 0.282181054353714, "learning_rate": 8.298489834933745e-05, "loss": 0.0105, "step": 18430 }, { "epoch": 16.58273381294964, "grad_norm": 0.16971080005168915, "learning_rate": 8.296418278126934e-05, "loss": 0.0114, "step": 18440 }, { "epoch": 16.591726618705035, "grad_norm": 0.2200460135936737, "learning_rate": 8.294345719955284e-05, "loss": 0.0097, "step": 18450 }, { "epoch": 16.600719424460433, "grad_norm": 0.23061537742614746, "learning_rate": 8.29227216104839e-05, "loss": 0.0091, "step": 18460 }, { "epoch": 16.609712230215827, "grad_norm": 0.36139926314353943, "learning_rate": 8.290197602036137e-05, "loss": 0.0106, "step": 18470 }, { "epoch": 16.618705035971225, "grad_norm": 0.20942077040672302, "learning_rate": 8.288122043548725e-05, "loss": 0.0072, "step": 18480 }, { "epoch": 16.62769784172662, "grad_norm": 0.2481854408979416, "learning_rate": 8.286045486216657e-05, "loss": 0.0119, "step": 18490 }, { "epoch": 16.636690647482013, "grad_norm": 0.42488446831703186, "learning_rate": 8.283967930670733e-05, "loss": 0.0089, "step": 18500 }, { "epoch": 16.64568345323741, "grad_norm": 0.30848443508148193, "learning_rate": 8.281889377542058e-05, "loss": 0.009, "step": 18510 }, { "epoch": 16.654676258992804, "grad_norm": 0.33130940794944763, "learning_rate": 8.279809827462045e-05, "loss": 0.0092, "step": 18520 }, { "epoch": 16.663669064748202, "grad_norm": 0.21455863118171692, "learning_rate": 8.277729281062402e-05, "loss": 0.0117, "step": 18530 }, { "epoch": 16.672661870503596, "grad_norm": 0.1948249787092209, "learning_rate": 8.27564773897515e-05, "loss": 0.0086, "step": 18540 }, { "epoch": 16.681654676258994, "grad_norm": 0.20295138657093048, "learning_rate": 8.273565201832602e-05, "loss": 0.0068, "step": 18550 }, { "epoch": 16.690647482014388, "grad_norm": 0.19388845562934875, "learning_rate": 8.27148167026738e-05, "loss": 0.0074, "step": 18560 }, { "epoch": 16.699640287769785, "grad_norm": 0.20653124153614044, "learning_rate": 8.269397144912405e-05, "loss": 0.0085, "step": 18570 }, { "epoch": 16.70863309352518, "grad_norm": 0.14248815178871155, "learning_rate": 8.267311626400899e-05, "loss": 0.007, "step": 18580 }, { "epoch": 16.717625899280577, "grad_norm": 0.34209850430488586, "learning_rate": 8.26522511536639e-05, "loss": 0.0125, "step": 18590 }, { "epoch": 16.72661870503597, "grad_norm": 0.2383316457271576, "learning_rate": 8.263137612442706e-05, "loss": 0.0086, "step": 18600 }, { "epoch": 16.735611510791365, "grad_norm": 0.23255647718906403, "learning_rate": 8.261049118263971e-05, "loss": 0.0076, "step": 18610 }, { "epoch": 16.744604316546763, "grad_norm": 0.16028587520122528, "learning_rate": 8.258959633464619e-05, "loss": 0.0104, "step": 18620 }, { "epoch": 16.753597122302157, "grad_norm": 0.16013896465301514, "learning_rate": 8.256869158679377e-05, "loss": 0.007, "step": 18630 }, { "epoch": 16.762589928057555, "grad_norm": 0.14527031779289246, "learning_rate": 8.254777694543278e-05, "loss": 0.01, "step": 18640 }, { "epoch": 16.77158273381295, "grad_norm": 0.19471389055252075, "learning_rate": 8.252685241691651e-05, "loss": 0.0075, "step": 18650 }, { "epoch": 16.780575539568346, "grad_norm": 0.3586810529232025, "learning_rate": 8.250591800760133e-05, "loss": 0.0116, "step": 18660 }, { "epoch": 16.78956834532374, "grad_norm": 0.25394731760025024, "learning_rate": 8.248497372384649e-05, "loss": 0.0101, "step": 18670 }, { "epoch": 16.798561151079138, "grad_norm": 0.25872015953063965, "learning_rate": 8.246401957201437e-05, "loss": 0.0088, "step": 18680 }, { "epoch": 16.807553956834532, "grad_norm": 0.3237902820110321, "learning_rate": 8.244305555847027e-05, "loss": 0.0076, "step": 18690 }, { "epoch": 16.81654676258993, "grad_norm": 0.21288903057575226, "learning_rate": 8.24220816895825e-05, "loss": 0.007, "step": 18700 }, { "epoch": 16.825539568345324, "grad_norm": 0.2516633868217468, "learning_rate": 8.240109797172237e-05, "loss": 0.0103, "step": 18710 }, { "epoch": 16.834532374100718, "grad_norm": 0.20389972627162933, "learning_rate": 8.238010441126416e-05, "loss": 0.0141, "step": 18720 }, { "epoch": 16.843525179856115, "grad_norm": 0.2041064351797104, "learning_rate": 8.23591010145852e-05, "loss": 0.0137, "step": 18730 }, { "epoch": 16.85251798561151, "grad_norm": 0.26086121797561646, "learning_rate": 8.233808778806571e-05, "loss": 0.0117, "step": 18740 }, { "epoch": 16.861510791366907, "grad_norm": 0.22656628489494324, "learning_rate": 8.231706473808903e-05, "loss": 0.01, "step": 18750 }, { "epoch": 16.8705035971223, "grad_norm": 0.23439213633537292, "learning_rate": 8.229603187104133e-05, "loss": 0.0139, "step": 18760 }, { "epoch": 16.8794964028777, "grad_norm": 0.2476656585931778, "learning_rate": 8.22749891933119e-05, "loss": 0.0098, "step": 18770 }, { "epoch": 16.888489208633093, "grad_norm": 0.2776206433773041, "learning_rate": 8.225393671129291e-05, "loss": 0.0124, "step": 18780 }, { "epoch": 16.89748201438849, "grad_norm": 0.20078131556510925, "learning_rate": 8.223287443137957e-05, "loss": 0.0068, "step": 18790 }, { "epoch": 16.906474820143885, "grad_norm": 0.2295513153076172, "learning_rate": 8.221180235997004e-05, "loss": 0.0088, "step": 18800 }, { "epoch": 16.915467625899282, "grad_norm": 0.17338860034942627, "learning_rate": 8.219072050346544e-05, "loss": 0.0089, "step": 18810 }, { "epoch": 16.924460431654676, "grad_norm": 0.351915180683136, "learning_rate": 8.216962886826992e-05, "loss": 0.0085, "step": 18820 }, { "epoch": 16.93345323741007, "grad_norm": 0.25426197052001953, "learning_rate": 8.214852746079054e-05, "loss": 0.0111, "step": 18830 }, { "epoch": 16.942446043165468, "grad_norm": 0.21653766930103302, "learning_rate": 8.212741628743732e-05, "loss": 0.0078, "step": 18840 }, { "epoch": 16.951438848920862, "grad_norm": 0.14517438411712646, "learning_rate": 8.210629535462333e-05, "loss": 0.0076, "step": 18850 }, { "epoch": 16.96043165467626, "grad_norm": 0.226958230137825, "learning_rate": 8.208516466876453e-05, "loss": 0.009, "step": 18860 }, { "epoch": 16.969424460431654, "grad_norm": 0.23099571466445923, "learning_rate": 8.206402423627986e-05, "loss": 0.006, "step": 18870 }, { "epoch": 16.97841726618705, "grad_norm": 0.266057550907135, "learning_rate": 8.204287406359124e-05, "loss": 0.0162, "step": 18880 }, { "epoch": 16.987410071942445, "grad_norm": 0.26772868633270264, "learning_rate": 8.20217141571235e-05, "loss": 0.009, "step": 18890 }, { "epoch": 16.996402877697843, "grad_norm": 0.21522530913352966, "learning_rate": 8.200054452330449e-05, "loss": 0.0067, "step": 18900 }, { "epoch": 17.005395683453237, "grad_norm": 0.202776238322258, "learning_rate": 8.197936516856499e-05, "loss": 0.0098, "step": 18910 }, { "epoch": 17.014388489208635, "grad_norm": 0.2657319903373718, "learning_rate": 8.195817609933871e-05, "loss": 0.0102, "step": 18920 }, { "epoch": 17.02338129496403, "grad_norm": 0.14920304715633392, "learning_rate": 8.193697732206233e-05, "loss": 0.0092, "step": 18930 }, { "epoch": 17.032374100719423, "grad_norm": 0.3373125195503235, "learning_rate": 8.19157688431755e-05, "loss": 0.0094, "step": 18940 }, { "epoch": 17.04136690647482, "grad_norm": 0.45252689719200134, "learning_rate": 8.189455066912077e-05, "loss": 0.0084, "step": 18950 }, { "epoch": 17.050359712230215, "grad_norm": 0.36570850014686584, "learning_rate": 8.187332280634369e-05, "loss": 0.0096, "step": 18960 }, { "epoch": 17.059352517985612, "grad_norm": 0.22904379665851593, "learning_rate": 8.18520852612927e-05, "loss": 0.0088, "step": 18970 }, { "epoch": 17.068345323741006, "grad_norm": 0.21386024355888367, "learning_rate": 8.183083804041921e-05, "loss": 0.0118, "step": 18980 }, { "epoch": 17.077338129496404, "grad_norm": 0.20153860747814178, "learning_rate": 8.180958115017757e-05, "loss": 0.0067, "step": 18990 }, { "epoch": 17.086330935251798, "grad_norm": 0.30197766423225403, "learning_rate": 8.178831459702505e-05, "loss": 0.0147, "step": 19000 }, { "epoch": 17.095323741007196, "grad_norm": 0.27791714668273926, "learning_rate": 8.17670383874219e-05, "loss": 0.0116, "step": 19010 }, { "epoch": 17.10431654676259, "grad_norm": 0.1957734376192093, "learning_rate": 8.174575252783124e-05, "loss": 0.0084, "step": 19020 }, { "epoch": 17.113309352517987, "grad_norm": 0.23824214935302734, "learning_rate": 8.172445702471914e-05, "loss": 0.0118, "step": 19030 }, { "epoch": 17.12230215827338, "grad_norm": 0.2099459171295166, "learning_rate": 8.170315188455466e-05, "loss": 0.0083, "step": 19040 }, { "epoch": 17.131294964028775, "grad_norm": 0.13851653039455414, "learning_rate": 8.168183711380969e-05, "loss": 0.0078, "step": 19050 }, { "epoch": 17.140287769784173, "grad_norm": 0.20920860767364502, "learning_rate": 8.166051271895913e-05, "loss": 0.0089, "step": 19060 }, { "epoch": 17.149280575539567, "grad_norm": 0.25354456901550293, "learning_rate": 8.163917870648075e-05, "loss": 0.0143, "step": 19070 }, { "epoch": 17.158273381294965, "grad_norm": 0.1968984454870224, "learning_rate": 8.161783508285526e-05, "loss": 0.0078, "step": 19080 }, { "epoch": 17.16726618705036, "grad_norm": 0.17845647037029266, "learning_rate": 8.159648185456628e-05, "loss": 0.0086, "step": 19090 }, { "epoch": 17.176258992805757, "grad_norm": 0.23222078382968903, "learning_rate": 8.157511902810038e-05, "loss": 0.0112, "step": 19100 }, { "epoch": 17.18525179856115, "grad_norm": 0.2649948000907898, "learning_rate": 8.155374660994701e-05, "loss": 0.0074, "step": 19110 }, { "epoch": 17.194244604316548, "grad_norm": 0.177718386054039, "learning_rate": 8.153236460659857e-05, "loss": 0.0113, "step": 19120 }, { "epoch": 17.203237410071942, "grad_norm": 0.29441744089126587, "learning_rate": 8.151097302455031e-05, "loss": 0.0073, "step": 19130 }, { "epoch": 17.21223021582734, "grad_norm": 0.3500048518180847, "learning_rate": 8.148957187030044e-05, "loss": 0.0127, "step": 19140 }, { "epoch": 17.221223021582734, "grad_norm": 0.3207835853099823, "learning_rate": 8.146816115035006e-05, "loss": 0.0095, "step": 19150 }, { "epoch": 17.230215827338128, "grad_norm": 0.2236095517873764, "learning_rate": 8.14467408712032e-05, "loss": 0.0128, "step": 19160 }, { "epoch": 17.239208633093526, "grad_norm": 0.3048582971096039, "learning_rate": 8.142531103936678e-05, "loss": 0.0094, "step": 19170 }, { "epoch": 17.24820143884892, "grad_norm": 0.18372893333435059, "learning_rate": 8.14038716613506e-05, "loss": 0.0052, "step": 19180 }, { "epoch": 17.257194244604317, "grad_norm": 0.20224358141422272, "learning_rate": 8.138242274366736e-05, "loss": 0.0087, "step": 19190 }, { "epoch": 17.26618705035971, "grad_norm": 0.23431918025016785, "learning_rate": 8.136096429283271e-05, "loss": 0.0113, "step": 19200 }, { "epoch": 17.27517985611511, "grad_norm": 0.3175741136074066, "learning_rate": 8.133949631536515e-05, "loss": 0.0117, "step": 19210 }, { "epoch": 17.284172661870503, "grad_norm": 0.19136418402194977, "learning_rate": 8.131801881778607e-05, "loss": 0.0108, "step": 19220 }, { "epoch": 17.2931654676259, "grad_norm": 0.2526572644710541, "learning_rate": 8.129653180661978e-05, "loss": 0.0092, "step": 19230 }, { "epoch": 17.302158273381295, "grad_norm": 0.20834511518478394, "learning_rate": 8.127503528839346e-05, "loss": 0.0097, "step": 19240 }, { "epoch": 17.31115107913669, "grad_norm": 0.18655459582805634, "learning_rate": 8.125352926963721e-05, "loss": 0.0132, "step": 19250 }, { "epoch": 17.320143884892087, "grad_norm": 0.22062814235687256, "learning_rate": 8.123201375688395e-05, "loss": 0.0097, "step": 19260 }, { "epoch": 17.32913669064748, "grad_norm": 0.25507852435112, "learning_rate": 8.121048875666954e-05, "loss": 0.0092, "step": 19270 }, { "epoch": 17.33812949640288, "grad_norm": 0.23748286068439484, "learning_rate": 8.118895427553274e-05, "loss": 0.0069, "step": 19280 }, { "epoch": 17.347122302158272, "grad_norm": 0.21281106770038605, "learning_rate": 8.116741032001511e-05, "loss": 0.0105, "step": 19290 }, { "epoch": 17.35611510791367, "grad_norm": 0.2306027114391327, "learning_rate": 8.114585689666114e-05, "loss": 0.0098, "step": 19300 }, { "epoch": 17.365107913669064, "grad_norm": 0.16938447952270508, "learning_rate": 8.112429401201821e-05, "loss": 0.0071, "step": 19310 }, { "epoch": 17.37410071942446, "grad_norm": 0.14437861740589142, "learning_rate": 8.110272167263656e-05, "loss": 0.0094, "step": 19320 }, { "epoch": 17.383093525179856, "grad_norm": 0.16891086101531982, "learning_rate": 8.108113988506929e-05, "loss": 0.0075, "step": 19330 }, { "epoch": 17.392086330935253, "grad_norm": 0.222320094704628, "learning_rate": 8.105954865587235e-05, "loss": 0.0102, "step": 19340 }, { "epoch": 17.401079136690647, "grad_norm": 0.27946069836616516, "learning_rate": 8.103794799160463e-05, "loss": 0.0103, "step": 19350 }, { "epoch": 17.41007194244604, "grad_norm": 0.2745920419692993, "learning_rate": 8.101633789882781e-05, "loss": 0.012, "step": 19360 }, { "epoch": 17.41906474820144, "grad_norm": 0.24607425928115845, "learning_rate": 8.099471838410648e-05, "loss": 0.0066, "step": 19370 }, { "epoch": 17.428057553956833, "grad_norm": 0.25422701239585876, "learning_rate": 8.097308945400806e-05, "loss": 0.0084, "step": 19380 }, { "epoch": 17.43705035971223, "grad_norm": 0.31021982431411743, "learning_rate": 8.095145111510288e-05, "loss": 0.0104, "step": 19390 }, { "epoch": 17.446043165467625, "grad_norm": 0.20142626762390137, "learning_rate": 8.092980337396406e-05, "loss": 0.0075, "step": 19400 }, { "epoch": 17.455035971223023, "grad_norm": 0.25878196954727173, "learning_rate": 8.090814623716763e-05, "loss": 0.0113, "step": 19410 }, { "epoch": 17.464028776978417, "grad_norm": 0.24453185498714447, "learning_rate": 8.088647971129246e-05, "loss": 0.0109, "step": 19420 }, { "epoch": 17.473021582733814, "grad_norm": 0.20291496813297272, "learning_rate": 8.086480380292026e-05, "loss": 0.0118, "step": 19430 }, { "epoch": 17.48201438848921, "grad_norm": 0.28012558817863464, "learning_rate": 8.084311851863562e-05, "loss": 0.0094, "step": 19440 }, { "epoch": 17.491007194244606, "grad_norm": 0.1874961405992508, "learning_rate": 8.082142386502591e-05, "loss": 0.0076, "step": 19450 }, { "epoch": 17.5, "grad_norm": 0.18309877812862396, "learning_rate": 8.079971984868145e-05, "loss": 0.0114, "step": 19460 }, { "epoch": 17.508992805755394, "grad_norm": 0.13538683950901031, "learning_rate": 8.077800647619532e-05, "loss": 0.0096, "step": 19470 }, { "epoch": 17.51798561151079, "grad_norm": 0.1743958741426468, "learning_rate": 8.075628375416345e-05, "loss": 0.0069, "step": 19480 }, { "epoch": 17.526978417266186, "grad_norm": 0.21456864476203918, "learning_rate": 8.073455168918464e-05, "loss": 0.0092, "step": 19490 }, { "epoch": 17.535971223021583, "grad_norm": 0.17761842906475067, "learning_rate": 8.071281028786055e-05, "loss": 0.0071, "step": 19500 }, { "epoch": 17.544964028776977, "grad_norm": 0.21637333929538727, "learning_rate": 8.069105955679562e-05, "loss": 0.0069, "step": 19510 }, { "epoch": 17.553956834532375, "grad_norm": 0.22973056137561798, "learning_rate": 8.066929950259713e-05, "loss": 0.0082, "step": 19520 }, { "epoch": 17.56294964028777, "grad_norm": 0.21702973544597626, "learning_rate": 8.064753013187522e-05, "loss": 0.0087, "step": 19530 }, { "epoch": 17.571942446043167, "grad_norm": 0.3018585741519928, "learning_rate": 8.062575145124289e-05, "loss": 0.0076, "step": 19540 }, { "epoch": 17.58093525179856, "grad_norm": 0.19595445692539215, "learning_rate": 8.060396346731587e-05, "loss": 0.0087, "step": 19550 }, { "epoch": 17.58992805755396, "grad_norm": 0.22629578411579132, "learning_rate": 8.058216618671281e-05, "loss": 0.0073, "step": 19560 }, { "epoch": 17.598920863309353, "grad_norm": 0.21233874559402466, "learning_rate": 8.056035961605514e-05, "loss": 0.0065, "step": 19570 }, { "epoch": 17.607913669064747, "grad_norm": 0.2533756494522095, "learning_rate": 8.05385437619671e-05, "loss": 0.0127, "step": 19580 }, { "epoch": 17.616906474820144, "grad_norm": 0.18080297112464905, "learning_rate": 8.05167186310758e-05, "loss": 0.007, "step": 19590 }, { "epoch": 17.62589928057554, "grad_norm": 0.24956770241260529, "learning_rate": 8.049488423001113e-05, "loss": 0.0078, "step": 19600 }, { "epoch": 17.634892086330936, "grad_norm": 0.3301486074924469, "learning_rate": 8.047304056540581e-05, "loss": 0.0083, "step": 19610 }, { "epoch": 17.64388489208633, "grad_norm": 0.30687108635902405, "learning_rate": 8.045118764389534e-05, "loss": 0.0106, "step": 19620 }, { "epoch": 17.652877697841728, "grad_norm": 0.15487276017665863, "learning_rate": 8.042932547211809e-05, "loss": 0.0052, "step": 19630 }, { "epoch": 17.66187050359712, "grad_norm": 0.30974194407463074, "learning_rate": 8.04074540567152e-05, "loss": 0.0077, "step": 19640 }, { "epoch": 17.67086330935252, "grad_norm": 0.1885053962469101, "learning_rate": 8.038557340433063e-05, "loss": 0.0155, "step": 19650 }, { "epoch": 17.679856115107913, "grad_norm": 0.21585355699062347, "learning_rate": 8.036368352161115e-05, "loss": 0.0073, "step": 19660 }, { "epoch": 17.68884892086331, "grad_norm": 0.23317274451255798, "learning_rate": 8.034178441520633e-05, "loss": 0.009, "step": 19670 }, { "epoch": 17.697841726618705, "grad_norm": 0.355244904756546, "learning_rate": 8.031987609176852e-05, "loss": 0.0121, "step": 19680 }, { "epoch": 17.7068345323741, "grad_norm": 0.18149882555007935, "learning_rate": 8.02979585579529e-05, "loss": 0.0099, "step": 19690 }, { "epoch": 17.715827338129497, "grad_norm": 0.2515527307987213, "learning_rate": 8.027603182041745e-05, "loss": 0.0117, "step": 19700 }, { "epoch": 17.72482014388489, "grad_norm": 0.18214404582977295, "learning_rate": 8.025409588582292e-05, "loss": 0.0121, "step": 19710 }, { "epoch": 17.73381294964029, "grad_norm": 0.2819967567920685, "learning_rate": 8.023215076083288e-05, "loss": 0.0091, "step": 19720 }, { "epoch": 17.742805755395683, "grad_norm": 0.25784868001937866, "learning_rate": 8.021019645211367e-05, "loss": 0.0076, "step": 19730 }, { "epoch": 17.75179856115108, "grad_norm": 0.29987579584121704, "learning_rate": 8.018823296633441e-05, "loss": 0.0084, "step": 19740 }, { "epoch": 17.760791366906474, "grad_norm": 0.1576661765575409, "learning_rate": 8.016626031016708e-05, "loss": 0.0119, "step": 19750 }, { "epoch": 17.769784172661872, "grad_norm": 0.1999870091676712, "learning_rate": 8.014427849028636e-05, "loss": 0.0079, "step": 19760 }, { "epoch": 17.778776978417266, "grad_norm": 0.28806188702583313, "learning_rate": 8.012228751336974e-05, "loss": 0.0083, "step": 19770 }, { "epoch": 17.78776978417266, "grad_norm": 0.22321413457393646, "learning_rate": 8.01002873860975e-05, "loss": 0.0127, "step": 19780 }, { "epoch": 17.796762589928058, "grad_norm": 0.16799747943878174, "learning_rate": 8.00782781151527e-05, "loss": 0.0079, "step": 19790 }, { "epoch": 17.805755395683452, "grad_norm": 0.1392730176448822, "learning_rate": 8.005625970722119e-05, "loss": 0.0106, "step": 19800 }, { "epoch": 17.81474820143885, "grad_norm": 0.28186413645744324, "learning_rate": 8.003423216899158e-05, "loss": 0.0068, "step": 19810 }, { "epoch": 17.823741007194243, "grad_norm": 0.18519631028175354, "learning_rate": 8.001219550715522e-05, "loss": 0.0126, "step": 19820 }, { "epoch": 17.83273381294964, "grad_norm": 0.20423749089241028, "learning_rate": 7.999014972840632e-05, "loss": 0.0148, "step": 19830 }, { "epoch": 17.841726618705035, "grad_norm": 0.18348394334316254, "learning_rate": 7.996809483944174e-05, "loss": 0.0082, "step": 19840 }, { "epoch": 17.850719424460433, "grad_norm": 0.1762518584728241, "learning_rate": 7.994603084696124e-05, "loss": 0.0055, "step": 19850 }, { "epoch": 17.859712230215827, "grad_norm": 0.22763526439666748, "learning_rate": 7.992395775766724e-05, "loss": 0.0076, "step": 19860 }, { "epoch": 17.868705035971225, "grad_norm": 0.14491429924964905, "learning_rate": 7.990187557826497e-05, "loss": 0.0119, "step": 19870 }, { "epoch": 17.87769784172662, "grad_norm": 0.18477758765220642, "learning_rate": 7.987978431546242e-05, "loss": 0.0092, "step": 19880 }, { "epoch": 17.886690647482013, "grad_norm": 0.16305214166641235, "learning_rate": 7.985768397597031e-05, "loss": 0.0093, "step": 19890 }, { "epoch": 17.89568345323741, "grad_norm": 0.21125847101211548, "learning_rate": 7.983557456650216e-05, "loss": 0.0114, "step": 19900 }, { "epoch": 17.904676258992804, "grad_norm": 0.2890818417072296, "learning_rate": 7.981345609377422e-05, "loss": 0.0135, "step": 19910 }, { "epoch": 17.913669064748202, "grad_norm": 0.19893696904182434, "learning_rate": 7.97913285645055e-05, "loss": 0.0082, "step": 19920 }, { "epoch": 17.922661870503596, "grad_norm": 0.1951165348291397, "learning_rate": 7.976919198541776e-05, "loss": 0.0074, "step": 19930 }, { "epoch": 17.931654676258994, "grad_norm": 0.19172382354736328, "learning_rate": 7.974704636323548e-05, "loss": 0.0097, "step": 19940 }, { "epoch": 17.940647482014388, "grad_norm": 0.20288240909576416, "learning_rate": 7.972489170468597e-05, "loss": 0.0071, "step": 19950 }, { "epoch": 17.949640287769785, "grad_norm": 0.28118282556533813, "learning_rate": 7.970272801649918e-05, "loss": 0.0119, "step": 19960 }, { "epoch": 17.95863309352518, "grad_norm": 0.2126876264810562, "learning_rate": 7.96805553054079e-05, "loss": 0.0099, "step": 19970 }, { "epoch": 17.967625899280577, "grad_norm": 0.2752527594566345, "learning_rate": 7.965837357814756e-05, "loss": 0.0086, "step": 19980 }, { "epoch": 17.97661870503597, "grad_norm": 0.1344626098871231, "learning_rate": 7.963618284145643e-05, "loss": 0.0127, "step": 19990 }, { "epoch": 17.985611510791365, "grad_norm": 0.2890588045120239, "learning_rate": 7.961398310207544e-05, "loss": 0.0107, "step": 20000 }, { "epoch": 17.994604316546763, "grad_norm": 0.17300990223884583, "learning_rate": 7.95917743667483e-05, "loss": 0.0087, "step": 20010 }, { "epoch": 18.003597122302157, "grad_norm": 0.15755894780158997, "learning_rate": 7.956955664222144e-05, "loss": 0.0119, "step": 20020 }, { "epoch": 18.012589928057555, "grad_norm": 0.14426715672016144, "learning_rate": 7.954732993524399e-05, "loss": 0.0091, "step": 20030 }, { "epoch": 18.02158273381295, "grad_norm": 0.12560465931892395, "learning_rate": 7.952509425256786e-05, "loss": 0.0079, "step": 20040 }, { "epoch": 18.030575539568346, "grad_norm": 0.1887693703174591, "learning_rate": 7.950284960094767e-05, "loss": 0.009, "step": 20050 }, { "epoch": 18.03956834532374, "grad_norm": 0.17650721967220306, "learning_rate": 7.948059598714076e-05, "loss": 0.0072, "step": 20060 }, { "epoch": 18.048561151079138, "grad_norm": 0.3314500153064728, "learning_rate": 7.945833341790717e-05, "loss": 0.0109, "step": 20070 }, { "epoch": 18.057553956834532, "grad_norm": 0.35227829217910767, "learning_rate": 7.94360619000097e-05, "loss": 0.0128, "step": 20080 }, { "epoch": 18.06654676258993, "grad_norm": 0.29015398025512695, "learning_rate": 7.941378144021381e-05, "loss": 0.0095, "step": 20090 }, { "epoch": 18.075539568345324, "grad_norm": 0.22786545753479004, "learning_rate": 7.939149204528777e-05, "loss": 0.0094, "step": 20100 }, { "epoch": 18.084532374100718, "grad_norm": 0.24050776660442352, "learning_rate": 7.936919372200246e-05, "loss": 0.0107, "step": 20110 }, { "epoch": 18.093525179856115, "grad_norm": 0.2678135633468628, "learning_rate": 7.934688647713158e-05, "loss": 0.0098, "step": 20120 }, { "epoch": 18.10251798561151, "grad_norm": 0.28322479128837585, "learning_rate": 7.932457031745143e-05, "loss": 0.0084, "step": 20130 }, { "epoch": 18.111510791366907, "grad_norm": 0.26151129603385925, "learning_rate": 7.930224524974108e-05, "loss": 0.0099, "step": 20140 }, { "epoch": 18.1205035971223, "grad_norm": 0.16049417853355408, "learning_rate": 7.927991128078232e-05, "loss": 0.0095, "step": 20150 }, { "epoch": 18.1294964028777, "grad_norm": 0.23354984819889069, "learning_rate": 7.925756841735958e-05, "loss": 0.0083, "step": 20160 }, { "epoch": 18.138489208633093, "grad_norm": 0.17891797423362732, "learning_rate": 7.923521666626008e-05, "loss": 0.0074, "step": 20170 }, { "epoch": 18.14748201438849, "grad_norm": 0.13486714661121368, "learning_rate": 7.921285603427366e-05, "loss": 0.0074, "step": 20180 }, { "epoch": 18.156474820143885, "grad_norm": 0.17343422770500183, "learning_rate": 7.91904865281929e-05, "loss": 0.0076, "step": 20190 }, { "epoch": 18.165467625899282, "grad_norm": 0.2658272683620453, "learning_rate": 7.916810815481307e-05, "loss": 0.0073, "step": 20200 }, { "epoch": 18.174460431654676, "grad_norm": 0.28282803297042847, "learning_rate": 7.914572092093211e-05, "loss": 0.0094, "step": 20210 }, { "epoch": 18.18345323741007, "grad_norm": 0.22740021347999573, "learning_rate": 7.912332483335068e-05, "loss": 0.0086, "step": 20220 }, { "epoch": 18.192446043165468, "grad_norm": 0.21130570769309998, "learning_rate": 7.910091989887213e-05, "loss": 0.0076, "step": 20230 }, { "epoch": 18.201438848920862, "grad_norm": 0.3071298897266388, "learning_rate": 7.907850612430248e-05, "loss": 0.009, "step": 20240 }, { "epoch": 18.21043165467626, "grad_norm": 0.2681068480014801, "learning_rate": 7.905608351645044e-05, "loss": 0.0065, "step": 20250 }, { "epoch": 18.219424460431654, "grad_norm": 0.18013732135295868, "learning_rate": 7.90336520821274e-05, "loss": 0.0084, "step": 20260 }, { "epoch": 18.22841726618705, "grad_norm": 0.11542954295873642, "learning_rate": 7.901121182814746e-05, "loss": 0.0062, "step": 20270 }, { "epoch": 18.237410071942445, "grad_norm": 0.1465880423784256, "learning_rate": 7.898876276132736e-05, "loss": 0.0098, "step": 20280 }, { "epoch": 18.246402877697843, "grad_norm": 0.18605497479438782, "learning_rate": 7.896630488848654e-05, "loss": 0.0071, "step": 20290 }, { "epoch": 18.255395683453237, "grad_norm": 0.2633856236934662, "learning_rate": 7.89438382164471e-05, "loss": 0.0077, "step": 20300 }, { "epoch": 18.264388489208635, "grad_norm": 0.20635473728179932, "learning_rate": 7.892136275203383e-05, "loss": 0.0076, "step": 20310 }, { "epoch": 18.27338129496403, "grad_norm": 0.281216025352478, "learning_rate": 7.889887850207418e-05, "loss": 0.0075, "step": 20320 }, { "epoch": 18.282374100719423, "grad_norm": 0.28837332129478455, "learning_rate": 7.887638547339827e-05, "loss": 0.0104, "step": 20330 }, { "epoch": 18.29136690647482, "grad_norm": 0.22094859182834625, "learning_rate": 7.885388367283891e-05, "loss": 0.0078, "step": 20340 }, { "epoch": 18.300359712230215, "grad_norm": 0.1879628300666809, "learning_rate": 7.88313731072315e-05, "loss": 0.007, "step": 20350 }, { "epoch": 18.309352517985612, "grad_norm": 0.2257799357175827, "learning_rate": 7.88088537834142e-05, "loss": 0.0091, "step": 20360 }, { "epoch": 18.318345323741006, "grad_norm": 0.1966465264558792, "learning_rate": 7.878632570822778e-05, "loss": 0.0084, "step": 20370 }, { "epoch": 18.327338129496404, "grad_norm": 0.26068976521492004, "learning_rate": 7.876378888851567e-05, "loss": 0.0094, "step": 20380 }, { "epoch": 18.336330935251798, "grad_norm": 0.2773849070072174, "learning_rate": 7.874124333112396e-05, "loss": 0.0105, "step": 20390 }, { "epoch": 18.345323741007196, "grad_norm": 0.18251527845859528, "learning_rate": 7.871868904290138e-05, "loss": 0.008, "step": 20400 }, { "epoch": 18.35431654676259, "grad_norm": 0.17248865962028503, "learning_rate": 7.869612603069935e-05, "loss": 0.0104, "step": 20410 }, { "epoch": 18.363309352517987, "grad_norm": 0.16136550903320312, "learning_rate": 7.867355430137192e-05, "loss": 0.0075, "step": 20420 }, { "epoch": 18.37230215827338, "grad_norm": 0.2905915081501007, "learning_rate": 7.865097386177577e-05, "loss": 0.0061, "step": 20430 }, { "epoch": 18.381294964028775, "grad_norm": 0.2208395004272461, "learning_rate": 7.862838471877023e-05, "loss": 0.0105, "step": 20440 }, { "epoch": 18.390287769784173, "grad_norm": 0.28347039222717285, "learning_rate": 7.860578687921731e-05, "loss": 0.0095, "step": 20450 }, { "epoch": 18.399280575539567, "grad_norm": 0.18367548286914825, "learning_rate": 7.858318034998164e-05, "loss": 0.0079, "step": 20460 }, { "epoch": 18.408273381294965, "grad_norm": 0.15300579369068146, "learning_rate": 7.856056513793046e-05, "loss": 0.0061, "step": 20470 }, { "epoch": 18.41726618705036, "grad_norm": 0.18826211988925934, "learning_rate": 7.85379412499337e-05, "loss": 0.0086, "step": 20480 }, { "epoch": 18.426258992805757, "grad_norm": 0.15065869688987732, "learning_rate": 7.851530869286389e-05, "loss": 0.0072, "step": 20490 }, { "epoch": 18.43525179856115, "grad_norm": 0.32227981090545654, "learning_rate": 7.849266747359619e-05, "loss": 0.0082, "step": 20500 }, { "epoch": 18.444244604316548, "grad_norm": 0.23568584024906158, "learning_rate": 7.847001759900843e-05, "loss": 0.0052, "step": 20510 }, { "epoch": 18.453237410071942, "grad_norm": 0.19311048090457916, "learning_rate": 7.844735907598102e-05, "loss": 0.0136, "step": 20520 }, { "epoch": 18.46223021582734, "grad_norm": 0.16743454337120056, "learning_rate": 7.842469191139703e-05, "loss": 0.0084, "step": 20530 }, { "epoch": 18.471223021582734, "grad_norm": 0.13335132598876953, "learning_rate": 7.840201611214215e-05, "loss": 0.0078, "step": 20540 }, { "epoch": 18.480215827338128, "grad_norm": 0.22228656709194183, "learning_rate": 7.837933168510469e-05, "loss": 0.0125, "step": 20550 }, { "epoch": 18.489208633093526, "grad_norm": 0.30480852723121643, "learning_rate": 7.835663863717559e-05, "loss": 0.0106, "step": 20560 }, { "epoch": 18.49820143884892, "grad_norm": 0.250309556722641, "learning_rate": 7.833393697524838e-05, "loss": 0.0123, "step": 20570 }, { "epoch": 18.507194244604317, "grad_norm": 0.29967668652534485, "learning_rate": 7.831122670621922e-05, "loss": 0.009, "step": 20580 }, { "epoch": 18.51618705035971, "grad_norm": 0.24499915540218353, "learning_rate": 7.82885078369869e-05, "loss": 0.0086, "step": 20590 }, { "epoch": 18.52517985611511, "grad_norm": 0.22398321330547333, "learning_rate": 7.826578037445283e-05, "loss": 0.0072, "step": 20600 }, { "epoch": 18.534172661870503, "grad_norm": 0.21675367653369904, "learning_rate": 7.824304432552097e-05, "loss": 0.0062, "step": 20610 }, { "epoch": 18.5431654676259, "grad_norm": 0.1756380796432495, "learning_rate": 7.822029969709798e-05, "loss": 0.0098, "step": 20620 }, { "epoch": 18.552158273381295, "grad_norm": 0.16682718694210052, "learning_rate": 7.819754649609306e-05, "loss": 0.0143, "step": 20630 }, { "epoch": 18.56115107913669, "grad_norm": 0.13133597373962402, "learning_rate": 7.817478472941802e-05, "loss": 0.0094, "step": 20640 }, { "epoch": 18.570143884892087, "grad_norm": 0.2443639636039734, "learning_rate": 7.815201440398727e-05, "loss": 0.0057, "step": 20650 }, { "epoch": 18.57913669064748, "grad_norm": 0.19880802929401398, "learning_rate": 7.812923552671789e-05, "loss": 0.0086, "step": 20660 }, { "epoch": 18.58812949640288, "grad_norm": 0.2247762680053711, "learning_rate": 7.810644810452945e-05, "loss": 0.0061, "step": 20670 }, { "epoch": 18.597122302158272, "grad_norm": 0.15299592912197113, "learning_rate": 7.808365214434417e-05, "loss": 0.0073, "step": 20680 }, { "epoch": 18.60611510791367, "grad_norm": 0.1783798635005951, "learning_rate": 7.80608476530869e-05, "loss": 0.0076, "step": 20690 }, { "epoch": 18.615107913669064, "grad_norm": 0.17492936551570892, "learning_rate": 7.8038034637685e-05, "loss": 0.0059, "step": 20700 }, { "epoch": 18.62410071942446, "grad_norm": 0.17852002382278442, "learning_rate": 7.801521310506848e-05, "loss": 0.0056, "step": 20710 }, { "epoch": 18.633093525179856, "grad_norm": 0.12059535831212997, "learning_rate": 7.799238306216994e-05, "loss": 0.0047, "step": 20720 }, { "epoch": 18.642086330935253, "grad_norm": 0.1417984813451767, "learning_rate": 7.796954451592448e-05, "loss": 0.0089, "step": 20730 }, { "epoch": 18.651079136690647, "grad_norm": 0.26923051476478577, "learning_rate": 7.794669747326992e-05, "loss": 0.0097, "step": 20740 }, { "epoch": 18.66007194244604, "grad_norm": 0.2614977955818176, "learning_rate": 7.792384194114654e-05, "loss": 0.0067, "step": 20750 }, { "epoch": 18.66906474820144, "grad_norm": 0.40205004811286926, "learning_rate": 7.790097792649729e-05, "loss": 0.0087, "step": 20760 }, { "epoch": 18.678057553956833, "grad_norm": 0.30785149335861206, "learning_rate": 7.787810543626762e-05, "loss": 0.0073, "step": 20770 }, { "epoch": 18.68705035971223, "grad_norm": 0.27593713998794556, "learning_rate": 7.785522447740558e-05, "loss": 0.0065, "step": 20780 }, { "epoch": 18.696043165467625, "grad_norm": 0.3177194595336914, "learning_rate": 7.783233505686182e-05, "loss": 0.0135, "step": 20790 }, { "epoch": 18.705035971223023, "grad_norm": 0.1655728816986084, "learning_rate": 7.780943718158955e-05, "loss": 0.0054, "step": 20800 }, { "epoch": 18.714028776978417, "grad_norm": 0.2639835476875305, "learning_rate": 7.778653085854453e-05, "loss": 0.0093, "step": 20810 }, { "epoch": 18.723021582733814, "grad_norm": 0.3225957751274109, "learning_rate": 7.77636160946851e-05, "loss": 0.0064, "step": 20820 }, { "epoch": 18.73201438848921, "grad_norm": 0.17428915202617645, "learning_rate": 7.774069289697215e-05, "loss": 0.009, "step": 20830 }, { "epoch": 18.741007194244606, "grad_norm": 0.1483527421951294, "learning_rate": 7.771776127236913e-05, "loss": 0.0073, "step": 20840 }, { "epoch": 18.75, "grad_norm": 0.18321415781974792, "learning_rate": 7.769482122784212e-05, "loss": 0.0064, "step": 20850 }, { "epoch": 18.758992805755394, "grad_norm": 0.32396620512008667, "learning_rate": 7.767187277035963e-05, "loss": 0.0084, "step": 20860 }, { "epoch": 18.76798561151079, "grad_norm": 0.35099443793296814, "learning_rate": 7.764891590689285e-05, "loss": 0.0109, "step": 20870 }, { "epoch": 18.776978417266186, "grad_norm": 0.31928983330726624, "learning_rate": 7.762595064441542e-05, "loss": 0.0094, "step": 20880 }, { "epoch": 18.785971223021583, "grad_norm": 0.16731873154640198, "learning_rate": 7.760297698990362e-05, "loss": 0.0088, "step": 20890 }, { "epoch": 18.794964028776977, "grad_norm": 0.26521357893943787, "learning_rate": 7.757999495033623e-05, "loss": 0.0093, "step": 20900 }, { "epoch": 18.803956834532375, "grad_norm": 0.22324064373970032, "learning_rate": 7.755700453269456e-05, "loss": 0.0054, "step": 20910 }, { "epoch": 18.81294964028777, "grad_norm": 0.2527245879173279, "learning_rate": 7.753400574396254e-05, "loss": 0.0093, "step": 20920 }, { "epoch": 18.821942446043167, "grad_norm": 0.2938147485256195, "learning_rate": 7.751099859112655e-05, "loss": 0.0107, "step": 20930 }, { "epoch": 18.83093525179856, "grad_norm": 0.2185845822095871, "learning_rate": 7.748798308117557e-05, "loss": 0.0061, "step": 20940 }, { "epoch": 18.83992805755396, "grad_norm": 0.2502462565898895, "learning_rate": 7.746495922110112e-05, "loss": 0.009, "step": 20950 }, { "epoch": 18.848920863309353, "grad_norm": 0.20578043162822723, "learning_rate": 7.744192701789723e-05, "loss": 0.0073, "step": 20960 }, { "epoch": 18.857913669064747, "grad_norm": 0.2254682332277298, "learning_rate": 7.741888647856046e-05, "loss": 0.0073, "step": 20970 }, { "epoch": 18.866906474820144, "grad_norm": 0.16638986766338348, "learning_rate": 7.739583761008994e-05, "loss": 0.0093, "step": 20980 }, { "epoch": 18.87589928057554, "grad_norm": 0.3444851338863373, "learning_rate": 7.73727804194873e-05, "loss": 0.0072, "step": 20990 }, { "epoch": 18.884892086330936, "grad_norm": 0.20004673302173615, "learning_rate": 7.734971491375671e-05, "loss": 0.0072, "step": 21000 }, { "epoch": 18.89388489208633, "grad_norm": 0.2459608018398285, "learning_rate": 7.732664109990485e-05, "loss": 0.0073, "step": 21010 }, { "epoch": 18.902877697841728, "grad_norm": 0.2047954499721527, "learning_rate": 7.730355898494095e-05, "loss": 0.0141, "step": 21020 }, { "epoch": 18.91187050359712, "grad_norm": 0.16767184436321259, "learning_rate": 7.728046857587673e-05, "loss": 0.0117, "step": 21030 }, { "epoch": 18.92086330935252, "grad_norm": 0.36639168858528137, "learning_rate": 7.725736987972647e-05, "loss": 0.0095, "step": 21040 }, { "epoch": 18.929856115107913, "grad_norm": 0.29900163412094116, "learning_rate": 7.723426290350691e-05, "loss": 0.0107, "step": 21050 }, { "epoch": 18.93884892086331, "grad_norm": 0.17790353298187256, "learning_rate": 7.721114765423736e-05, "loss": 0.0102, "step": 21060 }, { "epoch": 18.947841726618705, "grad_norm": 0.1775980293750763, "learning_rate": 7.718802413893963e-05, "loss": 0.0067, "step": 21070 }, { "epoch": 18.9568345323741, "grad_norm": 0.2111998200416565, "learning_rate": 7.716489236463802e-05, "loss": 0.0083, "step": 21080 }, { "epoch": 18.965827338129497, "grad_norm": 0.16560354828834534, "learning_rate": 7.714175233835936e-05, "loss": 0.0063, "step": 21090 }, { "epoch": 18.97482014388489, "grad_norm": 0.1645592898130417, "learning_rate": 7.711860406713299e-05, "loss": 0.0066, "step": 21100 }, { "epoch": 18.98381294964029, "grad_norm": 0.20551134645938873, "learning_rate": 7.70954475579907e-05, "loss": 0.0076, "step": 21110 }, { "epoch": 18.992805755395683, "grad_norm": 0.2942383289337158, "learning_rate": 7.707228281796688e-05, "loss": 0.0081, "step": 21120 }, { "epoch": 19.00179856115108, "grad_norm": 0.2417481392621994, "learning_rate": 7.704910985409833e-05, "loss": 0.0078, "step": 21130 }, { "epoch": 19.010791366906474, "grad_norm": 0.17523273825645447, "learning_rate": 7.702592867342439e-05, "loss": 0.0091, "step": 21140 }, { "epoch": 19.019784172661872, "grad_norm": 0.20087528228759766, "learning_rate": 7.700273928298691e-05, "loss": 0.0067, "step": 21150 }, { "epoch": 19.028776978417266, "grad_norm": 0.35940197110176086, "learning_rate": 7.697954168983021e-05, "loss": 0.0077, "step": 21160 }, { "epoch": 19.037769784172664, "grad_norm": 0.1618017852306366, "learning_rate": 7.695633590100109e-05, "loss": 0.0095, "step": 21170 }, { "epoch": 19.046762589928058, "grad_norm": 0.2712794840335846, "learning_rate": 7.693312192354886e-05, "loss": 0.0081, "step": 21180 }, { "epoch": 19.055755395683452, "grad_norm": 0.16785913705825806, "learning_rate": 7.690989976452532e-05, "loss": 0.0073, "step": 21190 }, { "epoch": 19.06474820143885, "grad_norm": 0.20149260759353638, "learning_rate": 7.688666943098475e-05, "loss": 0.0079, "step": 21200 }, { "epoch": 19.073741007194243, "grad_norm": 0.18052440881729126, "learning_rate": 7.686343092998389e-05, "loss": 0.0069, "step": 21210 }, { "epoch": 19.08273381294964, "grad_norm": 0.1841708868741989, "learning_rate": 7.684018426858202e-05, "loss": 0.0082, "step": 21220 }, { "epoch": 19.091726618705035, "grad_norm": 0.34485384821891785, "learning_rate": 7.681692945384084e-05, "loss": 0.0097, "step": 21230 }, { "epoch": 19.100719424460433, "grad_norm": 0.13328422605991364, "learning_rate": 7.679366649282456e-05, "loss": 0.0065, "step": 21240 }, { "epoch": 19.109712230215827, "grad_norm": 0.2682981491088867, "learning_rate": 7.677039539259983e-05, "loss": 0.0095, "step": 21250 }, { "epoch": 19.118705035971225, "grad_norm": 0.20099060237407684, "learning_rate": 7.674711616023581e-05, "loss": 0.0078, "step": 21260 }, { "epoch": 19.12769784172662, "grad_norm": 0.18575771152973175, "learning_rate": 7.672382880280413e-05, "loss": 0.0064, "step": 21270 }, { "epoch": 19.136690647482013, "grad_norm": 0.17559204995632172, "learning_rate": 7.670053332737885e-05, "loss": 0.0063, "step": 21280 }, { "epoch": 19.14568345323741, "grad_norm": 0.1757269650697708, "learning_rate": 7.667722974103654e-05, "loss": 0.0061, "step": 21290 }, { "epoch": 19.154676258992804, "grad_norm": 0.20980185270309448, "learning_rate": 7.66539180508562e-05, "loss": 0.0071, "step": 21300 }, { "epoch": 19.163669064748202, "grad_norm": 0.21019218862056732, "learning_rate": 7.663059826391932e-05, "loss": 0.0101, "step": 21310 }, { "epoch": 19.172661870503596, "grad_norm": 0.11771951615810394, "learning_rate": 7.660727038730981e-05, "loss": 0.0072, "step": 21320 }, { "epoch": 19.181654676258994, "grad_norm": 0.17810732126235962, "learning_rate": 7.65839344281141e-05, "loss": 0.0069, "step": 21330 }, { "epoch": 19.190647482014388, "grad_norm": 0.2923010289669037, "learning_rate": 7.656059039342101e-05, "loss": 0.0061, "step": 21340 }, { "epoch": 19.199640287769785, "grad_norm": 0.2017114758491516, "learning_rate": 7.653723829032187e-05, "loss": 0.0085, "step": 21350 }, { "epoch": 19.20863309352518, "grad_norm": 0.2314285784959793, "learning_rate": 7.65138781259104e-05, "loss": 0.0118, "step": 21360 }, { "epoch": 19.217625899280577, "grad_norm": 0.2839830219745636, "learning_rate": 7.649050990728279e-05, "loss": 0.0059, "step": 21370 }, { "epoch": 19.22661870503597, "grad_norm": 0.21244744956493378, "learning_rate": 7.646713364153774e-05, "loss": 0.0078, "step": 21380 }, { "epoch": 19.235611510791365, "grad_norm": 0.2974812686443329, "learning_rate": 7.64437493357763e-05, "loss": 0.0067, "step": 21390 }, { "epoch": 19.244604316546763, "grad_norm": 0.138424351811409, "learning_rate": 7.642035699710202e-05, "loss": 0.0077, "step": 21400 }, { "epoch": 19.253597122302157, "grad_norm": 0.24084150791168213, "learning_rate": 7.639695663262089e-05, "loss": 0.0066, "step": 21410 }, { "epoch": 19.262589928057555, "grad_norm": 0.2721313536167145, "learning_rate": 7.637354824944128e-05, "loss": 0.0149, "step": 21420 }, { "epoch": 19.27158273381295, "grad_norm": 0.23404920101165771, "learning_rate": 7.635013185467408e-05, "loss": 0.0071, "step": 21430 }, { "epoch": 19.280575539568346, "grad_norm": 0.21279557049274445, "learning_rate": 7.632670745543256e-05, "loss": 0.0065, "step": 21440 }, { "epoch": 19.28956834532374, "grad_norm": 0.19368846714496613, "learning_rate": 7.630327505883242e-05, "loss": 0.0088, "step": 21450 }, { "epoch": 19.298561151079138, "grad_norm": 0.2127445936203003, "learning_rate": 7.627983467199182e-05, "loss": 0.005, "step": 21460 }, { "epoch": 19.307553956834532, "grad_norm": 0.15948572754859924, "learning_rate": 7.625638630203132e-05, "loss": 0.0098, "step": 21470 }, { "epoch": 19.31654676258993, "grad_norm": 0.24746829271316528, "learning_rate": 7.623292995607394e-05, "loss": 0.0072, "step": 21480 }, { "epoch": 19.325539568345324, "grad_norm": 0.23852333426475525, "learning_rate": 7.620946564124507e-05, "loss": 0.0114, "step": 21490 }, { "epoch": 19.334532374100718, "grad_norm": 0.2456989288330078, "learning_rate": 7.618599336467256e-05, "loss": 0.0085, "step": 21500 }, { "epoch": 19.343525179856115, "grad_norm": 0.18993236124515533, "learning_rate": 7.616251313348666e-05, "loss": 0.0079, "step": 21510 }, { "epoch": 19.35251798561151, "grad_norm": 0.19613921642303467, "learning_rate": 7.613902495482005e-05, "loss": 0.0099, "step": 21520 }, { "epoch": 19.361510791366907, "grad_norm": 0.310662180185318, "learning_rate": 7.611552883580784e-05, "loss": 0.0076, "step": 21530 }, { "epoch": 19.3705035971223, "grad_norm": 0.13896536827087402, "learning_rate": 7.609202478358748e-05, "loss": 0.0088, "step": 21540 }, { "epoch": 19.3794964028777, "grad_norm": 0.20947393774986267, "learning_rate": 7.606851280529895e-05, "loss": 0.0074, "step": 21550 }, { "epoch": 19.388489208633093, "grad_norm": 0.13055270910263062, "learning_rate": 7.604499290808449e-05, "loss": 0.0067, "step": 21560 }, { "epoch": 19.39748201438849, "grad_norm": 0.22438162565231323, "learning_rate": 7.602146509908888e-05, "loss": 0.0082, "step": 21570 }, { "epoch": 19.406474820143885, "grad_norm": 0.19472694396972656, "learning_rate": 7.599792938545921e-05, "loss": 0.0136, "step": 21580 }, { "epoch": 19.415467625899282, "grad_norm": 0.17690254747867584, "learning_rate": 7.597438577434506e-05, "loss": 0.0086, "step": 21590 }, { "epoch": 19.424460431654676, "grad_norm": 0.23781782388687134, "learning_rate": 7.595083427289831e-05, "loss": 0.0065, "step": 21600 }, { "epoch": 19.43345323741007, "grad_norm": 0.10738594084978104, "learning_rate": 7.59272748882733e-05, "loss": 0.0059, "step": 21610 }, { "epoch": 19.442446043165468, "grad_norm": 0.14516966044902802, "learning_rate": 7.590370762762675e-05, "loss": 0.008, "step": 21620 }, { "epoch": 19.451438848920862, "grad_norm": 0.3375394344329834, "learning_rate": 7.588013249811777e-05, "loss": 0.0101, "step": 21630 }, { "epoch": 19.46043165467626, "grad_norm": 0.1691025197505951, "learning_rate": 7.585654950690786e-05, "loss": 0.0069, "step": 21640 }, { "epoch": 19.469424460431654, "grad_norm": 0.2575664520263672, "learning_rate": 7.583295866116091e-05, "loss": 0.009, "step": 21650 }, { "epoch": 19.47841726618705, "grad_norm": 0.12221824377775192, "learning_rate": 7.580935996804321e-05, "loss": 0.0089, "step": 21660 }, { "epoch": 19.487410071942445, "grad_norm": 0.22572492063045502, "learning_rate": 7.57857534347234e-05, "loss": 0.0085, "step": 21670 }, { "epoch": 19.496402877697843, "grad_norm": 0.2750979959964752, "learning_rate": 7.576213906837254e-05, "loss": 0.0067, "step": 21680 }, { "epoch": 19.505395683453237, "grad_norm": 0.23535381257534027, "learning_rate": 7.573851687616403e-05, "loss": 0.0094, "step": 21690 }, { "epoch": 19.514388489208635, "grad_norm": 0.17949700355529785, "learning_rate": 7.571488686527368e-05, "loss": 0.0063, "step": 21700 }, { "epoch": 19.52338129496403, "grad_norm": 0.2110150158405304, "learning_rate": 7.569124904287968e-05, "loss": 0.0066, "step": 21710 }, { "epoch": 19.532374100719423, "grad_norm": 0.24467748403549194, "learning_rate": 7.566760341616254e-05, "loss": 0.0087, "step": 21720 }, { "epoch": 19.54136690647482, "grad_norm": 0.2744024395942688, "learning_rate": 7.564394999230519e-05, "loss": 0.0088, "step": 21730 }, { "epoch": 19.550359712230215, "grad_norm": 0.11028477549552917, "learning_rate": 7.562028877849294e-05, "loss": 0.0059, "step": 21740 }, { "epoch": 19.559352517985612, "grad_norm": 0.26008522510528564, "learning_rate": 7.559661978191341e-05, "loss": 0.008, "step": 21750 }, { "epoch": 19.568345323741006, "grad_norm": 0.2005806565284729, "learning_rate": 7.557294300975664e-05, "loss": 0.0071, "step": 21760 }, { "epoch": 19.577338129496404, "grad_norm": 0.19985154271125793, "learning_rate": 7.554925846921499e-05, "loss": 0.0071, "step": 21770 }, { "epoch": 19.586330935251798, "grad_norm": 0.1946127712726593, "learning_rate": 7.552556616748321e-05, "loss": 0.0073, "step": 21780 }, { "epoch": 19.595323741007196, "grad_norm": 0.2196793109178543, "learning_rate": 7.550186611175838e-05, "loss": 0.0092, "step": 21790 }, { "epoch": 19.60431654676259, "grad_norm": 0.16738203167915344, "learning_rate": 7.547815830923998e-05, "loss": 0.0047, "step": 21800 }, { "epoch": 19.613309352517987, "grad_norm": 0.29557356238365173, "learning_rate": 7.54544427671298e-05, "loss": 0.0077, "step": 21810 }, { "epoch": 19.62230215827338, "grad_norm": 0.2406550496816635, "learning_rate": 7.543071949263198e-05, "loss": 0.0082, "step": 21820 }, { "epoch": 19.631294964028775, "grad_norm": 0.12294816225767136, "learning_rate": 7.540698849295305e-05, "loss": 0.0052, "step": 21830 }, { "epoch": 19.640287769784173, "grad_norm": 0.1429678201675415, "learning_rate": 7.538324977530183e-05, "loss": 0.0059, "step": 21840 }, { "epoch": 19.649280575539567, "grad_norm": 0.279605507850647, "learning_rate": 7.535950334688955e-05, "loss": 0.0083, "step": 21850 }, { "epoch": 19.658273381294965, "grad_norm": 0.25874850153923035, "learning_rate": 7.533574921492972e-05, "loss": 0.0066, "step": 21860 }, { "epoch": 19.66726618705036, "grad_norm": 0.2577434182167053, "learning_rate": 7.531198738663824e-05, "loss": 0.0054, "step": 21870 }, { "epoch": 19.676258992805757, "grad_norm": 0.17037340998649597, "learning_rate": 7.528821786923333e-05, "loss": 0.0062, "step": 21880 }, { "epoch": 19.68525179856115, "grad_norm": 0.2866000831127167, "learning_rate": 7.52644406699355e-05, "loss": 0.0093, "step": 21890 }, { "epoch": 19.694244604316548, "grad_norm": 0.1816851794719696, "learning_rate": 7.524065579596766e-05, "loss": 0.0058, "step": 21900 }, { "epoch": 19.703237410071942, "grad_norm": 0.2180425226688385, "learning_rate": 7.521686325455506e-05, "loss": 0.0064, "step": 21910 }, { "epoch": 19.71223021582734, "grad_norm": 0.21457985043525696, "learning_rate": 7.51930630529252e-05, "loss": 0.0082, "step": 21920 }, { "epoch": 19.721223021582734, "grad_norm": 0.1974411904811859, "learning_rate": 7.516925519830797e-05, "loss": 0.0073, "step": 21930 }, { "epoch": 19.730215827338128, "grad_norm": 0.20021598041057587, "learning_rate": 7.514543969793557e-05, "loss": 0.0073, "step": 21940 }, { "epoch": 19.739208633093526, "grad_norm": 0.19835156202316284, "learning_rate": 7.512161655904251e-05, "loss": 0.0087, "step": 21950 }, { "epoch": 19.74820143884892, "grad_norm": 0.42683276534080505, "learning_rate": 7.509778578886563e-05, "loss": 0.0078, "step": 21960 }, { "epoch": 19.757194244604317, "grad_norm": 0.15600822865962982, "learning_rate": 7.507394739464412e-05, "loss": 0.0074, "step": 21970 }, { "epoch": 19.76618705035971, "grad_norm": 0.17039921879768372, "learning_rate": 7.50501013836194e-05, "loss": 0.0107, "step": 21980 }, { "epoch": 19.77517985611511, "grad_norm": 0.2871319055557251, "learning_rate": 7.50262477630353e-05, "loss": 0.0096, "step": 21990 }, { "epoch": 19.784172661870503, "grad_norm": 0.1419629156589508, "learning_rate": 7.500238654013794e-05, "loss": 0.0105, "step": 22000 }, { "epoch": 19.7931654676259, "grad_norm": 0.25886571407318115, "learning_rate": 7.497851772217566e-05, "loss": 0.0069, "step": 22010 }, { "epoch": 19.802158273381295, "grad_norm": 0.2144048511981964, "learning_rate": 7.495464131639924e-05, "loss": 0.0051, "step": 22020 }, { "epoch": 19.81115107913669, "grad_norm": 0.25943654775619507, "learning_rate": 7.493075733006166e-05, "loss": 0.0081, "step": 22030 }, { "epoch": 19.820143884892087, "grad_norm": 0.22857385873794556, "learning_rate": 7.490686577041828e-05, "loss": 0.008, "step": 22040 }, { "epoch": 19.82913669064748, "grad_norm": 0.19386562705039978, "learning_rate": 7.488296664472668e-05, "loss": 0.0125, "step": 22050 }, { "epoch": 19.83812949640288, "grad_norm": 0.14397510886192322, "learning_rate": 7.485905996024682e-05, "loss": 0.0081, "step": 22060 }, { "epoch": 19.847122302158272, "grad_norm": 0.3188629150390625, "learning_rate": 7.483514572424093e-05, "loss": 0.0066, "step": 22070 }, { "epoch": 19.85611510791367, "grad_norm": 0.1097516119480133, "learning_rate": 7.481122394397349e-05, "loss": 0.0071, "step": 22080 }, { "epoch": 19.865107913669064, "grad_norm": 0.08745988458395004, "learning_rate": 7.478729462671131e-05, "loss": 0.0057, "step": 22090 }, { "epoch": 19.87410071942446, "grad_norm": 0.29030641913414, "learning_rate": 7.47633577797235e-05, "loss": 0.0101, "step": 22100 }, { "epoch": 19.883093525179856, "grad_norm": 0.174276202917099, "learning_rate": 7.473941341028144e-05, "loss": 0.0058, "step": 22110 }, { "epoch": 19.892086330935253, "grad_norm": 0.1700766682624817, "learning_rate": 7.471546152565879e-05, "loss": 0.0049, "step": 22120 }, { "epoch": 19.901079136690647, "grad_norm": 0.13857831060886383, "learning_rate": 7.46915021331315e-05, "loss": 0.0094, "step": 22130 }, { "epoch": 19.91007194244604, "grad_norm": 0.25474441051483154, "learning_rate": 7.466753523997778e-05, "loss": 0.0087, "step": 22140 }, { "epoch": 19.91906474820144, "grad_norm": 0.2572079598903656, "learning_rate": 7.464356085347819e-05, "loss": 0.0078, "step": 22150 }, { "epoch": 19.928057553956833, "grad_norm": 0.15142269432544708, "learning_rate": 7.461957898091548e-05, "loss": 0.0079, "step": 22160 }, { "epoch": 19.93705035971223, "grad_norm": 0.27215325832366943, "learning_rate": 7.459558962957473e-05, "loss": 0.0093, "step": 22170 }, { "epoch": 19.946043165467625, "grad_norm": 0.24716386198997498, "learning_rate": 7.457159280674326e-05, "loss": 0.0072, "step": 22180 }, { "epoch": 19.955035971223023, "grad_norm": 0.2370034009218216, "learning_rate": 7.454758851971066e-05, "loss": 0.0081, "step": 22190 }, { "epoch": 19.964028776978417, "grad_norm": 0.1820066124200821, "learning_rate": 7.45235767757688e-05, "loss": 0.0088, "step": 22200 }, { "epoch": 19.973021582733814, "grad_norm": 0.19941408932209015, "learning_rate": 7.449955758221183e-05, "loss": 0.0124, "step": 22210 }, { "epoch": 19.98201438848921, "grad_norm": 0.16084586083889008, "learning_rate": 7.447553094633615e-05, "loss": 0.0074, "step": 22220 }, { "epoch": 19.991007194244606, "grad_norm": 0.24168363213539124, "learning_rate": 7.445149687544039e-05, "loss": 0.0074, "step": 22230 }, { "epoch": 20.0, "grad_norm": 0.29987215995788574, "learning_rate": 7.44274553768255e-05, "loss": 0.008, "step": 22240 }, { "epoch": 20.008992805755394, "grad_norm": 0.14705781638622284, "learning_rate": 7.440340645779464e-05, "loss": 0.0061, "step": 22250 }, { "epoch": 20.01798561151079, "grad_norm": 0.12452351301908493, "learning_rate": 7.437935012565322e-05, "loss": 0.0077, "step": 22260 }, { "epoch": 20.026978417266186, "grad_norm": 0.20155470073223114, "learning_rate": 7.435528638770893e-05, "loss": 0.0091, "step": 22270 }, { "epoch": 20.035971223021583, "grad_norm": 0.15869978070259094, "learning_rate": 7.433121525127171e-05, "loss": 0.0128, "step": 22280 }, { "epoch": 20.044964028776977, "grad_norm": 0.22095632553100586, "learning_rate": 7.430713672365371e-05, "loss": 0.0207, "step": 22290 }, { "epoch": 20.053956834532375, "grad_norm": 0.1943831592798233, "learning_rate": 7.428305081216938e-05, "loss": 0.0064, "step": 22300 }, { "epoch": 20.06294964028777, "grad_norm": 0.25020697712898254, "learning_rate": 7.425895752413536e-05, "loss": 0.0088, "step": 22310 }, { "epoch": 20.071942446043167, "grad_norm": 0.24446307122707367, "learning_rate": 7.423485686687057e-05, "loss": 0.0141, "step": 22320 }, { "epoch": 20.08093525179856, "grad_norm": 0.39146000146865845, "learning_rate": 7.421074884769616e-05, "loss": 0.0087, "step": 22330 }, { "epoch": 20.08992805755396, "grad_norm": 0.3670203685760498, "learning_rate": 7.418663347393548e-05, "loss": 0.0118, "step": 22340 }, { "epoch": 20.098920863309353, "grad_norm": 0.2180539071559906, "learning_rate": 7.416251075291418e-05, "loss": 0.0105, "step": 22350 }, { "epoch": 20.107913669064747, "grad_norm": 0.16293442249298096, "learning_rate": 7.413838069196007e-05, "loss": 0.0058, "step": 22360 }, { "epoch": 20.116906474820144, "grad_norm": 0.2307584583759308, "learning_rate": 7.411424329840324e-05, "loss": 0.0123, "step": 22370 }, { "epoch": 20.12589928057554, "grad_norm": 0.19988210499286652, "learning_rate": 7.409009857957601e-05, "loss": 0.0144, "step": 22380 }, { "epoch": 20.134892086330936, "grad_norm": 0.16307418048381805, "learning_rate": 7.40659465428129e-05, "loss": 0.0119, "step": 22390 }, { "epoch": 20.14388489208633, "grad_norm": 0.20730768144130707, "learning_rate": 7.404178719545063e-05, "loss": 0.0092, "step": 22400 }, { "epoch": 20.152877697841728, "grad_norm": 0.3446018099784851, "learning_rate": 7.401762054482822e-05, "loss": 0.0141, "step": 22410 }, { "epoch": 20.16187050359712, "grad_norm": 0.23419710993766785, "learning_rate": 7.39934465982868e-05, "loss": 0.009, "step": 22420 }, { "epoch": 20.17086330935252, "grad_norm": 0.205569326877594, "learning_rate": 7.396926536316984e-05, "loss": 0.0072, "step": 22430 }, { "epoch": 20.179856115107913, "grad_norm": 0.1813746988773346, "learning_rate": 7.394507684682293e-05, "loss": 0.0094, "step": 22440 }, { "epoch": 20.18884892086331, "grad_norm": 0.3261217176914215, "learning_rate": 7.392088105659393e-05, "loss": 0.0092, "step": 22450 }, { "epoch": 20.197841726618705, "grad_norm": 0.2577457129955292, "learning_rate": 7.389667799983284e-05, "loss": 0.0111, "step": 22460 }, { "epoch": 20.2068345323741, "grad_norm": 0.21711215376853943, "learning_rate": 7.387246768389193e-05, "loss": 0.0073, "step": 22470 }, { "epoch": 20.215827338129497, "grad_norm": 0.2439783811569214, "learning_rate": 7.384825011612563e-05, "loss": 0.0062, "step": 22480 }, { "epoch": 20.22482014388489, "grad_norm": 0.24087072908878326, "learning_rate": 7.382402530389066e-05, "loss": 0.0136, "step": 22490 }, { "epoch": 20.23381294964029, "grad_norm": 0.24554501473903656, "learning_rate": 7.379979325454582e-05, "loss": 0.009, "step": 22500 }, { "epoch": 20.242805755395683, "grad_norm": 0.24543119966983795, "learning_rate": 7.37755539754522e-05, "loss": 0.0086, "step": 22510 }, { "epoch": 20.25179856115108, "grad_norm": 0.16692349314689636, "learning_rate": 7.375130747397302e-05, "loss": 0.0058, "step": 22520 }, { "epoch": 20.260791366906474, "grad_norm": 0.2568872272968292, "learning_rate": 7.372705375747377e-05, "loss": 0.0065, "step": 22530 }, { "epoch": 20.269784172661872, "grad_norm": 0.24260301887989044, "learning_rate": 7.370279283332205e-05, "loss": 0.0066, "step": 22540 }, { "epoch": 20.278776978417266, "grad_norm": 0.23282834887504578, "learning_rate": 7.36785247088877e-05, "loss": 0.0067, "step": 22550 }, { "epoch": 20.28776978417266, "grad_norm": 0.12819232046604156, "learning_rate": 7.365424939154275e-05, "loss": 0.0076, "step": 22560 }, { "epoch": 20.296762589928058, "grad_norm": 0.26052355766296387, "learning_rate": 7.362996688866138e-05, "loss": 0.0073, "step": 22570 }, { "epoch": 20.305755395683452, "grad_norm": 0.17222627997398376, "learning_rate": 7.360567720761999e-05, "loss": 0.0097, "step": 22580 }, { "epoch": 20.31474820143885, "grad_norm": 0.2180832475423813, "learning_rate": 7.358138035579711e-05, "loss": 0.0079, "step": 22590 }, { "epoch": 20.323741007194243, "grad_norm": 0.2178744673728943, "learning_rate": 7.355707634057354e-05, "loss": 0.0086, "step": 22600 }, { "epoch": 20.33273381294964, "grad_norm": 0.1990690678358078, "learning_rate": 7.353276516933215e-05, "loss": 0.0049, "step": 22610 }, { "epoch": 20.341726618705035, "grad_norm": 0.12577369809150696, "learning_rate": 7.350844684945806e-05, "loss": 0.0089, "step": 22620 }, { "epoch": 20.350719424460433, "grad_norm": 0.18199215829372406, "learning_rate": 7.348412138833851e-05, "loss": 0.0066, "step": 22630 }, { "epoch": 20.359712230215827, "grad_norm": 0.2650308310985565, "learning_rate": 7.345978879336295e-05, "loss": 0.0065, "step": 22640 }, { "epoch": 20.368705035971225, "grad_norm": 0.1264207810163498, "learning_rate": 7.343544907192296e-05, "loss": 0.0112, "step": 22650 }, { "epoch": 20.37769784172662, "grad_norm": 0.15287835896015167, "learning_rate": 7.341110223141235e-05, "loss": 0.0077, "step": 22660 }, { "epoch": 20.386690647482013, "grad_norm": 0.20333509147167206, "learning_rate": 7.3386748279227e-05, "loss": 0.0075, "step": 22670 }, { "epoch": 20.39568345323741, "grad_norm": 0.27358245849609375, "learning_rate": 7.336238722276501e-05, "loss": 0.0114, "step": 22680 }, { "epoch": 20.404676258992804, "grad_norm": 0.28524112701416016, "learning_rate": 7.333801906942663e-05, "loss": 0.0085, "step": 22690 }, { "epoch": 20.413669064748202, "grad_norm": 0.15389007329940796, "learning_rate": 7.331364382661428e-05, "loss": 0.0085, "step": 22700 }, { "epoch": 20.422661870503596, "grad_norm": 0.1852969080209732, "learning_rate": 7.328926150173248e-05, "loss": 0.0067, "step": 22710 }, { "epoch": 20.431654676258994, "grad_norm": 0.21281559765338898, "learning_rate": 7.326487210218795e-05, "loss": 0.0071, "step": 22720 }, { "epoch": 20.440647482014388, "grad_norm": 0.30532029271125793, "learning_rate": 7.324047563538955e-05, "loss": 0.0118, "step": 22730 }, { "epoch": 20.449640287769785, "grad_norm": 0.11587841808795929, "learning_rate": 7.321607210874828e-05, "loss": 0.0091, "step": 22740 }, { "epoch": 20.45863309352518, "grad_norm": 0.17100109159946442, "learning_rate": 7.31916615296773e-05, "loss": 0.0069, "step": 22750 }, { "epoch": 20.467625899280577, "grad_norm": 0.12868449091911316, "learning_rate": 7.316724390559188e-05, "loss": 0.0065, "step": 22760 }, { "epoch": 20.47661870503597, "grad_norm": 0.18059802055358887, "learning_rate": 7.314281924390946e-05, "loss": 0.005, "step": 22770 }, { "epoch": 20.485611510791365, "grad_norm": 0.18798862397670746, "learning_rate": 7.311838755204959e-05, "loss": 0.0086, "step": 22780 }, { "epoch": 20.494604316546763, "grad_norm": 0.20548802614212036, "learning_rate": 7.3093948837434e-05, "loss": 0.0077, "step": 22790 }, { "epoch": 20.503597122302157, "grad_norm": 0.17897827923297882, "learning_rate": 7.306950310748651e-05, "loss": 0.0054, "step": 22800 }, { "epoch": 20.512589928057555, "grad_norm": 0.28207093477249146, "learning_rate": 7.304505036963311e-05, "loss": 0.0087, "step": 22810 }, { "epoch": 20.52158273381295, "grad_norm": 0.23945921659469604, "learning_rate": 7.302059063130186e-05, "loss": 0.0063, "step": 22820 }, { "epoch": 20.530575539568346, "grad_norm": 0.21760572493076324, "learning_rate": 7.2996123899923e-05, "loss": 0.0066, "step": 22830 }, { "epoch": 20.53956834532374, "grad_norm": 0.19695830345153809, "learning_rate": 7.297165018292886e-05, "loss": 0.0091, "step": 22840 }, { "epoch": 20.548561151079138, "grad_norm": 0.22595448791980743, "learning_rate": 7.294716948775396e-05, "loss": 0.0074, "step": 22850 }, { "epoch": 20.557553956834532, "grad_norm": 0.24146665632724762, "learning_rate": 7.292268182183484e-05, "loss": 0.0089, "step": 22860 }, { "epoch": 20.56654676258993, "grad_norm": 0.19764627516269684, "learning_rate": 7.28981871926102e-05, "loss": 0.0056, "step": 22870 }, { "epoch": 20.575539568345324, "grad_norm": 0.19012735784053802, "learning_rate": 7.28736856075209e-05, "loss": 0.0087, "step": 22880 }, { "epoch": 20.584532374100718, "grad_norm": 0.14327403903007507, "learning_rate": 7.284917707400985e-05, "loss": 0.0065, "step": 22890 }, { "epoch": 20.593525179856115, "grad_norm": 0.3227463960647583, "learning_rate": 7.282466159952212e-05, "loss": 0.0102, "step": 22900 }, { "epoch": 20.60251798561151, "grad_norm": 0.15545670688152313, "learning_rate": 7.280013919150483e-05, "loss": 0.0084, "step": 22910 }, { "epoch": 20.611510791366907, "grad_norm": 0.25326311588287354, "learning_rate": 7.277560985740728e-05, "loss": 0.0146, "step": 22920 }, { "epoch": 20.6205035971223, "grad_norm": 0.21731586754322052, "learning_rate": 7.275107360468079e-05, "loss": 0.0089, "step": 22930 }, { "epoch": 20.6294964028777, "grad_norm": 0.2574801445007324, "learning_rate": 7.272653044077885e-05, "loss": 0.0085, "step": 22940 }, { "epoch": 20.638489208633093, "grad_norm": 0.19118978083133698, "learning_rate": 7.270198037315703e-05, "loss": 0.0119, "step": 22950 }, { "epoch": 20.64748201438849, "grad_norm": 0.17289359867572784, "learning_rate": 7.267742340927297e-05, "loss": 0.0063, "step": 22960 }, { "epoch": 20.656474820143885, "grad_norm": 0.15052159130573273, "learning_rate": 7.265285955658645e-05, "loss": 0.0046, "step": 22970 }, { "epoch": 20.665467625899282, "grad_norm": 0.1039736419916153, "learning_rate": 7.26282888225593e-05, "loss": 0.0068, "step": 22980 }, { "epoch": 20.674460431654676, "grad_norm": 0.19361189007759094, "learning_rate": 7.260371121465548e-05, "loss": 0.0069, "step": 22990 }, { "epoch": 20.68345323741007, "grad_norm": 0.20391389727592468, "learning_rate": 7.2579126740341e-05, "loss": 0.0105, "step": 23000 }, { "epoch": 20.692446043165468, "grad_norm": 0.10209143161773682, "learning_rate": 7.2554535407084e-05, "loss": 0.0057, "step": 23010 }, { "epoch": 20.701438848920862, "grad_norm": 0.19430960714817047, "learning_rate": 7.252993722235464e-05, "loss": 0.0072, "step": 23020 }, { "epoch": 20.71043165467626, "grad_norm": 0.21651814877986908, "learning_rate": 7.250533219362523e-05, "loss": 0.0053, "step": 23030 }, { "epoch": 20.719424460431654, "grad_norm": 0.1519799828529358, "learning_rate": 7.248072032837012e-05, "loss": 0.011, "step": 23040 }, { "epoch": 20.72841726618705, "grad_norm": 0.2530313730239868, "learning_rate": 7.245610163406575e-05, "loss": 0.0082, "step": 23050 }, { "epoch": 20.737410071942445, "grad_norm": 0.14335212111473083, "learning_rate": 7.243147611819061e-05, "loss": 0.0087, "step": 23060 }, { "epoch": 20.746402877697843, "grad_norm": 0.2173602432012558, "learning_rate": 7.240684378822531e-05, "loss": 0.0061, "step": 23070 }, { "epoch": 20.755395683453237, "grad_norm": 0.14912059903144836, "learning_rate": 7.238220465165248e-05, "loss": 0.0059, "step": 23080 }, { "epoch": 20.764388489208635, "grad_norm": 0.21634092926979065, "learning_rate": 7.235755871595684e-05, "loss": 0.0094, "step": 23090 }, { "epoch": 20.77338129496403, "grad_norm": 0.20832405984401703, "learning_rate": 7.233290598862517e-05, "loss": 0.0067, "step": 23100 }, { "epoch": 20.782374100719423, "grad_norm": 0.1918906420469284, "learning_rate": 7.230824647714635e-05, "loss": 0.0067, "step": 23110 }, { "epoch": 20.79136690647482, "grad_norm": 0.2362699955701828, "learning_rate": 7.228358018901124e-05, "loss": 0.0078, "step": 23120 }, { "epoch": 20.800359712230215, "grad_norm": 0.2816280722618103, "learning_rate": 7.225890713171286e-05, "loss": 0.009, "step": 23130 }, { "epoch": 20.809352517985612, "grad_norm": 0.29668673872947693, "learning_rate": 7.223422731274618e-05, "loss": 0.0064, "step": 23140 }, { "epoch": 20.818345323741006, "grad_norm": 0.2354690581560135, "learning_rate": 7.220954073960832e-05, "loss": 0.0069, "step": 23150 }, { "epoch": 20.827338129496404, "grad_norm": 0.32532167434692383, "learning_rate": 7.218484741979838e-05, "loss": 0.0076, "step": 23160 }, { "epoch": 20.836330935251798, "grad_norm": 0.21012982726097107, "learning_rate": 7.216014736081756e-05, "loss": 0.0112, "step": 23170 }, { "epoch": 20.845323741007196, "grad_norm": 0.21037337183952332, "learning_rate": 7.213544057016906e-05, "loss": 0.0084, "step": 23180 }, { "epoch": 20.85431654676259, "grad_norm": 0.1872122585773468, "learning_rate": 7.211072705535819e-05, "loss": 0.007, "step": 23190 }, { "epoch": 20.863309352517987, "grad_norm": 0.16538555920124054, "learning_rate": 7.208600682389224e-05, "loss": 0.0065, "step": 23200 }, { "epoch": 20.87230215827338, "grad_norm": 0.18482866883277893, "learning_rate": 7.206127988328055e-05, "loss": 0.0078, "step": 23210 }, { "epoch": 20.881294964028775, "grad_norm": 0.1897495537996292, "learning_rate": 7.203654624103453e-05, "loss": 0.0064, "step": 23220 }, { "epoch": 20.890287769784173, "grad_norm": 0.23944206535816193, "learning_rate": 7.201180590466761e-05, "loss": 0.0079, "step": 23230 }, { "epoch": 20.899280575539567, "grad_norm": 0.21876055002212524, "learning_rate": 7.198705888169523e-05, "loss": 0.0099, "step": 23240 }, { "epoch": 20.908273381294965, "grad_norm": 0.1878504902124405, "learning_rate": 7.196230517963491e-05, "loss": 0.006, "step": 23250 }, { "epoch": 20.91726618705036, "grad_norm": 0.22715245187282562, "learning_rate": 7.193754480600615e-05, "loss": 0.0089, "step": 23260 }, { "epoch": 20.926258992805757, "grad_norm": 0.24385063350200653, "learning_rate": 7.19127777683305e-05, "loss": 0.013, "step": 23270 }, { "epoch": 20.93525179856115, "grad_norm": 0.1766130030155182, "learning_rate": 7.188800407413156e-05, "loss": 0.0051, "step": 23280 }, { "epoch": 20.944244604316548, "grad_norm": 0.13936364650726318, "learning_rate": 7.186322373093489e-05, "loss": 0.008, "step": 23290 }, { "epoch": 20.953237410071942, "grad_norm": 0.14448238909244537, "learning_rate": 7.18384367462681e-05, "loss": 0.0046, "step": 23300 }, { "epoch": 20.96223021582734, "grad_norm": 0.21767181158065796, "learning_rate": 7.181364312766085e-05, "loss": 0.0069, "step": 23310 }, { "epoch": 20.971223021582734, "grad_norm": 0.15346601605415344, "learning_rate": 7.178884288264477e-05, "loss": 0.0054, "step": 23320 }, { "epoch": 20.980215827338128, "grad_norm": 0.18566539883613586, "learning_rate": 7.176403601875353e-05, "loss": 0.009, "step": 23330 }, { "epoch": 20.989208633093526, "grad_norm": 0.19628039002418518, "learning_rate": 7.173922254352279e-05, "loss": 0.0083, "step": 23340 }, { "epoch": 20.99820143884892, "grad_norm": 0.18587322533130646, "learning_rate": 7.171440246449024e-05, "loss": 0.0066, "step": 23350 }, { "epoch": 21.007194244604317, "grad_norm": 0.22583946585655212, "learning_rate": 7.168957578919555e-05, "loss": 0.0054, "step": 23360 }, { "epoch": 21.01618705035971, "grad_norm": 0.1836862862110138, "learning_rate": 7.16647425251804e-05, "loss": 0.0069, "step": 23370 }, { "epoch": 21.02517985611511, "grad_norm": 0.13849179446697235, "learning_rate": 7.163990267998852e-05, "loss": 0.0078, "step": 23380 }, { "epoch": 21.034172661870503, "grad_norm": 0.1968035250902176, "learning_rate": 7.161505626116556e-05, "loss": 0.0082, "step": 23390 }, { "epoch": 21.0431654676259, "grad_norm": 0.2533884048461914, "learning_rate": 7.159020327625923e-05, "loss": 0.0077, "step": 23400 }, { "epoch": 21.052158273381295, "grad_norm": 0.2491820603609085, "learning_rate": 7.15653437328192e-05, "loss": 0.0117, "step": 23410 }, { "epoch": 21.06115107913669, "grad_norm": 0.1828056424856186, "learning_rate": 7.154047763839713e-05, "loss": 0.007, "step": 23420 }, { "epoch": 21.070143884892087, "grad_norm": 0.2851572334766388, "learning_rate": 7.15156050005467e-05, "loss": 0.0058, "step": 23430 }, { "epoch": 21.07913669064748, "grad_norm": 0.17644792795181274, "learning_rate": 7.149072582682357e-05, "loss": 0.0061, "step": 23440 }, { "epoch": 21.08812949640288, "grad_norm": 0.22681733965873718, "learning_rate": 7.146584012478535e-05, "loss": 0.0068, "step": 23450 }, { "epoch": 21.097122302158272, "grad_norm": 0.19098946452140808, "learning_rate": 7.144094790199169e-05, "loss": 0.0086, "step": 23460 }, { "epoch": 21.10611510791367, "grad_norm": 0.21483995020389557, "learning_rate": 7.141604916600415e-05, "loss": 0.0082, "step": 23470 }, { "epoch": 21.115107913669064, "grad_norm": 0.15395738184452057, "learning_rate": 7.139114392438635e-05, "loss": 0.0053, "step": 23480 }, { "epoch": 21.12410071942446, "grad_norm": 0.0996568575501442, "learning_rate": 7.136623218470382e-05, "loss": 0.0073, "step": 23490 }, { "epoch": 21.133093525179856, "grad_norm": 0.19720296561717987, "learning_rate": 7.13413139545241e-05, "loss": 0.0069, "step": 23500 }, { "epoch": 21.142086330935253, "grad_norm": 0.15839047729969025, "learning_rate": 7.131638924141668e-05, "loss": 0.0081, "step": 23510 }, { "epoch": 21.151079136690647, "grad_norm": 0.2375253438949585, "learning_rate": 7.129145805295304e-05, "loss": 0.0086, "step": 23520 }, { "epoch": 21.16007194244604, "grad_norm": 0.12382958084344864, "learning_rate": 7.126652039670661e-05, "loss": 0.0039, "step": 23530 }, { "epoch": 21.16906474820144, "grad_norm": 0.169782355427742, "learning_rate": 7.124157628025278e-05, "loss": 0.0057, "step": 23540 }, { "epoch": 21.178057553956833, "grad_norm": 0.13406755030155182, "learning_rate": 7.121662571116894e-05, "loss": 0.0071, "step": 23550 }, { "epoch": 21.18705035971223, "grad_norm": 0.14449627697467804, "learning_rate": 7.119166869703441e-05, "loss": 0.0046, "step": 23560 }, { "epoch": 21.196043165467625, "grad_norm": 0.20895127952098846, "learning_rate": 7.116670524543044e-05, "loss": 0.0078, "step": 23570 }, { "epoch": 21.205035971223023, "grad_norm": 0.2848685681819916, "learning_rate": 7.114173536394032e-05, "loss": 0.0073, "step": 23580 }, { "epoch": 21.214028776978417, "grad_norm": 0.18354730308055878, "learning_rate": 7.111675906014917e-05, "loss": 0.0087, "step": 23590 }, { "epoch": 21.223021582733814, "grad_norm": 0.1429445892572403, "learning_rate": 7.109177634164421e-05, "loss": 0.0049, "step": 23600 }, { "epoch": 21.23201438848921, "grad_norm": 0.22424842417240143, "learning_rate": 7.106678721601449e-05, "loss": 0.0055, "step": 23610 }, { "epoch": 21.241007194244606, "grad_norm": 0.17946499586105347, "learning_rate": 7.104179169085103e-05, "loss": 0.0075, "step": 23620 }, { "epoch": 21.25, "grad_norm": 0.2647473216056824, "learning_rate": 7.101678977374683e-05, "loss": 0.0115, "step": 23630 }, { "epoch": 21.258992805755394, "grad_norm": 0.18847012519836426, "learning_rate": 7.099178147229685e-05, "loss": 0.0059, "step": 23640 }, { "epoch": 21.26798561151079, "grad_norm": 0.11886311322450638, "learning_rate": 7.096676679409789e-05, "loss": 0.008, "step": 23650 }, { "epoch": 21.276978417266186, "grad_norm": 0.20187687873840332, "learning_rate": 7.094174574674877e-05, "loss": 0.0069, "step": 23660 }, { "epoch": 21.285971223021583, "grad_norm": 0.27322298288345337, "learning_rate": 7.091671833785025e-05, "loss": 0.0052, "step": 23670 }, { "epoch": 21.294964028776977, "grad_norm": 0.28971967101097107, "learning_rate": 7.089168457500493e-05, "loss": 0.0069, "step": 23680 }, { "epoch": 21.303956834532375, "grad_norm": 0.16743890941143036, "learning_rate": 7.086664446581747e-05, "loss": 0.0115, "step": 23690 }, { "epoch": 21.31294964028777, "grad_norm": 0.17095990478992462, "learning_rate": 7.084159801789438e-05, "loss": 0.007, "step": 23700 }, { "epoch": 21.321942446043167, "grad_norm": 0.21966929733753204, "learning_rate": 7.081654523884411e-05, "loss": 0.0082, "step": 23710 }, { "epoch": 21.33093525179856, "grad_norm": 0.20676586031913757, "learning_rate": 7.0791486136277e-05, "loss": 0.0093, "step": 23720 }, { "epoch": 21.33992805755396, "grad_norm": 0.2047353833913803, "learning_rate": 7.07664207178054e-05, "loss": 0.0103, "step": 23730 }, { "epoch": 21.348920863309353, "grad_norm": 0.20650804042816162, "learning_rate": 7.074134899104345e-05, "loss": 0.0093, "step": 23740 }, { "epoch": 21.357913669064747, "grad_norm": 0.1947992593050003, "learning_rate": 7.071627096360735e-05, "loss": 0.0068, "step": 23750 }, { "epoch": 21.366906474820144, "grad_norm": 0.1296653151512146, "learning_rate": 7.069118664311511e-05, "loss": 0.006, "step": 23760 }, { "epoch": 21.37589928057554, "grad_norm": 0.16159968078136444, "learning_rate": 7.06660960371867e-05, "loss": 0.0053, "step": 23770 }, { "epoch": 21.384892086330936, "grad_norm": 0.23301050066947937, "learning_rate": 7.064099915344396e-05, "loss": 0.009, "step": 23780 }, { "epoch": 21.39388489208633, "grad_norm": 0.33211904764175415, "learning_rate": 7.061589599951066e-05, "loss": 0.0075, "step": 23790 }, { "epoch": 21.402877697841728, "grad_norm": 0.17917940020561218, "learning_rate": 7.05907865830125e-05, "loss": 0.0142, "step": 23800 }, { "epoch": 21.41187050359712, "grad_norm": 0.30580252408981323, "learning_rate": 7.056567091157703e-05, "loss": 0.0071, "step": 23810 }, { "epoch": 21.42086330935252, "grad_norm": 0.1575096696615219, "learning_rate": 7.054054899283375e-05, "loss": 0.0091, "step": 23820 }, { "epoch": 21.429856115107913, "grad_norm": 0.2217419296503067, "learning_rate": 7.051542083441403e-05, "loss": 0.0058, "step": 23830 }, { "epoch": 21.43884892086331, "grad_norm": 0.17973662912845612, "learning_rate": 7.049028644395113e-05, "loss": 0.0062, "step": 23840 }, { "epoch": 21.447841726618705, "grad_norm": 0.1431485265493393, "learning_rate": 7.046514582908024e-05, "loss": 0.0052, "step": 23850 }, { "epoch": 21.4568345323741, "grad_norm": 0.19212424755096436, "learning_rate": 7.043999899743838e-05, "loss": 0.0068, "step": 23860 }, { "epoch": 21.465827338129497, "grad_norm": 0.15998174250125885, "learning_rate": 7.041484595666451e-05, "loss": 0.0097, "step": 23870 }, { "epoch": 21.47482014388489, "grad_norm": 0.1757289469242096, "learning_rate": 7.038968671439948e-05, "loss": 0.0066, "step": 23880 }, { "epoch": 21.48381294964029, "grad_norm": 0.1798657923936844, "learning_rate": 7.036452127828596e-05, "loss": 0.0076, "step": 23890 }, { "epoch": 21.492805755395683, "grad_norm": 0.22354164719581604, "learning_rate": 7.033934965596859e-05, "loss": 0.0055, "step": 23900 }, { "epoch": 21.50179856115108, "grad_norm": 0.19048751890659332, "learning_rate": 7.031417185509381e-05, "loss": 0.0088, "step": 23910 }, { "epoch": 21.510791366906474, "grad_norm": 0.16603340208530426, "learning_rate": 7.028898788331e-05, "loss": 0.0071, "step": 23920 }, { "epoch": 21.519784172661872, "grad_norm": 0.1236453577876091, "learning_rate": 7.026379774826736e-05, "loss": 0.0136, "step": 23930 }, { "epoch": 21.528776978417266, "grad_norm": 0.19574162364006042, "learning_rate": 7.0238601457618e-05, "loss": 0.0059, "step": 23940 }, { "epoch": 21.53776978417266, "grad_norm": 0.19051963090896606, "learning_rate": 7.02133990190159e-05, "loss": 0.0071, "step": 23950 }, { "epoch": 21.546762589928058, "grad_norm": 0.23475764691829681, "learning_rate": 7.018819044011687e-05, "loss": 0.0076, "step": 23960 }, { "epoch": 21.555755395683452, "grad_norm": 0.26903295516967773, "learning_rate": 7.016297572857863e-05, "loss": 0.0059, "step": 23970 }, { "epoch": 21.56474820143885, "grad_norm": 0.16352573037147522, "learning_rate": 7.013775489206072e-05, "loss": 0.0082, "step": 23980 }, { "epoch": 21.573741007194243, "grad_norm": 0.23507016897201538, "learning_rate": 7.01125279382246e-05, "loss": 0.0058, "step": 23990 }, { "epoch": 21.58273381294964, "grad_norm": 0.19665563106536865, "learning_rate": 7.008729487473351e-05, "loss": 0.0076, "step": 24000 }, { "epoch": 21.591726618705035, "grad_norm": 0.21904972195625305, "learning_rate": 7.006205570925263e-05, "loss": 0.0093, "step": 24010 }, { "epoch": 21.600719424460433, "grad_norm": 0.19479063153266907, "learning_rate": 7.003681044944892e-05, "loss": 0.008, "step": 24020 }, { "epoch": 21.609712230215827, "grad_norm": 0.2100483924150467, "learning_rate": 7.001155910299126e-05, "loss": 0.0071, "step": 24030 }, { "epoch": 21.618705035971225, "grad_norm": 0.203975111246109, "learning_rate": 6.99863016775503e-05, "loss": 0.0111, "step": 24040 }, { "epoch": 21.62769784172662, "grad_norm": 0.20817629992961884, "learning_rate": 6.996103818079859e-05, "loss": 0.0077, "step": 24050 }, { "epoch": 21.636690647482013, "grad_norm": 0.18428921699523926, "learning_rate": 6.993576862041054e-05, "loss": 0.0085, "step": 24060 }, { "epoch": 21.64568345323741, "grad_norm": 0.18316924571990967, "learning_rate": 6.991049300406235e-05, "loss": 0.0054, "step": 24070 }, { "epoch": 21.654676258992804, "grad_norm": 0.26468491554260254, "learning_rate": 6.988521133943209e-05, "loss": 0.0074, "step": 24080 }, { "epoch": 21.663669064748202, "grad_norm": 0.1814817488193512, "learning_rate": 6.985992363419966e-05, "loss": 0.0097, "step": 24090 }, { "epoch": 21.672661870503596, "grad_norm": 0.2718987464904785, "learning_rate": 6.983462989604682e-05, "loss": 0.0081, "step": 24100 }, { "epoch": 21.681654676258994, "grad_norm": 0.17811954021453857, "learning_rate": 6.980933013265709e-05, "loss": 0.0056, "step": 24110 }, { "epoch": 21.690647482014388, "grad_norm": 0.23560065031051636, "learning_rate": 6.978402435171592e-05, "loss": 0.0124, "step": 24120 }, { "epoch": 21.699640287769785, "grad_norm": 0.181339830160141, "learning_rate": 6.975871256091052e-05, "loss": 0.0087, "step": 24130 }, { "epoch": 21.70863309352518, "grad_norm": 0.2263648808002472, "learning_rate": 6.973339476792995e-05, "loss": 0.0065, "step": 24140 }, { "epoch": 21.717625899280577, "grad_norm": 0.24774262309074402, "learning_rate": 6.970807098046505e-05, "loss": 0.0068, "step": 24150 }, { "epoch": 21.72661870503597, "grad_norm": 0.2981756329536438, "learning_rate": 6.968274120620858e-05, "loss": 0.0078, "step": 24160 }, { "epoch": 21.735611510791365, "grad_norm": 0.24717874825000763, "learning_rate": 6.965740545285499e-05, "loss": 0.0054, "step": 24170 }, { "epoch": 21.744604316546763, "grad_norm": 0.16040104627609253, "learning_rate": 6.963206372810068e-05, "loss": 0.007, "step": 24180 }, { "epoch": 21.753597122302157, "grad_norm": 0.21997901797294617, "learning_rate": 6.960671603964375e-05, "loss": 0.0091, "step": 24190 }, { "epoch": 21.762589928057555, "grad_norm": 0.29987555742263794, "learning_rate": 6.958136239518418e-05, "loss": 0.0078, "step": 24200 }, { "epoch": 21.77158273381295, "grad_norm": 0.1857626587152481, "learning_rate": 6.955600280242371e-05, "loss": 0.0052, "step": 24210 }, { "epoch": 21.780575539568346, "grad_norm": 0.14940541982650757, "learning_rate": 6.953063726906596e-05, "loss": 0.0068, "step": 24220 }, { "epoch": 21.78956834532374, "grad_norm": 0.23675765097141266, "learning_rate": 6.950526580281626e-05, "loss": 0.006, "step": 24230 }, { "epoch": 21.798561151079138, "grad_norm": 0.1791832596063614, "learning_rate": 6.947988841138184e-05, "loss": 0.0073, "step": 24240 }, { "epoch": 21.807553956834532, "grad_norm": 0.21053653955459595, "learning_rate": 6.945450510247165e-05, "loss": 0.0054, "step": 24250 }, { "epoch": 21.81654676258993, "grad_norm": 0.24278147518634796, "learning_rate": 6.942911588379647e-05, "loss": 0.0063, "step": 24260 }, { "epoch": 21.825539568345324, "grad_norm": 0.17010001838207245, "learning_rate": 6.940372076306888e-05, "loss": 0.0063, "step": 24270 }, { "epoch": 21.834532374100718, "grad_norm": 0.1307593584060669, "learning_rate": 6.937831974800326e-05, "loss": 0.007, "step": 24280 }, { "epoch": 21.843525179856115, "grad_norm": 0.22146524488925934, "learning_rate": 6.935291284631574e-05, "loss": 0.0082, "step": 24290 }, { "epoch": 21.85251798561151, "grad_norm": 0.14865514636039734, "learning_rate": 6.932750006572428e-05, "loss": 0.0057, "step": 24300 }, { "epoch": 21.861510791366907, "grad_norm": 0.16330832242965698, "learning_rate": 6.930208141394863e-05, "loss": 0.0184, "step": 24310 }, { "epoch": 21.8705035971223, "grad_norm": 0.22754134237766266, "learning_rate": 6.927665689871026e-05, "loss": 0.0059, "step": 24320 }, { "epoch": 21.8794964028777, "grad_norm": 0.2027151882648468, "learning_rate": 6.925122652773253e-05, "loss": 0.0066, "step": 24330 }, { "epoch": 21.888489208633093, "grad_norm": 0.19068242609500885, "learning_rate": 6.922579030874046e-05, "loss": 0.0069, "step": 24340 }, { "epoch": 21.89748201438849, "grad_norm": 0.22447125613689423, "learning_rate": 6.920034824946093e-05, "loss": 0.0102, "step": 24350 }, { "epoch": 21.906474820143885, "grad_norm": 0.17407776415348053, "learning_rate": 6.917490035762255e-05, "loss": 0.0101, "step": 24360 }, { "epoch": 21.915467625899282, "grad_norm": 0.18100932240486145, "learning_rate": 6.914944664095573e-05, "loss": 0.0134, "step": 24370 }, { "epoch": 21.924460431654676, "grad_norm": 0.2652789354324341, "learning_rate": 6.912398710719264e-05, "loss": 0.0081, "step": 24380 }, { "epoch": 21.93345323741007, "grad_norm": 0.20015902817249298, "learning_rate": 6.90985217640672e-05, "loss": 0.0053, "step": 24390 }, { "epoch": 21.942446043165468, "grad_norm": 0.18464863300323486, "learning_rate": 6.90730506193151e-05, "loss": 0.0073, "step": 24400 }, { "epoch": 21.951438848920862, "grad_norm": 0.18425965309143066, "learning_rate": 6.904757368067384e-05, "loss": 0.0086, "step": 24410 }, { "epoch": 21.96043165467626, "grad_norm": 0.16037221252918243, "learning_rate": 6.90220909558826e-05, "loss": 0.0102, "step": 24420 }, { "epoch": 21.969424460431654, "grad_norm": 0.1846074014902115, "learning_rate": 6.899660245268237e-05, "loss": 0.0084, "step": 24430 }, { "epoch": 21.97841726618705, "grad_norm": 0.13567207753658295, "learning_rate": 6.897110817881592e-05, "loss": 0.009, "step": 24440 }, { "epoch": 21.987410071942445, "grad_norm": 0.301209956407547, "learning_rate": 6.894560814202769e-05, "loss": 0.0086, "step": 24450 }, { "epoch": 21.996402877697843, "grad_norm": 0.2676408588886261, "learning_rate": 6.892010235006394e-05, "loss": 0.0123, "step": 24460 }, { "epoch": 22.005395683453237, "grad_norm": 0.21666587889194489, "learning_rate": 6.889459081067264e-05, "loss": 0.0061, "step": 24470 }, { "epoch": 22.014388489208635, "grad_norm": 0.2132551074028015, "learning_rate": 6.886907353160356e-05, "loss": 0.0084, "step": 24480 }, { "epoch": 22.02338129496403, "grad_norm": 0.1700417846441269, "learning_rate": 6.884355052060814e-05, "loss": 0.0075, "step": 24490 }, { "epoch": 22.032374100719423, "grad_norm": 0.19941700994968414, "learning_rate": 6.88180217854396e-05, "loss": 0.0074, "step": 24500 }, { "epoch": 22.04136690647482, "grad_norm": 0.19667233526706696, "learning_rate": 6.87924873338529e-05, "loss": 0.009, "step": 24510 }, { "epoch": 22.050359712230215, "grad_norm": 0.24992021918296814, "learning_rate": 6.876694717360475e-05, "loss": 0.0074, "step": 24520 }, { "epoch": 22.059352517985612, "grad_norm": 0.20081765949726105, "learning_rate": 6.874140131245355e-05, "loss": 0.007, "step": 24530 }, { "epoch": 22.068345323741006, "grad_norm": 0.2282036691904068, "learning_rate": 6.871584975815948e-05, "loss": 0.008, "step": 24540 }, { "epoch": 22.077338129496404, "grad_norm": 0.1336228996515274, "learning_rate": 6.86902925184844e-05, "loss": 0.0061, "step": 24550 }, { "epoch": 22.086330935251798, "grad_norm": 0.23839211463928223, "learning_rate": 6.866472960119195e-05, "loss": 0.0082, "step": 24560 }, { "epoch": 22.095323741007196, "grad_norm": 0.14668454229831696, "learning_rate": 6.863916101404748e-05, "loss": 0.0067, "step": 24570 }, { "epoch": 22.10431654676259, "grad_norm": 0.26727941632270813, "learning_rate": 6.8613586764818e-05, "loss": 0.0063, "step": 24580 }, { "epoch": 22.113309352517987, "grad_norm": 0.205424964427948, "learning_rate": 6.858800686127233e-05, "loss": 0.006, "step": 24590 }, { "epoch": 22.12230215827338, "grad_norm": 0.16386696696281433, "learning_rate": 6.856242131118097e-05, "loss": 0.0065, "step": 24600 }, { "epoch": 22.131294964028775, "grad_norm": 0.27613821625709534, "learning_rate": 6.853683012231614e-05, "loss": 0.007, "step": 24610 }, { "epoch": 22.140287769784173, "grad_norm": 0.3156723082065582, "learning_rate": 6.851123330245173e-05, "loss": 0.0068, "step": 24620 }, { "epoch": 22.149280575539567, "grad_norm": 0.19773389399051666, "learning_rate": 6.848563085936343e-05, "loss": 0.0067, "step": 24630 }, { "epoch": 22.158273381294965, "grad_norm": 0.12877443432807922, "learning_rate": 6.846002280082853e-05, "loss": 0.0055, "step": 24640 }, { "epoch": 22.16726618705036, "grad_norm": 0.15620602667331696, "learning_rate": 6.843440913462614e-05, "loss": 0.0049, "step": 24650 }, { "epoch": 22.176258992805757, "grad_norm": 0.2716774344444275, "learning_rate": 6.840878986853698e-05, "loss": 0.0063, "step": 24660 }, { "epoch": 22.18525179856115, "grad_norm": 0.20698171854019165, "learning_rate": 6.838316501034352e-05, "loss": 0.0056, "step": 24670 }, { "epoch": 22.194244604316548, "grad_norm": 0.12448994070291519, "learning_rate": 6.83575345678299e-05, "loss": 0.0049, "step": 24680 }, { "epoch": 22.203237410071942, "grad_norm": 0.2057008296251297, "learning_rate": 6.833189854878196e-05, "loss": 0.0064, "step": 24690 }, { "epoch": 22.21223021582734, "grad_norm": 0.2041039913892746, "learning_rate": 6.83062569609873e-05, "loss": 0.0065, "step": 24700 }, { "epoch": 22.221223021582734, "grad_norm": 0.1683950126171112, "learning_rate": 6.828060981223512e-05, "loss": 0.005, "step": 24710 }, { "epoch": 22.230215827338128, "grad_norm": 0.2644893527030945, "learning_rate": 6.825495711031634e-05, "loss": 0.0058, "step": 24720 }, { "epoch": 22.239208633093526, "grad_norm": 0.16260045766830444, "learning_rate": 6.822929886302359e-05, "loss": 0.0054, "step": 24730 }, { "epoch": 22.24820143884892, "grad_norm": 0.1395808607339859, "learning_rate": 6.820363507815116e-05, "loss": 0.0049, "step": 24740 }, { "epoch": 22.257194244604317, "grad_norm": 0.24730345606803894, "learning_rate": 6.817796576349501e-05, "loss": 0.0099, "step": 24750 }, { "epoch": 22.26618705035971, "grad_norm": 0.2103293091058731, "learning_rate": 6.815229092685285e-05, "loss": 0.0099, "step": 24760 }, { "epoch": 22.27517985611511, "grad_norm": 0.2003534734249115, "learning_rate": 6.812661057602399e-05, "loss": 0.0063, "step": 24770 }, { "epoch": 22.284172661870503, "grad_norm": 0.17832860350608826, "learning_rate": 6.810092471880943e-05, "loss": 0.0084, "step": 24780 }, { "epoch": 22.2931654676259, "grad_norm": 0.27958524227142334, "learning_rate": 6.807523336301187e-05, "loss": 0.0087, "step": 24790 }, { "epoch": 22.302158273381295, "grad_norm": 0.23434703052043915, "learning_rate": 6.804953651643566e-05, "loss": 0.0091, "step": 24800 }, { "epoch": 22.31115107913669, "grad_norm": 0.1708628088235855, "learning_rate": 6.802383418688685e-05, "loss": 0.0084, "step": 24810 }, { "epoch": 22.320143884892087, "grad_norm": 0.2236488163471222, "learning_rate": 6.799812638217309e-05, "loss": 0.0086, "step": 24820 }, { "epoch": 22.32913669064748, "grad_norm": 0.19188006222248077, "learning_rate": 6.797241311010373e-05, "loss": 0.0084, "step": 24830 }, { "epoch": 22.33812949640288, "grad_norm": 0.2058544009923935, "learning_rate": 6.794669437848982e-05, "loss": 0.0098, "step": 24840 }, { "epoch": 22.347122302158272, "grad_norm": 0.15054862201213837, "learning_rate": 6.792097019514402e-05, "loss": 0.0076, "step": 24850 }, { "epoch": 22.35611510791367, "grad_norm": 0.28161635994911194, "learning_rate": 6.789524056788064e-05, "loss": 0.0081, "step": 24860 }, { "epoch": 22.365107913669064, "grad_norm": 0.10301727056503296, "learning_rate": 6.786950550451567e-05, "loss": 0.0065, "step": 24870 }, { "epoch": 22.37410071942446, "grad_norm": 0.1917203664779663, "learning_rate": 6.784376501286676e-05, "loss": 0.0075, "step": 24880 }, { "epoch": 22.383093525179856, "grad_norm": 0.1319398730993271, "learning_rate": 6.781801910075316e-05, "loss": 0.0046, "step": 24890 }, { "epoch": 22.392086330935253, "grad_norm": 0.15868060290813446, "learning_rate": 6.779226777599581e-05, "loss": 0.0061, "step": 24900 }, { "epoch": 22.401079136690647, "grad_norm": 0.14803868532180786, "learning_rate": 6.776651104641729e-05, "loss": 0.0092, "step": 24910 }, { "epoch": 22.41007194244604, "grad_norm": 0.3135799169540405, "learning_rate": 6.774074891984183e-05, "loss": 0.0083, "step": 24920 }, { "epoch": 22.41906474820144, "grad_norm": 0.1634533703327179, "learning_rate": 6.771498140409526e-05, "loss": 0.0076, "step": 24930 }, { "epoch": 22.428057553956833, "grad_norm": 0.19417089223861694, "learning_rate": 6.768920850700506e-05, "loss": 0.0054, "step": 24940 }, { "epoch": 22.43705035971223, "grad_norm": 0.23575927317142487, "learning_rate": 6.766343023640039e-05, "loss": 0.011, "step": 24950 }, { "epoch": 22.446043165467625, "grad_norm": 0.1703750640153885, "learning_rate": 6.763764660011198e-05, "loss": 0.0063, "step": 24960 }, { "epoch": 22.455035971223023, "grad_norm": 0.18592126667499542, "learning_rate": 6.761185760597223e-05, "loss": 0.0059, "step": 24970 }, { "epoch": 22.464028776978417, "grad_norm": 0.13312047719955444, "learning_rate": 6.758606326181515e-05, "loss": 0.0074, "step": 24980 }, { "epoch": 22.473021582733814, "grad_norm": 0.2886122763156891, "learning_rate": 6.75602635754764e-05, "loss": 0.0049, "step": 24990 }, { "epoch": 22.48201438848921, "grad_norm": 0.23330748081207275, "learning_rate": 6.75344585547932e-05, "loss": 0.0085, "step": 25000 }, { "epoch": 22.491007194244606, "grad_norm": 0.2803632616996765, "learning_rate": 6.750864820760449e-05, "loss": 0.0045, "step": 25010 }, { "epoch": 22.5, "grad_norm": 0.26724135875701904, "learning_rate": 6.748283254175072e-05, "loss": 0.0066, "step": 25020 }, { "epoch": 22.508992805755394, "grad_norm": 0.20299871265888214, "learning_rate": 6.745701156507404e-05, "loss": 0.0063, "step": 25030 }, { "epoch": 22.51798561151079, "grad_norm": 0.14068758487701416, "learning_rate": 6.743118528541818e-05, "loss": 0.0051, "step": 25040 }, { "epoch": 22.526978417266186, "grad_norm": 0.3487178385257721, "learning_rate": 6.740535371062846e-05, "loss": 0.0068, "step": 25050 }, { "epoch": 22.535971223021583, "grad_norm": 0.15174919366836548, "learning_rate": 6.737951684855185e-05, "loss": 0.0052, "step": 25060 }, { "epoch": 22.544964028776977, "grad_norm": 0.20076607167720795, "learning_rate": 6.735367470703691e-05, "loss": 0.0064, "step": 25070 }, { "epoch": 22.553956834532375, "grad_norm": 0.1949676275253296, "learning_rate": 6.732782729393379e-05, "loss": 0.0063, "step": 25080 }, { "epoch": 22.56294964028777, "grad_norm": 0.23798461258411407, "learning_rate": 6.730197461709425e-05, "loss": 0.0055, "step": 25090 }, { "epoch": 22.571942446043167, "grad_norm": 0.15913636982440948, "learning_rate": 6.727611668437164e-05, "loss": 0.0117, "step": 25100 }, { "epoch": 22.58093525179856, "grad_norm": 0.1877867728471756, "learning_rate": 6.725025350362094e-05, "loss": 0.0051, "step": 25110 }, { "epoch": 22.58992805755396, "grad_norm": 0.18457886576652527, "learning_rate": 6.72243850826987e-05, "loss": 0.005, "step": 25120 }, { "epoch": 22.598920863309353, "grad_norm": 0.19211389124393463, "learning_rate": 6.719851142946305e-05, "loss": 0.0052, "step": 25130 }, { "epoch": 22.607913669064747, "grad_norm": 0.2452288120985031, "learning_rate": 6.717263255177372e-05, "loss": 0.0108, "step": 25140 }, { "epoch": 22.616906474820144, "grad_norm": 0.1318684220314026, "learning_rate": 6.714674845749205e-05, "loss": 0.0064, "step": 25150 }, { "epoch": 22.62589928057554, "grad_norm": 0.15961799025535583, "learning_rate": 6.712085915448092e-05, "loss": 0.0064, "step": 25160 }, { "epoch": 22.634892086330936, "grad_norm": 0.23387688398361206, "learning_rate": 6.709496465060486e-05, "loss": 0.0063, "step": 25170 }, { "epoch": 22.64388489208633, "grad_norm": 0.13773015141487122, "learning_rate": 6.706906495372987e-05, "loss": 0.0047, "step": 25180 }, { "epoch": 22.652877697841728, "grad_norm": 0.22859816253185272, "learning_rate": 6.704316007172365e-05, "loss": 0.0063, "step": 25190 }, { "epoch": 22.66187050359712, "grad_norm": 0.12098641693592072, "learning_rate": 6.701725001245539e-05, "loss": 0.0081, "step": 25200 }, { "epoch": 22.67086330935252, "grad_norm": 0.13017188012599945, "learning_rate": 6.699133478379588e-05, "loss": 0.006, "step": 25210 }, { "epoch": 22.679856115107913, "grad_norm": 0.13640369474887848, "learning_rate": 6.69654143936175e-05, "loss": 0.0062, "step": 25220 }, { "epoch": 22.68884892086331, "grad_norm": 0.18268048763275146, "learning_rate": 6.693948884979419e-05, "loss": 0.0077, "step": 25230 }, { "epoch": 22.697841726618705, "grad_norm": 0.14609354734420776, "learning_rate": 6.691355816020142e-05, "loss": 0.0053, "step": 25240 }, { "epoch": 22.7068345323741, "grad_norm": 0.1377611756324768, "learning_rate": 6.688762233271624e-05, "loss": 0.008, "step": 25250 }, { "epoch": 22.715827338129497, "grad_norm": 0.18198885023593903, "learning_rate": 6.68616813752173e-05, "loss": 0.0079, "step": 25260 }, { "epoch": 22.72482014388489, "grad_norm": 0.19169409573078156, "learning_rate": 6.683573529558477e-05, "loss": 0.0062, "step": 25270 }, { "epoch": 22.73381294964029, "grad_norm": 0.19559836387634277, "learning_rate": 6.680978410170037e-05, "loss": 0.0058, "step": 25280 }, { "epoch": 22.742805755395683, "grad_norm": 0.18849016726016998, "learning_rate": 6.678382780144741e-05, "loss": 0.006, "step": 25290 }, { "epoch": 22.75179856115108, "grad_norm": 0.1444452852010727, "learning_rate": 6.675786640271071e-05, "loss": 0.0057, "step": 25300 }, { "epoch": 22.760791366906474, "grad_norm": 0.1781364381313324, "learning_rate": 6.673189991337665e-05, "loss": 0.0061, "step": 25310 }, { "epoch": 22.769784172661872, "grad_norm": 0.22016532719135284, "learning_rate": 6.670592834133317e-05, "loss": 0.0105, "step": 25320 }, { "epoch": 22.778776978417266, "grad_norm": 0.28686901926994324, "learning_rate": 6.667995169446979e-05, "loss": 0.006, "step": 25330 }, { "epoch": 22.78776978417266, "grad_norm": 0.300388365983963, "learning_rate": 6.665396998067747e-05, "loss": 0.0111, "step": 25340 }, { "epoch": 22.796762589928058, "grad_norm": 0.22115008533000946, "learning_rate": 6.66279832078488e-05, "loss": 0.0057, "step": 25350 }, { "epoch": 22.805755395683452, "grad_norm": 0.23608194291591644, "learning_rate": 6.660199138387786e-05, "loss": 0.0049, "step": 25360 }, { "epoch": 22.81474820143885, "grad_norm": 0.23805662989616394, "learning_rate": 6.65759945166603e-05, "loss": 0.0105, "step": 25370 }, { "epoch": 22.823741007194243, "grad_norm": 0.16314807534217834, "learning_rate": 6.654999261409326e-05, "loss": 0.0117, "step": 25380 }, { "epoch": 22.83273381294964, "grad_norm": 0.17173372209072113, "learning_rate": 6.652398568407544e-05, "loss": 0.0076, "step": 25390 }, { "epoch": 22.841726618705035, "grad_norm": 0.2301483452320099, "learning_rate": 6.649797373450707e-05, "loss": 0.0099, "step": 25400 }, { "epoch": 22.850719424460433, "grad_norm": 0.12940211594104767, "learning_rate": 6.647195677328988e-05, "loss": 0.0055, "step": 25410 }, { "epoch": 22.859712230215827, "grad_norm": 0.1764741688966751, "learning_rate": 6.644593480832712e-05, "loss": 0.0066, "step": 25420 }, { "epoch": 22.868705035971225, "grad_norm": 0.2818405032157898, "learning_rate": 6.641990784752363e-05, "loss": 0.0077, "step": 25430 }, { "epoch": 22.87769784172662, "grad_norm": 0.26845139265060425, "learning_rate": 6.639387589878566e-05, "loss": 0.0078, "step": 25440 }, { "epoch": 22.886690647482013, "grad_norm": 0.19895890355110168, "learning_rate": 6.636783897002103e-05, "loss": 0.0067, "step": 25450 }, { "epoch": 22.89568345323741, "grad_norm": 0.22665424644947052, "learning_rate": 6.63417970691391e-05, "loss": 0.0069, "step": 25460 }, { "epoch": 22.904676258992804, "grad_norm": 0.2057647407054901, "learning_rate": 6.63157502040507e-05, "loss": 0.0051, "step": 25470 }, { "epoch": 22.913669064748202, "grad_norm": 0.25163891911506653, "learning_rate": 6.628969838266819e-05, "loss": 0.0099, "step": 25480 }, { "epoch": 22.922661870503596, "grad_norm": 0.15878528356552124, "learning_rate": 6.626364161290541e-05, "loss": 0.0071, "step": 25490 }, { "epoch": 22.931654676258994, "grad_norm": 0.18646429479122162, "learning_rate": 6.623757990267774e-05, "loss": 0.0066, "step": 25500 }, { "epoch": 22.940647482014388, "grad_norm": 0.23628593981266022, "learning_rate": 6.621151325990201e-05, "loss": 0.0065, "step": 25510 }, { "epoch": 22.949640287769785, "grad_norm": 0.18446359038352966, "learning_rate": 6.618544169249657e-05, "loss": 0.0079, "step": 25520 }, { "epoch": 22.95863309352518, "grad_norm": 0.19531917572021484, "learning_rate": 6.615936520838133e-05, "loss": 0.0079, "step": 25530 }, { "epoch": 22.967625899280577, "grad_norm": 0.11496023833751678, "learning_rate": 6.613328381547759e-05, "loss": 0.0042, "step": 25540 }, { "epoch": 22.97661870503597, "grad_norm": 0.2527587115764618, "learning_rate": 6.610719752170821e-05, "loss": 0.0168, "step": 25550 }, { "epoch": 22.985611510791365, "grad_norm": 0.21324285864830017, "learning_rate": 6.60811063349975e-05, "loss": 0.0072, "step": 25560 }, { "epoch": 22.994604316546763, "grad_norm": 0.2344040423631668, "learning_rate": 6.605501026327127e-05, "loss": 0.0064, "step": 25570 }, { "epoch": 23.003597122302157, "grad_norm": 0.17872671782970428, "learning_rate": 6.602890931445685e-05, "loss": 0.0104, "step": 25580 }, { "epoch": 23.012589928057555, "grad_norm": 0.29387161135673523, "learning_rate": 6.6002803496483e-05, "loss": 0.0085, "step": 25590 }, { "epoch": 23.02158273381295, "grad_norm": 0.20236091315746307, "learning_rate": 6.597669281727997e-05, "loss": 0.0088, "step": 25600 }, { "epoch": 23.030575539568346, "grad_norm": 0.1549670398235321, "learning_rate": 6.595057728477949e-05, "loss": 0.0095, "step": 25610 }, { "epoch": 23.03956834532374, "grad_norm": 0.2565345764160156, "learning_rate": 6.59244569069148e-05, "loss": 0.0069, "step": 25620 }, { "epoch": 23.048561151079138, "grad_norm": 0.1928773820400238, "learning_rate": 6.589833169162054e-05, "loss": 0.0072, "step": 25630 }, { "epoch": 23.057553956834532, "grad_norm": 0.18872885406017303, "learning_rate": 6.587220164683291e-05, "loss": 0.0084, "step": 25640 }, { "epoch": 23.06654676258993, "grad_norm": 0.17713019251823425, "learning_rate": 6.58460667804895e-05, "loss": 0.006, "step": 25650 }, { "epoch": 23.075539568345324, "grad_norm": 0.16810496151447296, "learning_rate": 6.581992710052938e-05, "loss": 0.0085, "step": 25660 }, { "epoch": 23.084532374100718, "grad_norm": 0.2929060161113739, "learning_rate": 6.579378261489311e-05, "loss": 0.0075, "step": 25670 }, { "epoch": 23.093525179856115, "grad_norm": 0.19772882759571075, "learning_rate": 6.576763333152268e-05, "loss": 0.0071, "step": 25680 }, { "epoch": 23.10251798561151, "grad_norm": 0.19651134312152863, "learning_rate": 6.574147925836159e-05, "loss": 0.0052, "step": 25690 }, { "epoch": 23.111510791366907, "grad_norm": 0.1742914319038391, "learning_rate": 6.571532040335472e-05, "loss": 0.0069, "step": 25700 }, { "epoch": 23.1205035971223, "grad_norm": 0.2458076775074005, "learning_rate": 6.568915677444845e-05, "loss": 0.0057, "step": 25710 }, { "epoch": 23.1294964028777, "grad_norm": 0.1758688986301422, "learning_rate": 6.56629883795906e-05, "loss": 0.007, "step": 25720 }, { "epoch": 23.138489208633093, "grad_norm": 0.1598518192768097, "learning_rate": 6.563681522673043e-05, "loss": 0.0056, "step": 25730 }, { "epoch": 23.14748201438849, "grad_norm": 0.25416865944862366, "learning_rate": 6.561063732381867e-05, "loss": 0.0079, "step": 25740 }, { "epoch": 23.156474820143885, "grad_norm": 0.19872917234897614, "learning_rate": 6.558445467880745e-05, "loss": 0.0072, "step": 25750 }, { "epoch": 23.165467625899282, "grad_norm": 0.22300216555595398, "learning_rate": 6.55582672996504e-05, "loss": 0.0069, "step": 25760 }, { "epoch": 23.174460431654676, "grad_norm": 0.16222134232521057, "learning_rate": 6.553207519430253e-05, "loss": 0.0059, "step": 25770 }, { "epoch": 23.18345323741007, "grad_norm": 0.12974482774734497, "learning_rate": 6.550587837072032e-05, "loss": 0.0073, "step": 25780 }, { "epoch": 23.192446043165468, "grad_norm": 0.17593681812286377, "learning_rate": 6.547967683686166e-05, "loss": 0.0071, "step": 25790 }, { "epoch": 23.201438848920862, "grad_norm": 0.10477674007415771, "learning_rate": 6.545347060068591e-05, "loss": 0.0045, "step": 25800 }, { "epoch": 23.21043165467626, "grad_norm": 0.21561546623706818, "learning_rate": 6.542725967015382e-05, "loss": 0.0067, "step": 25810 }, { "epoch": 23.219424460431654, "grad_norm": 0.35505449771881104, "learning_rate": 6.540104405322757e-05, "loss": 0.0049, "step": 25820 }, { "epoch": 23.22841726618705, "grad_norm": 0.16265809535980225, "learning_rate": 6.537482375787077e-05, "loss": 0.0078, "step": 25830 }, { "epoch": 23.237410071942445, "grad_norm": 0.23147019743919373, "learning_rate": 6.534859879204845e-05, "loss": 0.0071, "step": 25840 }, { "epoch": 23.246402877697843, "grad_norm": 0.12245775014162064, "learning_rate": 6.532236916372709e-05, "loss": 0.006, "step": 25850 }, { "epoch": 23.255395683453237, "grad_norm": 0.2819429039955139, "learning_rate": 6.529613488087454e-05, "loss": 0.0051, "step": 25860 }, { "epoch": 23.264388489208635, "grad_norm": 0.13173362612724304, "learning_rate": 6.526989595146009e-05, "loss": 0.0044, "step": 25870 }, { "epoch": 23.27338129496403, "grad_norm": 0.13382597267627716, "learning_rate": 6.524365238345441e-05, "loss": 0.0046, "step": 25880 }, { "epoch": 23.282374100719423, "grad_norm": 0.22957465052604675, "learning_rate": 6.521740418482964e-05, "loss": 0.0069, "step": 25890 }, { "epoch": 23.29136690647482, "grad_norm": 0.15282553434371948, "learning_rate": 6.519115136355925e-05, "loss": 0.0063, "step": 25900 }, { "epoch": 23.300359712230215, "grad_norm": 0.17514668405056, "learning_rate": 6.51648939276182e-05, "loss": 0.0066, "step": 25910 }, { "epoch": 23.309352517985612, "grad_norm": 0.17105929553508759, "learning_rate": 6.513863188498277e-05, "loss": 0.007, "step": 25920 }, { "epoch": 23.318345323741006, "grad_norm": 0.12520264089107513, "learning_rate": 6.511236524363068e-05, "loss": 0.0068, "step": 25930 }, { "epoch": 23.327338129496404, "grad_norm": 0.20469623804092407, "learning_rate": 6.508609401154104e-05, "loss": 0.0086, "step": 25940 }, { "epoch": 23.336330935251798, "grad_norm": 0.16255970299243927, "learning_rate": 6.505981819669439e-05, "loss": 0.0073, "step": 25950 }, { "epoch": 23.345323741007196, "grad_norm": 0.1531354933977127, "learning_rate": 6.503353780707258e-05, "loss": 0.0091, "step": 25960 }, { "epoch": 23.35431654676259, "grad_norm": 0.20305457711219788, "learning_rate": 6.500725285065895e-05, "loss": 0.0073, "step": 25970 }, { "epoch": 23.363309352517987, "grad_norm": 0.31689563393592834, "learning_rate": 6.498096333543813e-05, "loss": 0.0104, "step": 25980 }, { "epoch": 23.37230215827338, "grad_norm": 0.1881938874721527, "learning_rate": 6.49546692693962e-05, "loss": 0.0046, "step": 25990 }, { "epoch": 23.381294964028775, "grad_norm": 0.3379007875919342, "learning_rate": 6.492837066052059e-05, "loss": 0.0101, "step": 26000 }, { "epoch": 23.390287769784173, "grad_norm": 0.21666079759597778, "learning_rate": 6.490206751680014e-05, "loss": 0.0104, "step": 26010 }, { "epoch": 23.399280575539567, "grad_norm": 0.23008207976818085, "learning_rate": 6.487575984622505e-05, "loss": 0.011, "step": 26020 }, { "epoch": 23.408273381294965, "grad_norm": 0.12897329032421112, "learning_rate": 6.484944765678689e-05, "loss": 0.0079, "step": 26030 }, { "epoch": 23.41726618705036, "grad_norm": 0.3997364044189453, "learning_rate": 6.482313095647861e-05, "loss": 0.0072, "step": 26040 }, { "epoch": 23.426258992805757, "grad_norm": 0.2732943296432495, "learning_rate": 6.479680975329451e-05, "loss": 0.009, "step": 26050 }, { "epoch": 23.43525179856115, "grad_norm": 0.1992095559835434, "learning_rate": 6.477048405523031e-05, "loss": 0.006, "step": 26060 }, { "epoch": 23.444244604316548, "grad_norm": 0.160562202334404, "learning_rate": 6.474415387028304e-05, "loss": 0.0071, "step": 26070 }, { "epoch": 23.453237410071942, "grad_norm": 0.17408934235572815, "learning_rate": 6.471781920645114e-05, "loss": 0.0046, "step": 26080 }, { "epoch": 23.46223021582734, "grad_norm": 0.33871954679489136, "learning_rate": 6.469148007173434e-05, "loss": 0.0081, "step": 26090 }, { "epoch": 23.471223021582734, "grad_norm": 0.2411004900932312, "learning_rate": 6.466513647413381e-05, "loss": 0.0072, "step": 26100 }, { "epoch": 23.480215827338128, "grad_norm": 0.2071976214647293, "learning_rate": 6.463878842165203e-05, "loss": 0.0072, "step": 26110 }, { "epoch": 23.489208633093526, "grad_norm": 0.17727524042129517, "learning_rate": 6.461243592229286e-05, "loss": 0.0068, "step": 26120 }, { "epoch": 23.49820143884892, "grad_norm": 0.14197249710559845, "learning_rate": 6.458607898406146e-05, "loss": 0.0038, "step": 26130 }, { "epoch": 23.507194244604317, "grad_norm": 0.140555277466774, "learning_rate": 6.455971761496439e-05, "loss": 0.004, "step": 26140 }, { "epoch": 23.51618705035971, "grad_norm": 0.21250902116298676, "learning_rate": 6.453335182300953e-05, "loss": 0.0057, "step": 26150 }, { "epoch": 23.52517985611511, "grad_norm": 0.24020341038703918, "learning_rate": 6.450698161620612e-05, "loss": 0.0056, "step": 26160 }, { "epoch": 23.534172661870503, "grad_norm": 0.28013789653778076, "learning_rate": 6.448060700256473e-05, "loss": 0.0118, "step": 26170 }, { "epoch": 23.5431654676259, "grad_norm": 0.16043145954608917, "learning_rate": 6.445422799009726e-05, "loss": 0.0048, "step": 26180 }, { "epoch": 23.552158273381295, "grad_norm": 0.2585199773311615, "learning_rate": 6.442784458681699e-05, "loss": 0.0074, "step": 26190 }, { "epoch": 23.56115107913669, "grad_norm": 0.24837376177310944, "learning_rate": 6.440145680073847e-05, "loss": 0.0066, "step": 26200 }, { "epoch": 23.570143884892087, "grad_norm": 0.15649797022342682, "learning_rate": 6.437506463987762e-05, "loss": 0.0082, "step": 26210 }, { "epoch": 23.57913669064748, "grad_norm": 0.127730131149292, "learning_rate": 6.434866811225168e-05, "loss": 0.0053, "step": 26220 }, { "epoch": 23.58812949640288, "grad_norm": 0.17294727265834808, "learning_rate": 6.432226722587923e-05, "loss": 0.008, "step": 26230 }, { "epoch": 23.597122302158272, "grad_norm": 0.22038313746452332, "learning_rate": 6.429586198878015e-05, "loss": 0.0066, "step": 26240 }, { "epoch": 23.60611510791367, "grad_norm": 0.2210206538438797, "learning_rate": 6.426945240897566e-05, "loss": 0.0125, "step": 26250 }, { "epoch": 23.615107913669064, "grad_norm": 0.2211197167634964, "learning_rate": 6.424303849448829e-05, "loss": 0.0062, "step": 26260 }, { "epoch": 23.62410071942446, "grad_norm": 0.2441830039024353, "learning_rate": 6.42166202533419e-05, "loss": 0.0056, "step": 26270 }, { "epoch": 23.633093525179856, "grad_norm": 0.1653791218996048, "learning_rate": 6.419019769356164e-05, "loss": 0.0053, "step": 26280 }, { "epoch": 23.642086330935253, "grad_norm": 0.22534283995628357, "learning_rate": 6.416377082317398e-05, "loss": 0.0067, "step": 26290 }, { "epoch": 23.651079136690647, "grad_norm": 0.2501504123210907, "learning_rate": 6.413733965020674e-05, "loss": 0.0071, "step": 26300 }, { "epoch": 23.66007194244604, "grad_norm": 0.24597099423408508, "learning_rate": 6.411090418268896e-05, "loss": 0.0048, "step": 26310 }, { "epoch": 23.66906474820144, "grad_norm": 0.14999598264694214, "learning_rate": 6.408446442865109e-05, "loss": 0.0046, "step": 26320 }, { "epoch": 23.678057553956833, "grad_norm": 0.22146525979042053, "learning_rate": 6.405802039612479e-05, "loss": 0.0082, "step": 26330 }, { "epoch": 23.68705035971223, "grad_norm": 0.38757237792015076, "learning_rate": 6.403157209314308e-05, "loss": 0.0073, "step": 26340 }, { "epoch": 23.696043165467625, "grad_norm": 0.3185563385486603, "learning_rate": 6.400511952774024e-05, "loss": 0.0067, "step": 26350 }, { "epoch": 23.705035971223023, "grad_norm": 0.15634842216968536, "learning_rate": 6.397866270795187e-05, "loss": 0.0063, "step": 26360 }, { "epoch": 23.714028776978417, "grad_norm": 0.19084912538528442, "learning_rate": 6.395220164181489e-05, "loss": 0.0047, "step": 26370 }, { "epoch": 23.723021582733814, "grad_norm": 0.17953072488307953, "learning_rate": 6.39257363373674e-05, "loss": 0.0053, "step": 26380 }, { "epoch": 23.73201438848921, "grad_norm": 0.15117229521274567, "learning_rate": 6.389926680264892e-05, "loss": 0.0078, "step": 26390 }, { "epoch": 23.741007194244606, "grad_norm": 0.22819961607456207, "learning_rate": 6.387279304570017e-05, "loss": 0.0109, "step": 26400 }, { "epoch": 23.75, "grad_norm": 0.15160059928894043, "learning_rate": 6.384631507456319e-05, "loss": 0.0063, "step": 26410 }, { "epoch": 23.758992805755394, "grad_norm": 0.23415794968605042, "learning_rate": 6.381983289728126e-05, "loss": 0.0072, "step": 26420 }, { "epoch": 23.76798561151079, "grad_norm": 0.2204677015542984, "learning_rate": 6.3793346521899e-05, "loss": 0.007, "step": 26430 }, { "epoch": 23.776978417266186, "grad_norm": 0.20534831285476685, "learning_rate": 6.376685595646226e-05, "loss": 0.0069, "step": 26440 }, { "epoch": 23.785971223021583, "grad_norm": 0.17284001410007477, "learning_rate": 6.374036120901816e-05, "loss": 0.0085, "step": 26450 }, { "epoch": 23.794964028776977, "grad_norm": 0.22317348420619965, "learning_rate": 6.371386228761514e-05, "loss": 0.0064, "step": 26460 }, { "epoch": 23.803956834532375, "grad_norm": 0.28411269187927246, "learning_rate": 6.368735920030283e-05, "loss": 0.011, "step": 26470 }, { "epoch": 23.81294964028777, "grad_norm": 0.1561914086341858, "learning_rate": 6.366085195513218e-05, "loss": 0.0089, "step": 26480 }, { "epoch": 23.821942446043167, "grad_norm": 0.10438711196184158, "learning_rate": 6.363434056015543e-05, "loss": 0.0132, "step": 26490 }, { "epoch": 23.83093525179856, "grad_norm": 0.16075053811073303, "learning_rate": 6.360782502342599e-05, "loss": 0.0056, "step": 26500 }, { "epoch": 23.83992805755396, "grad_norm": 0.2223426252603531, "learning_rate": 6.358130535299862e-05, "loss": 0.0091, "step": 26510 }, { "epoch": 23.848920863309353, "grad_norm": 0.19274502992630005, "learning_rate": 6.355478155692926e-05, "loss": 0.0091, "step": 26520 }, { "epoch": 23.857913669064747, "grad_norm": 0.23463091254234314, "learning_rate": 6.352825364327517e-05, "loss": 0.0063, "step": 26530 }, { "epoch": 23.866906474820144, "grad_norm": 0.24232147634029388, "learning_rate": 6.350172162009482e-05, "loss": 0.0117, "step": 26540 }, { "epoch": 23.87589928057554, "grad_norm": 0.3483547568321228, "learning_rate": 6.347518549544793e-05, "loss": 0.008, "step": 26550 }, { "epoch": 23.884892086330936, "grad_norm": 0.15717367827892303, "learning_rate": 6.344864527739547e-05, "loss": 0.0053, "step": 26560 }, { "epoch": 23.89388489208633, "grad_norm": 0.12186867743730545, "learning_rate": 6.342210097399966e-05, "loss": 0.0064, "step": 26570 }, { "epoch": 23.902877697841728, "grad_norm": 0.10475695878267288, "learning_rate": 6.339555259332398e-05, "loss": 0.0118, "step": 26580 }, { "epoch": 23.91187050359712, "grad_norm": 0.3412407338619232, "learning_rate": 6.33690001434331e-05, "loss": 0.0117, "step": 26590 }, { "epoch": 23.92086330935252, "grad_norm": 0.17111700773239136, "learning_rate": 6.334244363239296e-05, "loss": 0.0074, "step": 26600 }, { "epoch": 23.929856115107913, "grad_norm": 0.15417084097862244, "learning_rate": 6.331588306827073e-05, "loss": 0.0061, "step": 26610 }, { "epoch": 23.93884892086331, "grad_norm": 0.1527310311794281, "learning_rate": 6.328931845913483e-05, "loss": 0.0061, "step": 26620 }, { "epoch": 23.947841726618705, "grad_norm": 0.23790770769119263, "learning_rate": 6.326274981305484e-05, "loss": 0.006, "step": 26630 }, { "epoch": 23.9568345323741, "grad_norm": 0.22660741209983826, "learning_rate": 6.323617713810166e-05, "loss": 0.009, "step": 26640 }, { "epoch": 23.965827338129497, "grad_norm": 0.18968918919563293, "learning_rate": 6.320960044234734e-05, "loss": 0.0067, "step": 26650 }, { "epoch": 23.97482014388489, "grad_norm": 0.22079730033874512, "learning_rate": 6.318301973386518e-05, "loss": 0.011, "step": 26660 }, { "epoch": 23.98381294964029, "grad_norm": 0.39688870310783386, "learning_rate": 6.315643502072971e-05, "loss": 0.0079, "step": 26670 }, { "epoch": 23.992805755395683, "grad_norm": 0.31261032819747925, "learning_rate": 6.312984631101667e-05, "loss": 0.0057, "step": 26680 }, { "epoch": 24.00179856115108, "grad_norm": 0.22443854808807373, "learning_rate": 6.310325361280297e-05, "loss": 0.006, "step": 26690 }, { "epoch": 24.010791366906474, "grad_norm": 0.16960351169109344, "learning_rate": 6.30766569341668e-05, "loss": 0.0055, "step": 26700 }, { "epoch": 24.019784172661872, "grad_norm": 0.20183223485946655, "learning_rate": 6.305005628318753e-05, "loss": 0.008, "step": 26710 }, { "epoch": 24.028776978417266, "grad_norm": 0.08873871713876724, "learning_rate": 6.302345166794572e-05, "loss": 0.0054, "step": 26720 }, { "epoch": 24.037769784172664, "grad_norm": 0.17628367245197296, "learning_rate": 6.299684309652316e-05, "loss": 0.0067, "step": 26730 }, { "epoch": 24.046762589928058, "grad_norm": 0.21992948651313782, "learning_rate": 6.297023057700283e-05, "loss": 0.0056, "step": 26740 }, { "epoch": 24.055755395683452, "grad_norm": 0.14069709181785583, "learning_rate": 6.294361411746891e-05, "loss": 0.0065, "step": 26750 }, { "epoch": 24.06474820143885, "grad_norm": 0.11635030061006546, "learning_rate": 6.291699372600677e-05, "loss": 0.0057, "step": 26760 }, { "epoch": 24.073741007194243, "grad_norm": 0.19360484182834625, "learning_rate": 6.2890369410703e-05, "loss": 0.0076, "step": 26770 }, { "epoch": 24.08273381294964, "grad_norm": 0.18817460536956787, "learning_rate": 6.286374117964534e-05, "loss": 0.0064, "step": 26780 }, { "epoch": 24.091726618705035, "grad_norm": 0.14769837260246277, "learning_rate": 6.283710904092277e-05, "loss": 0.005, "step": 26790 }, { "epoch": 24.100719424460433, "grad_norm": 0.265373557806015, "learning_rate": 6.281047300262542e-05, "loss": 0.0055, "step": 26800 }, { "epoch": 24.109712230215827, "grad_norm": 0.3107026219367981, "learning_rate": 6.278383307284461e-05, "loss": 0.0062, "step": 26810 }, { "epoch": 24.118705035971225, "grad_norm": 0.08579645305871964, "learning_rate": 6.275718925967284e-05, "loss": 0.0055, "step": 26820 }, { "epoch": 24.12769784172662, "grad_norm": 0.13706117868423462, "learning_rate": 6.273054157120382e-05, "loss": 0.006, "step": 26830 }, { "epoch": 24.136690647482013, "grad_norm": 0.23599790036678314, "learning_rate": 6.270389001553238e-05, "loss": 0.0068, "step": 26840 }, { "epoch": 24.14568345323741, "grad_norm": 0.23051169514656067, "learning_rate": 6.26772346007546e-05, "loss": 0.0089, "step": 26850 }, { "epoch": 24.154676258992804, "grad_norm": 0.15975749492645264, "learning_rate": 6.265057533496767e-05, "loss": 0.0067, "step": 26860 }, { "epoch": 24.163669064748202, "grad_norm": 0.19852864742279053, "learning_rate": 6.262391222626997e-05, "loss": 0.0087, "step": 26870 }, { "epoch": 24.172661870503596, "grad_norm": 0.1195802241563797, "learning_rate": 6.259724528276106e-05, "loss": 0.0116, "step": 26880 }, { "epoch": 24.181654676258994, "grad_norm": 0.21019145846366882, "learning_rate": 6.257057451254162e-05, "loss": 0.0058, "step": 26890 }, { "epoch": 24.190647482014388, "grad_norm": 0.15979455411434174, "learning_rate": 6.254389992371357e-05, "loss": 0.0067, "step": 26900 }, { "epoch": 24.199640287769785, "grad_norm": 0.24950020015239716, "learning_rate": 6.25172215243799e-05, "loss": 0.0088, "step": 26910 }, { "epoch": 24.20863309352518, "grad_norm": 0.18225286900997162, "learning_rate": 6.249053932264486e-05, "loss": 0.0104, "step": 26920 }, { "epoch": 24.217625899280577, "grad_norm": 0.14857438206672668, "learning_rate": 6.246385332661376e-05, "loss": 0.0072, "step": 26930 }, { "epoch": 24.22661870503597, "grad_norm": 0.10678817331790924, "learning_rate": 6.24371635443931e-05, "loss": 0.0081, "step": 26940 }, { "epoch": 24.235611510791365, "grad_norm": 0.2625702917575836, "learning_rate": 6.241046998409054e-05, "loss": 0.004, "step": 26950 }, { "epoch": 24.244604316546763, "grad_norm": 0.20037072896957397, "learning_rate": 6.238377265381489e-05, "loss": 0.007, "step": 26960 }, { "epoch": 24.253597122302157, "grad_norm": 0.2337961494922638, "learning_rate": 6.235707156167607e-05, "loss": 0.0109, "step": 26970 }, { "epoch": 24.262589928057555, "grad_norm": 0.17172324657440186, "learning_rate": 6.233036671578519e-05, "loss": 0.0097, "step": 26980 }, { "epoch": 24.27158273381295, "grad_norm": 0.17924271523952484, "learning_rate": 6.230365812425445e-05, "loss": 0.0082, "step": 26990 }, { "epoch": 24.280575539568346, "grad_norm": 0.13818536698818207, "learning_rate": 6.227694579519724e-05, "loss": 0.0082, "step": 27000 }, { "epoch": 24.28956834532374, "grad_norm": 0.12133927643299103, "learning_rate": 6.225022973672805e-05, "loss": 0.007, "step": 27010 }, { "epoch": 24.298561151079138, "grad_norm": 0.197404146194458, "learning_rate": 6.222350995696253e-05, "loss": 0.0106, "step": 27020 }, { "epoch": 24.307553956834532, "grad_norm": 0.19042761623859406, "learning_rate": 6.21967864640174e-05, "loss": 0.0056, "step": 27030 }, { "epoch": 24.31654676258993, "grad_norm": 0.19916926324367523, "learning_rate": 6.217005926601059e-05, "loss": 0.0045, "step": 27040 }, { "epoch": 24.325539568345324, "grad_norm": 0.3064606189727783, "learning_rate": 6.214332837106111e-05, "loss": 0.0058, "step": 27050 }, { "epoch": 24.334532374100718, "grad_norm": 0.18601588904857635, "learning_rate": 6.21165937872891e-05, "loss": 0.0055, "step": 27060 }, { "epoch": 24.343525179856115, "grad_norm": 0.2539576292037964, "learning_rate": 6.208985552281582e-05, "loss": 0.0071, "step": 27070 }, { "epoch": 24.35251798561151, "grad_norm": 0.22139865159988403, "learning_rate": 6.206311358576364e-05, "loss": 0.0118, "step": 27080 }, { "epoch": 24.361510791366907, "grad_norm": 0.21650363504886627, "learning_rate": 6.203636798425608e-05, "loss": 0.006, "step": 27090 }, { "epoch": 24.3705035971223, "grad_norm": 0.2573196589946747, "learning_rate": 6.20096187264177e-05, "loss": 0.0088, "step": 27100 }, { "epoch": 24.3794964028777, "grad_norm": 0.26271378993988037, "learning_rate": 6.198286582037425e-05, "loss": 0.0072, "step": 27110 }, { "epoch": 24.388489208633093, "grad_norm": 0.21779562532901764, "learning_rate": 6.195610927425256e-05, "loss": 0.0099, "step": 27120 }, { "epoch": 24.39748201438849, "grad_norm": 0.1733548641204834, "learning_rate": 6.192934909618056e-05, "loss": 0.0041, "step": 27130 }, { "epoch": 24.406474820143885, "grad_norm": 0.17599235475063324, "learning_rate": 6.190258529428728e-05, "loss": 0.0058, "step": 27140 }, { "epoch": 24.415467625899282, "grad_norm": 0.14254404604434967, "learning_rate": 6.187581787670285e-05, "loss": 0.0124, "step": 27150 }, { "epoch": 24.424460431654676, "grad_norm": 0.2095545381307602, "learning_rate": 6.184904685155852e-05, "loss": 0.0062, "step": 27160 }, { "epoch": 24.43345323741007, "grad_norm": 0.2148396372795105, "learning_rate": 6.18222722269866e-05, "loss": 0.0097, "step": 27170 }, { "epoch": 24.442446043165468, "grad_norm": 0.1881568878889084, "learning_rate": 6.179549401112053e-05, "loss": 0.0044, "step": 27180 }, { "epoch": 24.451438848920862, "grad_norm": 0.16706615686416626, "learning_rate": 6.176871221209482e-05, "loss": 0.009, "step": 27190 }, { "epoch": 24.46043165467626, "grad_norm": 0.19060589373111725, "learning_rate": 6.174192683804508e-05, "loss": 0.0068, "step": 27200 }, { "epoch": 24.469424460431654, "grad_norm": 0.1220439225435257, "learning_rate": 6.1715137897108e-05, "loss": 0.005, "step": 27210 }, { "epoch": 24.47841726618705, "grad_norm": 0.19164372980594635, "learning_rate": 6.168834539742134e-05, "loss": 0.0094, "step": 27220 }, { "epoch": 24.487410071942445, "grad_norm": 0.2794719934463501, "learning_rate": 6.166154934712397e-05, "loss": 0.0076, "step": 27230 }, { "epoch": 24.496402877697843, "grad_norm": 0.34807834029197693, "learning_rate": 6.163474975435581e-05, "loss": 0.0055, "step": 27240 }, { "epoch": 24.505395683453237, "grad_norm": 0.3399955630302429, "learning_rate": 6.160794662725787e-05, "loss": 0.0077, "step": 27250 }, { "epoch": 24.514388489208635, "grad_norm": 0.17956951260566711, "learning_rate": 6.158113997397222e-05, "loss": 0.0065, "step": 27260 }, { "epoch": 24.52338129496403, "grad_norm": 0.16660495102405548, "learning_rate": 6.155432980264205e-05, "loss": 0.0057, "step": 27270 }, { "epoch": 24.532374100719423, "grad_norm": 0.24199141561985016, "learning_rate": 6.152751612141156e-05, "loss": 0.01, "step": 27280 }, { "epoch": 24.54136690647482, "grad_norm": 0.1043739840388298, "learning_rate": 6.150069893842602e-05, "loss": 0.0061, "step": 27290 }, { "epoch": 24.550359712230215, "grad_norm": 0.18421317636966705, "learning_rate": 6.147387826183182e-05, "loss": 0.0048, "step": 27300 }, { "epoch": 24.559352517985612, "grad_norm": 0.21263131499290466, "learning_rate": 6.144705409977635e-05, "loss": 0.0106, "step": 27310 }, { "epoch": 24.568345323741006, "grad_norm": 0.13337036967277527, "learning_rate": 6.142022646040808e-05, "loss": 0.0061, "step": 27320 }, { "epoch": 24.577338129496404, "grad_norm": 0.1118081584572792, "learning_rate": 6.139339535187653e-05, "loss": 0.0055, "step": 27330 }, { "epoch": 24.586330935251798, "grad_norm": 0.1551995575428009, "learning_rate": 6.136656078233232e-05, "loss": 0.0042, "step": 27340 }, { "epoch": 24.595323741007196, "grad_norm": 0.258072167634964, "learning_rate": 6.133972275992707e-05, "loss": 0.0084, "step": 27350 }, { "epoch": 24.60431654676259, "grad_norm": 0.25259584188461304, "learning_rate": 6.131288129281342e-05, "loss": 0.0091, "step": 27360 }, { "epoch": 24.613309352517987, "grad_norm": 0.20056448876857758, "learning_rate": 6.128603638914516e-05, "loss": 0.0068, "step": 27370 }, { "epoch": 24.62230215827338, "grad_norm": 0.17329782247543335, "learning_rate": 6.125918805707704e-05, "loss": 0.0053, "step": 27380 }, { "epoch": 24.631294964028775, "grad_norm": 0.1853516846895218, "learning_rate": 6.123233630476485e-05, "loss": 0.0065, "step": 27390 }, { "epoch": 24.640287769784173, "grad_norm": 0.24920305609703064, "learning_rate": 6.120548114036547e-05, "loss": 0.0058, "step": 27400 }, { "epoch": 24.649280575539567, "grad_norm": 0.2802127003669739, "learning_rate": 6.117862257203679e-05, "loss": 0.0064, "step": 27410 }, { "epoch": 24.658273381294965, "grad_norm": 0.18443720042705536, "learning_rate": 6.115176060793771e-05, "loss": 0.0086, "step": 27420 }, { "epoch": 24.66726618705036, "grad_norm": 0.25389981269836426, "learning_rate": 6.112489525622822e-05, "loss": 0.004, "step": 27430 }, { "epoch": 24.676258992805757, "grad_norm": 0.3559819161891937, "learning_rate": 6.109802652506928e-05, "loss": 0.0062, "step": 27440 }, { "epoch": 24.68525179856115, "grad_norm": 0.3521062135696411, "learning_rate": 6.107115442262291e-05, "loss": 0.0068, "step": 27450 }, { "epoch": 24.694244604316548, "grad_norm": 0.19061291217803955, "learning_rate": 6.104427895705214e-05, "loss": 0.0079, "step": 27460 }, { "epoch": 24.703237410071942, "grad_norm": 0.16607201099395752, "learning_rate": 6.101740013652103e-05, "loss": 0.0046, "step": 27470 }, { "epoch": 24.71223021582734, "grad_norm": 0.305586040019989, "learning_rate": 6.099051796919465e-05, "loss": 0.0083, "step": 27480 }, { "epoch": 24.721223021582734, "grad_norm": 0.16846969723701477, "learning_rate": 6.096363246323911e-05, "loss": 0.011, "step": 27490 }, { "epoch": 24.730215827338128, "grad_norm": 0.20260165631771088, "learning_rate": 6.0936743626821504e-05, "loss": 0.0095, "step": 27500 }, { "epoch": 24.739208633093526, "grad_norm": 0.18935884535312653, "learning_rate": 6.090985146810996e-05, "loss": 0.0052, "step": 27510 }, { "epoch": 24.74820143884892, "grad_norm": 0.1605203002691269, "learning_rate": 6.088295599527357e-05, "loss": 0.0066, "step": 27520 }, { "epoch": 24.757194244604317, "grad_norm": 0.16459092497825623, "learning_rate": 6.085605721648252e-05, "loss": 0.0049, "step": 27530 }, { "epoch": 24.76618705035971, "grad_norm": 0.27912047505378723, "learning_rate": 6.082915513990792e-05, "loss": 0.006, "step": 27540 }, { "epoch": 24.77517985611511, "grad_norm": 0.15156346559524536, "learning_rate": 6.080224977372192e-05, "loss": 0.0055, "step": 27550 }, { "epoch": 24.784172661870503, "grad_norm": 0.18607330322265625, "learning_rate": 6.0775341126097666e-05, "loss": 0.005, "step": 27560 }, { "epoch": 24.7931654676259, "grad_norm": 0.16583313047885895, "learning_rate": 6.074842920520926e-05, "loss": 0.008, "step": 27570 }, { "epoch": 24.802158273381295, "grad_norm": 0.23084305226802826, "learning_rate": 6.072151401923186e-05, "loss": 0.006, "step": 27580 }, { "epoch": 24.81115107913669, "grad_norm": 0.19824640452861786, "learning_rate": 6.069459557634159e-05, "loss": 0.0085, "step": 27590 }, { "epoch": 24.820143884892087, "grad_norm": 0.35671356320381165, "learning_rate": 6.066767388471557e-05, "loss": 0.0084, "step": 27600 }, { "epoch": 24.82913669064748, "grad_norm": 0.28251996636390686, "learning_rate": 6.064074895253188e-05, "loss": 0.0092, "step": 27610 }, { "epoch": 24.83812949640288, "grad_norm": 0.11398886889219284, "learning_rate": 6.061382078796961e-05, "loss": 0.0121, "step": 27620 }, { "epoch": 24.847122302158272, "grad_norm": 0.21225863695144653, "learning_rate": 6.0586889399208814e-05, "loss": 0.0047, "step": 27630 }, { "epoch": 24.85611510791367, "grad_norm": 0.12876038253307343, "learning_rate": 6.0559954794430565e-05, "loss": 0.0078, "step": 27640 }, { "epoch": 24.865107913669064, "grad_norm": 0.17205320298671722, "learning_rate": 6.053301698181687e-05, "loss": 0.0088, "step": 27650 }, { "epoch": 24.87410071942446, "grad_norm": 0.26349276304244995, "learning_rate": 6.0506075969550725e-05, "loss": 0.0067, "step": 27660 }, { "epoch": 24.883093525179856, "grad_norm": 0.12269105762243271, "learning_rate": 6.047913176581609e-05, "loss": 0.0064, "step": 27670 }, { "epoch": 24.892086330935253, "grad_norm": 0.17008748650550842, "learning_rate": 6.0452184378797904e-05, "loss": 0.0061, "step": 27680 }, { "epoch": 24.901079136690647, "grad_norm": 0.15489444136619568, "learning_rate": 6.042523381668209e-05, "loss": 0.0064, "step": 27690 }, { "epoch": 24.91007194244604, "grad_norm": 0.2237788587808609, "learning_rate": 6.03982800876555e-05, "loss": 0.0055, "step": 27700 }, { "epoch": 24.91906474820144, "grad_norm": 0.1570468246936798, "learning_rate": 6.0371323199905975e-05, "loss": 0.0122, "step": 27710 }, { "epoch": 24.928057553956833, "grad_norm": 0.22130072116851807, "learning_rate": 6.03443631616223e-05, "loss": 0.0076, "step": 27720 }, { "epoch": 24.93705035971223, "grad_norm": 0.17180591821670532, "learning_rate": 6.031739998099421e-05, "loss": 0.0049, "step": 27730 }, { "epoch": 24.946043165467625, "grad_norm": 0.2139139026403427, "learning_rate": 6.029043366621243e-05, "loss": 0.006, "step": 27740 }, { "epoch": 24.955035971223023, "grad_norm": 0.15099437534809113, "learning_rate": 6.0263464225468615e-05, "loss": 0.0062, "step": 27750 }, { "epoch": 24.964028776978417, "grad_norm": 0.1311468482017517, "learning_rate": 6.023649166695534e-05, "loss": 0.0044, "step": 27760 }, { "epoch": 24.973021582733814, "grad_norm": 0.23819157481193542, "learning_rate": 6.0209515998866186e-05, "loss": 0.0063, "step": 27770 }, { "epoch": 24.98201438848921, "grad_norm": 0.15484176576137543, "learning_rate": 6.018253722939563e-05, "loss": 0.0061, "step": 27780 }, { "epoch": 24.991007194244606, "grad_norm": 0.25327619910240173, "learning_rate": 6.015555536673914e-05, "loss": 0.007, "step": 27790 }, { "epoch": 25.0, "grad_norm": 0.1869395673274994, "learning_rate": 6.0128570419093054e-05, "loss": 0.0071, "step": 27800 }, { "epoch": 25.008992805755394, "grad_norm": 0.17560669779777527, "learning_rate": 6.010158239465471e-05, "loss": 0.0044, "step": 27810 }, { "epoch": 25.01798561151079, "grad_norm": 0.240479975938797, "learning_rate": 6.007459130162235e-05, "loss": 0.0068, "step": 27820 }, { "epoch": 25.026978417266186, "grad_norm": 0.23226113617420197, "learning_rate": 6.004759714819516e-05, "loss": 0.007, "step": 27830 }, { "epoch": 25.035971223021583, "grad_norm": 0.1354244500398636, "learning_rate": 6.002059994257323e-05, "loss": 0.0079, "step": 27840 }, { "epoch": 25.044964028776977, "grad_norm": 0.22845593094825745, "learning_rate": 5.999359969295764e-05, "loss": 0.0071, "step": 27850 }, { "epoch": 25.053956834532375, "grad_norm": 0.1291974037885666, "learning_rate": 5.9966596407550314e-05, "loss": 0.0045, "step": 27860 }, { "epoch": 25.06294964028777, "grad_norm": 0.1495075821876526, "learning_rate": 5.993959009455416e-05, "loss": 0.0092, "step": 27870 }, { "epoch": 25.071942446043167, "grad_norm": 0.1841580718755722, "learning_rate": 5.991258076217298e-05, "loss": 0.0049, "step": 27880 }, { "epoch": 25.08093525179856, "grad_norm": 0.20624791085720062, "learning_rate": 5.988556841861147e-05, "loss": 0.0077, "step": 27890 }, { "epoch": 25.08992805755396, "grad_norm": 0.18687202036380768, "learning_rate": 5.985855307207531e-05, "loss": 0.0087, "step": 27900 }, { "epoch": 25.098920863309353, "grad_norm": 0.178560271859169, "learning_rate": 5.9831534730771e-05, "loss": 0.0061, "step": 27910 }, { "epoch": 25.107913669064747, "grad_norm": 0.14071707427501678, "learning_rate": 5.980451340290605e-05, "loss": 0.0059, "step": 27920 }, { "epoch": 25.116906474820144, "grad_norm": 0.1677580326795578, "learning_rate": 5.97774890966888e-05, "loss": 0.0048, "step": 27930 }, { "epoch": 25.12589928057554, "grad_norm": 0.16262024641036987, "learning_rate": 5.975046182032851e-05, "loss": 0.0066, "step": 27940 }, { "epoch": 25.134892086330936, "grad_norm": 0.237189382314682, "learning_rate": 5.972343158203537e-05, "loss": 0.0062, "step": 27950 }, { "epoch": 25.14388489208633, "grad_norm": 0.1894347369670868, "learning_rate": 5.969639839002045e-05, "loss": 0.0057, "step": 27960 }, { "epoch": 25.152877697841728, "grad_norm": 0.13989651203155518, "learning_rate": 5.966936225249572e-05, "loss": 0.0068, "step": 27970 }, { "epoch": 25.16187050359712, "grad_norm": 0.19682426750659943, "learning_rate": 5.9642323177674044e-05, "loss": 0.0057, "step": 27980 }, { "epoch": 25.17086330935252, "grad_norm": 0.29609280824661255, "learning_rate": 5.9615281173769154e-05, "loss": 0.0061, "step": 27990 }, { "epoch": 25.179856115107913, "grad_norm": 0.13242420554161072, "learning_rate": 5.958823624899574e-05, "loss": 0.005, "step": 28000 }, { "epoch": 25.18884892086331, "grad_norm": 0.26146402955055237, "learning_rate": 5.956118841156933e-05, "loss": 0.0068, "step": 28010 }, { "epoch": 25.197841726618705, "grad_norm": 0.13724923133850098, "learning_rate": 5.953413766970631e-05, "loss": 0.007, "step": 28020 }, { "epoch": 25.2068345323741, "grad_norm": 0.2627466917037964, "learning_rate": 5.9507084031624e-05, "loss": 0.0077, "step": 28030 }, { "epoch": 25.215827338129497, "grad_norm": 0.3105592131614685, "learning_rate": 5.948002750554058e-05, "loss": 0.0092, "step": 28040 }, { "epoch": 25.22482014388489, "grad_norm": 0.24812224507331848, "learning_rate": 5.9452968099675124e-05, "loss": 0.0059, "step": 28050 }, { "epoch": 25.23381294964029, "grad_norm": 0.1583113670349121, "learning_rate": 5.9425905822247527e-05, "loss": 0.006, "step": 28060 }, { "epoch": 25.242805755395683, "grad_norm": 0.23874324560165405, "learning_rate": 5.939884068147864e-05, "loss": 0.0066, "step": 28070 }, { "epoch": 25.25179856115108, "grad_norm": 0.16637380421161652, "learning_rate": 5.937177268559011e-05, "loss": 0.0052, "step": 28080 }, { "epoch": 25.260791366906474, "grad_norm": 0.15701591968536377, "learning_rate": 5.934470184280448e-05, "loss": 0.0046, "step": 28090 }, { "epoch": 25.269784172661872, "grad_norm": 0.21748097240924835, "learning_rate": 5.931762816134516e-05, "loss": 0.0052, "step": 28100 }, { "epoch": 25.278776978417266, "grad_norm": 0.18005436658859253, "learning_rate": 5.9290551649436434e-05, "loss": 0.0045, "step": 28110 }, { "epoch": 25.28776978417266, "grad_norm": 0.2897353768348694, "learning_rate": 5.9263472315303416e-05, "loss": 0.006, "step": 28120 }, { "epoch": 25.296762589928058, "grad_norm": 0.28699561953544617, "learning_rate": 5.9236390167172096e-05, "loss": 0.0048, "step": 28130 }, { "epoch": 25.305755395683452, "grad_norm": 0.237450510263443, "learning_rate": 5.920930521326932e-05, "loss": 0.008, "step": 28140 }, { "epoch": 25.31474820143885, "grad_norm": 0.13684137165546417, "learning_rate": 5.918221746182276e-05, "loss": 0.0063, "step": 28150 }, { "epoch": 25.323741007194243, "grad_norm": 0.12089958041906357, "learning_rate": 5.9155126921061e-05, "loss": 0.0057, "step": 28160 }, { "epoch": 25.33273381294964, "grad_norm": 0.17545340955257416, "learning_rate": 5.91280335992134e-05, "loss": 0.0048, "step": 28170 }, { "epoch": 25.341726618705035, "grad_norm": 0.15501920878887177, "learning_rate": 5.91009375045102e-05, "loss": 0.0069, "step": 28180 }, { "epoch": 25.350719424460433, "grad_norm": 0.16968084871768951, "learning_rate": 5.9073838645182476e-05, "loss": 0.0054, "step": 28190 }, { "epoch": 25.359712230215827, "grad_norm": 0.26346904039382935, "learning_rate": 5.904673702946217e-05, "loss": 0.0081, "step": 28200 }, { "epoch": 25.368705035971225, "grad_norm": 0.176864892244339, "learning_rate": 5.9019632665582004e-05, "loss": 0.0039, "step": 28210 }, { "epoch": 25.37769784172662, "grad_norm": 0.24113591015338898, "learning_rate": 5.899252556177559e-05, "loss": 0.0047, "step": 28220 }, { "epoch": 25.386690647482013, "grad_norm": 0.12928465008735657, "learning_rate": 5.896541572627735e-05, "loss": 0.0054, "step": 28230 }, { "epoch": 25.39568345323741, "grad_norm": 0.1609971821308136, "learning_rate": 5.893830316732253e-05, "loss": 0.0056, "step": 28240 }, { "epoch": 25.404676258992804, "grad_norm": 0.1251024752855301, "learning_rate": 5.8911187893147214e-05, "loss": 0.0077, "step": 28250 }, { "epoch": 25.413669064748202, "grad_norm": 0.28218454122543335, "learning_rate": 5.888406991198828e-05, "loss": 0.0074, "step": 28260 }, { "epoch": 25.422661870503596, "grad_norm": 0.20931674540042877, "learning_rate": 5.885694923208349e-05, "loss": 0.0122, "step": 28270 }, { "epoch": 25.431654676258994, "grad_norm": 0.22955848276615143, "learning_rate": 5.882982586167138e-05, "loss": 0.0073, "step": 28280 }, { "epoch": 25.440647482014388, "grad_norm": 0.13897967338562012, "learning_rate": 5.880269980899131e-05, "loss": 0.0068, "step": 28290 }, { "epoch": 25.449640287769785, "grad_norm": 0.24851463735103607, "learning_rate": 5.8775571082283465e-05, "loss": 0.0055, "step": 28300 }, { "epoch": 25.45863309352518, "grad_norm": 0.12697187066078186, "learning_rate": 5.8748439689788824e-05, "loss": 0.0082, "step": 28310 }, { "epoch": 25.467625899280577, "grad_norm": 0.16288147866725922, "learning_rate": 5.87213056397492e-05, "loss": 0.0059, "step": 28320 }, { "epoch": 25.47661870503597, "grad_norm": 0.14439290761947632, "learning_rate": 5.869416894040719e-05, "loss": 0.0106, "step": 28330 }, { "epoch": 25.485611510791365, "grad_norm": 0.13009746372699738, "learning_rate": 5.866702960000621e-05, "loss": 0.0048, "step": 28340 }, { "epoch": 25.494604316546763, "grad_norm": 0.20892809331417084, "learning_rate": 5.863988762679048e-05, "loss": 0.0074, "step": 28350 }, { "epoch": 25.503597122302157, "grad_norm": 0.21523517370224, "learning_rate": 5.8612743029005e-05, "loss": 0.0052, "step": 28360 }, { "epoch": 25.512589928057555, "grad_norm": 0.22895187139511108, "learning_rate": 5.858559581489561e-05, "loss": 0.0048, "step": 28370 }, { "epoch": 25.52158273381295, "grad_norm": 0.14102213084697723, "learning_rate": 5.85584459927089e-05, "loss": 0.0077, "step": 28380 }, { "epoch": 25.530575539568346, "grad_norm": 0.18865621089935303, "learning_rate": 5.853129357069227e-05, "loss": 0.0059, "step": 28390 }, { "epoch": 25.53956834532374, "grad_norm": 0.19888563454151154, "learning_rate": 5.8504138557093913e-05, "loss": 0.006, "step": 28400 }, { "epoch": 25.548561151079138, "grad_norm": 0.19748961925506592, "learning_rate": 5.8476980960162784e-05, "loss": 0.007, "step": 28410 }, { "epoch": 25.557553956834532, "grad_norm": 0.23923857510089874, "learning_rate": 5.844982078814868e-05, "loss": 0.006, "step": 28420 }, { "epoch": 25.56654676258993, "grad_norm": 0.35518646240234375, "learning_rate": 5.842265804930211e-05, "loss": 0.0089, "step": 28430 }, { "epoch": 25.575539568345324, "grad_norm": 0.23011097311973572, "learning_rate": 5.839549275187444e-05, "loss": 0.0048, "step": 28440 }, { "epoch": 25.584532374100718, "grad_norm": 0.16137830913066864, "learning_rate": 5.836832490411771e-05, "loss": 0.0065, "step": 28450 }, { "epoch": 25.593525179856115, "grad_norm": 0.1524168848991394, "learning_rate": 5.834115451428485e-05, "loss": 0.0072, "step": 28460 }, { "epoch": 25.60251798561151, "grad_norm": 0.18620707094669342, "learning_rate": 5.831398159062946e-05, "loss": 0.0052, "step": 28470 }, { "epoch": 25.611510791366907, "grad_norm": 0.23728440701961517, "learning_rate": 5.828680614140599e-05, "loss": 0.0048, "step": 28480 }, { "epoch": 25.6205035971223, "grad_norm": 0.2070673257112503, "learning_rate": 5.825962817486962e-05, "loss": 0.007, "step": 28490 }, { "epoch": 25.6294964028777, "grad_norm": 0.11617770791053772, "learning_rate": 5.823244769927629e-05, "loss": 0.0054, "step": 28500 }, { "epoch": 25.638489208633093, "grad_norm": 0.2138599306344986, "learning_rate": 5.8205264722882716e-05, "loss": 0.0084, "step": 28510 }, { "epoch": 25.64748201438849, "grad_norm": 0.20132844150066376, "learning_rate": 5.817807925394636e-05, "loss": 0.0047, "step": 28520 }, { "epoch": 25.656474820143885, "grad_norm": 0.1069021001458168, "learning_rate": 5.815089130072546e-05, "loss": 0.0061, "step": 28530 }, { "epoch": 25.665467625899282, "grad_norm": 0.16554230451583862, "learning_rate": 5.8123700871479e-05, "loss": 0.0041, "step": 28540 }, { "epoch": 25.674460431654676, "grad_norm": 0.10954943299293518, "learning_rate": 5.809650797446671e-05, "loss": 0.0046, "step": 28550 }, { "epoch": 25.68345323741007, "grad_norm": 0.15271882712841034, "learning_rate": 5.806931261794907e-05, "loss": 0.0056, "step": 28560 }, { "epoch": 25.692446043165468, "grad_norm": 0.12766465544700623, "learning_rate": 5.804211481018731e-05, "loss": 0.0036, "step": 28570 }, { "epoch": 25.701438848920862, "grad_norm": 0.08332628011703491, "learning_rate": 5.801491455944341e-05, "loss": 0.0036, "step": 28580 }, { "epoch": 25.71043165467626, "grad_norm": 0.11872248351573944, "learning_rate": 5.79877118739801e-05, "loss": 0.0042, "step": 28590 }, { "epoch": 25.719424460431654, "grad_norm": 0.09980615228414536, "learning_rate": 5.7960506762060816e-05, "loss": 0.0073, "step": 28600 }, { "epoch": 25.72841726618705, "grad_norm": 0.18756337463855743, "learning_rate": 5.793329923194977e-05, "loss": 0.0038, "step": 28610 }, { "epoch": 25.737410071942445, "grad_norm": 0.15947376191616058, "learning_rate": 5.790608929191187e-05, "loss": 0.0043, "step": 28620 }, { "epoch": 25.746402877697843, "grad_norm": 0.15741682052612305, "learning_rate": 5.78788769502128e-05, "loss": 0.0095, "step": 28630 }, { "epoch": 25.755395683453237, "grad_norm": 0.25688230991363525, "learning_rate": 5.785166221511894e-05, "loss": 0.0065, "step": 28640 }, { "epoch": 25.764388489208635, "grad_norm": 0.17571362853050232, "learning_rate": 5.7824445094897415e-05, "loss": 0.0075, "step": 28650 }, { "epoch": 25.77338129496403, "grad_norm": 0.21994547545909882, "learning_rate": 5.7797225597816065e-05, "loss": 0.0066, "step": 28660 }, { "epoch": 25.782374100719423, "grad_norm": 0.2105037122964859, "learning_rate": 5.777000373214345e-05, "loss": 0.0044, "step": 28670 }, { "epoch": 25.79136690647482, "grad_norm": 0.25357750058174133, "learning_rate": 5.774277950614885e-05, "loss": 0.0084, "step": 28680 }, { "epoch": 25.800359712230215, "grad_norm": 0.17687305808067322, "learning_rate": 5.771555292810227e-05, "loss": 0.0055, "step": 28690 }, { "epoch": 25.809352517985612, "grad_norm": 0.12458084523677826, "learning_rate": 5.768832400627444e-05, "loss": 0.0056, "step": 28700 }, { "epoch": 25.818345323741006, "grad_norm": 0.23160144686698914, "learning_rate": 5.7661092748936775e-05, "loss": 0.0068, "step": 28710 }, { "epoch": 25.827338129496404, "grad_norm": 0.21421317756175995, "learning_rate": 5.76338591643614e-05, "loss": 0.0096, "step": 28720 }, { "epoch": 25.836330935251798, "grad_norm": 0.21068644523620605, "learning_rate": 5.760662326082118e-05, "loss": 0.0084, "step": 28730 }, { "epoch": 25.845323741007196, "grad_norm": 0.18153521418571472, "learning_rate": 5.757938504658965e-05, "loss": 0.0056, "step": 28740 }, { "epoch": 25.85431654676259, "grad_norm": 0.10382277518510818, "learning_rate": 5.755214452994107e-05, "loss": 0.0059, "step": 28750 }, { "epoch": 25.863309352517987, "grad_norm": 0.10424081981182098, "learning_rate": 5.752490171915039e-05, "loss": 0.0078, "step": 28760 }, { "epoch": 25.87230215827338, "grad_norm": 0.2010507434606552, "learning_rate": 5.749765662249324e-05, "loss": 0.008, "step": 28770 }, { "epoch": 25.881294964028775, "grad_norm": 0.23483875393867493, "learning_rate": 5.747040924824596e-05, "loss": 0.0034, "step": 28780 }, { "epoch": 25.890287769784173, "grad_norm": 0.1815672516822815, "learning_rate": 5.7443159604685613e-05, "loss": 0.0047, "step": 28790 }, { "epoch": 25.899280575539567, "grad_norm": 0.10272645950317383, "learning_rate": 5.74159077000899e-05, "loss": 0.0073, "step": 28800 }, { "epoch": 25.908273381294965, "grad_norm": 0.1417728066444397, "learning_rate": 5.7388653542737235e-05, "loss": 0.0062, "step": 28810 }, { "epoch": 25.91726618705036, "grad_norm": 0.1479959636926651, "learning_rate": 5.736139714090672e-05, "loss": 0.0086, "step": 28820 }, { "epoch": 25.926258992805757, "grad_norm": 0.1497190296649933, "learning_rate": 5.73341385028781e-05, "loss": 0.0066, "step": 28830 }, { "epoch": 25.93525179856115, "grad_norm": 0.15425457060337067, "learning_rate": 5.7306877636931855e-05, "loss": 0.0048, "step": 28840 }, { "epoch": 25.944244604316548, "grad_norm": 0.1447143405675888, "learning_rate": 5.7279614551349125e-05, "loss": 0.0053, "step": 28850 }, { "epoch": 25.953237410071942, "grad_norm": 0.246709406375885, "learning_rate": 5.725234925441169e-05, "loss": 0.0038, "step": 28860 }, { "epoch": 25.96223021582734, "grad_norm": 0.2624675929546356, "learning_rate": 5.7225081754402044e-05, "loss": 0.0056, "step": 28870 }, { "epoch": 25.971223021582734, "grad_norm": 0.1905740648508072, "learning_rate": 5.7197812059603326e-05, "loss": 0.0053, "step": 28880 }, { "epoch": 25.980215827338128, "grad_norm": 0.19899605214595795, "learning_rate": 5.717054017829934e-05, "loss": 0.007, "step": 28890 }, { "epoch": 25.989208633093526, "grad_norm": 0.1648496687412262, "learning_rate": 5.7143266118774584e-05, "loss": 0.0103, "step": 28900 }, { "epoch": 25.99820143884892, "grad_norm": 0.21051989495754242, "learning_rate": 5.711598988931418e-05, "loss": 0.0049, "step": 28910 }, { "epoch": 26.007194244604317, "grad_norm": 0.24787768721580505, "learning_rate": 5.7088711498203954e-05, "loss": 0.0052, "step": 28920 }, { "epoch": 26.01618705035971, "grad_norm": 0.23663271963596344, "learning_rate": 5.706143095373033e-05, "loss": 0.0114, "step": 28930 }, { "epoch": 26.02517985611511, "grad_norm": 0.24710707366466522, "learning_rate": 5.703414826418042e-05, "loss": 0.0058, "step": 28940 }, { "epoch": 26.034172661870503, "grad_norm": 0.1575959473848343, "learning_rate": 5.7006863437842007e-05, "loss": 0.0067, "step": 28950 }, { "epoch": 26.0431654676259, "grad_norm": 0.15169306099414825, "learning_rate": 5.697957648300348e-05, "loss": 0.0124, "step": 28960 }, { "epoch": 26.052158273381295, "grad_norm": 0.21847452223300934, "learning_rate": 5.695228740795391e-05, "loss": 0.0069, "step": 28970 }, { "epoch": 26.06115107913669, "grad_norm": 0.20802761614322662, "learning_rate": 5.6924996220982985e-05, "loss": 0.0064, "step": 28980 }, { "epoch": 26.070143884892087, "grad_norm": 0.24467864632606506, "learning_rate": 5.6897702930381045e-05, "loss": 0.005, "step": 28990 }, { "epoch": 26.07913669064748, "grad_norm": 0.14016994833946228, "learning_rate": 5.687040754443908e-05, "loss": 0.0046, "step": 29000 }, { "epoch": 26.08812949640288, "grad_norm": 0.1008910983800888, "learning_rate": 5.6843110071448725e-05, "loss": 0.0066, "step": 29010 }, { "epoch": 26.097122302158272, "grad_norm": 0.15839500725269318, "learning_rate": 5.6815810519702194e-05, "loss": 0.0059, "step": 29020 }, { "epoch": 26.10611510791367, "grad_norm": 0.21459253132343292, "learning_rate": 5.6788508897492396e-05, "loss": 0.0039, "step": 29030 }, { "epoch": 26.115107913669064, "grad_norm": 0.35551637411117554, "learning_rate": 5.676120521311282e-05, "loss": 0.0055, "step": 29040 }, { "epoch": 26.12410071942446, "grad_norm": 0.11352641135454178, "learning_rate": 5.6733899474857634e-05, "loss": 0.0083, "step": 29050 }, { "epoch": 26.133093525179856, "grad_norm": 0.19945818185806274, "learning_rate": 5.670659169102157e-05, "loss": 0.0055, "step": 29060 }, { "epoch": 26.142086330935253, "grad_norm": 0.24573485553264618, "learning_rate": 5.6679281869900044e-05, "loss": 0.0074, "step": 29070 }, { "epoch": 26.151079136690647, "grad_norm": 0.22386519610881805, "learning_rate": 5.6651970019789045e-05, "loss": 0.0061, "step": 29080 }, { "epoch": 26.16007194244604, "grad_norm": 0.2939847409725189, "learning_rate": 5.662465614898519e-05, "loss": 0.0048, "step": 29090 }, { "epoch": 26.16906474820144, "grad_norm": 0.13332360982894897, "learning_rate": 5.6597340265785695e-05, "loss": 0.0109, "step": 29100 }, { "epoch": 26.178057553956833, "grad_norm": 0.26053163409233093, "learning_rate": 5.657002237848843e-05, "loss": 0.0076, "step": 29110 }, { "epoch": 26.18705035971223, "grad_norm": 0.30001190304756165, "learning_rate": 5.654270249539183e-05, "loss": 0.0061, "step": 29120 }, { "epoch": 26.196043165467625, "grad_norm": 0.23821361362934113, "learning_rate": 5.651538062479498e-05, "loss": 0.0088, "step": 29130 }, { "epoch": 26.205035971223023, "grad_norm": 0.32955414056777954, "learning_rate": 5.648805677499751e-05, "loss": 0.0071, "step": 29140 }, { "epoch": 26.214028776978417, "grad_norm": 0.16553623974323273, "learning_rate": 5.646073095429969e-05, "loss": 0.007, "step": 29150 }, { "epoch": 26.223021582733814, "grad_norm": 0.14870977401733398, "learning_rate": 5.643340317100241e-05, "loss": 0.0054, "step": 29160 }, { "epoch": 26.23201438848921, "grad_norm": 0.2069435864686966, "learning_rate": 5.64060734334071e-05, "loss": 0.0064, "step": 29170 }, { "epoch": 26.241007194244606, "grad_norm": 0.14673149585723877, "learning_rate": 5.637874174981583e-05, "loss": 0.0042, "step": 29180 }, { "epoch": 26.25, "grad_norm": 0.10436747223138809, "learning_rate": 5.635140812853124e-05, "loss": 0.0051, "step": 29190 }, { "epoch": 26.258992805755394, "grad_norm": 0.13848137855529785, "learning_rate": 5.6324072577856544e-05, "loss": 0.0051, "step": 29200 }, { "epoch": 26.26798561151079, "grad_norm": 0.1561366319656372, "learning_rate": 5.629673510609559e-05, "loss": 0.0054, "step": 29210 }, { "epoch": 26.276978417266186, "grad_norm": 0.1301679164171219, "learning_rate": 5.626939572155276e-05, "loss": 0.0075, "step": 29220 }, { "epoch": 26.285971223021583, "grad_norm": 0.17796066403388977, "learning_rate": 5.6242054432533054e-05, "loss": 0.0054, "step": 29230 }, { "epoch": 26.294964028776977, "grad_norm": 0.3283892571926117, "learning_rate": 5.621471124734201e-05, "loss": 0.0068, "step": 29240 }, { "epoch": 26.303956834532375, "grad_norm": 0.3479534685611725, "learning_rate": 5.6187366174285794e-05, "loss": 0.0125, "step": 29250 }, { "epoch": 26.31294964028777, "grad_norm": 0.2108105570077896, "learning_rate": 5.616001922167109e-05, "loss": 0.0083, "step": 29260 }, { "epoch": 26.321942446043167, "grad_norm": 0.22655116021633148, "learning_rate": 5.61326703978052e-05, "loss": 0.0091, "step": 29270 }, { "epoch": 26.33093525179856, "grad_norm": 0.370311439037323, "learning_rate": 5.6105319710995964e-05, "loss": 0.0072, "step": 29280 }, { "epoch": 26.33992805755396, "grad_norm": 0.24602121114730835, "learning_rate": 5.60779671695518e-05, "loss": 0.0078, "step": 29290 }, { "epoch": 26.348920863309353, "grad_norm": 0.2239893078804016, "learning_rate": 5.6050612781781684e-05, "loss": 0.0052, "step": 29300 }, { "epoch": 26.357913669064747, "grad_norm": 0.1790538728237152, "learning_rate": 5.602325655599516e-05, "loss": 0.0053, "step": 29310 }, { "epoch": 26.366906474820144, "grad_norm": 0.14468801021575928, "learning_rate": 5.599589850050234e-05, "loss": 0.0048, "step": 29320 }, { "epoch": 26.37589928057554, "grad_norm": 0.1265774965286255, "learning_rate": 5.5968538623613874e-05, "loss": 0.0051, "step": 29330 }, { "epoch": 26.384892086330936, "grad_norm": 0.1559097319841385, "learning_rate": 5.594117693364095e-05, "loss": 0.0046, "step": 29340 }, { "epoch": 26.39388489208633, "grad_norm": 0.16868704557418823, "learning_rate": 5.591381343889535e-05, "loss": 0.0061, "step": 29350 }, { "epoch": 26.402877697841728, "grad_norm": 0.17127156257629395, "learning_rate": 5.5886448147689355e-05, "loss": 0.0058, "step": 29360 }, { "epoch": 26.41187050359712, "grad_norm": 0.1636136919260025, "learning_rate": 5.585908106833585e-05, "loss": 0.0042, "step": 29370 }, { "epoch": 26.42086330935252, "grad_norm": 0.09913573414087296, "learning_rate": 5.5831712209148226e-05, "loss": 0.0059, "step": 29380 }, { "epoch": 26.429856115107913, "grad_norm": 0.17490297555923462, "learning_rate": 5.58043415784404e-05, "loss": 0.0073, "step": 29390 }, { "epoch": 26.43884892086331, "grad_norm": 0.23224350810050964, "learning_rate": 5.577696918452686e-05, "loss": 0.0047, "step": 29400 }, { "epoch": 26.447841726618705, "grad_norm": 0.13111193478107452, "learning_rate": 5.5749595035722604e-05, "loss": 0.018, "step": 29410 }, { "epoch": 26.4568345323741, "grad_norm": 0.12267898768186569, "learning_rate": 5.5722219140343193e-05, "loss": 0.0108, "step": 29420 }, { "epoch": 26.465827338129497, "grad_norm": 0.18374031782150269, "learning_rate": 5.56948415067047e-05, "loss": 0.0058, "step": 29430 }, { "epoch": 26.47482014388489, "grad_norm": 0.24860017001628876, "learning_rate": 5.5667462143123704e-05, "loss": 0.0079, "step": 29440 }, { "epoch": 26.48381294964029, "grad_norm": 0.1637050211429596, "learning_rate": 5.564008105791737e-05, "loss": 0.0043, "step": 29450 }, { "epoch": 26.492805755395683, "grad_norm": 0.21771615743637085, "learning_rate": 5.5612698259403316e-05, "loss": 0.0092, "step": 29460 }, { "epoch": 26.50179856115108, "grad_norm": 0.15009064972400665, "learning_rate": 5.5585313755899724e-05, "loss": 0.0084, "step": 29470 }, { "epoch": 26.510791366906474, "grad_norm": 0.3115123510360718, "learning_rate": 5.5557927555725285e-05, "loss": 0.0102, "step": 29480 }, { "epoch": 26.519784172661872, "grad_norm": 0.11568615585565567, "learning_rate": 5.55305396671992e-05, "loss": 0.0049, "step": 29490 }, { "epoch": 26.528776978417266, "grad_norm": 0.14290113747119904, "learning_rate": 5.55031500986412e-05, "loss": 0.0092, "step": 29500 }, { "epoch": 26.53776978417266, "grad_norm": 0.24969980120658875, "learning_rate": 5.547575885837149e-05, "loss": 0.0077, "step": 29510 }, { "epoch": 26.546762589928058, "grad_norm": 0.11966107785701752, "learning_rate": 5.5448365954710825e-05, "loss": 0.0059, "step": 29520 }, { "epoch": 26.555755395683452, "grad_norm": 0.24171188473701477, "learning_rate": 5.5420971395980446e-05, "loss": 0.0082, "step": 29530 }, { "epoch": 26.56474820143885, "grad_norm": 0.3505202531814575, "learning_rate": 5.539357519050209e-05, "loss": 0.0086, "step": 29540 }, { "epoch": 26.573741007194243, "grad_norm": 0.19206245243549347, "learning_rate": 5.536617734659799e-05, "loss": 0.0081, "step": 29550 }, { "epoch": 26.58273381294964, "grad_norm": 0.22004660964012146, "learning_rate": 5.533877787259091e-05, "loss": 0.0058, "step": 29560 }, { "epoch": 26.591726618705035, "grad_norm": 0.15465974807739258, "learning_rate": 5.5311376776804044e-05, "loss": 0.0061, "step": 29570 }, { "epoch": 26.600719424460433, "grad_norm": 0.1726274937391281, "learning_rate": 5.528397406756118e-05, "loss": 0.0085, "step": 29580 }, { "epoch": 26.609712230215827, "grad_norm": 0.17838463187217712, "learning_rate": 5.525656975318652e-05, "loss": 0.0049, "step": 29590 }, { "epoch": 26.618705035971225, "grad_norm": 0.09296846389770508, "learning_rate": 5.522916384200474e-05, "loss": 0.003, "step": 29600 }, { "epoch": 26.62769784172662, "grad_norm": 0.14156396687030792, "learning_rate": 5.520175634234106e-05, "loss": 0.0071, "step": 29610 }, { "epoch": 26.636690647482013, "grad_norm": 0.18153752386569977, "learning_rate": 5.517434726252113e-05, "loss": 0.0053, "step": 29620 }, { "epoch": 26.64568345323741, "grad_norm": 0.1602843850851059, "learning_rate": 5.514693661087113e-05, "loss": 0.006, "step": 29630 }, { "epoch": 26.654676258992804, "grad_norm": 0.32828420400619507, "learning_rate": 5.511952439571769e-05, "loss": 0.0054, "step": 29640 }, { "epoch": 26.663669064748202, "grad_norm": 0.08544152975082397, "learning_rate": 5.509211062538791e-05, "loss": 0.0039, "step": 29650 }, { "epoch": 26.672661870503596, "grad_norm": 0.23955395817756653, "learning_rate": 5.506469530820939e-05, "loss": 0.009, "step": 29660 }, { "epoch": 26.681654676258994, "grad_norm": 0.11835609376430511, "learning_rate": 5.503727845251014e-05, "loss": 0.0037, "step": 29670 }, { "epoch": 26.690647482014388, "grad_norm": 0.1390116661787033, "learning_rate": 5.50098600666187e-05, "loss": 0.0063, "step": 29680 }, { "epoch": 26.699640287769785, "grad_norm": 0.18967607617378235, "learning_rate": 5.498244015886406e-05, "loss": 0.0052, "step": 29690 }, { "epoch": 26.70863309352518, "grad_norm": 0.12031532824039459, "learning_rate": 5.495501873757565e-05, "loss": 0.0051, "step": 29700 }, { "epoch": 26.717625899280577, "grad_norm": 0.10736491531133652, "learning_rate": 5.492759581108336e-05, "loss": 0.0048, "step": 29710 }, { "epoch": 26.72661870503597, "grad_norm": 0.1254109889268875, "learning_rate": 5.490017138771759e-05, "loss": 0.0058, "step": 29720 }, { "epoch": 26.735611510791365, "grad_norm": 0.1930311620235443, "learning_rate": 5.487274547580912e-05, "loss": 0.0065, "step": 29730 }, { "epoch": 26.744604316546763, "grad_norm": 0.27303069829940796, "learning_rate": 5.484531808368923e-05, "loss": 0.0052, "step": 29740 }, { "epoch": 26.753597122302157, "grad_norm": 0.2239455282688141, "learning_rate": 5.4817889219689656e-05, "loss": 0.0071, "step": 29750 }, { "epoch": 26.762589928057555, "grad_norm": 0.28190451860427856, "learning_rate": 5.4790458892142536e-05, "loss": 0.005, "step": 29760 }, { "epoch": 26.77158273381295, "grad_norm": 0.15335536003112793, "learning_rate": 5.476302710938048e-05, "loss": 0.0042, "step": 29770 }, { "epoch": 26.780575539568346, "grad_norm": 0.18686167895793915, "learning_rate": 5.473559387973657e-05, "loss": 0.0069, "step": 29780 }, { "epoch": 26.78956834532374, "grad_norm": 0.1633739322423935, "learning_rate": 5.470815921154425e-05, "loss": 0.0077, "step": 29790 }, { "epoch": 26.798561151079138, "grad_norm": 0.09439347684383392, "learning_rate": 5.468072311313749e-05, "loss": 0.0038, "step": 29800 }, { "epoch": 26.807553956834532, "grad_norm": 0.15711502730846405, "learning_rate": 5.465328559285063e-05, "loss": 0.0058, "step": 29810 }, { "epoch": 26.81654676258993, "grad_norm": 0.25312936305999756, "learning_rate": 5.462584665901849e-05, "loss": 0.0047, "step": 29820 }, { "epoch": 26.825539568345324, "grad_norm": 0.21102891862392426, "learning_rate": 5.4598406319976235e-05, "loss": 0.0071, "step": 29830 }, { "epoch": 26.834532374100718, "grad_norm": 0.19842177629470825, "learning_rate": 5.457096458405958e-05, "loss": 0.0105, "step": 29840 }, { "epoch": 26.843525179856115, "grad_norm": 0.20513759553432465, "learning_rate": 5.454352145960457e-05, "loss": 0.0068, "step": 29850 }, { "epoch": 26.85251798561151, "grad_norm": 0.11309336125850677, "learning_rate": 5.4516076954947715e-05, "loss": 0.005, "step": 29860 }, { "epoch": 26.861510791366907, "grad_norm": 0.2955223023891449, "learning_rate": 5.448863107842591e-05, "loss": 0.006, "step": 29870 }, { "epoch": 26.8705035971223, "grad_norm": 0.19725434482097626, "learning_rate": 5.446118383837651e-05, "loss": 0.0057, "step": 29880 }, { "epoch": 26.8794964028777, "grad_norm": 0.3247261047363281, "learning_rate": 5.443373524313722e-05, "loss": 0.0087, "step": 29890 }, { "epoch": 26.888489208633093, "grad_norm": 0.16180568933486938, "learning_rate": 5.440628530104626e-05, "loss": 0.005, "step": 29900 }, { "epoch": 26.89748201438849, "grad_norm": 0.16910065710544586, "learning_rate": 5.4378834020442146e-05, "loss": 0.005, "step": 29910 }, { "epoch": 26.906474820143885, "grad_norm": 0.20772764086723328, "learning_rate": 5.4351381409663884e-05, "loss": 0.0043, "step": 29920 }, { "epoch": 26.915467625899282, "grad_norm": 0.21511220932006836, "learning_rate": 5.432392747705084e-05, "loss": 0.0046, "step": 29930 }, { "epoch": 26.924460431654676, "grad_norm": 0.16019578278064728, "learning_rate": 5.429647223094278e-05, "loss": 0.0046, "step": 29940 }, { "epoch": 26.93345323741007, "grad_norm": 0.1976153403520584, "learning_rate": 5.4269015679679924e-05, "loss": 0.0058, "step": 29950 }, { "epoch": 26.942446043165468, "grad_norm": 0.16186751425266266, "learning_rate": 5.424155783160281e-05, "loss": 0.0041, "step": 29960 }, { "epoch": 26.951438848920862, "grad_norm": 0.18648211658000946, "learning_rate": 5.4214098695052415e-05, "loss": 0.007, "step": 29970 }, { "epoch": 26.96043165467626, "grad_norm": 0.29102522134780884, "learning_rate": 5.418663827837012e-05, "loss": 0.0072, "step": 29980 }, { "epoch": 26.969424460431654, "grad_norm": 0.15457116067409515, "learning_rate": 5.415917658989763e-05, "loss": 0.0044, "step": 29990 }, { "epoch": 26.97841726618705, "grad_norm": 0.19764849543571472, "learning_rate": 5.413171363797713e-05, "loss": 0.0066, "step": 30000 }, { "epoch": 26.987410071942445, "grad_norm": 0.1338333636522293, "learning_rate": 5.4104249430951116e-05, "loss": 0.005, "step": 30010 }, { "epoch": 26.996402877697843, "grad_norm": 0.246728777885437, "learning_rate": 5.4076783977162494e-05, "loss": 0.0049, "step": 30020 }, { "epoch": 27.005395683453237, "grad_norm": 0.3434533178806305, "learning_rate": 5.4049317284954525e-05, "loss": 0.0041, "step": 30030 }, { "epoch": 27.014388489208635, "grad_norm": 0.21402551233768463, "learning_rate": 5.4021849362670884e-05, "loss": 0.0055, "step": 30040 }, { "epoch": 27.02338129496403, "grad_norm": 0.20548830926418304, "learning_rate": 5.3994380218655604e-05, "loss": 0.0074, "step": 30050 }, { "epoch": 27.032374100719423, "grad_norm": 0.1936042606830597, "learning_rate": 5.396690986125309e-05, "loss": 0.0062, "step": 30060 }, { "epoch": 27.04136690647482, "grad_norm": 0.1678186058998108, "learning_rate": 5.3939438298808075e-05, "loss": 0.0055, "step": 30070 }, { "epoch": 27.050359712230215, "grad_norm": 0.18068940937519073, "learning_rate": 5.3911965539665744e-05, "loss": 0.0087, "step": 30080 }, { "epoch": 27.059352517985612, "grad_norm": 0.12967194616794586, "learning_rate": 5.388449159217156e-05, "loss": 0.0066, "step": 30090 }, { "epoch": 27.068345323741006, "grad_norm": 0.2055213749408722, "learning_rate": 5.3857016464671385e-05, "loss": 0.0049, "step": 30100 }, { "epoch": 27.077338129496404, "grad_norm": 0.08550507575273514, "learning_rate": 5.382954016551146e-05, "loss": 0.0041, "step": 30110 }, { "epoch": 27.086330935251798, "grad_norm": 0.13661909103393555, "learning_rate": 5.380206270303835e-05, "loss": 0.005, "step": 30120 }, { "epoch": 27.095323741007196, "grad_norm": 0.20765215158462524, "learning_rate": 5.377458408559897e-05, "loss": 0.0064, "step": 30130 }, { "epoch": 27.10431654676259, "grad_norm": 0.23615102469921112, "learning_rate": 5.374710432154061e-05, "loss": 0.0058, "step": 30140 }, { "epoch": 27.113309352517987, "grad_norm": 0.16410745680332184, "learning_rate": 5.3719623419210886e-05, "loss": 0.0048, "step": 30150 }, { "epoch": 27.12230215827338, "grad_norm": 0.22843658924102783, "learning_rate": 5.3692141386957786e-05, "loss": 0.0043, "step": 30160 }, { "epoch": 27.131294964028775, "grad_norm": 0.14093275368213654, "learning_rate": 5.3664658233129616e-05, "loss": 0.007, "step": 30170 }, { "epoch": 27.140287769784173, "grad_norm": 0.1276610791683197, "learning_rate": 5.363717396607504e-05, "loss": 0.0047, "step": 30180 }, { "epoch": 27.149280575539567, "grad_norm": 0.25986576080322266, "learning_rate": 5.360968859414305e-05, "loss": 0.0051, "step": 30190 }, { "epoch": 27.158273381294965, "grad_norm": 0.26002439856529236, "learning_rate": 5.358220212568295e-05, "loss": 0.0044, "step": 30200 }, { "epoch": 27.16726618705036, "grad_norm": 0.3245498239994049, "learning_rate": 5.355471456904444e-05, "loss": 0.0056, "step": 30210 }, { "epoch": 27.176258992805757, "grad_norm": 0.26069098711013794, "learning_rate": 5.3527225932577495e-05, "loss": 0.0107, "step": 30220 }, { "epoch": 27.18525179856115, "grad_norm": 0.1478876918554306, "learning_rate": 5.349973622463246e-05, "loss": 0.0125, "step": 30230 }, { "epoch": 27.194244604316548, "grad_norm": 0.26568931341171265, "learning_rate": 5.3472245453559956e-05, "loss": 0.007, "step": 30240 }, { "epoch": 27.203237410071942, "grad_norm": 0.11631058156490326, "learning_rate": 5.3444753627710955e-05, "loss": 0.0078, "step": 30250 }, { "epoch": 27.21223021582734, "grad_norm": 0.17223802208900452, "learning_rate": 5.341726075543676e-05, "loss": 0.0058, "step": 30260 }, { "epoch": 27.221223021582734, "grad_norm": 0.30734434723854065, "learning_rate": 5.338976684508898e-05, "loss": 0.0048, "step": 30270 }, { "epoch": 27.230215827338128, "grad_norm": 0.23335781693458557, "learning_rate": 5.336227190501953e-05, "loss": 0.0046, "step": 30280 }, { "epoch": 27.239208633093526, "grad_norm": 0.21105371415615082, "learning_rate": 5.3334775943580664e-05, "loss": 0.0067, "step": 30290 }, { "epoch": 27.24820143884892, "grad_norm": 0.18799470365047455, "learning_rate": 5.330727896912491e-05, "loss": 0.0048, "step": 30300 }, { "epoch": 27.257194244604317, "grad_norm": 0.19615738093852997, "learning_rate": 5.327978099000511e-05, "loss": 0.0051, "step": 30310 }, { "epoch": 27.26618705035971, "grad_norm": 0.1909695863723755, "learning_rate": 5.3252282014574465e-05, "loss": 0.0043, "step": 30320 }, { "epoch": 27.27517985611511, "grad_norm": 0.12927283346652985, "learning_rate": 5.322478205118641e-05, "loss": 0.0038, "step": 30330 }, { "epoch": 27.284172661870503, "grad_norm": 0.2557937204837799, "learning_rate": 5.3197281108194704e-05, "loss": 0.0083, "step": 30340 }, { "epoch": 27.2931654676259, "grad_norm": 0.16306301951408386, "learning_rate": 5.316977919395342e-05, "loss": 0.004, "step": 30350 }, { "epoch": 27.302158273381295, "grad_norm": 0.158535897731781, "learning_rate": 5.314227631681691e-05, "loss": 0.0056, "step": 30360 }, { "epoch": 27.31115107913669, "grad_norm": 0.1930062472820282, "learning_rate": 5.311477248513982e-05, "loss": 0.0044, "step": 30370 }, { "epoch": 27.320143884892087, "grad_norm": 0.18092751502990723, "learning_rate": 5.30872677072771e-05, "loss": 0.0075, "step": 30380 }, { "epoch": 27.32913669064748, "grad_norm": 0.18875491619110107, "learning_rate": 5.3059761991583954e-05, "loss": 0.0079, "step": 30390 }, { "epoch": 27.33812949640288, "grad_norm": 0.16270871460437775, "learning_rate": 5.303225534641592e-05, "loss": 0.0062, "step": 30400 }, { "epoch": 27.347122302158272, "grad_norm": 0.15398578345775604, "learning_rate": 5.300474778012875e-05, "loss": 0.0054, "step": 30410 }, { "epoch": 27.35611510791367, "grad_norm": 0.18253716826438904, "learning_rate": 5.297723930107855e-05, "loss": 0.0049, "step": 30420 }, { "epoch": 27.365107913669064, "grad_norm": 0.14208438992500305, "learning_rate": 5.294972991762167e-05, "loss": 0.006, "step": 30430 }, { "epoch": 27.37410071942446, "grad_norm": 0.14202739298343658, "learning_rate": 5.292221963811472e-05, "loss": 0.0044, "step": 30440 }, { "epoch": 27.383093525179856, "grad_norm": 0.13849472999572754, "learning_rate": 5.28947084709146e-05, "loss": 0.0042, "step": 30450 }, { "epoch": 27.392086330935253, "grad_norm": 0.2567499577999115, "learning_rate": 5.2867196424378465e-05, "loss": 0.0064, "step": 30460 }, { "epoch": 27.401079136690647, "grad_norm": 0.2522314488887787, "learning_rate": 5.2839683506863765e-05, "loss": 0.0075, "step": 30470 }, { "epoch": 27.41007194244604, "grad_norm": 0.18928559124469757, "learning_rate": 5.281216972672821e-05, "loss": 0.0063, "step": 30480 }, { "epoch": 27.41906474820144, "grad_norm": 0.222077876329422, "learning_rate": 5.278465509232973e-05, "loss": 0.0097, "step": 30490 }, { "epoch": 27.428057553956833, "grad_norm": 0.17556215822696686, "learning_rate": 5.275713961202655e-05, "loss": 0.0052, "step": 30500 }, { "epoch": 27.43705035971223, "grad_norm": 0.1820942610502243, "learning_rate": 5.2729623294177165e-05, "loss": 0.0041, "step": 30510 }, { "epoch": 27.446043165467625, "grad_norm": 0.2624826729297638, "learning_rate": 5.270210614714028e-05, "loss": 0.0076, "step": 30520 }, { "epoch": 27.455035971223023, "grad_norm": 0.30411890149116516, "learning_rate": 5.267458817927491e-05, "loss": 0.0081, "step": 30530 }, { "epoch": 27.464028776978417, "grad_norm": 0.2059175819158554, "learning_rate": 5.264706939894026e-05, "loss": 0.0053, "step": 30540 }, { "epoch": 27.473021582733814, "grad_norm": 0.1320769190788269, "learning_rate": 5.261954981449584e-05, "loss": 0.0063, "step": 30550 }, { "epoch": 27.48201438848921, "grad_norm": 0.19042743742465973, "learning_rate": 5.2592029434301324e-05, "loss": 0.0056, "step": 30560 }, { "epoch": 27.491007194244606, "grad_norm": 0.25764280557632446, "learning_rate": 5.256450826671672e-05, "loss": 0.0094, "step": 30570 }, { "epoch": 27.5, "grad_norm": 0.19991962611675262, "learning_rate": 5.253698632010221e-05, "loss": 0.0074, "step": 30580 }, { "epoch": 27.508992805755394, "grad_norm": 0.1492474526166916, "learning_rate": 5.2509463602818246e-05, "loss": 0.006, "step": 30590 }, { "epoch": 27.51798561151079, "grad_norm": 0.1639937311410904, "learning_rate": 5.248194012322549e-05, "loss": 0.0057, "step": 30600 }, { "epoch": 27.526978417266186, "grad_norm": 0.17726902663707733, "learning_rate": 5.245441588968486e-05, "loss": 0.0065, "step": 30610 }, { "epoch": 27.535971223021583, "grad_norm": 0.3083212077617645, "learning_rate": 5.242689091055748e-05, "loss": 0.0079, "step": 30620 }, { "epoch": 27.544964028776977, "grad_norm": 0.2017027735710144, "learning_rate": 5.239936519420473e-05, "loss": 0.0099, "step": 30630 }, { "epoch": 27.553956834532375, "grad_norm": 0.2095937728881836, "learning_rate": 5.2371838748988175e-05, "loss": 0.0105, "step": 30640 }, { "epoch": 27.56294964028777, "grad_norm": 0.12496661394834518, "learning_rate": 5.234431158326965e-05, "loss": 0.0089, "step": 30650 }, { "epoch": 27.571942446043167, "grad_norm": 0.10814950615167618, "learning_rate": 5.231678370541115e-05, "loss": 0.0064, "step": 30660 }, { "epoch": 27.58093525179856, "grad_norm": 0.1103668138384819, "learning_rate": 5.228925512377495e-05, "loss": 0.007, "step": 30670 }, { "epoch": 27.58992805755396, "grad_norm": 0.2231205254793167, "learning_rate": 5.2261725846723465e-05, "loss": 0.0059, "step": 30680 }, { "epoch": 27.598920863309353, "grad_norm": 0.221917524933815, "learning_rate": 5.22341958826194e-05, "loss": 0.0069, "step": 30690 }, { "epoch": 27.607913669064747, "grad_norm": 0.1537935882806778, "learning_rate": 5.22066652398256e-05, "loss": 0.0059, "step": 30700 }, { "epoch": 27.616906474820144, "grad_norm": 0.25678229331970215, "learning_rate": 5.2179133926705185e-05, "loss": 0.0059, "step": 30710 }, { "epoch": 27.62589928057554, "grad_norm": 0.14959841966629028, "learning_rate": 5.215160195162141e-05, "loss": 0.0052, "step": 30720 }, { "epoch": 27.634892086330936, "grad_norm": 0.11015471816062927, "learning_rate": 5.212406932293776e-05, "loss": 0.0039, "step": 30730 }, { "epoch": 27.64388489208633, "grad_norm": 0.21927030384540558, "learning_rate": 5.209653604901795e-05, "loss": 0.0052, "step": 30740 }, { "epoch": 27.652877697841728, "grad_norm": 0.24124890565872192, "learning_rate": 5.206900213822584e-05, "loss": 0.005, "step": 30750 }, { "epoch": 27.66187050359712, "grad_norm": 0.0953349843621254, "learning_rate": 5.204146759892551e-05, "loss": 0.0059, "step": 30760 }, { "epoch": 27.67086330935252, "grad_norm": 0.1790902018547058, "learning_rate": 5.2013932439481216e-05, "loss": 0.0052, "step": 30770 }, { "epoch": 27.679856115107913, "grad_norm": 0.28438040614128113, "learning_rate": 5.198639666825743e-05, "loss": 0.0123, "step": 30780 }, { "epoch": 27.68884892086331, "grad_norm": 0.3205413818359375, "learning_rate": 5.195886029361877e-05, "loss": 0.0059, "step": 30790 }, { "epoch": 27.697841726618705, "grad_norm": 0.3112693727016449, "learning_rate": 5.193132332393009e-05, "loss": 0.009, "step": 30800 }, { "epoch": 27.7068345323741, "grad_norm": 0.13597537577152252, "learning_rate": 5.1903785767556376e-05, "loss": 0.0064, "step": 30810 }, { "epoch": 27.715827338129497, "grad_norm": 0.321089506149292, "learning_rate": 5.187624763286282e-05, "loss": 0.0042, "step": 30820 }, { "epoch": 27.72482014388489, "grad_norm": 0.26721686124801636, "learning_rate": 5.184870892821475e-05, "loss": 0.0137, "step": 30830 }, { "epoch": 27.73381294964029, "grad_norm": 0.42228934168815613, "learning_rate": 5.182116966197773e-05, "loss": 0.0064, "step": 30840 }, { "epoch": 27.742805755395683, "grad_norm": 0.1859998255968094, "learning_rate": 5.1793629842517466e-05, "loss": 0.0052, "step": 30850 }, { "epoch": 27.75179856115108, "grad_norm": 0.20162534713745117, "learning_rate": 5.17660894781998e-05, "loss": 0.0058, "step": 30860 }, { "epoch": 27.760791366906474, "grad_norm": 0.18642808496952057, "learning_rate": 5.173854857739079e-05, "loss": 0.006, "step": 30870 }, { "epoch": 27.769784172661872, "grad_norm": 0.24237622320652008, "learning_rate": 5.171100714845661e-05, "loss": 0.0053, "step": 30880 }, { "epoch": 27.778776978417266, "grad_norm": 0.1985703408718109, "learning_rate": 5.1683465199763646e-05, "loss": 0.0094, "step": 30890 }, { "epoch": 27.78776978417266, "grad_norm": 0.19600845873355865, "learning_rate": 5.16559227396784e-05, "loss": 0.0039, "step": 30900 }, { "epoch": 27.796762589928058, "grad_norm": 0.14017055928707123, "learning_rate": 5.1628379776567556e-05, "loss": 0.0051, "step": 30910 }, { "epoch": 27.805755395683452, "grad_norm": 0.18324434757232666, "learning_rate": 5.160083631879792e-05, "loss": 0.0056, "step": 30920 }, { "epoch": 27.81474820143885, "grad_norm": 0.2263595163822174, "learning_rate": 5.1573292374736484e-05, "loss": 0.0052, "step": 30930 }, { "epoch": 27.823741007194243, "grad_norm": 0.19754981994628906, "learning_rate": 5.1545747952750356e-05, "loss": 0.0061, "step": 30940 }, { "epoch": 27.83273381294964, "grad_norm": 0.17268356680870056, "learning_rate": 5.151820306120682e-05, "loss": 0.0053, "step": 30950 }, { "epoch": 27.841726618705035, "grad_norm": 0.1344740241765976, "learning_rate": 5.149065770847328e-05, "loss": 0.0047, "step": 30960 }, { "epoch": 27.850719424460433, "grad_norm": 0.24867713451385498, "learning_rate": 5.1463111902917297e-05, "loss": 0.0069, "step": 30970 }, { "epoch": 27.859712230215827, "grad_norm": 0.23376590013504028, "learning_rate": 5.143556565290654e-05, "loss": 0.0074, "step": 30980 }, { "epoch": 27.868705035971225, "grad_norm": 0.32934239506721497, "learning_rate": 5.140801896680882e-05, "loss": 0.0054, "step": 30990 }, { "epoch": 27.87769784172662, "grad_norm": 0.1378612071275711, "learning_rate": 5.1380471852992144e-05, "loss": 0.0055, "step": 31000 }, { "epoch": 27.886690647482013, "grad_norm": 0.11399900913238525, "learning_rate": 5.135292431982457e-05, "loss": 0.0054, "step": 31010 }, { "epoch": 27.89568345323741, "grad_norm": 0.1871822327375412, "learning_rate": 5.1325376375674294e-05, "loss": 0.0054, "step": 31020 }, { "epoch": 27.904676258992804, "grad_norm": 0.10270263254642487, "learning_rate": 5.129782802890968e-05, "loss": 0.0061, "step": 31030 }, { "epoch": 27.913669064748202, "grad_norm": 0.1420876830816269, "learning_rate": 5.127027928789916e-05, "loss": 0.0048, "step": 31040 }, { "epoch": 27.922661870503596, "grad_norm": 0.19086644053459167, "learning_rate": 5.124273016101135e-05, "loss": 0.0153, "step": 31050 }, { "epoch": 27.931654676258994, "grad_norm": 0.31464383006095886, "learning_rate": 5.121518065661492e-05, "loss": 0.0063, "step": 31060 }, { "epoch": 27.940647482014388, "grad_norm": 0.20238570868968964, "learning_rate": 5.11876307830787e-05, "loss": 0.0062, "step": 31070 }, { "epoch": 27.949640287769785, "grad_norm": 0.2214030772447586, "learning_rate": 5.1160080548771596e-05, "loss": 0.0089, "step": 31080 }, { "epoch": 27.95863309352518, "grad_norm": 0.195675328373909, "learning_rate": 5.1132529962062656e-05, "loss": 0.006, "step": 31090 }, { "epoch": 27.967625899280577, "grad_norm": 0.23319917917251587, "learning_rate": 5.110497903132101e-05, "loss": 0.0053, "step": 31100 }, { "epoch": 27.97661870503597, "grad_norm": 0.19477686285972595, "learning_rate": 5.107742776491592e-05, "loss": 0.0061, "step": 31110 }, { "epoch": 27.985611510791365, "grad_norm": 0.22473591566085815, "learning_rate": 5.104987617121673e-05, "loss": 0.0052, "step": 31120 }, { "epoch": 27.994604316546763, "grad_norm": 0.17756417393684387, "learning_rate": 5.102232425859287e-05, "loss": 0.0056, "step": 31130 }, { "epoch": 28.003597122302157, "grad_norm": 0.22197496891021729, "learning_rate": 5.09947720354139e-05, "loss": 0.0041, "step": 31140 }, { "epoch": 28.012589928057555, "grad_norm": 0.11288553476333618, "learning_rate": 5.096721951004942e-05, "loss": 0.0059, "step": 31150 }, { "epoch": 28.02158273381295, "grad_norm": 0.2375703752040863, "learning_rate": 5.0939666690869227e-05, "loss": 0.0042, "step": 31160 }, { "epoch": 28.030575539568346, "grad_norm": 0.17284518480300903, "learning_rate": 5.0912113586243096e-05, "loss": 0.0045, "step": 31170 }, { "epoch": 28.03956834532374, "grad_norm": 0.22395272552967072, "learning_rate": 5.0884560204540935e-05, "loss": 0.0057, "step": 31180 }, { "epoch": 28.048561151079138, "grad_norm": 0.17225924134254456, "learning_rate": 5.0857006554132736e-05, "loss": 0.0067, "step": 31190 }, { "epoch": 28.057553956834532, "grad_norm": 0.12415143847465515, "learning_rate": 5.0829452643388575e-05, "loss": 0.004, "step": 31200 }, { "epoch": 28.06654676258993, "grad_norm": 0.20623773336410522, "learning_rate": 5.08018984806786e-05, "loss": 0.0041, "step": 31210 }, { "epoch": 28.075539568345324, "grad_norm": 0.17448003590106964, "learning_rate": 5.0774344074373036e-05, "loss": 0.0073, "step": 31220 }, { "epoch": 28.084532374100718, "grad_norm": 0.18682537972927094, "learning_rate": 5.07467894328422e-05, "loss": 0.0042, "step": 31230 }, { "epoch": 28.093525179856115, "grad_norm": 0.26465708017349243, "learning_rate": 5.0719234564456454e-05, "loss": 0.0069, "step": 31240 }, { "epoch": 28.10251798561151, "grad_norm": 0.19847042858600616, "learning_rate": 5.0691679477586216e-05, "loss": 0.0033, "step": 31250 }, { "epoch": 28.111510791366907, "grad_norm": 0.15749581158161163, "learning_rate": 5.0664124180602035e-05, "loss": 0.0046, "step": 31260 }, { "epoch": 28.1205035971223, "grad_norm": 0.19139274954795837, "learning_rate": 5.063656868187447e-05, "loss": 0.0049, "step": 31270 }, { "epoch": 28.1294964028777, "grad_norm": 0.21403415501117706, "learning_rate": 5.060901298977413e-05, "loss": 0.0056, "step": 31280 }, { "epoch": 28.138489208633093, "grad_norm": 0.2577914893627167, "learning_rate": 5.0581457112671725e-05, "loss": 0.013, "step": 31290 }, { "epoch": 28.14748201438849, "grad_norm": 0.11830167472362518, "learning_rate": 5.0553901058938016e-05, "loss": 0.0044, "step": 31300 }, { "epoch": 28.156474820143885, "grad_norm": 0.17668738961219788, "learning_rate": 5.052634483694377e-05, "loss": 0.005, "step": 31310 }, { "epoch": 28.165467625899282, "grad_norm": 0.20581893622875214, "learning_rate": 5.049878845505988e-05, "loss": 0.009, "step": 31320 }, { "epoch": 28.174460431654676, "grad_norm": 0.07917562127113342, "learning_rate": 5.047123192165721e-05, "loss": 0.0049, "step": 31330 }, { "epoch": 28.18345323741007, "grad_norm": 0.1519465297460556, "learning_rate": 5.0443675245106735e-05, "loss": 0.0054, "step": 31340 }, { "epoch": 28.192446043165468, "grad_norm": 0.1944742202758789, "learning_rate": 5.0416118433779426e-05, "loss": 0.0046, "step": 31350 }, { "epoch": 28.201438848920862, "grad_norm": 0.10519389063119888, "learning_rate": 5.038856149604633e-05, "loss": 0.0054, "step": 31360 }, { "epoch": 28.21043165467626, "grad_norm": 0.15022629499435425, "learning_rate": 5.03610044402785e-05, "loss": 0.006, "step": 31370 }, { "epoch": 28.219424460431654, "grad_norm": 0.16383858025074005, "learning_rate": 5.033344727484707e-05, "loss": 0.0076, "step": 31380 }, { "epoch": 28.22841726618705, "grad_norm": 0.23119370639324188, "learning_rate": 5.030589000812315e-05, "loss": 0.0057, "step": 31390 }, { "epoch": 28.237410071942445, "grad_norm": 0.18029092252254486, "learning_rate": 5.027833264847793e-05, "loss": 0.0046, "step": 31400 }, { "epoch": 28.246402877697843, "grad_norm": 0.18747396767139435, "learning_rate": 5.025077520428258e-05, "loss": 0.0091, "step": 31410 }, { "epoch": 28.255395683453237, "grad_norm": 0.14425688982009888, "learning_rate": 5.022321768390837e-05, "loss": 0.0045, "step": 31420 }, { "epoch": 28.264388489208635, "grad_norm": 0.18921248614788055, "learning_rate": 5.0195660095726516e-05, "loss": 0.0074, "step": 31430 }, { "epoch": 28.27338129496403, "grad_norm": 0.08874746412038803, "learning_rate": 5.016810244810829e-05, "loss": 0.0061, "step": 31440 }, { "epoch": 28.282374100719423, "grad_norm": 0.2634638547897339, "learning_rate": 5.0140544749424976e-05, "loss": 0.0044, "step": 31450 }, { "epoch": 28.29136690647482, "grad_norm": 0.29637351632118225, "learning_rate": 5.0112987008047874e-05, "loss": 0.0046, "step": 31460 }, { "epoch": 28.300359712230215, "grad_norm": 0.10054785013198853, "learning_rate": 5.008542923234831e-05, "loss": 0.0061, "step": 31470 }, { "epoch": 28.309352517985612, "grad_norm": 0.16781269013881683, "learning_rate": 5.00578714306976e-05, "loss": 0.0059, "step": 31480 }, { "epoch": 28.318345323741006, "grad_norm": 0.20206138491630554, "learning_rate": 5.0030313611467084e-05, "loss": 0.0047, "step": 31490 }, { "epoch": 28.327338129496404, "grad_norm": 0.15406100451946259, "learning_rate": 5.0002755783028074e-05, "loss": 0.005, "step": 31500 }, { "epoch": 28.336330935251798, "grad_norm": 0.3787955343723297, "learning_rate": 4.997519795375194e-05, "loss": 0.0067, "step": 31510 }, { "epoch": 28.345323741007196, "grad_norm": 0.11084385216236115, "learning_rate": 4.9947640132010016e-05, "loss": 0.0066, "step": 31520 }, { "epoch": 28.35431654676259, "grad_norm": 0.13164304196834564, "learning_rate": 4.9920082326173625e-05, "loss": 0.0061, "step": 31530 }, { "epoch": 28.363309352517987, "grad_norm": 0.23469634354114532, "learning_rate": 4.9892524544614114e-05, "loss": 0.0063, "step": 31540 }, { "epoch": 28.37230215827338, "grad_norm": 0.22910557687282562, "learning_rate": 4.986496679570283e-05, "loss": 0.0056, "step": 31550 }, { "epoch": 28.381294964028775, "grad_norm": 0.13856640458106995, "learning_rate": 4.983740908781105e-05, "loss": 0.0062, "step": 31560 }, { "epoch": 28.390287769784173, "grad_norm": 0.10611327737569809, "learning_rate": 4.9809851429310116e-05, "loss": 0.0038, "step": 31570 }, { "epoch": 28.399280575539567, "grad_norm": 0.1563033014535904, "learning_rate": 4.9782293828571275e-05, "loss": 0.0074, "step": 31580 }, { "epoch": 28.408273381294965, "grad_norm": 0.17360800504684448, "learning_rate": 4.9754736293965846e-05, "loss": 0.0053, "step": 31590 }, { "epoch": 28.41726618705036, "grad_norm": 0.21905742585659027, "learning_rate": 4.972717883386502e-05, "loss": 0.0074, "step": 31600 }, { "epoch": 28.426258992805757, "grad_norm": 0.29826879501342773, "learning_rate": 4.9699621456640075e-05, "loss": 0.0059, "step": 31610 }, { "epoch": 28.43525179856115, "grad_norm": 0.1841299682855606, "learning_rate": 4.9672064170662214e-05, "loss": 0.0049, "step": 31620 }, { "epoch": 28.444244604316548, "grad_norm": 0.20587560534477234, "learning_rate": 4.9644506984302583e-05, "loss": 0.0099, "step": 31630 }, { "epoch": 28.453237410071942, "grad_norm": 0.23623026907444, "learning_rate": 4.9616949905932356e-05, "loss": 0.0057, "step": 31640 }, { "epoch": 28.46223021582734, "grad_norm": 0.19059062004089355, "learning_rate": 4.9589392943922615e-05, "loss": 0.0062, "step": 31650 }, { "epoch": 28.471223021582734, "grad_norm": 0.22814607620239258, "learning_rate": 4.956183610664447e-05, "loss": 0.0071, "step": 31660 }, { "epoch": 28.480215827338128, "grad_norm": 0.1484614908695221, "learning_rate": 4.9534279402468945e-05, "loss": 0.0045, "step": 31670 }, { "epoch": 28.489208633093526, "grad_norm": 0.26007208228111267, "learning_rate": 4.9506722839767036e-05, "loss": 0.0051, "step": 31680 }, { "epoch": 28.49820143884892, "grad_norm": 0.19316205382347107, "learning_rate": 4.947916642690972e-05, "loss": 0.0123, "step": 31690 }, { "epoch": 28.507194244604317, "grad_norm": 0.17836593091487885, "learning_rate": 4.9451610172267874e-05, "loss": 0.0032, "step": 31700 }, { "epoch": 28.51618705035971, "grad_norm": 0.26282694935798645, "learning_rate": 4.9424054084212376e-05, "loss": 0.0053, "step": 31710 }, { "epoch": 28.52517985611511, "grad_norm": 0.1641293168067932, "learning_rate": 4.939649817111407e-05, "loss": 0.0058, "step": 31720 }, { "epoch": 28.534172661870503, "grad_norm": 0.23528896272182465, "learning_rate": 4.936894244134365e-05, "loss": 0.0054, "step": 31730 }, { "epoch": 28.5431654676259, "grad_norm": 0.19803857803344727, "learning_rate": 4.9341386903271886e-05, "loss": 0.0045, "step": 31740 }, { "epoch": 28.552158273381295, "grad_norm": 0.21679013967514038, "learning_rate": 4.931383156526936e-05, "loss": 0.011, "step": 31750 }, { "epoch": 28.56115107913669, "grad_norm": 0.16181211173534393, "learning_rate": 4.92862764357067e-05, "loss": 0.007, "step": 31760 }, { "epoch": 28.570143884892087, "grad_norm": 0.10650932788848877, "learning_rate": 4.925872152295443e-05, "loss": 0.0049, "step": 31770 }, { "epoch": 28.57913669064748, "grad_norm": 0.18703554570674896, "learning_rate": 4.923116683538296e-05, "loss": 0.0056, "step": 31780 }, { "epoch": 28.58812949640288, "grad_norm": 0.1390378177165985, "learning_rate": 4.920361238136273e-05, "loss": 0.0063, "step": 31790 }, { "epoch": 28.597122302158272, "grad_norm": 0.17824430763721466, "learning_rate": 4.9176058169264014e-05, "loss": 0.0067, "step": 31800 }, { "epoch": 28.60611510791367, "grad_norm": 0.1482369303703308, "learning_rate": 4.9148504207457074e-05, "loss": 0.0078, "step": 31810 }, { "epoch": 28.615107913669064, "grad_norm": 0.21259815990924835, "learning_rate": 4.912095050431208e-05, "loss": 0.0062, "step": 31820 }, { "epoch": 28.62410071942446, "grad_norm": 0.11528604477643967, "learning_rate": 4.909339706819911e-05, "loss": 0.0059, "step": 31830 }, { "epoch": 28.633093525179856, "grad_norm": 0.161492720246315, "learning_rate": 4.906584390748819e-05, "loss": 0.0058, "step": 31840 }, { "epoch": 28.642086330935253, "grad_norm": 0.1862720400094986, "learning_rate": 4.9038291030549195e-05, "loss": 0.0142, "step": 31850 }, { "epoch": 28.651079136690647, "grad_norm": 0.14332464337348938, "learning_rate": 4.9010738445751995e-05, "loss": 0.006, "step": 31860 }, { "epoch": 28.66007194244604, "grad_norm": 0.13267390429973602, "learning_rate": 4.8983186161466364e-05, "loss": 0.004, "step": 31870 }, { "epoch": 28.66906474820144, "grad_norm": 0.22453495860099792, "learning_rate": 4.89556341860619e-05, "loss": 0.0059, "step": 31880 }, { "epoch": 28.678057553956833, "grad_norm": 0.21568310260772705, "learning_rate": 4.892808252790822e-05, "loss": 0.0065, "step": 31890 }, { "epoch": 28.68705035971223, "grad_norm": 0.42328500747680664, "learning_rate": 4.890053119537475e-05, "loss": 0.0056, "step": 31900 }, { "epoch": 28.696043165467625, "grad_norm": 0.09949331730604172, "learning_rate": 4.887298019683087e-05, "loss": 0.0077, "step": 31910 }, { "epoch": 28.705035971223023, "grad_norm": 0.16000555455684662, "learning_rate": 4.884542954064587e-05, "loss": 0.0081, "step": 31920 }, { "epoch": 28.714028776978417, "grad_norm": 0.20759724080562592, "learning_rate": 4.881787923518887e-05, "loss": 0.0069, "step": 31930 }, { "epoch": 28.723021582733814, "grad_norm": 0.20439106225967407, "learning_rate": 4.879032928882896e-05, "loss": 0.0044, "step": 31940 }, { "epoch": 28.73201438848921, "grad_norm": 0.15287595987319946, "learning_rate": 4.876277970993505e-05, "loss": 0.0042, "step": 31950 }, { "epoch": 28.741007194244606, "grad_norm": 0.12050983309745789, "learning_rate": 4.873523050687602e-05, "loss": 0.0074, "step": 31960 }, { "epoch": 28.75, "grad_norm": 0.18182194232940674, "learning_rate": 4.870768168802056e-05, "loss": 0.0047, "step": 31970 }, { "epoch": 28.758992805755394, "grad_norm": 0.13988421857357025, "learning_rate": 4.868013326173728e-05, "loss": 0.0051, "step": 31980 }, { "epoch": 28.76798561151079, "grad_norm": 0.2210122048854828, "learning_rate": 4.865258523639468e-05, "loss": 0.0051, "step": 31990 }, { "epoch": 28.776978417266186, "grad_norm": 0.18500378727912903, "learning_rate": 4.862503762036109e-05, "loss": 0.0044, "step": 32000 }, { "epoch": 28.785971223021583, "grad_norm": 0.14010494947433472, "learning_rate": 4.859749042200478e-05, "loss": 0.0042, "step": 32010 }, { "epoch": 28.794964028776977, "grad_norm": 0.12609319388866425, "learning_rate": 4.856994364969384e-05, "loss": 0.0044, "step": 32020 }, { "epoch": 28.803956834532375, "grad_norm": 0.09950044751167297, "learning_rate": 4.854239731179625e-05, "loss": 0.0028, "step": 32030 }, { "epoch": 28.81294964028777, "grad_norm": 0.13975924253463745, "learning_rate": 4.85148514166799e-05, "loss": 0.0038, "step": 32040 }, { "epoch": 28.821942446043167, "grad_norm": 0.1382826566696167, "learning_rate": 4.8487305972712456e-05, "loss": 0.0043, "step": 32050 }, { "epoch": 28.83093525179856, "grad_norm": 0.18239620327949524, "learning_rate": 4.8459760988261526e-05, "loss": 0.0037, "step": 32060 }, { "epoch": 28.83992805755396, "grad_norm": 0.15082411468029022, "learning_rate": 4.843221647169453e-05, "loss": 0.0058, "step": 32070 }, { "epoch": 28.848920863309353, "grad_norm": 0.17206794023513794, "learning_rate": 4.840467243137878e-05, "loss": 0.0057, "step": 32080 }, { "epoch": 28.857913669064747, "grad_norm": 0.17859408259391785, "learning_rate": 4.837712887568143e-05, "loss": 0.0052, "step": 32090 }, { "epoch": 28.866906474820144, "grad_norm": 0.22073803842067719, "learning_rate": 4.8349585812969464e-05, "loss": 0.0052, "step": 32100 }, { "epoch": 28.87589928057554, "grad_norm": 0.11155100911855698, "learning_rate": 4.8322043251609775e-05, "loss": 0.0046, "step": 32110 }, { "epoch": 28.884892086330936, "grad_norm": 0.15577131509780884, "learning_rate": 4.8294501199969015e-05, "loss": 0.0045, "step": 32120 }, { "epoch": 28.89388489208633, "grad_norm": 0.17257854342460632, "learning_rate": 4.826695966641376e-05, "loss": 0.0036, "step": 32130 }, { "epoch": 28.902877697841728, "grad_norm": 0.31660565733909607, "learning_rate": 4.823941865931043e-05, "loss": 0.0077, "step": 32140 }, { "epoch": 28.91187050359712, "grad_norm": 0.28114598989486694, "learning_rate": 4.82118781870252e-05, "loss": 0.0064, "step": 32150 }, { "epoch": 28.92086330935252, "grad_norm": 0.16497904062271118, "learning_rate": 4.8184338257924185e-05, "loss": 0.0054, "step": 32160 }, { "epoch": 28.929856115107913, "grad_norm": 0.20322714745998383, "learning_rate": 4.815679888037324e-05, "loss": 0.0068, "step": 32170 }, { "epoch": 28.93884892086331, "grad_norm": 0.209806427359581, "learning_rate": 4.8129260062738135e-05, "loss": 0.0038, "step": 32180 }, { "epoch": 28.947841726618705, "grad_norm": 0.29533621668815613, "learning_rate": 4.810172181338445e-05, "loss": 0.007, "step": 32190 }, { "epoch": 28.9568345323741, "grad_norm": 0.1823563426733017, "learning_rate": 4.807418414067753e-05, "loss": 0.0033, "step": 32200 }, { "epoch": 28.965827338129497, "grad_norm": 0.11653761565685272, "learning_rate": 4.804664705298264e-05, "loss": 0.008, "step": 32210 }, { "epoch": 28.97482014388489, "grad_norm": 0.20992448925971985, "learning_rate": 4.80191105586648e-05, "loss": 0.0039, "step": 32220 }, { "epoch": 28.98381294964029, "grad_norm": 0.1842779815196991, "learning_rate": 4.799157466608886e-05, "loss": 0.0035, "step": 32230 }, { "epoch": 28.992805755395683, "grad_norm": 0.125957190990448, "learning_rate": 4.796403938361951e-05, "loss": 0.0051, "step": 32240 }, { "epoch": 29.00179856115108, "grad_norm": 0.18266314268112183, "learning_rate": 4.793650471962123e-05, "loss": 0.0035, "step": 32250 }, { "epoch": 29.010791366906474, "grad_norm": 0.18821614980697632, "learning_rate": 4.790897068245835e-05, "loss": 0.0046, "step": 32260 }, { "epoch": 29.019784172661872, "grad_norm": 0.17709213495254517, "learning_rate": 4.7881437280494954e-05, "loss": 0.0068, "step": 32270 }, { "epoch": 29.028776978417266, "grad_norm": 0.2671731412410736, "learning_rate": 4.7853904522094965e-05, "loss": 0.0059, "step": 32280 }, { "epoch": 29.037769784172664, "grad_norm": 0.1162317767739296, "learning_rate": 4.782637241562215e-05, "loss": 0.0067, "step": 32290 }, { "epoch": 29.046762589928058, "grad_norm": 0.20157171785831451, "learning_rate": 4.779884096943997e-05, "loss": 0.0051, "step": 32300 }, { "epoch": 29.055755395683452, "grad_norm": 0.24606147408485413, "learning_rate": 4.777131019191182e-05, "loss": 0.01, "step": 32310 }, { "epoch": 29.06474820143885, "grad_norm": 0.1870049089193344, "learning_rate": 4.774378009140076e-05, "loss": 0.0054, "step": 32320 }, { "epoch": 29.073741007194243, "grad_norm": 0.17507484555244446, "learning_rate": 4.7716250676269735e-05, "loss": 0.005, "step": 32330 }, { "epoch": 29.08273381294964, "grad_norm": 0.13643936812877655, "learning_rate": 4.7688721954881485e-05, "loss": 0.0044, "step": 32340 }, { "epoch": 29.091726618705035, "grad_norm": 0.09104447811841965, "learning_rate": 4.7661193935598446e-05, "loss": 0.0097, "step": 32350 }, { "epoch": 29.100719424460433, "grad_norm": 0.14108559489250183, "learning_rate": 4.763366662678296e-05, "loss": 0.0065, "step": 32360 }, { "epoch": 29.109712230215827, "grad_norm": 0.19094590842723846, "learning_rate": 4.7606140036797064e-05, "loss": 0.0058, "step": 32370 }, { "epoch": 29.118705035971225, "grad_norm": 0.16435761749744415, "learning_rate": 4.7578614174002614e-05, "loss": 0.004, "step": 32380 }, { "epoch": 29.12769784172662, "grad_norm": 0.19703495502471924, "learning_rate": 4.755108904676125e-05, "loss": 0.0059, "step": 32390 }, { "epoch": 29.136690647482013, "grad_norm": 0.214509516954422, "learning_rate": 4.752356466343436e-05, "loss": 0.0087, "step": 32400 }, { "epoch": 29.14568345323741, "grad_norm": 0.2086803913116455, "learning_rate": 4.7496041032383174e-05, "loss": 0.0038, "step": 32410 }, { "epoch": 29.154676258992804, "grad_norm": 0.14256009459495544, "learning_rate": 4.746851816196858e-05, "loss": 0.0041, "step": 32420 }, { "epoch": 29.163669064748202, "grad_norm": 0.16262219846248627, "learning_rate": 4.744099606055135e-05, "loss": 0.0049, "step": 32430 }, { "epoch": 29.172661870503596, "grad_norm": 0.18050888180732727, "learning_rate": 4.741347473649193e-05, "loss": 0.0067, "step": 32440 }, { "epoch": 29.181654676258994, "grad_norm": 0.1325003057718277, "learning_rate": 4.738595419815058e-05, "loss": 0.0041, "step": 32450 }, { "epoch": 29.190647482014388, "grad_norm": 0.26245972514152527, "learning_rate": 4.7358434453887365e-05, "loss": 0.0072, "step": 32460 }, { "epoch": 29.199640287769785, "grad_norm": 0.17075391113758087, "learning_rate": 4.7330915512061976e-05, "loss": 0.0052, "step": 32470 }, { "epoch": 29.20863309352518, "grad_norm": 0.11626191437244415, "learning_rate": 4.730339738103402e-05, "loss": 0.0054, "step": 32480 }, { "epoch": 29.217625899280577, "grad_norm": 0.21256697177886963, "learning_rate": 4.727588006916271e-05, "loss": 0.0069, "step": 32490 }, { "epoch": 29.22661870503597, "grad_norm": 0.13188357651233673, "learning_rate": 4.724836358480711e-05, "loss": 0.0037, "step": 32500 }, { "epoch": 29.235611510791365, "grad_norm": 0.20884890854358673, "learning_rate": 4.722084793632601e-05, "loss": 0.0061, "step": 32510 }, { "epoch": 29.244604316546763, "grad_norm": 0.20489652454853058, "learning_rate": 4.719333313207792e-05, "loss": 0.0081, "step": 32520 }, { "epoch": 29.253597122302157, "grad_norm": 0.212389275431633, "learning_rate": 4.716581918042114e-05, "loss": 0.0061, "step": 32530 }, { "epoch": 29.262589928057555, "grad_norm": 0.10842112451791763, "learning_rate": 4.7138306089713636e-05, "loss": 0.0041, "step": 32540 }, { "epoch": 29.27158273381295, "grad_norm": 0.12752996385097504, "learning_rate": 4.7110793868313183e-05, "loss": 0.004, "step": 32550 }, { "epoch": 29.280575539568346, "grad_norm": 0.2246636152267456, "learning_rate": 4.708328252457729e-05, "loss": 0.0061, "step": 32560 }, { "epoch": 29.28956834532374, "grad_norm": 0.14457261562347412, "learning_rate": 4.7055772066863135e-05, "loss": 0.0104, "step": 32570 }, { "epoch": 29.298561151079138, "grad_norm": 0.11736220866441727, "learning_rate": 4.702826250352771e-05, "loss": 0.0035, "step": 32580 }, { "epoch": 29.307553956834532, "grad_norm": 0.11742094159126282, "learning_rate": 4.7000753842927653e-05, "loss": 0.0042, "step": 32590 }, { "epoch": 29.31654676258993, "grad_norm": 0.1922319233417511, "learning_rate": 4.6973246093419384e-05, "loss": 0.0056, "step": 32600 }, { "epoch": 29.325539568345324, "grad_norm": 0.21799317002296448, "learning_rate": 4.694573926335906e-05, "loss": 0.0068, "step": 32610 }, { "epoch": 29.334532374100718, "grad_norm": 0.17602431774139404, "learning_rate": 4.6918233361102476e-05, "loss": 0.0044, "step": 32620 }, { "epoch": 29.343525179856115, "grad_norm": 0.2568187117576599, "learning_rate": 4.689072839500525e-05, "loss": 0.0058, "step": 32630 }, { "epoch": 29.35251798561151, "grad_norm": 0.20795442163944244, "learning_rate": 4.6863224373422635e-05, "loss": 0.0043, "step": 32640 }, { "epoch": 29.361510791366907, "grad_norm": 0.0996008962392807, "learning_rate": 4.683572130470962e-05, "loss": 0.0034, "step": 32650 }, { "epoch": 29.3705035971223, "grad_norm": 0.19925987720489502, "learning_rate": 4.680821919722094e-05, "loss": 0.0078, "step": 32660 }, { "epoch": 29.3794964028777, "grad_norm": 0.1448366343975067, "learning_rate": 4.6780718059310975e-05, "loss": 0.0053, "step": 32670 }, { "epoch": 29.388489208633093, "grad_norm": 0.1326507329940796, "learning_rate": 4.675321789933389e-05, "loss": 0.0035, "step": 32680 }, { "epoch": 29.39748201438849, "grad_norm": 0.17664183676242828, "learning_rate": 4.6725718725643464e-05, "loss": 0.0032, "step": 32690 }, { "epoch": 29.406474820143885, "grad_norm": 0.17981165647506714, "learning_rate": 4.669822054659323e-05, "loss": 0.0078, "step": 32700 }, { "epoch": 29.415467625899282, "grad_norm": 0.13339915871620178, "learning_rate": 4.667072337053644e-05, "loss": 0.0062, "step": 32710 }, { "epoch": 29.424460431654676, "grad_norm": 0.14696918427944183, "learning_rate": 4.6643227205825965e-05, "loss": 0.006, "step": 32720 }, { "epoch": 29.43345323741007, "grad_norm": 0.19443069398403168, "learning_rate": 4.6615732060814454e-05, "loss": 0.0052, "step": 32730 }, { "epoch": 29.442446043165468, "grad_norm": 0.11210601776838303, "learning_rate": 4.658823794385417e-05, "loss": 0.0047, "step": 32740 }, { "epoch": 29.451438848920862, "grad_norm": 0.16840514540672302, "learning_rate": 4.6560744863297115e-05, "loss": 0.0064, "step": 32750 }, { "epoch": 29.46043165467626, "grad_norm": 0.180673748254776, "learning_rate": 4.653325282749498e-05, "loss": 0.0062, "step": 32760 }, { "epoch": 29.469424460431654, "grad_norm": 0.22107456624507904, "learning_rate": 4.6505761844799075e-05, "loss": 0.0063, "step": 32770 }, { "epoch": 29.47841726618705, "grad_norm": 0.1775764524936676, "learning_rate": 4.647827192356048e-05, "loss": 0.0049, "step": 32780 }, { "epoch": 29.487410071942445, "grad_norm": 0.1605149507522583, "learning_rate": 4.645078307212989e-05, "loss": 0.0054, "step": 32790 }, { "epoch": 29.496402877697843, "grad_norm": 0.16662359237670898, "learning_rate": 4.642329529885768e-05, "loss": 0.0045, "step": 32800 }, { "epoch": 29.505395683453237, "grad_norm": 0.4378443658351898, "learning_rate": 4.639580861209393e-05, "loss": 0.0056, "step": 32810 }, { "epoch": 29.514388489208635, "grad_norm": 0.21728195250034332, "learning_rate": 4.636832302018835e-05, "loss": 0.0073, "step": 32820 }, { "epoch": 29.52338129496403, "grad_norm": 0.11584041267633438, "learning_rate": 4.6340838531490365e-05, "loss": 0.0037, "step": 32830 }, { "epoch": 29.532374100719423, "grad_norm": 0.17728064954280853, "learning_rate": 4.6313355154349e-05, "loss": 0.0046, "step": 32840 }, { "epoch": 29.54136690647482, "grad_norm": 0.1283402293920517, "learning_rate": 4.6285872897113025e-05, "loss": 0.0039, "step": 32850 }, { "epoch": 29.550359712230215, "grad_norm": 0.13165591657161713, "learning_rate": 4.625839176813077e-05, "loss": 0.0036, "step": 32860 }, { "epoch": 29.559352517985612, "grad_norm": 0.2359125316143036, "learning_rate": 4.623091177575031e-05, "loss": 0.0041, "step": 32870 }, { "epoch": 29.568345323741006, "grad_norm": 0.1691935509443283, "learning_rate": 4.620343292831936e-05, "loss": 0.0042, "step": 32880 }, { "epoch": 29.577338129496404, "grad_norm": 0.22839972376823425, "learning_rate": 4.6175955234185206e-05, "loss": 0.0053, "step": 32890 }, { "epoch": 29.586330935251798, "grad_norm": 0.17575302720069885, "learning_rate": 4.614847870169492e-05, "loss": 0.0032, "step": 32900 }, { "epoch": 29.595323741007196, "grad_norm": 0.11189423501491547, "learning_rate": 4.612100333919509e-05, "loss": 0.0045, "step": 32910 }, { "epoch": 29.60431654676259, "grad_norm": 0.15457582473754883, "learning_rate": 4.609352915503202e-05, "loss": 0.0052, "step": 32920 }, { "epoch": 29.613309352517987, "grad_norm": 0.16052821278572083, "learning_rate": 4.606605615755166e-05, "loss": 0.0038, "step": 32930 }, { "epoch": 29.62230215827338, "grad_norm": 0.1839350312948227, "learning_rate": 4.6038584355099576e-05, "loss": 0.0075, "step": 32940 }, { "epoch": 29.631294964028775, "grad_norm": 0.16705173254013062, "learning_rate": 4.6011113756020964e-05, "loss": 0.0046, "step": 32950 }, { "epoch": 29.640287769784173, "grad_norm": 0.15957044064998627, "learning_rate": 4.598364436866066e-05, "loss": 0.0071, "step": 32960 }, { "epoch": 29.649280575539567, "grad_norm": 0.21996454894542694, "learning_rate": 4.595617620136316e-05, "loss": 0.0063, "step": 32970 }, { "epoch": 29.658273381294965, "grad_norm": 0.16286534070968628, "learning_rate": 4.592870926247257e-05, "loss": 0.0066, "step": 32980 }, { "epoch": 29.66726618705036, "grad_norm": 0.2419711947441101, "learning_rate": 4.5901243560332594e-05, "loss": 0.0062, "step": 32990 }, { "epoch": 29.676258992805757, "grad_norm": 0.2485160380601883, "learning_rate": 4.587377910328662e-05, "loss": 0.0049, "step": 33000 }, { "epoch": 29.68525179856115, "grad_norm": 0.1292918175458908, "learning_rate": 4.5846315899677586e-05, "loss": 0.0035, "step": 33010 }, { "epoch": 29.694244604316548, "grad_norm": 0.3768192529678345, "learning_rate": 4.5818853957848114e-05, "loss": 0.0068, "step": 33020 }, { "epoch": 29.703237410071942, "grad_norm": 0.2553615868091583, "learning_rate": 4.579139328614043e-05, "loss": 0.0046, "step": 33030 }, { "epoch": 29.71223021582734, "grad_norm": 0.18640419840812683, "learning_rate": 4.576393389289633e-05, "loss": 0.0051, "step": 33040 }, { "epoch": 29.721223021582734, "grad_norm": 0.21267437934875488, "learning_rate": 4.573647578645728e-05, "loss": 0.0073, "step": 33050 }, { "epoch": 29.730215827338128, "grad_norm": 0.12486056983470917, "learning_rate": 4.57090189751643e-05, "loss": 0.0033, "step": 33060 }, { "epoch": 29.739208633093526, "grad_norm": 0.15794901549816132, "learning_rate": 4.568156346735806e-05, "loss": 0.0053, "step": 33070 }, { "epoch": 29.74820143884892, "grad_norm": 0.1566610485315323, "learning_rate": 4.565410927137882e-05, "loss": 0.0042, "step": 33080 }, { "epoch": 29.757194244604317, "grad_norm": 0.1427113264799118, "learning_rate": 4.562665639556644e-05, "loss": 0.0067, "step": 33090 }, { "epoch": 29.76618705035971, "grad_norm": 0.165365070104599, "learning_rate": 4.559920484826037e-05, "loss": 0.0048, "step": 33100 }, { "epoch": 29.77517985611511, "grad_norm": 0.21694310009479523, "learning_rate": 4.5571754637799665e-05, "loss": 0.0055, "step": 33110 }, { "epoch": 29.784172661870503, "grad_norm": 0.2768309414386749, "learning_rate": 4.554430577252298e-05, "loss": 0.0045, "step": 33120 }, { "epoch": 29.7931654676259, "grad_norm": 0.1353670209646225, "learning_rate": 4.551685826076858e-05, "loss": 0.0045, "step": 33130 }, { "epoch": 29.802158273381295, "grad_norm": 0.15782225131988525, "learning_rate": 4.5489412110874246e-05, "loss": 0.0053, "step": 33140 }, { "epoch": 29.81115107913669, "grad_norm": 0.1861574500799179, "learning_rate": 4.5461967331177444e-05, "loss": 0.006, "step": 33150 }, { "epoch": 29.820143884892087, "grad_norm": 0.24104353785514832, "learning_rate": 4.5434523930015115e-05, "loss": 0.007, "step": 33160 }, { "epoch": 29.82913669064748, "grad_norm": 0.17586538195610046, "learning_rate": 4.540708191572388e-05, "loss": 0.0059, "step": 33170 }, { "epoch": 29.83812949640288, "grad_norm": 0.3169194757938385, "learning_rate": 4.537964129663991e-05, "loss": 0.0036, "step": 33180 }, { "epoch": 29.847122302158272, "grad_norm": 0.21527649462223053, "learning_rate": 4.535220208109889e-05, "loss": 0.0068, "step": 33190 }, { "epoch": 29.85611510791367, "grad_norm": 0.21055766940116882, "learning_rate": 4.5324764277436194e-05, "loss": 0.0035, "step": 33200 }, { "epoch": 29.865107913669064, "grad_norm": 0.19242341816425323, "learning_rate": 4.529732789398664e-05, "loss": 0.0053, "step": 33210 }, { "epoch": 29.87410071942446, "grad_norm": 0.14981211721897125, "learning_rate": 4.526989293908472e-05, "loss": 0.0067, "step": 33220 }, { "epoch": 29.883093525179856, "grad_norm": 0.16097621619701385, "learning_rate": 4.524245942106442e-05, "loss": 0.0047, "step": 33230 }, { "epoch": 29.892086330935253, "grad_norm": 0.19784963130950928, "learning_rate": 4.5215027348259345e-05, "loss": 0.0057, "step": 33240 }, { "epoch": 29.901079136690647, "grad_norm": 0.11232686787843704, "learning_rate": 4.5187596729002616e-05, "loss": 0.0058, "step": 33250 }, { "epoch": 29.91007194244604, "grad_norm": 0.22047820687294006, "learning_rate": 4.516016757162693e-05, "loss": 0.0047, "step": 33260 }, { "epoch": 29.91906474820144, "grad_norm": 0.06371966004371643, "learning_rate": 4.513273988446457e-05, "loss": 0.005, "step": 33270 }, { "epoch": 29.928057553956833, "grad_norm": 0.12989617884159088, "learning_rate": 4.5105313675847296e-05, "loss": 0.0036, "step": 33280 }, { "epoch": 29.93705035971223, "grad_norm": 0.11137615889310837, "learning_rate": 4.5077888954106495e-05, "loss": 0.0051, "step": 33290 }, { "epoch": 29.946043165467625, "grad_norm": 0.19087713956832886, "learning_rate": 4.505046572757309e-05, "loss": 0.0071, "step": 33300 }, { "epoch": 29.955035971223023, "grad_norm": 0.22088702023029327, "learning_rate": 4.502304400457749e-05, "loss": 0.0072, "step": 33310 }, { "epoch": 29.964028776978417, "grad_norm": 0.28209057450294495, "learning_rate": 4.499562379344973e-05, "loss": 0.0037, "step": 33320 }, { "epoch": 29.973021582733814, "grad_norm": 0.21331152319908142, "learning_rate": 4.4968205102519306e-05, "loss": 0.0057, "step": 33330 }, { "epoch": 29.98201438848921, "grad_norm": 0.24355323612689972, "learning_rate": 4.494078794011532e-05, "loss": 0.0057, "step": 33340 }, { "epoch": 29.991007194244606, "grad_norm": 0.19251656532287598, "learning_rate": 4.491337231456639e-05, "loss": 0.0044, "step": 33350 }, { "epoch": 30.0, "grad_norm": 0.13664361834526062, "learning_rate": 4.4885958234200634e-05, "loss": 0.0032, "step": 33360 }, { "epoch": 30.008992805755394, "grad_norm": 0.08084183186292648, "learning_rate": 4.485854570734575e-05, "loss": 0.0039, "step": 33370 }, { "epoch": 30.01798561151079, "grad_norm": 0.27529552578926086, "learning_rate": 4.483113474232891e-05, "loss": 0.005, "step": 33380 }, { "epoch": 30.026978417266186, "grad_norm": 0.13741998374462128, "learning_rate": 4.480372534747688e-05, "loss": 0.005, "step": 33390 }, { "epoch": 30.035971223021583, "grad_norm": 0.2207656353712082, "learning_rate": 4.477631753111588e-05, "loss": 0.0096, "step": 33400 }, { "epoch": 30.044964028776977, "grad_norm": 0.1512993574142456, "learning_rate": 4.4748911301571686e-05, "loss": 0.0041, "step": 33410 }, { "epoch": 30.053956834532375, "grad_norm": 0.1419074684381485, "learning_rate": 4.472150666716961e-05, "loss": 0.0038, "step": 33420 }, { "epoch": 30.06294964028777, "grad_norm": 0.14277754724025726, "learning_rate": 4.469410363623442e-05, "loss": 0.0031, "step": 33430 }, { "epoch": 30.071942446043167, "grad_norm": 0.1578427106142044, "learning_rate": 4.466670221709044e-05, "loss": 0.0105, "step": 33440 }, { "epoch": 30.08093525179856, "grad_norm": 0.12627556920051575, "learning_rate": 4.463930241806154e-05, "loss": 0.0048, "step": 33450 }, { "epoch": 30.08992805755396, "grad_norm": 0.10083812475204468, "learning_rate": 4.4611904247471006e-05, "loss": 0.0028, "step": 33460 }, { "epoch": 30.098920863309353, "grad_norm": 0.21020027995109558, "learning_rate": 4.458450771364171e-05, "loss": 0.0041, "step": 33470 }, { "epoch": 30.107913669064747, "grad_norm": 0.08351470530033112, "learning_rate": 4.4557112824895965e-05, "loss": 0.0034, "step": 33480 }, { "epoch": 30.116906474820144, "grad_norm": 0.15337644517421722, "learning_rate": 4.452971958955563e-05, "loss": 0.0043, "step": 33490 }, { "epoch": 30.12589928057554, "grad_norm": 0.11897151917219162, "learning_rate": 4.450232801594208e-05, "loss": 0.0037, "step": 33500 }, { "epoch": 30.134892086330936, "grad_norm": 0.20995691418647766, "learning_rate": 4.447493811237609e-05, "loss": 0.0046, "step": 33510 }, { "epoch": 30.14388489208633, "grad_norm": 0.22867117822170258, "learning_rate": 4.444754988717804e-05, "loss": 0.0052, "step": 33520 }, { "epoch": 30.152877697841728, "grad_norm": 0.19509398937225342, "learning_rate": 4.442016334866771e-05, "loss": 0.0086, "step": 33530 }, { "epoch": 30.16187050359712, "grad_norm": 0.13119910657405853, "learning_rate": 4.4392778505164445e-05, "loss": 0.0033, "step": 33540 }, { "epoch": 30.17086330935252, "grad_norm": 0.2875320613384247, "learning_rate": 4.436539536498702e-05, "loss": 0.0035, "step": 33550 }, { "epoch": 30.179856115107913, "grad_norm": 0.28838294744491577, "learning_rate": 4.433801393645369e-05, "loss": 0.0054, "step": 33560 }, { "epoch": 30.18884892086331, "grad_norm": 0.3710838556289673, "learning_rate": 4.431063422788226e-05, "loss": 0.0059, "step": 33570 }, { "epoch": 30.197841726618705, "grad_norm": 0.2383313775062561, "learning_rate": 4.428325624758991e-05, "loss": 0.0044, "step": 33580 }, { "epoch": 30.2068345323741, "grad_norm": 0.2259088009595871, "learning_rate": 4.4255880003893366e-05, "loss": 0.0065, "step": 33590 }, { "epoch": 30.215827338129497, "grad_norm": 0.16315729916095734, "learning_rate": 4.422850550510884e-05, "loss": 0.0037, "step": 33600 }, { "epoch": 30.22482014388489, "grad_norm": 0.17947585880756378, "learning_rate": 4.4201132759551934e-05, "loss": 0.0067, "step": 33610 }, { "epoch": 30.23381294964029, "grad_norm": 0.1699284166097641, "learning_rate": 4.4173761775537804e-05, "loss": 0.0082, "step": 33620 }, { "epoch": 30.242805755395683, "grad_norm": 0.15284258127212524, "learning_rate": 4.414639256138099e-05, "loss": 0.0052, "step": 33630 }, { "epoch": 30.25179856115108, "grad_norm": 0.16455480456352234, "learning_rate": 4.411902512539557e-05, "loss": 0.0043, "step": 33640 }, { "epoch": 30.260791366906474, "grad_norm": 0.23789061605930328, "learning_rate": 4.4091659475895044e-05, "loss": 0.004, "step": 33650 }, { "epoch": 30.269784172661872, "grad_norm": 0.21209236979484558, "learning_rate": 4.406429562119235e-05, "loss": 0.0054, "step": 33660 }, { "epoch": 30.278776978417266, "grad_norm": 0.22355954349040985, "learning_rate": 4.4036933569599945e-05, "loss": 0.0056, "step": 33670 }, { "epoch": 30.28776978417266, "grad_norm": 0.12872229516506195, "learning_rate": 4.400957332942965e-05, "loss": 0.0042, "step": 33680 }, { "epoch": 30.296762589928058, "grad_norm": 0.20342375338077545, "learning_rate": 4.3982214908992844e-05, "loss": 0.0049, "step": 33690 }, { "epoch": 30.305755395683452, "grad_norm": 0.27642011642456055, "learning_rate": 4.3954858316600235e-05, "loss": 0.0042, "step": 33700 }, { "epoch": 30.31474820143885, "grad_norm": 0.11069738119840622, "learning_rate": 4.392750356056205e-05, "loss": 0.0047, "step": 33710 }, { "epoch": 30.323741007194243, "grad_norm": 0.09802371263504028, "learning_rate": 4.390015064918798e-05, "loss": 0.0037, "step": 33720 }, { "epoch": 30.33273381294964, "grad_norm": 0.11608262360095978, "learning_rate": 4.387279959078705e-05, "loss": 0.0033, "step": 33730 }, { "epoch": 30.341726618705035, "grad_norm": 0.1324302852153778, "learning_rate": 4.384545039366786e-05, "loss": 0.0046, "step": 33740 }, { "epoch": 30.350719424460433, "grad_norm": 0.1932336986064911, "learning_rate": 4.381810306613831e-05, "loss": 0.006, "step": 33750 }, { "epoch": 30.359712230215827, "grad_norm": 0.1852370798587799, "learning_rate": 4.3790757616505826e-05, "loss": 0.0052, "step": 33760 }, { "epoch": 30.368705035971225, "grad_norm": 0.1825992316007614, "learning_rate": 4.376341405307725e-05, "loss": 0.0061, "step": 33770 }, { "epoch": 30.37769784172662, "grad_norm": 0.0952954962849617, "learning_rate": 4.37360723841588e-05, "loss": 0.0058, "step": 33780 }, { "epoch": 30.386690647482013, "grad_norm": 0.2376193404197693, "learning_rate": 4.370873261805619e-05, "loss": 0.0085, "step": 33790 }, { "epoch": 30.39568345323741, "grad_norm": 0.15012162923812866, "learning_rate": 4.368139476307449e-05, "loss": 0.0076, "step": 33800 }, { "epoch": 30.404676258992804, "grad_norm": 0.2960267961025238, "learning_rate": 4.365405882751822e-05, "loss": 0.0061, "step": 33810 }, { "epoch": 30.413669064748202, "grad_norm": 0.13726963102817535, "learning_rate": 4.3626724819691326e-05, "loss": 0.0047, "step": 33820 }, { "epoch": 30.422661870503596, "grad_norm": 0.1250574141740799, "learning_rate": 4.359939274789715e-05, "loss": 0.0036, "step": 33830 }, { "epoch": 30.431654676258994, "grad_norm": 0.08706793934106827, "learning_rate": 4.357206262043848e-05, "loss": 0.006, "step": 33840 }, { "epoch": 30.440647482014388, "grad_norm": 0.137570321559906, "learning_rate": 4.354473444561745e-05, "loss": 0.0028, "step": 33850 }, { "epoch": 30.449640287769785, "grad_norm": 0.17186371982097626, "learning_rate": 4.3517408231735644e-05, "loss": 0.0059, "step": 33860 }, { "epoch": 30.45863309352518, "grad_norm": 0.20864185690879822, "learning_rate": 4.3490083987094086e-05, "loss": 0.0053, "step": 33870 }, { "epoch": 30.467625899280577, "grad_norm": 0.11898913234472275, "learning_rate": 4.34627617199931e-05, "loss": 0.0065, "step": 33880 }, { "epoch": 30.47661870503597, "grad_norm": 0.17260058224201202, "learning_rate": 4.3435441438732526e-05, "loss": 0.0035, "step": 33890 }, { "epoch": 30.485611510791365, "grad_norm": 0.08625545352697372, "learning_rate": 4.340812315161149e-05, "loss": 0.0042, "step": 33900 }, { "epoch": 30.494604316546763, "grad_norm": 0.3098987638950348, "learning_rate": 4.338080686692859e-05, "loss": 0.0071, "step": 33910 }, { "epoch": 30.503597122302157, "grad_norm": 0.14356644451618195, "learning_rate": 4.3353492592981816e-05, "loss": 0.0044, "step": 33920 }, { "epoch": 30.512589928057555, "grad_norm": 0.2595513164997101, "learning_rate": 4.3326180338068485e-05, "loss": 0.0079, "step": 33930 }, { "epoch": 30.52158273381295, "grad_norm": 0.1342419981956482, "learning_rate": 4.3298870110485356e-05, "loss": 0.0043, "step": 33940 }, { "epoch": 30.530575539568346, "grad_norm": 0.06928569078445435, "learning_rate": 4.3271561918528567e-05, "loss": 0.0029, "step": 33950 }, { "epoch": 30.53956834532374, "grad_norm": 0.15646639466285706, "learning_rate": 4.324425577049359e-05, "loss": 0.0046, "step": 33960 }, { "epoch": 30.548561151079138, "grad_norm": 0.13885685801506042, "learning_rate": 4.321695167467535e-05, "loss": 0.0052, "step": 33970 }, { "epoch": 30.557553956834532, "grad_norm": 0.30675336718559265, "learning_rate": 4.3189649639368093e-05, "loss": 0.0099, "step": 33980 }, { "epoch": 30.56654676258993, "grad_norm": 0.18265996873378754, "learning_rate": 4.316234967286547e-05, "loss": 0.0051, "step": 33990 }, { "epoch": 30.575539568345324, "grad_norm": 0.277848482131958, "learning_rate": 4.313505178346046e-05, "loss": 0.0103, "step": 34000 }, { "epoch": 30.584532374100718, "grad_norm": 0.11692449450492859, "learning_rate": 4.3107755979445465e-05, "loss": 0.0057, "step": 34010 }, { "epoch": 30.593525179856115, "grad_norm": 0.10802523791790009, "learning_rate": 4.308046226911224e-05, "loss": 0.005, "step": 34020 }, { "epoch": 30.60251798561151, "grad_norm": 0.11696400493383408, "learning_rate": 4.305317066075185e-05, "loss": 0.0061, "step": 34030 }, { "epoch": 30.611510791366907, "grad_norm": 0.19126057624816895, "learning_rate": 4.302588116265482e-05, "loss": 0.0047, "step": 34040 }, { "epoch": 30.6205035971223, "grad_norm": 0.17220205068588257, "learning_rate": 4.299859378311094e-05, "loss": 0.0071, "step": 34050 }, { "epoch": 30.6294964028777, "grad_norm": 0.27183130383491516, "learning_rate": 4.2971308530409424e-05, "loss": 0.0036, "step": 34060 }, { "epoch": 30.638489208633093, "grad_norm": 0.1851973831653595, "learning_rate": 4.2944025412838765e-05, "loss": 0.005, "step": 34070 }, { "epoch": 30.64748201438849, "grad_norm": 0.20456688106060028, "learning_rate": 4.291674443868689e-05, "loss": 0.0047, "step": 34080 }, { "epoch": 30.656474820143885, "grad_norm": 0.14269977807998657, "learning_rate": 4.288946561624104e-05, "loss": 0.0052, "step": 34090 }, { "epoch": 30.665467625899282, "grad_norm": 0.15560470521450043, "learning_rate": 4.2862188953787794e-05, "loss": 0.0056, "step": 34100 }, { "epoch": 30.674460431654676, "grad_norm": 0.3149329125881195, "learning_rate": 4.283491445961308e-05, "loss": 0.005, "step": 34110 }, { "epoch": 30.68345323741007, "grad_norm": 0.21982485055923462, "learning_rate": 4.2807642142002155e-05, "loss": 0.005, "step": 34120 }, { "epoch": 30.692446043165468, "grad_norm": 0.28733575344085693, "learning_rate": 4.278037200923966e-05, "loss": 0.0041, "step": 34130 }, { "epoch": 30.701438848920862, "grad_norm": 0.23340736329555511, "learning_rate": 4.275310406960953e-05, "loss": 0.0078, "step": 34140 }, { "epoch": 30.71043165467626, "grad_norm": 0.14144977927207947, "learning_rate": 4.272583833139502e-05, "loss": 0.007, "step": 34150 }, { "epoch": 30.719424460431654, "grad_norm": 0.20980304479599, "learning_rate": 4.2698574802878794e-05, "loss": 0.005, "step": 34160 }, { "epoch": 30.72841726618705, "grad_norm": 0.17361170053482056, "learning_rate": 4.2671313492342734e-05, "loss": 0.0036, "step": 34170 }, { "epoch": 30.737410071942445, "grad_norm": 0.09521641582250595, "learning_rate": 4.264405440806813e-05, "loss": 0.0035, "step": 34180 }, { "epoch": 30.746402877697843, "grad_norm": 0.2159942388534546, "learning_rate": 4.26167975583356e-05, "loss": 0.0082, "step": 34190 }, { "epoch": 30.755395683453237, "grad_norm": 0.17307284474372864, "learning_rate": 4.2589542951425e-05, "loss": 0.004, "step": 34200 }, { "epoch": 30.764388489208635, "grad_norm": 0.21154816448688507, "learning_rate": 4.2562290595615615e-05, "loss": 0.005, "step": 34210 }, { "epoch": 30.77338129496403, "grad_norm": 0.1829477697610855, "learning_rate": 4.2535040499185946e-05, "loss": 0.0062, "step": 34220 }, { "epoch": 30.782374100719423, "grad_norm": 0.2458418756723404, "learning_rate": 4.250779267041387e-05, "loss": 0.0052, "step": 34230 }, { "epoch": 30.79136690647482, "grad_norm": 0.21466848254203796, "learning_rate": 4.248054711757657e-05, "loss": 0.0047, "step": 34240 }, { "epoch": 30.800359712230215, "grad_norm": 0.13503770530223846, "learning_rate": 4.245330384895052e-05, "loss": 0.0077, "step": 34250 }, { "epoch": 30.809352517985612, "grad_norm": 0.1520935595035553, "learning_rate": 4.242606287281151e-05, "loss": 0.0085, "step": 34260 }, { "epoch": 30.818345323741006, "grad_norm": 0.20904862880706787, "learning_rate": 4.2398824197434595e-05, "loss": 0.0124, "step": 34270 }, { "epoch": 30.827338129496404, "grad_norm": 0.15987947583198547, "learning_rate": 4.23715878310942e-05, "loss": 0.0054, "step": 34280 }, { "epoch": 30.836330935251798, "grad_norm": 0.18267033994197845, "learning_rate": 4.234435378206402e-05, "loss": 0.0043, "step": 34290 }, { "epoch": 30.845323741007196, "grad_norm": 0.15586143732070923, "learning_rate": 4.2317122058617006e-05, "loss": 0.006, "step": 34300 }, { "epoch": 30.85431654676259, "grad_norm": 0.1669006496667862, "learning_rate": 4.2289892669025485e-05, "loss": 0.0054, "step": 34310 }, { "epoch": 30.863309352517987, "grad_norm": 0.20764394104480743, "learning_rate": 4.226266562156097e-05, "loss": 0.0064, "step": 34320 }, { "epoch": 30.87230215827338, "grad_norm": 0.1779787540435791, "learning_rate": 4.223544092449435e-05, "loss": 0.0049, "step": 34330 }, { "epoch": 30.881294964028775, "grad_norm": 0.10624182969331741, "learning_rate": 4.2208218586095784e-05, "loss": 0.0053, "step": 34340 }, { "epoch": 30.890287769784173, "grad_norm": 0.18055012822151184, "learning_rate": 4.218099861463466e-05, "loss": 0.0042, "step": 34350 }, { "epoch": 30.899280575539567, "grad_norm": 0.09846760332584381, "learning_rate": 4.215378101837972e-05, "loss": 0.0061, "step": 34360 }, { "epoch": 30.908273381294965, "grad_norm": 0.09886165708303452, "learning_rate": 4.2126565805598937e-05, "loss": 0.0048, "step": 34370 }, { "epoch": 30.91726618705036, "grad_norm": 0.2589337229728699, "learning_rate": 4.209935298455957e-05, "loss": 0.0045, "step": 34380 }, { "epoch": 30.926258992805757, "grad_norm": 0.31974294781684875, "learning_rate": 4.207214256352817e-05, "loss": 0.0048, "step": 34390 }, { "epoch": 30.93525179856115, "grad_norm": 0.16470471024513245, "learning_rate": 4.2044934550770524e-05, "loss": 0.0066, "step": 34400 }, { "epoch": 30.944244604316548, "grad_norm": 0.1290205419063568, "learning_rate": 4.201772895455174e-05, "loss": 0.0101, "step": 34410 }, { "epoch": 30.953237410071942, "grad_norm": 0.18349695205688477, "learning_rate": 4.199052578313613e-05, "loss": 0.0054, "step": 34420 }, { "epoch": 30.96223021582734, "grad_norm": 0.17824949324131012, "learning_rate": 4.1963325044787294e-05, "loss": 0.0057, "step": 34430 }, { "epoch": 30.971223021582734, "grad_norm": 0.1897476315498352, "learning_rate": 4.193612674776814e-05, "loss": 0.0039, "step": 34440 }, { "epoch": 30.980215827338128, "grad_norm": 0.13302481174468994, "learning_rate": 4.1908930900340745e-05, "loss": 0.0048, "step": 34450 }, { "epoch": 30.989208633093526, "grad_norm": 0.08620933443307877, "learning_rate": 4.1881737510766536e-05, "loss": 0.0041, "step": 34460 }, { "epoch": 30.99820143884892, "grad_norm": 0.07733263820409775, "learning_rate": 4.185454658730609e-05, "loss": 0.0027, "step": 34470 }, { "epoch": 31.007194244604317, "grad_norm": 0.13916261494159698, "learning_rate": 4.1827358138219355e-05, "loss": 0.0067, "step": 34480 }, { "epoch": 31.01618705035971, "grad_norm": 0.16933861374855042, "learning_rate": 4.1800172171765404e-05, "loss": 0.0119, "step": 34490 }, { "epoch": 31.02517985611511, "grad_norm": 0.13549919426441193, "learning_rate": 4.177298869620264e-05, "loss": 0.0032, "step": 34500 }, { "epoch": 31.034172661870503, "grad_norm": 0.14812269806861877, "learning_rate": 4.1745807719788705e-05, "loss": 0.0059, "step": 34510 }, { "epoch": 31.0431654676259, "grad_norm": 0.24509002268314362, "learning_rate": 4.1718629250780445e-05, "loss": 0.0052, "step": 34520 }, { "epoch": 31.052158273381295, "grad_norm": 0.11607417464256287, "learning_rate": 4.1691453297433956e-05, "loss": 0.0048, "step": 34530 }, { "epoch": 31.06115107913669, "grad_norm": 0.362088143825531, "learning_rate": 4.166427986800457e-05, "loss": 0.0055, "step": 34540 }, { "epoch": 31.070143884892087, "grad_norm": 0.17480304837226868, "learning_rate": 4.163710897074688e-05, "loss": 0.004, "step": 34550 }, { "epoch": 31.07913669064748, "grad_norm": 0.20644541084766388, "learning_rate": 4.1609940613914686e-05, "loss": 0.0071, "step": 34560 }, { "epoch": 31.08812949640288, "grad_norm": 0.15458247065544128, "learning_rate": 4.1582774805760996e-05, "loss": 0.0058, "step": 34570 }, { "epoch": 31.097122302158272, "grad_norm": 0.21440665423870087, "learning_rate": 4.155561155453809e-05, "loss": 0.005, "step": 34580 }, { "epoch": 31.10611510791367, "grad_norm": 0.19345581531524658, "learning_rate": 4.15284508684974e-05, "loss": 0.008, "step": 34590 }, { "epoch": 31.115107913669064, "grad_norm": 0.18404662609100342, "learning_rate": 4.1501292755889675e-05, "loss": 0.0059, "step": 34600 }, { "epoch": 31.12410071942446, "grad_norm": 0.3396949768066406, "learning_rate": 4.1474137224964833e-05, "loss": 0.0068, "step": 34610 }, { "epoch": 31.133093525179856, "grad_norm": 0.29225194454193115, "learning_rate": 4.144698428397197e-05, "loss": 0.0091, "step": 34620 }, { "epoch": 31.142086330935253, "grad_norm": 0.15834684669971466, "learning_rate": 4.1419833941159466e-05, "loss": 0.0042, "step": 34630 }, { "epoch": 31.151079136690647, "grad_norm": 0.14143195748329163, "learning_rate": 4.1392686204774846e-05, "loss": 0.0045, "step": 34640 }, { "epoch": 31.16007194244604, "grad_norm": 0.15033121407032013, "learning_rate": 4.13655410830649e-05, "loss": 0.0038, "step": 34650 }, { "epoch": 31.16906474820144, "grad_norm": 0.17169290781021118, "learning_rate": 4.1338398584275594e-05, "loss": 0.0077, "step": 34660 }, { "epoch": 31.178057553956833, "grad_norm": 0.2010451853275299, "learning_rate": 4.1311258716652104e-05, "loss": 0.0074, "step": 34670 }, { "epoch": 31.18705035971223, "grad_norm": 0.17383377254009247, "learning_rate": 4.128412148843881e-05, "loss": 0.0049, "step": 34680 }, { "epoch": 31.196043165467625, "grad_norm": 0.2800532877445221, "learning_rate": 4.125698690787926e-05, "loss": 0.007, "step": 34690 }, { "epoch": 31.205035971223023, "grad_norm": 0.10807422548532486, "learning_rate": 4.1229854983216245e-05, "loss": 0.0055, "step": 34700 }, { "epoch": 31.214028776978417, "grad_norm": 0.291316419839859, "learning_rate": 4.120272572269175e-05, "loss": 0.0059, "step": 34710 }, { "epoch": 31.223021582733814, "grad_norm": 0.2860261797904968, "learning_rate": 4.117559913454687e-05, "loss": 0.0055, "step": 34720 }, { "epoch": 31.23201438848921, "grad_norm": 0.1774010807275772, "learning_rate": 4.114847522702201e-05, "loss": 0.0045, "step": 34730 }, { "epoch": 31.241007194244606, "grad_norm": 0.13176065683364868, "learning_rate": 4.112135400835664e-05, "loss": 0.004, "step": 34740 }, { "epoch": 31.25, "grad_norm": 0.18033011257648468, "learning_rate": 4.109423548678949e-05, "loss": 0.0034, "step": 34750 }, { "epoch": 31.258992805755394, "grad_norm": 0.10182411968708038, "learning_rate": 4.106711967055848e-05, "loss": 0.0049, "step": 34760 }, { "epoch": 31.26798561151079, "grad_norm": 0.1960642784833908, "learning_rate": 4.1040006567900636e-05, "loss": 0.0046, "step": 34770 }, { "epoch": 31.276978417266186, "grad_norm": 0.2904428541660309, "learning_rate": 4.101289618705224e-05, "loss": 0.0035, "step": 34780 }, { "epoch": 31.285971223021583, "grad_norm": 0.17403583228588104, "learning_rate": 4.0985788536248675e-05, "loss": 0.0041, "step": 34790 }, { "epoch": 31.294964028776977, "grad_norm": 0.21229930222034454, "learning_rate": 4.095868362372454e-05, "loss": 0.0031, "step": 34800 }, { "epoch": 31.303956834532375, "grad_norm": 0.20412729680538177, "learning_rate": 4.0931581457713614e-05, "loss": 0.0041, "step": 34810 }, { "epoch": 31.31294964028777, "grad_norm": 0.16112712025642395, "learning_rate": 4.09044820464488e-05, "loss": 0.0057, "step": 34820 }, { "epoch": 31.321942446043167, "grad_norm": 0.16120629012584686, "learning_rate": 4.087738539816219e-05, "loss": 0.005, "step": 34830 }, { "epoch": 31.33093525179856, "grad_norm": 0.41611984372138977, "learning_rate": 4.085029152108501e-05, "loss": 0.0061, "step": 34840 }, { "epoch": 31.33992805755396, "grad_norm": 0.14874374866485596, "learning_rate": 4.0823200423447714e-05, "loss": 0.0053, "step": 34850 }, { "epoch": 31.348920863309353, "grad_norm": 0.10905720293521881, "learning_rate": 4.079611211347981e-05, "loss": 0.0081, "step": 34860 }, { "epoch": 31.357913669064747, "grad_norm": 0.11903160810470581, "learning_rate": 4.076902659941002e-05, "loss": 0.0035, "step": 34870 }, { "epoch": 31.366906474820144, "grad_norm": 0.18802125751972198, "learning_rate": 4.074194388946624e-05, "loss": 0.0039, "step": 34880 }, { "epoch": 31.37589928057554, "grad_norm": 0.3455730676651001, "learning_rate": 4.071486399187545e-05, "loss": 0.0046, "step": 34890 }, { "epoch": 31.384892086330936, "grad_norm": 0.15477631986141205, "learning_rate": 4.0687786914863836e-05, "loss": 0.0046, "step": 34900 }, { "epoch": 31.39388489208633, "grad_norm": 0.30910298228263855, "learning_rate": 4.0660712666656666e-05, "loss": 0.0038, "step": 34910 }, { "epoch": 31.402877697841728, "grad_norm": 0.11736566573381424, "learning_rate": 4.0633641255478394e-05, "loss": 0.0068, "step": 34920 }, { "epoch": 31.41187050359712, "grad_norm": 0.11949888616800308, "learning_rate": 4.0606572689552624e-05, "loss": 0.004, "step": 34930 }, { "epoch": 31.42086330935252, "grad_norm": 0.24792945384979248, "learning_rate": 4.0579506977102036e-05, "loss": 0.005, "step": 34940 }, { "epoch": 31.429856115107913, "grad_norm": 0.268237829208374, "learning_rate": 4.055244412634849e-05, "loss": 0.0049, "step": 34950 }, { "epoch": 31.43884892086331, "grad_norm": 0.0914715975522995, "learning_rate": 4.052538414551298e-05, "loss": 0.0042, "step": 34960 }, { "epoch": 31.447841726618705, "grad_norm": 0.10309208184480667, "learning_rate": 4.0498327042815596e-05, "loss": 0.0047, "step": 34970 }, { "epoch": 31.4568345323741, "grad_norm": 0.17617002129554749, "learning_rate": 4.047127282647559e-05, "loss": 0.0042, "step": 34980 }, { "epoch": 31.465827338129497, "grad_norm": 0.11141151189804077, "learning_rate": 4.04442215047113e-05, "loss": 0.0031, "step": 34990 }, { "epoch": 31.47482014388489, "grad_norm": 0.07317779213190079, "learning_rate": 4.041717308574023e-05, "loss": 0.003, "step": 35000 }, { "epoch": 31.48381294964029, "grad_norm": 0.10536406189203262, "learning_rate": 4.039012757777893e-05, "loss": 0.0041, "step": 35010 }, { "epoch": 31.492805755395683, "grad_norm": 0.12715809047222137, "learning_rate": 4.036308498904314e-05, "loss": 0.0023, "step": 35020 }, { "epoch": 31.50179856115108, "grad_norm": 0.13856440782546997, "learning_rate": 4.033604532774771e-05, "loss": 0.005, "step": 35030 }, { "epoch": 31.510791366906474, "grad_norm": 0.1182129755616188, "learning_rate": 4.030900860210652e-05, "loss": 0.0027, "step": 35040 }, { "epoch": 31.519784172661872, "grad_norm": 0.07304644584655762, "learning_rate": 4.028197482033266e-05, "loss": 0.0056, "step": 35050 }, { "epoch": 31.528776978417266, "grad_norm": 0.2539224326610565, "learning_rate": 4.0254943990638246e-05, "loss": 0.0067, "step": 35060 }, { "epoch": 31.53776978417266, "grad_norm": 0.21421955525875092, "learning_rate": 4.022791612123454e-05, "loss": 0.0094, "step": 35070 }, { "epoch": 31.546762589928058, "grad_norm": 0.11811956018209457, "learning_rate": 4.020089122033192e-05, "loss": 0.0051, "step": 35080 }, { "epoch": 31.555755395683452, "grad_norm": 0.13148893415927887, "learning_rate": 4.01738692961398e-05, "loss": 0.0105, "step": 35090 }, { "epoch": 31.56474820143885, "grad_norm": 0.24974985420703888, "learning_rate": 4.014685035686675e-05, "loss": 0.0037, "step": 35100 }, { "epoch": 31.573741007194243, "grad_norm": 0.11215627938508987, "learning_rate": 4.011983441072039e-05, "loss": 0.0049, "step": 35110 }, { "epoch": 31.58273381294964, "grad_norm": 0.2199302464723587, "learning_rate": 4.0092821465907485e-05, "loss": 0.0061, "step": 35120 }, { "epoch": 31.591726618705035, "grad_norm": 0.284015953540802, "learning_rate": 4.006581153063383e-05, "loss": 0.0043, "step": 35130 }, { "epoch": 31.600719424460433, "grad_norm": 0.1478796899318695, "learning_rate": 4.003880461310432e-05, "loss": 0.0062, "step": 35140 }, { "epoch": 31.609712230215827, "grad_norm": 0.23987512290477753, "learning_rate": 4.001180072152298e-05, "loss": 0.0063, "step": 35150 }, { "epoch": 31.618705035971225, "grad_norm": 0.25423553586006165, "learning_rate": 3.998479986409285e-05, "loss": 0.0063, "step": 35160 }, { "epoch": 31.62769784172662, "grad_norm": 0.1321532428264618, "learning_rate": 3.995780204901607e-05, "loss": 0.0047, "step": 35170 }, { "epoch": 31.636690647482013, "grad_norm": 0.14802315831184387, "learning_rate": 3.993080728449391e-05, "loss": 0.0031, "step": 35180 }, { "epoch": 31.64568345323741, "grad_norm": 0.18463632464408875, "learning_rate": 3.990381557872661e-05, "loss": 0.003, "step": 35190 }, { "epoch": 31.654676258992804, "grad_norm": 0.08904167264699936, "learning_rate": 3.987682693991359e-05, "loss": 0.0039, "step": 35200 }, { "epoch": 31.663669064748202, "grad_norm": 0.13393869996070862, "learning_rate": 3.9849841376253226e-05, "loss": 0.0029, "step": 35210 }, { "epoch": 31.672661870503596, "grad_norm": 0.09437000751495361, "learning_rate": 3.982285889594306e-05, "loss": 0.007, "step": 35220 }, { "epoch": 31.681654676258994, "grad_norm": 0.12280971556901932, "learning_rate": 3.9795879507179665e-05, "loss": 0.0042, "step": 35230 }, { "epoch": 31.690647482014388, "grad_norm": 0.2219267040491104, "learning_rate": 3.9768903218158634e-05, "loss": 0.0038, "step": 35240 }, { "epoch": 31.699640287769785, "grad_norm": 0.09211092442274094, "learning_rate": 3.974193003707468e-05, "loss": 0.004, "step": 35250 }, { "epoch": 31.70863309352518, "grad_norm": 0.16623540222644806, "learning_rate": 3.971495997212152e-05, "loss": 0.0049, "step": 35260 }, { "epoch": 31.717625899280577, "grad_norm": 0.30073976516723633, "learning_rate": 3.9687993031491985e-05, "loss": 0.0051, "step": 35270 }, { "epoch": 31.72661870503597, "grad_norm": 0.16422712802886963, "learning_rate": 3.966102922337787e-05, "loss": 0.0052, "step": 35280 }, { "epoch": 31.735611510791365, "grad_norm": 0.12430686503648758, "learning_rate": 3.963406855597009e-05, "loss": 0.0034, "step": 35290 }, { "epoch": 31.744604316546763, "grad_norm": 0.0869765356183052, "learning_rate": 3.960711103745861e-05, "loss": 0.0044, "step": 35300 }, { "epoch": 31.753597122302157, "grad_norm": 0.23627234995365143, "learning_rate": 3.958015667603237e-05, "loss": 0.0056, "step": 35310 }, { "epoch": 31.762589928057555, "grad_norm": 0.18276962637901306, "learning_rate": 3.955320547987943e-05, "loss": 0.0044, "step": 35320 }, { "epoch": 31.77158273381295, "grad_norm": 0.3418288230895996, "learning_rate": 3.952625745718681e-05, "loss": 0.0087, "step": 35330 }, { "epoch": 31.780575539568346, "grad_norm": 0.2098919302225113, "learning_rate": 3.949931261614064e-05, "loss": 0.0073, "step": 35340 }, { "epoch": 31.78956834532374, "grad_norm": 0.1969296634197235, "learning_rate": 3.947237096492605e-05, "loss": 0.0033, "step": 35350 }, { "epoch": 31.798561151079138, "grad_norm": 0.1501685529947281, "learning_rate": 3.944543251172719e-05, "loss": 0.0062, "step": 35360 }, { "epoch": 31.807553956834532, "grad_norm": 0.17649869620800018, "learning_rate": 3.941849726472725e-05, "loss": 0.0036, "step": 35370 }, { "epoch": 31.81654676258993, "grad_norm": 0.17077741026878357, "learning_rate": 3.939156523210846e-05, "loss": 0.0044, "step": 35380 }, { "epoch": 31.825539568345324, "grad_norm": 0.1421082466840744, "learning_rate": 3.9364636422052046e-05, "loss": 0.0076, "step": 35390 }, { "epoch": 31.834532374100718, "grad_norm": 0.11385701596736908, "learning_rate": 3.933771084273828e-05, "loss": 0.0084, "step": 35400 }, { "epoch": 31.843525179856115, "grad_norm": 0.32362550497055054, "learning_rate": 3.931078850234643e-05, "loss": 0.0061, "step": 35410 }, { "epoch": 31.85251798561151, "grad_norm": 0.15213260054588318, "learning_rate": 3.928386940905483e-05, "loss": 0.0037, "step": 35420 }, { "epoch": 31.861510791366907, "grad_norm": 0.10163124650716782, "learning_rate": 3.925695357104073e-05, "loss": 0.0037, "step": 35430 }, { "epoch": 31.8705035971223, "grad_norm": 0.18860965967178345, "learning_rate": 3.923004099648049e-05, "loss": 0.0056, "step": 35440 }, { "epoch": 31.8794964028777, "grad_norm": 0.27772393822669983, "learning_rate": 3.920313169354944e-05, "loss": 0.0039, "step": 35450 }, { "epoch": 31.888489208633093, "grad_norm": 0.1369529515504837, "learning_rate": 3.9176225670421897e-05, "loss": 0.0093, "step": 35460 }, { "epoch": 31.89748201438849, "grad_norm": 0.2298445999622345, "learning_rate": 3.9149322935271224e-05, "loss": 0.0039, "step": 35470 }, { "epoch": 31.906474820143885, "grad_norm": 0.19123592972755432, "learning_rate": 3.9122423496269725e-05, "loss": 0.0056, "step": 35480 }, { "epoch": 31.915467625899282, "grad_norm": 0.13999043405056, "learning_rate": 3.909552736158877e-05, "loss": 0.0048, "step": 35490 }, { "epoch": 31.924460431654676, "grad_norm": 0.11456049978733063, "learning_rate": 3.90686345393987e-05, "loss": 0.0046, "step": 35500 }, { "epoch": 31.93345323741007, "grad_norm": 0.14306707680225372, "learning_rate": 3.9041745037868816e-05, "loss": 0.0029, "step": 35510 }, { "epoch": 31.942446043165468, "grad_norm": 0.2257484346628189, "learning_rate": 3.9014858865167465e-05, "loss": 0.0067, "step": 35520 }, { "epoch": 31.951438848920862, "grad_norm": 0.11397610604763031, "learning_rate": 3.8987976029461935e-05, "loss": 0.0078, "step": 35530 }, { "epoch": 31.96043165467626, "grad_norm": 0.23576514422893524, "learning_rate": 3.896109653891853e-05, "loss": 0.0067, "step": 35540 }, { "epoch": 31.969424460431654, "grad_norm": 0.11911936104297638, "learning_rate": 3.893422040170254e-05, "loss": 0.0035, "step": 35550 }, { "epoch": 31.97841726618705, "grad_norm": 0.19846248626708984, "learning_rate": 3.8907347625978207e-05, "loss": 0.0082, "step": 35560 }, { "epoch": 31.987410071942445, "grad_norm": 0.24874387681484222, "learning_rate": 3.88804782199088e-05, "loss": 0.005, "step": 35570 }, { "epoch": 31.996402877697843, "grad_norm": 0.197606161236763, "learning_rate": 3.8853612191656495e-05, "loss": 0.0031, "step": 35580 }, { "epoch": 32.00539568345324, "grad_norm": 0.12512418627738953, "learning_rate": 3.88267495493825e-05, "loss": 0.0057, "step": 35590 }, { "epoch": 32.014388489208635, "grad_norm": 0.1826619654893875, "learning_rate": 3.8799890301247004e-05, "loss": 0.0071, "step": 35600 }, { "epoch": 32.023381294964025, "grad_norm": 0.20460477471351624, "learning_rate": 3.8773034455409096e-05, "loss": 0.0058, "step": 35610 }, { "epoch": 32.03237410071942, "grad_norm": 0.14451095461845398, "learning_rate": 3.8746182020026904e-05, "loss": 0.0045, "step": 35620 }, { "epoch": 32.04136690647482, "grad_norm": 0.1538238823413849, "learning_rate": 3.871933300325745e-05, "loss": 0.0031, "step": 35630 }, { "epoch": 32.05035971223022, "grad_norm": 0.30380871891975403, "learning_rate": 3.869248741325679e-05, "loss": 0.0051, "step": 35640 }, { "epoch": 32.05935251798561, "grad_norm": 0.11591241508722305, "learning_rate": 3.866564525817992e-05, "loss": 0.0038, "step": 35650 }, { "epoch": 32.068345323741006, "grad_norm": 0.13153445720672607, "learning_rate": 3.8638806546180725e-05, "loss": 0.0041, "step": 35660 }, { "epoch": 32.077338129496404, "grad_norm": 0.09403949230909348, "learning_rate": 3.861197128541213e-05, "loss": 0.0064, "step": 35670 }, { "epoch": 32.0863309352518, "grad_norm": 0.13865993916988373, "learning_rate": 3.858513948402599e-05, "loss": 0.0056, "step": 35680 }, { "epoch": 32.09532374100719, "grad_norm": 0.21916724741458893, "learning_rate": 3.8558311150173077e-05, "loss": 0.0139, "step": 35690 }, { "epoch": 32.10431654676259, "grad_norm": 0.20804497599601746, "learning_rate": 3.853148629200312e-05, "loss": 0.0081, "step": 35700 }, { "epoch": 32.11330935251799, "grad_norm": 0.1635100096464157, "learning_rate": 3.850466491766482e-05, "loss": 0.0057, "step": 35710 }, { "epoch": 32.12230215827338, "grad_norm": 0.2844269275665283, "learning_rate": 3.847784703530583e-05, "loss": 0.0042, "step": 35720 }, { "epoch": 32.131294964028775, "grad_norm": 0.1347886174917221, "learning_rate": 3.845103265307266e-05, "loss": 0.0049, "step": 35730 }, { "epoch": 32.14028776978417, "grad_norm": 0.12394832819700241, "learning_rate": 3.842422177911086e-05, "loss": 0.0038, "step": 35740 }, { "epoch": 32.14928057553957, "grad_norm": 0.13039597868919373, "learning_rate": 3.8397414421564826e-05, "loss": 0.006, "step": 35750 }, { "epoch": 32.15827338129496, "grad_norm": 0.1933985948562622, "learning_rate": 3.8370610588577935e-05, "loss": 0.0061, "step": 35760 }, { "epoch": 32.16726618705036, "grad_norm": 0.19424495100975037, "learning_rate": 3.834381028829251e-05, "loss": 0.0039, "step": 35770 }, { "epoch": 32.17625899280576, "grad_norm": 0.10963566601276398, "learning_rate": 3.8317013528849745e-05, "loss": 0.0036, "step": 35780 }, { "epoch": 32.185251798561154, "grad_norm": 0.10422921180725098, "learning_rate": 3.8290220318389815e-05, "loss": 0.0078, "step": 35790 }, { "epoch": 32.194244604316545, "grad_norm": 0.13257354497909546, "learning_rate": 3.8263430665051746e-05, "loss": 0.0039, "step": 35800 }, { "epoch": 32.20323741007194, "grad_norm": 0.10530588030815125, "learning_rate": 3.8236644576973554e-05, "loss": 0.0034, "step": 35810 }, { "epoch": 32.21223021582734, "grad_norm": 0.07834164798259735, "learning_rate": 3.820986206229217e-05, "loss": 0.004, "step": 35820 }, { "epoch": 32.22122302158273, "grad_norm": 0.29852524399757385, "learning_rate": 3.8183083129143384e-05, "loss": 0.0037, "step": 35830 }, { "epoch": 32.23021582733813, "grad_norm": 0.17384767532348633, "learning_rate": 3.815630778566193e-05, "loss": 0.0133, "step": 35840 }, { "epoch": 32.239208633093526, "grad_norm": 0.11935970932245255, "learning_rate": 3.812953603998145e-05, "loss": 0.0065, "step": 35850 }, { "epoch": 32.24820143884892, "grad_norm": 0.19748477637767792, "learning_rate": 3.8102767900234504e-05, "loss": 0.0052, "step": 35860 }, { "epoch": 32.257194244604314, "grad_norm": 0.12598946690559387, "learning_rate": 3.807600337455256e-05, "loss": 0.0052, "step": 35870 }, { "epoch": 32.26618705035971, "grad_norm": 0.18913491070270538, "learning_rate": 3.804924247106593e-05, "loss": 0.0061, "step": 35880 }, { "epoch": 32.27517985611511, "grad_norm": 0.23197504878044128, "learning_rate": 3.8022485197903925e-05, "loss": 0.0046, "step": 35890 }, { "epoch": 32.28417266187051, "grad_norm": 0.0971132218837738, "learning_rate": 3.799573156319464e-05, "loss": 0.0036, "step": 35900 }, { "epoch": 32.2931654676259, "grad_norm": 0.17460012435913086, "learning_rate": 3.796898157506515e-05, "loss": 0.0061, "step": 35910 }, { "epoch": 32.302158273381295, "grad_norm": 0.14137472212314606, "learning_rate": 3.794223524164143e-05, "loss": 0.0047, "step": 35920 }, { "epoch": 32.31115107913669, "grad_norm": 0.17534096539020538, "learning_rate": 3.7915492571048245e-05, "loss": 0.0066, "step": 35930 }, { "epoch": 32.32014388489208, "grad_norm": 0.13406334817409515, "learning_rate": 3.788875357140937e-05, "loss": 0.0093, "step": 35940 }, { "epoch": 32.32913669064748, "grad_norm": 0.22153626382350922, "learning_rate": 3.786201825084736e-05, "loss": 0.0048, "step": 35950 }, { "epoch": 32.33812949640288, "grad_norm": 0.1471637487411499, "learning_rate": 3.783528661748372e-05, "loss": 0.004, "step": 35960 }, { "epoch": 32.347122302158276, "grad_norm": 0.14861099421977997, "learning_rate": 3.780855867943882e-05, "loss": 0.0078, "step": 35970 }, { "epoch": 32.356115107913666, "grad_norm": 0.1649620234966278, "learning_rate": 3.778183444483189e-05, "loss": 0.0045, "step": 35980 }, { "epoch": 32.365107913669064, "grad_norm": 0.29832273721694946, "learning_rate": 3.775511392178108e-05, "loss": 0.0053, "step": 35990 }, { "epoch": 32.37410071942446, "grad_norm": 0.14417585730552673, "learning_rate": 3.772839711840332e-05, "loss": 0.0067, "step": 36000 }, { "epoch": 32.38309352517986, "grad_norm": 0.22273114323616028, "learning_rate": 3.7701684042814515e-05, "loss": 0.0068, "step": 36010 }, { "epoch": 32.39208633093525, "grad_norm": 0.21334391832351685, "learning_rate": 3.76749747031294e-05, "loss": 0.0042, "step": 36020 }, { "epoch": 32.40107913669065, "grad_norm": 0.18201231956481934, "learning_rate": 3.764826910746152e-05, "loss": 0.0141, "step": 36030 }, { "epoch": 32.410071942446045, "grad_norm": 0.12602777779102325, "learning_rate": 3.762156726392338e-05, "loss": 0.0035, "step": 36040 }, { "epoch": 32.419064748201436, "grad_norm": 0.16064053773880005, "learning_rate": 3.759486918062625e-05, "loss": 0.0098, "step": 36050 }, { "epoch": 32.42805755395683, "grad_norm": 0.15737436711788177, "learning_rate": 3.756817486568033e-05, "loss": 0.0047, "step": 36060 }, { "epoch": 32.43705035971223, "grad_norm": 0.23672771453857422, "learning_rate": 3.7541484327194654e-05, "loss": 0.0055, "step": 36070 }, { "epoch": 32.44604316546763, "grad_norm": 0.12860198318958282, "learning_rate": 3.751479757327707e-05, "loss": 0.0077, "step": 36080 }, { "epoch": 32.45503597122302, "grad_norm": 0.2222031056880951, "learning_rate": 3.7488114612034345e-05, "loss": 0.0072, "step": 36090 }, { "epoch": 32.46402877697842, "grad_norm": 0.2048756331205368, "learning_rate": 3.7461435451572044e-05, "loss": 0.0076, "step": 36100 }, { "epoch": 32.473021582733814, "grad_norm": 0.1053897887468338, "learning_rate": 3.743476009999459e-05, "loss": 0.0062, "step": 36110 }, { "epoch": 32.48201438848921, "grad_norm": 0.14647607505321503, "learning_rate": 3.7408088565405245e-05, "loss": 0.0051, "step": 36120 }, { "epoch": 32.4910071942446, "grad_norm": 0.10063329339027405, "learning_rate": 3.738142085590612e-05, "loss": 0.0042, "step": 36130 }, { "epoch": 32.5, "grad_norm": 0.11761458218097687, "learning_rate": 3.7354756979598194e-05, "loss": 0.0035, "step": 36140 }, { "epoch": 32.5089928057554, "grad_norm": 0.1640598475933075, "learning_rate": 3.7328096944581187e-05, "loss": 0.0041, "step": 36150 }, { "epoch": 32.51798561151079, "grad_norm": 0.12483038008213043, "learning_rate": 3.730144075895377e-05, "loss": 0.0048, "step": 36160 }, { "epoch": 32.526978417266186, "grad_norm": 0.12598924338817596, "learning_rate": 3.727478843081335e-05, "loss": 0.0047, "step": 36170 }, { "epoch": 32.53597122302158, "grad_norm": 0.13393576443195343, "learning_rate": 3.72481399682562e-05, "loss": 0.0034, "step": 36180 }, { "epoch": 32.54496402877698, "grad_norm": 0.13572832942008972, "learning_rate": 3.722149537937747e-05, "loss": 0.0068, "step": 36190 }, { "epoch": 32.55395683453237, "grad_norm": 0.09740599244832993, "learning_rate": 3.7194854672271015e-05, "loss": 0.0034, "step": 36200 }, { "epoch": 32.56294964028777, "grad_norm": 0.11835171282291412, "learning_rate": 3.7168217855029644e-05, "loss": 0.0048, "step": 36210 }, { "epoch": 32.57194244604317, "grad_norm": 0.11673898249864578, "learning_rate": 3.7141584935744856e-05, "loss": 0.0046, "step": 36220 }, { "epoch": 32.580935251798564, "grad_norm": 0.12591412663459778, "learning_rate": 3.7114955922507055e-05, "loss": 0.0033, "step": 36230 }, { "epoch": 32.589928057553955, "grad_norm": 0.09973619878292084, "learning_rate": 3.708833082340545e-05, "loss": 0.0048, "step": 36240 }, { "epoch": 32.59892086330935, "grad_norm": 0.13524273037910461, "learning_rate": 3.7061709646528034e-05, "loss": 0.0026, "step": 36250 }, { "epoch": 32.60791366906475, "grad_norm": 0.11054728180170059, "learning_rate": 3.7035092399961604e-05, "loss": 0.0039, "step": 36260 }, { "epoch": 32.61690647482014, "grad_norm": 0.20051123201847076, "learning_rate": 3.700847909179177e-05, "loss": 0.0049, "step": 36270 }, { "epoch": 32.62589928057554, "grad_norm": 0.17405259609222412, "learning_rate": 3.698186973010297e-05, "loss": 0.0072, "step": 36280 }, { "epoch": 32.634892086330936, "grad_norm": 0.15571843087673187, "learning_rate": 3.695526432297844e-05, "loss": 0.0043, "step": 36290 }, { "epoch": 32.643884892086334, "grad_norm": 0.1586022973060608, "learning_rate": 3.692866287850017e-05, "loss": 0.0027, "step": 36300 }, { "epoch": 32.652877697841724, "grad_norm": 0.08552747219800949, "learning_rate": 3.6902065404749006e-05, "loss": 0.0044, "step": 36310 }, { "epoch": 32.66187050359712, "grad_norm": 0.2256520390510559, "learning_rate": 3.6875471909804516e-05, "loss": 0.0077, "step": 36320 }, { "epoch": 32.67086330935252, "grad_norm": 0.12227034568786621, "learning_rate": 3.6848882401745135e-05, "loss": 0.0022, "step": 36330 }, { "epoch": 32.67985611510792, "grad_norm": 0.18630120158195496, "learning_rate": 3.682229688864806e-05, "loss": 0.0056, "step": 36340 }, { "epoch": 32.68884892086331, "grad_norm": 0.14136865735054016, "learning_rate": 3.6795715378589235e-05, "loss": 0.0035, "step": 36350 }, { "epoch": 32.697841726618705, "grad_norm": 0.2471635341644287, "learning_rate": 3.676913787964345e-05, "loss": 0.0091, "step": 36360 }, { "epoch": 32.7068345323741, "grad_norm": 0.2665761411190033, "learning_rate": 3.674256439988423e-05, "loss": 0.0056, "step": 36370 }, { "epoch": 32.71582733812949, "grad_norm": 0.24885328114032745, "learning_rate": 3.6715994947383904e-05, "loss": 0.0049, "step": 36380 }, { "epoch": 32.72482014388489, "grad_norm": 0.10000874102115631, "learning_rate": 3.668942953021357e-05, "loss": 0.0036, "step": 36390 }, { "epoch": 32.73381294964029, "grad_norm": 0.1237715557217598, "learning_rate": 3.66628681564431e-05, "loss": 0.004, "step": 36400 }, { "epoch": 32.742805755395686, "grad_norm": 0.11147783696651459, "learning_rate": 3.663631083414114e-05, "loss": 0.0025, "step": 36410 }, { "epoch": 32.75179856115108, "grad_norm": 0.2115345448255539, "learning_rate": 3.660975757137509e-05, "loss": 0.0033, "step": 36420 }, { "epoch": 32.760791366906474, "grad_norm": 0.2043893039226532, "learning_rate": 3.658320837621114e-05, "loss": 0.004, "step": 36430 }, { "epoch": 32.76978417266187, "grad_norm": 0.28188586235046387, "learning_rate": 3.655666325671426e-05, "loss": 0.0049, "step": 36440 }, { "epoch": 32.77877697841727, "grad_norm": 0.12711039185523987, "learning_rate": 3.65301222209481e-05, "loss": 0.0039, "step": 36450 }, { "epoch": 32.78776978417266, "grad_norm": 0.16100767254829407, "learning_rate": 3.650358527697519e-05, "loss": 0.0077, "step": 36460 }, { "epoch": 32.79676258992806, "grad_norm": 0.18020470440387726, "learning_rate": 3.64770524328567e-05, "loss": 0.004, "step": 36470 }, { "epoch": 32.805755395683455, "grad_norm": 0.1380329579114914, "learning_rate": 3.645052369665265e-05, "loss": 0.0039, "step": 36480 }, { "epoch": 32.814748201438846, "grad_norm": 0.12996147572994232, "learning_rate": 3.6423999076421724e-05, "loss": 0.0049, "step": 36490 }, { "epoch": 32.82374100719424, "grad_norm": 0.2400698959827423, "learning_rate": 3.639747858022142e-05, "loss": 0.0062, "step": 36500 }, { "epoch": 32.83273381294964, "grad_norm": 0.12876015901565552, "learning_rate": 3.637096221610799e-05, "loss": 0.0052, "step": 36510 }, { "epoch": 32.84172661870504, "grad_norm": 0.12992918491363525, "learning_rate": 3.634444999213638e-05, "loss": 0.0029, "step": 36520 }, { "epoch": 32.85071942446043, "grad_norm": 0.19272522628307343, "learning_rate": 3.6317941916360296e-05, "loss": 0.0046, "step": 36530 }, { "epoch": 32.85971223021583, "grad_norm": 0.09700344502925873, "learning_rate": 3.629143799683221e-05, "loss": 0.0032, "step": 36540 }, { "epoch": 32.868705035971225, "grad_norm": 0.12311165034770966, "learning_rate": 3.626493824160331e-05, "loss": 0.005, "step": 36550 }, { "epoch": 32.87769784172662, "grad_norm": 0.08934245258569717, "learning_rate": 3.623844265872352e-05, "loss": 0.0019, "step": 36560 }, { "epoch": 32.88669064748201, "grad_norm": 0.1954900324344635, "learning_rate": 3.621195125624149e-05, "loss": 0.0048, "step": 36570 }, { "epoch": 32.89568345323741, "grad_norm": 0.17960253357887268, "learning_rate": 3.618546404220463e-05, "loss": 0.007, "step": 36580 }, { "epoch": 32.90467625899281, "grad_norm": 0.12385857850313187, "learning_rate": 3.615898102465903e-05, "loss": 0.0065, "step": 36590 }, { "epoch": 32.9136690647482, "grad_norm": 0.21512864530086517, "learning_rate": 3.6132502211649544e-05, "loss": 0.005, "step": 36600 }, { "epoch": 32.922661870503596, "grad_norm": 0.1264590173959732, "learning_rate": 3.610602761121975e-05, "loss": 0.0039, "step": 36610 }, { "epoch": 32.931654676258994, "grad_norm": 0.07659988105297089, "learning_rate": 3.6079557231411897e-05, "loss": 0.0051, "step": 36620 }, { "epoch": 32.94064748201439, "grad_norm": 0.11434698104858398, "learning_rate": 3.6053091080267035e-05, "loss": 0.0023, "step": 36630 }, { "epoch": 32.94964028776978, "grad_norm": 0.12732212245464325, "learning_rate": 3.602662916582483e-05, "loss": 0.0026, "step": 36640 }, { "epoch": 32.95863309352518, "grad_norm": 0.10644665360450745, "learning_rate": 3.600017149612375e-05, "loss": 0.0041, "step": 36650 }, { "epoch": 32.96762589928058, "grad_norm": 0.17479240894317627, "learning_rate": 3.5973718079200935e-05, "loss": 0.006, "step": 36660 }, { "epoch": 32.976618705035975, "grad_norm": 0.0650029107928276, "learning_rate": 3.5947268923092216e-05, "loss": 0.0031, "step": 36670 }, { "epoch": 32.985611510791365, "grad_norm": 0.1725095957517624, "learning_rate": 3.592082403583216e-05, "loss": 0.0055, "step": 36680 }, { "epoch": 32.99460431654676, "grad_norm": 0.1963294893503189, "learning_rate": 3.5894383425454004e-05, "loss": 0.0037, "step": 36690 }, { "epoch": 33.00359712230216, "grad_norm": 0.10500229150056839, "learning_rate": 3.586794709998975e-05, "loss": 0.0021, "step": 36700 }, { "epoch": 33.01258992805755, "grad_norm": 0.29265540838241577, "learning_rate": 3.584151506747002e-05, "loss": 0.0049, "step": 36710 }, { "epoch": 33.02158273381295, "grad_norm": 0.22048397362232208, "learning_rate": 3.581508733592418e-05, "loss": 0.0059, "step": 36720 }, { "epoch": 33.030575539568346, "grad_norm": 0.16418114304542542, "learning_rate": 3.5788663913380297e-05, "loss": 0.0036, "step": 36730 }, { "epoch": 33.039568345323744, "grad_norm": 0.12311238050460815, "learning_rate": 3.576224480786506e-05, "loss": 0.009, "step": 36740 }, { "epoch": 33.048561151079134, "grad_norm": 0.1338314563035965, "learning_rate": 3.573583002740393e-05, "loss": 0.0038, "step": 36750 }, { "epoch": 33.05755395683453, "grad_norm": 0.3031538426876068, "learning_rate": 3.570941958002103e-05, "loss": 0.0052, "step": 36760 }, { "epoch": 33.06654676258993, "grad_norm": 0.23231445252895355, "learning_rate": 3.568301347373912e-05, "loss": 0.0035, "step": 36770 }, { "epoch": 33.07553956834533, "grad_norm": 0.08816884458065033, "learning_rate": 3.5656611716579726e-05, "loss": 0.0051, "step": 36780 }, { "epoch": 33.08453237410072, "grad_norm": 0.18309129774570465, "learning_rate": 3.5630214316562946e-05, "loss": 0.0079, "step": 36790 }, { "epoch": 33.093525179856115, "grad_norm": 0.16977478563785553, "learning_rate": 3.560382128170766e-05, "loss": 0.0033, "step": 36800 }, { "epoch": 33.10251798561151, "grad_norm": 0.1170697957277298, "learning_rate": 3.5577432620031374e-05, "loss": 0.0096, "step": 36810 }, { "epoch": 33.111510791366904, "grad_norm": 0.25511860847473145, "learning_rate": 3.5551048339550216e-05, "loss": 0.0051, "step": 36820 }, { "epoch": 33.1205035971223, "grad_norm": 0.16198790073394775, "learning_rate": 3.55246684482791e-05, "loss": 0.0062, "step": 36830 }, { "epoch": 33.1294964028777, "grad_norm": 0.11214792728424072, "learning_rate": 3.5498292954231496e-05, "loss": 0.0038, "step": 36840 }, { "epoch": 33.138489208633096, "grad_norm": 0.196113720536232, "learning_rate": 3.54719218654196e-05, "loss": 0.0046, "step": 36850 }, { "epoch": 33.14748201438849, "grad_norm": 0.14725282788276672, "learning_rate": 3.544555518985425e-05, "loss": 0.0042, "step": 36860 }, { "epoch": 33.156474820143885, "grad_norm": 0.11831767857074738, "learning_rate": 3.541919293554494e-05, "loss": 0.0033, "step": 36870 }, { "epoch": 33.16546762589928, "grad_norm": 0.23657670617103577, "learning_rate": 3.539283511049985e-05, "loss": 0.0058, "step": 36880 }, { "epoch": 33.17446043165467, "grad_norm": 0.16211190819740295, "learning_rate": 3.5366481722725755e-05, "loss": 0.0042, "step": 36890 }, { "epoch": 33.18345323741007, "grad_norm": 0.1427294760942459, "learning_rate": 3.534013278022816e-05, "loss": 0.0043, "step": 36900 }, { "epoch": 33.19244604316547, "grad_norm": 0.2673150599002838, "learning_rate": 3.531378829101113e-05, "loss": 0.0071, "step": 36910 }, { "epoch": 33.201438848920866, "grad_norm": 0.1999257206916809, "learning_rate": 3.528744826307746e-05, "loss": 0.0044, "step": 36920 }, { "epoch": 33.210431654676256, "grad_norm": 0.09665580093860626, "learning_rate": 3.5261112704428554e-05, "loss": 0.0046, "step": 36930 }, { "epoch": 33.219424460431654, "grad_norm": 0.1596236675977707, "learning_rate": 3.523478162306443e-05, "loss": 0.0029, "step": 36940 }, { "epoch": 33.22841726618705, "grad_norm": 0.14895783364772797, "learning_rate": 3.520845502698381e-05, "loss": 0.0025, "step": 36950 }, { "epoch": 33.23741007194245, "grad_norm": 0.21323201060295105, "learning_rate": 3.5182132924184005e-05, "loss": 0.0048, "step": 36960 }, { "epoch": 33.24640287769784, "grad_norm": 0.21958056092262268, "learning_rate": 3.5155815322660966e-05, "loss": 0.0061, "step": 36970 }, { "epoch": 33.25539568345324, "grad_norm": 0.27503564953804016, "learning_rate": 3.512950223040931e-05, "loss": 0.0036, "step": 36980 }, { "epoch": 33.264388489208635, "grad_norm": 0.2415643185377121, "learning_rate": 3.5103193655422216e-05, "loss": 0.0044, "step": 36990 }, { "epoch": 33.273381294964025, "grad_norm": 0.10453824698925018, "learning_rate": 3.5076889605691596e-05, "loss": 0.0057, "step": 37000 }, { "epoch": 33.28237410071942, "grad_norm": 0.22026193141937256, "learning_rate": 3.505059008920787e-05, "loss": 0.0065, "step": 37010 }, { "epoch": 33.29136690647482, "grad_norm": 0.19898436963558197, "learning_rate": 3.502429511396016e-05, "loss": 0.0097, "step": 37020 }, { "epoch": 33.30035971223022, "grad_norm": 0.06737720221281052, "learning_rate": 3.4998004687936196e-05, "loss": 0.0051, "step": 37030 }, { "epoch": 33.30935251798561, "grad_norm": 0.2062465101480484, "learning_rate": 3.497171881912229e-05, "loss": 0.0044, "step": 37040 }, { "epoch": 33.318345323741006, "grad_norm": 0.1247943565249443, "learning_rate": 3.494543751550342e-05, "loss": 0.005, "step": 37050 }, { "epoch": 33.327338129496404, "grad_norm": 0.20005078613758087, "learning_rate": 3.491916078506313e-05, "loss": 0.0066, "step": 37060 }, { "epoch": 33.3363309352518, "grad_norm": 0.23369520902633667, "learning_rate": 3.489288863578361e-05, "loss": 0.0094, "step": 37070 }, { "epoch": 33.34532374100719, "grad_norm": 0.2176157832145691, "learning_rate": 3.4866621075645646e-05, "loss": 0.0076, "step": 37080 }, { "epoch": 33.35431654676259, "grad_norm": 0.2301609367132187, "learning_rate": 3.4840358112628614e-05, "loss": 0.0054, "step": 37090 }, { "epoch": 33.36330935251799, "grad_norm": 0.21561501920223236, "learning_rate": 3.481409975471053e-05, "loss": 0.0035, "step": 37100 }, { "epoch": 33.37230215827338, "grad_norm": 0.1600227802991867, "learning_rate": 3.4787846009867986e-05, "loss": 0.0075, "step": 37110 }, { "epoch": 33.381294964028775, "grad_norm": 0.24332144856452942, "learning_rate": 3.476159688607615e-05, "loss": 0.0062, "step": 37120 }, { "epoch": 33.39028776978417, "grad_norm": 0.219079852104187, "learning_rate": 3.4735352391308854e-05, "loss": 0.0036, "step": 37130 }, { "epoch": 33.39928057553957, "grad_norm": 0.12785470485687256, "learning_rate": 3.4709112533538446e-05, "loss": 0.006, "step": 37140 }, { "epoch": 33.40827338129496, "grad_norm": 0.21322530508041382, "learning_rate": 3.4682877320735934e-05, "loss": 0.0026, "step": 37150 }, { "epoch": 33.41726618705036, "grad_norm": 0.151295006275177, "learning_rate": 3.465664676087085e-05, "loss": 0.0069, "step": 37160 }, { "epoch": 33.42625899280576, "grad_norm": 0.18605384230613708, "learning_rate": 3.463042086191136e-05, "loss": 0.0051, "step": 37170 }, { "epoch": 33.435251798561154, "grad_norm": 0.2128640115261078, "learning_rate": 3.460419963182423e-05, "loss": 0.0056, "step": 37180 }, { "epoch": 33.444244604316545, "grad_norm": 0.09641103446483612, "learning_rate": 3.457798307857473e-05, "loss": 0.0046, "step": 37190 }, { "epoch": 33.45323741007194, "grad_norm": 0.27586206793785095, "learning_rate": 3.455177121012678e-05, "loss": 0.0063, "step": 37200 }, { "epoch": 33.46223021582734, "grad_norm": 0.23726466298103333, "learning_rate": 3.452556403444285e-05, "loss": 0.0058, "step": 37210 }, { "epoch": 33.47122302158273, "grad_norm": 0.2953222393989563, "learning_rate": 3.4499361559483975e-05, "loss": 0.0058, "step": 37220 }, { "epoch": 33.48021582733813, "grad_norm": 0.13715006411075592, "learning_rate": 3.44731637932098e-05, "loss": 0.0031, "step": 37230 }, { "epoch": 33.489208633093526, "grad_norm": 0.14177265763282776, "learning_rate": 3.44469707435785e-05, "loss": 0.0042, "step": 37240 }, { "epoch": 33.49820143884892, "grad_norm": 0.1922750025987625, "learning_rate": 3.4420782418546835e-05, "loss": 0.005, "step": 37250 }, { "epoch": 33.507194244604314, "grad_norm": 0.13341115415096283, "learning_rate": 3.439459882607012e-05, "loss": 0.0041, "step": 37260 }, { "epoch": 33.51618705035971, "grad_norm": 0.19851651787757874, "learning_rate": 3.436841997410225e-05, "loss": 0.0031, "step": 37270 }, { "epoch": 33.52517985611511, "grad_norm": 0.0716819241642952, "learning_rate": 3.434224587059567e-05, "loss": 0.0035, "step": 37280 }, { "epoch": 33.53417266187051, "grad_norm": 0.08560550212860107, "learning_rate": 3.431607652350136e-05, "loss": 0.0039, "step": 37290 }, { "epoch": 33.5431654676259, "grad_norm": 0.09974011033773422, "learning_rate": 3.428991194076891e-05, "loss": 0.0033, "step": 37300 }, { "epoch": 33.552158273381295, "grad_norm": 0.1484803706407547, "learning_rate": 3.4263752130346394e-05, "loss": 0.0046, "step": 37310 }, { "epoch": 33.56115107913669, "grad_norm": 0.15278036892414093, "learning_rate": 3.4237597100180515e-05, "loss": 0.004, "step": 37320 }, { "epoch": 33.57014388489208, "grad_norm": 0.17956431210041046, "learning_rate": 3.4211446858216427e-05, "loss": 0.0076, "step": 37330 }, { "epoch": 33.57913669064748, "grad_norm": 0.24746179580688477, "learning_rate": 3.4185301412397915e-05, "loss": 0.0138, "step": 37340 }, { "epoch": 33.58812949640288, "grad_norm": 0.1291646510362625, "learning_rate": 3.415916077066729e-05, "loss": 0.0071, "step": 37350 }, { "epoch": 33.597122302158276, "grad_norm": 0.08161700516939163, "learning_rate": 3.413302494096535e-05, "loss": 0.0026, "step": 37360 }, { "epoch": 33.606115107913666, "grad_norm": 0.09630385786294937, "learning_rate": 3.410689393123151e-05, "loss": 0.0044, "step": 37370 }, { "epoch": 33.615107913669064, "grad_norm": 0.1761893928050995, "learning_rate": 3.408076774940364e-05, "loss": 0.0046, "step": 37380 }, { "epoch": 33.62410071942446, "grad_norm": 0.1765233725309372, "learning_rate": 3.40546464034182e-05, "loss": 0.0052, "step": 37390 }, { "epoch": 33.63309352517986, "grad_norm": 0.10150289535522461, "learning_rate": 3.4028529901210185e-05, "loss": 0.0088, "step": 37400 }, { "epoch": 33.64208633093525, "grad_norm": 0.12387221306562424, "learning_rate": 3.4002418250713086e-05, "loss": 0.0062, "step": 37410 }, { "epoch": 33.65107913669065, "grad_norm": 0.15967847406864166, "learning_rate": 3.3976311459858936e-05, "loss": 0.0035, "step": 37420 }, { "epoch": 33.660071942446045, "grad_norm": 0.25506964325904846, "learning_rate": 3.395020953657826e-05, "loss": 0.0041, "step": 37430 }, { "epoch": 33.669064748201436, "grad_norm": 0.10840175300836563, "learning_rate": 3.3924112488800165e-05, "loss": 0.0036, "step": 37440 }, { "epoch": 33.67805755395683, "grad_norm": 0.14196108281612396, "learning_rate": 3.389802032445225e-05, "loss": 0.0036, "step": 37450 }, { "epoch": 33.68705035971223, "grad_norm": 0.1298968493938446, "learning_rate": 3.38719330514606e-05, "loss": 0.0045, "step": 37460 }, { "epoch": 33.69604316546763, "grad_norm": 0.1798701137304306, "learning_rate": 3.3845850677749866e-05, "loss": 0.0046, "step": 37470 }, { "epoch": 33.70503597122302, "grad_norm": 0.17455966770648956, "learning_rate": 3.3819773211243157e-05, "loss": 0.0049, "step": 37480 }, { "epoch": 33.71402877697842, "grad_norm": 0.16496966779232025, "learning_rate": 3.379370065986213e-05, "loss": 0.0038, "step": 37490 }, { "epoch": 33.723021582733814, "grad_norm": 0.08784850686788559, "learning_rate": 3.3767633031526955e-05, "loss": 0.0043, "step": 37500 }, { "epoch": 33.73201438848921, "grad_norm": 0.08804232627153397, "learning_rate": 3.374157033415626e-05, "loss": 0.004, "step": 37510 }, { "epoch": 33.7410071942446, "grad_norm": 0.15492995083332062, "learning_rate": 3.371551257566723e-05, "loss": 0.0044, "step": 37520 }, { "epoch": 33.75, "grad_norm": 0.09433223307132721, "learning_rate": 3.36894597639755e-05, "loss": 0.0035, "step": 37530 }, { "epoch": 33.7589928057554, "grad_norm": 0.2176722288131714, "learning_rate": 3.366341190699523e-05, "loss": 0.0046, "step": 37540 }, { "epoch": 33.76798561151079, "grad_norm": 0.41197168827056885, "learning_rate": 3.36373690126391e-05, "loss": 0.0039, "step": 37550 }, { "epoch": 33.776978417266186, "grad_norm": 0.36237579584121704, "learning_rate": 3.3611331088818234e-05, "loss": 0.0046, "step": 37560 }, { "epoch": 33.78597122302158, "grad_norm": 0.1064172089099884, "learning_rate": 3.3585298143442265e-05, "loss": 0.005, "step": 37570 }, { "epoch": 33.79496402877698, "grad_norm": 0.1269969344139099, "learning_rate": 3.35592701844193e-05, "loss": 0.0051, "step": 37580 }, { "epoch": 33.80395683453237, "grad_norm": 0.11455461382865906, "learning_rate": 3.353324721965596e-05, "loss": 0.0077, "step": 37590 }, { "epoch": 33.81294964028777, "grad_norm": 0.20038601756095886, "learning_rate": 3.350722925705736e-05, "loss": 0.0035, "step": 37600 }, { "epoch": 33.82194244604317, "grad_norm": 0.17068906128406525, "learning_rate": 3.348121630452703e-05, "loss": 0.0064, "step": 37610 }, { "epoch": 33.830935251798564, "grad_norm": 0.1107720211148262, "learning_rate": 3.3455208369967044e-05, "loss": 0.0036, "step": 37620 }, { "epoch": 33.839928057553955, "grad_norm": 0.21371708810329437, "learning_rate": 3.34292054612779e-05, "loss": 0.0041, "step": 37630 }, { "epoch": 33.84892086330935, "grad_norm": 0.1670709252357483, "learning_rate": 3.340320758635861e-05, "loss": 0.0053, "step": 37640 }, { "epoch": 33.85791366906475, "grad_norm": 0.20488668978214264, "learning_rate": 3.337721475310666e-05, "loss": 0.0041, "step": 37650 }, { "epoch": 33.86690647482014, "grad_norm": 0.13036814332008362, "learning_rate": 3.335122696941795e-05, "loss": 0.0034, "step": 37660 }, { "epoch": 33.87589928057554, "grad_norm": 0.24506349861621857, "learning_rate": 3.332524424318692e-05, "loss": 0.0039, "step": 37670 }, { "epoch": 33.884892086330936, "grad_norm": 0.10111220180988312, "learning_rate": 3.32992665823064e-05, "loss": 0.0029, "step": 37680 }, { "epoch": 33.893884892086334, "grad_norm": 0.18944087624549866, "learning_rate": 3.327329399466774e-05, "loss": 0.0053, "step": 37690 }, { "epoch": 33.902877697841724, "grad_norm": 0.1792856752872467, "learning_rate": 3.324732648816072e-05, "loss": 0.0045, "step": 37700 }, { "epoch": 33.91187050359712, "grad_norm": 0.17404904961585999, "learning_rate": 3.322136407067358e-05, "loss": 0.0032, "step": 37710 }, { "epoch": 33.92086330935252, "grad_norm": 0.1282176375389099, "learning_rate": 3.3195406750093036e-05, "loss": 0.0041, "step": 37720 }, { "epoch": 33.92985611510792, "grad_norm": 0.15101295709609985, "learning_rate": 3.3169454534304205e-05, "loss": 0.0037, "step": 37730 }, { "epoch": 33.93884892086331, "grad_norm": 0.23381556570529938, "learning_rate": 3.3143507431190725e-05, "loss": 0.0049, "step": 37740 }, { "epoch": 33.947841726618705, "grad_norm": 0.22737225890159607, "learning_rate": 3.311756544863459e-05, "loss": 0.0029, "step": 37750 }, { "epoch": 33.9568345323741, "grad_norm": 0.15858609974384308, "learning_rate": 3.309162859451633e-05, "loss": 0.007, "step": 37760 }, { "epoch": 33.96582733812949, "grad_norm": 0.11234317719936371, "learning_rate": 3.306569687671487e-05, "loss": 0.0031, "step": 37770 }, { "epoch": 33.97482014388489, "grad_norm": 0.19762277603149414, "learning_rate": 3.303977030310756e-05, "loss": 0.0045, "step": 37780 }, { "epoch": 33.98381294964029, "grad_norm": 0.20248813927173615, "learning_rate": 3.3013848881570245e-05, "loss": 0.003, "step": 37790 }, { "epoch": 33.992805755395686, "grad_norm": 0.1403762400150299, "learning_rate": 3.298793261997712e-05, "loss": 0.0089, "step": 37800 }, { "epoch": 34.00179856115108, "grad_norm": 0.17663443088531494, "learning_rate": 3.2962021526200893e-05, "loss": 0.0032, "step": 37810 }, { "epoch": 34.010791366906474, "grad_norm": 0.115911103785038, "learning_rate": 3.293611560811268e-05, "loss": 0.0038, "step": 37820 }, { "epoch": 34.01978417266187, "grad_norm": 0.228347048163414, "learning_rate": 3.291021487358199e-05, "loss": 0.006, "step": 37830 }, { "epoch": 34.02877697841727, "grad_norm": 0.24011218547821045, "learning_rate": 3.28843193304768e-05, "loss": 0.007, "step": 37840 }, { "epoch": 34.03776978417266, "grad_norm": 0.15925508737564087, "learning_rate": 3.2858428986663456e-05, "loss": 0.006, "step": 37850 }, { "epoch": 34.04676258992806, "grad_norm": 0.09396004676818848, "learning_rate": 3.283254385000681e-05, "loss": 0.0032, "step": 37860 }, { "epoch": 34.055755395683455, "grad_norm": 0.10681619495153427, "learning_rate": 3.2806663928370076e-05, "loss": 0.0045, "step": 37870 }, { "epoch": 34.064748201438846, "grad_norm": 0.12467043846845627, "learning_rate": 3.278078922961485e-05, "loss": 0.0033, "step": 37880 }, { "epoch": 34.07374100719424, "grad_norm": 0.14112569391727448, "learning_rate": 3.275491976160123e-05, "loss": 0.0037, "step": 37890 }, { "epoch": 34.08273381294964, "grad_norm": 0.13652342557907104, "learning_rate": 3.2729055532187645e-05, "loss": 0.0032, "step": 37900 }, { "epoch": 34.09172661870504, "grad_norm": 0.284821093082428, "learning_rate": 3.270319654923097e-05, "loss": 0.0059, "step": 37910 }, { "epoch": 34.10071942446043, "grad_norm": 0.22463257610797882, "learning_rate": 3.2677342820586506e-05, "loss": 0.0046, "step": 37920 }, { "epoch": 34.10971223021583, "grad_norm": 0.3111638128757477, "learning_rate": 3.2651494354107905e-05, "loss": 0.0057, "step": 37930 }, { "epoch": 34.118705035971225, "grad_norm": 0.10769092291593552, "learning_rate": 3.2625651157647266e-05, "loss": 0.0029, "step": 37940 }, { "epoch": 34.12769784172662, "grad_norm": 0.12183712422847748, "learning_rate": 3.259981323905505e-05, "loss": 0.004, "step": 37950 }, { "epoch": 34.13669064748201, "grad_norm": 0.12870433926582336, "learning_rate": 3.257398060618014e-05, "loss": 0.0038, "step": 37960 }, { "epoch": 34.14568345323741, "grad_norm": 0.1530502885580063, "learning_rate": 3.254815326686983e-05, "loss": 0.0028, "step": 37970 }, { "epoch": 34.15467625899281, "grad_norm": 0.11057855933904648, "learning_rate": 3.2522331228969774e-05, "loss": 0.0022, "step": 37980 }, { "epoch": 34.1636690647482, "grad_norm": 0.19972148537635803, "learning_rate": 3.2496514500324006e-05, "loss": 0.0042, "step": 37990 }, { "epoch": 34.172661870503596, "grad_norm": 0.07257142663002014, "learning_rate": 3.247070308877498e-05, "loss": 0.002, "step": 38000 }, { "epoch": 34.181654676258994, "grad_norm": 0.07158955931663513, "learning_rate": 3.2444897002163515e-05, "loss": 0.0037, "step": 38010 }, { "epoch": 34.19064748201439, "grad_norm": 0.15696054697036743, "learning_rate": 3.241909624832885e-05, "loss": 0.0033, "step": 38020 }, { "epoch": 34.19964028776978, "grad_norm": 0.21472108364105225, "learning_rate": 3.239330083510852e-05, "loss": 0.0056, "step": 38030 }, { "epoch": 34.20863309352518, "grad_norm": 0.16033326089382172, "learning_rate": 3.236751077033855e-05, "loss": 0.0034, "step": 38040 }, { "epoch": 34.21762589928058, "grad_norm": 0.4118726849555969, "learning_rate": 3.234172606185322e-05, "loss": 0.0048, "step": 38050 }, { "epoch": 34.226618705035975, "grad_norm": 0.24701836705207825, "learning_rate": 3.231594671748528e-05, "loss": 0.0043, "step": 38060 }, { "epoch": 34.235611510791365, "grad_norm": 0.18665902316570282, "learning_rate": 3.2290172745065815e-05, "loss": 0.0028, "step": 38070 }, { "epoch": 34.24460431654676, "grad_norm": 0.23415477573871613, "learning_rate": 3.226440415242426e-05, "loss": 0.0037, "step": 38080 }, { "epoch": 34.25359712230216, "grad_norm": 0.12204888463020325, "learning_rate": 3.223864094738846e-05, "loss": 0.0036, "step": 38090 }, { "epoch": 34.26258992805755, "grad_norm": 0.11155501753091812, "learning_rate": 3.221288313778456e-05, "loss": 0.0028, "step": 38100 }, { "epoch": 34.27158273381295, "grad_norm": 0.05928825959563255, "learning_rate": 3.2187130731437125e-05, "loss": 0.0024, "step": 38110 }, { "epoch": 34.280575539568346, "grad_norm": 0.0928456112742424, "learning_rate": 3.216138373616905e-05, "loss": 0.0045, "step": 38120 }, { "epoch": 34.289568345323744, "grad_norm": 0.08512266725301743, "learning_rate": 3.21356421598016e-05, "loss": 0.0033, "step": 38130 }, { "epoch": 34.298561151079134, "grad_norm": 0.11452629417181015, "learning_rate": 3.210990601015438e-05, "loss": 0.0035, "step": 38140 }, { "epoch": 34.30755395683453, "grad_norm": 0.10861005634069443, "learning_rate": 3.208417529504535e-05, "loss": 0.0029, "step": 38150 }, { "epoch": 34.31654676258993, "grad_norm": 0.2703680098056793, "learning_rate": 3.205845002229084e-05, "loss": 0.0045, "step": 38160 }, { "epoch": 34.32553956834532, "grad_norm": 0.2591997981071472, "learning_rate": 3.203273019970547e-05, "loss": 0.0033, "step": 38170 }, { "epoch": 34.33453237410072, "grad_norm": 0.22908475995063782, "learning_rate": 3.200701583510227e-05, "loss": 0.0049, "step": 38180 }, { "epoch": 34.343525179856115, "grad_norm": 0.10898562520742416, "learning_rate": 3.198130693629261e-05, "loss": 0.0039, "step": 38190 }, { "epoch": 34.35251798561151, "grad_norm": 0.15563371777534485, "learning_rate": 3.195560351108612e-05, "loss": 0.0042, "step": 38200 }, { "epoch": 34.361510791366904, "grad_norm": 0.17078611254692078, "learning_rate": 3.1929905567290865e-05, "loss": 0.0035, "step": 38210 }, { "epoch": 34.3705035971223, "grad_norm": 0.10529383271932602, "learning_rate": 3.1904213112713164e-05, "loss": 0.0032, "step": 38220 }, { "epoch": 34.3794964028777, "grad_norm": 0.09051695466041565, "learning_rate": 3.187852615515774e-05, "loss": 0.005, "step": 38230 }, { "epoch": 34.388489208633096, "grad_norm": 0.17467322945594788, "learning_rate": 3.1852844702427606e-05, "loss": 0.0024, "step": 38240 }, { "epoch": 34.39748201438849, "grad_norm": 0.14742551743984222, "learning_rate": 3.18271687623241e-05, "loss": 0.0041, "step": 38250 }, { "epoch": 34.406474820143885, "grad_norm": 0.14003145694732666, "learning_rate": 3.1801498342646896e-05, "loss": 0.0059, "step": 38260 }, { "epoch": 34.41546762589928, "grad_norm": 0.20283710956573486, "learning_rate": 3.177583345119398e-05, "loss": 0.0052, "step": 38270 }, { "epoch": 34.42446043165468, "grad_norm": 0.20947787165641785, "learning_rate": 3.17501740957617e-05, "loss": 0.0028, "step": 38280 }, { "epoch": 34.43345323741007, "grad_norm": 0.17237792909145355, "learning_rate": 3.172452028414467e-05, "loss": 0.0043, "step": 38290 }, { "epoch": 34.44244604316547, "grad_norm": 0.19963760673999786, "learning_rate": 3.169887202413583e-05, "loss": 0.004, "step": 38300 }, { "epoch": 34.451438848920866, "grad_norm": 0.20170964300632477, "learning_rate": 3.167322932352646e-05, "loss": 0.0044, "step": 38310 }, { "epoch": 34.460431654676256, "grad_norm": 0.09476880729198456, "learning_rate": 3.164759219010613e-05, "loss": 0.0055, "step": 38320 }, { "epoch": 34.469424460431654, "grad_norm": 0.18390223383903503, "learning_rate": 3.1621960631662725e-05, "loss": 0.0039, "step": 38330 }, { "epoch": 34.47841726618705, "grad_norm": 0.34819233417510986, "learning_rate": 3.159633465598245e-05, "loss": 0.0072, "step": 38340 }, { "epoch": 34.48741007194245, "grad_norm": 0.1661214381456375, "learning_rate": 3.1570714270849767e-05, "loss": 0.0031, "step": 38350 }, { "epoch": 34.49640287769784, "grad_norm": 0.19617757201194763, "learning_rate": 3.1545099484047516e-05, "loss": 0.0036, "step": 38360 }, { "epoch": 34.50539568345324, "grad_norm": 0.20980092883110046, "learning_rate": 3.151949030335674e-05, "loss": 0.003, "step": 38370 }, { "epoch": 34.514388489208635, "grad_norm": 0.09431736916303635, "learning_rate": 3.149388673655687e-05, "loss": 0.005, "step": 38380 }, { "epoch": 34.523381294964025, "grad_norm": 0.3126133978366852, "learning_rate": 3.146828879142559e-05, "loss": 0.0047, "step": 38390 }, { "epoch": 34.53237410071942, "grad_norm": 0.1200261190533638, "learning_rate": 3.1442696475738866e-05, "loss": 0.0029, "step": 38400 }, { "epoch": 34.54136690647482, "grad_norm": 0.19648364186286926, "learning_rate": 3.141710979727098e-05, "loss": 0.0028, "step": 38410 }, { "epoch": 34.55035971223022, "grad_norm": 0.1314784735441208, "learning_rate": 3.139152876379447e-05, "loss": 0.0038, "step": 38420 }, { "epoch": 34.55935251798561, "grad_norm": 0.1743752360343933, "learning_rate": 3.1365953383080214e-05, "loss": 0.0035, "step": 38430 }, { "epoch": 34.568345323741006, "grad_norm": 0.17297261953353882, "learning_rate": 3.134038366289731e-05, "loss": 0.0032, "step": 38440 }, { "epoch": 34.577338129496404, "grad_norm": 0.1646587997674942, "learning_rate": 3.131481961101317e-05, "loss": 0.0067, "step": 38450 }, { "epoch": 34.5863309352518, "grad_norm": 0.15253634750843048, "learning_rate": 3.128926123519349e-05, "loss": 0.0038, "step": 38460 }, { "epoch": 34.59532374100719, "grad_norm": 0.12988664209842682, "learning_rate": 3.1263708543202194e-05, "loss": 0.0023, "step": 38470 }, { "epoch": 34.60431654676259, "grad_norm": 0.17446663975715637, "learning_rate": 3.123816154280155e-05, "loss": 0.0053, "step": 38480 }, { "epoch": 34.61330935251799, "grad_norm": 0.20983833074569702, "learning_rate": 3.121262024175207e-05, "loss": 0.0052, "step": 38490 }, { "epoch": 34.62230215827338, "grad_norm": 0.19906967878341675, "learning_rate": 3.118708464781248e-05, "loss": 0.0048, "step": 38500 }, { "epoch": 34.631294964028775, "grad_norm": 0.18200430274009705, "learning_rate": 3.116155476873987e-05, "loss": 0.0038, "step": 38510 }, { "epoch": 34.64028776978417, "grad_norm": 0.09832162410020828, "learning_rate": 3.11360306122895e-05, "loss": 0.0039, "step": 38520 }, { "epoch": 34.64928057553957, "grad_norm": 0.17530140280723572, "learning_rate": 3.1110512186214975e-05, "loss": 0.0032, "step": 38530 }, { "epoch": 34.65827338129496, "grad_norm": 0.0850103348493576, "learning_rate": 3.1084999498268095e-05, "loss": 0.0035, "step": 38540 }, { "epoch": 34.66726618705036, "grad_norm": 0.2977253496646881, "learning_rate": 3.1059492556198934e-05, "loss": 0.0051, "step": 38550 }, { "epoch": 34.67625899280576, "grad_norm": 0.21427787840366364, "learning_rate": 3.103399136775586e-05, "loss": 0.0055, "step": 38560 }, { "epoch": 34.685251798561154, "grad_norm": 0.16023306548595428, "learning_rate": 3.100849594068541e-05, "loss": 0.005, "step": 38570 }, { "epoch": 34.694244604316545, "grad_norm": 0.19032737612724304, "learning_rate": 3.0983006282732484e-05, "loss": 0.0035, "step": 38580 }, { "epoch": 34.70323741007194, "grad_norm": 0.13448698818683624, "learning_rate": 3.0957522401640116e-05, "loss": 0.0042, "step": 38590 }, { "epoch": 34.71223021582734, "grad_norm": 0.10707543790340424, "learning_rate": 3.0932044305149645e-05, "loss": 0.0028, "step": 38600 }, { "epoch": 34.72122302158273, "grad_norm": 0.11620192229747772, "learning_rate": 3.090657200100068e-05, "loss": 0.003, "step": 38610 }, { "epoch": 34.73021582733813, "grad_norm": 0.1134931668639183, "learning_rate": 3.088110549693099e-05, "loss": 0.0044, "step": 38620 }, { "epoch": 34.739208633093526, "grad_norm": 0.10843891650438309, "learning_rate": 3.085564480067667e-05, "loss": 0.0034, "step": 38630 }, { "epoch": 34.74820143884892, "grad_norm": 0.06669732928276062, "learning_rate": 3.0830189919971955e-05, "loss": 0.0028, "step": 38640 }, { "epoch": 34.757194244604314, "grad_norm": 0.3136693239212036, "learning_rate": 3.080474086254939e-05, "loss": 0.0056, "step": 38650 }, { "epoch": 34.76618705035971, "grad_norm": 0.1312250941991806, "learning_rate": 3.077929763613975e-05, "loss": 0.0041, "step": 38660 }, { "epoch": 34.77517985611511, "grad_norm": 0.1090632900595665, "learning_rate": 3.075386024847198e-05, "loss": 0.0058, "step": 38670 }, { "epoch": 34.78417266187051, "grad_norm": 0.1137181967496872, "learning_rate": 3.072842870727331e-05, "loss": 0.0035, "step": 38680 }, { "epoch": 34.7931654676259, "grad_norm": 0.17820729315280914, "learning_rate": 3.070300302026916e-05, "loss": 0.0033, "step": 38690 }, { "epoch": 34.802158273381295, "grad_norm": 0.23725415766239166, "learning_rate": 3.067758319518318e-05, "loss": 0.0053, "step": 38700 }, { "epoch": 34.81115107913669, "grad_norm": 0.19021913409233093, "learning_rate": 3.065216923973725e-05, "loss": 0.0034, "step": 38710 }, { "epoch": 34.82014388489208, "grad_norm": 0.1813845932483673, "learning_rate": 3.062676116165145e-05, "loss": 0.0029, "step": 38720 }, { "epoch": 34.82913669064748, "grad_norm": 0.23024357855319977, "learning_rate": 3.06013589686441e-05, "loss": 0.0035, "step": 38730 }, { "epoch": 34.83812949640288, "grad_norm": 0.13573256134986877, "learning_rate": 3.05759626684317e-05, "loss": 0.0041, "step": 38740 }, { "epoch": 34.847122302158276, "grad_norm": 0.08586493134498596, "learning_rate": 3.055057226872896e-05, "loss": 0.0036, "step": 38750 }, { "epoch": 34.856115107913666, "grad_norm": 0.1490040123462677, "learning_rate": 3.052518777724887e-05, "loss": 0.0043, "step": 38760 }, { "epoch": 34.865107913669064, "grad_norm": 0.13285507261753082, "learning_rate": 3.04998092017025e-05, "loss": 0.0027, "step": 38770 }, { "epoch": 34.87410071942446, "grad_norm": 0.21083824336528778, "learning_rate": 3.0474436549799246e-05, "loss": 0.0063, "step": 38780 }, { "epoch": 34.88309352517986, "grad_norm": 0.20102867484092712, "learning_rate": 3.044906982924661e-05, "loss": 0.0037, "step": 38790 }, { "epoch": 34.89208633093525, "grad_norm": 0.14817282557487488, "learning_rate": 3.0423709047750337e-05, "loss": 0.003, "step": 38800 }, { "epoch": 34.90107913669065, "grad_norm": 0.16349519789218903, "learning_rate": 3.03983542130144e-05, "loss": 0.0064, "step": 38810 }, { "epoch": 34.910071942446045, "grad_norm": 0.2261408418416977, "learning_rate": 3.0373005332740877e-05, "loss": 0.0072, "step": 38820 }, { "epoch": 34.919064748201436, "grad_norm": 0.1393553912639618, "learning_rate": 3.034766241463013e-05, "loss": 0.0034, "step": 38830 }, { "epoch": 34.92805755395683, "grad_norm": 0.12008748203516006, "learning_rate": 3.032232546638064e-05, "loss": 0.0028, "step": 38840 }, { "epoch": 34.93705035971223, "grad_norm": 0.11644865572452545, "learning_rate": 3.0296994495689114e-05, "loss": 0.0034, "step": 38850 }, { "epoch": 34.94604316546763, "grad_norm": 0.14924943447113037, "learning_rate": 3.0271669510250444e-05, "loss": 0.0036, "step": 38860 }, { "epoch": 34.95503597122302, "grad_norm": 0.12548232078552246, "learning_rate": 3.024635051775766e-05, "loss": 0.0031, "step": 38870 }, { "epoch": 34.96402877697842, "grad_norm": 0.15454256534576416, "learning_rate": 3.022103752590205e-05, "loss": 0.0059, "step": 38880 }, { "epoch": 34.973021582733814, "grad_norm": 0.20730049908161163, "learning_rate": 3.0195730542372992e-05, "loss": 0.0046, "step": 38890 }, { "epoch": 34.98201438848921, "grad_norm": 0.10378464311361313, "learning_rate": 3.0170429574858084e-05, "loss": 0.0029, "step": 38900 }, { "epoch": 34.9910071942446, "grad_norm": 0.0854877457022667, "learning_rate": 3.0145134631043127e-05, "loss": 0.0032, "step": 38910 }, { "epoch": 35.0, "grad_norm": 0.10604052245616913, "learning_rate": 3.0119845718612018e-05, "loss": 0.0033, "step": 38920 }, { "epoch": 35.0089928057554, "grad_norm": 0.25867629051208496, "learning_rate": 3.009456284524688e-05, "loss": 0.0048, "step": 38930 }, { "epoch": 35.01798561151079, "grad_norm": 0.09533368796110153, "learning_rate": 3.0069286018627967e-05, "loss": 0.0048, "step": 38940 }, { "epoch": 35.026978417266186, "grad_norm": 0.1863458752632141, "learning_rate": 3.0044015246433743e-05, "loss": 0.0076, "step": 38950 }, { "epoch": 35.03597122302158, "grad_norm": 0.13841129839420319, "learning_rate": 3.0018750536340755e-05, "loss": 0.006, "step": 38960 }, { "epoch": 35.04496402877698, "grad_norm": 0.17299026250839233, "learning_rate": 2.999349189602378e-05, "loss": 0.0091, "step": 38970 }, { "epoch": 35.05395683453237, "grad_norm": 0.1924801617860794, "learning_rate": 2.9968239333155733e-05, "loss": 0.0042, "step": 38980 }, { "epoch": 35.06294964028777, "grad_norm": 0.08179179579019547, "learning_rate": 2.994299285540767e-05, "loss": 0.0043, "step": 38990 }, { "epoch": 35.07194244604317, "grad_norm": 0.18786095082759857, "learning_rate": 2.9917752470448813e-05, "loss": 0.0046, "step": 39000 }, { "epoch": 35.080935251798564, "grad_norm": 0.15072616934776306, "learning_rate": 2.9892518185946495e-05, "loss": 0.004, "step": 39010 }, { "epoch": 35.089928057553955, "grad_norm": 0.12105061113834381, "learning_rate": 2.986729000956624e-05, "loss": 0.0065, "step": 39020 }, { "epoch": 35.09892086330935, "grad_norm": 0.11244203895330429, "learning_rate": 2.9842067948971736e-05, "loss": 0.004, "step": 39030 }, { "epoch": 35.10791366906475, "grad_norm": 0.10027113556861877, "learning_rate": 2.9816852011824727e-05, "loss": 0.0032, "step": 39040 }, { "epoch": 35.11690647482014, "grad_norm": 0.07554669678211212, "learning_rate": 2.979164220578519e-05, "loss": 0.0049, "step": 39050 }, { "epoch": 35.12589928057554, "grad_norm": 0.1098678782582283, "learning_rate": 2.9766438538511165e-05, "loss": 0.0054, "step": 39060 }, { "epoch": 35.134892086330936, "grad_norm": 0.2439589947462082, "learning_rate": 2.9741241017658873e-05, "loss": 0.0039, "step": 39070 }, { "epoch": 35.143884892086334, "grad_norm": 0.1797848790884018, "learning_rate": 2.971604965088267e-05, "loss": 0.0039, "step": 39080 }, { "epoch": 35.152877697841724, "grad_norm": 0.1969202607870102, "learning_rate": 2.9690864445835008e-05, "loss": 0.0043, "step": 39090 }, { "epoch": 35.16187050359712, "grad_norm": 0.18817929923534393, "learning_rate": 2.966568541016651e-05, "loss": 0.0108, "step": 39100 }, { "epoch": 35.17086330935252, "grad_norm": 0.22528798878192902, "learning_rate": 2.9640512551525867e-05, "loss": 0.0081, "step": 39110 }, { "epoch": 35.17985611510792, "grad_norm": 0.43996375799179077, "learning_rate": 2.961534587755995e-05, "loss": 0.0051, "step": 39120 }, { "epoch": 35.18884892086331, "grad_norm": 0.2051335722208023, "learning_rate": 2.959018539591375e-05, "loss": 0.0059, "step": 39130 }, { "epoch": 35.197841726618705, "grad_norm": 0.15115173161029816, "learning_rate": 2.9565031114230325e-05, "loss": 0.0069, "step": 39140 }, { "epoch": 35.2068345323741, "grad_norm": 0.36340925097465515, "learning_rate": 2.9539883040150895e-05, "loss": 0.007, "step": 39150 }, { "epoch": 35.21582733812949, "grad_norm": 0.1370088756084442, "learning_rate": 2.9514741181314774e-05, "loss": 0.004, "step": 39160 }, { "epoch": 35.22482014388489, "grad_norm": 0.19935037195682526, "learning_rate": 2.94896055453594e-05, "loss": 0.0021, "step": 39170 }, { "epoch": 35.23381294964029, "grad_norm": 0.18802322447299957, "learning_rate": 2.9464476139920332e-05, "loss": 0.0043, "step": 39180 }, { "epoch": 35.242805755395686, "grad_norm": 0.14478494226932526, "learning_rate": 2.9439352972631186e-05, "loss": 0.0024, "step": 39190 }, { "epoch": 35.25179856115108, "grad_norm": 0.152360200881958, "learning_rate": 2.9414236051123757e-05, "loss": 0.0051, "step": 39200 }, { "epoch": 35.260791366906474, "grad_norm": 0.1169886663556099, "learning_rate": 2.938912538302785e-05, "loss": 0.0032, "step": 39210 }, { "epoch": 35.26978417266187, "grad_norm": 0.11555251479148865, "learning_rate": 2.9364020975971464e-05, "loss": 0.0026, "step": 39220 }, { "epoch": 35.27877697841727, "grad_norm": 0.10666317492723465, "learning_rate": 2.9338922837580657e-05, "loss": 0.0035, "step": 39230 }, { "epoch": 35.28776978417266, "grad_norm": 0.14134226739406586, "learning_rate": 2.931383097547955e-05, "loss": 0.0037, "step": 39240 }, { "epoch": 35.29676258992806, "grad_norm": 0.23393723368644714, "learning_rate": 2.928874539729043e-05, "loss": 0.0046, "step": 39250 }, { "epoch": 35.305755395683455, "grad_norm": 0.19039222598075867, "learning_rate": 2.926366611063358e-05, "loss": 0.0026, "step": 39260 }, { "epoch": 35.314748201438846, "grad_norm": 0.10079418122768402, "learning_rate": 2.9238593123127463e-05, "loss": 0.0039, "step": 39270 }, { "epoch": 35.32374100719424, "grad_norm": 0.07172298431396484, "learning_rate": 2.9213526442388583e-05, "loss": 0.0024, "step": 39280 }, { "epoch": 35.33273381294964, "grad_norm": 0.11615845561027527, "learning_rate": 2.9188466076031545e-05, "loss": 0.0023, "step": 39290 }, { "epoch": 35.34172661870504, "grad_norm": 0.12487702816724777, "learning_rate": 2.9163412031669012e-05, "loss": 0.0033, "step": 39300 }, { "epoch": 35.35071942446043, "grad_norm": 0.11724191159009933, "learning_rate": 2.913836431691175e-05, "loss": 0.0049, "step": 39310 }, { "epoch": 35.35971223021583, "grad_norm": 0.1494453251361847, "learning_rate": 2.9113322939368583e-05, "loss": 0.0046, "step": 39320 }, { "epoch": 35.368705035971225, "grad_norm": 0.20263591408729553, "learning_rate": 2.9088287906646427e-05, "loss": 0.0054, "step": 39330 }, { "epoch": 35.37769784172662, "grad_norm": 0.3923395574092865, "learning_rate": 2.906325922635024e-05, "loss": 0.0057, "step": 39340 }, { "epoch": 35.38669064748201, "grad_norm": 0.14569388329982758, "learning_rate": 2.903823690608313e-05, "loss": 0.0025, "step": 39350 }, { "epoch": 35.39568345323741, "grad_norm": 0.11813899874687195, "learning_rate": 2.9013220953446174e-05, "loss": 0.0033, "step": 39360 }, { "epoch": 35.40467625899281, "grad_norm": 0.11413442343473434, "learning_rate": 2.8988211376038564e-05, "loss": 0.0031, "step": 39370 }, { "epoch": 35.4136690647482, "grad_norm": 0.18960683047771454, "learning_rate": 2.8963208181457564e-05, "loss": 0.004, "step": 39380 }, { "epoch": 35.422661870503596, "grad_norm": 0.12030936777591705, "learning_rate": 2.8938211377298453e-05, "loss": 0.0099, "step": 39390 }, { "epoch": 35.431654676258994, "grad_norm": 0.2172863632440567, "learning_rate": 2.8913220971154652e-05, "loss": 0.0098, "step": 39400 }, { "epoch": 35.44064748201439, "grad_norm": 0.0723147764801979, "learning_rate": 2.888823697061753e-05, "loss": 0.0029, "step": 39410 }, { "epoch": 35.44964028776978, "grad_norm": 0.11529526114463806, "learning_rate": 2.8863259383276618e-05, "loss": 0.0041, "step": 39420 }, { "epoch": 35.45863309352518, "grad_norm": 0.1270468831062317, "learning_rate": 2.8838288216719395e-05, "loss": 0.0054, "step": 39430 }, { "epoch": 35.46762589928058, "grad_norm": 0.09976531565189362, "learning_rate": 2.8813323478531484e-05, "loss": 0.0031, "step": 39440 }, { "epoch": 35.476618705035975, "grad_norm": 0.2561034858226776, "learning_rate": 2.8788365176296496e-05, "loss": 0.0072, "step": 39450 }, { "epoch": 35.485611510791365, "grad_norm": 0.09134136885404587, "learning_rate": 2.876341331759611e-05, "loss": 0.0022, "step": 39460 }, { "epoch": 35.49460431654676, "grad_norm": 0.0718105360865593, "learning_rate": 2.8738467910010036e-05, "loss": 0.009, "step": 39470 }, { "epoch": 35.50359712230216, "grad_norm": 0.25764235854148865, "learning_rate": 2.8713528961116032e-05, "loss": 0.0033, "step": 39480 }, { "epoch": 35.51258992805755, "grad_norm": 0.19383077323436737, "learning_rate": 2.8688596478489875e-05, "loss": 0.0067, "step": 39490 }, { "epoch": 35.52158273381295, "grad_norm": 0.162632554769516, "learning_rate": 2.8663670469705434e-05, "loss": 0.0027, "step": 39500 }, { "epoch": 35.530575539568346, "grad_norm": 0.12731558084487915, "learning_rate": 2.8638750942334546e-05, "loss": 0.0058, "step": 39510 }, { "epoch": 35.539568345323744, "grad_norm": 0.16048923134803772, "learning_rate": 2.8613837903947115e-05, "loss": 0.0028, "step": 39520 }, { "epoch": 35.548561151079134, "grad_norm": 0.3387151062488556, "learning_rate": 2.858893136211106e-05, "loss": 0.0051, "step": 39530 }, { "epoch": 35.55755395683453, "grad_norm": 0.23429037630558014, "learning_rate": 2.8564031324392315e-05, "loss": 0.0046, "step": 39540 }, { "epoch": 35.56654676258993, "grad_norm": 0.3614501357078552, "learning_rate": 2.85391377983549e-05, "loss": 0.0052, "step": 39550 }, { "epoch": 35.57553956834532, "grad_norm": 0.24797742068767548, "learning_rate": 2.851425079156075e-05, "loss": 0.0075, "step": 39560 }, { "epoch": 35.58453237410072, "grad_norm": 0.21253715455532074, "learning_rate": 2.848937031156994e-05, "loss": 0.0045, "step": 39570 }, { "epoch": 35.593525179856115, "grad_norm": 0.2070581614971161, "learning_rate": 2.846449636594044e-05, "loss": 0.0075, "step": 39580 }, { "epoch": 35.60251798561151, "grad_norm": 0.12982669472694397, "learning_rate": 2.843962896222836e-05, "loss": 0.0039, "step": 39590 }, { "epoch": 35.611510791366904, "grad_norm": 0.18142303824424744, "learning_rate": 2.8414768107987722e-05, "loss": 0.0041, "step": 39600 }, { "epoch": 35.6205035971223, "grad_norm": 0.24401769042015076, "learning_rate": 2.838991381077061e-05, "loss": 0.0057, "step": 39610 }, { "epoch": 35.6294964028777, "grad_norm": 0.14565172791481018, "learning_rate": 2.83650660781271e-05, "loss": 0.0034, "step": 39620 }, { "epoch": 35.638489208633096, "grad_norm": 0.08943059295415878, "learning_rate": 2.8340224917605285e-05, "loss": 0.0035, "step": 39630 }, { "epoch": 35.64748201438849, "grad_norm": 0.20206327736377716, "learning_rate": 2.831539033675122e-05, "loss": 0.0049, "step": 39640 }, { "epoch": 35.656474820143885, "grad_norm": 0.1164323091506958, "learning_rate": 2.8290562343109038e-05, "loss": 0.0065, "step": 39650 }, { "epoch": 35.66546762589928, "grad_norm": 0.06965608149766922, "learning_rate": 2.826574094422082e-05, "loss": 0.0031, "step": 39660 }, { "epoch": 35.67446043165468, "grad_norm": 0.06342167407274246, "learning_rate": 2.8240926147626645e-05, "loss": 0.0071, "step": 39670 }, { "epoch": 35.68345323741007, "grad_norm": 0.21824537217617035, "learning_rate": 2.8216117960864586e-05, "loss": 0.0059, "step": 39680 }, { "epoch": 35.69244604316547, "grad_norm": 0.26031485199928284, "learning_rate": 2.8191316391470703e-05, "loss": 0.0049, "step": 39690 }, { "epoch": 35.701438848920866, "grad_norm": 0.19127748906612396, "learning_rate": 2.816652144697911e-05, "loss": 0.0065, "step": 39700 }, { "epoch": 35.710431654676256, "grad_norm": 0.08655775338411331, "learning_rate": 2.8141733134921783e-05, "loss": 0.0071, "step": 39710 }, { "epoch": 35.719424460431654, "grad_norm": 0.21666687726974487, "learning_rate": 2.811695146282884e-05, "loss": 0.005, "step": 39720 }, { "epoch": 35.72841726618705, "grad_norm": 0.275116503238678, "learning_rate": 2.8092176438228212e-05, "loss": 0.0046, "step": 39730 }, { "epoch": 35.73741007194245, "grad_norm": 0.1168084368109703, "learning_rate": 2.806740806864598e-05, "loss": 0.0051, "step": 39740 }, { "epoch": 35.74640287769784, "grad_norm": 0.21368631720542908, "learning_rate": 2.804264636160604e-05, "loss": 0.0039, "step": 39750 }, { "epoch": 35.75539568345324, "grad_norm": 0.09222877770662308, "learning_rate": 2.8017891324630402e-05, "loss": 0.0027, "step": 39760 }, { "epoch": 35.764388489208635, "grad_norm": 0.09576284885406494, "learning_rate": 2.7993142965238976e-05, "loss": 0.006, "step": 39770 }, { "epoch": 35.773381294964025, "grad_norm": 0.10288737714290619, "learning_rate": 2.7968401290949665e-05, "loss": 0.0039, "step": 39780 }, { "epoch": 35.78237410071942, "grad_norm": 0.10678598284721375, "learning_rate": 2.7943666309278328e-05, "loss": 0.0032, "step": 39790 }, { "epoch": 35.79136690647482, "grad_norm": 0.08805840462446213, "learning_rate": 2.7918938027738783e-05, "loss": 0.0052, "step": 39800 }, { "epoch": 35.80035971223022, "grad_norm": 0.08376101404428482, "learning_rate": 2.789421645384287e-05, "loss": 0.0022, "step": 39810 }, { "epoch": 35.80935251798561, "grad_norm": 0.16360051929950714, "learning_rate": 2.786950159510032e-05, "loss": 0.005, "step": 39820 }, { "epoch": 35.818345323741006, "grad_norm": 0.1839136779308319, "learning_rate": 2.7844793459018876e-05, "loss": 0.0036, "step": 39830 }, { "epoch": 35.827338129496404, "grad_norm": 0.13111338019371033, "learning_rate": 2.7820092053104195e-05, "loss": 0.004, "step": 39840 }, { "epoch": 35.8363309352518, "grad_norm": 0.15508611500263214, "learning_rate": 2.7795397384859933e-05, "loss": 0.0029, "step": 39850 }, { "epoch": 35.84532374100719, "grad_norm": 0.1336871087551117, "learning_rate": 2.7770709461787638e-05, "loss": 0.003, "step": 39860 }, { "epoch": 35.85431654676259, "grad_norm": 0.1038479208946228, "learning_rate": 2.7746028291386915e-05, "loss": 0.0025, "step": 39870 }, { "epoch": 35.86330935251799, "grad_norm": 0.16184358298778534, "learning_rate": 2.772135388115519e-05, "loss": 0.0039, "step": 39880 }, { "epoch": 35.87230215827338, "grad_norm": 0.10509151220321655, "learning_rate": 2.7696686238587945e-05, "loss": 0.004, "step": 39890 }, { "epoch": 35.881294964028775, "grad_norm": 0.2409774661064148, "learning_rate": 2.7672025371178505e-05, "loss": 0.0051, "step": 39900 }, { "epoch": 35.89028776978417, "grad_norm": 0.1840410828590393, "learning_rate": 2.7647371286418238e-05, "loss": 0.0031, "step": 39910 }, { "epoch": 35.89928057553957, "grad_norm": 0.1701980084180832, "learning_rate": 2.762272399179639e-05, "loss": 0.0044, "step": 39920 }, { "epoch": 35.90827338129496, "grad_norm": 0.17270967364311218, "learning_rate": 2.7598083494800154e-05, "loss": 0.0037, "step": 39930 }, { "epoch": 35.91726618705036, "grad_norm": 0.20837461948394775, "learning_rate": 2.7573449802914664e-05, "loss": 0.0047, "step": 39940 }, { "epoch": 35.92625899280576, "grad_norm": 0.1248055025935173, "learning_rate": 2.7548822923622964e-05, "loss": 0.0025, "step": 39950 }, { "epoch": 35.935251798561154, "grad_norm": 0.10528218001127243, "learning_rate": 2.752420286440609e-05, "loss": 0.0036, "step": 39960 }, { "epoch": 35.944244604316545, "grad_norm": 0.11002032458782196, "learning_rate": 2.749958963274295e-05, "loss": 0.004, "step": 39970 }, { "epoch": 35.95323741007194, "grad_norm": 0.204624205827713, "learning_rate": 2.747498323611039e-05, "loss": 0.0045, "step": 39980 }, { "epoch": 35.96223021582734, "grad_norm": 0.10449928045272827, "learning_rate": 2.7450383681983184e-05, "loss": 0.0029, "step": 39990 }, { "epoch": 35.97122302158273, "grad_norm": 0.14483828842639923, "learning_rate": 2.742579097783403e-05, "loss": 0.0034, "step": 40000 }, { "epoch": 35.98021582733813, "grad_norm": 0.1061733067035675, "learning_rate": 2.7401205131133512e-05, "loss": 0.0031, "step": 40010 }, { "epoch": 35.989208633093526, "grad_norm": 0.18811269104480743, "learning_rate": 2.7376626149350238e-05, "loss": 0.0053, "step": 40020 }, { "epoch": 35.99820143884892, "grad_norm": 0.0813552737236023, "learning_rate": 2.735205403995056e-05, "loss": 0.003, "step": 40030 }, { "epoch": 36.007194244604314, "grad_norm": 0.2286427766084671, "learning_rate": 2.7327488810398917e-05, "loss": 0.0029, "step": 40040 }, { "epoch": 36.01618705035971, "grad_norm": 0.08765289187431335, "learning_rate": 2.7302930468157507e-05, "loss": 0.0039, "step": 40050 }, { "epoch": 36.02517985611511, "grad_norm": 0.19542120397090912, "learning_rate": 2.727837902068655e-05, "loss": 0.0034, "step": 40060 }, { "epoch": 36.03417266187051, "grad_norm": 0.13446110486984253, "learning_rate": 2.7253834475444123e-05, "loss": 0.0028, "step": 40070 }, { "epoch": 36.0431654676259, "grad_norm": 0.26977482438087463, "learning_rate": 2.7229296839886204e-05, "loss": 0.0044, "step": 40080 }, { "epoch": 36.052158273381295, "grad_norm": 0.10193436592817307, "learning_rate": 2.720476612146668e-05, "loss": 0.0037, "step": 40090 }, { "epoch": 36.06115107913669, "grad_norm": 0.20196829736232758, "learning_rate": 2.7180242327637317e-05, "loss": 0.0046, "step": 40100 }, { "epoch": 36.07014388489208, "grad_norm": 0.1387304812669754, "learning_rate": 2.7155725465847826e-05, "loss": 0.0052, "step": 40110 }, { "epoch": 36.07913669064748, "grad_norm": 0.09105400741100311, "learning_rate": 2.713121554354578e-05, "loss": 0.003, "step": 40120 }, { "epoch": 36.08812949640288, "grad_norm": 0.224150612950325, "learning_rate": 2.7106712568176628e-05, "loss": 0.0047, "step": 40130 }, { "epoch": 36.097122302158276, "grad_norm": 0.17286020517349243, "learning_rate": 2.708221654718374e-05, "loss": 0.0029, "step": 40140 }, { "epoch": 36.106115107913666, "grad_norm": 0.105092354118824, "learning_rate": 2.7057727488008357e-05, "loss": 0.0036, "step": 40150 }, { "epoch": 36.115107913669064, "grad_norm": 0.12135698646306992, "learning_rate": 2.703324539808961e-05, "loss": 0.0035, "step": 40160 }, { "epoch": 36.12410071942446, "grad_norm": 0.14989568293094635, "learning_rate": 2.7008770284864505e-05, "loss": 0.0025, "step": 40170 }, { "epoch": 36.13309352517986, "grad_norm": 0.10528679192066193, "learning_rate": 2.6984302155767916e-05, "loss": 0.0043, "step": 40180 }, { "epoch": 36.14208633093525, "grad_norm": 0.3294396996498108, "learning_rate": 2.6959841018232683e-05, "loss": 0.0047, "step": 40190 }, { "epoch": 36.15107913669065, "grad_norm": 0.3023735284805298, "learning_rate": 2.693538687968937e-05, "loss": 0.0025, "step": 40200 }, { "epoch": 36.160071942446045, "grad_norm": 0.37178903818130493, "learning_rate": 2.6910939747566556e-05, "loss": 0.0056, "step": 40210 }, { "epoch": 36.169064748201436, "grad_norm": 0.10647580772638321, "learning_rate": 2.6886499629290607e-05, "loss": 0.0022, "step": 40220 }, { "epoch": 36.17805755395683, "grad_norm": 0.0867614671587944, "learning_rate": 2.6862066532285802e-05, "loss": 0.0026, "step": 40230 }, { "epoch": 36.18705035971223, "grad_norm": 0.1332385092973709, "learning_rate": 2.6837640463974262e-05, "loss": 0.0036, "step": 40240 }, { "epoch": 36.19604316546763, "grad_norm": 0.1476348340511322, "learning_rate": 2.681322143177596e-05, "loss": 0.0024, "step": 40250 }, { "epoch": 36.20503597122302, "grad_norm": 0.1547943651676178, "learning_rate": 2.678880944310882e-05, "loss": 0.0032, "step": 40260 }, { "epoch": 36.21402877697842, "grad_norm": 0.11476988345384598, "learning_rate": 2.6764404505388474e-05, "loss": 0.0043, "step": 40270 }, { "epoch": 36.223021582733814, "grad_norm": 0.16376760601997375, "learning_rate": 2.6740006626028558e-05, "loss": 0.0048, "step": 40280 }, { "epoch": 36.23201438848921, "grad_norm": 0.09336930513381958, "learning_rate": 2.671561581244048e-05, "loss": 0.0058, "step": 40290 }, { "epoch": 36.2410071942446, "grad_norm": 0.05960911884903908, "learning_rate": 2.6691232072033536e-05, "loss": 0.0055, "step": 40300 }, { "epoch": 36.25, "grad_norm": 0.2610599994659424, "learning_rate": 2.6666855412214852e-05, "loss": 0.007, "step": 40310 }, { "epoch": 36.2589928057554, "grad_norm": 0.09603068977594376, "learning_rate": 2.664248584038942e-05, "loss": 0.0042, "step": 40320 }, { "epoch": 36.26798561151079, "grad_norm": 0.24612432718276978, "learning_rate": 2.6618123363960047e-05, "loss": 0.003, "step": 40330 }, { "epoch": 36.276978417266186, "grad_norm": 0.250630259513855, "learning_rate": 2.659376799032748e-05, "loss": 0.0043, "step": 40340 }, { "epoch": 36.28597122302158, "grad_norm": 0.19956345856189728, "learning_rate": 2.6569419726890145e-05, "loss": 0.0066, "step": 40350 }, { "epoch": 36.29496402877698, "grad_norm": 0.21370115876197815, "learning_rate": 2.654507858104447e-05, "loss": 0.0053, "step": 40360 }, { "epoch": 36.30395683453237, "grad_norm": 0.10838519036769867, "learning_rate": 2.652074456018463e-05, "loss": 0.0034, "step": 40370 }, { "epoch": 36.31294964028777, "grad_norm": 0.267311155796051, "learning_rate": 2.6496417671702646e-05, "loss": 0.0048, "step": 40380 }, { "epoch": 36.32194244604317, "grad_norm": 0.15350227057933807, "learning_rate": 2.6472097922988427e-05, "loss": 0.0023, "step": 40390 }, { "epoch": 36.330935251798564, "grad_norm": 0.16204893589019775, "learning_rate": 2.6447785321429607e-05, "loss": 0.014, "step": 40400 }, { "epoch": 36.339928057553955, "grad_norm": 0.1640034019947052, "learning_rate": 2.6423479874411784e-05, "loss": 0.0048, "step": 40410 }, { "epoch": 36.34892086330935, "grad_norm": 0.21220384538173676, "learning_rate": 2.6399181589318234e-05, "loss": 0.0029, "step": 40420 }, { "epoch": 36.35791366906475, "grad_norm": 0.19815143942832947, "learning_rate": 2.6374890473530188e-05, "loss": 0.0032, "step": 40430 }, { "epoch": 36.36690647482014, "grad_norm": 0.11697609722614288, "learning_rate": 2.635060653442664e-05, "loss": 0.0044, "step": 40440 }, { "epoch": 36.37589928057554, "grad_norm": 0.14410404860973358, "learning_rate": 2.6326329779384395e-05, "loss": 0.0031, "step": 40450 }, { "epoch": 36.384892086330936, "grad_norm": 0.22794954478740692, "learning_rate": 2.63020602157781e-05, "loss": 0.003, "step": 40460 }, { "epoch": 36.393884892086334, "grad_norm": 0.23640906810760498, "learning_rate": 2.62777978509802e-05, "loss": 0.0045, "step": 40470 }, { "epoch": 36.402877697841724, "grad_norm": 0.14576612412929535, "learning_rate": 2.6253542692360954e-05, "loss": 0.005, "step": 40480 }, { "epoch": 36.41187050359712, "grad_norm": 0.08227525651454926, "learning_rate": 2.6229294747288458e-05, "loss": 0.0027, "step": 40490 }, { "epoch": 36.42086330935252, "grad_norm": 0.13068567216396332, "learning_rate": 2.6205054023128596e-05, "loss": 0.0021, "step": 40500 }, { "epoch": 36.42985611510792, "grad_norm": 0.1202157735824585, "learning_rate": 2.6180820527245043e-05, "loss": 0.0056, "step": 40510 }, { "epoch": 36.43884892086331, "grad_norm": 0.1286783516407013, "learning_rate": 2.6156594266999313e-05, "loss": 0.0032, "step": 40520 }, { "epoch": 36.447841726618705, "grad_norm": 0.32658737897872925, "learning_rate": 2.6132375249750672e-05, "loss": 0.0046, "step": 40530 }, { "epoch": 36.4568345323741, "grad_norm": 0.15960243344306946, "learning_rate": 2.6108163482856286e-05, "loss": 0.0032, "step": 40540 }, { "epoch": 36.46582733812949, "grad_norm": 0.21511957049369812, "learning_rate": 2.6083958973670964e-05, "loss": 0.003, "step": 40550 }, { "epoch": 36.47482014388489, "grad_norm": 0.16167835891246796, "learning_rate": 2.6059761729547483e-05, "loss": 0.0036, "step": 40560 }, { "epoch": 36.48381294964029, "grad_norm": 0.09891469776630402, "learning_rate": 2.603557175783624e-05, "loss": 0.0021, "step": 40570 }, { "epoch": 36.492805755395686, "grad_norm": 0.1026059165596962, "learning_rate": 2.601138906588559e-05, "loss": 0.003, "step": 40580 }, { "epoch": 36.50179856115108, "grad_norm": 0.23026573657989502, "learning_rate": 2.598721366104152e-05, "loss": 0.0039, "step": 40590 }, { "epoch": 36.510791366906474, "grad_norm": 0.09826561063528061, "learning_rate": 2.5963045550647945e-05, "loss": 0.0026, "step": 40600 }, { "epoch": 36.51978417266187, "grad_norm": 0.24337269365787506, "learning_rate": 2.5938884742046466e-05, "loss": 0.0032, "step": 40610 }, { "epoch": 36.52877697841727, "grad_norm": 0.08996256440877914, "learning_rate": 2.5914731242576507e-05, "loss": 0.0041, "step": 40620 }, { "epoch": 36.53776978417266, "grad_norm": 0.07255946844816208, "learning_rate": 2.5890585059575268e-05, "loss": 0.0034, "step": 40630 }, { "epoch": 36.54676258992806, "grad_norm": 0.15208905935287476, "learning_rate": 2.5866446200377688e-05, "loss": 0.0026, "step": 40640 }, { "epoch": 36.555755395683455, "grad_norm": 0.15445013344287872, "learning_rate": 2.5842314672316566e-05, "loss": 0.0043, "step": 40650 }, { "epoch": 36.564748201438846, "grad_norm": 0.11907047778367996, "learning_rate": 2.581819048272239e-05, "loss": 0.0038, "step": 40660 }, { "epoch": 36.57374100719424, "grad_norm": 0.15415388345718384, "learning_rate": 2.5794073638923478e-05, "loss": 0.0054, "step": 40670 }, { "epoch": 36.58273381294964, "grad_norm": 0.319202184677124, "learning_rate": 2.576996414824586e-05, "loss": 0.0085, "step": 40680 }, { "epoch": 36.59172661870504, "grad_norm": 0.2974475622177124, "learning_rate": 2.574586201801339e-05, "loss": 0.0035, "step": 40690 }, { "epoch": 36.60071942446043, "grad_norm": 0.253559410572052, "learning_rate": 2.572176725554762e-05, "loss": 0.0038, "step": 40700 }, { "epoch": 36.60971223021583, "grad_norm": 0.22505944967269897, "learning_rate": 2.5697679868167966e-05, "loss": 0.0045, "step": 40710 }, { "epoch": 36.618705035971225, "grad_norm": 0.2633562386035919, "learning_rate": 2.5673599863191468e-05, "loss": 0.0032, "step": 40720 }, { "epoch": 36.62769784172662, "grad_norm": 0.14117151498794556, "learning_rate": 2.564952724793306e-05, "loss": 0.003, "step": 40730 }, { "epoch": 36.63669064748201, "grad_norm": 0.1525062471628189, "learning_rate": 2.5625462029705306e-05, "loss": 0.0038, "step": 40740 }, { "epoch": 36.64568345323741, "grad_norm": 0.1983562558889389, "learning_rate": 2.5601404215818624e-05, "loss": 0.0032, "step": 40750 }, { "epoch": 36.65467625899281, "grad_norm": 0.13439586758613586, "learning_rate": 2.5577353813581144e-05, "loss": 0.0034, "step": 40760 }, { "epoch": 36.6636690647482, "grad_norm": 0.37080252170562744, "learning_rate": 2.5553310830298733e-05, "loss": 0.0064, "step": 40770 }, { "epoch": 36.672661870503596, "grad_norm": 0.12396737188100815, "learning_rate": 2.5529275273275012e-05, "loss": 0.0038, "step": 40780 }, { "epoch": 36.681654676258994, "grad_norm": 0.19569015502929688, "learning_rate": 2.550524714981133e-05, "loss": 0.0028, "step": 40790 }, { "epoch": 36.69064748201439, "grad_norm": 0.2658216655254364, "learning_rate": 2.5481226467206837e-05, "loss": 0.0029, "step": 40800 }, { "epoch": 36.69964028776978, "grad_norm": 0.1368841677904129, "learning_rate": 2.5457213232758365e-05, "loss": 0.0025, "step": 40810 }, { "epoch": 36.70863309352518, "grad_norm": 0.12019547820091248, "learning_rate": 2.5433207453760498e-05, "loss": 0.0088, "step": 40820 }, { "epoch": 36.71762589928058, "grad_norm": 0.1440117359161377, "learning_rate": 2.5409209137505552e-05, "loss": 0.0031, "step": 40830 }, { "epoch": 36.726618705035975, "grad_norm": 0.06034635379910469, "learning_rate": 2.5385218291283597e-05, "loss": 0.0026, "step": 40840 }, { "epoch": 36.735611510791365, "grad_norm": 0.08312481641769409, "learning_rate": 2.5361234922382383e-05, "loss": 0.0036, "step": 40850 }, { "epoch": 36.74460431654676, "grad_norm": 0.0651901513338089, "learning_rate": 2.533725903808749e-05, "loss": 0.0035, "step": 40860 }, { "epoch": 36.75359712230216, "grad_norm": 0.11540555208921432, "learning_rate": 2.5313290645682085e-05, "loss": 0.0054, "step": 40870 }, { "epoch": 36.76258992805755, "grad_norm": 0.1292794942855835, "learning_rate": 2.52893297524472e-05, "loss": 0.0048, "step": 40880 }, { "epoch": 36.77158273381295, "grad_norm": 0.11750756204128265, "learning_rate": 2.526537636566145e-05, "loss": 0.0018, "step": 40890 }, { "epoch": 36.780575539568346, "grad_norm": 0.09238801151514053, "learning_rate": 2.5241430492601305e-05, "loss": 0.0037, "step": 40900 }, { "epoch": 36.789568345323744, "grad_norm": 0.10779400169849396, "learning_rate": 2.5217492140540867e-05, "loss": 0.0033, "step": 40910 }, { "epoch": 36.798561151079134, "grad_norm": 0.08078460395336151, "learning_rate": 2.5193561316751967e-05, "loss": 0.0036, "step": 40920 }, { "epoch": 36.80755395683453, "grad_norm": 0.0700666755437851, "learning_rate": 2.516963802850416e-05, "loss": 0.0055, "step": 40930 }, { "epoch": 36.81654676258993, "grad_norm": 0.08610568940639496, "learning_rate": 2.5145722283064698e-05, "loss": 0.0035, "step": 40940 }, { "epoch": 36.82553956834532, "grad_norm": 0.1367434710264206, "learning_rate": 2.5121814087698602e-05, "loss": 0.0034, "step": 40950 }, { "epoch": 36.83453237410072, "grad_norm": 0.08491601049900055, "learning_rate": 2.509791344966848e-05, "loss": 0.0025, "step": 40960 }, { "epoch": 36.843525179856115, "grad_norm": 0.09716268628835678, "learning_rate": 2.5074020376234768e-05, "loss": 0.0036, "step": 40970 }, { "epoch": 36.85251798561151, "grad_norm": 0.1897364854812622, "learning_rate": 2.5050134874655534e-05, "loss": 0.0032, "step": 40980 }, { "epoch": 36.861510791366904, "grad_norm": 0.08604174852371216, "learning_rate": 2.5026256952186566e-05, "loss": 0.0023, "step": 40990 }, { "epoch": 36.8705035971223, "grad_norm": 0.15693731606006622, "learning_rate": 2.5002386616081335e-05, "loss": 0.0021, "step": 41000 }, { "epoch": 36.8794964028777, "grad_norm": 0.09853968024253845, "learning_rate": 2.497852387359103e-05, "loss": 0.0089, "step": 41010 }, { "epoch": 36.888489208633096, "grad_norm": 0.2067396342754364, "learning_rate": 2.4954668731964496e-05, "loss": 0.003, "step": 41020 }, { "epoch": 36.89748201438849, "grad_norm": 0.1011197566986084, "learning_rate": 2.4930821198448364e-05, "loss": 0.0032, "step": 41030 }, { "epoch": 36.906474820143885, "grad_norm": 0.190323606133461, "learning_rate": 2.4906981280286796e-05, "loss": 0.0043, "step": 41040 }, { "epoch": 36.91546762589928, "grad_norm": 0.06715773791074753, "learning_rate": 2.488314898472179e-05, "loss": 0.0041, "step": 41050 }, { "epoch": 36.92446043165468, "grad_norm": 0.13033345341682434, "learning_rate": 2.485932431899295e-05, "loss": 0.0022, "step": 41060 }, { "epoch": 36.93345323741007, "grad_norm": 0.09873750060796738, "learning_rate": 2.4835507290337584e-05, "loss": 0.0048, "step": 41070 }, { "epoch": 36.94244604316547, "grad_norm": 0.10683463513851166, "learning_rate": 2.4811697905990672e-05, "loss": 0.0031, "step": 41080 }, { "epoch": 36.951438848920866, "grad_norm": 0.035893604159355164, "learning_rate": 2.4787896173184854e-05, "loss": 0.0029, "step": 41090 }, { "epoch": 36.960431654676256, "grad_norm": 0.1048302873969078, "learning_rate": 2.4764102099150534e-05, "loss": 0.0053, "step": 41100 }, { "epoch": 36.969424460431654, "grad_norm": 0.10180216282606125, "learning_rate": 2.4740315691115644e-05, "loss": 0.0044, "step": 41110 }, { "epoch": 36.97841726618705, "grad_norm": 0.06423449516296387, "learning_rate": 2.4716536956305918e-05, "loss": 0.0056, "step": 41120 }, { "epoch": 36.98741007194245, "grad_norm": 0.11589284986257553, "learning_rate": 2.4692765901944697e-05, "loss": 0.003, "step": 41130 }, { "epoch": 36.99640287769784, "grad_norm": 0.10798994451761246, "learning_rate": 2.4669002535253e-05, "loss": 0.0041, "step": 41140 }, { "epoch": 37.00539568345324, "grad_norm": 0.11255289614200592, "learning_rate": 2.46452468634495e-05, "loss": 0.0051, "step": 41150 }, { "epoch": 37.014388489208635, "grad_norm": 0.19883474707603455, "learning_rate": 2.462149889375055e-05, "loss": 0.0051, "step": 41160 }, { "epoch": 37.023381294964025, "grad_norm": 0.08359494060277939, "learning_rate": 2.459775863337014e-05, "loss": 0.0042, "step": 41170 }, { "epoch": 37.03237410071942, "grad_norm": 0.11538874357938766, "learning_rate": 2.4574026089519985e-05, "loss": 0.003, "step": 41180 }, { "epoch": 37.04136690647482, "grad_norm": 0.16420327126979828, "learning_rate": 2.4550301269409333e-05, "loss": 0.0048, "step": 41190 }, { "epoch": 37.05035971223022, "grad_norm": 0.1724124401807785, "learning_rate": 2.4526584180245216e-05, "loss": 0.0055, "step": 41200 }, { "epoch": 37.05935251798561, "grad_norm": 0.10844267159700394, "learning_rate": 2.4502874829232236e-05, "loss": 0.0049, "step": 41210 }, { "epoch": 37.068345323741006, "grad_norm": 0.2548823654651642, "learning_rate": 2.447917322357267e-05, "loss": 0.0033, "step": 41220 }, { "epoch": 37.077338129496404, "grad_norm": 0.1499309539794922, "learning_rate": 2.4455479370466443e-05, "loss": 0.0036, "step": 41230 }, { "epoch": 37.0863309352518, "grad_norm": 0.21369850635528564, "learning_rate": 2.4431793277111097e-05, "loss": 0.0034, "step": 41240 }, { "epoch": 37.09532374100719, "grad_norm": 0.13190890848636627, "learning_rate": 2.4408114950701905e-05, "loss": 0.0024, "step": 41250 }, { "epoch": 37.10431654676259, "grad_norm": 0.09594479203224182, "learning_rate": 2.4384444398431634e-05, "loss": 0.0029, "step": 41260 }, { "epoch": 37.11330935251799, "grad_norm": 0.13019965589046478, "learning_rate": 2.4360781627490837e-05, "loss": 0.0032, "step": 41270 }, { "epoch": 37.12230215827338, "grad_norm": 0.0769544318318367, "learning_rate": 2.433712664506762e-05, "loss": 0.003, "step": 41280 }, { "epoch": 37.131294964028775, "grad_norm": 0.2196192443370819, "learning_rate": 2.431347945834774e-05, "loss": 0.003, "step": 41290 }, { "epoch": 37.14028776978417, "grad_norm": 0.08628053218126297, "learning_rate": 2.428984007451458e-05, "loss": 0.0043, "step": 41300 }, { "epoch": 37.14928057553957, "grad_norm": 0.10904020071029663, "learning_rate": 2.426620850074917e-05, "loss": 0.0055, "step": 41310 }, { "epoch": 37.15827338129496, "grad_norm": 0.16105732321739197, "learning_rate": 2.424258474423014e-05, "loss": 0.0059, "step": 41320 }, { "epoch": 37.16726618705036, "grad_norm": 0.10517407953739166, "learning_rate": 2.421896881213382e-05, "loss": 0.0029, "step": 41330 }, { "epoch": 37.17625899280576, "grad_norm": 0.23645468056201935, "learning_rate": 2.419536071163402e-05, "loss": 0.0042, "step": 41340 }, { "epoch": 37.185251798561154, "grad_norm": 0.11979568004608154, "learning_rate": 2.417176044990233e-05, "loss": 0.0023, "step": 41350 }, { "epoch": 37.194244604316545, "grad_norm": 0.07438303530216217, "learning_rate": 2.4148168034107855e-05, "loss": 0.0046, "step": 41360 }, { "epoch": 37.20323741007194, "grad_norm": 0.13410411775112152, "learning_rate": 2.4124583471417355e-05, "loss": 0.0031, "step": 41370 }, { "epoch": 37.21223021582734, "grad_norm": 0.10648088902235031, "learning_rate": 2.41010067689952e-05, "loss": 0.0033, "step": 41380 }, { "epoch": 37.22122302158273, "grad_norm": 0.2872897684574127, "learning_rate": 2.4077437934003338e-05, "loss": 0.0035, "step": 41390 }, { "epoch": 37.23021582733813, "grad_norm": 0.1236020028591156, "learning_rate": 2.405387697360143e-05, "loss": 0.0035, "step": 41400 }, { "epoch": 37.239208633093526, "grad_norm": 0.09576404839754105, "learning_rate": 2.4030323894946595e-05, "loss": 0.0024, "step": 41410 }, { "epoch": 37.24820143884892, "grad_norm": 0.1401943415403366, "learning_rate": 2.40067787051937e-05, "loss": 0.0039, "step": 41420 }, { "epoch": 37.257194244604314, "grad_norm": 0.0699823647737503, "learning_rate": 2.3983241411495087e-05, "loss": 0.0034, "step": 41430 }, { "epoch": 37.26618705035971, "grad_norm": 0.2276022732257843, "learning_rate": 2.3959712021000823e-05, "loss": 0.0046, "step": 41440 }, { "epoch": 37.27517985611511, "grad_norm": 0.1261943131685257, "learning_rate": 2.3936190540858495e-05, "loss": 0.003, "step": 41450 }, { "epoch": 37.28417266187051, "grad_norm": 0.14687559008598328, "learning_rate": 2.39126769782133e-05, "loss": 0.0064, "step": 41460 }, { "epoch": 37.2931654676259, "grad_norm": 0.1705300360918045, "learning_rate": 2.388917134020805e-05, "loss": 0.0025, "step": 41470 }, { "epoch": 37.302158273381295, "grad_norm": 0.12210885435342789, "learning_rate": 2.3865673633983128e-05, "loss": 0.0035, "step": 41480 }, { "epoch": 37.31115107913669, "grad_norm": 0.0981978103518486, "learning_rate": 2.3842183866676492e-05, "loss": 0.0025, "step": 41490 }, { "epoch": 37.32014388489208, "grad_norm": 0.070584237575531, "learning_rate": 2.381870204542377e-05, "loss": 0.0032, "step": 41500 }, { "epoch": 37.32913669064748, "grad_norm": 0.06658868491649628, "learning_rate": 2.379522817735808e-05, "loss": 0.0021, "step": 41510 }, { "epoch": 37.33812949640288, "grad_norm": 0.15231207013130188, "learning_rate": 2.377176226961018e-05, "loss": 0.0038, "step": 41520 }, { "epoch": 37.347122302158276, "grad_norm": 0.08931174874305725, "learning_rate": 2.3748304329308384e-05, "loss": 0.0041, "step": 41530 }, { "epoch": 37.356115107913666, "grad_norm": 0.10999877005815506, "learning_rate": 2.372485436357858e-05, "loss": 0.0029, "step": 41540 }, { "epoch": 37.365107913669064, "grad_norm": 0.1789330393075943, "learning_rate": 2.3701412379544296e-05, "loss": 0.0059, "step": 41550 }, { "epoch": 37.37410071942446, "grad_norm": 0.17696845531463623, "learning_rate": 2.367797838432653e-05, "loss": 0.0054, "step": 41560 }, { "epoch": 37.38309352517986, "grad_norm": 0.10578563809394836, "learning_rate": 2.3654552385043967e-05, "loss": 0.0036, "step": 41570 }, { "epoch": 37.39208633093525, "grad_norm": 0.07768413424491882, "learning_rate": 2.3631134388812742e-05, "loss": 0.0057, "step": 41580 }, { "epoch": 37.40107913669065, "grad_norm": 0.11091802269220352, "learning_rate": 2.3607724402746684e-05, "loss": 0.0063, "step": 41590 }, { "epoch": 37.410071942446045, "grad_norm": 0.10519053041934967, "learning_rate": 2.35843224339571e-05, "loss": 0.0033, "step": 41600 }, { "epoch": 37.419064748201436, "grad_norm": 0.11334345489740372, "learning_rate": 2.3560928489552897e-05, "loss": 0.0025, "step": 41610 }, { "epoch": 37.42805755395683, "grad_norm": 0.24488291144371033, "learning_rate": 2.353754257664053e-05, "loss": 0.0051, "step": 41620 }, { "epoch": 37.43705035971223, "grad_norm": 0.1733037233352661, "learning_rate": 2.3514164702324037e-05, "loss": 0.0035, "step": 41630 }, { "epoch": 37.44604316546763, "grad_norm": 0.11858365684747696, "learning_rate": 2.3490794873704963e-05, "loss": 0.0032, "step": 41640 }, { "epoch": 37.45503597122302, "grad_norm": 0.07601375877857208, "learning_rate": 2.3467433097882496e-05, "loss": 0.0029, "step": 41650 }, { "epoch": 37.46402877697842, "grad_norm": 0.11409493535757065, "learning_rate": 2.34440793819533e-05, "loss": 0.0039, "step": 41660 }, { "epoch": 37.473021582733814, "grad_norm": 0.06954988837242126, "learning_rate": 2.3420733733011617e-05, "loss": 0.0028, "step": 41670 }, { "epoch": 37.48201438848921, "grad_norm": 0.15306363999843597, "learning_rate": 2.3397396158149243e-05, "loss": 0.0051, "step": 41680 }, { "epoch": 37.4910071942446, "grad_norm": 0.11322181671857834, "learning_rate": 2.3374066664455498e-05, "loss": 0.0037, "step": 41690 }, { "epoch": 37.5, "grad_norm": 0.10661999881267548, "learning_rate": 2.3350745259017315e-05, "loss": 0.0026, "step": 41700 }, { "epoch": 37.5089928057554, "grad_norm": 0.24304354190826416, "learning_rate": 2.332743194891906e-05, "loss": 0.0026, "step": 41710 }, { "epoch": 37.51798561151079, "grad_norm": 0.07347047328948975, "learning_rate": 2.330412674124276e-05, "loss": 0.0016, "step": 41720 }, { "epoch": 37.526978417266186, "grad_norm": 0.08951331675052643, "learning_rate": 2.328082964306786e-05, "loss": 0.0027, "step": 41730 }, { "epoch": 37.53597122302158, "grad_norm": 0.07304394245147705, "learning_rate": 2.325754066147145e-05, "loss": 0.0026, "step": 41740 }, { "epoch": 37.54496402877698, "grad_norm": 0.07273215800523758, "learning_rate": 2.32342598035281e-05, "loss": 0.0026, "step": 41750 }, { "epoch": 37.55395683453237, "grad_norm": 0.12249332666397095, "learning_rate": 2.321098707630991e-05, "loss": 0.0041, "step": 41760 }, { "epoch": 37.56294964028777, "grad_norm": 0.09470809996128082, "learning_rate": 2.318772248688652e-05, "loss": 0.0024, "step": 41770 }, { "epoch": 37.57194244604317, "grad_norm": 0.09075140953063965, "learning_rate": 2.3164466042325107e-05, "loss": 0.0037, "step": 41780 }, { "epoch": 37.580935251798564, "grad_norm": 0.15613147616386414, "learning_rate": 2.3141217749690353e-05, "loss": 0.0043, "step": 41790 }, { "epoch": 37.589928057553955, "grad_norm": 0.10946743190288544, "learning_rate": 2.3117977616044466e-05, "loss": 0.003, "step": 41800 }, { "epoch": 37.59892086330935, "grad_norm": 0.2293538898229599, "learning_rate": 2.309474564844722e-05, "loss": 0.0047, "step": 41810 }, { "epoch": 37.60791366906475, "grad_norm": 0.1677340418100357, "learning_rate": 2.307152185395585e-05, "loss": 0.004, "step": 41820 }, { "epoch": 37.61690647482014, "grad_norm": 0.16624358296394348, "learning_rate": 2.3048306239625144e-05, "loss": 0.0038, "step": 41830 }, { "epoch": 37.62589928057554, "grad_norm": 0.0739830955862999, "learning_rate": 2.3025098812507378e-05, "loss": 0.0046, "step": 41840 }, { "epoch": 37.634892086330936, "grad_norm": 0.3240225315093994, "learning_rate": 2.3001899579652366e-05, "loss": 0.0044, "step": 41850 }, { "epoch": 37.643884892086334, "grad_norm": 0.07586370408535004, "learning_rate": 2.2978708548107393e-05, "loss": 0.0042, "step": 41860 }, { "epoch": 37.652877697841724, "grad_norm": 0.08880262076854706, "learning_rate": 2.2955525724917348e-05, "loss": 0.0025, "step": 41870 }, { "epoch": 37.66187050359712, "grad_norm": 0.18236473202705383, "learning_rate": 2.2932351117124477e-05, "loss": 0.004, "step": 41880 }, { "epoch": 37.67086330935252, "grad_norm": 0.10391969978809357, "learning_rate": 2.29091847317687e-05, "loss": 0.0029, "step": 41890 }, { "epoch": 37.67985611510792, "grad_norm": 0.11254490911960602, "learning_rate": 2.2886026575887277e-05, "loss": 0.0035, "step": 41900 }, { "epoch": 37.68884892086331, "grad_norm": 0.26471859216690063, "learning_rate": 2.2862876656515094e-05, "loss": 0.0038, "step": 41910 }, { "epoch": 37.697841726618705, "grad_norm": 0.17705580592155457, "learning_rate": 2.2839734980684464e-05, "loss": 0.0029, "step": 41920 }, { "epoch": 37.7068345323741, "grad_norm": 0.31967267394065857, "learning_rate": 2.281660155542522e-05, "loss": 0.0051, "step": 41930 }, { "epoch": 37.71582733812949, "grad_norm": 0.18884533643722534, "learning_rate": 2.279347638776469e-05, "loss": 0.0033, "step": 41940 }, { "epoch": 37.72482014388489, "grad_norm": 0.13641445338726044, "learning_rate": 2.2770359484727665e-05, "loss": 0.0043, "step": 41950 }, { "epoch": 37.73381294964029, "grad_norm": 0.10473275929689407, "learning_rate": 2.27472508533365e-05, "loss": 0.0037, "step": 41960 }, { "epoch": 37.742805755395686, "grad_norm": 0.15373662114143372, "learning_rate": 2.2724150500610948e-05, "loss": 0.0067, "step": 41970 }, { "epoch": 37.75179856115108, "grad_norm": 0.16212646663188934, "learning_rate": 2.2701058433568302e-05, "loss": 0.0045, "step": 41980 }, { "epoch": 37.760791366906474, "grad_norm": 0.3181418776512146, "learning_rate": 2.2677974659223318e-05, "loss": 0.0082, "step": 41990 }, { "epoch": 37.76978417266187, "grad_norm": 0.1543615162372589, "learning_rate": 2.2654899184588235e-05, "loss": 0.0043, "step": 42000 }, { "epoch": 37.77877697841727, "grad_norm": 0.1485362946987152, "learning_rate": 2.2631832016672756e-05, "loss": 0.0024, "step": 42010 }, { "epoch": 37.78776978417266, "grad_norm": 0.13401907682418823, "learning_rate": 2.2608773162484127e-05, "loss": 0.0034, "step": 42020 }, { "epoch": 37.79676258992806, "grad_norm": 0.1401127576828003, "learning_rate": 2.2585722629026958e-05, "loss": 0.0027, "step": 42030 }, { "epoch": 37.805755395683455, "grad_norm": 0.08516126126050949, "learning_rate": 2.2562680423303457e-05, "loss": 0.0022, "step": 42040 }, { "epoch": 37.814748201438846, "grad_norm": 0.13182072341442108, "learning_rate": 2.2539646552313165e-05, "loss": 0.0061, "step": 42050 }, { "epoch": 37.82374100719424, "grad_norm": 0.15951551496982574, "learning_rate": 2.251662102305322e-05, "loss": 0.0031, "step": 42060 }, { "epoch": 37.83273381294964, "grad_norm": 0.13103438913822174, "learning_rate": 2.2493603842518152e-05, "loss": 0.0021, "step": 42070 }, { "epoch": 37.84172661870504, "grad_norm": 0.14828824996948242, "learning_rate": 2.2470595017699974e-05, "loss": 0.0027, "step": 42080 }, { "epoch": 37.85071942446043, "grad_norm": 0.14765946567058563, "learning_rate": 2.244759455558816e-05, "loss": 0.0029, "step": 42090 }, { "epoch": 37.85971223021583, "grad_norm": 0.10802942514419556, "learning_rate": 2.2424602463169614e-05, "loss": 0.0028, "step": 42100 }, { "epoch": 37.868705035971225, "grad_norm": 0.07415115088224411, "learning_rate": 2.2401618747428776e-05, "loss": 0.0039, "step": 42110 }, { "epoch": 37.87769784172662, "grad_norm": 0.11949926614761353, "learning_rate": 2.237864341534747e-05, "loss": 0.0055, "step": 42120 }, { "epoch": 37.88669064748201, "grad_norm": 0.07817691564559937, "learning_rate": 2.2355676473904998e-05, "loss": 0.0025, "step": 42130 }, { "epoch": 37.89568345323741, "grad_norm": 0.10703915357589722, "learning_rate": 2.2332717930078108e-05, "loss": 0.004, "step": 42140 }, { "epoch": 37.90467625899281, "grad_norm": 0.11598651111125946, "learning_rate": 2.2309767790840992e-05, "loss": 0.0082, "step": 42150 }, { "epoch": 37.9136690647482, "grad_norm": 0.08071457594633102, "learning_rate": 2.228682606316529e-05, "loss": 0.0053, "step": 42160 }, { "epoch": 37.922661870503596, "grad_norm": 0.15724404156208038, "learning_rate": 2.2263892754020138e-05, "loss": 0.0037, "step": 42170 }, { "epoch": 37.931654676258994, "grad_norm": 0.1269085556268692, "learning_rate": 2.2240967870372004e-05, "loss": 0.0021, "step": 42180 }, { "epoch": 37.94064748201439, "grad_norm": 0.10270289331674576, "learning_rate": 2.2218051419184933e-05, "loss": 0.0035, "step": 42190 }, { "epoch": 37.94964028776978, "grad_norm": 0.1193990483880043, "learning_rate": 2.219514340742026e-05, "loss": 0.004, "step": 42200 }, { "epoch": 37.95863309352518, "grad_norm": 0.3007245361804962, "learning_rate": 2.2172243842036898e-05, "loss": 0.003, "step": 42210 }, { "epoch": 37.96762589928058, "grad_norm": 0.20233039557933807, "learning_rate": 2.2149352729991107e-05, "loss": 0.0028, "step": 42220 }, { "epoch": 37.976618705035975, "grad_norm": 0.07313861697912216, "learning_rate": 2.2126470078236605e-05, "loss": 0.0049, "step": 42230 }, { "epoch": 37.985611510791365, "grad_norm": 0.19741670787334442, "learning_rate": 2.2103595893724533e-05, "loss": 0.0036, "step": 42240 }, { "epoch": 37.99460431654676, "grad_norm": 0.10323767364025116, "learning_rate": 2.208073018340345e-05, "loss": 0.0029, "step": 42250 }, { "epoch": 38.00359712230216, "grad_norm": 0.07881897687911987, "learning_rate": 2.2057872954219405e-05, "loss": 0.0055, "step": 42260 }, { "epoch": 38.01258992805755, "grad_norm": 0.10786742717027664, "learning_rate": 2.203502421311575e-05, "loss": 0.0043, "step": 42270 }, { "epoch": 38.02158273381295, "grad_norm": 0.1211419478058815, "learning_rate": 2.2012183967033388e-05, "loss": 0.0029, "step": 42280 }, { "epoch": 38.030575539568346, "grad_norm": 0.13487038016319275, "learning_rate": 2.198935222291056e-05, "loss": 0.0033, "step": 42290 }, { "epoch": 38.039568345323744, "grad_norm": 0.1135229617357254, "learning_rate": 2.1966528987682948e-05, "loss": 0.0037, "step": 42300 }, { "epoch": 38.048561151079134, "grad_norm": 0.22197432816028595, "learning_rate": 2.194371426828365e-05, "loss": 0.004, "step": 42310 }, { "epoch": 38.05755395683453, "grad_norm": 0.12927168607711792, "learning_rate": 2.192090807164317e-05, "loss": 0.0021, "step": 42320 }, { "epoch": 38.06654676258993, "grad_norm": 0.09191472828388214, "learning_rate": 2.1898110404689422e-05, "loss": 0.0037, "step": 42330 }, { "epoch": 38.07553956834533, "grad_norm": 0.13050620257854462, "learning_rate": 2.1875321274347776e-05, "loss": 0.005, "step": 42340 }, { "epoch": 38.08453237410072, "grad_norm": 0.09420249611139297, "learning_rate": 2.18525406875409e-05, "loss": 0.0029, "step": 42350 }, { "epoch": 38.093525179856115, "grad_norm": 0.21728505194187164, "learning_rate": 2.1829768651188997e-05, "loss": 0.0037, "step": 42360 }, { "epoch": 38.10251798561151, "grad_norm": 0.25643211603164673, "learning_rate": 2.180700517220958e-05, "loss": 0.0023, "step": 42370 }, { "epoch": 38.111510791366904, "grad_norm": 0.15303024649620056, "learning_rate": 2.1784250257517603e-05, "loss": 0.0043, "step": 42380 }, { "epoch": 38.1205035971223, "grad_norm": 0.20909693837165833, "learning_rate": 2.1761503914025406e-05, "loss": 0.0038, "step": 42390 }, { "epoch": 38.1294964028777, "grad_norm": 0.23997333645820618, "learning_rate": 2.1738766148642705e-05, "loss": 0.0031, "step": 42400 }, { "epoch": 38.138489208633096, "grad_norm": 0.12082915753126144, "learning_rate": 2.1716036968276683e-05, "loss": 0.0032, "step": 42410 }, { "epoch": 38.14748201438849, "grad_norm": 0.1779356598854065, "learning_rate": 2.1693316379831808e-05, "loss": 0.003, "step": 42420 }, { "epoch": 38.156474820143885, "grad_norm": 0.11356951296329498, "learning_rate": 2.1670604390210037e-05, "loss": 0.0034, "step": 42430 }, { "epoch": 38.16546762589928, "grad_norm": 0.12266916781663895, "learning_rate": 2.1647901006310656e-05, "loss": 0.0026, "step": 42440 }, { "epoch": 38.17446043165467, "grad_norm": 0.24625104665756226, "learning_rate": 2.1625206235030353e-05, "loss": 0.0041, "step": 42450 }, { "epoch": 38.18345323741007, "grad_norm": 0.11529891937971115, "learning_rate": 2.160252008326321e-05, "loss": 0.005, "step": 42460 }, { "epoch": 38.19244604316547, "grad_norm": 0.1050533652305603, "learning_rate": 2.157984255790067e-05, "loss": 0.0052, "step": 42470 }, { "epoch": 38.201438848920866, "grad_norm": 0.18563760817050934, "learning_rate": 2.1557173665831553e-05, "loss": 0.003, "step": 42480 }, { "epoch": 38.210431654676256, "grad_norm": 0.09407981485128403, "learning_rate": 2.153451341394212e-05, "loss": 0.0047, "step": 42490 }, { "epoch": 38.219424460431654, "grad_norm": 0.12706498801708221, "learning_rate": 2.151186180911589e-05, "loss": 0.003, "step": 42500 }, { "epoch": 38.22841726618705, "grad_norm": 0.104266457259655, "learning_rate": 2.1489218858233877e-05, "loss": 0.0037, "step": 42510 }, { "epoch": 38.23741007194245, "grad_norm": 0.08511265367269516, "learning_rate": 2.1466584568174392e-05, "loss": 0.0048, "step": 42520 }, { "epoch": 38.24640287769784, "grad_norm": 0.17350727319717407, "learning_rate": 2.1443958945813132e-05, "loss": 0.0048, "step": 42530 }, { "epoch": 38.25539568345324, "grad_norm": 0.09408014267683029, "learning_rate": 2.1421341998023163e-05, "loss": 0.0063, "step": 42540 }, { "epoch": 38.264388489208635, "grad_norm": 0.26631826162338257, "learning_rate": 2.139873373167491e-05, "loss": 0.0056, "step": 42550 }, { "epoch": 38.273381294964025, "grad_norm": 0.14809982478618622, "learning_rate": 2.13761341536362e-05, "loss": 0.0038, "step": 42560 }, { "epoch": 38.28237410071942, "grad_norm": 0.12563058733940125, "learning_rate": 2.1353543270772136e-05, "loss": 0.0025, "step": 42570 }, { "epoch": 38.29136690647482, "grad_norm": 0.11593254655599594, "learning_rate": 2.1330961089945297e-05, "loss": 0.0045, "step": 42580 }, { "epoch": 38.30035971223022, "grad_norm": 0.1213357225060463, "learning_rate": 2.130838761801548e-05, "loss": 0.0075, "step": 42590 }, { "epoch": 38.30935251798561, "grad_norm": 0.06699296087026596, "learning_rate": 2.1285822861839966e-05, "loss": 0.0044, "step": 42600 }, { "epoch": 38.318345323741006, "grad_norm": 0.05790594965219498, "learning_rate": 2.126326682827331e-05, "loss": 0.0016, "step": 42610 }, { "epoch": 38.327338129496404, "grad_norm": 0.20637395977973938, "learning_rate": 2.124071952416744e-05, "loss": 0.0029, "step": 42620 }, { "epoch": 38.3363309352518, "grad_norm": 0.10497503727674484, "learning_rate": 2.1218180956371634e-05, "loss": 0.0025, "step": 42630 }, { "epoch": 38.34532374100719, "grad_norm": 0.07383411377668381, "learning_rate": 2.119565113173252e-05, "loss": 0.0047, "step": 42640 }, { "epoch": 38.35431654676259, "grad_norm": 0.1198202446103096, "learning_rate": 2.1173130057094033e-05, "loss": 0.0038, "step": 42650 }, { "epoch": 38.36330935251799, "grad_norm": 0.1962605118751526, "learning_rate": 2.115061773929753e-05, "loss": 0.0029, "step": 42660 }, { "epoch": 38.37230215827338, "grad_norm": 0.2512427866458893, "learning_rate": 2.1128114185181623e-05, "loss": 0.0028, "step": 42670 }, { "epoch": 38.381294964028775, "grad_norm": 0.12666162848472595, "learning_rate": 2.1105619401582317e-05, "loss": 0.0036, "step": 42680 }, { "epoch": 38.39028776978417, "grad_norm": 0.17588230967521667, "learning_rate": 2.1083133395332928e-05, "loss": 0.0035, "step": 42690 }, { "epoch": 38.39928057553957, "grad_norm": 0.08290019631385803, "learning_rate": 2.1060656173264082e-05, "loss": 0.0036, "step": 42700 }, { "epoch": 38.40827338129496, "grad_norm": 0.1764170527458191, "learning_rate": 2.103818774220383e-05, "loss": 0.0067, "step": 42710 }, { "epoch": 38.41726618705036, "grad_norm": 0.17108969390392303, "learning_rate": 2.1015728108977412e-05, "loss": 0.0041, "step": 42720 }, { "epoch": 38.42625899280576, "grad_norm": 0.1993711292743683, "learning_rate": 2.0993277280407548e-05, "loss": 0.0048, "step": 42730 }, { "epoch": 38.435251798561154, "grad_norm": 0.0863301232457161, "learning_rate": 2.0970835263314132e-05, "loss": 0.0049, "step": 42740 }, { "epoch": 38.444244604316545, "grad_norm": 0.14708977937698364, "learning_rate": 2.094840206451451e-05, "loss": 0.0039, "step": 42750 }, { "epoch": 38.45323741007194, "grad_norm": 0.24758699536323547, "learning_rate": 2.0925977690823273e-05, "loss": 0.0046, "step": 42760 }, { "epoch": 38.46223021582734, "grad_norm": 0.11673127859830856, "learning_rate": 2.0903562149052364e-05, "loss": 0.0032, "step": 42770 }, { "epoch": 38.47122302158273, "grad_norm": 0.2009029984474182, "learning_rate": 2.0881155446011025e-05, "loss": 0.0032, "step": 42780 }, { "epoch": 38.48021582733813, "grad_norm": 0.12302002310752869, "learning_rate": 2.0858757588505823e-05, "loss": 0.0046, "step": 42790 }, { "epoch": 38.489208633093526, "grad_norm": 0.12023329734802246, "learning_rate": 2.0836368583340622e-05, "loss": 0.0023, "step": 42800 }, { "epoch": 38.49820143884892, "grad_norm": 0.08361383527517319, "learning_rate": 2.081398843731664e-05, "loss": 0.0029, "step": 42810 }, { "epoch": 38.507194244604314, "grad_norm": 0.11681298911571503, "learning_rate": 2.0791617157232357e-05, "loss": 0.0039, "step": 42820 }, { "epoch": 38.51618705035971, "grad_norm": 0.16467662155628204, "learning_rate": 2.0769254749883576e-05, "loss": 0.0046, "step": 42830 }, { "epoch": 38.52517985611511, "grad_norm": 0.11146358400583267, "learning_rate": 2.0746901222063415e-05, "loss": 0.0029, "step": 42840 }, { "epoch": 38.53417266187051, "grad_norm": 0.20369581878185272, "learning_rate": 2.072455658056226e-05, "loss": 0.0033, "step": 42850 }, { "epoch": 38.5431654676259, "grad_norm": 0.12741756439208984, "learning_rate": 2.0702220832167873e-05, "loss": 0.0032, "step": 42860 }, { "epoch": 38.552158273381295, "grad_norm": 0.12024235725402832, "learning_rate": 2.0679893983665205e-05, "loss": 0.0037, "step": 42870 }, { "epoch": 38.56115107913669, "grad_norm": 0.1543518751859665, "learning_rate": 2.0657576041836622e-05, "loss": 0.0067, "step": 42880 }, { "epoch": 38.57014388489208, "grad_norm": 0.1834220141172409, "learning_rate": 2.0635267013461666e-05, "loss": 0.0066, "step": 42890 }, { "epoch": 38.57913669064748, "grad_norm": 0.13717685639858246, "learning_rate": 2.061296690531728e-05, "loss": 0.0048, "step": 42900 }, { "epoch": 38.58812949640288, "grad_norm": 0.06935977935791016, "learning_rate": 2.0590675724177622e-05, "loss": 0.0031, "step": 42910 }, { "epoch": 38.597122302158276, "grad_norm": 0.07237153500318527, "learning_rate": 2.0568393476814167e-05, "loss": 0.0027, "step": 42920 }, { "epoch": 38.606115107913666, "grad_norm": 0.13029618561267853, "learning_rate": 2.0546120169995685e-05, "loss": 0.0046, "step": 42930 }, { "epoch": 38.615107913669064, "grad_norm": 0.2035219520330429, "learning_rate": 2.0523855810488214e-05, "loss": 0.0058, "step": 42940 }, { "epoch": 38.62410071942446, "grad_norm": 0.19234801828861237, "learning_rate": 2.050160040505505e-05, "loss": 0.0027, "step": 42950 }, { "epoch": 38.63309352517986, "grad_norm": 0.12273724377155304, "learning_rate": 2.0479353960456843e-05, "loss": 0.0037, "step": 42960 }, { "epoch": 38.64208633093525, "grad_norm": 0.07666780054569244, "learning_rate": 2.0457116483451456e-05, "loss": 0.0028, "step": 42970 }, { "epoch": 38.65107913669065, "grad_norm": 0.19629476964473724, "learning_rate": 2.0434887980794043e-05, "loss": 0.0034, "step": 42980 }, { "epoch": 38.660071942446045, "grad_norm": 0.27285635471343994, "learning_rate": 2.0412668459237043e-05, "loss": 0.0052, "step": 42990 }, { "epoch": 38.669064748201436, "grad_norm": 0.08407142013311386, "learning_rate": 2.039045792553016e-05, "loss": 0.0022, "step": 43000 }, { "epoch": 38.67805755395683, "grad_norm": 0.08890140056610107, "learning_rate": 2.036825638642036e-05, "loss": 0.0038, "step": 43010 }, { "epoch": 38.68705035971223, "grad_norm": 0.19775424897670746, "learning_rate": 2.0346063848651868e-05, "loss": 0.0033, "step": 43020 }, { "epoch": 38.69604316546763, "grad_norm": 0.12402542680501938, "learning_rate": 2.0323880318966254e-05, "loss": 0.004, "step": 43030 }, { "epoch": 38.70503597122302, "grad_norm": 0.1328822821378708, "learning_rate": 2.030170580410221e-05, "loss": 0.004, "step": 43040 }, { "epoch": 38.71402877697842, "grad_norm": 0.14454442262649536, "learning_rate": 2.0279540310795837e-05, "loss": 0.0022, "step": 43050 }, { "epoch": 38.723021582733814, "grad_norm": 0.05008704215288162, "learning_rate": 2.0257383845780365e-05, "loss": 0.0026, "step": 43060 }, { "epoch": 38.73201438848921, "grad_norm": 0.10102328658103943, "learning_rate": 2.0235236415786384e-05, "loss": 0.0033, "step": 43070 }, { "epoch": 38.7410071942446, "grad_norm": 0.10473699122667313, "learning_rate": 2.021309802754169e-05, "loss": 0.0023, "step": 43080 }, { "epoch": 38.75, "grad_norm": 0.07516639679670334, "learning_rate": 2.0190968687771332e-05, "loss": 0.0021, "step": 43090 }, { "epoch": 38.7589928057554, "grad_norm": 0.22834369540214539, "learning_rate": 2.016884840319763e-05, "loss": 0.0029, "step": 43100 }, { "epoch": 38.76798561151079, "grad_norm": 0.1453685164451599, "learning_rate": 2.0146737180540122e-05, "loss": 0.0028, "step": 43110 }, { "epoch": 38.776978417266186, "grad_norm": 0.08591201901435852, "learning_rate": 2.012463502651564e-05, "loss": 0.0021, "step": 43120 }, { "epoch": 38.78597122302158, "grad_norm": 0.0762474313378334, "learning_rate": 2.0102541947838228e-05, "loss": 0.0036, "step": 43130 }, { "epoch": 38.79496402877698, "grad_norm": 0.10143439471721649, "learning_rate": 2.0080457951219173e-05, "loss": 0.0022, "step": 43140 }, { "epoch": 38.80395683453237, "grad_norm": 0.12393487989902496, "learning_rate": 2.0058383043367017e-05, "loss": 0.0046, "step": 43150 }, { "epoch": 38.81294964028777, "grad_norm": 0.14238271117210388, "learning_rate": 2.0036317230987528e-05, "loss": 0.0022, "step": 43160 }, { "epoch": 38.82194244604317, "grad_norm": 0.09796939045190811, "learning_rate": 2.0014260520783696e-05, "loss": 0.0055, "step": 43170 }, { "epoch": 38.830935251798564, "grad_norm": 0.21152448654174805, "learning_rate": 1.9992212919455834e-05, "loss": 0.0059, "step": 43180 }, { "epoch": 38.839928057553955, "grad_norm": 0.17645572125911713, "learning_rate": 1.9970174433701333e-05, "loss": 0.0033, "step": 43190 }, { "epoch": 38.84892086330935, "grad_norm": 0.19138741493225098, "learning_rate": 1.9948145070214992e-05, "loss": 0.0021, "step": 43200 }, { "epoch": 38.85791366906475, "grad_norm": 0.08248709887266159, "learning_rate": 1.9926124835688663e-05, "loss": 0.0039, "step": 43210 }, { "epoch": 38.86690647482014, "grad_norm": 0.26906898617744446, "learning_rate": 1.9904113736811576e-05, "loss": 0.0045, "step": 43220 }, { "epoch": 38.87589928057554, "grad_norm": 0.10959883034229279, "learning_rate": 1.9882111780270096e-05, "loss": 0.0057, "step": 43230 }, { "epoch": 38.884892086330936, "grad_norm": 0.19728983938694, "learning_rate": 1.986011897274784e-05, "loss": 0.0034, "step": 43240 }, { "epoch": 38.893884892086334, "grad_norm": 0.13640664517879486, "learning_rate": 1.983813532092565e-05, "loss": 0.0082, "step": 43250 }, { "epoch": 38.902877697841724, "grad_norm": 0.16298258304595947, "learning_rate": 1.981616083148155e-05, "loss": 0.0023, "step": 43260 }, { "epoch": 38.91187050359712, "grad_norm": 0.15905342996120453, "learning_rate": 1.9794195511090845e-05, "loss": 0.0025, "step": 43270 }, { "epoch": 38.92086330935252, "grad_norm": 0.2868683934211731, "learning_rate": 1.977223936642601e-05, "loss": 0.0038, "step": 43280 }, { "epoch": 38.92985611510792, "grad_norm": 0.07061357796192169, "learning_rate": 1.975029240415674e-05, "loss": 0.0026, "step": 43290 }, { "epoch": 38.93884892086331, "grad_norm": 0.06640130281448364, "learning_rate": 1.9728354630949936e-05, "loss": 0.003, "step": 43300 }, { "epoch": 38.947841726618705, "grad_norm": 0.08118870854377747, "learning_rate": 1.9706426053469716e-05, "loss": 0.0023, "step": 43310 }, { "epoch": 38.9568345323741, "grad_norm": 0.14667662978172302, "learning_rate": 1.9684506678377396e-05, "loss": 0.0031, "step": 43320 }, { "epoch": 38.96582733812949, "grad_norm": 0.2831665277481079, "learning_rate": 1.9662596512331544e-05, "loss": 0.0027, "step": 43330 }, { "epoch": 38.97482014388489, "grad_norm": 0.14693672955036163, "learning_rate": 1.964069556198782e-05, "loss": 0.004, "step": 43340 }, { "epoch": 38.98381294964029, "grad_norm": 0.10570330172777176, "learning_rate": 1.9618803833999232e-05, "loss": 0.0034, "step": 43350 }, { "epoch": 38.992805755395686, "grad_norm": 0.17412403225898743, "learning_rate": 1.9596921335015838e-05, "loss": 0.0021, "step": 43360 }, { "epoch": 39.00179856115108, "grad_norm": 0.2120288610458374, "learning_rate": 1.957504807168501e-05, "loss": 0.005, "step": 43370 }, { "epoch": 39.010791366906474, "grad_norm": 0.12316913157701492, "learning_rate": 1.9553184050651253e-05, "loss": 0.0024, "step": 43380 }, { "epoch": 39.01978417266187, "grad_norm": 0.16934379935264587, "learning_rate": 1.953132927855628e-05, "loss": 0.0069, "step": 43390 }, { "epoch": 39.02877697841727, "grad_norm": 0.14512751996517181, "learning_rate": 1.9509483762038995e-05, "loss": 0.0035, "step": 43400 }, { "epoch": 39.03776978417266, "grad_norm": 0.11111138761043549, "learning_rate": 1.9487647507735467e-05, "loss": 0.0032, "step": 43410 }, { "epoch": 39.04676258992806, "grad_norm": 0.08442504703998566, "learning_rate": 1.9465820522279032e-05, "loss": 0.0022, "step": 43420 }, { "epoch": 39.055755395683455, "grad_norm": 0.2112729251384735, "learning_rate": 1.9444002812300078e-05, "loss": 0.0037, "step": 43430 }, { "epoch": 39.064748201438846, "grad_norm": 0.21078886091709137, "learning_rate": 1.94221943844263e-05, "loss": 0.0027, "step": 43440 }, { "epoch": 39.07374100719424, "grad_norm": 0.25282299518585205, "learning_rate": 1.9400395245282515e-05, "loss": 0.005, "step": 43450 }, { "epoch": 39.08273381294964, "grad_norm": 0.2539461851119995, "learning_rate": 1.937860540149071e-05, "loss": 0.0041, "step": 43460 }, { "epoch": 39.09172661870504, "grad_norm": 0.04505249485373497, "learning_rate": 1.9356824859670082e-05, "loss": 0.0026, "step": 43470 }, { "epoch": 39.10071942446043, "grad_norm": 0.06858880817890167, "learning_rate": 1.9335053626436967e-05, "loss": 0.0021, "step": 43480 }, { "epoch": 39.10971223021583, "grad_norm": 0.09235070645809174, "learning_rate": 1.9313291708404885e-05, "loss": 0.0035, "step": 43490 }, { "epoch": 39.118705035971225, "grad_norm": 0.06596485525369644, "learning_rate": 1.9291539112184587e-05, "loss": 0.003, "step": 43500 }, { "epoch": 39.12769784172662, "grad_norm": 0.13890917599201202, "learning_rate": 1.9269795844383854e-05, "loss": 0.0049, "step": 43510 }, { "epoch": 39.13669064748201, "grad_norm": 0.1378551870584488, "learning_rate": 1.9248061911607777e-05, "loss": 0.0026, "step": 43520 }, { "epoch": 39.14568345323741, "grad_norm": 0.12815406918525696, "learning_rate": 1.9226337320458538e-05, "loss": 0.0036, "step": 43530 }, { "epoch": 39.15467625899281, "grad_norm": 0.15682147443294525, "learning_rate": 1.9204622077535488e-05, "loss": 0.0017, "step": 43540 }, { "epoch": 39.1636690647482, "grad_norm": 0.1744755208492279, "learning_rate": 1.9182916189435147e-05, "loss": 0.0032, "step": 43550 }, { "epoch": 39.172661870503596, "grad_norm": 0.15063540637493134, "learning_rate": 1.916121966275117e-05, "loss": 0.0052, "step": 43560 }, { "epoch": 39.181654676258994, "grad_norm": 0.11181584000587463, "learning_rate": 1.9139532504074443e-05, "loss": 0.0033, "step": 43570 }, { "epoch": 39.19064748201439, "grad_norm": 0.33617639541625977, "learning_rate": 1.9117854719992885e-05, "loss": 0.003, "step": 43580 }, { "epoch": 39.19964028776978, "grad_norm": 0.12723670899868011, "learning_rate": 1.9096186317091687e-05, "loss": 0.0022, "step": 43590 }, { "epoch": 39.20863309352518, "grad_norm": 0.08111079037189484, "learning_rate": 1.9074527301953116e-05, "loss": 0.0054, "step": 43600 }, { "epoch": 39.21762589928058, "grad_norm": 0.05029533803462982, "learning_rate": 1.9052877681156607e-05, "loss": 0.0037, "step": 43610 }, { "epoch": 39.226618705035975, "grad_norm": 0.20096738636493683, "learning_rate": 1.903123746127875e-05, "loss": 0.006, "step": 43620 }, { "epoch": 39.235611510791365, "grad_norm": 0.11635233461856842, "learning_rate": 1.900960664889327e-05, "loss": 0.003, "step": 43630 }, { "epoch": 39.24460431654676, "grad_norm": 0.10015436261892319, "learning_rate": 1.8987985250571015e-05, "loss": 0.002, "step": 43640 }, { "epoch": 39.25359712230216, "grad_norm": 0.16822588443756104, "learning_rate": 1.8966373272880054e-05, "loss": 0.0039, "step": 43650 }, { "epoch": 39.26258992805755, "grad_norm": 0.11277267336845398, "learning_rate": 1.8944770722385462e-05, "loss": 0.0031, "step": 43660 }, { "epoch": 39.27158273381295, "grad_norm": 0.09171853959560394, "learning_rate": 1.8923177605649576e-05, "loss": 0.0031, "step": 43670 }, { "epoch": 39.280575539568346, "grad_norm": 0.2264767736196518, "learning_rate": 1.8901593929231802e-05, "loss": 0.0035, "step": 43680 }, { "epoch": 39.289568345323744, "grad_norm": 0.24204470217227936, "learning_rate": 1.8880019699688684e-05, "loss": 0.0041, "step": 43690 }, { "epoch": 39.298561151079134, "grad_norm": 0.17978090047836304, "learning_rate": 1.8858454923573904e-05, "loss": 0.005, "step": 43700 }, { "epoch": 39.30755395683453, "grad_norm": 0.3564485013484955, "learning_rate": 1.8836899607438253e-05, "loss": 0.003, "step": 43710 }, { "epoch": 39.31654676258993, "grad_norm": 0.09948094189167023, "learning_rate": 1.8815353757829723e-05, "loss": 0.0033, "step": 43720 }, { "epoch": 39.32553956834532, "grad_norm": 0.12765730917453766, "learning_rate": 1.879381738129331e-05, "loss": 0.0034, "step": 43730 }, { "epoch": 39.33453237410072, "grad_norm": 0.3010255694389343, "learning_rate": 1.8772290484371236e-05, "loss": 0.0034, "step": 43740 }, { "epoch": 39.343525179856115, "grad_norm": 0.25730541348457336, "learning_rate": 1.8750773073602795e-05, "loss": 0.0029, "step": 43750 }, { "epoch": 39.35251798561151, "grad_norm": 0.1494724005460739, "learning_rate": 1.8729265155524405e-05, "loss": 0.0026, "step": 43760 }, { "epoch": 39.361510791366904, "grad_norm": 0.10742408782243729, "learning_rate": 1.8707766736669607e-05, "loss": 0.0035, "step": 43770 }, { "epoch": 39.3705035971223, "grad_norm": 0.08977103233337402, "learning_rate": 1.8686277823569055e-05, "loss": 0.0024, "step": 43780 }, { "epoch": 39.3794964028777, "grad_norm": 0.1009368970990181, "learning_rate": 1.8664798422750484e-05, "loss": 0.0023, "step": 43790 }, { "epoch": 39.388489208633096, "grad_norm": 0.11483269184827805, "learning_rate": 1.8643328540738832e-05, "loss": 0.0036, "step": 43800 }, { "epoch": 39.39748201438849, "grad_norm": 0.11992176622152328, "learning_rate": 1.862186818405601e-05, "loss": 0.0039, "step": 43810 }, { "epoch": 39.406474820143885, "grad_norm": 0.0644608587026596, "learning_rate": 1.8600417359221156e-05, "loss": 0.003, "step": 43820 }, { "epoch": 39.41546762589928, "grad_norm": 0.131107360124588, "learning_rate": 1.8578976072750454e-05, "loss": 0.0023, "step": 43830 }, { "epoch": 39.42446043165468, "grad_norm": 0.18727734684944153, "learning_rate": 1.8557544331157194e-05, "loss": 0.0023, "step": 43840 }, { "epoch": 39.43345323741007, "grad_norm": 0.43073832988739014, "learning_rate": 1.8536122140951785e-05, "loss": 0.0051, "step": 43850 }, { "epoch": 39.44244604316547, "grad_norm": 0.2226419746875763, "learning_rate": 1.8514709508641688e-05, "loss": 0.0044, "step": 43860 }, { "epoch": 39.451438848920866, "grad_norm": 0.27272704243659973, "learning_rate": 1.8493306440731555e-05, "loss": 0.0049, "step": 43870 }, { "epoch": 39.460431654676256, "grad_norm": 0.2884288728237152, "learning_rate": 1.8471912943723013e-05, "loss": 0.0037, "step": 43880 }, { "epoch": 39.469424460431654, "grad_norm": 0.1144283190369606, "learning_rate": 1.8450529024114894e-05, "loss": 0.0037, "step": 43890 }, { "epoch": 39.47841726618705, "grad_norm": 0.08900394290685654, "learning_rate": 1.842915468840301e-05, "loss": 0.0033, "step": 43900 }, { "epoch": 39.48741007194245, "grad_norm": 0.1931285411119461, "learning_rate": 1.840778994308037e-05, "loss": 0.0047, "step": 43910 }, { "epoch": 39.49640287769784, "grad_norm": 0.14054138958454132, "learning_rate": 1.8386434794637004e-05, "loss": 0.0018, "step": 43920 }, { "epoch": 39.50539568345324, "grad_norm": 0.11130855977535248, "learning_rate": 1.8365089249560034e-05, "loss": 0.0017, "step": 43930 }, { "epoch": 39.514388489208635, "grad_norm": 0.31056877970695496, "learning_rate": 1.8343753314333683e-05, "loss": 0.0024, "step": 43940 }, { "epoch": 39.523381294964025, "grad_norm": 0.18172511458396912, "learning_rate": 1.8322426995439236e-05, "loss": 0.004, "step": 43950 }, { "epoch": 39.53237410071942, "grad_norm": 0.1528577357530594, "learning_rate": 1.8301110299355058e-05, "loss": 0.0118, "step": 43960 }, { "epoch": 39.54136690647482, "grad_norm": 0.18499501049518585, "learning_rate": 1.8279803232556625e-05, "loss": 0.0042, "step": 43970 }, { "epoch": 39.55035971223022, "grad_norm": 0.1380755454301834, "learning_rate": 1.8258505801516444e-05, "loss": 0.0029, "step": 43980 }, { "epoch": 39.55935251798561, "grad_norm": 0.09125539660453796, "learning_rate": 1.8237218012704117e-05, "loss": 0.0029, "step": 43990 }, { "epoch": 39.568345323741006, "grad_norm": 0.09796789288520813, "learning_rate": 1.821593987258631e-05, "loss": 0.0032, "step": 44000 }, { "epoch": 39.577338129496404, "grad_norm": 0.12198814749717712, "learning_rate": 1.8194671387626744e-05, "loss": 0.003, "step": 44010 }, { "epoch": 39.5863309352518, "grad_norm": 0.1069059744477272, "learning_rate": 1.8173412564286276e-05, "loss": 0.0038, "step": 44020 }, { "epoch": 39.59532374100719, "grad_norm": 0.09859532117843628, "learning_rate": 1.8152163409022697e-05, "loss": 0.0044, "step": 44030 }, { "epoch": 39.60431654676259, "grad_norm": 0.06095310300588608, "learning_rate": 1.8130923928291023e-05, "loss": 0.0024, "step": 44040 }, { "epoch": 39.61330935251799, "grad_norm": 0.2708319425582886, "learning_rate": 1.8109694128543163e-05, "loss": 0.0045, "step": 44050 }, { "epoch": 39.62230215827338, "grad_norm": 0.06779119372367859, "learning_rate": 1.8088474016228237e-05, "loss": 0.0039, "step": 44060 }, { "epoch": 39.631294964028775, "grad_norm": 0.14495326578617096, "learning_rate": 1.8067263597792328e-05, "loss": 0.003, "step": 44070 }, { "epoch": 39.64028776978417, "grad_norm": 0.12387742847204208, "learning_rate": 1.80460628796786e-05, "loss": 0.0037, "step": 44080 }, { "epoch": 39.64928057553957, "grad_norm": 0.07388675957918167, "learning_rate": 1.8024871868327276e-05, "loss": 0.0027, "step": 44090 }, { "epoch": 39.65827338129496, "grad_norm": 0.0811893492937088, "learning_rate": 1.8003690570175608e-05, "loss": 0.0037, "step": 44100 }, { "epoch": 39.66726618705036, "grad_norm": 0.08099673688411713, "learning_rate": 1.7982518991657943e-05, "loss": 0.003, "step": 44110 }, { "epoch": 39.67625899280576, "grad_norm": 0.07817069441080093, "learning_rate": 1.7961357139205643e-05, "loss": 0.0015, "step": 44120 }, { "epoch": 39.685251798561154, "grad_norm": 0.12707625329494476, "learning_rate": 1.7940205019247108e-05, "loss": 0.0019, "step": 44130 }, { "epoch": 39.694244604316545, "grad_norm": 0.09062229841947556, "learning_rate": 1.79190626382078e-05, "loss": 0.0024, "step": 44140 }, { "epoch": 39.70323741007194, "grad_norm": 0.19260266423225403, "learning_rate": 1.7897930002510215e-05, "loss": 0.0044, "step": 44150 }, { "epoch": 39.71223021582734, "grad_norm": 0.15497182309627533, "learning_rate": 1.787680711857387e-05, "loss": 0.0027, "step": 44160 }, { "epoch": 39.72122302158273, "grad_norm": 0.13907983899116516, "learning_rate": 1.7855693992815398e-05, "loss": 0.0043, "step": 44170 }, { "epoch": 39.73021582733813, "grad_norm": 0.17285993695259094, "learning_rate": 1.7834590631648328e-05, "loss": 0.0029, "step": 44180 }, { "epoch": 39.739208633093526, "grad_norm": 0.24075816571712494, "learning_rate": 1.7813497041483384e-05, "loss": 0.0029, "step": 44190 }, { "epoch": 39.74820143884892, "grad_norm": 0.20094025135040283, "learning_rate": 1.779241322872817e-05, "loss": 0.0044, "step": 44200 }, { "epoch": 39.757194244604314, "grad_norm": 0.09606406837701797, "learning_rate": 1.777133919978744e-05, "loss": 0.0032, "step": 44210 }, { "epoch": 39.76618705035971, "grad_norm": 0.09597271680831909, "learning_rate": 1.7750274961062912e-05, "loss": 0.0057, "step": 44220 }, { "epoch": 39.77517985611511, "grad_norm": 0.12877264618873596, "learning_rate": 1.772922051895335e-05, "loss": 0.0026, "step": 44230 }, { "epoch": 39.78417266187051, "grad_norm": 0.06123839318752289, "learning_rate": 1.770817587985453e-05, "loss": 0.0028, "step": 44240 }, { "epoch": 39.7931654676259, "grad_norm": 0.2670186161994934, "learning_rate": 1.7687141050159246e-05, "loss": 0.0033, "step": 44250 }, { "epoch": 39.802158273381295, "grad_norm": 0.07240317761898041, "learning_rate": 1.7666116036257375e-05, "loss": 0.0022, "step": 44260 }, { "epoch": 39.81115107913669, "grad_norm": 0.06696164608001709, "learning_rate": 1.764510084453569e-05, "loss": 0.0026, "step": 44270 }, { "epoch": 39.82014388489208, "grad_norm": 0.08776939660310745, "learning_rate": 1.76240954813781e-05, "loss": 0.0033, "step": 44280 }, { "epoch": 39.82913669064748, "grad_norm": 0.08537731319665909, "learning_rate": 1.7603099953165476e-05, "loss": 0.0023, "step": 44290 }, { "epoch": 39.83812949640288, "grad_norm": 0.1379663646221161, "learning_rate": 1.7582114266275683e-05, "loss": 0.0054, "step": 44300 }, { "epoch": 39.847122302158276, "grad_norm": 0.05950948968529701, "learning_rate": 1.756113842708364e-05, "loss": 0.0024, "step": 44310 }, { "epoch": 39.856115107913666, "grad_norm": 0.09139513969421387, "learning_rate": 1.7540172441961245e-05, "loss": 0.0047, "step": 44320 }, { "epoch": 39.865107913669064, "grad_norm": 0.1478779911994934, "learning_rate": 1.7519216317277387e-05, "loss": 0.0018, "step": 44330 }, { "epoch": 39.87410071942446, "grad_norm": 0.24204495549201965, "learning_rate": 1.7498270059398046e-05, "loss": 0.0038, "step": 44340 }, { "epoch": 39.88309352517986, "grad_norm": 0.07749023288488388, "learning_rate": 1.7477333674686062e-05, "loss": 0.0022, "step": 44350 }, { "epoch": 39.89208633093525, "grad_norm": 0.18754607439041138, "learning_rate": 1.745640716950142e-05, "loss": 0.0036, "step": 44360 }, { "epoch": 39.90107913669065, "grad_norm": 0.2622624337673187, "learning_rate": 1.7435490550201017e-05, "loss": 0.0044, "step": 44370 }, { "epoch": 39.910071942446045, "grad_norm": 0.08806059509515762, "learning_rate": 1.7414583823138762e-05, "loss": 0.0016, "step": 44380 }, { "epoch": 39.919064748201436, "grad_norm": 0.15050601959228516, "learning_rate": 1.739368699466558e-05, "loss": 0.0048, "step": 44390 }, { "epoch": 39.92805755395683, "grad_norm": 0.0717332661151886, "learning_rate": 1.737280007112935e-05, "loss": 0.004, "step": 44400 }, { "epoch": 39.93705035971223, "grad_norm": 0.08581036329269409, "learning_rate": 1.735192305887502e-05, "loss": 0.0019, "step": 44410 }, { "epoch": 39.94604316546763, "grad_norm": 0.08586818724870682, "learning_rate": 1.733105596424441e-05, "loss": 0.0028, "step": 44420 }, { "epoch": 39.95503597122302, "grad_norm": 0.2107354998588562, "learning_rate": 1.7310198793576437e-05, "loss": 0.0036, "step": 44430 }, { "epoch": 39.96402877697842, "grad_norm": 0.0842505544424057, "learning_rate": 1.7289351553206952e-05, "loss": 0.0029, "step": 44440 }, { "epoch": 39.973021582733814, "grad_norm": 0.2191089689731598, "learning_rate": 1.7268514249468788e-05, "loss": 0.0042, "step": 44450 }, { "epoch": 39.98201438848921, "grad_norm": 0.157072052359581, "learning_rate": 1.7247686888691765e-05, "loss": 0.003, "step": 44460 }, { "epoch": 39.9910071942446, "grad_norm": 0.09924835711717606, "learning_rate": 1.7226869477202694e-05, "loss": 0.0021, "step": 44470 }, { "epoch": 40.0, "grad_norm": 0.19892150163650513, "learning_rate": 1.7206062021325336e-05, "loss": 0.0047, "step": 44480 }, { "epoch": 40.0089928057554, "grad_norm": 0.09909424185752869, "learning_rate": 1.7185264527380502e-05, "loss": 0.0029, "step": 44490 }, { "epoch": 40.01798561151079, "grad_norm": 0.08112873882055283, "learning_rate": 1.716447700168584e-05, "loss": 0.0031, "step": 44500 }, { "epoch": 40.026978417266186, "grad_norm": 0.062229156494140625, "learning_rate": 1.714369945055611e-05, "loss": 0.0058, "step": 44510 }, { "epoch": 40.03597122302158, "grad_norm": 0.05463996157050133, "learning_rate": 1.7122931880302968e-05, "loss": 0.003, "step": 44520 }, { "epoch": 40.04496402877698, "grad_norm": 0.14566759765148163, "learning_rate": 1.710217429723505e-05, "loss": 0.0026, "step": 44530 }, { "epoch": 40.05395683453237, "grad_norm": 0.1140611544251442, "learning_rate": 1.7081426707657972e-05, "loss": 0.0025, "step": 44540 }, { "epoch": 40.06294964028777, "grad_norm": 0.09429122507572174, "learning_rate": 1.7060689117874275e-05, "loss": 0.0034, "step": 44550 }, { "epoch": 40.07194244604317, "grad_norm": 0.09366840869188309, "learning_rate": 1.703996153418354e-05, "loss": 0.0019, "step": 44560 }, { "epoch": 40.080935251798564, "grad_norm": 0.23816697299480438, "learning_rate": 1.7019243962882205e-05, "loss": 0.0022, "step": 44570 }, { "epoch": 40.089928057553955, "grad_norm": 0.06825549900531769, "learning_rate": 1.6998536410263754e-05, "loss": 0.0031, "step": 44580 }, { "epoch": 40.09892086330935, "grad_norm": 0.09240403026342392, "learning_rate": 1.6977838882618596e-05, "loss": 0.0026, "step": 44590 }, { "epoch": 40.10791366906475, "grad_norm": 0.21366555988788605, "learning_rate": 1.6957151386234088e-05, "loss": 0.0021, "step": 44600 }, { "epoch": 40.11690647482014, "grad_norm": 0.06823654472827911, "learning_rate": 1.6936473927394536e-05, "loss": 0.0026, "step": 44610 }, { "epoch": 40.12589928057554, "grad_norm": 0.11268680542707443, "learning_rate": 1.6915806512381222e-05, "loss": 0.0025, "step": 44620 }, { "epoch": 40.134892086330936, "grad_norm": 0.17415767908096313, "learning_rate": 1.6895149147472344e-05, "loss": 0.0033, "step": 44630 }, { "epoch": 40.143884892086334, "grad_norm": 0.15390220284461975, "learning_rate": 1.6874501838943073e-05, "loss": 0.0036, "step": 44640 }, { "epoch": 40.152877697841724, "grad_norm": 0.2012186199426651, "learning_rate": 1.6853864593065506e-05, "loss": 0.0039, "step": 44650 }, { "epoch": 40.16187050359712, "grad_norm": 0.14643333852291107, "learning_rate": 1.683323741610871e-05, "loss": 0.0027, "step": 44660 }, { "epoch": 40.17086330935252, "grad_norm": 0.27531108260154724, "learning_rate": 1.6812620314338674e-05, "loss": 0.0036, "step": 44670 }, { "epoch": 40.17985611510792, "grad_norm": 0.1258859932422638, "learning_rate": 1.6792013294018326e-05, "loss": 0.0025, "step": 44680 }, { "epoch": 40.18884892086331, "grad_norm": 0.11942297965288162, "learning_rate": 1.6771416361407526e-05, "loss": 0.005, "step": 44690 }, { "epoch": 40.197841726618705, "grad_norm": 0.15778788924217224, "learning_rate": 1.675082952276308e-05, "loss": 0.0025, "step": 44700 }, { "epoch": 40.2068345323741, "grad_norm": 0.25637564063072205, "learning_rate": 1.6730252784338757e-05, "loss": 0.0025, "step": 44710 }, { "epoch": 40.21582733812949, "grad_norm": 0.09739706665277481, "learning_rate": 1.6709686152385166e-05, "loss": 0.0025, "step": 44720 }, { "epoch": 40.22482014388489, "grad_norm": 0.07758933305740356, "learning_rate": 1.668912963314998e-05, "loss": 0.0029, "step": 44730 }, { "epoch": 40.23381294964029, "grad_norm": 0.1233808696269989, "learning_rate": 1.6668583232877653e-05, "loss": 0.004, "step": 44740 }, { "epoch": 40.242805755395686, "grad_norm": 0.16686464846134186, "learning_rate": 1.6648046957809698e-05, "loss": 0.0059, "step": 44750 }, { "epoch": 40.25179856115108, "grad_norm": 0.11374076455831528, "learning_rate": 1.6627520814184462e-05, "loss": 0.0035, "step": 44760 }, { "epoch": 40.260791366906474, "grad_norm": 0.1388217955827713, "learning_rate": 1.660700480823726e-05, "loss": 0.003, "step": 44770 }, { "epoch": 40.26978417266187, "grad_norm": 0.12425917387008667, "learning_rate": 1.65864989462003e-05, "loss": 0.0038, "step": 44780 }, { "epoch": 40.27877697841727, "grad_norm": 0.04945473372936249, "learning_rate": 1.656600323430273e-05, "loss": 0.0083, "step": 44790 }, { "epoch": 40.28776978417266, "grad_norm": 0.1754077821969986, "learning_rate": 1.654551767877059e-05, "loss": 0.003, "step": 44800 }, { "epoch": 40.29676258992806, "grad_norm": 0.16452743113040924, "learning_rate": 1.6525042285826874e-05, "loss": 0.0042, "step": 44810 }, { "epoch": 40.305755395683455, "grad_norm": 0.21668584644794464, "learning_rate": 1.6504577061691468e-05, "loss": 0.0046, "step": 44820 }, { "epoch": 40.314748201438846, "grad_norm": 0.22589951753616333, "learning_rate": 1.6484122012581143e-05, "loss": 0.003, "step": 44830 }, { "epoch": 40.32374100719424, "grad_norm": 0.10466425865888596, "learning_rate": 1.6463677144709623e-05, "loss": 0.0033, "step": 44840 }, { "epoch": 40.33273381294964, "grad_norm": 0.19101810455322266, "learning_rate": 1.6443242464287493e-05, "loss": 0.0026, "step": 44850 }, { "epoch": 40.34172661870504, "grad_norm": 0.12653560936450958, "learning_rate": 1.642281797752232e-05, "loss": 0.0034, "step": 44860 }, { "epoch": 40.35071942446043, "grad_norm": 0.07716281712055206, "learning_rate": 1.6402403690618456e-05, "loss": 0.0031, "step": 44870 }, { "epoch": 40.35971223021583, "grad_norm": 0.11651111394166946, "learning_rate": 1.6381999609777295e-05, "loss": 0.002, "step": 44880 }, { "epoch": 40.368705035971225, "grad_norm": 0.06925466656684875, "learning_rate": 1.6361605741196983e-05, "loss": 0.011, "step": 44890 }, { "epoch": 40.37769784172662, "grad_norm": 0.205287903547287, "learning_rate": 1.63412220910727e-05, "loss": 0.0064, "step": 44900 }, { "epoch": 40.38669064748201, "grad_norm": 0.09900832921266556, "learning_rate": 1.6320848665596433e-05, "loss": 0.0041, "step": 44910 }, { "epoch": 40.39568345323741, "grad_norm": 0.12291046977043152, "learning_rate": 1.6300485470957095e-05, "loss": 0.0045, "step": 44920 }, { "epoch": 40.40467625899281, "grad_norm": 0.14578059315681458, "learning_rate": 1.6280132513340483e-05, "loss": 0.0021, "step": 44930 }, { "epoch": 40.4136690647482, "grad_norm": 0.07447585463523865, "learning_rate": 1.62597897989293e-05, "loss": 0.0029, "step": 44940 }, { "epoch": 40.422661870503596, "grad_norm": 0.09163995087146759, "learning_rate": 1.623945733390309e-05, "loss": 0.0027, "step": 44950 }, { "epoch": 40.431654676258994, "grad_norm": 0.15831461548805237, "learning_rate": 1.6219135124438374e-05, "loss": 0.0025, "step": 44960 }, { "epoch": 40.44064748201439, "grad_norm": 0.11039089411497116, "learning_rate": 1.6198823176708465e-05, "loss": 0.0016, "step": 44970 }, { "epoch": 40.44964028776978, "grad_norm": 0.3092285692691803, "learning_rate": 1.6178521496883613e-05, "loss": 0.005, "step": 44980 }, { "epoch": 40.45863309352518, "grad_norm": 0.050561513751745224, "learning_rate": 1.6158230091130926e-05, "loss": 0.0019, "step": 44990 }, { "epoch": 40.46762589928058, "grad_norm": 0.0905882716178894, "learning_rate": 1.613794896561438e-05, "loss": 0.0027, "step": 45000 }, { "epoch": 40.476618705035975, "grad_norm": 0.11650985479354858, "learning_rate": 1.6117678126494894e-05, "loss": 0.0021, "step": 45010 }, { "epoch": 40.485611510791365, "grad_norm": 0.06268548965454102, "learning_rate": 1.6097417579930153e-05, "loss": 0.0029, "step": 45020 }, { "epoch": 40.49460431654676, "grad_norm": 0.10652846097946167, "learning_rate": 1.6077167332074834e-05, "loss": 0.0035, "step": 45030 }, { "epoch": 40.50359712230216, "grad_norm": 0.14130127429962158, "learning_rate": 1.605692738908037e-05, "loss": 0.0026, "step": 45040 }, { "epoch": 40.51258992805755, "grad_norm": 0.11042556911706924, "learning_rate": 1.6036697757095176e-05, "loss": 0.0047, "step": 45050 }, { "epoch": 40.52158273381295, "grad_norm": 0.1142437607049942, "learning_rate": 1.6016478442264428e-05, "loss": 0.003, "step": 45060 }, { "epoch": 40.530575539568346, "grad_norm": 0.1345311403274536, "learning_rate": 1.599626945073026e-05, "loss": 0.0018, "step": 45070 }, { "epoch": 40.539568345323744, "grad_norm": 0.2536688446998596, "learning_rate": 1.597607078863162e-05, "loss": 0.002, "step": 45080 }, { "epoch": 40.548561151079134, "grad_norm": 0.09132426232099533, "learning_rate": 1.595588246210432e-05, "loss": 0.0034, "step": 45090 }, { "epoch": 40.55755395683453, "grad_norm": 0.07847818732261658, "learning_rate": 1.5935704477281048e-05, "loss": 0.0015, "step": 45100 }, { "epoch": 40.56654676258993, "grad_norm": 0.20439699292182922, "learning_rate": 1.5915536840291323e-05, "loss": 0.0027, "step": 45110 }, { "epoch": 40.57553956834532, "grad_norm": 0.06927057355642319, "learning_rate": 1.5895379557261576e-05, "loss": 0.0023, "step": 45120 }, { "epoch": 40.58453237410072, "grad_norm": 0.15918448567390442, "learning_rate": 1.5875232634315033e-05, "loss": 0.0021, "step": 45130 }, { "epoch": 40.593525179856115, "grad_norm": 0.23247025907039642, "learning_rate": 1.5855096077571812e-05, "loss": 0.0046, "step": 45140 }, { "epoch": 40.60251798561151, "grad_norm": 0.09207906574010849, "learning_rate": 1.5834969893148855e-05, "loss": 0.0014, "step": 45150 }, { "epoch": 40.611510791366904, "grad_norm": 0.08072862774133682, "learning_rate": 1.581485408715997e-05, "loss": 0.0024, "step": 45160 }, { "epoch": 40.6205035971223, "grad_norm": 0.1968795210123062, "learning_rate": 1.5794748665715785e-05, "loss": 0.0027, "step": 45170 }, { "epoch": 40.6294964028777, "grad_norm": 0.1035124734044075, "learning_rate": 1.5774653634923857e-05, "loss": 0.0019, "step": 45180 }, { "epoch": 40.638489208633096, "grad_norm": 0.07912739366292953, "learning_rate": 1.575456900088845e-05, "loss": 0.0016, "step": 45190 }, { "epoch": 40.64748201438849, "grad_norm": 0.05483526363968849, "learning_rate": 1.5734494769710816e-05, "loss": 0.0042, "step": 45200 }, { "epoch": 40.656474820143885, "grad_norm": 0.14200767874717712, "learning_rate": 1.5714430947488912e-05, "loss": 0.0021, "step": 45210 }, { "epoch": 40.66546762589928, "grad_norm": 0.1656908243894577, "learning_rate": 1.5694377540317645e-05, "loss": 0.0048, "step": 45220 }, { "epoch": 40.67446043165468, "grad_norm": 0.21549692749977112, "learning_rate": 1.5674334554288694e-05, "loss": 0.0028, "step": 45230 }, { "epoch": 40.68345323741007, "grad_norm": 0.08300821483135223, "learning_rate": 1.5654301995490582e-05, "loss": 0.0028, "step": 45240 }, { "epoch": 40.69244604316547, "grad_norm": 0.21239757537841797, "learning_rate": 1.5634279870008685e-05, "loss": 0.002, "step": 45250 }, { "epoch": 40.701438848920866, "grad_norm": 0.06651327013969421, "learning_rate": 1.5614268183925174e-05, "loss": 0.0014, "step": 45260 }, { "epoch": 40.710431654676256, "grad_norm": 0.07402417808771133, "learning_rate": 1.5594266943319097e-05, "loss": 0.0039, "step": 45270 }, { "epoch": 40.719424460431654, "grad_norm": 0.14358308911323547, "learning_rate": 1.5574276154266294e-05, "loss": 0.0021, "step": 45280 }, { "epoch": 40.72841726618705, "grad_norm": 0.08577680587768555, "learning_rate": 1.5554295822839437e-05, "loss": 0.0041, "step": 45290 }, { "epoch": 40.73741007194245, "grad_norm": 0.24414581060409546, "learning_rate": 1.5534325955108025e-05, "loss": 0.0028, "step": 45300 }, { "epoch": 40.74640287769784, "grad_norm": 0.08985015749931335, "learning_rate": 1.5514366557138373e-05, "loss": 0.0026, "step": 45310 }, { "epoch": 40.75539568345324, "grad_norm": 0.09768658876419067, "learning_rate": 1.5494417634993602e-05, "loss": 0.0034, "step": 45320 }, { "epoch": 40.764388489208635, "grad_norm": 0.30751487612724304, "learning_rate": 1.547447919473372e-05, "loss": 0.005, "step": 45330 }, { "epoch": 40.773381294964025, "grad_norm": 0.10281580686569214, "learning_rate": 1.5454551242415434e-05, "loss": 0.0025, "step": 45340 }, { "epoch": 40.78237410071942, "grad_norm": 0.10442592948675156, "learning_rate": 1.543463378409239e-05, "loss": 0.0019, "step": 45350 }, { "epoch": 40.79136690647482, "grad_norm": 0.07914524525403976, "learning_rate": 1.541472682581493e-05, "loss": 0.0067, "step": 45360 }, { "epoch": 40.80035971223022, "grad_norm": 0.3535670042037964, "learning_rate": 1.5394830373630298e-05, "loss": 0.0038, "step": 45370 }, { "epoch": 40.80935251798561, "grad_norm": 0.23681575059890747, "learning_rate": 1.5374944433582506e-05, "loss": 0.0031, "step": 45380 }, { "epoch": 40.818345323741006, "grad_norm": 0.07701612263917923, "learning_rate": 1.5355069011712375e-05, "loss": 0.0026, "step": 45390 }, { "epoch": 40.827338129496404, "grad_norm": 0.08316708356142044, "learning_rate": 1.5335204114057526e-05, "loss": 0.0056, "step": 45400 }, { "epoch": 40.8363309352518, "grad_norm": 0.23218335211277008, "learning_rate": 1.5315349746652387e-05, "loss": 0.0044, "step": 45410 }, { "epoch": 40.84532374100719, "grad_norm": 0.12172622978687286, "learning_rate": 1.5295505915528212e-05, "loss": 0.0027, "step": 45420 }, { "epoch": 40.85431654676259, "grad_norm": 0.08312419801950455, "learning_rate": 1.5275672626713024e-05, "loss": 0.0027, "step": 45430 }, { "epoch": 40.86330935251799, "grad_norm": 0.07597044855356216, "learning_rate": 1.5255849886231643e-05, "loss": 0.0039, "step": 45440 }, { "epoch": 40.87230215827338, "grad_norm": 0.09598062187433243, "learning_rate": 1.523603770010571e-05, "loss": 0.0045, "step": 45450 }, { "epoch": 40.881294964028775, "grad_norm": 0.1745309680700302, "learning_rate": 1.521623607435363e-05, "loss": 0.0028, "step": 45460 }, { "epoch": 40.89028776978417, "grad_norm": 0.09169428050518036, "learning_rate": 1.5196445014990612e-05, "loss": 0.0048, "step": 45470 }, { "epoch": 40.89928057553957, "grad_norm": 0.08338768780231476, "learning_rate": 1.5176664528028672e-05, "loss": 0.0023, "step": 45480 }, { "epoch": 40.90827338129496, "grad_norm": 0.07147584110498428, "learning_rate": 1.5156894619476574e-05, "loss": 0.003, "step": 45490 }, { "epoch": 40.91726618705036, "grad_norm": 0.10298643261194229, "learning_rate": 1.5137135295339938e-05, "loss": 0.0039, "step": 45500 }, { "epoch": 40.92625899280576, "grad_norm": 0.08741804957389832, "learning_rate": 1.5117386561621073e-05, "loss": 0.0031, "step": 45510 }, { "epoch": 40.935251798561154, "grad_norm": 0.1819698065519333, "learning_rate": 1.5097648424319167e-05, "loss": 0.0031, "step": 45520 }, { "epoch": 40.944244604316545, "grad_norm": 0.13452517986297607, "learning_rate": 1.5077920889430119e-05, "loss": 0.0028, "step": 45530 }, { "epoch": 40.95323741007194, "grad_norm": 0.17913872003555298, "learning_rate": 1.5058203962946644e-05, "loss": 0.0045, "step": 45540 }, { "epoch": 40.96223021582734, "grad_norm": 0.06415146589279175, "learning_rate": 1.503849765085822e-05, "loss": 0.0028, "step": 45550 }, { "epoch": 40.97122302158273, "grad_norm": 0.12521278858184814, "learning_rate": 1.501880195915109e-05, "loss": 0.0029, "step": 45560 }, { "epoch": 40.98021582733813, "grad_norm": 0.08995820581912994, "learning_rate": 1.499911689380833e-05, "loss": 0.006, "step": 45570 }, { "epoch": 40.989208633093526, "grad_norm": 0.08290840685367584, "learning_rate": 1.4979442460809683e-05, "loss": 0.0017, "step": 45580 }, { "epoch": 40.99820143884892, "grad_norm": 0.08200828731060028, "learning_rate": 1.4959778666131763e-05, "loss": 0.0034, "step": 45590 }, { "epoch": 41.007194244604314, "grad_norm": 0.24308978021144867, "learning_rate": 1.4940125515747905e-05, "loss": 0.0036, "step": 45600 }, { "epoch": 41.01618705035971, "grad_norm": 0.15283514559268951, "learning_rate": 1.4920483015628211e-05, "loss": 0.0046, "step": 45610 }, { "epoch": 41.02517985611511, "grad_norm": 0.14029483497142792, "learning_rate": 1.490085117173956e-05, "loss": 0.0036, "step": 45620 }, { "epoch": 41.03417266187051, "grad_norm": 0.1057155430316925, "learning_rate": 1.488122999004558e-05, "loss": 0.0025, "step": 45630 }, { "epoch": 41.0431654676259, "grad_norm": 0.20536847412586212, "learning_rate": 1.486161947650666e-05, "loss": 0.0045, "step": 45640 }, { "epoch": 41.052158273381295, "grad_norm": 0.20420734584331512, "learning_rate": 1.4842019637079995e-05, "loss": 0.002, "step": 45650 }, { "epoch": 41.06115107913669, "grad_norm": 0.22214056551456451, "learning_rate": 1.482243047771944e-05, "loss": 0.0032, "step": 45660 }, { "epoch": 41.07014388489208, "grad_norm": 0.14369551837444305, "learning_rate": 1.4802852004375712e-05, "loss": 0.0024, "step": 45670 }, { "epoch": 41.07913669064748, "grad_norm": 0.07410670816898346, "learning_rate": 1.4783284222996218e-05, "loss": 0.0034, "step": 45680 }, { "epoch": 41.08812949640288, "grad_norm": 0.06964057683944702, "learning_rate": 1.4763727139525135e-05, "loss": 0.0024, "step": 45690 }, { "epoch": 41.097122302158276, "grad_norm": 0.062304750084877014, "learning_rate": 1.4744180759903392e-05, "loss": 0.0024, "step": 45700 }, { "epoch": 41.106115107913666, "grad_norm": 0.12158428132534027, "learning_rate": 1.4724645090068635e-05, "loss": 0.003, "step": 45710 }, { "epoch": 41.115107913669064, "grad_norm": 0.12087512016296387, "learning_rate": 1.4705120135955341e-05, "loss": 0.0028, "step": 45720 }, { "epoch": 41.12410071942446, "grad_norm": 0.08640029281377792, "learning_rate": 1.4685605903494614e-05, "loss": 0.0022, "step": 45730 }, { "epoch": 41.13309352517986, "grad_norm": 0.11312099546194077, "learning_rate": 1.46661023986144e-05, "loss": 0.0021, "step": 45740 }, { "epoch": 41.14208633093525, "grad_norm": 0.15280339121818542, "learning_rate": 1.4646609627239344e-05, "loss": 0.0038, "step": 45750 }, { "epoch": 41.15107913669065, "grad_norm": 0.05231907218694687, "learning_rate": 1.4627127595290835e-05, "loss": 0.0026, "step": 45760 }, { "epoch": 41.160071942446045, "grad_norm": 0.19002242386341095, "learning_rate": 1.460765630868699e-05, "loss": 0.003, "step": 45770 }, { "epoch": 41.169064748201436, "grad_norm": 0.0663478747010231, "learning_rate": 1.4588195773342678e-05, "loss": 0.005, "step": 45780 }, { "epoch": 41.17805755395683, "grad_norm": 0.07931658625602722, "learning_rate": 1.4568745995169485e-05, "loss": 0.0026, "step": 45790 }, { "epoch": 41.18705035971223, "grad_norm": 0.15559852123260498, "learning_rate": 1.4549306980075778e-05, "loss": 0.0028, "step": 45800 }, { "epoch": 41.19604316546763, "grad_norm": 0.12467260658740997, "learning_rate": 1.4529878733966557e-05, "loss": 0.004, "step": 45810 }, { "epoch": 41.20503597122302, "grad_norm": 0.11561771482229233, "learning_rate": 1.4510461262743658e-05, "loss": 0.005, "step": 45820 }, { "epoch": 41.21402877697842, "grad_norm": 0.29806146025657654, "learning_rate": 1.4491054572305585e-05, "loss": 0.0045, "step": 45830 }, { "epoch": 41.223021582733814, "grad_norm": 0.14479435980319977, "learning_rate": 1.4471658668547566e-05, "loss": 0.0024, "step": 45840 }, { "epoch": 41.23201438848921, "grad_norm": 0.4978807270526886, "learning_rate": 1.4452273557361579e-05, "loss": 0.0037, "step": 45850 }, { "epoch": 41.2410071942446, "grad_norm": 0.3862783908843994, "learning_rate": 1.4432899244636282e-05, "loss": 0.0021, "step": 45860 }, { "epoch": 41.25, "grad_norm": 0.08680523931980133, "learning_rate": 1.4413535736257134e-05, "loss": 0.0021, "step": 45870 }, { "epoch": 41.2589928057554, "grad_norm": 0.09668594598770142, "learning_rate": 1.439418303810619e-05, "loss": 0.0018, "step": 45880 }, { "epoch": 41.26798561151079, "grad_norm": 0.14572328329086304, "learning_rate": 1.4374841156062352e-05, "loss": 0.0029, "step": 45890 }, { "epoch": 41.276978417266186, "grad_norm": 0.2006797194480896, "learning_rate": 1.4355510096001112e-05, "loss": 0.0028, "step": 45900 }, { "epoch": 41.28597122302158, "grad_norm": 0.07851673662662506, "learning_rate": 1.4336189863794786e-05, "loss": 0.0025, "step": 45910 }, { "epoch": 41.29496402877698, "grad_norm": 0.07126091420650482, "learning_rate": 1.4316880465312327e-05, "loss": 0.0019, "step": 45920 }, { "epoch": 41.30395683453237, "grad_norm": 0.10993547737598419, "learning_rate": 1.4297581906419426e-05, "loss": 0.002, "step": 45930 }, { "epoch": 41.31294964028777, "grad_norm": 0.0652100220322609, "learning_rate": 1.4278294192978475e-05, "loss": 0.0018, "step": 45940 }, { "epoch": 41.32194244604317, "grad_norm": 0.23702558875083923, "learning_rate": 1.4259017330848574e-05, "loss": 0.0055, "step": 45950 }, { "epoch": 41.330935251798564, "grad_norm": 0.0570298507809639, "learning_rate": 1.4239751325885498e-05, "loss": 0.0029, "step": 45960 }, { "epoch": 41.339928057553955, "grad_norm": 0.05941262096166611, "learning_rate": 1.4220496183941795e-05, "loss": 0.0018, "step": 45970 }, { "epoch": 41.34892086330935, "grad_norm": 0.10582073777914047, "learning_rate": 1.4201251910866648e-05, "loss": 0.0024, "step": 45980 }, { "epoch": 41.35791366906475, "grad_norm": 0.16519246995449066, "learning_rate": 1.4182018512505957e-05, "loss": 0.0031, "step": 45990 }, { "epoch": 41.36690647482014, "grad_norm": 0.1982460618019104, "learning_rate": 1.4162795994702327e-05, "loss": 0.0036, "step": 46000 }, { "epoch": 41.37589928057554, "grad_norm": 0.15043361485004425, "learning_rate": 1.4143584363295032e-05, "loss": 0.0033, "step": 46010 }, { "epoch": 41.384892086330936, "grad_norm": 0.058408819139003754, "learning_rate": 1.4124383624120101e-05, "loss": 0.0037, "step": 46020 }, { "epoch": 41.393884892086334, "grad_norm": 0.17347115278244019, "learning_rate": 1.4105193783010151e-05, "loss": 0.0033, "step": 46030 }, { "epoch": 41.402877697841724, "grad_norm": 0.12953747808933258, "learning_rate": 1.4086014845794621e-05, "loss": 0.003, "step": 46040 }, { "epoch": 41.41187050359712, "grad_norm": 0.1604759246110916, "learning_rate": 1.4066846818299489e-05, "loss": 0.0039, "step": 46050 }, { "epoch": 41.42086330935252, "grad_norm": 0.06984039396047592, "learning_rate": 1.4047689706347555e-05, "loss": 0.003, "step": 46060 }, { "epoch": 41.42985611510792, "grad_norm": 0.18829065561294556, "learning_rate": 1.402854351575822e-05, "loss": 0.003, "step": 46070 }, { "epoch": 41.43884892086331, "grad_norm": 0.2088378518819809, "learning_rate": 1.4009408252347588e-05, "loss": 0.0042, "step": 46080 }, { "epoch": 41.447841726618705, "grad_norm": 0.22984431684017181, "learning_rate": 1.399028392192846e-05, "loss": 0.0039, "step": 46090 }, { "epoch": 41.4568345323741, "grad_norm": 0.12898312509059906, "learning_rate": 1.397117053031029e-05, "loss": 0.0079, "step": 46100 }, { "epoch": 41.46582733812949, "grad_norm": 0.13090702891349792, "learning_rate": 1.3952068083299213e-05, "loss": 0.0018, "step": 46110 }, { "epoch": 41.47482014388489, "grad_norm": 0.12238787859678268, "learning_rate": 1.3932976586698082e-05, "loss": 0.0047, "step": 46120 }, { "epoch": 41.48381294964029, "grad_norm": 0.1170160248875618, "learning_rate": 1.3913896046306363e-05, "loss": 0.0021, "step": 46130 }, { "epoch": 41.492805755395686, "grad_norm": 0.18757377564907074, "learning_rate": 1.389482646792023e-05, "loss": 0.0081, "step": 46140 }, { "epoch": 41.50179856115108, "grad_norm": 0.10815740376710892, "learning_rate": 1.387576785733251e-05, "loss": 0.0028, "step": 46150 }, { "epoch": 41.510791366906474, "grad_norm": 0.08548565953969955, "learning_rate": 1.3856720220332703e-05, "loss": 0.0048, "step": 46160 }, { "epoch": 41.51978417266187, "grad_norm": 0.15708136558532715, "learning_rate": 1.383768356270701e-05, "loss": 0.0022, "step": 46170 }, { "epoch": 41.52877697841727, "grad_norm": 0.1807720959186554, "learning_rate": 1.3818657890238207e-05, "loss": 0.0041, "step": 46180 }, { "epoch": 41.53776978417266, "grad_norm": 0.10198388248682022, "learning_rate": 1.3799643208705859e-05, "loss": 0.0024, "step": 46190 }, { "epoch": 41.54676258992806, "grad_norm": 0.06174037978053093, "learning_rate": 1.3780639523886058e-05, "loss": 0.006, "step": 46200 }, { "epoch": 41.555755395683455, "grad_norm": 0.06171897426247597, "learning_rate": 1.3761646841551668e-05, "loss": 0.0033, "step": 46210 }, { "epoch": 41.564748201438846, "grad_norm": 0.12071529775857925, "learning_rate": 1.3742665167472146e-05, "loss": 0.0029, "step": 46220 }, { "epoch": 41.57374100719424, "grad_norm": 0.2032708078622818, "learning_rate": 1.372369450741363e-05, "loss": 0.0037, "step": 46230 }, { "epoch": 41.58273381294964, "grad_norm": 0.07924934476613998, "learning_rate": 1.3704734867138901e-05, "loss": 0.0056, "step": 46240 }, { "epoch": 41.59172661870504, "grad_norm": 0.08660025894641876, "learning_rate": 1.36857862524074e-05, "loss": 0.002, "step": 46250 }, { "epoch": 41.60071942446043, "grad_norm": 0.235732764005661, "learning_rate": 1.3666848668975213e-05, "loss": 0.0029, "step": 46260 }, { "epoch": 41.60971223021583, "grad_norm": 0.06283828616142273, "learning_rate": 1.3647922122595063e-05, "loss": 0.0017, "step": 46270 }, { "epoch": 41.618705035971225, "grad_norm": 0.21029914915561676, "learning_rate": 1.3629006619016366e-05, "loss": 0.0053, "step": 46280 }, { "epoch": 41.62769784172662, "grad_norm": 0.08006884902715683, "learning_rate": 1.3610102163985139e-05, "loss": 0.006, "step": 46290 }, { "epoch": 41.63669064748201, "grad_norm": 0.0881815254688263, "learning_rate": 1.3591208763244057e-05, "loss": 0.0034, "step": 46300 }, { "epoch": 41.64568345323741, "grad_norm": 0.05647410824894905, "learning_rate": 1.3572326422532428e-05, "loss": 0.0056, "step": 46310 }, { "epoch": 41.65467625899281, "grad_norm": 0.1533491611480713, "learning_rate": 1.355345514758622e-05, "loss": 0.0023, "step": 46320 }, { "epoch": 41.6636690647482, "grad_norm": 0.10971742123365402, "learning_rate": 1.3534594944138007e-05, "loss": 0.003, "step": 46330 }, { "epoch": 41.672661870503596, "grad_norm": 0.14151997864246368, "learning_rate": 1.3515745817917069e-05, "loss": 0.0029, "step": 46340 }, { "epoch": 41.681654676258994, "grad_norm": 0.058012817054986954, "learning_rate": 1.3496907774649208e-05, "loss": 0.0032, "step": 46350 }, { "epoch": 41.69064748201439, "grad_norm": 0.055078256875276566, "learning_rate": 1.3478080820056987e-05, "loss": 0.0071, "step": 46360 }, { "epoch": 41.69964028776978, "grad_norm": 0.062012847512960434, "learning_rate": 1.3459264959859474e-05, "loss": 0.0036, "step": 46370 }, { "epoch": 41.70863309352518, "grad_norm": 0.0789542943239212, "learning_rate": 1.3440460199772487e-05, "loss": 0.0013, "step": 46380 }, { "epoch": 41.71762589928058, "grad_norm": 0.2901700437068939, "learning_rate": 1.3421666545508382e-05, "loss": 0.0044, "step": 46390 }, { "epoch": 41.726618705035975, "grad_norm": 0.06709913909435272, "learning_rate": 1.3402884002776194e-05, "loss": 0.0026, "step": 46400 }, { "epoch": 41.735611510791365, "grad_norm": 0.2649691104888916, "learning_rate": 1.3384112577281555e-05, "loss": 0.0043, "step": 46410 }, { "epoch": 41.74460431654676, "grad_norm": 0.09573279321193695, "learning_rate": 1.3365352274726711e-05, "loss": 0.0054, "step": 46420 }, { "epoch": 41.75359712230216, "grad_norm": 0.17298011481761932, "learning_rate": 1.3346603100810578e-05, "loss": 0.0039, "step": 46430 }, { "epoch": 41.76258992805755, "grad_norm": 0.0504801943898201, "learning_rate": 1.3327865061228645e-05, "loss": 0.0032, "step": 46440 }, { "epoch": 41.77158273381295, "grad_norm": 0.1138690933585167, "learning_rate": 1.330913816167304e-05, "loss": 0.0071, "step": 46450 }, { "epoch": 41.780575539568346, "grad_norm": 0.10261986404657364, "learning_rate": 1.3290422407832492e-05, "loss": 0.0049, "step": 46460 }, { "epoch": 41.789568345323744, "grad_norm": 0.07787872850894928, "learning_rate": 1.3271717805392354e-05, "loss": 0.005, "step": 46470 }, { "epoch": 41.798561151079134, "grad_norm": 0.1262138932943344, "learning_rate": 1.3253024360034582e-05, "loss": 0.0032, "step": 46480 }, { "epoch": 41.80755395683453, "grad_norm": 0.060599882155656815, "learning_rate": 1.323434207743779e-05, "loss": 0.0049, "step": 46490 }, { "epoch": 41.81654676258993, "grad_norm": 0.09056951105594635, "learning_rate": 1.3215670963277105e-05, "loss": 0.006, "step": 46500 }, { "epoch": 41.82553956834532, "grad_norm": 0.11879485845565796, "learning_rate": 1.3197011023224376e-05, "loss": 0.0017, "step": 46510 }, { "epoch": 41.83453237410072, "grad_norm": 0.29211685061454773, "learning_rate": 1.3178362262947941e-05, "loss": 0.0064, "step": 46520 }, { "epoch": 41.843525179856115, "grad_norm": 0.15991763770580292, "learning_rate": 1.3159724688112845e-05, "loss": 0.0024, "step": 46530 }, { "epoch": 41.85251798561151, "grad_norm": 0.14485761523246765, "learning_rate": 1.3141098304380683e-05, "loss": 0.0028, "step": 46540 }, { "epoch": 41.861510791366904, "grad_norm": 0.07116592675447464, "learning_rate": 1.3122483117409651e-05, "loss": 0.0034, "step": 46550 }, { "epoch": 41.8705035971223, "grad_norm": 0.15942645072937012, "learning_rate": 1.3103879132854552e-05, "loss": 0.0043, "step": 46560 }, { "epoch": 41.8794964028777, "grad_norm": 0.10745355486869812, "learning_rate": 1.3085286356366771e-05, "loss": 0.0043, "step": 46570 }, { "epoch": 41.888489208633096, "grad_norm": 0.20928646624088287, "learning_rate": 1.3066704793594337e-05, "loss": 0.0033, "step": 46580 }, { "epoch": 41.89748201438849, "grad_norm": 0.177933007478714, "learning_rate": 1.3048134450181816e-05, "loss": 0.0023, "step": 46590 }, { "epoch": 41.906474820143885, "grad_norm": 0.07674123346805573, "learning_rate": 1.3029575331770394e-05, "loss": 0.0026, "step": 46600 }, { "epoch": 41.91546762589928, "grad_norm": 0.1117841899394989, "learning_rate": 1.3011027443997837e-05, "loss": 0.0021, "step": 46610 }, { "epoch": 41.92446043165468, "grad_norm": 0.1406116932630539, "learning_rate": 1.2992490792498507e-05, "loss": 0.0026, "step": 46620 }, { "epoch": 41.93345323741007, "grad_norm": 0.1233619749546051, "learning_rate": 1.297396538290333e-05, "loss": 0.0035, "step": 46630 }, { "epoch": 41.94244604316547, "grad_norm": 0.09338369220495224, "learning_rate": 1.2955451220839888e-05, "loss": 0.0018, "step": 46640 }, { "epoch": 41.951438848920866, "grad_norm": 0.27210181951522827, "learning_rate": 1.2936948311932223e-05, "loss": 0.0037, "step": 46650 }, { "epoch": 41.960431654676256, "grad_norm": 0.0603594109416008, "learning_rate": 1.2918456661801104e-05, "loss": 0.0016, "step": 46660 }, { "epoch": 41.969424460431654, "grad_norm": 0.05351340025663376, "learning_rate": 1.2899976276063736e-05, "loss": 0.0024, "step": 46670 }, { "epoch": 41.97841726618705, "grad_norm": 0.11504857242107391, "learning_rate": 1.2881507160334022e-05, "loss": 0.004, "step": 46680 }, { "epoch": 41.98741007194245, "grad_norm": 0.08621446788311005, "learning_rate": 1.286304932022238e-05, "loss": 0.002, "step": 46690 }, { "epoch": 41.99640287769784, "grad_norm": 0.1567745804786682, "learning_rate": 1.2844602761335806e-05, "loss": 0.0039, "step": 46700 }, { "epoch": 42.00539568345324, "grad_norm": 0.09358047693967819, "learning_rate": 1.2826167489277885e-05, "loss": 0.0016, "step": 46710 }, { "epoch": 42.014388489208635, "grad_norm": 0.10319514572620392, "learning_rate": 1.2807743509648745e-05, "loss": 0.0014, "step": 46720 }, { "epoch": 42.023381294964025, "grad_norm": 0.03984104469418526, "learning_rate": 1.2789330828045149e-05, "loss": 0.0037, "step": 46730 }, { "epoch": 42.03237410071942, "grad_norm": 0.10316985845565796, "learning_rate": 1.2770929450060332e-05, "loss": 0.0025, "step": 46740 }, { "epoch": 42.04136690647482, "grad_norm": 0.1441543698310852, "learning_rate": 1.2752539381284184e-05, "loss": 0.0043, "step": 46750 }, { "epoch": 42.05035971223022, "grad_norm": 0.2589365839958191, "learning_rate": 1.273416062730311e-05, "loss": 0.0033, "step": 46760 }, { "epoch": 42.05935251798561, "grad_norm": 0.06733766943216324, "learning_rate": 1.2715793193700088e-05, "loss": 0.0034, "step": 46770 }, { "epoch": 42.068345323741006, "grad_norm": 0.07071132957935333, "learning_rate": 1.2697437086054664e-05, "loss": 0.0044, "step": 46780 }, { "epoch": 42.077338129496404, "grad_norm": 0.1771240085363388, "learning_rate": 1.2679092309942937e-05, "loss": 0.0044, "step": 46790 }, { "epoch": 42.0863309352518, "grad_norm": 0.09801998734474182, "learning_rate": 1.266075887093755e-05, "loss": 0.0024, "step": 46800 }, { "epoch": 42.09532374100719, "grad_norm": 0.12577831745147705, "learning_rate": 1.2642436774607757e-05, "loss": 0.0037, "step": 46810 }, { "epoch": 42.10431654676259, "grad_norm": 0.09021108597517014, "learning_rate": 1.2624126026519278e-05, "loss": 0.0022, "step": 46820 }, { "epoch": 42.11330935251799, "grad_norm": 0.12852391600608826, "learning_rate": 1.2605826632234474e-05, "loss": 0.0035, "step": 46830 }, { "epoch": 42.12230215827338, "grad_norm": 0.06046687066555023, "learning_rate": 1.2587538597312198e-05, "loss": 0.0021, "step": 46840 }, { "epoch": 42.131294964028775, "grad_norm": 0.07823927700519562, "learning_rate": 1.2569261927307884e-05, "loss": 0.004, "step": 46850 }, { "epoch": 42.14028776978417, "grad_norm": 0.07128729671239853, "learning_rate": 1.2550996627773493e-05, "loss": 0.0016, "step": 46860 }, { "epoch": 42.14928057553957, "grad_norm": 0.29219987988471985, "learning_rate": 1.2532742704257527e-05, "loss": 0.0036, "step": 46870 }, { "epoch": 42.15827338129496, "grad_norm": 0.10551954805850983, "learning_rate": 1.2514500162305087e-05, "loss": 0.0025, "step": 46880 }, { "epoch": 42.16726618705036, "grad_norm": 0.09102705866098404, "learning_rate": 1.2496269007457728e-05, "loss": 0.003, "step": 46890 }, { "epoch": 42.17625899280576, "grad_norm": 0.0561395026743412, "learning_rate": 1.2478049245253625e-05, "loss": 0.0021, "step": 46900 }, { "epoch": 42.185251798561154, "grad_norm": 0.15271160006523132, "learning_rate": 1.2459840881227459e-05, "loss": 0.0046, "step": 46910 }, { "epoch": 42.194244604316545, "grad_norm": 0.07270793616771698, "learning_rate": 1.2441643920910435e-05, "loss": 0.0045, "step": 46920 }, { "epoch": 42.20323741007194, "grad_norm": 0.0710054486989975, "learning_rate": 1.2423458369830322e-05, "loss": 0.0018, "step": 46930 }, { "epoch": 42.21223021582734, "grad_norm": 0.21278460323810577, "learning_rate": 1.2405284233511406e-05, "loss": 0.0052, "step": 46940 }, { "epoch": 42.22122302158273, "grad_norm": 0.10736870765686035, "learning_rate": 1.2387121517474487e-05, "loss": 0.0036, "step": 46950 }, { "epoch": 42.23021582733813, "grad_norm": 0.1182546615600586, "learning_rate": 1.2368970227236975e-05, "loss": 0.0036, "step": 46960 }, { "epoch": 42.239208633093526, "grad_norm": 0.12725907564163208, "learning_rate": 1.2350830368312688e-05, "loss": 0.0028, "step": 46970 }, { "epoch": 42.24820143884892, "grad_norm": 0.281747430562973, "learning_rate": 1.2332701946212083e-05, "loss": 0.0032, "step": 46980 }, { "epoch": 42.257194244604314, "grad_norm": 0.28228071331977844, "learning_rate": 1.2314584966442077e-05, "loss": 0.0033, "step": 46990 }, { "epoch": 42.26618705035971, "grad_norm": 0.0757007822394371, "learning_rate": 1.2296479434506136e-05, "loss": 0.0031, "step": 47000 }, { "epoch": 42.27517985611511, "grad_norm": 0.09638267755508423, "learning_rate": 1.2278385355904232e-05, "loss": 0.003, "step": 47010 }, { "epoch": 42.28417266187051, "grad_norm": 0.10505542159080505, "learning_rate": 1.2260302736132867e-05, "loss": 0.002, "step": 47020 }, { "epoch": 42.2931654676259, "grad_norm": 0.17674587666988373, "learning_rate": 1.2242231580685098e-05, "loss": 0.0042, "step": 47030 }, { "epoch": 42.302158273381295, "grad_norm": 0.07221243530511856, "learning_rate": 1.2224171895050413e-05, "loss": 0.0027, "step": 47040 }, { "epoch": 42.31115107913669, "grad_norm": 0.17345212399959564, "learning_rate": 1.2206123684714903e-05, "loss": 0.003, "step": 47050 }, { "epoch": 42.32014388489208, "grad_norm": 0.15086336433887482, "learning_rate": 1.2188086955161132e-05, "loss": 0.002, "step": 47060 }, { "epoch": 42.32913669064748, "grad_norm": 0.08234075456857681, "learning_rate": 1.2170061711868175e-05, "loss": 0.0032, "step": 47070 }, { "epoch": 42.33812949640288, "grad_norm": 0.060315266251564026, "learning_rate": 1.215204796031163e-05, "loss": 0.0024, "step": 47080 }, { "epoch": 42.347122302158276, "grad_norm": 0.1374746710062027, "learning_rate": 1.2134045705963599e-05, "loss": 0.0023, "step": 47090 }, { "epoch": 42.356115107913666, "grad_norm": 0.16188828647136688, "learning_rate": 1.2116054954292689e-05, "loss": 0.0016, "step": 47100 }, { "epoch": 42.365107913669064, "grad_norm": 0.11175037175416946, "learning_rate": 1.2098075710764011e-05, "loss": 0.003, "step": 47110 }, { "epoch": 42.37410071942446, "grad_norm": 0.13210754096508026, "learning_rate": 1.2080107980839183e-05, "loss": 0.0026, "step": 47120 }, { "epoch": 42.38309352517986, "grad_norm": 0.09462245553731918, "learning_rate": 1.2062151769976343e-05, "loss": 0.0035, "step": 47130 }, { "epoch": 42.39208633093525, "grad_norm": 0.1853601187467575, "learning_rate": 1.204420708363011e-05, "loss": 0.0021, "step": 47140 }, { "epoch": 42.40107913669065, "grad_norm": 0.04561648145318031, "learning_rate": 1.2026273927251597e-05, "loss": 0.0046, "step": 47150 }, { "epoch": 42.410071942446045, "grad_norm": 0.07597226649522781, "learning_rate": 1.2008352306288424e-05, "loss": 0.0024, "step": 47160 }, { "epoch": 42.419064748201436, "grad_norm": 0.1541895568370819, "learning_rate": 1.1990442226184695e-05, "loss": 0.0022, "step": 47170 }, { "epoch": 42.42805755395683, "grad_norm": 0.30795422196388245, "learning_rate": 1.1972543692381066e-05, "loss": 0.0043, "step": 47180 }, { "epoch": 42.43705035971223, "grad_norm": 0.1493304967880249, "learning_rate": 1.1954656710314576e-05, "loss": 0.0032, "step": 47190 }, { "epoch": 42.44604316546763, "grad_norm": 0.19859877228736877, "learning_rate": 1.1936781285418875e-05, "loss": 0.0029, "step": 47200 }, { "epoch": 42.45503597122302, "grad_norm": 0.08284173905849457, "learning_rate": 1.1918917423123993e-05, "loss": 0.0021, "step": 47210 }, { "epoch": 42.46402877697842, "grad_norm": 0.2861669659614563, "learning_rate": 1.1901065128856537e-05, "loss": 0.0044, "step": 47220 }, { "epoch": 42.473021582733814, "grad_norm": 0.0527038536965847, "learning_rate": 1.1883224408039551e-05, "loss": 0.0024, "step": 47230 }, { "epoch": 42.48201438848921, "grad_norm": 0.22705911099910736, "learning_rate": 1.1865395266092578e-05, "loss": 0.0039, "step": 47240 }, { "epoch": 42.4910071942446, "grad_norm": 0.08760020136833191, "learning_rate": 1.1847577708431633e-05, "loss": 0.0029, "step": 47250 }, { "epoch": 42.5, "grad_norm": 0.08050625026226044, "learning_rate": 1.1829771740469225e-05, "loss": 0.0018, "step": 47260 }, { "epoch": 42.5089928057554, "grad_norm": 0.11495508998632431, "learning_rate": 1.1811977367614324e-05, "loss": 0.0027, "step": 47270 }, { "epoch": 42.51798561151079, "grad_norm": 0.10637801885604858, "learning_rate": 1.1794194595272412e-05, "loss": 0.0085, "step": 47280 }, { "epoch": 42.526978417266186, "grad_norm": 0.17508883774280548, "learning_rate": 1.1776423428845423e-05, "loss": 0.0021, "step": 47290 }, { "epoch": 42.53597122302158, "grad_norm": 0.06889998912811279, "learning_rate": 1.1758663873731756e-05, "loss": 0.0043, "step": 47300 }, { "epoch": 42.54496402877698, "grad_norm": 0.20411255955696106, "learning_rate": 1.1740915935326302e-05, "loss": 0.0073, "step": 47310 }, { "epoch": 42.55395683453237, "grad_norm": 0.059929560869932175, "learning_rate": 1.1723179619020396e-05, "loss": 0.0024, "step": 47320 }, { "epoch": 42.56294964028777, "grad_norm": 0.10718158632516861, "learning_rate": 1.1705454930201914e-05, "loss": 0.0025, "step": 47330 }, { "epoch": 42.57194244604317, "grad_norm": 0.1609843373298645, "learning_rate": 1.1687741874255087e-05, "loss": 0.0048, "step": 47340 }, { "epoch": 42.580935251798564, "grad_norm": 0.1005096584558487, "learning_rate": 1.1670040456560728e-05, "loss": 0.0023, "step": 47350 }, { "epoch": 42.589928057553955, "grad_norm": 0.1939815729856491, "learning_rate": 1.1652350682496005e-05, "loss": 0.0035, "step": 47360 }, { "epoch": 42.59892086330935, "grad_norm": 0.09366726875305176, "learning_rate": 1.163467255743465e-05, "loss": 0.0023, "step": 47370 }, { "epoch": 42.60791366906475, "grad_norm": 0.20546899735927582, "learning_rate": 1.1617006086746796e-05, "loss": 0.0032, "step": 47380 }, { "epoch": 42.61690647482014, "grad_norm": 0.15716193616390228, "learning_rate": 1.1599351275799047e-05, "loss": 0.0032, "step": 47390 }, { "epoch": 42.62589928057554, "grad_norm": 0.05331425368785858, "learning_rate": 1.1581708129954466e-05, "loss": 0.0021, "step": 47400 }, { "epoch": 42.634892086330936, "grad_norm": 0.06760081648826599, "learning_rate": 1.1564076654572587e-05, "loss": 0.0018, "step": 47410 }, { "epoch": 42.643884892086334, "grad_norm": 0.05360262095928192, "learning_rate": 1.1546456855009358e-05, "loss": 0.0017, "step": 47420 }, { "epoch": 42.652877697841724, "grad_norm": 0.0918530523777008, "learning_rate": 1.1528848736617248e-05, "loss": 0.0028, "step": 47430 }, { "epoch": 42.66187050359712, "grad_norm": 0.09734711050987244, "learning_rate": 1.1511252304745112e-05, "loss": 0.0035, "step": 47440 }, { "epoch": 42.67086330935252, "grad_norm": 0.0986456498503685, "learning_rate": 1.1493667564738297e-05, "loss": 0.0119, "step": 47450 }, { "epoch": 42.67985611510792, "grad_norm": 0.04901038110256195, "learning_rate": 1.1476094521938574e-05, "loss": 0.0026, "step": 47460 }, { "epoch": 42.68884892086331, "grad_norm": 0.07350924611091614, "learning_rate": 1.1458533181684167e-05, "loss": 0.0028, "step": 47470 }, { "epoch": 42.697841726618705, "grad_norm": 0.04950079694390297, "learning_rate": 1.1440983549309753e-05, "loss": 0.0019, "step": 47480 }, { "epoch": 42.7068345323741, "grad_norm": 0.06535216420888901, "learning_rate": 1.1423445630146434e-05, "loss": 0.0016, "step": 47490 }, { "epoch": 42.71582733812949, "grad_norm": 0.0754798948764801, "learning_rate": 1.1405919429521799e-05, "loss": 0.0053, "step": 47500 }, { "epoch": 42.72482014388489, "grad_norm": 0.11948303878307343, "learning_rate": 1.1388404952759802e-05, "loss": 0.0032, "step": 47510 }, { "epoch": 42.73381294964029, "grad_norm": 0.0959569439291954, "learning_rate": 1.1370902205180923e-05, "loss": 0.0044, "step": 47520 }, { "epoch": 42.742805755395686, "grad_norm": 0.2021683007478714, "learning_rate": 1.1353411192101987e-05, "loss": 0.0053, "step": 47530 }, { "epoch": 42.75179856115108, "grad_norm": 0.07977064698934555, "learning_rate": 1.133593191883634e-05, "loss": 0.0015, "step": 47540 }, { "epoch": 42.760791366906474, "grad_norm": 0.07206100225448608, "learning_rate": 1.1318464390693711e-05, "loss": 0.0023, "step": 47550 }, { "epoch": 42.76978417266187, "grad_norm": 0.1340906023979187, "learning_rate": 1.1301008612980257e-05, "loss": 0.0031, "step": 47560 }, { "epoch": 42.77877697841727, "grad_norm": 0.340146005153656, "learning_rate": 1.128356459099863e-05, "loss": 0.0038, "step": 47570 }, { "epoch": 42.78776978417266, "grad_norm": 0.5137791037559509, "learning_rate": 1.1266132330047802e-05, "loss": 0.0035, "step": 47580 }, { "epoch": 42.79676258992806, "grad_norm": 0.06602612882852554, "learning_rate": 1.1248711835423281e-05, "loss": 0.0035, "step": 47590 }, { "epoch": 42.805755395683455, "grad_norm": 0.0623028427362442, "learning_rate": 1.123130311241693e-05, "loss": 0.0026, "step": 47600 }, { "epoch": 42.814748201438846, "grad_norm": 0.09618902206420898, "learning_rate": 1.1213906166317068e-05, "loss": 0.0028, "step": 47610 }, { "epoch": 42.82374100719424, "grad_norm": 0.38852813839912415, "learning_rate": 1.1196521002408427e-05, "loss": 0.0056, "step": 47620 }, { "epoch": 42.83273381294964, "grad_norm": 0.1321687549352646, "learning_rate": 1.1179147625972159e-05, "loss": 0.0027, "step": 47630 }, { "epoch": 42.84172661870504, "grad_norm": 0.10422993451356888, "learning_rate": 1.1161786042285822e-05, "loss": 0.002, "step": 47640 }, { "epoch": 42.85071942446043, "grad_norm": 0.21158863604068756, "learning_rate": 1.1144436256623447e-05, "loss": 0.0044, "step": 47650 }, { "epoch": 42.85971223021583, "grad_norm": 0.08865681290626526, "learning_rate": 1.1127098274255392e-05, "loss": 0.0044, "step": 47660 }, { "epoch": 42.868705035971225, "grad_norm": 0.0657421424984932, "learning_rate": 1.1109772100448512e-05, "loss": 0.0027, "step": 47670 }, { "epoch": 42.87769784172662, "grad_norm": 0.19172854721546173, "learning_rate": 1.1092457740466033e-05, "loss": 0.0037, "step": 47680 }, { "epoch": 42.88669064748201, "grad_norm": 0.18068479001522064, "learning_rate": 1.10751551995676e-05, "loss": 0.006, "step": 47690 }, { "epoch": 42.89568345323741, "grad_norm": 0.14375950396060944, "learning_rate": 1.1057864483009262e-05, "loss": 0.0031, "step": 47700 }, { "epoch": 42.90467625899281, "grad_norm": 0.12991450726985931, "learning_rate": 1.1040585596043473e-05, "loss": 0.0038, "step": 47710 }, { "epoch": 42.9136690647482, "grad_norm": 0.11016015708446503, "learning_rate": 1.1023318543919148e-05, "loss": 0.0038, "step": 47720 }, { "epoch": 42.922661870503596, "grad_norm": 0.0750277191400528, "learning_rate": 1.10060633318815e-05, "loss": 0.0031, "step": 47730 }, { "epoch": 42.931654676258994, "grad_norm": 0.11287371814250946, "learning_rate": 1.0988819965172248e-05, "loss": 0.0043, "step": 47740 }, { "epoch": 42.94064748201439, "grad_norm": 0.07094059884548187, "learning_rate": 1.0971588449029462e-05, "loss": 0.0046, "step": 47750 }, { "epoch": 42.94964028776978, "grad_norm": 0.1504160612821579, "learning_rate": 1.095436878868762e-05, "loss": 0.0052, "step": 47760 }, { "epoch": 42.95863309352518, "grad_norm": 0.14934386312961578, "learning_rate": 1.0937160989377598e-05, "loss": 0.006, "step": 47770 }, { "epoch": 42.96762589928058, "grad_norm": 0.12639540433883667, "learning_rate": 1.0919965056326676e-05, "loss": 0.0023, "step": 47780 }, { "epoch": 42.976618705035975, "grad_norm": 0.11107343435287476, "learning_rate": 1.0902780994758504e-05, "loss": 0.0021, "step": 47790 }, { "epoch": 42.985611510791365, "grad_norm": 0.1652643382549286, "learning_rate": 1.0885608809893193e-05, "loss": 0.0019, "step": 47800 }, { "epoch": 42.99460431654676, "grad_norm": 0.2824454605579376, "learning_rate": 1.0868448506947142e-05, "loss": 0.003, "step": 47810 }, { "epoch": 43.00359712230216, "grad_norm": 0.4403352737426758, "learning_rate": 1.0851300091133243e-05, "loss": 0.0036, "step": 47820 }, { "epoch": 43.01258992805755, "grad_norm": 0.16918572783470154, "learning_rate": 1.083416356766071e-05, "loss": 0.0029, "step": 47830 }, { "epoch": 43.02158273381295, "grad_norm": 0.1354576200246811, "learning_rate": 1.0817038941735175e-05, "loss": 0.0061, "step": 47840 }, { "epoch": 43.030575539568346, "grad_norm": 0.09442120045423508, "learning_rate": 1.0799926218558642e-05, "loss": 0.0034, "step": 47850 }, { "epoch": 43.039568345323744, "grad_norm": 0.3288913667201996, "learning_rate": 1.0782825403329488e-05, "loss": 0.004, "step": 47860 }, { "epoch": 43.048561151079134, "grad_norm": 0.0792875662446022, "learning_rate": 1.076573650124254e-05, "loss": 0.0026, "step": 47870 }, { "epoch": 43.05755395683453, "grad_norm": 0.06489250063896179, "learning_rate": 1.0748659517488891e-05, "loss": 0.003, "step": 47880 }, { "epoch": 43.06654676258993, "grad_norm": 0.2302979677915573, "learning_rate": 1.0731594457256138e-05, "loss": 0.0029, "step": 47890 }, { "epoch": 43.07553956834533, "grad_norm": 0.04075807332992554, "learning_rate": 1.0714541325728139e-05, "loss": 0.0032, "step": 47900 }, { "epoch": 43.08453237410072, "grad_norm": 0.1854514628648758, "learning_rate": 1.0697500128085231e-05, "loss": 0.003, "step": 47910 }, { "epoch": 43.093525179856115, "grad_norm": 0.08049390465021133, "learning_rate": 1.0680470869504055e-05, "loss": 0.0035, "step": 47920 }, { "epoch": 43.10251798561151, "grad_norm": 0.21936586499214172, "learning_rate": 1.066345355515766e-05, "loss": 0.0027, "step": 47930 }, { "epoch": 43.111510791366904, "grad_norm": 0.10398779809474945, "learning_rate": 1.0646448190215453e-05, "loss": 0.0027, "step": 47940 }, { "epoch": 43.1205035971223, "grad_norm": 0.04942740872502327, "learning_rate": 1.0629454779843217e-05, "loss": 0.0028, "step": 47950 }, { "epoch": 43.1294964028777, "grad_norm": 0.07303144037723541, "learning_rate": 1.0612473329203082e-05, "loss": 0.0026, "step": 47960 }, { "epoch": 43.138489208633096, "grad_norm": 0.07453762739896774, "learning_rate": 1.0595503843453596e-05, "loss": 0.0025, "step": 47970 }, { "epoch": 43.14748201438849, "grad_norm": 0.11579278856515884, "learning_rate": 1.0578546327749634e-05, "loss": 0.0044, "step": 47980 }, { "epoch": 43.156474820143885, "grad_norm": 0.05858315899968147, "learning_rate": 1.0561600787242425e-05, "loss": 0.0033, "step": 47990 }, { "epoch": 43.16546762589928, "grad_norm": 0.05897928401827812, "learning_rate": 1.0544667227079591e-05, "loss": 0.0049, "step": 48000 }, { "epoch": 43.17446043165467, "grad_norm": 0.06318452209234238, "learning_rate": 1.0527745652405085e-05, "loss": 0.0044, "step": 48010 }, { "epoch": 43.18345323741007, "grad_norm": 0.059617288410663605, "learning_rate": 1.051083606835927e-05, "loss": 0.0031, "step": 48020 }, { "epoch": 43.19244604316547, "grad_norm": 0.10380716621875763, "learning_rate": 1.049393848007878e-05, "loss": 0.0027, "step": 48030 }, { "epoch": 43.201438848920866, "grad_norm": 0.3035513758659363, "learning_rate": 1.0477052892696709e-05, "loss": 0.0061, "step": 48040 }, { "epoch": 43.210431654676256, "grad_norm": 0.05794208124279976, "learning_rate": 1.0460179311342394e-05, "loss": 0.0069, "step": 48050 }, { "epoch": 43.219424460431654, "grad_norm": 0.06615821272134781, "learning_rate": 1.0443317741141634e-05, "loss": 0.0015, "step": 48060 }, { "epoch": 43.22841726618705, "grad_norm": 0.07050798088312149, "learning_rate": 1.0426468187216514e-05, "loss": 0.0043, "step": 48070 }, { "epoch": 43.23741007194245, "grad_norm": 0.05770739167928696, "learning_rate": 1.0409630654685477e-05, "loss": 0.0016, "step": 48080 }, { "epoch": 43.24640287769784, "grad_norm": 0.06121131405234337, "learning_rate": 1.039280514866332e-05, "loss": 0.003, "step": 48090 }, { "epoch": 43.25539568345324, "grad_norm": 0.059573911130428314, "learning_rate": 1.0375991674261198e-05, "loss": 0.0028, "step": 48100 }, { "epoch": 43.264388489208635, "grad_norm": 0.08547579497098923, "learning_rate": 1.0359190236586575e-05, "loss": 0.0038, "step": 48110 }, { "epoch": 43.273381294964025, "grad_norm": 0.06717090308666229, "learning_rate": 1.0342400840743322e-05, "loss": 0.0019, "step": 48120 }, { "epoch": 43.28237410071942, "grad_norm": 0.13685524463653564, "learning_rate": 1.0325623491831593e-05, "loss": 0.002, "step": 48130 }, { "epoch": 43.29136690647482, "grad_norm": 0.1356872171163559, "learning_rate": 1.0308858194947906e-05, "loss": 0.0047, "step": 48140 }, { "epoch": 43.30035971223022, "grad_norm": 0.0709308385848999, "learning_rate": 1.0292104955185111e-05, "loss": 0.0029, "step": 48150 }, { "epoch": 43.30935251798561, "grad_norm": 0.05041252821683884, "learning_rate": 1.0275363777632396e-05, "loss": 0.0017, "step": 48160 }, { "epoch": 43.318345323741006, "grad_norm": 0.07971017062664032, "learning_rate": 1.0258634667375321e-05, "loss": 0.0061, "step": 48170 }, { "epoch": 43.327338129496404, "grad_norm": 0.17764857411384583, "learning_rate": 1.02419176294957e-05, "loss": 0.0023, "step": 48180 }, { "epoch": 43.3363309352518, "grad_norm": 0.06525114923715591, "learning_rate": 1.0225212669071782e-05, "loss": 0.0019, "step": 48190 }, { "epoch": 43.34532374100719, "grad_norm": 0.07980791479349136, "learning_rate": 1.0208519791178029e-05, "loss": 0.0017, "step": 48200 }, { "epoch": 43.35431654676259, "grad_norm": 0.2005046010017395, "learning_rate": 1.019183900088535e-05, "loss": 0.003, "step": 48210 }, { "epoch": 43.36330935251799, "grad_norm": 0.050326261669397354, "learning_rate": 1.0175170303260906e-05, "loss": 0.0022, "step": 48220 }, { "epoch": 43.37230215827338, "grad_norm": 0.2506351172924042, "learning_rate": 1.0158513703368206e-05, "loss": 0.0041, "step": 48230 }, { "epoch": 43.381294964028775, "grad_norm": 0.08076126873493195, "learning_rate": 1.0141869206267095e-05, "loss": 0.0016, "step": 48240 }, { "epoch": 43.39028776978417, "grad_norm": 0.05407333746552467, "learning_rate": 1.0125236817013723e-05, "loss": 0.0026, "step": 48250 }, { "epoch": 43.39928057553957, "grad_norm": 0.05620870739221573, "learning_rate": 1.010861654066056e-05, "loss": 0.002, "step": 48260 }, { "epoch": 43.40827338129496, "grad_norm": 0.12722696363925934, "learning_rate": 1.0092008382256434e-05, "loss": 0.0022, "step": 48270 }, { "epoch": 43.41726618705036, "grad_norm": 0.04799278452992439, "learning_rate": 1.0075412346846458e-05, "loss": 0.0019, "step": 48280 }, { "epoch": 43.42625899280576, "grad_norm": 0.05691429227590561, "learning_rate": 1.0058828439472056e-05, "loss": 0.0021, "step": 48290 }, { "epoch": 43.435251798561154, "grad_norm": 0.19535714387893677, "learning_rate": 1.0042256665170996e-05, "loss": 0.0029, "step": 48300 }, { "epoch": 43.444244604316545, "grad_norm": 0.16584624350070953, "learning_rate": 1.0025697028977332e-05, "loss": 0.0024, "step": 48310 }, { "epoch": 43.45323741007194, "grad_norm": 0.14166228473186493, "learning_rate": 1.0009149535921454e-05, "loss": 0.0024, "step": 48320 }, { "epoch": 43.46223021582734, "grad_norm": 0.09442152827978134, "learning_rate": 9.992614191030031e-06, "loss": 0.0021, "step": 48330 }, { "epoch": 43.47122302158273, "grad_norm": 0.09437274187803268, "learning_rate": 9.976090999326115e-06, "loss": 0.0029, "step": 48340 }, { "epoch": 43.48021582733813, "grad_norm": 0.06475023180246353, "learning_rate": 9.959579965828952e-06, "loss": 0.0017, "step": 48350 }, { "epoch": 43.489208633093526, "grad_norm": 0.1284782886505127, "learning_rate": 9.943081095554218e-06, "loss": 0.0021, "step": 48360 }, { "epoch": 43.49820143884892, "grad_norm": 0.111882284283638, "learning_rate": 9.926594393513783e-06, "loss": 0.0033, "step": 48370 }, { "epoch": 43.507194244604314, "grad_norm": 0.12987715005874634, "learning_rate": 9.910119864715906e-06, "loss": 0.0037, "step": 48380 }, { "epoch": 43.51618705035971, "grad_norm": 0.05401034280657768, "learning_rate": 9.8936575141651e-06, "loss": 0.002, "step": 48390 }, { "epoch": 43.52517985611511, "grad_norm": 0.05140158161520958, "learning_rate": 9.877207346862194e-06, "loss": 0.0023, "step": 48400 }, { "epoch": 43.53417266187051, "grad_norm": 0.10299351066350937, "learning_rate": 9.860769367804312e-06, "loss": 0.0025, "step": 48410 }, { "epoch": 43.5431654676259, "grad_norm": 0.08843003213405609, "learning_rate": 9.844343581984877e-06, "loss": 0.0032, "step": 48420 }, { "epoch": 43.552158273381295, "grad_norm": 0.34396010637283325, "learning_rate": 9.82792999439362e-06, "loss": 0.0034, "step": 48430 }, { "epoch": 43.56115107913669, "grad_norm": 0.2164827287197113, "learning_rate": 9.811528610016546e-06, "loss": 0.0024, "step": 48440 }, { "epoch": 43.57014388489208, "grad_norm": 0.08962060511112213, "learning_rate": 9.79513943383597e-06, "loss": 0.0025, "step": 48450 }, { "epoch": 43.57913669064748, "grad_norm": 0.06379298865795135, "learning_rate": 9.778762470830489e-06, "loss": 0.0029, "step": 48460 }, { "epoch": 43.58812949640288, "grad_norm": 0.07417736947536469, "learning_rate": 9.762397725974982e-06, "loss": 0.0023, "step": 48470 }, { "epoch": 43.597122302158276, "grad_norm": 0.19550427794456482, "learning_rate": 9.746045204240622e-06, "loss": 0.0032, "step": 48480 }, { "epoch": 43.606115107913666, "grad_norm": 0.21790887415409088, "learning_rate": 9.729704910594917e-06, "loss": 0.0028, "step": 48490 }, { "epoch": 43.615107913669064, "grad_norm": 0.055008482187986374, "learning_rate": 9.713376850001554e-06, "loss": 0.0016, "step": 48500 }, { "epoch": 43.62410071942446, "grad_norm": 0.050284262746572495, "learning_rate": 9.697061027420622e-06, "loss": 0.0027, "step": 48510 }, { "epoch": 43.63309352517986, "grad_norm": 0.03605877235531807, "learning_rate": 9.680757447808385e-06, "loss": 0.0049, "step": 48520 }, { "epoch": 43.64208633093525, "grad_norm": 0.14713655412197113, "learning_rate": 9.664466116117488e-06, "loss": 0.0017, "step": 48530 }, { "epoch": 43.65107913669065, "grad_norm": 0.10269825905561447, "learning_rate": 9.64818703729678e-06, "loss": 0.0027, "step": 48540 }, { "epoch": 43.660071942446045, "grad_norm": 0.1649022400379181, "learning_rate": 9.631920216291423e-06, "loss": 0.0023, "step": 48550 }, { "epoch": 43.669064748201436, "grad_norm": 0.06451263278722763, "learning_rate": 9.615665658042849e-06, "loss": 0.0044, "step": 48560 }, { "epoch": 43.67805755395683, "grad_norm": 0.2260674387216568, "learning_rate": 9.599423367488747e-06, "loss": 0.0025, "step": 48570 }, { "epoch": 43.68705035971223, "grad_norm": 0.13167986273765564, "learning_rate": 9.583193349563124e-06, "loss": 0.0023, "step": 48580 }, { "epoch": 43.69604316546763, "grad_norm": 0.12753169238567352, "learning_rate": 9.566975609196216e-06, "loss": 0.0029, "step": 48590 }, { "epoch": 43.70503597122302, "grad_norm": 0.07651475816965103, "learning_rate": 9.550770151314548e-06, "loss": 0.003, "step": 48600 }, { "epoch": 43.71402877697842, "grad_norm": 0.10915641486644745, "learning_rate": 9.53457698084091e-06, "loss": 0.0023, "step": 48610 }, { "epoch": 43.723021582733814, "grad_norm": 0.04526824876666069, "learning_rate": 9.518396102694355e-06, "loss": 0.0018, "step": 48620 }, { "epoch": 43.73201438848921, "grad_norm": 0.057400770485401154, "learning_rate": 9.502227521790198e-06, "loss": 0.0033, "step": 48630 }, { "epoch": 43.7410071942446, "grad_norm": 0.08911239355802536, "learning_rate": 9.486071243040063e-06, "loss": 0.0053, "step": 48640 }, { "epoch": 43.75, "grad_norm": 0.07038791477680206, "learning_rate": 9.469927271351747e-06, "loss": 0.0025, "step": 48650 }, { "epoch": 43.7589928057554, "grad_norm": 0.045821450650691986, "learning_rate": 9.453795611629419e-06, "loss": 0.0022, "step": 48660 }, { "epoch": 43.76798561151079, "grad_norm": 0.13700711727142334, "learning_rate": 9.437676268773399e-06, "loss": 0.0024, "step": 48670 }, { "epoch": 43.776978417266186, "grad_norm": 0.11524748057126999, "learning_rate": 9.421569247680357e-06, "loss": 0.002, "step": 48680 }, { "epoch": 43.78597122302158, "grad_norm": 0.06016896665096283, "learning_rate": 9.40547455324316e-06, "loss": 0.0034, "step": 48690 }, { "epoch": 43.79496402877698, "grad_norm": 0.10570862889289856, "learning_rate": 9.389392190350965e-06, "loss": 0.0029, "step": 48700 }, { "epoch": 43.80395683453237, "grad_norm": 0.0704365074634552, "learning_rate": 9.373322163889153e-06, "loss": 0.0026, "step": 48710 }, { "epoch": 43.81294964028777, "grad_norm": 0.20176386833190918, "learning_rate": 9.357264478739375e-06, "loss": 0.0032, "step": 48720 }, { "epoch": 43.82194244604317, "grad_norm": 0.0677863135933876, "learning_rate": 9.341219139779567e-06, "loss": 0.0023, "step": 48730 }, { "epoch": 43.830935251798564, "grad_norm": 0.09144079685211182, "learning_rate": 9.325186151883824e-06, "loss": 0.0016, "step": 48740 }, { "epoch": 43.839928057553955, "grad_norm": 0.05087241530418396, "learning_rate": 9.30916551992258e-06, "loss": 0.0017, "step": 48750 }, { "epoch": 43.84892086330935, "grad_norm": 0.30008429288864136, "learning_rate": 9.293157248762479e-06, "loss": 0.004, "step": 48760 }, { "epoch": 43.85791366906475, "grad_norm": 0.1943635195493698, "learning_rate": 9.2771613432664e-06, "loss": 0.0031, "step": 48770 }, { "epoch": 43.86690647482014, "grad_norm": 0.06921583414077759, "learning_rate": 9.261177808293481e-06, "loss": 0.0071, "step": 48780 }, { "epoch": 43.87589928057554, "grad_norm": 0.07063673436641693, "learning_rate": 9.245206648699096e-06, "loss": 0.0027, "step": 48790 }, { "epoch": 43.884892086330936, "grad_norm": 0.13476549088954926, "learning_rate": 9.22924786933485e-06, "loss": 0.0025, "step": 48800 }, { "epoch": 43.893884892086334, "grad_norm": 0.25972625613212585, "learning_rate": 9.213301475048642e-06, "loss": 0.0029, "step": 48810 }, { "epoch": 43.902877697841724, "grad_norm": 0.14688166975975037, "learning_rate": 9.197367470684504e-06, "loss": 0.0024, "step": 48820 }, { "epoch": 43.91187050359712, "grad_norm": 0.07870033383369446, "learning_rate": 9.181445861082816e-06, "loss": 0.0041, "step": 48830 }, { "epoch": 43.92086330935252, "grad_norm": 0.35879600048065186, "learning_rate": 9.16553665108012e-06, "loss": 0.0086, "step": 48840 }, { "epoch": 43.92985611510792, "grad_norm": 0.10298038274049759, "learning_rate": 9.149639845509223e-06, "loss": 0.0027, "step": 48850 }, { "epoch": 43.93884892086331, "grad_norm": 0.13024114072322845, "learning_rate": 9.133755449199144e-06, "loss": 0.0019, "step": 48860 }, { "epoch": 43.947841726618705, "grad_norm": 0.08485078811645508, "learning_rate": 9.117883466975135e-06, "loss": 0.0016, "step": 48870 }, { "epoch": 43.9568345323741, "grad_norm": 0.07451795786619186, "learning_rate": 9.10202390365873e-06, "loss": 0.002, "step": 48880 }, { "epoch": 43.96582733812949, "grad_norm": 0.06699002534151077, "learning_rate": 9.086176764067583e-06, "loss": 0.0025, "step": 48890 }, { "epoch": 43.97482014388489, "grad_norm": 0.27459046244621277, "learning_rate": 9.070342053015684e-06, "loss": 0.0041, "step": 48900 }, { "epoch": 43.98381294964029, "grad_norm": 0.05305350199341774, "learning_rate": 9.054519775313187e-06, "loss": 0.0035, "step": 48910 }, { "epoch": 43.992805755395686, "grad_norm": 0.056175004690885544, "learning_rate": 9.038709935766476e-06, "loss": 0.0037, "step": 48920 }, { "epoch": 44.00179856115108, "grad_norm": 0.08287128061056137, "learning_rate": 9.02291253917817e-06, "loss": 0.0045, "step": 48930 }, { "epoch": 44.010791366906474, "grad_norm": 0.11230308562517166, "learning_rate": 9.007127590347091e-06, "loss": 0.0031, "step": 48940 }, { "epoch": 44.01978417266187, "grad_norm": 0.19495150446891785, "learning_rate": 8.991355094068288e-06, "loss": 0.0029, "step": 48950 }, { "epoch": 44.02877697841727, "grad_norm": 0.05619700998067856, "learning_rate": 8.975595055133062e-06, "loss": 0.0021, "step": 48960 }, { "epoch": 44.03776978417266, "grad_norm": 0.07068542391061783, "learning_rate": 8.959847478328848e-06, "loss": 0.0025, "step": 48970 }, { "epoch": 44.04676258992806, "grad_norm": 0.06570100039243698, "learning_rate": 8.944112368439378e-06, "loss": 0.0019, "step": 48980 }, { "epoch": 44.055755395683455, "grad_norm": 0.07161951810121536, "learning_rate": 8.928389730244552e-06, "loss": 0.002, "step": 48990 }, { "epoch": 44.064748201438846, "grad_norm": 0.05307387933135033, "learning_rate": 8.912679568520494e-06, "loss": 0.0021, "step": 49000 }, { "epoch": 44.07374100719424, "grad_norm": 0.40947070717811584, "learning_rate": 8.896981888039534e-06, "loss": 0.0021, "step": 49010 }, { "epoch": 44.08273381294964, "grad_norm": 0.08274491876363754, "learning_rate": 8.881296693570201e-06, "loss": 0.0017, "step": 49020 }, { "epoch": 44.09172661870504, "grad_norm": 0.1416664868593216, "learning_rate": 8.865623989877281e-06, "loss": 0.0029, "step": 49030 }, { "epoch": 44.10071942446043, "grad_norm": 0.08239152282476425, "learning_rate": 8.849963781721681e-06, "loss": 0.0029, "step": 49040 }, { "epoch": 44.10971223021583, "grad_norm": 0.0937458947300911, "learning_rate": 8.834316073860588e-06, "loss": 0.0029, "step": 49050 }, { "epoch": 44.118705035971225, "grad_norm": 0.12311875820159912, "learning_rate": 8.818680871047357e-06, "loss": 0.0053, "step": 49060 }, { "epoch": 44.12769784172662, "grad_norm": 0.07822848856449127, "learning_rate": 8.803058178031549e-06, "loss": 0.0027, "step": 49070 }, { "epoch": 44.13669064748201, "grad_norm": 0.12087205797433853, "learning_rate": 8.787447999558922e-06, "loss": 0.0034, "step": 49080 }, { "epoch": 44.14568345323741, "grad_norm": 0.10597974061965942, "learning_rate": 8.77185034037144e-06, "loss": 0.0025, "step": 49090 }, { "epoch": 44.15467625899281, "grad_norm": 0.16093197464942932, "learning_rate": 8.756265205207259e-06, "loss": 0.0021, "step": 49100 }, { "epoch": 44.1636690647482, "grad_norm": 0.14582403004169464, "learning_rate": 8.740692598800732e-06, "loss": 0.0019, "step": 49110 }, { "epoch": 44.172661870503596, "grad_norm": 0.10600458830595016, "learning_rate": 8.72513252588239e-06, "loss": 0.0021, "step": 49120 }, { "epoch": 44.181654676258994, "grad_norm": 0.05613545700907707, "learning_rate": 8.709584991178998e-06, "loss": 0.0017, "step": 49130 }, { "epoch": 44.19064748201439, "grad_norm": 0.09666243940591812, "learning_rate": 8.694049999413479e-06, "loss": 0.0017, "step": 49140 }, { "epoch": 44.19964028776978, "grad_norm": 0.06654423475265503, "learning_rate": 8.678527555304945e-06, "loss": 0.0017, "step": 49150 }, { "epoch": 44.20863309352518, "grad_norm": 0.05452115088701248, "learning_rate": 8.663017663568712e-06, "loss": 0.0011, "step": 49160 }, { "epoch": 44.21762589928058, "grad_norm": 0.22033095359802246, "learning_rate": 8.647520328916259e-06, "loss": 0.0034, "step": 49170 }, { "epoch": 44.226618705035975, "grad_norm": 0.050536900758743286, "learning_rate": 8.632035556055307e-06, "loss": 0.0036, "step": 49180 }, { "epoch": 44.235611510791365, "grad_norm": 0.1564670354127884, "learning_rate": 8.616563349689672e-06, "loss": 0.0041, "step": 49190 }, { "epoch": 44.24460431654676, "grad_norm": 0.08658687025308609, "learning_rate": 8.601103714519448e-06, "loss": 0.0034, "step": 49200 }, { "epoch": 44.25359712230216, "grad_norm": 0.06863345950841904, "learning_rate": 8.58565665524082e-06, "loss": 0.0022, "step": 49210 }, { "epoch": 44.26258992805755, "grad_norm": 0.10591504722833633, "learning_rate": 8.570222176546222e-06, "loss": 0.0037, "step": 49220 }, { "epoch": 44.27158273381295, "grad_norm": 0.14439859986305237, "learning_rate": 8.554800283124242e-06, "loss": 0.0032, "step": 49230 }, { "epoch": 44.280575539568346, "grad_norm": 0.10173766314983368, "learning_rate": 8.539390979659639e-06, "loss": 0.0043, "step": 49240 }, { "epoch": 44.289568345323744, "grad_norm": 0.07008463144302368, "learning_rate": 8.523994270833352e-06, "loss": 0.0021, "step": 49250 }, { "epoch": 44.298561151079134, "grad_norm": 0.058912601321935654, "learning_rate": 8.5086101613225e-06, "loss": 0.0037, "step": 49260 }, { "epoch": 44.30755395683453, "grad_norm": 0.15303470194339752, "learning_rate": 8.493238655800346e-06, "loss": 0.0018, "step": 49270 }, { "epoch": 44.31654676258993, "grad_norm": 0.0782698318362236, "learning_rate": 8.47787975893638e-06, "loss": 0.0022, "step": 49280 }, { "epoch": 44.32553956834532, "grad_norm": 0.08098983764648438, "learning_rate": 8.462533475396211e-06, "loss": 0.0028, "step": 49290 }, { "epoch": 44.33453237410072, "grad_norm": 0.17585743963718414, "learning_rate": 8.447199809841643e-06, "loss": 0.0019, "step": 49300 }, { "epoch": 44.343525179856115, "grad_norm": 0.05663229525089264, "learning_rate": 8.431878766930635e-06, "loss": 0.0023, "step": 49310 }, { "epoch": 44.35251798561151, "grad_norm": 0.0584380067884922, "learning_rate": 8.416570351317304e-06, "loss": 0.0025, "step": 49320 }, { "epoch": 44.361510791366904, "grad_norm": 0.21408230066299438, "learning_rate": 8.401274567651973e-06, "loss": 0.0028, "step": 49330 }, { "epoch": 44.3705035971223, "grad_norm": 0.10718538612127304, "learning_rate": 8.385991420581058e-06, "loss": 0.0035, "step": 49340 }, { "epoch": 44.3794964028777, "grad_norm": 0.09348922222852707, "learning_rate": 8.370720914747215e-06, "loss": 0.0035, "step": 49350 }, { "epoch": 44.388489208633096, "grad_norm": 0.06521376222372055, "learning_rate": 8.355463054789181e-06, "loss": 0.0035, "step": 49360 }, { "epoch": 44.39748201438849, "grad_norm": 0.06082475930452347, "learning_rate": 8.340217845341919e-06, "loss": 0.004, "step": 49370 }, { "epoch": 44.406474820143885, "grad_norm": 0.18216301500797272, "learning_rate": 8.324985291036514e-06, "loss": 0.0034, "step": 49380 }, { "epoch": 44.41546762589928, "grad_norm": 0.08048008382320404, "learning_rate": 8.309765396500213e-06, "loss": 0.0043, "step": 49390 }, { "epoch": 44.42446043165468, "grad_norm": 0.07432486116886139, "learning_rate": 8.294558166356419e-06, "loss": 0.0018, "step": 49400 }, { "epoch": 44.43345323741007, "grad_norm": 0.08970014750957489, "learning_rate": 8.279363605224683e-06, "loss": 0.0016, "step": 49410 }, { "epoch": 44.44244604316547, "grad_norm": 0.18493948876857758, "learning_rate": 8.264181717720704e-06, "loss": 0.0018, "step": 49420 }, { "epoch": 44.451438848920866, "grad_norm": 0.1638801246881485, "learning_rate": 8.249012508456361e-06, "loss": 0.0055, "step": 49430 }, { "epoch": 44.460431654676256, "grad_norm": 0.1880413144826889, "learning_rate": 8.233855982039646e-06, "loss": 0.0022, "step": 49440 }, { "epoch": 44.469424460431654, "grad_norm": 0.14403323829174042, "learning_rate": 8.218712143074708e-06, "loss": 0.0034, "step": 49450 }, { "epoch": 44.47841726618705, "grad_norm": 0.05561595410108566, "learning_rate": 8.203580996161858e-06, "loss": 0.0015, "step": 49460 }, { "epoch": 44.48741007194245, "grad_norm": 0.24594120681285858, "learning_rate": 8.188462545897512e-06, "loss": 0.0021, "step": 49470 }, { "epoch": 44.49640287769784, "grad_norm": 0.08809708058834076, "learning_rate": 8.173356796874304e-06, "loss": 0.002, "step": 49480 }, { "epoch": 44.50539568345324, "grad_norm": 0.0402175597846508, "learning_rate": 8.158263753680906e-06, "loss": 0.0051, "step": 49490 }, { "epoch": 44.514388489208635, "grad_norm": 0.05476783961057663, "learning_rate": 8.143183420902239e-06, "loss": 0.0026, "step": 49500 }, { "epoch": 44.523381294964025, "grad_norm": 0.0919354110956192, "learning_rate": 8.128115803119258e-06, "loss": 0.0014, "step": 49510 }, { "epoch": 44.53237410071942, "grad_norm": 0.10889234393835068, "learning_rate": 8.11306090490916e-06, "loss": 0.0012, "step": 49520 }, { "epoch": 44.54136690647482, "grad_norm": 0.13387855887413025, "learning_rate": 8.098018730845169e-06, "loss": 0.0016, "step": 49530 }, { "epoch": 44.55035971223022, "grad_norm": 0.07037048786878586, "learning_rate": 8.082989285496745e-06, "loss": 0.0015, "step": 49540 }, { "epoch": 44.55935251798561, "grad_norm": 0.09268810600042343, "learning_rate": 8.067972573429416e-06, "loss": 0.0033, "step": 49550 }, { "epoch": 44.568345323741006, "grad_norm": 0.28328362107276917, "learning_rate": 8.052968599204874e-06, "loss": 0.0041, "step": 49560 }, { "epoch": 44.577338129496404, "grad_norm": 0.18518595397472382, "learning_rate": 8.037977367380922e-06, "loss": 0.0024, "step": 49570 }, { "epoch": 44.5863309352518, "grad_norm": 0.1147378608584404, "learning_rate": 8.022998882511495e-06, "loss": 0.0044, "step": 49580 }, { "epoch": 44.59532374100719, "grad_norm": 0.07284700870513916, "learning_rate": 8.008033149146677e-06, "loss": 0.003, "step": 49590 }, { "epoch": 44.60431654676259, "grad_norm": 0.18533997237682343, "learning_rate": 7.993080171832656e-06, "loss": 0.0024, "step": 49600 }, { "epoch": 44.61330935251799, "grad_norm": 0.06674599647521973, "learning_rate": 7.978139955111752e-06, "loss": 0.0022, "step": 49610 }, { "epoch": 44.62230215827338, "grad_norm": 0.06312347948551178, "learning_rate": 7.9632125035224e-06, "loss": 0.0017, "step": 49620 }, { "epoch": 44.631294964028775, "grad_norm": 0.04729248583316803, "learning_rate": 7.948297821599177e-06, "loss": 0.0014, "step": 49630 }, { "epoch": 44.64028776978417, "grad_norm": 0.04576863721013069, "learning_rate": 7.933395913872755e-06, "loss": 0.0019, "step": 49640 }, { "epoch": 44.64928057553957, "grad_norm": 0.055899329483509064, "learning_rate": 7.918506784869972e-06, "loss": 0.0019, "step": 49650 }, { "epoch": 44.65827338129496, "grad_norm": 0.08674848079681396, "learning_rate": 7.903630439113707e-06, "loss": 0.0034, "step": 49660 }, { "epoch": 44.66726618705036, "grad_norm": 0.07501323521137238, "learning_rate": 7.888766881123044e-06, "loss": 0.0029, "step": 49670 }, { "epoch": 44.67625899280576, "grad_norm": 0.12610287964344025, "learning_rate": 7.873916115413099e-06, "loss": 0.0022, "step": 49680 }, { "epoch": 44.685251798561154, "grad_norm": 0.11733745783567429, "learning_rate": 7.85907814649518e-06, "loss": 0.0043, "step": 49690 }, { "epoch": 44.694244604316545, "grad_norm": 0.06374520063400269, "learning_rate": 7.844252978876649e-06, "loss": 0.002, "step": 49700 }, { "epoch": 44.70323741007194, "grad_norm": 0.0924142375588417, "learning_rate": 7.829440617061001e-06, "loss": 0.0023, "step": 49710 }, { "epoch": 44.71223021582734, "grad_norm": 0.0996355339884758, "learning_rate": 7.814641065547851e-06, "loss": 0.004, "step": 49720 }, { "epoch": 44.72122302158273, "grad_norm": 0.0661286935210228, "learning_rate": 7.79985432883289e-06, "loss": 0.0013, "step": 49730 }, { "epoch": 44.73021582733813, "grad_norm": 0.048209529370069504, "learning_rate": 7.78508041140797e-06, "loss": 0.0014, "step": 49740 }, { "epoch": 44.739208633093526, "grad_norm": 0.24936294555664062, "learning_rate": 7.770319317760993e-06, "loss": 0.0033, "step": 49750 }, { "epoch": 44.74820143884892, "grad_norm": 0.0466034859418869, "learning_rate": 7.755571052376004e-06, "loss": 0.0024, "step": 49760 }, { "epoch": 44.757194244604314, "grad_norm": 0.05085434392094612, "learning_rate": 7.740835619733128e-06, "loss": 0.0014, "step": 49770 }, { "epoch": 44.76618705035971, "grad_norm": 0.05537072941660881, "learning_rate": 7.726113024308601e-06, "loss": 0.0032, "step": 49780 }, { "epoch": 44.77517985611511, "grad_norm": 0.053576916456222534, "learning_rate": 7.711403270574746e-06, "loss": 0.0022, "step": 49790 }, { "epoch": 44.78417266187051, "grad_norm": 0.12122176587581635, "learning_rate": 7.696706363000039e-06, "loss": 0.0018, "step": 49800 }, { "epoch": 44.7931654676259, "grad_norm": 0.04719829931855202, "learning_rate": 7.682022306048959e-06, "loss": 0.0015, "step": 49810 }, { "epoch": 44.802158273381295, "grad_norm": 0.04460684955120087, "learning_rate": 7.667351104182186e-06, "loss": 0.0021, "step": 49820 }, { "epoch": 44.81115107913669, "grad_norm": 0.04156481847167015, "learning_rate": 7.652692761856395e-06, "loss": 0.0018, "step": 49830 }, { "epoch": 44.82014388489208, "grad_norm": 0.07979625463485718, "learning_rate": 7.63804728352444e-06, "loss": 0.0016, "step": 49840 }, { "epoch": 44.82913669064748, "grad_norm": 0.25865060091018677, "learning_rate": 7.623414673635215e-06, "loss": 0.0018, "step": 49850 }, { "epoch": 44.83812949640288, "grad_norm": 0.274558424949646, "learning_rate": 7.608794936633723e-06, "loss": 0.0026, "step": 49860 }, { "epoch": 44.847122302158276, "grad_norm": 0.10773541778326035, "learning_rate": 7.594188076961056e-06, "loss": 0.0085, "step": 49870 }, { "epoch": 44.856115107913666, "grad_norm": 0.05497726425528526, "learning_rate": 7.579594099054382e-06, "loss": 0.0029, "step": 49880 }, { "epoch": 44.865107913669064, "grad_norm": 0.05296207219362259, "learning_rate": 7.565013007346983e-06, "loss": 0.0016, "step": 49890 }, { "epoch": 44.87410071942446, "grad_norm": 0.18372537195682526, "learning_rate": 7.5504448062682035e-06, "loss": 0.0044, "step": 49900 }, { "epoch": 44.88309352517986, "grad_norm": 0.1399616301059723, "learning_rate": 7.53588950024347e-06, "loss": 0.0021, "step": 49910 }, { "epoch": 44.89208633093525, "grad_norm": 0.05839722603559494, "learning_rate": 7.5213470936943145e-06, "loss": 0.0014, "step": 49920 }, { "epoch": 44.90107913669065, "grad_norm": 0.07317698746919632, "learning_rate": 7.506817591038323e-06, "loss": 0.0036, "step": 49930 }, { "epoch": 44.910071942446045, "grad_norm": 0.047465939074754715, "learning_rate": 7.492300996689183e-06, "loss": 0.0094, "step": 49940 }, { "epoch": 44.919064748201436, "grad_norm": 0.22391927242279053, "learning_rate": 7.477797315056645e-06, "loss": 0.0026, "step": 49950 }, { "epoch": 44.92805755395683, "grad_norm": 0.09318941831588745, "learning_rate": 7.463306550546539e-06, "loss": 0.002, "step": 49960 }, { "epoch": 44.93705035971223, "grad_norm": 0.11518550664186478, "learning_rate": 7.448828707560812e-06, "loss": 0.0018, "step": 49970 }, { "epoch": 44.94604316546763, "grad_norm": 0.10410498082637787, "learning_rate": 7.4343637904974e-06, "loss": 0.0025, "step": 49980 }, { "epoch": 44.95503597122302, "grad_norm": 0.11392152309417725, "learning_rate": 7.419911803750401e-06, "loss": 0.0022, "step": 49990 }, { "epoch": 44.96402877697842, "grad_norm": 0.12507598102092743, "learning_rate": 7.405472751709935e-06, "loss": 0.0018, "step": 50000 } ], "logging_steps": 10, "max_steps": 60000, "num_input_tokens_seen": 0, "num_train_epochs": 54, "save_steps": 10000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }