{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 26.97841726618705, "eval_steps": 500, "global_step": 30000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.008992805755395683, "grad_norm": 7.161594390869141, "learning_rate": 3.0000000000000004e-07, "loss": 0.9662, "step": 10 }, { "epoch": 0.017985611510791366, "grad_norm": 10.779166221618652, "learning_rate": 6.333333333333333e-07, "loss": 1.0041, "step": 20 }, { "epoch": 0.02697841726618705, "grad_norm": 6.292320251464844, "learning_rate": 9.666666666666668e-07, "loss": 0.9808, "step": 30 }, { "epoch": 0.03597122302158273, "grad_norm": 5.2348432540893555, "learning_rate": 1.3e-06, "loss": 0.8014, "step": 40 }, { "epoch": 0.044964028776978415, "grad_norm": 3.2519373893737793, "learning_rate": 1.6333333333333333e-06, "loss": 0.651, "step": 50 }, { "epoch": 0.0539568345323741, "grad_norm": 2.824371576309204, "learning_rate": 1.9666666666666668e-06, "loss": 0.5037, "step": 60 }, { "epoch": 0.06294964028776978, "grad_norm": 1.506935477256775, "learning_rate": 2.3e-06, "loss": 0.32, "step": 70 }, { "epoch": 0.07194244604316546, "grad_norm": 1.1683040857315063, "learning_rate": 2.6333333333333337e-06, "loss": 0.2527, "step": 80 }, { "epoch": 0.08093525179856115, "grad_norm": 0.8295279741287231, "learning_rate": 2.966666666666667e-06, "loss": 0.1917, "step": 90 }, { "epoch": 0.08992805755395683, "grad_norm": 1.0443888902664185, "learning_rate": 3.3e-06, "loss": 0.1597, "step": 100 }, { "epoch": 0.09892086330935251, "grad_norm": 0.9706791639328003, "learning_rate": 3.633333333333334e-06, "loss": 0.1286, "step": 110 }, { "epoch": 0.1079136690647482, "grad_norm": 0.9377447962760925, "learning_rate": 3.966666666666667e-06, "loss": 0.1196, "step": 120 }, { "epoch": 0.11690647482014388, "grad_norm": 0.6067984700202942, "learning_rate": 4.2999999999999995e-06, "loss": 0.0984, "step": 130 }, { "epoch": 0.12589928057553956, "grad_norm": 0.6696574091911316, "learning_rate": 4.633333333333334e-06, "loss": 0.0866, "step": 140 }, { "epoch": 0.13489208633093525, "grad_norm": 0.8395354747772217, "learning_rate": 4.966666666666667e-06, "loss": 0.0882, "step": 150 }, { "epoch": 0.14388489208633093, "grad_norm": 0.6977983713150024, "learning_rate": 5.3e-06, "loss": 0.0811, "step": 160 }, { "epoch": 0.1528776978417266, "grad_norm": 0.6995964050292969, "learning_rate": 5.633333333333333e-06, "loss": 0.0776, "step": 170 }, { "epoch": 0.1618705035971223, "grad_norm": 0.5513696670532227, "learning_rate": 5.9666666666666666e-06, "loss": 0.0644, "step": 180 }, { "epoch": 0.17086330935251798, "grad_norm": 0.5615208148956299, "learning_rate": 6.300000000000001e-06, "loss": 0.076, "step": 190 }, { "epoch": 0.17985611510791366, "grad_norm": 0.8523958921432495, "learning_rate": 6.633333333333333e-06, "loss": 0.0754, "step": 200 }, { "epoch": 0.18884892086330934, "grad_norm": 0.7726069688796997, "learning_rate": 6.966666666666667e-06, "loss": 0.0661, "step": 210 }, { "epoch": 0.19784172661870503, "grad_norm": 0.7372509241104126, "learning_rate": 7.2999999999999996e-06, "loss": 0.0681, "step": 220 }, { "epoch": 0.2068345323741007, "grad_norm": 0.5732981562614441, "learning_rate": 7.633333333333334e-06, "loss": 0.0645, "step": 230 }, { "epoch": 0.2158273381294964, "grad_norm": 0.6234228610992432, "learning_rate": 7.966666666666666e-06, "loss": 0.0629, "step": 240 }, { "epoch": 0.22482014388489208, "grad_norm": 0.570448637008667, "learning_rate": 8.3e-06, "loss": 0.0607, "step": 250 }, { "epoch": 0.23381294964028776, "grad_norm": 0.5964863300323486, "learning_rate": 8.633333333333334e-06, "loss": 0.0608, "step": 260 }, { "epoch": 0.24280575539568344, "grad_norm": 0.6047839522361755, "learning_rate": 8.966666666666668e-06, "loss": 0.0612, "step": 270 }, { "epoch": 0.2517985611510791, "grad_norm": 0.6755235195159912, "learning_rate": 9.3e-06, "loss": 0.064, "step": 280 }, { "epoch": 0.2607913669064748, "grad_norm": 0.5742819309234619, "learning_rate": 9.633333333333335e-06, "loss": 0.0552, "step": 290 }, { "epoch": 0.2697841726618705, "grad_norm": 0.5023522973060608, "learning_rate": 9.966666666666667e-06, "loss": 0.0636, "step": 300 }, { "epoch": 0.2787769784172662, "grad_norm": 0.6979081034660339, "learning_rate": 1.03e-05, "loss": 0.0558, "step": 310 }, { "epoch": 0.28776978417266186, "grad_norm": 0.7672016620635986, "learning_rate": 1.0633333333333334e-05, "loss": 0.0619, "step": 320 }, { "epoch": 0.29676258992805754, "grad_norm": 0.6912877559661865, "learning_rate": 1.0966666666666666e-05, "loss": 0.0572, "step": 330 }, { "epoch": 0.3057553956834532, "grad_norm": 0.8208765387535095, "learning_rate": 1.13e-05, "loss": 0.0499, "step": 340 }, { "epoch": 0.3147482014388489, "grad_norm": 0.5519169569015503, "learning_rate": 1.1633333333333334e-05, "loss": 0.0543, "step": 350 }, { "epoch": 0.3237410071942446, "grad_norm": 0.5374360084533691, "learning_rate": 1.1966666666666668e-05, "loss": 0.057, "step": 360 }, { "epoch": 0.3327338129496403, "grad_norm": 0.8427839875221252, "learning_rate": 1.23e-05, "loss": 0.0481, "step": 370 }, { "epoch": 0.34172661870503596, "grad_norm": 0.8361133933067322, "learning_rate": 1.2633333333333333e-05, "loss": 0.0527, "step": 380 }, { "epoch": 0.35071942446043164, "grad_norm": 0.8260958790779114, "learning_rate": 1.2966666666666669e-05, "loss": 0.0584, "step": 390 }, { "epoch": 0.3597122302158273, "grad_norm": 0.7628905177116394, "learning_rate": 1.3300000000000001e-05, "loss": 0.0477, "step": 400 }, { "epoch": 0.368705035971223, "grad_norm": 0.6337764859199524, "learning_rate": 1.3633333333333334e-05, "loss": 0.0522, "step": 410 }, { "epoch": 0.3776978417266187, "grad_norm": 0.5014258027076721, "learning_rate": 1.3966666666666666e-05, "loss": 0.0499, "step": 420 }, { "epoch": 0.38669064748201437, "grad_norm": 0.5158013701438904, "learning_rate": 1.43e-05, "loss": 0.0513, "step": 430 }, { "epoch": 0.39568345323741005, "grad_norm": 0.4427158236503601, "learning_rate": 1.4633333333333334e-05, "loss": 0.0493, "step": 440 }, { "epoch": 0.40467625899280574, "grad_norm": 0.6694109439849854, "learning_rate": 1.4966666666666668e-05, "loss": 0.0527, "step": 450 }, { "epoch": 0.4136690647482014, "grad_norm": 0.7517151236534119, "learning_rate": 1.53e-05, "loss": 0.0494, "step": 460 }, { "epoch": 0.4226618705035971, "grad_norm": 0.7620652914047241, "learning_rate": 1.563333333333333e-05, "loss": 0.0491, "step": 470 }, { "epoch": 0.4316546762589928, "grad_norm": 0.633745014667511, "learning_rate": 1.5966666666666667e-05, "loss": 0.0435, "step": 480 }, { "epoch": 0.44064748201438847, "grad_norm": 0.6500685214996338, "learning_rate": 1.63e-05, "loss": 0.0498, "step": 490 }, { "epoch": 0.44964028776978415, "grad_norm": 0.47834300994873047, "learning_rate": 1.6633333333333336e-05, "loss": 0.0421, "step": 500 }, { "epoch": 0.45863309352517984, "grad_norm": 0.6629493832588196, "learning_rate": 1.6966666666666668e-05, "loss": 0.0479, "step": 510 }, { "epoch": 0.4676258992805755, "grad_norm": 0.5309941172599792, "learning_rate": 1.73e-05, "loss": 0.0416, "step": 520 }, { "epoch": 0.4766187050359712, "grad_norm": 0.4896777868270874, "learning_rate": 1.7633333333333336e-05, "loss": 0.0469, "step": 530 }, { "epoch": 0.4856115107913669, "grad_norm": 0.5140358805656433, "learning_rate": 1.796666666666667e-05, "loss": 0.0525, "step": 540 }, { "epoch": 0.49460431654676257, "grad_norm": 0.9461457133293152, "learning_rate": 1.83e-05, "loss": 0.0471, "step": 550 }, { "epoch": 0.5035971223021583, "grad_norm": 0.4467833340167999, "learning_rate": 1.8633333333333333e-05, "loss": 0.0452, "step": 560 }, { "epoch": 0.512589928057554, "grad_norm": 0.6399796009063721, "learning_rate": 1.896666666666667e-05, "loss": 0.0482, "step": 570 }, { "epoch": 0.5215827338129496, "grad_norm": 0.5886995196342468, "learning_rate": 1.93e-05, "loss": 0.0444, "step": 580 }, { "epoch": 0.5305755395683454, "grad_norm": 0.5575857758522034, "learning_rate": 1.9633333333333334e-05, "loss": 0.0428, "step": 590 }, { "epoch": 0.539568345323741, "grad_norm": 0.6211395859718323, "learning_rate": 1.9966666666666666e-05, "loss": 0.0448, "step": 600 }, { "epoch": 0.5485611510791367, "grad_norm": 0.7167505025863647, "learning_rate": 2.0300000000000002e-05, "loss": 0.0514, "step": 610 }, { "epoch": 0.5575539568345323, "grad_norm": 0.7028887271881104, "learning_rate": 2.0633333333333335e-05, "loss": 0.0479, "step": 620 }, { "epoch": 0.5665467625899281, "grad_norm": 0.6112174987792969, "learning_rate": 2.0966666666666667e-05, "loss": 0.0482, "step": 630 }, { "epoch": 0.5755395683453237, "grad_norm": 0.763946533203125, "learning_rate": 2.13e-05, "loss": 0.0465, "step": 640 }, { "epoch": 0.5845323741007195, "grad_norm": 0.675342321395874, "learning_rate": 2.1633333333333332e-05, "loss": 0.0476, "step": 650 }, { "epoch": 0.5935251798561151, "grad_norm": 0.8437749743461609, "learning_rate": 2.1966666666666668e-05, "loss": 0.052, "step": 660 }, { "epoch": 0.6025179856115108, "grad_norm": 0.5639771819114685, "learning_rate": 2.23e-05, "loss": 0.0396, "step": 670 }, { "epoch": 0.6115107913669064, "grad_norm": 0.5318187475204468, "learning_rate": 2.2633333333333336e-05, "loss": 0.0544, "step": 680 }, { "epoch": 0.6205035971223022, "grad_norm": 0.5090067982673645, "learning_rate": 2.2966666666666668e-05, "loss": 0.0401, "step": 690 }, { "epoch": 0.6294964028776978, "grad_norm": 0.4463503062725067, "learning_rate": 2.3300000000000004e-05, "loss": 0.0383, "step": 700 }, { "epoch": 0.6384892086330936, "grad_norm": 0.48143503069877625, "learning_rate": 2.3633333333333336e-05, "loss": 0.0478, "step": 710 }, { "epoch": 0.6474820143884892, "grad_norm": 0.695982813835144, "learning_rate": 2.396666666666667e-05, "loss": 0.0377, "step": 720 }, { "epoch": 0.6564748201438849, "grad_norm": 0.6009023785591125, "learning_rate": 2.43e-05, "loss": 0.0461, "step": 730 }, { "epoch": 0.6654676258992805, "grad_norm": 0.39957791566848755, "learning_rate": 2.4633333333333334e-05, "loss": 0.0392, "step": 740 }, { "epoch": 0.6744604316546763, "grad_norm": 0.5580555200576782, "learning_rate": 2.496666666666667e-05, "loss": 0.0406, "step": 750 }, { "epoch": 0.6834532374100719, "grad_norm": 0.73033207654953, "learning_rate": 2.5300000000000002e-05, "loss": 0.0403, "step": 760 }, { "epoch": 0.6924460431654677, "grad_norm": 0.5423072576522827, "learning_rate": 2.5633333333333338e-05, "loss": 0.0383, "step": 770 }, { "epoch": 0.7014388489208633, "grad_norm": 0.6944575905799866, "learning_rate": 2.5966666666666667e-05, "loss": 0.041, "step": 780 }, { "epoch": 0.710431654676259, "grad_norm": 0.6694932579994202, "learning_rate": 2.6300000000000002e-05, "loss": 0.0337, "step": 790 }, { "epoch": 0.7194244604316546, "grad_norm": 0.49069318175315857, "learning_rate": 2.663333333333333e-05, "loss": 0.0406, "step": 800 }, { "epoch": 0.7284172661870504, "grad_norm": 0.5081745386123657, "learning_rate": 2.6966666666666667e-05, "loss": 0.0381, "step": 810 }, { "epoch": 0.737410071942446, "grad_norm": 0.5727393627166748, "learning_rate": 2.7300000000000003e-05, "loss": 0.0488, "step": 820 }, { "epoch": 0.7464028776978417, "grad_norm": 0.5645552277565002, "learning_rate": 2.7633333333333332e-05, "loss": 0.0437, "step": 830 }, { "epoch": 0.7553956834532374, "grad_norm": 0.8093410730361938, "learning_rate": 2.7966666666666668e-05, "loss": 0.0351, "step": 840 }, { "epoch": 0.7643884892086331, "grad_norm": 0.6967944502830505, "learning_rate": 2.83e-05, "loss": 0.0396, "step": 850 }, { "epoch": 0.7733812949640287, "grad_norm": 0.544190526008606, "learning_rate": 2.8633333333333336e-05, "loss": 0.0437, "step": 860 }, { "epoch": 0.7823741007194245, "grad_norm": 0.43499237298965454, "learning_rate": 2.8966666666666668e-05, "loss": 0.0365, "step": 870 }, { "epoch": 0.7913669064748201, "grad_norm": 0.47358328104019165, "learning_rate": 2.93e-05, "loss": 0.0402, "step": 880 }, { "epoch": 0.8003597122302158, "grad_norm": 0.5719739198684692, "learning_rate": 2.9633333333333336e-05, "loss": 0.0356, "step": 890 }, { "epoch": 0.8093525179856115, "grad_norm": 0.45060575008392334, "learning_rate": 2.9966666666666672e-05, "loss": 0.0352, "step": 900 }, { "epoch": 0.8183453237410072, "grad_norm": 0.4578852355480194, "learning_rate": 3.03e-05, "loss": 0.0385, "step": 910 }, { "epoch": 0.8273381294964028, "grad_norm": 0.40405866503715515, "learning_rate": 3.063333333333334e-05, "loss": 0.0435, "step": 920 }, { "epoch": 0.8363309352517986, "grad_norm": 0.5399798154830933, "learning_rate": 3.096666666666666e-05, "loss": 0.0481, "step": 930 }, { "epoch": 0.8453237410071942, "grad_norm": 0.42498600482940674, "learning_rate": 3.13e-05, "loss": 0.0341, "step": 940 }, { "epoch": 0.85431654676259, "grad_norm": 0.38082873821258545, "learning_rate": 3.1633333333333334e-05, "loss": 0.0314, "step": 950 }, { "epoch": 0.8633093525179856, "grad_norm": 0.5614412426948547, "learning_rate": 3.196666666666667e-05, "loss": 0.0385, "step": 960 }, { "epoch": 0.8723021582733813, "grad_norm": 0.45141687989234924, "learning_rate": 3.2300000000000006e-05, "loss": 0.0303, "step": 970 }, { "epoch": 0.8812949640287769, "grad_norm": 0.5390740633010864, "learning_rate": 3.263333333333333e-05, "loss": 0.0317, "step": 980 }, { "epoch": 0.8902877697841727, "grad_norm": 0.43618905544281006, "learning_rate": 3.296666666666667e-05, "loss": 0.0316, "step": 990 }, { "epoch": 0.8992805755395683, "grad_norm": 0.35126733779907227, "learning_rate": 3.33e-05, "loss": 0.0321, "step": 1000 }, { "epoch": 0.908273381294964, "grad_norm": 0.503820538520813, "learning_rate": 3.3633333333333335e-05, "loss": 0.0386, "step": 1010 }, { "epoch": 0.9172661870503597, "grad_norm": 0.4916991591453552, "learning_rate": 3.396666666666667e-05, "loss": 0.0314, "step": 1020 }, { "epoch": 0.9262589928057554, "grad_norm": 0.3933038115501404, "learning_rate": 3.430000000000001e-05, "loss": 0.0317, "step": 1030 }, { "epoch": 0.935251798561151, "grad_norm": 0.363800585269928, "learning_rate": 3.463333333333333e-05, "loss": 0.029, "step": 1040 }, { "epoch": 0.9442446043165468, "grad_norm": 0.46377646923065186, "learning_rate": 3.496666666666667e-05, "loss": 0.033, "step": 1050 }, { "epoch": 0.9532374100719424, "grad_norm": 0.4403754770755768, "learning_rate": 3.53e-05, "loss": 0.0377, "step": 1060 }, { "epoch": 0.9622302158273381, "grad_norm": 0.46194133162498474, "learning_rate": 3.563333333333334e-05, "loss": 0.0246, "step": 1070 }, { "epoch": 0.9712230215827338, "grad_norm": 0.3373343348503113, "learning_rate": 3.596666666666667e-05, "loss": 0.0325, "step": 1080 }, { "epoch": 0.9802158273381295, "grad_norm": 0.44301873445510864, "learning_rate": 3.63e-05, "loss": 0.0322, "step": 1090 }, { "epoch": 0.9892086330935251, "grad_norm": 0.30097445845603943, "learning_rate": 3.6633333333333334e-05, "loss": 0.026, "step": 1100 }, { "epoch": 0.9982014388489209, "grad_norm": 0.4630109667778015, "learning_rate": 3.6966666666666666e-05, "loss": 0.0263, "step": 1110 }, { "epoch": 1.0071942446043165, "grad_norm": 0.4604622721672058, "learning_rate": 3.73e-05, "loss": 0.0249, "step": 1120 }, { "epoch": 1.0161870503597121, "grad_norm": 0.45917558670043945, "learning_rate": 3.763333333333334e-05, "loss": 0.0276, "step": 1130 }, { "epoch": 1.025179856115108, "grad_norm": 0.5213702321052551, "learning_rate": 3.796666666666667e-05, "loss": 0.0256, "step": 1140 }, { "epoch": 1.0341726618705036, "grad_norm": 0.4527457654476166, "learning_rate": 3.83e-05, "loss": 0.0289, "step": 1150 }, { "epoch": 1.0431654676258992, "grad_norm": 0.33010923862457275, "learning_rate": 3.8633333333333335e-05, "loss": 0.0257, "step": 1160 }, { "epoch": 1.0521582733812949, "grad_norm": 0.30944761633872986, "learning_rate": 3.896666666666667e-05, "loss": 0.0292, "step": 1170 }, { "epoch": 1.0611510791366907, "grad_norm": 0.3330352306365967, "learning_rate": 3.9300000000000007e-05, "loss": 0.0301, "step": 1180 }, { "epoch": 1.0701438848920863, "grad_norm": 0.32693126797676086, "learning_rate": 3.963333333333333e-05, "loss": 0.0274, "step": 1190 }, { "epoch": 1.079136690647482, "grad_norm": 0.38343381881713867, "learning_rate": 3.996666666666667e-05, "loss": 0.0279, "step": 1200 }, { "epoch": 1.0881294964028776, "grad_norm": 0.4041564464569092, "learning_rate": 4.0300000000000004e-05, "loss": 0.03, "step": 1210 }, { "epoch": 1.0971223021582734, "grad_norm": 0.4574725925922394, "learning_rate": 4.0633333333333336e-05, "loss": 0.0277, "step": 1220 }, { "epoch": 1.106115107913669, "grad_norm": 0.3712930381298065, "learning_rate": 4.096666666666667e-05, "loss": 0.0235, "step": 1230 }, { "epoch": 1.1151079136690647, "grad_norm": 0.35718274116516113, "learning_rate": 4.13e-05, "loss": 0.025, "step": 1240 }, { "epoch": 1.1241007194244603, "grad_norm": 0.327541321516037, "learning_rate": 4.1633333333333333e-05, "loss": 0.0219, "step": 1250 }, { "epoch": 1.1330935251798562, "grad_norm": 0.307391881942749, "learning_rate": 4.196666666666667e-05, "loss": 0.0192, "step": 1260 }, { "epoch": 1.1420863309352518, "grad_norm": 0.2634764015674591, "learning_rate": 4.23e-05, "loss": 0.0202, "step": 1270 }, { "epoch": 1.1510791366906474, "grad_norm": 0.39152443408966064, "learning_rate": 4.263333333333334e-05, "loss": 0.0269, "step": 1280 }, { "epoch": 1.1600719424460433, "grad_norm": 0.3116639256477356, "learning_rate": 4.296666666666666e-05, "loss": 0.0233, "step": 1290 }, { "epoch": 1.169064748201439, "grad_norm": 0.46501168608665466, "learning_rate": 4.33e-05, "loss": 0.025, "step": 1300 }, { "epoch": 1.1780575539568345, "grad_norm": 0.3716205060482025, "learning_rate": 4.3633333333333335e-05, "loss": 0.0214, "step": 1310 }, { "epoch": 1.1870503597122302, "grad_norm": 0.6985133290290833, "learning_rate": 4.396666666666667e-05, "loss": 0.0266, "step": 1320 }, { "epoch": 1.1960431654676258, "grad_norm": 0.5831158757209778, "learning_rate": 4.43e-05, "loss": 0.0281, "step": 1330 }, { "epoch": 1.2050359712230216, "grad_norm": 0.4152372479438782, "learning_rate": 4.463333333333334e-05, "loss": 0.0316, "step": 1340 }, { "epoch": 1.2140287769784173, "grad_norm": 0.5114048719406128, "learning_rate": 4.496666666666667e-05, "loss": 0.0238, "step": 1350 }, { "epoch": 1.223021582733813, "grad_norm": 0.38147616386413574, "learning_rate": 4.53e-05, "loss": 0.0322, "step": 1360 }, { "epoch": 1.2320143884892087, "grad_norm": 0.5283682346343994, "learning_rate": 4.5633333333333336e-05, "loss": 0.024, "step": 1370 }, { "epoch": 1.2410071942446044, "grad_norm": 0.3835931420326233, "learning_rate": 4.596666666666667e-05, "loss": 0.024, "step": 1380 }, { "epoch": 1.25, "grad_norm": 0.3418366014957428, "learning_rate": 4.630000000000001e-05, "loss": 0.0261, "step": 1390 }, { "epoch": 1.2589928057553956, "grad_norm": 0.5319337844848633, "learning_rate": 4.663333333333333e-05, "loss": 0.0232, "step": 1400 }, { "epoch": 1.2679856115107913, "grad_norm": 0.42023277282714844, "learning_rate": 4.696666666666667e-05, "loss": 0.0344, "step": 1410 }, { "epoch": 1.276978417266187, "grad_norm": 0.5196614265441895, "learning_rate": 4.73e-05, "loss": 0.0269, "step": 1420 }, { "epoch": 1.2859712230215827, "grad_norm": 0.5012532472610474, "learning_rate": 4.763333333333334e-05, "loss": 0.0296, "step": 1430 }, { "epoch": 1.2949640287769784, "grad_norm": 0.4495078921318054, "learning_rate": 4.796666666666667e-05, "loss": 0.0197, "step": 1440 }, { "epoch": 1.3039568345323742, "grad_norm": 0.33206602931022644, "learning_rate": 4.83e-05, "loss": 0.0295, "step": 1450 }, { "epoch": 1.3129496402877698, "grad_norm": 0.3178330659866333, "learning_rate": 4.8633333333333334e-05, "loss": 0.0218, "step": 1460 }, { "epoch": 1.3219424460431655, "grad_norm": 0.5245758891105652, "learning_rate": 4.8966666666666667e-05, "loss": 0.0178, "step": 1470 }, { "epoch": 1.330935251798561, "grad_norm": 0.42255526781082153, "learning_rate": 4.93e-05, "loss": 0.0234, "step": 1480 }, { "epoch": 1.3399280575539567, "grad_norm": 0.46306881308555603, "learning_rate": 4.963333333333334e-05, "loss": 0.0223, "step": 1490 }, { "epoch": 1.3489208633093526, "grad_norm": 0.46141937375068665, "learning_rate": 4.996666666666667e-05, "loss": 0.0263, "step": 1500 }, { "epoch": 1.3579136690647482, "grad_norm": 0.24307894706726074, "learning_rate": 5.03e-05, "loss": 0.017, "step": 1510 }, { "epoch": 1.3669064748201438, "grad_norm": 0.39953431487083435, "learning_rate": 5.0633333333333335e-05, "loss": 0.0213, "step": 1520 }, { "epoch": 1.3758992805755397, "grad_norm": 0.38586172461509705, "learning_rate": 5.0966666666666674e-05, "loss": 0.0204, "step": 1530 }, { "epoch": 1.3848920863309353, "grad_norm": 0.3821638822555542, "learning_rate": 5.130000000000001e-05, "loss": 0.0187, "step": 1540 }, { "epoch": 1.393884892086331, "grad_norm": 0.39223670959472656, "learning_rate": 5.163333333333333e-05, "loss": 0.026, "step": 1550 }, { "epoch": 1.4028776978417266, "grad_norm": 0.34768274426460266, "learning_rate": 5.196666666666667e-05, "loss": 0.0167, "step": 1560 }, { "epoch": 1.4118705035971222, "grad_norm": 0.348898321390152, "learning_rate": 5.2300000000000004e-05, "loss": 0.0209, "step": 1570 }, { "epoch": 1.420863309352518, "grad_norm": 0.3536530137062073, "learning_rate": 5.2633333333333336e-05, "loss": 0.0192, "step": 1580 }, { "epoch": 1.4298561151079137, "grad_norm": 0.3424930274486542, "learning_rate": 5.296666666666666e-05, "loss": 0.0219, "step": 1590 }, { "epoch": 1.4388489208633093, "grad_norm": 0.46597951650619507, "learning_rate": 5.330000000000001e-05, "loss": 0.0222, "step": 1600 }, { "epoch": 1.4478417266187051, "grad_norm": 0.27408140897750854, "learning_rate": 5.3633333333333334e-05, "loss": 0.0232, "step": 1610 }, { "epoch": 1.4568345323741008, "grad_norm": 0.46624279022216797, "learning_rate": 5.3966666666666666e-05, "loss": 0.0219, "step": 1620 }, { "epoch": 1.4658273381294964, "grad_norm": 0.3813169300556183, "learning_rate": 5.4300000000000005e-05, "loss": 0.0249, "step": 1630 }, { "epoch": 1.474820143884892, "grad_norm": 0.5121552348136902, "learning_rate": 5.463333333333334e-05, "loss": 0.0221, "step": 1640 }, { "epoch": 1.4838129496402876, "grad_norm": 0.29733914136886597, "learning_rate": 5.496666666666666e-05, "loss": 0.0194, "step": 1650 }, { "epoch": 1.4928057553956835, "grad_norm": 0.3276270925998688, "learning_rate": 5.530000000000001e-05, "loss": 0.0273, "step": 1660 }, { "epoch": 1.5017985611510791, "grad_norm": 0.32856905460357666, "learning_rate": 5.5633333333333335e-05, "loss": 0.0196, "step": 1670 }, { "epoch": 1.5107913669064748, "grad_norm": 0.34008392691612244, "learning_rate": 5.596666666666667e-05, "loss": 0.0204, "step": 1680 }, { "epoch": 1.5197841726618706, "grad_norm": 0.3537006676197052, "learning_rate": 5.63e-05, "loss": 0.0282, "step": 1690 }, { "epoch": 1.5287769784172662, "grad_norm": 0.3886796832084656, "learning_rate": 5.663333333333334e-05, "loss": 0.0228, "step": 1700 }, { "epoch": 1.5377697841726619, "grad_norm": 0.4352680742740631, "learning_rate": 5.696666666666667e-05, "loss": 0.0198, "step": 1710 }, { "epoch": 1.5467625899280577, "grad_norm": 0.4284306466579437, "learning_rate": 5.73e-05, "loss": 0.0237, "step": 1720 }, { "epoch": 1.5557553956834531, "grad_norm": 0.3888341784477234, "learning_rate": 5.7633333333333336e-05, "loss": 0.0185, "step": 1730 }, { "epoch": 1.564748201438849, "grad_norm": 0.4072231352329254, "learning_rate": 5.796666666666667e-05, "loss": 0.0246, "step": 1740 }, { "epoch": 1.5737410071942446, "grad_norm": 0.42305272817611694, "learning_rate": 5.83e-05, "loss": 0.0202, "step": 1750 }, { "epoch": 1.5827338129496402, "grad_norm": 0.33731165528297424, "learning_rate": 5.863333333333334e-05, "loss": 0.0201, "step": 1760 }, { "epoch": 1.591726618705036, "grad_norm": 0.4102635681629181, "learning_rate": 5.896666666666667e-05, "loss": 0.0213, "step": 1770 }, { "epoch": 1.6007194244604317, "grad_norm": 0.28512465953826904, "learning_rate": 5.93e-05, "loss": 0.0176, "step": 1780 }, { "epoch": 1.6097122302158273, "grad_norm": 0.3070954382419586, "learning_rate": 5.9633333333333344e-05, "loss": 0.0243, "step": 1790 }, { "epoch": 1.6187050359712232, "grad_norm": 0.3530332148075104, "learning_rate": 5.996666666666667e-05, "loss": 0.0255, "step": 1800 }, { "epoch": 1.6276978417266186, "grad_norm": 0.2905206084251404, "learning_rate": 6.03e-05, "loss": 0.0171, "step": 1810 }, { "epoch": 1.6366906474820144, "grad_norm": 0.34203624725341797, "learning_rate": 6.063333333333333e-05, "loss": 0.0144, "step": 1820 }, { "epoch": 1.64568345323741, "grad_norm": 0.5639443397521973, "learning_rate": 6.0966666666666674e-05, "loss": 0.0198, "step": 1830 }, { "epoch": 1.6546762589928057, "grad_norm": 0.3760521113872528, "learning_rate": 6.13e-05, "loss": 0.0227, "step": 1840 }, { "epoch": 1.6636690647482015, "grad_norm": 0.30499765276908875, "learning_rate": 6.163333333333333e-05, "loss": 0.0162, "step": 1850 }, { "epoch": 1.6726618705035972, "grad_norm": 0.269974946975708, "learning_rate": 6.196666666666668e-05, "loss": 0.0201, "step": 1860 }, { "epoch": 1.6816546762589928, "grad_norm": 0.3031741678714752, "learning_rate": 6.23e-05, "loss": 0.0158, "step": 1870 }, { "epoch": 1.6906474820143886, "grad_norm": 0.30377939343452454, "learning_rate": 6.263333333333333e-05, "loss": 0.0199, "step": 1880 }, { "epoch": 1.699640287769784, "grad_norm": 0.3553202450275421, "learning_rate": 6.296666666666667e-05, "loss": 0.021, "step": 1890 }, { "epoch": 1.70863309352518, "grad_norm": 0.28272563219070435, "learning_rate": 6.330000000000001e-05, "loss": 0.0269, "step": 1900 }, { "epoch": 1.7176258992805755, "grad_norm": 0.529338538646698, "learning_rate": 6.363333333333334e-05, "loss": 0.0197, "step": 1910 }, { "epoch": 1.7266187050359711, "grad_norm": 0.37757742404937744, "learning_rate": 6.396666666666667e-05, "loss": 0.0162, "step": 1920 }, { "epoch": 1.735611510791367, "grad_norm": 0.526282548904419, "learning_rate": 6.43e-05, "loss": 0.0242, "step": 1930 }, { "epoch": 1.7446043165467626, "grad_norm": 0.3405401408672333, "learning_rate": 6.463333333333334e-05, "loss": 0.0161, "step": 1940 }, { "epoch": 1.7535971223021583, "grad_norm": 0.2887765169143677, "learning_rate": 6.496666666666667e-05, "loss": 0.0218, "step": 1950 }, { "epoch": 1.762589928057554, "grad_norm": 0.43740367889404297, "learning_rate": 6.53e-05, "loss": 0.0223, "step": 1960 }, { "epoch": 1.7715827338129495, "grad_norm": 0.43102577328681946, "learning_rate": 6.563333333333333e-05, "loss": 0.02, "step": 1970 }, { "epoch": 1.7805755395683454, "grad_norm": 0.39357802271842957, "learning_rate": 6.596666666666667e-05, "loss": 0.0236, "step": 1980 }, { "epoch": 1.789568345323741, "grad_norm": 0.3803326487541199, "learning_rate": 6.630000000000001e-05, "loss": 0.0178, "step": 1990 }, { "epoch": 1.7985611510791366, "grad_norm": 0.4465852975845337, "learning_rate": 6.663333333333333e-05, "loss": 0.0223, "step": 2000 }, { "epoch": 1.8075539568345325, "grad_norm": 0.32891613245010376, "learning_rate": 6.696666666666666e-05, "loss": 0.0166, "step": 2010 }, { "epoch": 1.816546762589928, "grad_norm": 0.3415553867816925, "learning_rate": 6.730000000000001e-05, "loss": 0.0177, "step": 2020 }, { "epoch": 1.8255395683453237, "grad_norm": 0.38854581117630005, "learning_rate": 6.763333333333334e-05, "loss": 0.0204, "step": 2030 }, { "epoch": 1.8345323741007196, "grad_norm": 0.3907355070114136, "learning_rate": 6.796666666666666e-05, "loss": 0.0181, "step": 2040 }, { "epoch": 1.843525179856115, "grad_norm": 0.4264868497848511, "learning_rate": 6.83e-05, "loss": 0.0203, "step": 2050 }, { "epoch": 1.8525179856115108, "grad_norm": 0.5366071462631226, "learning_rate": 6.863333333333334e-05, "loss": 0.0231, "step": 2060 }, { "epoch": 1.8615107913669064, "grad_norm": 0.344389945268631, "learning_rate": 6.896666666666667e-05, "loss": 0.0193, "step": 2070 }, { "epoch": 1.870503597122302, "grad_norm": 0.5388837456703186, "learning_rate": 6.93e-05, "loss": 0.0227, "step": 2080 }, { "epoch": 1.879496402877698, "grad_norm": 0.5092641115188599, "learning_rate": 6.963333333333334e-05, "loss": 0.0199, "step": 2090 }, { "epoch": 1.8884892086330936, "grad_norm": 0.3231827914714813, "learning_rate": 6.996666666666667e-05, "loss": 0.0193, "step": 2100 }, { "epoch": 1.8974820143884892, "grad_norm": 0.2860768437385559, "learning_rate": 7.03e-05, "loss": 0.0221, "step": 2110 }, { "epoch": 1.906474820143885, "grad_norm": 0.43557053804397583, "learning_rate": 7.063333333333333e-05, "loss": 0.0292, "step": 2120 }, { "epoch": 1.9154676258992804, "grad_norm": 0.3950214982032776, "learning_rate": 7.096666666666667e-05, "loss": 0.0268, "step": 2130 }, { "epoch": 1.9244604316546763, "grad_norm": 0.39169207215309143, "learning_rate": 7.13e-05, "loss": 0.0207, "step": 2140 }, { "epoch": 1.933453237410072, "grad_norm": 0.24328993260860443, "learning_rate": 7.163333333333334e-05, "loss": 0.0172, "step": 2150 }, { "epoch": 1.9424460431654675, "grad_norm": 0.378422349691391, "learning_rate": 7.196666666666668e-05, "loss": 0.0261, "step": 2160 }, { "epoch": 1.9514388489208634, "grad_norm": 0.2604777216911316, "learning_rate": 7.23e-05, "loss": 0.0153, "step": 2170 }, { "epoch": 1.960431654676259, "grad_norm": 0.1881679892539978, "learning_rate": 7.263333333333334e-05, "loss": 0.0134, "step": 2180 }, { "epoch": 1.9694244604316546, "grad_norm": 0.35805949568748474, "learning_rate": 7.296666666666667e-05, "loss": 0.0186, "step": 2190 }, { "epoch": 1.9784172661870505, "grad_norm": 0.3760420083999634, "learning_rate": 7.33e-05, "loss": 0.0223, "step": 2200 }, { "epoch": 1.987410071942446, "grad_norm": 0.3277226686477661, "learning_rate": 7.363333333333334e-05, "loss": 0.0269, "step": 2210 }, { "epoch": 1.9964028776978417, "grad_norm": 0.2980591356754303, "learning_rate": 7.396666666666667e-05, "loss": 0.0194, "step": 2220 }, { "epoch": 2.0053956834532376, "grad_norm": 0.3668059706687927, "learning_rate": 7.43e-05, "loss": 0.0228, "step": 2230 }, { "epoch": 2.014388489208633, "grad_norm": 0.25553348660469055, "learning_rate": 7.463333333333334e-05, "loss": 0.0198, "step": 2240 }, { "epoch": 2.023381294964029, "grad_norm": 0.4961934983730316, "learning_rate": 7.496666666666667e-05, "loss": 0.0232, "step": 2250 }, { "epoch": 2.0323741007194243, "grad_norm": 0.47289183735847473, "learning_rate": 7.53e-05, "loss": 0.033, "step": 2260 }, { "epoch": 2.04136690647482, "grad_norm": 0.48783043026924133, "learning_rate": 7.563333333333333e-05, "loss": 0.027, "step": 2270 }, { "epoch": 2.050359712230216, "grad_norm": 0.5283932685852051, "learning_rate": 7.596666666666668e-05, "loss": 0.0236, "step": 2280 }, { "epoch": 2.0593525179856114, "grad_norm": 0.46441230177879333, "learning_rate": 7.630000000000001e-05, "loss": 0.0271, "step": 2290 }, { "epoch": 2.068345323741007, "grad_norm": 0.4333610236644745, "learning_rate": 7.663333333333333e-05, "loss": 0.0259, "step": 2300 }, { "epoch": 2.077338129496403, "grad_norm": 0.42037081718444824, "learning_rate": 7.696666666666668e-05, "loss": 0.0193, "step": 2310 }, { "epoch": 2.0863309352517985, "grad_norm": 0.43660780787467957, "learning_rate": 7.730000000000001e-05, "loss": 0.0247, "step": 2320 }, { "epoch": 2.0953237410071943, "grad_norm": 0.28176936507225037, "learning_rate": 7.763333333333334e-05, "loss": 0.0222, "step": 2330 }, { "epoch": 2.1043165467625897, "grad_norm": 0.45725134015083313, "learning_rate": 7.796666666666666e-05, "loss": 0.0221, "step": 2340 }, { "epoch": 2.1133093525179856, "grad_norm": 0.4066891074180603, "learning_rate": 7.83e-05, "loss": 0.0232, "step": 2350 }, { "epoch": 2.1223021582733814, "grad_norm": 0.33351564407348633, "learning_rate": 7.863333333333334e-05, "loss": 0.0224, "step": 2360 }, { "epoch": 2.131294964028777, "grad_norm": 0.4768458604812622, "learning_rate": 7.896666666666667e-05, "loss": 0.0257, "step": 2370 }, { "epoch": 2.1402877697841727, "grad_norm": 0.32877546548843384, "learning_rate": 7.93e-05, "loss": 0.022, "step": 2380 }, { "epoch": 2.1492805755395685, "grad_norm": 0.44724464416503906, "learning_rate": 7.963333333333334e-05, "loss": 0.0304, "step": 2390 }, { "epoch": 2.158273381294964, "grad_norm": 0.28080689907073975, "learning_rate": 7.996666666666667e-05, "loss": 0.018, "step": 2400 }, { "epoch": 2.16726618705036, "grad_norm": 0.5758959650993347, "learning_rate": 8.030000000000001e-05, "loss": 0.0228, "step": 2410 }, { "epoch": 2.176258992805755, "grad_norm": 0.271942675113678, "learning_rate": 8.063333333333333e-05, "loss": 0.0226, "step": 2420 }, { "epoch": 2.185251798561151, "grad_norm": 0.39172282814979553, "learning_rate": 8.096666666666667e-05, "loss": 0.0173, "step": 2430 }, { "epoch": 2.194244604316547, "grad_norm": 0.4311153292655945, "learning_rate": 8.13e-05, "loss": 0.0185, "step": 2440 }, { "epoch": 2.2032374100719423, "grad_norm": 0.29360508918762207, "learning_rate": 8.163333333333334e-05, "loss": 0.0239, "step": 2450 }, { "epoch": 2.212230215827338, "grad_norm": 0.2700243592262268, "learning_rate": 8.196666666666668e-05, "loss": 0.0199, "step": 2460 }, { "epoch": 2.221223021582734, "grad_norm": 0.48092883825302124, "learning_rate": 8.23e-05, "loss": 0.0173, "step": 2470 }, { "epoch": 2.2302158273381294, "grad_norm": 0.2972237467765808, "learning_rate": 8.263333333333334e-05, "loss": 0.0201, "step": 2480 }, { "epoch": 2.2392086330935252, "grad_norm": 0.47241175174713135, "learning_rate": 8.296666666666667e-05, "loss": 0.0244, "step": 2490 }, { "epoch": 2.2482014388489207, "grad_norm": 0.39814358949661255, "learning_rate": 8.33e-05, "loss": 0.0259, "step": 2500 }, { "epoch": 2.2571942446043165, "grad_norm": 0.5467379093170166, "learning_rate": 8.363333333333334e-05, "loss": 0.0226, "step": 2510 }, { "epoch": 2.2661870503597124, "grad_norm": 0.3841770589351654, "learning_rate": 8.396666666666667e-05, "loss": 0.0169, "step": 2520 }, { "epoch": 2.2751798561151078, "grad_norm": 0.31804803013801575, "learning_rate": 8.43e-05, "loss": 0.0174, "step": 2530 }, { "epoch": 2.2841726618705036, "grad_norm": 0.25967270135879517, "learning_rate": 8.463333333333335e-05, "loss": 0.0252, "step": 2540 }, { "epoch": 2.2931654676258995, "grad_norm": 0.3577210009098053, "learning_rate": 8.496666666666667e-05, "loss": 0.0158, "step": 2550 }, { "epoch": 2.302158273381295, "grad_norm": 0.38980934023857117, "learning_rate": 8.53e-05, "loss": 0.0229, "step": 2560 }, { "epoch": 2.3111510791366907, "grad_norm": 0.35681411623954773, "learning_rate": 8.563333333333333e-05, "loss": 0.0218, "step": 2570 }, { "epoch": 2.3201438848920866, "grad_norm": 0.3214077353477478, "learning_rate": 8.596666666666668e-05, "loss": 0.0231, "step": 2580 }, { "epoch": 2.329136690647482, "grad_norm": 0.3683968484401703, "learning_rate": 8.63e-05, "loss": 0.0206, "step": 2590 }, { "epoch": 2.338129496402878, "grad_norm": 0.36662641167640686, "learning_rate": 8.663333333333333e-05, "loss": 0.0228, "step": 2600 }, { "epoch": 2.347122302158273, "grad_norm": 0.2891636788845062, "learning_rate": 8.696666666666668e-05, "loss": 0.0208, "step": 2610 }, { "epoch": 2.356115107913669, "grad_norm": 0.4314608573913574, "learning_rate": 8.730000000000001e-05, "loss": 0.0197, "step": 2620 }, { "epoch": 2.365107913669065, "grad_norm": 0.381955087184906, "learning_rate": 8.763333333333334e-05, "loss": 0.0219, "step": 2630 }, { "epoch": 2.3741007194244603, "grad_norm": 0.31789839267730713, "learning_rate": 8.796666666666667e-05, "loss": 0.0207, "step": 2640 }, { "epoch": 2.383093525179856, "grad_norm": 0.3555791676044464, "learning_rate": 8.83e-05, "loss": 0.0192, "step": 2650 }, { "epoch": 2.3920863309352516, "grad_norm": 0.28416430950164795, "learning_rate": 8.863333333333334e-05, "loss": 0.0189, "step": 2660 }, { "epoch": 2.4010791366906474, "grad_norm": 0.40298691391944885, "learning_rate": 8.896666666666667e-05, "loss": 0.0259, "step": 2670 }, { "epoch": 2.4100719424460433, "grad_norm": 0.36258330941200256, "learning_rate": 8.93e-05, "loss": 0.0181, "step": 2680 }, { "epoch": 2.4190647482014387, "grad_norm": 0.2978512644767761, "learning_rate": 8.963333333333333e-05, "loss": 0.0163, "step": 2690 }, { "epoch": 2.4280575539568345, "grad_norm": 0.2544187009334564, "learning_rate": 8.996666666666667e-05, "loss": 0.0175, "step": 2700 }, { "epoch": 2.4370503597122304, "grad_norm": 0.273519366979599, "learning_rate": 9.030000000000001e-05, "loss": 0.016, "step": 2710 }, { "epoch": 2.446043165467626, "grad_norm": 0.3343697488307953, "learning_rate": 9.063333333333333e-05, "loss": 0.0209, "step": 2720 }, { "epoch": 2.4550359712230216, "grad_norm": 0.26836642622947693, "learning_rate": 9.096666666666666e-05, "loss": 0.0137, "step": 2730 }, { "epoch": 2.4640287769784175, "grad_norm": 0.35493478178977966, "learning_rate": 9.130000000000001e-05, "loss": 0.0144, "step": 2740 }, { "epoch": 2.473021582733813, "grad_norm": 0.4677761197090149, "learning_rate": 9.163333333333334e-05, "loss": 0.0199, "step": 2750 }, { "epoch": 2.4820143884892087, "grad_norm": 0.4599294662475586, "learning_rate": 9.196666666666666e-05, "loss": 0.0193, "step": 2760 }, { "epoch": 2.491007194244604, "grad_norm": 0.38621512055397034, "learning_rate": 9.230000000000001e-05, "loss": 0.0171, "step": 2770 }, { "epoch": 2.5, "grad_norm": 0.4680689573287964, "learning_rate": 9.263333333333334e-05, "loss": 0.0189, "step": 2780 }, { "epoch": 2.508992805755396, "grad_norm": 0.3352571129798889, "learning_rate": 9.296666666666667e-05, "loss": 0.0237, "step": 2790 }, { "epoch": 2.5179856115107913, "grad_norm": 0.2650073766708374, "learning_rate": 9.33e-05, "loss": 0.0202, "step": 2800 }, { "epoch": 2.526978417266187, "grad_norm": 0.4697372317314148, "learning_rate": 9.363333333333334e-05, "loss": 0.0219, "step": 2810 }, { "epoch": 2.5359712230215825, "grad_norm": 0.5188857913017273, "learning_rate": 9.396666666666667e-05, "loss": 0.0187, "step": 2820 }, { "epoch": 2.5449640287769784, "grad_norm": 0.6226754784584045, "learning_rate": 9.43e-05, "loss": 0.029, "step": 2830 }, { "epoch": 2.553956834532374, "grad_norm": 0.3353796601295471, "learning_rate": 9.463333333333333e-05, "loss": 0.0211, "step": 2840 }, { "epoch": 2.56294964028777, "grad_norm": 0.34156882762908936, "learning_rate": 9.496666666666667e-05, "loss": 0.018, "step": 2850 }, { "epoch": 2.5719424460431655, "grad_norm": 0.36174312233924866, "learning_rate": 9.53e-05, "loss": 0.0204, "step": 2860 }, { "epoch": 2.5809352517985613, "grad_norm": 0.46889635920524597, "learning_rate": 9.563333333333334e-05, "loss": 0.0198, "step": 2870 }, { "epoch": 2.5899280575539567, "grad_norm": 0.3557160198688507, "learning_rate": 9.596666666666668e-05, "loss": 0.0171, "step": 2880 }, { "epoch": 2.5989208633093526, "grad_norm": 0.44503292441368103, "learning_rate": 9.63e-05, "loss": 0.0214, "step": 2890 }, { "epoch": 2.6079136690647484, "grad_norm": 0.4316111207008362, "learning_rate": 9.663333333333334e-05, "loss": 0.0211, "step": 2900 }, { "epoch": 2.616906474820144, "grad_norm": 0.356754332780838, "learning_rate": 9.696666666666667e-05, "loss": 0.0212, "step": 2910 }, { "epoch": 2.6258992805755397, "grad_norm": 0.3582902252674103, "learning_rate": 9.730000000000001e-05, "loss": 0.0177, "step": 2920 }, { "epoch": 2.634892086330935, "grad_norm": 0.2960597574710846, "learning_rate": 9.763333333333334e-05, "loss": 0.0174, "step": 2930 }, { "epoch": 2.643884892086331, "grad_norm": 0.32016077637672424, "learning_rate": 9.796666666666667e-05, "loss": 0.0213, "step": 2940 }, { "epoch": 2.652877697841727, "grad_norm": 0.3167326748371124, "learning_rate": 9.83e-05, "loss": 0.0302, "step": 2950 }, { "epoch": 2.661870503597122, "grad_norm": 0.4223567545413971, "learning_rate": 9.863333333333334e-05, "loss": 0.0228, "step": 2960 }, { "epoch": 2.670863309352518, "grad_norm": 0.43147212266921997, "learning_rate": 9.896666666666667e-05, "loss": 0.0207, "step": 2970 }, { "epoch": 2.6798561151079134, "grad_norm": 0.35272181034088135, "learning_rate": 9.93e-05, "loss": 0.0186, "step": 2980 }, { "epoch": 2.6888489208633093, "grad_norm": 0.47136250138282776, "learning_rate": 9.963333333333333e-05, "loss": 0.0213, "step": 2990 }, { "epoch": 2.697841726618705, "grad_norm": 0.35698091983795166, "learning_rate": 9.996666666666668e-05, "loss": 0.0291, "step": 3000 }, { "epoch": 2.706834532374101, "grad_norm": 0.2785111665725708, "learning_rate": 9.999999384858465e-05, "loss": 0.0215, "step": 3010 }, { "epoch": 2.7158273381294964, "grad_norm": 0.41589364409446716, "learning_rate": 9.999997258443473e-05, "loss": 0.0221, "step": 3020 }, { "epoch": 2.7248201438848922, "grad_norm": 0.3461451530456543, "learning_rate": 9.999993613161331e-05, "loss": 0.0198, "step": 3030 }, { "epoch": 2.7338129496402876, "grad_norm": 0.39814507961273193, "learning_rate": 9.999988449013146e-05, "loss": 0.0216, "step": 3040 }, { "epoch": 2.7428057553956835, "grad_norm": 0.5686817765235901, "learning_rate": 9.99998176600049e-05, "loss": 0.0231, "step": 3050 }, { "epoch": 2.7517985611510793, "grad_norm": 0.40655601024627686, "learning_rate": 9.999973564125389e-05, "loss": 0.018, "step": 3060 }, { "epoch": 2.7607913669064748, "grad_norm": 0.3933459222316742, "learning_rate": 9.999963843390335e-05, "loss": 0.0164, "step": 3070 }, { "epoch": 2.7697841726618706, "grad_norm": 0.36559486389160156, "learning_rate": 9.999952603798282e-05, "loss": 0.0297, "step": 3080 }, { "epoch": 2.778776978417266, "grad_norm": 0.4838719666004181, "learning_rate": 9.999939845352646e-05, "loss": 0.0214, "step": 3090 }, { "epoch": 2.787769784172662, "grad_norm": 0.3307550251483917, "learning_rate": 9.999925568057298e-05, "loss": 0.022, "step": 3100 }, { "epoch": 2.7967625899280577, "grad_norm": 0.3807447552680969, "learning_rate": 9.999909771916578e-05, "loss": 0.02, "step": 3110 }, { "epoch": 2.805755395683453, "grad_norm": 0.3807653784751892, "learning_rate": 9.999892456935285e-05, "loss": 0.0208, "step": 3120 }, { "epoch": 2.814748201438849, "grad_norm": 0.49647942185401917, "learning_rate": 9.999873623118679e-05, "loss": 0.0206, "step": 3130 }, { "epoch": 2.8237410071942444, "grad_norm": 0.4655051529407501, "learning_rate": 9.999853270472479e-05, "loss": 0.0199, "step": 3140 }, { "epoch": 2.83273381294964, "grad_norm": 0.39537522196769714, "learning_rate": 9.999831399002871e-05, "loss": 0.0222, "step": 3150 }, { "epoch": 2.841726618705036, "grad_norm": 0.5029172301292419, "learning_rate": 9.999808008716494e-05, "loss": 0.0185, "step": 3160 }, { "epoch": 2.850719424460432, "grad_norm": 0.37101319432258606, "learning_rate": 9.999783099620459e-05, "loss": 0.022, "step": 3170 }, { "epoch": 2.8597122302158273, "grad_norm": 0.3154796361923218, "learning_rate": 9.999756671722328e-05, "loss": 0.0192, "step": 3180 }, { "epoch": 2.868705035971223, "grad_norm": 0.36665502190589905, "learning_rate": 9.99972872503013e-05, "loss": 0.0173, "step": 3190 }, { "epoch": 2.8776978417266186, "grad_norm": 0.3801834285259247, "learning_rate": 9.999699259552359e-05, "loss": 0.0224, "step": 3200 }, { "epoch": 2.8866906474820144, "grad_norm": 0.35519999265670776, "learning_rate": 9.99966827529796e-05, "loss": 0.0169, "step": 3210 }, { "epoch": 2.8956834532374103, "grad_norm": 0.4756228029727936, "learning_rate": 9.999635772276348e-05, "loss": 0.019, "step": 3220 }, { "epoch": 2.9046762589928057, "grad_norm": 0.31355050206184387, "learning_rate": 9.999601750497396e-05, "loss": 0.017, "step": 3230 }, { "epoch": 2.9136690647482015, "grad_norm": 0.4299967885017395, "learning_rate": 9.99956620997144e-05, "loss": 0.0165, "step": 3240 }, { "epoch": 2.922661870503597, "grad_norm": 0.4934384822845459, "learning_rate": 9.999529150709275e-05, "loss": 0.0225, "step": 3250 }, { "epoch": 2.931654676258993, "grad_norm": 0.24226538836956024, "learning_rate": 9.999490572722158e-05, "loss": 0.0159, "step": 3260 }, { "epoch": 2.9406474820143886, "grad_norm": 0.22291293740272522, "learning_rate": 9.99945047602181e-05, "loss": 0.0205, "step": 3270 }, { "epoch": 2.949640287769784, "grad_norm": 0.5234814882278442, "learning_rate": 9.99940886062041e-05, "loss": 0.0176, "step": 3280 }, { "epoch": 2.95863309352518, "grad_norm": 0.3106469511985779, "learning_rate": 9.999365726530599e-05, "loss": 0.0178, "step": 3290 }, { "epoch": 2.9676258992805753, "grad_norm": 0.1436019092798233, "learning_rate": 9.999321073765481e-05, "loss": 0.022, "step": 3300 }, { "epoch": 2.976618705035971, "grad_norm": 0.3135868310928345, "learning_rate": 9.99927490233862e-05, "loss": 0.0157, "step": 3310 }, { "epoch": 2.985611510791367, "grad_norm": 0.260868638753891, "learning_rate": 9.999227212264043e-05, "loss": 0.0163, "step": 3320 }, { "epoch": 2.994604316546763, "grad_norm": 0.3309231996536255, "learning_rate": 9.999178003556236e-05, "loss": 0.021, "step": 3330 }, { "epoch": 3.0035971223021583, "grad_norm": 0.3278617262840271, "learning_rate": 9.999127276230146e-05, "loss": 0.019, "step": 3340 }, { "epoch": 3.012589928057554, "grad_norm": 0.4124695360660553, "learning_rate": 9.999075030301184e-05, "loss": 0.0211, "step": 3350 }, { "epoch": 3.0215827338129495, "grad_norm": 0.3957424461841583, "learning_rate": 9.999021265785221e-05, "loss": 0.0215, "step": 3360 }, { "epoch": 3.0305755395683454, "grad_norm": 0.2771298885345459, "learning_rate": 9.998965982698589e-05, "loss": 0.0229, "step": 3370 }, { "epoch": 3.039568345323741, "grad_norm": 0.26919078826904297, "learning_rate": 9.998909181058082e-05, "loss": 0.017, "step": 3380 }, { "epoch": 3.0485611510791366, "grad_norm": 0.3499986529350281, "learning_rate": 9.998850860880953e-05, "loss": 0.019, "step": 3390 }, { "epoch": 3.0575539568345325, "grad_norm": 0.30513662099838257, "learning_rate": 9.998791022184922e-05, "loss": 0.0176, "step": 3400 }, { "epoch": 3.066546762589928, "grad_norm": 0.384071409702301, "learning_rate": 9.99872966498816e-05, "loss": 0.0164, "step": 3410 }, { "epoch": 3.0755395683453237, "grad_norm": 0.2672078311443329, "learning_rate": 9.998666789309313e-05, "loss": 0.0194, "step": 3420 }, { "epoch": 3.0845323741007196, "grad_norm": 0.3458968997001648, "learning_rate": 9.998602395167475e-05, "loss": 0.0162, "step": 3430 }, { "epoch": 3.093525179856115, "grad_norm": 0.3369044065475464, "learning_rate": 9.998536482582213e-05, "loss": 0.0228, "step": 3440 }, { "epoch": 3.102517985611511, "grad_norm": 0.2862730026245117, "learning_rate": 9.998469051573544e-05, "loss": 0.0204, "step": 3450 }, { "epoch": 3.1115107913669067, "grad_norm": 0.35235124826431274, "learning_rate": 9.998400102161954e-05, "loss": 0.0204, "step": 3460 }, { "epoch": 3.120503597122302, "grad_norm": 0.324781209230423, "learning_rate": 9.998329634368388e-05, "loss": 0.0183, "step": 3470 }, { "epoch": 3.129496402877698, "grad_norm": 0.4037126898765564, "learning_rate": 9.998257648214253e-05, "loss": 0.0163, "step": 3480 }, { "epoch": 3.1384892086330933, "grad_norm": 0.36713504791259766, "learning_rate": 9.998184143721417e-05, "loss": 0.0218, "step": 3490 }, { "epoch": 3.147482014388489, "grad_norm": 0.3211263418197632, "learning_rate": 9.998109120912206e-05, "loss": 0.0162, "step": 3500 }, { "epoch": 3.156474820143885, "grad_norm": 0.3011319041252136, "learning_rate": 9.998032579809411e-05, "loss": 0.0187, "step": 3510 }, { "epoch": 3.1654676258992804, "grad_norm": 0.3489548861980438, "learning_rate": 9.997954520436286e-05, "loss": 0.0202, "step": 3520 }, { "epoch": 3.1744604316546763, "grad_norm": 0.32620060443878174, "learning_rate": 9.997874942816538e-05, "loss": 0.0161, "step": 3530 }, { "epoch": 3.183453237410072, "grad_norm": 0.37210237979888916, "learning_rate": 9.997793846974345e-05, "loss": 0.0266, "step": 3540 }, { "epoch": 3.1924460431654675, "grad_norm": 0.47682061791419983, "learning_rate": 9.997711232934341e-05, "loss": 0.0192, "step": 3550 }, { "epoch": 3.2014388489208634, "grad_norm": 0.30422520637512207, "learning_rate": 9.99762710072162e-05, "loss": 0.0202, "step": 3560 }, { "epoch": 3.210431654676259, "grad_norm": 0.2919990122318268, "learning_rate": 9.997541450361743e-05, "loss": 0.0256, "step": 3570 }, { "epoch": 3.2194244604316546, "grad_norm": 0.3007936179637909, "learning_rate": 9.997454281880723e-05, "loss": 0.0219, "step": 3580 }, { "epoch": 3.2284172661870505, "grad_norm": 0.5060188174247742, "learning_rate": 9.997365595305044e-05, "loss": 0.0177, "step": 3590 }, { "epoch": 3.237410071942446, "grad_norm": 0.42694324254989624, "learning_rate": 9.997275390661644e-05, "loss": 0.0181, "step": 3600 }, { "epoch": 3.2464028776978417, "grad_norm": 0.48606452345848083, "learning_rate": 9.997183667977926e-05, "loss": 0.0204, "step": 3610 }, { "epoch": 3.2553956834532376, "grad_norm": 0.4058678448200226, "learning_rate": 9.997090427281752e-05, "loss": 0.0167, "step": 3620 }, { "epoch": 3.264388489208633, "grad_norm": 0.29243308305740356, "learning_rate": 9.996995668601448e-05, "loss": 0.0183, "step": 3630 }, { "epoch": 3.273381294964029, "grad_norm": 0.33267757296562195, "learning_rate": 9.996899391965798e-05, "loss": 0.0138, "step": 3640 }, { "epoch": 3.2823741007194247, "grad_norm": 0.27027976512908936, "learning_rate": 9.996801597404048e-05, "loss": 0.0191, "step": 3650 }, { "epoch": 3.29136690647482, "grad_norm": 0.24774979054927826, "learning_rate": 9.996702284945905e-05, "loss": 0.0164, "step": 3660 }, { "epoch": 3.300359712230216, "grad_norm": 0.3380594849586487, "learning_rate": 9.996601454621539e-05, "loss": 0.0178, "step": 3670 }, { "epoch": 3.3093525179856114, "grad_norm": 0.22845935821533203, "learning_rate": 9.996499106461577e-05, "loss": 0.0169, "step": 3680 }, { "epoch": 3.318345323741007, "grad_norm": 0.26495763659477234, "learning_rate": 9.996395240497112e-05, "loss": 0.0192, "step": 3690 }, { "epoch": 3.327338129496403, "grad_norm": 0.34895059466362, "learning_rate": 9.996289856759696e-05, "loss": 0.0185, "step": 3700 }, { "epoch": 3.3363309352517985, "grad_norm": 0.39759933948516846, "learning_rate": 9.996182955281342e-05, "loss": 0.0191, "step": 3710 }, { "epoch": 3.3453237410071943, "grad_norm": 0.3396856188774109, "learning_rate": 9.996074536094519e-05, "loss": 0.0199, "step": 3720 }, { "epoch": 3.3543165467625897, "grad_norm": 0.3922501802444458, "learning_rate": 9.995964599232168e-05, "loss": 0.017, "step": 3730 }, { "epoch": 3.3633093525179856, "grad_norm": 0.4059715270996094, "learning_rate": 9.995853144727683e-05, "loss": 0.0205, "step": 3740 }, { "epoch": 3.3723021582733814, "grad_norm": 0.27029040455818176, "learning_rate": 9.99574017261492e-05, "loss": 0.0172, "step": 3750 }, { "epoch": 3.381294964028777, "grad_norm": 0.3742430806159973, "learning_rate": 9.995625682928198e-05, "loss": 0.0164, "step": 3760 }, { "epoch": 3.3902877697841727, "grad_norm": 0.2320055216550827, "learning_rate": 9.995509675702295e-05, "loss": 0.0145, "step": 3770 }, { "epoch": 3.3992805755395685, "grad_norm": 0.33614635467529297, "learning_rate": 9.995392150972451e-05, "loss": 0.0183, "step": 3780 }, { "epoch": 3.408273381294964, "grad_norm": 0.24800442159175873, "learning_rate": 9.995273108774366e-05, "loss": 0.022, "step": 3790 }, { "epoch": 3.41726618705036, "grad_norm": 0.2873798906803131, "learning_rate": 9.995152549144205e-05, "loss": 0.0216, "step": 3800 }, { "epoch": 3.4262589928057556, "grad_norm": 0.23299288749694824, "learning_rate": 9.995030472118587e-05, "loss": 0.0201, "step": 3810 }, { "epoch": 3.435251798561151, "grad_norm": 0.3916494846343994, "learning_rate": 9.9949068777346e-05, "loss": 0.0206, "step": 3820 }, { "epoch": 3.444244604316547, "grad_norm": 0.3938503861427307, "learning_rate": 9.994781766029786e-05, "loss": 0.0182, "step": 3830 }, { "epoch": 3.4532374100719423, "grad_norm": 0.30063286423683167, "learning_rate": 9.994655137042151e-05, "loss": 0.021, "step": 3840 }, { "epoch": 3.462230215827338, "grad_norm": 0.2900620698928833, "learning_rate": 9.99452699081016e-05, "loss": 0.0172, "step": 3850 }, { "epoch": 3.471223021582734, "grad_norm": 0.34114325046539307, "learning_rate": 9.994397327372743e-05, "loss": 0.022, "step": 3860 }, { "epoch": 3.4802158273381294, "grad_norm": 0.3807213604450226, "learning_rate": 9.994266146769286e-05, "loss": 0.0235, "step": 3870 }, { "epoch": 3.4892086330935252, "grad_norm": 0.3539610207080841, "learning_rate": 9.994133449039642e-05, "loss": 0.019, "step": 3880 }, { "epoch": 3.4982014388489207, "grad_norm": 0.341228187084198, "learning_rate": 9.993999234224118e-05, "loss": 0.0216, "step": 3890 }, { "epoch": 3.5071942446043165, "grad_norm": 0.34853991866111755, "learning_rate": 9.993863502363485e-05, "loss": 0.0176, "step": 3900 }, { "epoch": 3.5161870503597124, "grad_norm": 0.3259381055831909, "learning_rate": 9.993726253498976e-05, "loss": 0.0138, "step": 3910 }, { "epoch": 3.5251798561151078, "grad_norm": 0.3829975724220276, "learning_rate": 9.993587487672282e-05, "loss": 0.0152, "step": 3920 }, { "epoch": 3.5341726618705036, "grad_norm": 0.39613744616508484, "learning_rate": 9.993447204925558e-05, "loss": 0.0185, "step": 3930 }, { "epoch": 3.543165467625899, "grad_norm": 0.31347522139549255, "learning_rate": 9.993305405301416e-05, "loss": 0.0225, "step": 3940 }, { "epoch": 3.552158273381295, "grad_norm": 0.3787813186645508, "learning_rate": 9.993162088842935e-05, "loss": 0.0206, "step": 3950 }, { "epoch": 3.5611510791366907, "grad_norm": 0.43783602118492126, "learning_rate": 9.993017255593646e-05, "loss": 0.025, "step": 3960 }, { "epoch": 3.5701438848920866, "grad_norm": 0.38172537088394165, "learning_rate": 9.992870905597548e-05, "loss": 0.0196, "step": 3970 }, { "epoch": 3.579136690647482, "grad_norm": 0.28733494877815247, "learning_rate": 9.9927230388991e-05, "loss": 0.0169, "step": 3980 }, { "epoch": 3.588129496402878, "grad_norm": 0.30294549465179443, "learning_rate": 9.992573655543215e-05, "loss": 0.0172, "step": 3990 }, { "epoch": 3.597122302158273, "grad_norm": 0.3061724305152893, "learning_rate": 9.992422755575277e-05, "loss": 0.0148, "step": 4000 }, { "epoch": 3.606115107913669, "grad_norm": 0.2938479781150818, "learning_rate": 9.992270339041123e-05, "loss": 0.0151, "step": 4010 }, { "epoch": 3.615107913669065, "grad_norm": 0.2901189923286438, "learning_rate": 9.992116405987053e-05, "loss": 0.0166, "step": 4020 }, { "epoch": 3.6241007194244603, "grad_norm": 0.3204318881034851, "learning_rate": 9.991960956459828e-05, "loss": 0.0176, "step": 4030 }, { "epoch": 3.633093525179856, "grad_norm": 0.39673787355422974, "learning_rate": 9.991803990506669e-05, "loss": 0.0208, "step": 4040 }, { "epoch": 3.6420863309352516, "grad_norm": 0.406326562166214, "learning_rate": 9.991645508175258e-05, "loss": 0.0199, "step": 4050 }, { "epoch": 3.6510791366906474, "grad_norm": 0.37585464119911194, "learning_rate": 9.99148550951374e-05, "loss": 0.0211, "step": 4060 }, { "epoch": 3.6600719424460433, "grad_norm": 0.363315224647522, "learning_rate": 9.991323994570716e-05, "loss": 0.0264, "step": 4070 }, { "epoch": 3.6690647482014387, "grad_norm": 0.40321072936058044, "learning_rate": 9.99116096339525e-05, "loss": 0.0184, "step": 4080 }, { "epoch": 3.6780575539568345, "grad_norm": 0.39980682730674744, "learning_rate": 9.990996416036869e-05, "loss": 0.019, "step": 4090 }, { "epoch": 3.68705035971223, "grad_norm": 0.40789875388145447, "learning_rate": 9.990830352545555e-05, "loss": 0.0183, "step": 4100 }, { "epoch": 3.696043165467626, "grad_norm": 0.301623672246933, "learning_rate": 9.990662772971756e-05, "loss": 0.0158, "step": 4110 }, { "epoch": 3.7050359712230216, "grad_norm": 0.3479093015193939, "learning_rate": 9.990493677366376e-05, "loss": 0.0186, "step": 4120 }, { "epoch": 3.7140287769784175, "grad_norm": 0.30920377373695374, "learning_rate": 9.990323065780786e-05, "loss": 0.0152, "step": 4130 }, { "epoch": 3.723021582733813, "grad_norm": 0.3645305335521698, "learning_rate": 9.990150938266808e-05, "loss": 0.0204, "step": 4140 }, { "epoch": 3.7320143884892087, "grad_norm": 0.3943715989589691, "learning_rate": 9.989977294876733e-05, "loss": 0.0162, "step": 4150 }, { "epoch": 3.741007194244604, "grad_norm": 0.26295098662376404, "learning_rate": 9.989802135663308e-05, "loss": 0.0137, "step": 4160 }, { "epoch": 3.75, "grad_norm": 0.21098658442497253, "learning_rate": 9.989625460679743e-05, "loss": 0.0147, "step": 4170 }, { "epoch": 3.758992805755396, "grad_norm": 0.2970632016658783, "learning_rate": 9.989447269979706e-05, "loss": 0.0155, "step": 4180 }, { "epoch": 3.7679856115107913, "grad_norm": 0.3128618597984314, "learning_rate": 9.989267563617328e-05, "loss": 0.0158, "step": 4190 }, { "epoch": 3.776978417266187, "grad_norm": 0.22908584773540497, "learning_rate": 9.989086341647198e-05, "loss": 0.0118, "step": 4200 }, { "epoch": 3.7859712230215825, "grad_norm": 0.1597152203321457, "learning_rate": 9.988903604124366e-05, "loss": 0.0132, "step": 4210 }, { "epoch": 3.7949640287769784, "grad_norm": 0.38032087683677673, "learning_rate": 9.988719351104343e-05, "loss": 0.0157, "step": 4220 }, { "epoch": 3.803956834532374, "grad_norm": 0.2549528479576111, "learning_rate": 9.9885335826431e-05, "loss": 0.0143, "step": 4230 }, { "epoch": 3.81294964028777, "grad_norm": 0.3138325810432434, "learning_rate": 9.988346298797071e-05, "loss": 0.0137, "step": 4240 }, { "epoch": 3.8219424460431655, "grad_norm": 0.2840650975704193, "learning_rate": 9.988157499623146e-05, "loss": 0.0175, "step": 4250 }, { "epoch": 3.8309352517985613, "grad_norm": 0.23029272258281708, "learning_rate": 9.987967185178677e-05, "loss": 0.0137, "step": 4260 }, { "epoch": 3.8399280575539567, "grad_norm": 0.2623002529144287, "learning_rate": 9.987775355521476e-05, "loss": 0.0176, "step": 4270 }, { "epoch": 3.8489208633093526, "grad_norm": 0.3060980439186096, "learning_rate": 9.987582010709817e-05, "loss": 0.017, "step": 4280 }, { "epoch": 3.8579136690647484, "grad_norm": 0.3177967369556427, "learning_rate": 9.987387150802431e-05, "loss": 0.0258, "step": 4290 }, { "epoch": 3.866906474820144, "grad_norm": 0.22210489213466644, "learning_rate": 9.987190775858517e-05, "loss": 0.0121, "step": 4300 }, { "epoch": 3.8758992805755397, "grad_norm": 0.25360214710235596, "learning_rate": 9.98699288593772e-05, "loss": 0.0176, "step": 4310 }, { "epoch": 3.884892086330935, "grad_norm": 0.3745388984680176, "learning_rate": 9.986793481100161e-05, "loss": 0.0157, "step": 4320 }, { "epoch": 3.893884892086331, "grad_norm": 0.3574100434780121, "learning_rate": 9.986592561406412e-05, "loss": 0.0188, "step": 4330 }, { "epoch": 3.902877697841727, "grad_norm": 0.22912932932376862, "learning_rate": 9.986390126917503e-05, "loss": 0.0126, "step": 4340 }, { "epoch": 3.911870503597122, "grad_norm": 0.22940939664840698, "learning_rate": 9.986186177694933e-05, "loss": 0.0142, "step": 4350 }, { "epoch": 3.920863309352518, "grad_norm": 0.37141770124435425, "learning_rate": 9.985980713800656e-05, "loss": 0.0213, "step": 4360 }, { "epoch": 3.9298561151079134, "grad_norm": 0.3887629508972168, "learning_rate": 9.985773735297084e-05, "loss": 0.0137, "step": 4370 }, { "epoch": 3.9388489208633093, "grad_norm": 0.32221320271492004, "learning_rate": 9.985565242247092e-05, "loss": 0.0168, "step": 4380 }, { "epoch": 3.947841726618705, "grad_norm": 0.3141295909881592, "learning_rate": 9.985355234714016e-05, "loss": 0.0198, "step": 4390 }, { "epoch": 3.956834532374101, "grad_norm": 0.19729728996753693, "learning_rate": 9.985143712761652e-05, "loss": 0.0114, "step": 4400 }, { "epoch": 3.9658273381294964, "grad_norm": 0.2659664452075958, "learning_rate": 9.984930676454252e-05, "loss": 0.0138, "step": 4410 }, { "epoch": 3.9748201438848922, "grad_norm": 0.27771371603012085, "learning_rate": 9.984716125856532e-05, "loss": 0.0158, "step": 4420 }, { "epoch": 3.9838129496402876, "grad_norm": 0.3937804400920868, "learning_rate": 9.984500061033667e-05, "loss": 0.0118, "step": 4430 }, { "epoch": 3.9928057553956835, "grad_norm": 0.2498544305562973, "learning_rate": 9.984282482051293e-05, "loss": 0.0168, "step": 4440 }, { "epoch": 4.001798561151079, "grad_norm": 0.3050333559513092, "learning_rate": 9.9840633889755e-05, "loss": 0.0163, "step": 4450 }, { "epoch": 4.010791366906475, "grad_norm": 0.2411356121301651, "learning_rate": 9.983842781872848e-05, "loss": 0.0186, "step": 4460 }, { "epoch": 4.01978417266187, "grad_norm": 0.3227730095386505, "learning_rate": 9.98362066081035e-05, "loss": 0.0162, "step": 4470 }, { "epoch": 4.028776978417266, "grad_norm": 0.23187783360481262, "learning_rate": 9.983397025855479e-05, "loss": 0.0127, "step": 4480 }, { "epoch": 4.037769784172662, "grad_norm": 0.2692186236381531, "learning_rate": 9.983171877076171e-05, "loss": 0.0134, "step": 4490 }, { "epoch": 4.046762589928058, "grad_norm": 0.38561639189720154, "learning_rate": 9.98294521454082e-05, "loss": 0.0242, "step": 4500 }, { "epoch": 4.055755395683454, "grad_norm": 0.33778560161590576, "learning_rate": 9.98271703831828e-05, "loss": 0.0165, "step": 4510 }, { "epoch": 4.0647482014388485, "grad_norm": 0.33587396144866943, "learning_rate": 9.982487348477865e-05, "loss": 0.021, "step": 4520 }, { "epoch": 4.073741007194244, "grad_norm": 0.39896681904792786, "learning_rate": 9.982256145089347e-05, "loss": 0.0216, "step": 4530 }, { "epoch": 4.08273381294964, "grad_norm": 0.3793727457523346, "learning_rate": 9.982023428222962e-05, "loss": 0.0145, "step": 4540 }, { "epoch": 4.091726618705036, "grad_norm": 0.3179875314235687, "learning_rate": 9.981789197949403e-05, "loss": 0.0226, "step": 4550 }, { "epoch": 4.100719424460432, "grad_norm": 0.4664749503135681, "learning_rate": 9.98155345433982e-05, "loss": 0.0198, "step": 4560 }, { "epoch": 4.109712230215828, "grad_norm": 0.3499350845813751, "learning_rate": 9.981316197465831e-05, "loss": 0.0169, "step": 4570 }, { "epoch": 4.118705035971223, "grad_norm": 0.3417954742908478, "learning_rate": 9.981077427399504e-05, "loss": 0.0177, "step": 4580 }, { "epoch": 4.127697841726619, "grad_norm": 0.289497971534729, "learning_rate": 9.980837144213371e-05, "loss": 0.0153, "step": 4590 }, { "epoch": 4.136690647482014, "grad_norm": 0.22914066910743713, "learning_rate": 9.980595347980426e-05, "loss": 0.0153, "step": 4600 }, { "epoch": 4.14568345323741, "grad_norm": 0.3261573910713196, "learning_rate": 9.980352038774119e-05, "loss": 0.023, "step": 4610 }, { "epoch": 4.154676258992806, "grad_norm": 0.24407584965229034, "learning_rate": 9.98010721666836e-05, "loss": 0.0229, "step": 4620 }, { "epoch": 4.163669064748201, "grad_norm": 0.27324795722961426, "learning_rate": 9.979860881737523e-05, "loss": 0.0141, "step": 4630 }, { "epoch": 4.172661870503597, "grad_norm": 0.3559262752532959, "learning_rate": 9.979613034056434e-05, "loss": 0.0164, "step": 4640 }, { "epoch": 4.181654676258993, "grad_norm": 0.3542170524597168, "learning_rate": 9.979363673700386e-05, "loss": 0.0251, "step": 4650 }, { "epoch": 4.190647482014389, "grad_norm": 0.3783966898918152, "learning_rate": 9.979112800745124e-05, "loss": 0.0232, "step": 4660 }, { "epoch": 4.1996402877697845, "grad_norm": 0.2905197739601135, "learning_rate": 9.978860415266861e-05, "loss": 0.0183, "step": 4670 }, { "epoch": 4.2086330935251794, "grad_norm": 0.2849740982055664, "learning_rate": 9.978606517342262e-05, "loss": 0.0162, "step": 4680 }, { "epoch": 4.217625899280575, "grad_norm": 0.2994723916053772, "learning_rate": 9.978351107048456e-05, "loss": 0.0217, "step": 4690 }, { "epoch": 4.226618705035971, "grad_norm": 0.3446018397808075, "learning_rate": 9.978094184463029e-05, "loss": 0.02, "step": 4700 }, { "epoch": 4.235611510791367, "grad_norm": 0.24433118104934692, "learning_rate": 9.977835749664029e-05, "loss": 0.0189, "step": 4710 }, { "epoch": 4.244604316546763, "grad_norm": 0.37359103560447693, "learning_rate": 9.97757580272996e-05, "loss": 0.0159, "step": 4720 }, { "epoch": 4.253597122302159, "grad_norm": 0.32322385907173157, "learning_rate": 9.977314343739786e-05, "loss": 0.0125, "step": 4730 }, { "epoch": 4.262589928057554, "grad_norm": 0.30048033595085144, "learning_rate": 9.977051372772934e-05, "loss": 0.0193, "step": 4740 }, { "epoch": 4.2715827338129495, "grad_norm": 0.37636399269104004, "learning_rate": 9.976786889909286e-05, "loss": 0.013, "step": 4750 }, { "epoch": 4.280575539568345, "grad_norm": 0.3799171447753906, "learning_rate": 9.976520895229185e-05, "loss": 0.017, "step": 4760 }, { "epoch": 4.289568345323741, "grad_norm": 0.4257960915565491, "learning_rate": 9.976253388813433e-05, "loss": 0.0188, "step": 4770 }, { "epoch": 4.298561151079137, "grad_norm": 0.3441780209541321, "learning_rate": 9.975984370743293e-05, "loss": 0.016, "step": 4780 }, { "epoch": 4.307553956834532, "grad_norm": 0.3953697681427002, "learning_rate": 9.975713841100485e-05, "loss": 0.0198, "step": 4790 }, { "epoch": 4.316546762589928, "grad_norm": 0.37967726588249207, "learning_rate": 9.975441799967187e-05, "loss": 0.0165, "step": 4800 }, { "epoch": 4.325539568345324, "grad_norm": 0.3282223641872406, "learning_rate": 9.975168247426039e-05, "loss": 0.0179, "step": 4810 }, { "epoch": 4.33453237410072, "grad_norm": 0.3524059057235718, "learning_rate": 9.974893183560139e-05, "loss": 0.015, "step": 4820 }, { "epoch": 4.343525179856115, "grad_norm": 0.3272736966609955, "learning_rate": 9.974616608453045e-05, "loss": 0.0166, "step": 4830 }, { "epoch": 4.35251798561151, "grad_norm": 0.2845388948917389, "learning_rate": 9.974338522188772e-05, "loss": 0.0151, "step": 4840 }, { "epoch": 4.361510791366906, "grad_norm": 0.32479745149612427, "learning_rate": 9.974058924851797e-05, "loss": 0.0154, "step": 4850 }, { "epoch": 4.370503597122302, "grad_norm": 0.3984215259552002, "learning_rate": 9.973777816527051e-05, "loss": 0.0168, "step": 4860 }, { "epoch": 4.379496402877698, "grad_norm": 0.30480310320854187, "learning_rate": 9.973495197299931e-05, "loss": 0.0179, "step": 4870 }, { "epoch": 4.388489208633094, "grad_norm": 0.2825738489627838, "learning_rate": 9.973211067256287e-05, "loss": 0.0157, "step": 4880 }, { "epoch": 4.39748201438849, "grad_norm": 0.49365654587745667, "learning_rate": 9.97292542648243e-05, "loss": 0.0182, "step": 4890 }, { "epoch": 4.406474820143885, "grad_norm": 0.3240413963794708, "learning_rate": 9.972638275065131e-05, "loss": 0.0149, "step": 4900 }, { "epoch": 4.41546762589928, "grad_norm": 0.3255231976509094, "learning_rate": 9.972349613091621e-05, "loss": 0.0187, "step": 4910 }, { "epoch": 4.424460431654676, "grad_norm": 0.4267991781234741, "learning_rate": 9.972059440649584e-05, "loss": 0.0228, "step": 4920 }, { "epoch": 4.433453237410072, "grad_norm": 0.4832122027873993, "learning_rate": 9.971767757827168e-05, "loss": 0.0175, "step": 4930 }, { "epoch": 4.442446043165468, "grad_norm": 0.3565412759780884, "learning_rate": 9.971474564712982e-05, "loss": 0.0158, "step": 4940 }, { "epoch": 4.451438848920863, "grad_norm": 0.2599421739578247, "learning_rate": 9.971179861396084e-05, "loss": 0.0148, "step": 4950 }, { "epoch": 4.460431654676259, "grad_norm": 0.3436378240585327, "learning_rate": 9.970883647966003e-05, "loss": 0.0187, "step": 4960 }, { "epoch": 4.469424460431655, "grad_norm": 0.3332281708717346, "learning_rate": 9.970585924512717e-05, "loss": 0.0148, "step": 4970 }, { "epoch": 4.4784172661870505, "grad_norm": 0.27102231979370117, "learning_rate": 9.970286691126669e-05, "loss": 0.0124, "step": 4980 }, { "epoch": 4.487410071942446, "grad_norm": 0.2847643196582794, "learning_rate": 9.969985947898756e-05, "loss": 0.0155, "step": 4990 }, { "epoch": 4.496402877697841, "grad_norm": 0.248057022690773, "learning_rate": 9.969683694920337e-05, "loss": 0.0171, "step": 5000 }, { "epoch": 4.505395683453237, "grad_norm": 0.2434016764163971, "learning_rate": 9.969379932283228e-05, "loss": 0.0165, "step": 5010 }, { "epoch": 4.514388489208633, "grad_norm": 0.25021448731422424, "learning_rate": 9.969074660079704e-05, "loss": 0.0146, "step": 5020 }, { "epoch": 4.523381294964029, "grad_norm": 0.3211728036403656, "learning_rate": 9.968767878402501e-05, "loss": 0.0188, "step": 5030 }, { "epoch": 4.532374100719425, "grad_norm": 0.3881056308746338, "learning_rate": 9.968459587344808e-05, "loss": 0.0132, "step": 5040 }, { "epoch": 4.5413669064748206, "grad_norm": 0.26115313172340393, "learning_rate": 9.968149787000278e-05, "loss": 0.0151, "step": 5050 }, { "epoch": 4.5503597122302155, "grad_norm": 0.3807312846183777, "learning_rate": 9.967838477463018e-05, "loss": 0.0129, "step": 5060 }, { "epoch": 4.559352517985611, "grad_norm": 0.3003768026828766, "learning_rate": 9.967525658827597e-05, "loss": 0.0172, "step": 5070 }, { "epoch": 4.568345323741007, "grad_norm": 0.3619953393936157, "learning_rate": 9.967211331189042e-05, "loss": 0.0177, "step": 5080 }, { "epoch": 4.577338129496403, "grad_norm": 0.27501508593559265, "learning_rate": 9.966895494642834e-05, "loss": 0.0194, "step": 5090 }, { "epoch": 4.586330935251799, "grad_norm": 0.3910965025424957, "learning_rate": 9.96657814928492e-05, "loss": 0.0195, "step": 5100 }, { "epoch": 4.595323741007194, "grad_norm": 0.29914921522140503, "learning_rate": 9.966259295211697e-05, "loss": 0.0175, "step": 5110 }, { "epoch": 4.60431654676259, "grad_norm": 0.3021906614303589, "learning_rate": 9.965938932520028e-05, "loss": 0.0184, "step": 5120 }, { "epoch": 4.613309352517986, "grad_norm": 0.25278711318969727, "learning_rate": 9.965617061307229e-05, "loss": 0.0152, "step": 5130 }, { "epoch": 4.622302158273381, "grad_norm": 0.3401292860507965, "learning_rate": 9.965293681671077e-05, "loss": 0.0176, "step": 5140 }, { "epoch": 4.631294964028777, "grad_norm": 0.28930750489234924, "learning_rate": 9.964968793709804e-05, "loss": 0.0176, "step": 5150 }, { "epoch": 4.640287769784173, "grad_norm": 0.3500164747238159, "learning_rate": 9.964642397522106e-05, "loss": 0.0154, "step": 5160 }, { "epoch": 4.649280575539568, "grad_norm": 0.31021249294281006, "learning_rate": 9.96431449320713e-05, "loss": 0.0148, "step": 5170 }, { "epoch": 4.658273381294964, "grad_norm": 0.263254314661026, "learning_rate": 9.963985080864486e-05, "loss": 0.0182, "step": 5180 }, { "epoch": 4.66726618705036, "grad_norm": 0.29427140951156616, "learning_rate": 9.96365416059424e-05, "loss": 0.0155, "step": 5190 }, { "epoch": 4.676258992805756, "grad_norm": 0.28386831283569336, "learning_rate": 9.963321732496919e-05, "loss": 0.0158, "step": 5200 }, { "epoch": 4.685251798561151, "grad_norm": 0.203553706407547, "learning_rate": 9.962987796673506e-05, "loss": 0.0163, "step": 5210 }, { "epoch": 4.694244604316546, "grad_norm": 0.4189422130584717, "learning_rate": 9.962652353225438e-05, "loss": 0.0122, "step": 5220 }, { "epoch": 4.703237410071942, "grad_norm": 0.2627584636211395, "learning_rate": 9.962315402254619e-05, "loss": 0.0168, "step": 5230 }, { "epoch": 4.712230215827338, "grad_norm": 0.219941183924675, "learning_rate": 9.9619769438634e-05, "loss": 0.0141, "step": 5240 }, { "epoch": 4.721223021582734, "grad_norm": 0.2923353612422943, "learning_rate": 9.9616369781546e-05, "loss": 0.0131, "step": 5250 }, { "epoch": 4.73021582733813, "grad_norm": 0.31478628516197205, "learning_rate": 9.961295505231491e-05, "loss": 0.0174, "step": 5260 }, { "epoch": 4.739208633093525, "grad_norm": 0.2470293790102005, "learning_rate": 9.960952525197804e-05, "loss": 0.0157, "step": 5270 }, { "epoch": 4.748201438848921, "grad_norm": 0.32437750697135925, "learning_rate": 9.960608038157724e-05, "loss": 0.0114, "step": 5280 }, { "epoch": 4.7571942446043165, "grad_norm": 0.2513394057750702, "learning_rate": 9.960262044215901e-05, "loss": 0.0144, "step": 5290 }, { "epoch": 4.766187050359712, "grad_norm": 0.2917015552520752, "learning_rate": 9.959914543477435e-05, "loss": 0.0216, "step": 5300 }, { "epoch": 4.775179856115108, "grad_norm": 0.38225698471069336, "learning_rate": 9.959565536047892e-05, "loss": 0.0284, "step": 5310 }, { "epoch": 4.784172661870503, "grad_norm": 0.3212614059448242, "learning_rate": 9.959215022033288e-05, "loss": 0.016, "step": 5320 }, { "epoch": 4.793165467625899, "grad_norm": 0.4648503363132477, "learning_rate": 9.9588630015401e-05, "loss": 0.0189, "step": 5330 }, { "epoch": 4.802158273381295, "grad_norm": 0.2788044512271881, "learning_rate": 9.958509474675264e-05, "loss": 0.0152, "step": 5340 }, { "epoch": 4.811151079136691, "grad_norm": 0.3677975833415985, "learning_rate": 9.958154441546171e-05, "loss": 0.0268, "step": 5350 }, { "epoch": 4.820143884892087, "grad_norm": 0.3049086928367615, "learning_rate": 9.957797902260673e-05, "loss": 0.0152, "step": 5360 }, { "epoch": 4.829136690647482, "grad_norm": 0.2741518020629883, "learning_rate": 9.957439856927073e-05, "loss": 0.0113, "step": 5370 }, { "epoch": 4.838129496402877, "grad_norm": 0.26882389187812805, "learning_rate": 9.957080305654139e-05, "loss": 0.0196, "step": 5380 }, { "epoch": 4.847122302158273, "grad_norm": 0.3298853933811188, "learning_rate": 9.956719248551092e-05, "loss": 0.0184, "step": 5390 }, { "epoch": 4.856115107913669, "grad_norm": 0.3634148836135864, "learning_rate": 9.956356685727612e-05, "loss": 0.0155, "step": 5400 }, { "epoch": 4.865107913669065, "grad_norm": 0.26866599917411804, "learning_rate": 9.955992617293836e-05, "loss": 0.0162, "step": 5410 }, { "epoch": 4.874100719424461, "grad_norm": 0.213950514793396, "learning_rate": 9.955627043360358e-05, "loss": 0.0176, "step": 5420 }, { "epoch": 4.883093525179856, "grad_norm": 0.2337348908185959, "learning_rate": 9.955259964038231e-05, "loss": 0.0134, "step": 5430 }, { "epoch": 4.892086330935252, "grad_norm": 0.28681808710098267, "learning_rate": 9.954891379438962e-05, "loss": 0.0159, "step": 5440 }, { "epoch": 4.901079136690647, "grad_norm": 0.4261282980442047, "learning_rate": 9.954521289674519e-05, "loss": 0.0184, "step": 5450 }, { "epoch": 4.910071942446043, "grad_norm": 0.5873265862464905, "learning_rate": 9.954149694857325e-05, "loss": 0.0162, "step": 5460 }, { "epoch": 4.919064748201439, "grad_norm": 0.41706687211990356, "learning_rate": 9.953776595100258e-05, "loss": 0.0229, "step": 5470 }, { "epoch": 4.928057553956835, "grad_norm": 0.4314878582954407, "learning_rate": 9.95340199051666e-05, "loss": 0.0147, "step": 5480 }, { "epoch": 4.93705035971223, "grad_norm": 0.28310301899909973, "learning_rate": 9.953025881220325e-05, "loss": 0.0136, "step": 5490 }, { "epoch": 4.946043165467626, "grad_norm": 0.3950534760951996, "learning_rate": 9.952648267325504e-05, "loss": 0.013, "step": 5500 }, { "epoch": 4.955035971223022, "grad_norm": 0.3652457594871521, "learning_rate": 9.952269148946905e-05, "loss": 0.0168, "step": 5510 }, { "epoch": 4.9640287769784175, "grad_norm": 0.34009382128715515, "learning_rate": 9.951888526199697e-05, "loss": 0.0153, "step": 5520 }, { "epoch": 4.9730215827338125, "grad_norm": 0.29818886518478394, "learning_rate": 9.951506399199501e-05, "loss": 0.0156, "step": 5530 }, { "epoch": 4.982014388489208, "grad_norm": 0.33592575788497925, "learning_rate": 9.951122768062399e-05, "loss": 0.0138, "step": 5540 }, { "epoch": 4.991007194244604, "grad_norm": 0.3279249966144562, "learning_rate": 9.950737632904927e-05, "loss": 0.0162, "step": 5550 }, { "epoch": 5.0, "grad_norm": 0.27848613262176514, "learning_rate": 9.950350993844077e-05, "loss": 0.0149, "step": 5560 }, { "epoch": 5.008992805755396, "grad_norm": 0.17365288734436035, "learning_rate": 9.949962850997303e-05, "loss": 0.0156, "step": 5570 }, { "epoch": 5.017985611510792, "grad_norm": 0.41249361634254456, "learning_rate": 9.949573204482512e-05, "loss": 0.013, "step": 5580 }, { "epoch": 5.026978417266187, "grad_norm": 0.3923795223236084, "learning_rate": 9.949182054418064e-05, "loss": 0.0143, "step": 5590 }, { "epoch": 5.0359712230215825, "grad_norm": 0.28934210538864136, "learning_rate": 9.948789400922787e-05, "loss": 0.0173, "step": 5600 }, { "epoch": 5.044964028776978, "grad_norm": 0.46223384141921997, "learning_rate": 9.948395244115953e-05, "loss": 0.0173, "step": 5610 }, { "epoch": 5.053956834532374, "grad_norm": 0.299775093793869, "learning_rate": 9.9479995841173e-05, "loss": 0.0187, "step": 5620 }, { "epoch": 5.06294964028777, "grad_norm": 0.2949572205543518, "learning_rate": 9.947602421047017e-05, "loss": 0.0115, "step": 5630 }, { "epoch": 5.071942446043165, "grad_norm": 0.4921942949295044, "learning_rate": 9.947203755025753e-05, "loss": 0.0156, "step": 5640 }, { "epoch": 5.080935251798561, "grad_norm": 0.3525313436985016, "learning_rate": 9.946803586174611e-05, "loss": 0.013, "step": 5650 }, { "epoch": 5.089928057553957, "grad_norm": 0.26401862502098083, "learning_rate": 9.946401914615151e-05, "loss": 0.0152, "step": 5660 }, { "epoch": 5.098920863309353, "grad_norm": 0.26687443256378174, "learning_rate": 9.945998740469394e-05, "loss": 0.0113, "step": 5670 }, { "epoch": 5.107913669064748, "grad_norm": 0.27366024255752563, "learning_rate": 9.945594063859809e-05, "loss": 0.0181, "step": 5680 }, { "epoch": 5.116906474820144, "grad_norm": 0.3269280791282654, "learning_rate": 9.94518788490933e-05, "loss": 0.0116, "step": 5690 }, { "epoch": 5.125899280575539, "grad_norm": 0.3161935806274414, "learning_rate": 9.944780203741341e-05, "loss": 0.0194, "step": 5700 }, { "epoch": 5.134892086330935, "grad_norm": 0.276715487241745, "learning_rate": 9.944371020479686e-05, "loss": 0.0179, "step": 5710 }, { "epoch": 5.143884892086331, "grad_norm": 0.34146028757095337, "learning_rate": 9.943960335248662e-05, "loss": 0.014, "step": 5720 }, { "epoch": 5.152877697841727, "grad_norm": 0.27849307656288147, "learning_rate": 9.943548148173027e-05, "loss": 0.0188, "step": 5730 }, { "epoch": 5.161870503597123, "grad_norm": 0.3692938983440399, "learning_rate": 9.943134459377992e-05, "loss": 0.013, "step": 5740 }, { "epoch": 5.170863309352518, "grad_norm": 0.26695287227630615, "learning_rate": 9.942719268989222e-05, "loss": 0.0172, "step": 5750 }, { "epoch": 5.179856115107913, "grad_norm": 0.23300348222255707, "learning_rate": 9.942302577132844e-05, "loss": 0.0137, "step": 5760 }, { "epoch": 5.188848920863309, "grad_norm": 0.3198128938674927, "learning_rate": 9.941884383935438e-05, "loss": 0.0119, "step": 5770 }, { "epoch": 5.197841726618705, "grad_norm": 0.25590500235557556, "learning_rate": 9.941464689524039e-05, "loss": 0.0149, "step": 5780 }, { "epoch": 5.206834532374101, "grad_norm": 0.3708546757698059, "learning_rate": 9.941043494026139e-05, "loss": 0.017, "step": 5790 }, { "epoch": 5.215827338129497, "grad_norm": 0.37567588686943054, "learning_rate": 9.940620797569685e-05, "loss": 0.0129, "step": 5800 }, { "epoch": 5.224820143884892, "grad_norm": 0.36475512385368347, "learning_rate": 9.940196600283082e-05, "loss": 0.0217, "step": 5810 }, { "epoch": 5.233812949640288, "grad_norm": 0.2660433351993561, "learning_rate": 9.939770902295192e-05, "loss": 0.0202, "step": 5820 }, { "epoch": 5.2428057553956835, "grad_norm": 0.2255290448665619, "learning_rate": 9.939343703735329e-05, "loss": 0.0172, "step": 5830 }, { "epoch": 5.251798561151079, "grad_norm": 0.3985702395439148, "learning_rate": 9.938915004733264e-05, "loss": 0.0142, "step": 5840 }, { "epoch": 5.260791366906475, "grad_norm": 0.3580707311630249, "learning_rate": 9.938484805419224e-05, "loss": 0.0154, "step": 5850 }, { "epoch": 5.26978417266187, "grad_norm": 0.46078917384147644, "learning_rate": 9.938053105923894e-05, "loss": 0.0147, "step": 5860 }, { "epoch": 5.278776978417266, "grad_norm": 0.3177846074104309, "learning_rate": 9.937619906378413e-05, "loss": 0.0144, "step": 5870 }, { "epoch": 5.287769784172662, "grad_norm": 0.2357238531112671, "learning_rate": 9.937185206914374e-05, "loss": 0.0173, "step": 5880 }, { "epoch": 5.296762589928058, "grad_norm": 0.363540381193161, "learning_rate": 9.936749007663829e-05, "loss": 0.0135, "step": 5890 }, { "epoch": 5.305755395683454, "grad_norm": 0.3043900728225708, "learning_rate": 9.93631130875928e-05, "loss": 0.016, "step": 5900 }, { "epoch": 5.3147482014388485, "grad_norm": 0.3860911726951599, "learning_rate": 9.935872110333692e-05, "loss": 0.0207, "step": 5910 }, { "epoch": 5.323741007194244, "grad_norm": 0.3087952435016632, "learning_rate": 9.935431412520484e-05, "loss": 0.0177, "step": 5920 }, { "epoch": 5.33273381294964, "grad_norm": 0.2014888972043991, "learning_rate": 9.934989215453523e-05, "loss": 0.0168, "step": 5930 }, { "epoch": 5.341726618705036, "grad_norm": 0.27562060952186584, "learning_rate": 9.934545519267139e-05, "loss": 0.0193, "step": 5940 }, { "epoch": 5.350719424460432, "grad_norm": 0.2748214304447174, "learning_rate": 9.934100324096117e-05, "loss": 0.0131, "step": 5950 }, { "epoch": 5.359712230215827, "grad_norm": 0.2583296298980713, "learning_rate": 9.933653630075692e-05, "loss": 0.014, "step": 5960 }, { "epoch": 5.368705035971223, "grad_norm": 0.3385663628578186, "learning_rate": 9.93320543734156e-05, "loss": 0.0135, "step": 5970 }, { "epoch": 5.377697841726619, "grad_norm": 0.2826624810695648, "learning_rate": 9.932755746029871e-05, "loss": 0.0105, "step": 5980 }, { "epoch": 5.386690647482014, "grad_norm": 0.40540552139282227, "learning_rate": 9.932304556277228e-05, "loss": 0.0156, "step": 5990 }, { "epoch": 5.39568345323741, "grad_norm": 0.3079284131526947, "learning_rate": 9.93185186822069e-05, "loss": 0.0189, "step": 6000 }, { "epoch": 5.404676258992806, "grad_norm": 0.2572016417980194, "learning_rate": 9.931397681997773e-05, "loss": 0.0144, "step": 6010 }, { "epoch": 5.413669064748201, "grad_norm": 0.3550288677215576, "learning_rate": 9.930941997746446e-05, "loss": 0.0135, "step": 6020 }, { "epoch": 5.422661870503597, "grad_norm": 0.275757372379303, "learning_rate": 9.930484815605134e-05, "loss": 0.0149, "step": 6030 }, { "epoch": 5.431654676258993, "grad_norm": 0.32258763909339905, "learning_rate": 9.930026135712717e-05, "loss": 0.0197, "step": 6040 }, { "epoch": 5.440647482014389, "grad_norm": 0.37829795479774475, "learning_rate": 9.92956595820853e-05, "loss": 0.0159, "step": 6050 }, { "epoch": 5.4496402877697845, "grad_norm": 0.28570154309272766, "learning_rate": 9.929104283232362e-05, "loss": 0.0128, "step": 6060 }, { "epoch": 5.4586330935251794, "grad_norm": 0.22984115779399872, "learning_rate": 9.92864111092446e-05, "loss": 0.0143, "step": 6070 }, { "epoch": 5.467625899280575, "grad_norm": 0.33705592155456543, "learning_rate": 9.92817644142552e-05, "loss": 0.0122, "step": 6080 }, { "epoch": 5.476618705035971, "grad_norm": 0.3047640919685364, "learning_rate": 9.927710274876698e-05, "loss": 0.0175, "step": 6090 }, { "epoch": 5.485611510791367, "grad_norm": 0.3513724207878113, "learning_rate": 9.927242611419603e-05, "loss": 0.0177, "step": 6100 }, { "epoch": 5.494604316546763, "grad_norm": 0.21840344369411469, "learning_rate": 9.926773451196301e-05, "loss": 0.0191, "step": 6110 }, { "epoch": 5.503597122302159, "grad_norm": 0.26402801275253296, "learning_rate": 9.926302794349306e-05, "loss": 0.0127, "step": 6120 }, { "epoch": 5.512589928057554, "grad_norm": 0.35281679034233093, "learning_rate": 9.925830641021594e-05, "loss": 0.02, "step": 6130 }, { "epoch": 5.5215827338129495, "grad_norm": 0.35755160450935364, "learning_rate": 9.925356991356593e-05, "loss": 0.0148, "step": 6140 }, { "epoch": 5.530575539568345, "grad_norm": 0.3439820408821106, "learning_rate": 9.924881845498184e-05, "loss": 0.0145, "step": 6150 }, { "epoch": 5.539568345323741, "grad_norm": 0.28238144516944885, "learning_rate": 9.924405203590705e-05, "loss": 0.0152, "step": 6160 }, { "epoch": 5.548561151079137, "grad_norm": 0.4018684923648834, "learning_rate": 9.923927065778946e-05, "loss": 0.0156, "step": 6170 }, { "epoch": 5.557553956834532, "grad_norm": 0.2909099757671356, "learning_rate": 9.923447432208154e-05, "loss": 0.0158, "step": 6180 }, { "epoch": 5.566546762589928, "grad_norm": 0.3199147582054138, "learning_rate": 9.922966303024027e-05, "loss": 0.0136, "step": 6190 }, { "epoch": 5.575539568345324, "grad_norm": 0.32823070883750916, "learning_rate": 9.922483678372721e-05, "loss": 0.0184, "step": 6200 }, { "epoch": 5.58453237410072, "grad_norm": 0.3699824810028076, "learning_rate": 9.921999558400845e-05, "loss": 0.0158, "step": 6210 }, { "epoch": 5.593525179856115, "grad_norm": 0.38300320506095886, "learning_rate": 9.92151394325546e-05, "loss": 0.0206, "step": 6220 }, { "epoch": 5.602517985611511, "grad_norm": 0.44542986154556274, "learning_rate": 9.921026833084084e-05, "loss": 0.0165, "step": 6230 }, { "epoch": 5.611510791366906, "grad_norm": 0.2622534930706024, "learning_rate": 9.920538228034689e-05, "loss": 0.0133, "step": 6240 }, { "epoch": 5.620503597122302, "grad_norm": 0.26979103684425354, "learning_rate": 9.920048128255699e-05, "loss": 0.0155, "step": 6250 }, { "epoch": 5.629496402877698, "grad_norm": 0.3620791733264923, "learning_rate": 9.919556533895995e-05, "loss": 0.0184, "step": 6260 }, { "epoch": 5.638489208633094, "grad_norm": 0.2844395935535431, "learning_rate": 9.919063445104907e-05, "loss": 0.0178, "step": 6270 }, { "epoch": 5.647482014388489, "grad_norm": 0.28123122453689575, "learning_rate": 9.918568862032227e-05, "loss": 0.0164, "step": 6280 }, { "epoch": 5.656474820143885, "grad_norm": 0.2473420798778534, "learning_rate": 9.918072784828194e-05, "loss": 0.0151, "step": 6290 }, { "epoch": 5.66546762589928, "grad_norm": 0.34066158533096313, "learning_rate": 9.917575213643501e-05, "loss": 0.0181, "step": 6300 }, { "epoch": 5.674460431654676, "grad_norm": 0.26071932911872864, "learning_rate": 9.917076148629302e-05, "loss": 0.0133, "step": 6310 }, { "epoch": 5.683453237410072, "grad_norm": 0.3538845479488373, "learning_rate": 9.916575589937196e-05, "loss": 0.0157, "step": 6320 }, { "epoch": 5.692446043165468, "grad_norm": 0.36438947916030884, "learning_rate": 9.916073537719239e-05, "loss": 0.0164, "step": 6330 }, { "epoch": 5.701438848920863, "grad_norm": 0.19403941929340363, "learning_rate": 9.915569992127944e-05, "loss": 0.0147, "step": 6340 }, { "epoch": 5.710431654676259, "grad_norm": 0.207066610455513, "learning_rate": 9.915064953316273e-05, "loss": 0.0142, "step": 6350 }, { "epoch": 5.719424460431655, "grad_norm": 0.1934175342321396, "learning_rate": 9.914558421437645e-05, "loss": 0.013, "step": 6360 }, { "epoch": 5.7284172661870505, "grad_norm": 0.29466554522514343, "learning_rate": 9.914050396645929e-05, "loss": 0.0147, "step": 6370 }, { "epoch": 5.737410071942446, "grad_norm": 0.3039249777793884, "learning_rate": 9.913540879095452e-05, "loss": 0.0164, "step": 6380 }, { "epoch": 5.746402877697841, "grad_norm": 0.343955934047699, "learning_rate": 9.913029868940987e-05, "loss": 0.0189, "step": 6390 }, { "epoch": 5.755395683453237, "grad_norm": 0.26605644822120667, "learning_rate": 9.912517366337772e-05, "loss": 0.0165, "step": 6400 }, { "epoch": 5.764388489208633, "grad_norm": 0.28061211109161377, "learning_rate": 9.912003371441487e-05, "loss": 0.0176, "step": 6410 }, { "epoch": 5.773381294964029, "grad_norm": 0.29045650362968445, "learning_rate": 9.911487884408271e-05, "loss": 0.0146, "step": 6420 }, { "epoch": 5.782374100719425, "grad_norm": 0.2403353750705719, "learning_rate": 9.910970905394719e-05, "loss": 0.0142, "step": 6430 }, { "epoch": 5.7913669064748206, "grad_norm": 0.255099356174469, "learning_rate": 9.91045243455787e-05, "loss": 0.0111, "step": 6440 }, { "epoch": 5.8003597122302155, "grad_norm": 0.19524502754211426, "learning_rate": 9.909932472055225e-05, "loss": 0.0167, "step": 6450 }, { "epoch": 5.809352517985611, "grad_norm": 0.23526139557361603, "learning_rate": 9.909411018044734e-05, "loss": 0.0112, "step": 6460 }, { "epoch": 5.818345323741007, "grad_norm": 0.27532145380973816, "learning_rate": 9.908888072684802e-05, "loss": 0.0148, "step": 6470 }, { "epoch": 5.827338129496403, "grad_norm": 0.28828737139701843, "learning_rate": 9.908363636134285e-05, "loss": 0.0103, "step": 6480 }, { "epoch": 5.836330935251799, "grad_norm": 0.17287935316562653, "learning_rate": 9.907837708552493e-05, "loss": 0.0097, "step": 6490 }, { "epoch": 5.845323741007194, "grad_norm": 0.2100699245929718, "learning_rate": 9.90731029009919e-05, "loss": 0.0121, "step": 6500 }, { "epoch": 5.85431654676259, "grad_norm": 0.38841381669044495, "learning_rate": 9.906781380934589e-05, "loss": 0.0197, "step": 6510 }, { "epoch": 5.863309352517986, "grad_norm": 0.2968766391277313, "learning_rate": 9.906250981219362e-05, "loss": 0.0146, "step": 6520 }, { "epoch": 5.872302158273381, "grad_norm": 0.3161607086658478, "learning_rate": 9.905719091114628e-05, "loss": 0.0117, "step": 6530 }, { "epoch": 5.881294964028777, "grad_norm": 0.4405044615268707, "learning_rate": 9.905185710781964e-05, "loss": 0.0158, "step": 6540 }, { "epoch": 5.890287769784173, "grad_norm": 0.19869445264339447, "learning_rate": 9.904650840383392e-05, "loss": 0.0149, "step": 6550 }, { "epoch": 5.899280575539568, "grad_norm": 0.21362881362438202, "learning_rate": 9.904114480081397e-05, "loss": 0.0106, "step": 6560 }, { "epoch": 5.908273381294964, "grad_norm": 0.18850210309028625, "learning_rate": 9.903576630038906e-05, "loss": 0.0189, "step": 6570 }, { "epoch": 5.91726618705036, "grad_norm": 0.22497278451919556, "learning_rate": 9.903037290419309e-05, "loss": 0.0131, "step": 6580 }, { "epoch": 5.926258992805756, "grad_norm": 0.295487642288208, "learning_rate": 9.902496461386439e-05, "loss": 0.013, "step": 6590 }, { "epoch": 5.935251798561151, "grad_norm": 0.4569377899169922, "learning_rate": 9.901954143104588e-05, "loss": 0.0137, "step": 6600 }, { "epoch": 5.944244604316546, "grad_norm": 0.2921936810016632, "learning_rate": 9.901410335738496e-05, "loss": 0.0115, "step": 6610 }, { "epoch": 5.953237410071942, "grad_norm": 0.3509035110473633, "learning_rate": 9.900865039453358e-05, "loss": 0.0167, "step": 6620 }, { "epoch": 5.962230215827338, "grad_norm": 0.3515634536743164, "learning_rate": 9.900318254414821e-05, "loss": 0.0145, "step": 6630 }, { "epoch": 5.971223021582734, "grad_norm": 0.21421606838703156, "learning_rate": 9.899769980788985e-05, "loss": 0.0124, "step": 6640 }, { "epoch": 5.98021582733813, "grad_norm": 0.24855414032936096, "learning_rate": 9.899220218742398e-05, "loss": 0.0119, "step": 6650 }, { "epoch": 5.989208633093525, "grad_norm": 0.2953544557094574, "learning_rate": 9.898668968442066e-05, "loss": 0.0153, "step": 6660 }, { "epoch": 5.998201438848921, "grad_norm": 0.29817381501197815, "learning_rate": 9.898116230055443e-05, "loss": 0.0135, "step": 6670 }, { "epoch": 6.0071942446043165, "grad_norm": 0.25262001156806946, "learning_rate": 9.897562003750437e-05, "loss": 0.0119, "step": 6680 }, { "epoch": 6.016187050359712, "grad_norm": 0.21685372292995453, "learning_rate": 9.897006289695407e-05, "loss": 0.0166, "step": 6690 }, { "epoch": 6.025179856115108, "grad_norm": 0.3285217583179474, "learning_rate": 9.896449088059164e-05, "loss": 0.0194, "step": 6700 }, { "epoch": 6.034172661870503, "grad_norm": 0.3668815791606903, "learning_rate": 9.89589039901097e-05, "loss": 0.0165, "step": 6710 }, { "epoch": 6.043165467625899, "grad_norm": 0.41207870841026306, "learning_rate": 9.895330222720542e-05, "loss": 0.0146, "step": 6720 }, { "epoch": 6.052158273381295, "grad_norm": 0.256803423166275, "learning_rate": 9.894768559358047e-05, "loss": 0.0214, "step": 6730 }, { "epoch": 6.061151079136691, "grad_norm": 0.3041722774505615, "learning_rate": 9.894205409094101e-05, "loss": 0.0228, "step": 6740 }, { "epoch": 6.070143884892087, "grad_norm": 0.25463688373565674, "learning_rate": 9.893640772099777e-05, "loss": 0.0127, "step": 6750 }, { "epoch": 6.079136690647482, "grad_norm": 0.2159307897090912, "learning_rate": 9.893074648546595e-05, "loss": 0.0176, "step": 6760 }, { "epoch": 6.088129496402877, "grad_norm": 0.2970280051231384, "learning_rate": 9.892507038606528e-05, "loss": 0.017, "step": 6770 }, { "epoch": 6.097122302158273, "grad_norm": 0.23876969516277313, "learning_rate": 9.891937942452003e-05, "loss": 0.0161, "step": 6780 }, { "epoch": 6.106115107913669, "grad_norm": 0.326885461807251, "learning_rate": 9.891367360255895e-05, "loss": 0.0137, "step": 6790 }, { "epoch": 6.115107913669065, "grad_norm": 0.3087436258792877, "learning_rate": 9.890795292191532e-05, "loss": 0.022, "step": 6800 }, { "epoch": 6.124100719424461, "grad_norm": 0.39865806698799133, "learning_rate": 9.890221738432694e-05, "loss": 0.0175, "step": 6810 }, { "epoch": 6.133093525179856, "grad_norm": 0.3163766860961914, "learning_rate": 9.88964669915361e-05, "loss": 0.0139, "step": 6820 }, { "epoch": 6.142086330935252, "grad_norm": 0.3594772517681122, "learning_rate": 9.889070174528963e-05, "loss": 0.0181, "step": 6830 }, { "epoch": 6.151079136690647, "grad_norm": 0.23638330399990082, "learning_rate": 9.888492164733883e-05, "loss": 0.0143, "step": 6840 }, { "epoch": 6.160071942446043, "grad_norm": 0.30321386456489563, "learning_rate": 9.88791266994396e-05, "loss": 0.0156, "step": 6850 }, { "epoch": 6.169064748201439, "grad_norm": 0.2922966778278351, "learning_rate": 9.887331690335223e-05, "loss": 0.0135, "step": 6860 }, { "epoch": 6.178057553956835, "grad_norm": 0.23546849191188812, "learning_rate": 9.886749226084163e-05, "loss": 0.0141, "step": 6870 }, { "epoch": 6.18705035971223, "grad_norm": 0.3592751622200012, "learning_rate": 9.886165277367714e-05, "loss": 0.0224, "step": 6880 }, { "epoch": 6.196043165467626, "grad_norm": 0.2928001284599304, "learning_rate": 9.885579844363265e-05, "loss": 0.0123, "step": 6890 }, { "epoch": 6.205035971223022, "grad_norm": 0.3087821900844574, "learning_rate": 9.884992927248656e-05, "loss": 0.0144, "step": 6900 }, { "epoch": 6.2140287769784175, "grad_norm": 0.20122426748275757, "learning_rate": 9.884404526202178e-05, "loss": 0.0182, "step": 6910 }, { "epoch": 6.223021582733813, "grad_norm": 0.33201470971107483, "learning_rate": 9.883814641402568e-05, "loss": 0.0166, "step": 6920 }, { "epoch": 6.232014388489208, "grad_norm": 0.40648800134658813, "learning_rate": 9.88322327302902e-05, "loss": 0.0129, "step": 6930 }, { "epoch": 6.241007194244604, "grad_norm": 0.23864005506038666, "learning_rate": 9.882630421261176e-05, "loss": 0.0152, "step": 6940 }, { "epoch": 6.25, "grad_norm": 0.2321750372648239, "learning_rate": 9.88203608627913e-05, "loss": 0.0153, "step": 6950 }, { "epoch": 6.258992805755396, "grad_norm": 0.35090172290802, "learning_rate": 9.881440268263422e-05, "loss": 0.0167, "step": 6960 }, { "epoch": 6.267985611510792, "grad_norm": 0.33419692516326904, "learning_rate": 9.880842967395048e-05, "loss": 0.015, "step": 6970 }, { "epoch": 6.276978417266187, "grad_norm": 0.24577505886554718, "learning_rate": 9.880244183855452e-05, "loss": 0.0149, "step": 6980 }, { "epoch": 6.2859712230215825, "grad_norm": 0.3034267723560333, "learning_rate": 9.879643917826527e-05, "loss": 0.0243, "step": 6990 }, { "epoch": 6.294964028776978, "grad_norm": 0.28019770979881287, "learning_rate": 9.87904216949062e-05, "loss": 0.0157, "step": 7000 }, { "epoch": 6.303956834532374, "grad_norm": 0.4423854947090149, "learning_rate": 9.878438939030526e-05, "loss": 0.0177, "step": 7010 }, { "epoch": 6.31294964028777, "grad_norm": 0.2933022379875183, "learning_rate": 9.877834226629489e-05, "loss": 0.016, "step": 7020 }, { "epoch": 6.321942446043165, "grad_norm": 0.19073952734470367, "learning_rate": 9.877228032471206e-05, "loss": 0.0135, "step": 7030 }, { "epoch": 6.330935251798561, "grad_norm": 0.354657918214798, "learning_rate": 9.876620356739823e-05, "loss": 0.0159, "step": 7040 }, { "epoch": 6.339928057553957, "grad_norm": 0.24034789204597473, "learning_rate": 9.876011199619935e-05, "loss": 0.017, "step": 7050 }, { "epoch": 6.348920863309353, "grad_norm": 0.29431310296058655, "learning_rate": 9.875400561296589e-05, "loss": 0.0135, "step": 7060 }, { "epoch": 6.357913669064748, "grad_norm": 0.26254159212112427, "learning_rate": 9.874788441955278e-05, "loss": 0.017, "step": 7070 }, { "epoch": 6.366906474820144, "grad_norm": 0.30860403180122375, "learning_rate": 9.874174841781951e-05, "loss": 0.0171, "step": 7080 }, { "epoch": 6.375899280575539, "grad_norm": 0.2742941379547119, "learning_rate": 9.873559760963003e-05, "loss": 0.0158, "step": 7090 }, { "epoch": 6.384892086330935, "grad_norm": 0.23195849359035492, "learning_rate": 9.872943199685278e-05, "loss": 0.0151, "step": 7100 }, { "epoch": 6.393884892086331, "grad_norm": 0.28078845143318176, "learning_rate": 9.872325158136071e-05, "loss": 0.0158, "step": 7110 }, { "epoch": 6.402877697841727, "grad_norm": 0.3057679235935211, "learning_rate": 9.871705636503128e-05, "loss": 0.014, "step": 7120 }, { "epoch": 6.411870503597123, "grad_norm": 0.1650935411453247, "learning_rate": 9.871084634974641e-05, "loss": 0.0156, "step": 7130 }, { "epoch": 6.420863309352518, "grad_norm": 0.4699845314025879, "learning_rate": 9.870462153739257e-05, "loss": 0.0171, "step": 7140 }, { "epoch": 6.429856115107913, "grad_norm": 0.3665204346179962, "learning_rate": 9.869838192986067e-05, "loss": 0.016, "step": 7150 }, { "epoch": 6.438848920863309, "grad_norm": 0.2584769129753113, "learning_rate": 9.869212752904616e-05, "loss": 0.0201, "step": 7160 }, { "epoch": 6.447841726618705, "grad_norm": 0.29642537236213684, "learning_rate": 9.868585833684894e-05, "loss": 0.0154, "step": 7170 }, { "epoch": 6.456834532374101, "grad_norm": 0.26599782705307007, "learning_rate": 9.867957435517342e-05, "loss": 0.0132, "step": 7180 }, { "epoch": 6.465827338129497, "grad_norm": 0.3340703845024109, "learning_rate": 9.867327558592854e-05, "loss": 0.0139, "step": 7190 }, { "epoch": 6.474820143884892, "grad_norm": 0.34967750310897827, "learning_rate": 9.866696203102766e-05, "loss": 0.0191, "step": 7200 }, { "epoch": 6.483812949640288, "grad_norm": 0.34618183970451355, "learning_rate": 9.86606336923887e-05, "loss": 0.0117, "step": 7210 }, { "epoch": 6.4928057553956835, "grad_norm": 0.2635325491428375, "learning_rate": 9.865429057193403e-05, "loss": 0.0132, "step": 7220 }, { "epoch": 6.501798561151079, "grad_norm": 0.3070240616798401, "learning_rate": 9.864793267159053e-05, "loss": 0.0166, "step": 7230 }, { "epoch": 6.510791366906475, "grad_norm": 0.2510862648487091, "learning_rate": 9.864155999328957e-05, "loss": 0.0139, "step": 7240 }, { "epoch": 6.51978417266187, "grad_norm": 0.3472216725349426, "learning_rate": 9.8635172538967e-05, "loss": 0.017, "step": 7250 }, { "epoch": 6.528776978417266, "grad_norm": 0.24802415072917938, "learning_rate": 9.862877031056312e-05, "loss": 0.0142, "step": 7260 }, { "epoch": 6.537769784172662, "grad_norm": 0.25429603457450867, "learning_rate": 9.862235331002279e-05, "loss": 0.018, "step": 7270 }, { "epoch": 6.546762589928058, "grad_norm": 0.2617835998535156, "learning_rate": 9.861592153929533e-05, "loss": 0.0134, "step": 7280 }, { "epoch": 6.555755395683454, "grad_norm": 0.3904010057449341, "learning_rate": 9.860947500033455e-05, "loss": 0.0139, "step": 7290 }, { "epoch": 6.564748201438849, "grad_norm": 0.23301546275615692, "learning_rate": 9.86030136950987e-05, "loss": 0.0191, "step": 7300 }, { "epoch": 6.573741007194244, "grad_norm": 0.22388359904289246, "learning_rate": 9.85965376255506e-05, "loss": 0.0124, "step": 7310 }, { "epoch": 6.58273381294964, "grad_norm": 0.26173508167266846, "learning_rate": 9.859004679365747e-05, "loss": 0.013, "step": 7320 }, { "epoch": 6.591726618705036, "grad_norm": 0.24747982621192932, "learning_rate": 9.858354120139108e-05, "loss": 0.0137, "step": 7330 }, { "epoch": 6.600719424460432, "grad_norm": 0.29107666015625, "learning_rate": 9.857702085072764e-05, "loss": 0.0144, "step": 7340 }, { "epoch": 6.609712230215827, "grad_norm": 0.24783724546432495, "learning_rate": 9.857048574364787e-05, "loss": 0.0141, "step": 7350 }, { "epoch": 6.618705035971223, "grad_norm": 0.186409592628479, "learning_rate": 9.856393588213698e-05, "loss": 0.0172, "step": 7360 }, { "epoch": 6.627697841726619, "grad_norm": 0.30081653594970703, "learning_rate": 9.855737126818458e-05, "loss": 0.0177, "step": 7370 }, { "epoch": 6.636690647482014, "grad_norm": 0.2902117073535919, "learning_rate": 9.855079190378491e-05, "loss": 0.0153, "step": 7380 }, { "epoch": 6.64568345323741, "grad_norm": 0.2269926220178604, "learning_rate": 9.854419779093655e-05, "loss": 0.0143, "step": 7390 }, { "epoch": 6.654676258992806, "grad_norm": 0.3138973116874695, "learning_rate": 9.853758893164264e-05, "loss": 0.0117, "step": 7400 }, { "epoch": 6.663669064748201, "grad_norm": 0.17554324865341187, "learning_rate": 9.853096532791078e-05, "loss": 0.0132, "step": 7410 }, { "epoch": 6.672661870503597, "grad_norm": 0.2607845664024353, "learning_rate": 9.852432698175304e-05, "loss": 0.0201, "step": 7420 }, { "epoch": 6.681654676258993, "grad_norm": 0.25775662064552307, "learning_rate": 9.851767389518597e-05, "loss": 0.0172, "step": 7430 }, { "epoch": 6.690647482014389, "grad_norm": 0.2720528542995453, "learning_rate": 9.85110060702306e-05, "loss": 0.0138, "step": 7440 }, { "epoch": 6.6996402877697845, "grad_norm": 0.37452414631843567, "learning_rate": 9.850432350891245e-05, "loss": 0.0122, "step": 7450 }, { "epoch": 6.7086330935251794, "grad_norm": 0.21141643822193146, "learning_rate": 9.84976262132615e-05, "loss": 0.0125, "step": 7460 }, { "epoch": 6.717625899280575, "grad_norm": 0.24193207919597626, "learning_rate": 9.849091418531222e-05, "loss": 0.0151, "step": 7470 }, { "epoch": 6.726618705035971, "grad_norm": 0.2563992440700531, "learning_rate": 9.848418742710353e-05, "loss": 0.0189, "step": 7480 }, { "epoch": 6.735611510791367, "grad_norm": 0.2745803892612457, "learning_rate": 9.847744594067885e-05, "loss": 0.0144, "step": 7490 }, { "epoch": 6.744604316546763, "grad_norm": 0.532640278339386, "learning_rate": 9.847068972808607e-05, "loss": 0.017, "step": 7500 }, { "epoch": 6.753597122302159, "grad_norm": 0.36586374044418335, "learning_rate": 9.846391879137756e-05, "loss": 0.0137, "step": 7510 }, { "epoch": 6.762589928057554, "grad_norm": 0.33521145582199097, "learning_rate": 9.845713313261012e-05, "loss": 0.017, "step": 7520 }, { "epoch": 6.7715827338129495, "grad_norm": 0.3294682502746582, "learning_rate": 9.845033275384505e-05, "loss": 0.0215, "step": 7530 }, { "epoch": 6.780575539568345, "grad_norm": 0.23994086682796478, "learning_rate": 9.844351765714818e-05, "loss": 0.0148, "step": 7540 }, { "epoch": 6.789568345323741, "grad_norm": 0.3577191233634949, "learning_rate": 9.843668784458971e-05, "loss": 0.0169, "step": 7550 }, { "epoch": 6.798561151079137, "grad_norm": 0.33507096767425537, "learning_rate": 9.842984331824437e-05, "loss": 0.0124, "step": 7560 }, { "epoch": 6.807553956834532, "grad_norm": 0.22500911355018616, "learning_rate": 9.842298408019133e-05, "loss": 0.0095, "step": 7570 }, { "epoch": 6.816546762589928, "grad_norm": 0.31267327070236206, "learning_rate": 9.841611013251429e-05, "loss": 0.0148, "step": 7580 }, { "epoch": 6.825539568345324, "grad_norm": 0.19126851856708527, "learning_rate": 9.840922147730133e-05, "loss": 0.0149, "step": 7590 }, { "epoch": 6.83453237410072, "grad_norm": 0.33014538884162903, "learning_rate": 9.840231811664506e-05, "loss": 0.0139, "step": 7600 }, { "epoch": 6.843525179856115, "grad_norm": 0.2650357782840729, "learning_rate": 9.839540005264252e-05, "loss": 0.0133, "step": 7610 }, { "epoch": 6.852517985611511, "grad_norm": 0.27233779430389404, "learning_rate": 9.838846728739527e-05, "loss": 0.0112, "step": 7620 }, { "epoch": 6.861510791366906, "grad_norm": 0.36017823219299316, "learning_rate": 9.838151982300927e-05, "loss": 0.0221, "step": 7630 }, { "epoch": 6.870503597122302, "grad_norm": 0.38159939646720886, "learning_rate": 9.8374557661595e-05, "loss": 0.0129, "step": 7640 }, { "epoch": 6.879496402877698, "grad_norm": 0.25477907061576843, "learning_rate": 9.836758080526735e-05, "loss": 0.0185, "step": 7650 }, { "epoch": 6.888489208633094, "grad_norm": 0.22923333942890167, "learning_rate": 9.836058925614575e-05, "loss": 0.0157, "step": 7660 }, { "epoch": 6.897482014388489, "grad_norm": 0.14186909794807434, "learning_rate": 9.8353583016354e-05, "loss": 0.0129, "step": 7670 }, { "epoch": 6.906474820143885, "grad_norm": 0.2799581289291382, "learning_rate": 9.834656208802044e-05, "loss": 0.0165, "step": 7680 }, { "epoch": 6.91546762589928, "grad_norm": 0.25981229543685913, "learning_rate": 9.833952647327784e-05, "loss": 0.013, "step": 7690 }, { "epoch": 6.924460431654676, "grad_norm": 0.23918108642101288, "learning_rate": 9.833247617426342e-05, "loss": 0.013, "step": 7700 }, { "epoch": 6.933453237410072, "grad_norm": 0.22677376866340637, "learning_rate": 9.832541119311889e-05, "loss": 0.0137, "step": 7710 }, { "epoch": 6.942446043165468, "grad_norm": 0.37426236271858215, "learning_rate": 9.83183315319904e-05, "loss": 0.0198, "step": 7720 }, { "epoch": 6.951438848920863, "grad_norm": 0.4243506193161011, "learning_rate": 9.831123719302855e-05, "loss": 0.0182, "step": 7730 }, { "epoch": 6.960431654676259, "grad_norm": 0.35298433899879456, "learning_rate": 9.830412817838842e-05, "loss": 0.0156, "step": 7740 }, { "epoch": 6.969424460431655, "grad_norm": 0.3635365068912506, "learning_rate": 9.829700449022956e-05, "loss": 0.0171, "step": 7750 }, { "epoch": 6.9784172661870505, "grad_norm": 0.3278764486312866, "learning_rate": 9.828986613071593e-05, "loss": 0.0153, "step": 7760 }, { "epoch": 6.987410071942446, "grad_norm": 0.3794202506542206, "learning_rate": 9.828271310201601e-05, "loss": 0.0143, "step": 7770 }, { "epoch": 6.996402877697841, "grad_norm": 0.3186244070529938, "learning_rate": 9.827554540630268e-05, "loss": 0.014, "step": 7780 }, { "epoch": 7.005395683453237, "grad_norm": 0.3380180597305298, "learning_rate": 9.826836304575329e-05, "loss": 0.0186, "step": 7790 }, { "epoch": 7.014388489208633, "grad_norm": 0.2902567386627197, "learning_rate": 9.826116602254966e-05, "loss": 0.0124, "step": 7800 }, { "epoch": 7.023381294964029, "grad_norm": 0.3330790400505066, "learning_rate": 9.825395433887805e-05, "loss": 0.0149, "step": 7810 }, { "epoch": 7.032374100719425, "grad_norm": 0.2913691997528076, "learning_rate": 9.824672799692917e-05, "loss": 0.0134, "step": 7820 }, { "epoch": 7.0413669064748206, "grad_norm": 0.36073336005210876, "learning_rate": 9.823948699889823e-05, "loss": 0.0178, "step": 7830 }, { "epoch": 7.0503597122302155, "grad_norm": 0.2325957864522934, "learning_rate": 9.823223134698483e-05, "loss": 0.0139, "step": 7840 }, { "epoch": 7.059352517985611, "grad_norm": 0.24054881930351257, "learning_rate": 9.822496104339303e-05, "loss": 0.0144, "step": 7850 }, { "epoch": 7.068345323741007, "grad_norm": 0.3253759741783142, "learning_rate": 9.821767609033138e-05, "loss": 0.0145, "step": 7860 }, { "epoch": 7.077338129496403, "grad_norm": 0.25350624322891235, "learning_rate": 9.821037649001284e-05, "loss": 0.0147, "step": 7870 }, { "epoch": 7.086330935251799, "grad_norm": 0.2976308763027191, "learning_rate": 9.820306224465486e-05, "loss": 0.0144, "step": 7880 }, { "epoch": 7.095323741007194, "grad_norm": 0.23495031893253326, "learning_rate": 9.819573335647928e-05, "loss": 0.014, "step": 7890 }, { "epoch": 7.10431654676259, "grad_norm": 0.2940058708190918, "learning_rate": 9.818838982771246e-05, "loss": 0.0175, "step": 7900 }, { "epoch": 7.113309352517986, "grad_norm": 0.24272629618644714, "learning_rate": 9.818103166058514e-05, "loss": 0.0124, "step": 7910 }, { "epoch": 7.122302158273381, "grad_norm": 0.22094126045703888, "learning_rate": 9.817365885733254e-05, "loss": 0.0133, "step": 7920 }, { "epoch": 7.131294964028777, "grad_norm": 0.1992361843585968, "learning_rate": 9.816627142019434e-05, "loss": 0.0133, "step": 7930 }, { "epoch": 7.140287769784172, "grad_norm": 0.22654350101947784, "learning_rate": 9.815886935141463e-05, "loss": 0.0158, "step": 7940 }, { "epoch": 7.149280575539568, "grad_norm": 0.16148202121257782, "learning_rate": 9.8151452653242e-05, "loss": 0.0112, "step": 7950 }, { "epoch": 7.158273381294964, "grad_norm": 0.3512052297592163, "learning_rate": 9.814402132792939e-05, "loss": 0.0133, "step": 7960 }, { "epoch": 7.16726618705036, "grad_norm": 0.21312746405601501, "learning_rate": 9.813657537773428e-05, "loss": 0.0161, "step": 7970 }, { "epoch": 7.176258992805756, "grad_norm": 0.2715415954589844, "learning_rate": 9.812911480491854e-05, "loss": 0.0139, "step": 7980 }, { "epoch": 7.1852517985611515, "grad_norm": 0.2871270775794983, "learning_rate": 9.81216396117485e-05, "loss": 0.014, "step": 7990 }, { "epoch": 7.194244604316546, "grad_norm": 0.26196718215942383, "learning_rate": 9.811414980049491e-05, "loss": 0.017, "step": 8000 }, { "epoch": 7.203237410071942, "grad_norm": 0.21282245218753815, "learning_rate": 9.810664537343301e-05, "loss": 0.0138, "step": 8010 }, { "epoch": 7.212230215827338, "grad_norm": 0.23533280193805695, "learning_rate": 9.809912633284243e-05, "loss": 0.0137, "step": 8020 }, { "epoch": 7.221223021582734, "grad_norm": 0.21808907389640808, "learning_rate": 9.809159268100725e-05, "loss": 0.0121, "step": 8030 }, { "epoch": 7.23021582733813, "grad_norm": 0.27711760997772217, "learning_rate": 9.808404442021599e-05, "loss": 0.0145, "step": 8040 }, { "epoch": 7.239208633093525, "grad_norm": 0.23941557109355927, "learning_rate": 9.807648155276163e-05, "loss": 0.0149, "step": 8050 }, { "epoch": 7.248201438848921, "grad_norm": 0.3589705228805542, "learning_rate": 9.806890408094156e-05, "loss": 0.0154, "step": 8060 }, { "epoch": 7.2571942446043165, "grad_norm": 0.2471356838941574, "learning_rate": 9.806131200705761e-05, "loss": 0.0141, "step": 8070 }, { "epoch": 7.266187050359712, "grad_norm": 0.2641923427581787, "learning_rate": 9.805370533341605e-05, "loss": 0.0168, "step": 8080 }, { "epoch": 7.275179856115108, "grad_norm": 0.20023564994335175, "learning_rate": 9.804608406232762e-05, "loss": 0.0133, "step": 8090 }, { "epoch": 7.284172661870503, "grad_norm": 0.25396618247032166, "learning_rate": 9.803844819610741e-05, "loss": 0.014, "step": 8100 }, { "epoch": 7.293165467625899, "grad_norm": 0.25542977452278137, "learning_rate": 9.803079773707504e-05, "loss": 0.0152, "step": 8110 }, { "epoch": 7.302158273381295, "grad_norm": 0.35425037145614624, "learning_rate": 9.802313268755447e-05, "loss": 0.0182, "step": 8120 }, { "epoch": 7.311151079136691, "grad_norm": 0.34697073698043823, "learning_rate": 9.801545304987419e-05, "loss": 0.0155, "step": 8130 }, { "epoch": 7.320143884892087, "grad_norm": 0.20295856893062592, "learning_rate": 9.800775882636704e-05, "loss": 0.0141, "step": 8140 }, { "epoch": 7.329136690647482, "grad_norm": 0.2415427416563034, "learning_rate": 9.800005001937034e-05, "loss": 0.0116, "step": 8150 }, { "epoch": 7.338129496402877, "grad_norm": 0.33267131447792053, "learning_rate": 9.79923266312258e-05, "loss": 0.0133, "step": 8160 }, { "epoch": 7.347122302158273, "grad_norm": 0.18135084211826324, "learning_rate": 9.79845886642796e-05, "loss": 0.014, "step": 8170 }, { "epoch": 7.356115107913669, "grad_norm": 0.17997874319553375, "learning_rate": 9.797683612088233e-05, "loss": 0.0125, "step": 8180 }, { "epoch": 7.365107913669065, "grad_norm": 0.162912517786026, "learning_rate": 9.796906900338898e-05, "loss": 0.0156, "step": 8190 }, { "epoch": 7.374100719424461, "grad_norm": 0.37409913539886475, "learning_rate": 9.796128731415903e-05, "loss": 0.015, "step": 8200 }, { "epoch": 7.383093525179856, "grad_norm": 0.2886311411857605, "learning_rate": 9.795349105555634e-05, "loss": 0.0147, "step": 8210 }, { "epoch": 7.392086330935252, "grad_norm": 0.31851452589035034, "learning_rate": 9.794568022994922e-05, "loss": 0.0143, "step": 8220 }, { "epoch": 7.401079136690647, "grad_norm": 0.35819700360298157, "learning_rate": 9.793785483971034e-05, "loss": 0.0145, "step": 8230 }, { "epoch": 7.410071942446043, "grad_norm": 0.29011157155036926, "learning_rate": 9.793001488721691e-05, "loss": 0.0139, "step": 8240 }, { "epoch": 7.419064748201439, "grad_norm": 0.27372094988822937, "learning_rate": 9.792216037485047e-05, "loss": 0.0133, "step": 8250 }, { "epoch": 7.428057553956835, "grad_norm": 0.24649383127689362, "learning_rate": 9.791429130499704e-05, "loss": 0.015, "step": 8260 }, { "epoch": 7.43705035971223, "grad_norm": 0.2831062972545624, "learning_rate": 9.790640768004698e-05, "loss": 0.015, "step": 8270 }, { "epoch": 7.446043165467626, "grad_norm": 0.43599945306777954, "learning_rate": 9.789850950239518e-05, "loss": 0.0158, "step": 8280 }, { "epoch": 7.455035971223022, "grad_norm": 0.2261166125535965, "learning_rate": 9.789059677444089e-05, "loss": 0.017, "step": 8290 }, { "epoch": 7.4640287769784175, "grad_norm": 0.2138502597808838, "learning_rate": 9.788266949858776e-05, "loss": 0.0094, "step": 8300 }, { "epoch": 7.473021582733813, "grad_norm": 0.36814284324645996, "learning_rate": 9.787472767724392e-05, "loss": 0.0091, "step": 8310 }, { "epoch": 7.482014388489208, "grad_norm": 0.2569561004638672, "learning_rate": 9.786677131282185e-05, "loss": 0.0123, "step": 8320 }, { "epoch": 7.491007194244604, "grad_norm": 0.3180640935897827, "learning_rate": 9.785880040773853e-05, "loss": 0.0136, "step": 8330 }, { "epoch": 7.5, "grad_norm": 0.4227116107940674, "learning_rate": 9.785081496441527e-05, "loss": 0.0172, "step": 8340 }, { "epoch": 7.508992805755396, "grad_norm": 0.39142587780952454, "learning_rate": 9.784281498527785e-05, "loss": 0.013, "step": 8350 }, { "epoch": 7.517985611510792, "grad_norm": 0.31163540482521057, "learning_rate": 9.783480047275646e-05, "loss": 0.0183, "step": 8360 }, { "epoch": 7.5269784172661875, "grad_norm": 0.19598673284053802, "learning_rate": 9.78267714292857e-05, "loss": 0.0183, "step": 8370 }, { "epoch": 7.5359712230215825, "grad_norm": 0.3933129608631134, "learning_rate": 9.781872785730454e-05, "loss": 0.0137, "step": 8380 }, { "epoch": 7.544964028776978, "grad_norm": 0.23103243112564087, "learning_rate": 9.781066975925646e-05, "loss": 0.0123, "step": 8390 }, { "epoch": 7.553956834532374, "grad_norm": 0.21570663154125214, "learning_rate": 9.780259713758928e-05, "loss": 0.0111, "step": 8400 }, { "epoch": 7.56294964028777, "grad_norm": 0.38326698541641235, "learning_rate": 9.779450999475524e-05, "loss": 0.0114, "step": 8410 }, { "epoch": 7.571942446043165, "grad_norm": 0.2132871150970459, "learning_rate": 9.7786408333211e-05, "loss": 0.0126, "step": 8420 }, { "epoch": 7.580935251798561, "grad_norm": 0.35799601674079895, "learning_rate": 9.777829215541764e-05, "loss": 0.0156, "step": 8430 }, { "epoch": 7.589928057553957, "grad_norm": 0.21733297407627106, "learning_rate": 9.777016146384064e-05, "loss": 0.0113, "step": 8440 }, { "epoch": 7.598920863309353, "grad_norm": 0.24193908274173737, "learning_rate": 9.776201626094988e-05, "loss": 0.0115, "step": 8450 }, { "epoch": 7.607913669064748, "grad_norm": 0.2563636898994446, "learning_rate": 9.775385654921965e-05, "loss": 0.0107, "step": 8460 }, { "epoch": 7.616906474820144, "grad_norm": 0.15761901438236237, "learning_rate": 9.774568233112868e-05, "loss": 0.0133, "step": 8470 }, { "epoch": 7.625899280575539, "grad_norm": 0.33520352840423584, "learning_rate": 9.773749360916007e-05, "loss": 0.0168, "step": 8480 }, { "epoch": 7.634892086330935, "grad_norm": 0.322664350271225, "learning_rate": 9.772929038580134e-05, "loss": 0.014, "step": 8490 }, { "epoch": 7.643884892086331, "grad_norm": 0.2394154667854309, "learning_rate": 9.772107266354439e-05, "loss": 0.0164, "step": 8500 }, { "epoch": 7.652877697841727, "grad_norm": 0.31150689721107483, "learning_rate": 9.77128404448856e-05, "loss": 0.0205, "step": 8510 }, { "epoch": 7.661870503597123, "grad_norm": 0.2392643392086029, "learning_rate": 9.770459373232565e-05, "loss": 0.0104, "step": 8520 }, { "epoch": 7.670863309352518, "grad_norm": 0.3756093680858612, "learning_rate": 9.769633252836969e-05, "loss": 0.0157, "step": 8530 }, { "epoch": 7.679856115107913, "grad_norm": 0.31293150782585144, "learning_rate": 9.768805683552724e-05, "loss": 0.0108, "step": 8540 }, { "epoch": 7.688848920863309, "grad_norm": 0.3132123053073883, "learning_rate": 9.767976665631228e-05, "loss": 0.0118, "step": 8550 }, { "epoch": 7.697841726618705, "grad_norm": 0.3254460096359253, "learning_rate": 9.767146199324311e-05, "loss": 0.0137, "step": 8560 }, { "epoch": 7.706834532374101, "grad_norm": 0.22962415218353271, "learning_rate": 9.766314284884249e-05, "loss": 0.0108, "step": 8570 }, { "epoch": 7.715827338129497, "grad_norm": 0.24315223097801208, "learning_rate": 9.765480922563752e-05, "loss": 0.0151, "step": 8580 }, { "epoch": 7.724820143884892, "grad_norm": 0.37839409708976746, "learning_rate": 9.764646112615978e-05, "loss": 0.0161, "step": 8590 }, { "epoch": 7.733812949640288, "grad_norm": 0.27892330288887024, "learning_rate": 9.763809855294517e-05, "loss": 0.0158, "step": 8600 }, { "epoch": 7.7428057553956835, "grad_norm": 0.18356046080589294, "learning_rate": 9.762972150853404e-05, "loss": 0.0121, "step": 8610 }, { "epoch": 7.751798561151079, "grad_norm": 0.31612858176231384, "learning_rate": 9.762132999547111e-05, "loss": 0.011, "step": 8620 }, { "epoch": 7.760791366906475, "grad_norm": 0.2611798346042633, "learning_rate": 9.761292401630549e-05, "loss": 0.0158, "step": 8630 }, { "epoch": 7.76978417266187, "grad_norm": 0.17813576757907867, "learning_rate": 9.76045035735907e-05, "loss": 0.01, "step": 8640 }, { "epoch": 7.778776978417266, "grad_norm": 0.28628915548324585, "learning_rate": 9.759606866988464e-05, "loss": 0.0134, "step": 8650 }, { "epoch": 7.787769784172662, "grad_norm": 0.3146328628063202, "learning_rate": 9.758761930774963e-05, "loss": 0.0112, "step": 8660 }, { "epoch": 7.796762589928058, "grad_norm": 0.2696491479873657, "learning_rate": 9.757915548975235e-05, "loss": 0.0126, "step": 8670 }, { "epoch": 7.805755395683454, "grad_norm": 0.30333197116851807, "learning_rate": 9.757067721846389e-05, "loss": 0.0148, "step": 8680 }, { "epoch": 7.814748201438849, "grad_norm": 0.3656044900417328, "learning_rate": 9.756218449645971e-05, "loss": 0.0134, "step": 8690 }, { "epoch": 7.823741007194244, "grad_norm": 0.31708964705467224, "learning_rate": 9.75536773263197e-05, "loss": 0.013, "step": 8700 }, { "epoch": 7.83273381294964, "grad_norm": 0.2991757392883301, "learning_rate": 9.75451557106281e-05, "loss": 0.0103, "step": 8710 }, { "epoch": 7.841726618705036, "grad_norm": 0.24203068017959595, "learning_rate": 9.753661965197354e-05, "loss": 0.0127, "step": 8720 }, { "epoch": 7.850719424460432, "grad_norm": 0.23689019680023193, "learning_rate": 9.752806915294908e-05, "loss": 0.0131, "step": 8730 }, { "epoch": 7.859712230215827, "grad_norm": 0.3502141535282135, "learning_rate": 9.75195042161521e-05, "loss": 0.0128, "step": 8740 }, { "epoch": 7.868705035971223, "grad_norm": 0.3144720196723938, "learning_rate": 9.751092484418442e-05, "loss": 0.0116, "step": 8750 }, { "epoch": 7.877697841726619, "grad_norm": 0.24209707975387573, "learning_rate": 9.750233103965224e-05, "loss": 0.0148, "step": 8760 }, { "epoch": 7.886690647482014, "grad_norm": 0.29126518964767456, "learning_rate": 9.749372280516611e-05, "loss": 0.0144, "step": 8770 }, { "epoch": 7.89568345323741, "grad_norm": 0.2701042592525482, "learning_rate": 9.748510014334097e-05, "loss": 0.0168, "step": 8780 }, { "epoch": 7.904676258992806, "grad_norm": 0.3938939869403839, "learning_rate": 9.747646305679621e-05, "loss": 0.0165, "step": 8790 }, { "epoch": 7.913669064748201, "grad_norm": 0.26875096559524536, "learning_rate": 9.74678115481555e-05, "loss": 0.0191, "step": 8800 }, { "epoch": 7.922661870503597, "grad_norm": 0.21883656084537506, "learning_rate": 9.745914562004696e-05, "loss": 0.0157, "step": 8810 }, { "epoch": 7.931654676258993, "grad_norm": 0.2579021453857422, "learning_rate": 9.745046527510307e-05, "loss": 0.0116, "step": 8820 }, { "epoch": 7.940647482014389, "grad_norm": 0.39173564314842224, "learning_rate": 9.744177051596068e-05, "loss": 0.0171, "step": 8830 }, { "epoch": 7.9496402877697845, "grad_norm": 0.43187591433525085, "learning_rate": 9.743306134526105e-05, "loss": 0.0151, "step": 8840 }, { "epoch": 7.9586330935251794, "grad_norm": 0.2884480655193329, "learning_rate": 9.742433776564977e-05, "loss": 0.0147, "step": 8850 }, { "epoch": 7.967625899280575, "grad_norm": 0.19106455147266388, "learning_rate": 9.741559977977683e-05, "loss": 0.0133, "step": 8860 }, { "epoch": 7.976618705035971, "grad_norm": 0.21319887042045593, "learning_rate": 9.740684739029661e-05, "loss": 0.0146, "step": 8870 }, { "epoch": 7.985611510791367, "grad_norm": 0.26533445715904236, "learning_rate": 9.739808059986789e-05, "loss": 0.0208, "step": 8880 }, { "epoch": 7.994604316546763, "grad_norm": 0.2718990445137024, "learning_rate": 9.738929941115373e-05, "loss": 0.0132, "step": 8890 }, { "epoch": 8.003597122302159, "grad_norm": 0.30758169293403625, "learning_rate": 9.738050382682167e-05, "loss": 0.0087, "step": 8900 }, { "epoch": 8.012589928057555, "grad_norm": 0.336193710565567, "learning_rate": 9.737169384954355e-05, "loss": 0.0164, "step": 8910 }, { "epoch": 8.02158273381295, "grad_norm": 0.2799166738986969, "learning_rate": 9.736286948199562e-05, "loss": 0.014, "step": 8920 }, { "epoch": 8.030575539568344, "grad_norm": 0.27806058526039124, "learning_rate": 9.735403072685848e-05, "loss": 0.0114, "step": 8930 }, { "epoch": 8.03956834532374, "grad_norm": 0.28222304582595825, "learning_rate": 9.734517758681712e-05, "loss": 0.0139, "step": 8940 }, { "epoch": 8.048561151079136, "grad_norm": 0.25649574398994446, "learning_rate": 9.733631006456088e-05, "loss": 0.0136, "step": 8950 }, { "epoch": 8.057553956834532, "grad_norm": 0.29809045791625977, "learning_rate": 9.732742816278348e-05, "loss": 0.0153, "step": 8960 }, { "epoch": 8.066546762589928, "grad_norm": 0.23999394476413727, "learning_rate": 9.731853188418302e-05, "loss": 0.0138, "step": 8970 }, { "epoch": 8.075539568345324, "grad_norm": 0.2642103433609009, "learning_rate": 9.730962123146194e-05, "loss": 0.0166, "step": 8980 }, { "epoch": 8.08453237410072, "grad_norm": 0.35026654601097107, "learning_rate": 9.730069620732709e-05, "loss": 0.015, "step": 8990 }, { "epoch": 8.093525179856115, "grad_norm": 0.27499619126319885, "learning_rate": 9.72917568144896e-05, "loss": 0.014, "step": 9000 }, { "epoch": 8.102517985611511, "grad_norm": 0.38224107027053833, "learning_rate": 9.728280305566509e-05, "loss": 0.0194, "step": 9010 }, { "epoch": 8.111510791366907, "grad_norm": 0.2625437378883362, "learning_rate": 9.727383493357343e-05, "loss": 0.0196, "step": 9020 }, { "epoch": 8.120503597122303, "grad_norm": 0.29659032821655273, "learning_rate": 9.726485245093891e-05, "loss": 0.0151, "step": 9030 }, { "epoch": 8.129496402877697, "grad_norm": 0.2446860373020172, "learning_rate": 9.725585561049018e-05, "loss": 0.0131, "step": 9040 }, { "epoch": 8.138489208633093, "grad_norm": 0.22687260806560516, "learning_rate": 9.724684441496022e-05, "loss": 0.0168, "step": 9050 }, { "epoch": 8.147482014388489, "grad_norm": 0.32025641202926636, "learning_rate": 9.72378188670864e-05, "loss": 0.0132, "step": 9060 }, { "epoch": 8.156474820143885, "grad_norm": 0.23571762442588806, "learning_rate": 9.722877896961047e-05, "loss": 0.0129, "step": 9070 }, { "epoch": 8.16546762589928, "grad_norm": 0.22234782576560974, "learning_rate": 9.721972472527848e-05, "loss": 0.0134, "step": 9080 }, { "epoch": 8.174460431654676, "grad_norm": 0.2579546570777893, "learning_rate": 9.721065613684089e-05, "loss": 0.0121, "step": 9090 }, { "epoch": 8.183453237410072, "grad_norm": 0.20253287255764008, "learning_rate": 9.72015732070525e-05, "loss": 0.0109, "step": 9100 }, { "epoch": 8.192446043165468, "grad_norm": 0.3251790702342987, "learning_rate": 9.719247593867244e-05, "loss": 0.0121, "step": 9110 }, { "epoch": 8.201438848920864, "grad_norm": 0.31158289313316345, "learning_rate": 9.718336433446423e-05, "loss": 0.0127, "step": 9120 }, { "epoch": 8.21043165467626, "grad_norm": 0.3265640139579773, "learning_rate": 9.717423839719574e-05, "loss": 0.0155, "step": 9130 }, { "epoch": 8.219424460431656, "grad_norm": 0.4963051378726959, "learning_rate": 9.71650981296392e-05, "loss": 0.0135, "step": 9140 }, { "epoch": 8.22841726618705, "grad_norm": 0.43739551305770874, "learning_rate": 9.715594353457118e-05, "loss": 0.0139, "step": 9150 }, { "epoch": 8.237410071942445, "grad_norm": 0.2776900827884674, "learning_rate": 9.714677461477257e-05, "loss": 0.015, "step": 9160 }, { "epoch": 8.246402877697841, "grad_norm": 0.25526297092437744, "learning_rate": 9.713759137302869e-05, "loss": 0.0145, "step": 9170 }, { "epoch": 8.255395683453237, "grad_norm": 0.3066314160823822, "learning_rate": 9.712839381212914e-05, "loss": 0.0132, "step": 9180 }, { "epoch": 8.264388489208633, "grad_norm": 0.2635073661804199, "learning_rate": 9.71191819348679e-05, "loss": 0.0129, "step": 9190 }, { "epoch": 8.273381294964029, "grad_norm": 0.2569413483142853, "learning_rate": 9.710995574404331e-05, "loss": 0.0143, "step": 9200 }, { "epoch": 8.282374100719425, "grad_norm": 0.3898301124572754, "learning_rate": 9.710071524245802e-05, "loss": 0.0182, "step": 9210 }, { "epoch": 8.29136690647482, "grad_norm": 0.3349723517894745, "learning_rate": 9.709146043291906e-05, "loss": 0.0115, "step": 9220 }, { "epoch": 8.300359712230216, "grad_norm": 0.3721258342266083, "learning_rate": 9.70821913182378e-05, "loss": 0.0159, "step": 9230 }, { "epoch": 8.309352517985612, "grad_norm": 0.2633220851421356, "learning_rate": 9.707290790122995e-05, "loss": 0.0143, "step": 9240 }, { "epoch": 8.318345323741006, "grad_norm": 0.19288669526576996, "learning_rate": 9.706361018471557e-05, "loss": 0.0154, "step": 9250 }, { "epoch": 8.327338129496402, "grad_norm": 0.29984554648399353, "learning_rate": 9.705429817151906e-05, "loss": 0.0135, "step": 9260 }, { "epoch": 8.336330935251798, "grad_norm": 0.2699214816093445, "learning_rate": 9.704497186446917e-05, "loss": 0.0127, "step": 9270 }, { "epoch": 8.345323741007194, "grad_norm": 0.26467958092689514, "learning_rate": 9.703563126639896e-05, "loss": 0.0182, "step": 9280 }, { "epoch": 8.35431654676259, "grad_norm": 0.38897502422332764, "learning_rate": 9.70262763801459e-05, "loss": 0.0128, "step": 9290 }, { "epoch": 8.363309352517986, "grad_norm": 0.23433686792850494, "learning_rate": 9.701690720855171e-05, "loss": 0.0181, "step": 9300 }, { "epoch": 8.372302158273381, "grad_norm": 0.3450007140636444, "learning_rate": 9.700752375446253e-05, "loss": 0.0156, "step": 9310 }, { "epoch": 8.381294964028777, "grad_norm": 0.39197105169296265, "learning_rate": 9.69981260207288e-05, "loss": 0.0258, "step": 9320 }, { "epoch": 8.390287769784173, "grad_norm": 0.35622638463974, "learning_rate": 9.698871401020529e-05, "loss": 0.0151, "step": 9330 }, { "epoch": 8.399280575539569, "grad_norm": 0.3915688395500183, "learning_rate": 9.697928772575112e-05, "loss": 0.0147, "step": 9340 }, { "epoch": 8.408273381294965, "grad_norm": 0.4114217460155487, "learning_rate": 9.696984717022976e-05, "loss": 0.0121, "step": 9350 }, { "epoch": 8.417266187050359, "grad_norm": 0.3659985661506653, "learning_rate": 9.6960392346509e-05, "loss": 0.0113, "step": 9360 }, { "epoch": 8.426258992805755, "grad_norm": 0.2351645678281784, "learning_rate": 9.695092325746097e-05, "loss": 0.0221, "step": 9370 }, { "epoch": 8.43525179856115, "grad_norm": 0.2771818935871124, "learning_rate": 9.694143990596211e-05, "loss": 0.0119, "step": 9380 }, { "epoch": 8.444244604316546, "grad_norm": 0.25896593928337097, "learning_rate": 9.693194229489325e-05, "loss": 0.023, "step": 9390 }, { "epoch": 8.453237410071942, "grad_norm": 0.2459593415260315, "learning_rate": 9.692243042713944e-05, "loss": 0.0121, "step": 9400 }, { "epoch": 8.462230215827338, "grad_norm": 0.25138968229293823, "learning_rate": 9.691290430559022e-05, "loss": 0.0121, "step": 9410 }, { "epoch": 8.471223021582734, "grad_norm": 0.3521564304828644, "learning_rate": 9.690336393313932e-05, "loss": 0.0126, "step": 9420 }, { "epoch": 8.48021582733813, "grad_norm": 0.21399082243442535, "learning_rate": 9.689380931268487e-05, "loss": 0.0158, "step": 9430 }, { "epoch": 8.489208633093526, "grad_norm": 0.1931777149438858, "learning_rate": 9.688424044712932e-05, "loss": 0.0132, "step": 9440 }, { "epoch": 8.498201438848922, "grad_norm": 0.28317922353744507, "learning_rate": 9.687465733937942e-05, "loss": 0.017, "step": 9450 }, { "epoch": 8.507194244604317, "grad_norm": 0.467664897441864, "learning_rate": 9.686505999234627e-05, "loss": 0.0123, "step": 9460 }, { "epoch": 8.516187050359711, "grad_norm": 0.26401013135910034, "learning_rate": 9.685544840894529e-05, "loss": 0.013, "step": 9470 }, { "epoch": 8.525179856115107, "grad_norm": 0.2789142429828644, "learning_rate": 9.684582259209624e-05, "loss": 0.012, "step": 9480 }, { "epoch": 8.534172661870503, "grad_norm": 0.2470809370279312, "learning_rate": 9.683618254472317e-05, "loss": 0.0149, "step": 9490 }, { "epoch": 8.543165467625899, "grad_norm": 0.26178333163261414, "learning_rate": 9.682652826975449e-05, "loss": 0.0145, "step": 9500 }, { "epoch": 8.552158273381295, "grad_norm": 0.2460278421640396, "learning_rate": 9.681685977012291e-05, "loss": 0.0136, "step": 9510 }, { "epoch": 8.56115107913669, "grad_norm": 0.25614339113235474, "learning_rate": 9.680717704876546e-05, "loss": 0.0126, "step": 9520 }, { "epoch": 8.570143884892087, "grad_norm": 0.31106534600257874, "learning_rate": 9.679748010862349e-05, "loss": 0.0138, "step": 9530 }, { "epoch": 8.579136690647482, "grad_norm": 0.30014997720718384, "learning_rate": 9.678776895264267e-05, "loss": 0.0148, "step": 9540 }, { "epoch": 8.588129496402878, "grad_norm": 0.2603328227996826, "learning_rate": 9.6778043583773e-05, "loss": 0.0158, "step": 9550 }, { "epoch": 8.597122302158274, "grad_norm": 0.1786000281572342, "learning_rate": 9.67683040049688e-05, "loss": 0.0117, "step": 9560 }, { "epoch": 8.60611510791367, "grad_norm": 0.2966461181640625, "learning_rate": 9.675855021918869e-05, "loss": 0.0152, "step": 9570 }, { "epoch": 8.615107913669064, "grad_norm": 0.22816437482833862, "learning_rate": 9.674878222939561e-05, "loss": 0.0108, "step": 9580 }, { "epoch": 8.62410071942446, "grad_norm": 0.2695694863796234, "learning_rate": 9.673900003855681e-05, "loss": 0.0127, "step": 9590 }, { "epoch": 8.633093525179856, "grad_norm": 0.4088931679725647, "learning_rate": 9.672920364964389e-05, "loss": 0.0205, "step": 9600 }, { "epoch": 8.642086330935252, "grad_norm": 0.26349127292633057, "learning_rate": 9.671939306563269e-05, "loss": 0.011, "step": 9610 }, { "epoch": 8.651079136690647, "grad_norm": 0.20513306558132172, "learning_rate": 9.670956828950345e-05, "loss": 0.0138, "step": 9620 }, { "epoch": 8.660071942446043, "grad_norm": 0.24403762817382812, "learning_rate": 9.669972932424065e-05, "loss": 0.0119, "step": 9630 }, { "epoch": 8.66906474820144, "grad_norm": 0.20387578010559082, "learning_rate": 9.668987617283312e-05, "loss": 0.0093, "step": 9640 }, { "epoch": 8.678057553956835, "grad_norm": 0.33177539706230164, "learning_rate": 9.668000883827397e-05, "loss": 0.0151, "step": 9650 }, { "epoch": 8.68705035971223, "grad_norm": 0.3348906636238098, "learning_rate": 9.667012732356067e-05, "loss": 0.0145, "step": 9660 }, { "epoch": 8.696043165467627, "grad_norm": 0.38235750794410706, "learning_rate": 9.666023163169493e-05, "loss": 0.0266, "step": 9670 }, { "epoch": 8.70503597122302, "grad_norm": 0.3132467269897461, "learning_rate": 9.665032176568281e-05, "loss": 0.0144, "step": 9680 }, { "epoch": 8.714028776978417, "grad_norm": 0.27128085494041443, "learning_rate": 9.664039772853469e-05, "loss": 0.0165, "step": 9690 }, { "epoch": 8.723021582733812, "grad_norm": 0.2640955150127411, "learning_rate": 9.663045952326518e-05, "loss": 0.0129, "step": 9700 }, { "epoch": 8.732014388489208, "grad_norm": 0.22967351973056793, "learning_rate": 9.662050715289328e-05, "loss": 0.0127, "step": 9710 }, { "epoch": 8.741007194244604, "grad_norm": 0.3094545602798462, "learning_rate": 9.661054062044226e-05, "loss": 0.0181, "step": 9720 }, { "epoch": 8.75, "grad_norm": 0.23484420776367188, "learning_rate": 9.660055992893968e-05, "loss": 0.0118, "step": 9730 }, { "epoch": 8.758992805755396, "grad_norm": 0.2522447407245636, "learning_rate": 9.659056508141739e-05, "loss": 0.0126, "step": 9740 }, { "epoch": 8.767985611510792, "grad_norm": 0.2372351735830307, "learning_rate": 9.658055608091161e-05, "loss": 0.0184, "step": 9750 }, { "epoch": 8.776978417266188, "grad_norm": 0.25137031078338623, "learning_rate": 9.657053293046276e-05, "loss": 0.0138, "step": 9760 }, { "epoch": 8.785971223021583, "grad_norm": 0.3484756052494049, "learning_rate": 9.656049563311564e-05, "loss": 0.0103, "step": 9770 }, { "epoch": 8.79496402877698, "grad_norm": 0.2569250464439392, "learning_rate": 9.655044419191929e-05, "loss": 0.0151, "step": 9780 }, { "epoch": 8.803956834532373, "grad_norm": 0.20487700402736664, "learning_rate": 9.654037860992711e-05, "loss": 0.0125, "step": 9790 }, { "epoch": 8.81294964028777, "grad_norm": 0.2892457842826843, "learning_rate": 9.653029889019672e-05, "loss": 0.0156, "step": 9800 }, { "epoch": 8.821942446043165, "grad_norm": 0.3083125650882721, "learning_rate": 9.65202050357901e-05, "loss": 0.0138, "step": 9810 }, { "epoch": 8.83093525179856, "grad_norm": 0.21122056245803833, "learning_rate": 9.651009704977347e-05, "loss": 0.0131, "step": 9820 }, { "epoch": 8.839928057553957, "grad_norm": 0.275920033454895, "learning_rate": 9.649997493521738e-05, "loss": 0.0121, "step": 9830 }, { "epoch": 8.848920863309353, "grad_norm": 0.32029402256011963, "learning_rate": 9.64898386951967e-05, "loss": 0.012, "step": 9840 }, { "epoch": 8.857913669064748, "grad_norm": 0.43231892585754395, "learning_rate": 9.647968833279049e-05, "loss": 0.0142, "step": 9850 }, { "epoch": 8.866906474820144, "grad_norm": 0.4320095181465149, "learning_rate": 9.646952385108218e-05, "loss": 0.015, "step": 9860 }, { "epoch": 8.87589928057554, "grad_norm": 0.2432362139225006, "learning_rate": 9.645934525315951e-05, "loss": 0.0126, "step": 9870 }, { "epoch": 8.884892086330936, "grad_norm": 0.26913028955459595, "learning_rate": 9.644915254211442e-05, "loss": 0.0127, "step": 9880 }, { "epoch": 8.89388489208633, "grad_norm": 0.31466227769851685, "learning_rate": 9.643894572104321e-05, "loss": 0.0139, "step": 9890 }, { "epoch": 8.902877697841726, "grad_norm": 0.3063904047012329, "learning_rate": 9.642872479304644e-05, "loss": 0.0151, "step": 9900 }, { "epoch": 8.911870503597122, "grad_norm": 0.30676084756851196, "learning_rate": 9.641848976122895e-05, "loss": 0.0129, "step": 9910 }, { "epoch": 8.920863309352518, "grad_norm": 0.40115654468536377, "learning_rate": 9.64082406286999e-05, "loss": 0.0117, "step": 9920 }, { "epoch": 8.929856115107913, "grad_norm": 0.40747588872909546, "learning_rate": 9.639797739857269e-05, "loss": 0.0211, "step": 9930 }, { "epoch": 8.93884892086331, "grad_norm": 0.1958310902118683, "learning_rate": 9.638770007396498e-05, "loss": 0.0136, "step": 9940 }, { "epoch": 8.947841726618705, "grad_norm": 0.26091015338897705, "learning_rate": 9.63774086579988e-05, "loss": 0.021, "step": 9950 }, { "epoch": 8.956834532374101, "grad_norm": 0.2732226848602295, "learning_rate": 9.63671031538004e-05, "loss": 0.0125, "step": 9960 }, { "epoch": 8.965827338129497, "grad_norm": 0.3002973198890686, "learning_rate": 9.635678356450031e-05, "loss": 0.0134, "step": 9970 }, { "epoch": 8.974820143884893, "grad_norm": 0.28648504614830017, "learning_rate": 9.634644989323336e-05, "loss": 0.0135, "step": 9980 }, { "epoch": 8.983812949640289, "grad_norm": 0.23923958837985992, "learning_rate": 9.633610214313861e-05, "loss": 0.0141, "step": 9990 }, { "epoch": 8.992805755395683, "grad_norm": 0.3425154387950897, "learning_rate": 9.632574031735951e-05, "loss": 0.0124, "step": 10000 }, { "epoch": 9.001798561151078, "grad_norm": 0.23487482964992523, "learning_rate": 9.631536441904364e-05, "loss": 0.0114, "step": 10010 }, { "epoch": 9.010791366906474, "grad_norm": 0.3611086308956146, "learning_rate": 9.630497445134293e-05, "loss": 0.0192, "step": 10020 }, { "epoch": 9.01978417266187, "grad_norm": 0.17642107605934143, "learning_rate": 9.62945704174136e-05, "loss": 0.0136, "step": 10030 }, { "epoch": 9.028776978417266, "grad_norm": 0.2169889211654663, "learning_rate": 9.628415232041612e-05, "loss": 0.0171, "step": 10040 }, { "epoch": 9.037769784172662, "grad_norm": 0.33248838782310486, "learning_rate": 9.627372016351524e-05, "loss": 0.0128, "step": 10050 }, { "epoch": 9.046762589928058, "grad_norm": 0.296078085899353, "learning_rate": 9.626327394987995e-05, "loss": 0.0135, "step": 10060 }, { "epoch": 9.055755395683454, "grad_norm": 0.2821689546108246, "learning_rate": 9.625281368268355e-05, "loss": 0.0124, "step": 10070 }, { "epoch": 9.06474820143885, "grad_norm": 0.2797353267669678, "learning_rate": 9.624233936510357e-05, "loss": 0.0139, "step": 10080 }, { "epoch": 9.073741007194245, "grad_norm": 0.2696920931339264, "learning_rate": 9.623185100032187e-05, "loss": 0.0162, "step": 10090 }, { "epoch": 9.082733812949641, "grad_norm": 0.3188522160053253, "learning_rate": 9.62213485915245e-05, "loss": 0.0114, "step": 10100 }, { "epoch": 9.091726618705035, "grad_norm": 0.24980589747428894, "learning_rate": 9.621083214190186e-05, "loss": 0.0116, "step": 10110 }, { "epoch": 9.100719424460431, "grad_norm": 0.2234557718038559, "learning_rate": 9.62003016546485e-05, "loss": 0.0151, "step": 10120 }, { "epoch": 9.109712230215827, "grad_norm": 0.2943405210971832, "learning_rate": 9.618975713296339e-05, "loss": 0.0108, "step": 10130 }, { "epoch": 9.118705035971223, "grad_norm": 0.32265469431877136, "learning_rate": 9.61791985800496e-05, "loss": 0.0095, "step": 10140 }, { "epoch": 9.127697841726619, "grad_norm": 0.20157112181186676, "learning_rate": 9.616862599911458e-05, "loss": 0.0153, "step": 10150 }, { "epoch": 9.136690647482014, "grad_norm": 0.2779116630554199, "learning_rate": 9.615803939337e-05, "loss": 0.0148, "step": 10160 }, { "epoch": 9.14568345323741, "grad_norm": 0.3303905725479126, "learning_rate": 9.614743876603178e-05, "loss": 0.0152, "step": 10170 }, { "epoch": 9.154676258992806, "grad_norm": 0.228925883769989, "learning_rate": 9.613682412032013e-05, "loss": 0.0161, "step": 10180 }, { "epoch": 9.163669064748202, "grad_norm": 0.37447142601013184, "learning_rate": 9.612619545945947e-05, "loss": 0.0208, "step": 10190 }, { "epoch": 9.172661870503598, "grad_norm": 0.3139713406562805, "learning_rate": 9.611555278667852e-05, "loss": 0.0137, "step": 10200 }, { "epoch": 9.181654676258994, "grad_norm": 0.2693127989768982, "learning_rate": 9.610489610521024e-05, "loss": 0.0119, "step": 10210 }, { "epoch": 9.190647482014388, "grad_norm": 0.1938103586435318, "learning_rate": 9.609422541829187e-05, "loss": 0.0129, "step": 10220 }, { "epoch": 9.199640287769784, "grad_norm": 0.23710542917251587, "learning_rate": 9.608354072916486e-05, "loss": 0.0199, "step": 10230 }, { "epoch": 9.20863309352518, "grad_norm": 0.33341819047927856, "learning_rate": 9.607284204107493e-05, "loss": 0.0144, "step": 10240 }, { "epoch": 9.217625899280575, "grad_norm": 0.2757159173488617, "learning_rate": 9.606212935727208e-05, "loss": 0.0103, "step": 10250 }, { "epoch": 9.226618705035971, "grad_norm": 0.3363325297832489, "learning_rate": 9.605140268101052e-05, "loss": 0.0122, "step": 10260 }, { "epoch": 9.235611510791367, "grad_norm": 0.3005695939064026, "learning_rate": 9.604066201554875e-05, "loss": 0.012, "step": 10270 }, { "epoch": 9.244604316546763, "grad_norm": 0.30185753107070923, "learning_rate": 9.60299073641495e-05, "loss": 0.0128, "step": 10280 }, { "epoch": 9.253597122302159, "grad_norm": 0.20607152581214905, "learning_rate": 9.601913873007974e-05, "loss": 0.0104, "step": 10290 }, { "epoch": 9.262589928057555, "grad_norm": 0.2845562994480133, "learning_rate": 9.60083561166107e-05, "loss": 0.0131, "step": 10300 }, { "epoch": 9.27158273381295, "grad_norm": 0.25937381386756897, "learning_rate": 9.599755952701783e-05, "loss": 0.0136, "step": 10310 }, { "epoch": 9.280575539568344, "grad_norm": 0.2757509648799896, "learning_rate": 9.598674896458089e-05, "loss": 0.0138, "step": 10320 }, { "epoch": 9.28956834532374, "grad_norm": 0.20381809771060944, "learning_rate": 9.597592443258383e-05, "loss": 0.0091, "step": 10330 }, { "epoch": 9.298561151079136, "grad_norm": 0.18685847520828247, "learning_rate": 9.596508593431483e-05, "loss": 0.008, "step": 10340 }, { "epoch": 9.307553956834532, "grad_norm": 0.17556191980838776, "learning_rate": 9.59542334730664e-05, "loss": 0.0123, "step": 10350 }, { "epoch": 9.316546762589928, "grad_norm": 0.2360699623823166, "learning_rate": 9.594336705213516e-05, "loss": 0.0118, "step": 10360 }, { "epoch": 9.325539568345324, "grad_norm": 0.29616066813468933, "learning_rate": 9.593248667482208e-05, "loss": 0.0155, "step": 10370 }, { "epoch": 9.33453237410072, "grad_norm": 0.5748569965362549, "learning_rate": 9.592159234443233e-05, "loss": 0.016, "step": 10380 }, { "epoch": 9.343525179856115, "grad_norm": 0.3004048764705658, "learning_rate": 9.59106840642753e-05, "loss": 0.0187, "step": 10390 }, { "epoch": 9.352517985611511, "grad_norm": 0.4129689335823059, "learning_rate": 9.589976183766467e-05, "loss": 0.014, "step": 10400 }, { "epoch": 9.361510791366907, "grad_norm": 0.2978154718875885, "learning_rate": 9.58888256679183e-05, "loss": 0.0141, "step": 10410 }, { "epoch": 9.370503597122303, "grad_norm": 0.38200581073760986, "learning_rate": 9.587787555835832e-05, "loss": 0.0122, "step": 10420 }, { "epoch": 9.379496402877697, "grad_norm": 0.32919979095458984, "learning_rate": 9.586691151231107e-05, "loss": 0.0137, "step": 10430 }, { "epoch": 9.388489208633093, "grad_norm": 0.23144948482513428, "learning_rate": 9.585593353310715e-05, "loss": 0.0143, "step": 10440 }, { "epoch": 9.397482014388489, "grad_norm": 0.2742293179035187, "learning_rate": 9.58449416240814e-05, "loss": 0.0144, "step": 10450 }, { "epoch": 9.406474820143885, "grad_norm": 0.21731062233448029, "learning_rate": 9.583393578857283e-05, "loss": 0.0122, "step": 10460 }, { "epoch": 9.41546762589928, "grad_norm": 0.2817080318927765, "learning_rate": 9.582291602992474e-05, "loss": 0.0117, "step": 10470 }, { "epoch": 9.424460431654676, "grad_norm": 0.24914409220218658, "learning_rate": 9.581188235148466e-05, "loss": 0.0124, "step": 10480 }, { "epoch": 9.433453237410072, "grad_norm": 0.1967020481824875, "learning_rate": 9.58008347566043e-05, "loss": 0.0105, "step": 10490 }, { "epoch": 9.442446043165468, "grad_norm": 0.25374850630760193, "learning_rate": 9.578977324863965e-05, "loss": 0.0111, "step": 10500 }, { "epoch": 9.451438848920864, "grad_norm": 0.3393069803714752, "learning_rate": 9.577869783095089e-05, "loss": 0.0129, "step": 10510 }, { "epoch": 9.46043165467626, "grad_norm": 0.2842247486114502, "learning_rate": 9.576760850690245e-05, "loss": 0.0124, "step": 10520 }, { "epoch": 9.469424460431656, "grad_norm": 0.3857848346233368, "learning_rate": 9.575650527986298e-05, "loss": 0.0164, "step": 10530 }, { "epoch": 9.47841726618705, "grad_norm": 0.38991469144821167, "learning_rate": 9.574538815320531e-05, "loss": 0.0132, "step": 10540 }, { "epoch": 9.487410071942445, "grad_norm": 0.3576919138431549, "learning_rate": 9.573425713030656e-05, "loss": 0.0158, "step": 10550 }, { "epoch": 9.496402877697841, "grad_norm": 0.24706457555294037, "learning_rate": 9.572311221454806e-05, "loss": 0.0106, "step": 10560 }, { "epoch": 9.505395683453237, "grad_norm": 0.17570984363555908, "learning_rate": 9.57119534093153e-05, "loss": 0.0148, "step": 10570 }, { "epoch": 9.514388489208633, "grad_norm": 0.2493375837802887, "learning_rate": 9.570078071799806e-05, "loss": 0.0123, "step": 10580 }, { "epoch": 9.523381294964029, "grad_norm": 0.23676083981990814, "learning_rate": 9.568959414399028e-05, "loss": 0.0112, "step": 10590 }, { "epoch": 9.532374100719425, "grad_norm": 0.1909630298614502, "learning_rate": 9.567839369069018e-05, "loss": 0.0137, "step": 10600 }, { "epoch": 9.54136690647482, "grad_norm": 0.2503344416618347, "learning_rate": 9.566717936150013e-05, "loss": 0.0135, "step": 10610 }, { "epoch": 9.550359712230216, "grad_norm": 0.31291982531547546, "learning_rate": 9.565595115982678e-05, "loss": 0.0113, "step": 10620 }, { "epoch": 9.559352517985612, "grad_norm": 0.3383905589580536, "learning_rate": 9.564470908908094e-05, "loss": 0.0132, "step": 10630 }, { "epoch": 9.568345323741006, "grad_norm": 0.2673322260379791, "learning_rate": 9.563345315267764e-05, "loss": 0.0129, "step": 10640 }, { "epoch": 9.577338129496402, "grad_norm": 0.24555708467960358, "learning_rate": 9.562218335403616e-05, "loss": 0.0123, "step": 10650 }, { "epoch": 9.586330935251798, "grad_norm": 0.22710347175598145, "learning_rate": 9.561089969657999e-05, "loss": 0.0104, "step": 10660 }, { "epoch": 9.595323741007194, "grad_norm": 0.1835491955280304, "learning_rate": 9.559960218373673e-05, "loss": 0.0133, "step": 10670 }, { "epoch": 9.60431654676259, "grad_norm": 0.2907150685787201, "learning_rate": 9.558829081893836e-05, "loss": 0.0121, "step": 10680 }, { "epoch": 9.613309352517986, "grad_norm": 0.21535447239875793, "learning_rate": 9.55769656056209e-05, "loss": 0.0114, "step": 10690 }, { "epoch": 9.622302158273381, "grad_norm": 0.3908759653568268, "learning_rate": 9.556562654722469e-05, "loss": 0.0141, "step": 10700 }, { "epoch": 9.631294964028777, "grad_norm": 0.20674598217010498, "learning_rate": 9.555427364719422e-05, "loss": 0.0159, "step": 10710 }, { "epoch": 9.640287769784173, "grad_norm": 0.17869777977466583, "learning_rate": 9.55429069089782e-05, "loss": 0.0113, "step": 10720 }, { "epoch": 9.649280575539569, "grad_norm": 0.1889093667268753, "learning_rate": 9.553152633602956e-05, "loss": 0.0149, "step": 10730 }, { "epoch": 9.658273381294965, "grad_norm": 0.25737273693084717, "learning_rate": 9.552013193180543e-05, "loss": 0.0152, "step": 10740 }, { "epoch": 9.667266187050359, "grad_norm": 0.2870809733867645, "learning_rate": 9.550872369976707e-05, "loss": 0.0136, "step": 10750 }, { "epoch": 9.676258992805755, "grad_norm": 0.25305336713790894, "learning_rate": 9.549730164338007e-05, "loss": 0.0134, "step": 10760 }, { "epoch": 9.68525179856115, "grad_norm": 0.2363482117652893, "learning_rate": 9.548586576611408e-05, "loss": 0.0117, "step": 10770 }, { "epoch": 9.694244604316546, "grad_norm": 0.17157866060733795, "learning_rate": 9.54744160714431e-05, "loss": 0.0114, "step": 10780 }, { "epoch": 9.703237410071942, "grad_norm": 0.2365128993988037, "learning_rate": 9.546295256284516e-05, "loss": 0.0118, "step": 10790 }, { "epoch": 9.712230215827338, "grad_norm": 0.20632924139499664, "learning_rate": 9.545147524380265e-05, "loss": 0.0138, "step": 10800 }, { "epoch": 9.721223021582734, "grad_norm": 0.20564602315425873, "learning_rate": 9.543998411780201e-05, "loss": 0.0134, "step": 10810 }, { "epoch": 9.73021582733813, "grad_norm": 0.2470715492963791, "learning_rate": 9.542847918833397e-05, "loss": 0.0128, "step": 10820 }, { "epoch": 9.739208633093526, "grad_norm": 0.2094346433877945, "learning_rate": 9.541696045889343e-05, "loss": 0.0119, "step": 10830 }, { "epoch": 9.748201438848922, "grad_norm": 0.14724990725517273, "learning_rate": 9.540542793297947e-05, "loss": 0.01, "step": 10840 }, { "epoch": 9.757194244604317, "grad_norm": 0.2507741451263428, "learning_rate": 9.539388161409537e-05, "loss": 0.0117, "step": 10850 }, { "epoch": 9.766187050359711, "grad_norm": 0.37561413645744324, "learning_rate": 9.538232150574857e-05, "loss": 0.0131, "step": 10860 }, { "epoch": 9.775179856115107, "grad_norm": 0.2460334151983261, "learning_rate": 9.537074761145076e-05, "loss": 0.0093, "step": 10870 }, { "epoch": 9.784172661870503, "grad_norm": 0.23433168232440948, "learning_rate": 9.535915993471778e-05, "loss": 0.0147, "step": 10880 }, { "epoch": 9.793165467625899, "grad_norm": 0.29277220368385315, "learning_rate": 9.534755847906964e-05, "loss": 0.018, "step": 10890 }, { "epoch": 9.802158273381295, "grad_norm": 0.45322710275650024, "learning_rate": 9.533594324803057e-05, "loss": 0.0121, "step": 10900 }, { "epoch": 9.81115107913669, "grad_norm": 0.24296750128269196, "learning_rate": 9.532431424512895e-05, "loss": 0.009, "step": 10910 }, { "epoch": 9.820143884892087, "grad_norm": 0.2529078423976898, "learning_rate": 9.531267147389741e-05, "loss": 0.0139, "step": 10920 }, { "epoch": 9.829136690647482, "grad_norm": 0.19998855888843536, "learning_rate": 9.530101493787266e-05, "loss": 0.0125, "step": 10930 }, { "epoch": 9.838129496402878, "grad_norm": 0.23031993210315704, "learning_rate": 9.528934464059571e-05, "loss": 0.0121, "step": 10940 }, { "epoch": 9.847122302158274, "grad_norm": 0.17122609913349152, "learning_rate": 9.527766058561163e-05, "loss": 0.0098, "step": 10950 }, { "epoch": 9.85611510791367, "grad_norm": 0.39304429292678833, "learning_rate": 9.526596277646976e-05, "loss": 0.0133, "step": 10960 }, { "epoch": 9.865107913669064, "grad_norm": 0.32369598746299744, "learning_rate": 9.525425121672358e-05, "loss": 0.0111, "step": 10970 }, { "epoch": 9.87410071942446, "grad_norm": 0.22205951809883118, "learning_rate": 9.524252590993074e-05, "loss": 0.0099, "step": 10980 }, { "epoch": 9.883093525179856, "grad_norm": 0.2755987048149109, "learning_rate": 9.523078685965309e-05, "loss": 0.0127, "step": 10990 }, { "epoch": 9.892086330935252, "grad_norm": 0.2870744466781616, "learning_rate": 9.521903406945664e-05, "loss": 0.012, "step": 11000 }, { "epoch": 9.901079136690647, "grad_norm": 0.347053587436676, "learning_rate": 9.520726754291158e-05, "loss": 0.0123, "step": 11010 }, { "epoch": 9.910071942446043, "grad_norm": 0.35708799958229065, "learning_rate": 9.519548728359227e-05, "loss": 0.013, "step": 11020 }, { "epoch": 9.91906474820144, "grad_norm": 0.21585969626903534, "learning_rate": 9.518369329507726e-05, "loss": 0.011, "step": 11030 }, { "epoch": 9.928057553956835, "grad_norm": 0.2993195950984955, "learning_rate": 9.51718855809492e-05, "loss": 0.0103, "step": 11040 }, { "epoch": 9.93705035971223, "grad_norm": 0.18671444058418274, "learning_rate": 9.516006414479502e-05, "loss": 0.0133, "step": 11050 }, { "epoch": 9.946043165467627, "grad_norm": 0.2929985821247101, "learning_rate": 9.514822899020572e-05, "loss": 0.0225, "step": 11060 }, { "epoch": 9.95503597122302, "grad_norm": 0.25441938638687134, "learning_rate": 9.513638012077654e-05, "loss": 0.0126, "step": 11070 }, { "epoch": 9.964028776978417, "grad_norm": 0.24907279014587402, "learning_rate": 9.512451754010683e-05, "loss": 0.0107, "step": 11080 }, { "epoch": 9.973021582733812, "grad_norm": 0.4231383204460144, "learning_rate": 9.511264125180013e-05, "loss": 0.0094, "step": 11090 }, { "epoch": 9.982014388489208, "grad_norm": 0.22354304790496826, "learning_rate": 9.510075125946414e-05, "loss": 0.0117, "step": 11100 }, { "epoch": 9.991007194244604, "grad_norm": 0.2596941292285919, "learning_rate": 9.508884756671075e-05, "loss": 0.0131, "step": 11110 }, { "epoch": 10.0, "grad_norm": 0.43386390805244446, "learning_rate": 9.507693017715596e-05, "loss": 0.0126, "step": 11120 }, { "epoch": 10.008992805755396, "grad_norm": 0.4065648913383484, "learning_rate": 9.506499909441997e-05, "loss": 0.0125, "step": 11130 }, { "epoch": 10.017985611510792, "grad_norm": 0.35652610659599304, "learning_rate": 9.505305432212713e-05, "loss": 0.0125, "step": 11140 }, { "epoch": 10.026978417266188, "grad_norm": 0.29576385021209717, "learning_rate": 9.504109586390595e-05, "loss": 0.0126, "step": 11150 }, { "epoch": 10.035971223021583, "grad_norm": 0.24569939076900482, "learning_rate": 9.502912372338908e-05, "loss": 0.0145, "step": 11160 }, { "epoch": 10.04496402877698, "grad_norm": 0.1832357943058014, "learning_rate": 9.501713790421335e-05, "loss": 0.0142, "step": 11170 }, { "epoch": 10.053956834532373, "grad_norm": 0.16974139213562012, "learning_rate": 9.500513841001974e-05, "loss": 0.0121, "step": 11180 }, { "epoch": 10.06294964028777, "grad_norm": 0.20808322727680206, "learning_rate": 9.499312524445336e-05, "loss": 0.0141, "step": 11190 }, { "epoch": 10.071942446043165, "grad_norm": 0.30492159724235535, "learning_rate": 9.498109841116351e-05, "loss": 0.0114, "step": 11200 }, { "epoch": 10.08093525179856, "grad_norm": 0.2499491423368454, "learning_rate": 9.496905791380363e-05, "loss": 0.0183, "step": 11210 }, { "epoch": 10.089928057553957, "grad_norm": 0.26465028524398804, "learning_rate": 9.495700375603129e-05, "loss": 0.0112, "step": 11220 }, { "epoch": 10.098920863309353, "grad_norm": 0.3018263578414917, "learning_rate": 9.494493594150822e-05, "loss": 0.0135, "step": 11230 }, { "epoch": 10.107913669064748, "grad_norm": 0.2151605635881424, "learning_rate": 9.493285447390032e-05, "loss": 0.0116, "step": 11240 }, { "epoch": 10.116906474820144, "grad_norm": 0.2307339608669281, "learning_rate": 9.492075935687761e-05, "loss": 0.0116, "step": 11250 }, { "epoch": 10.12589928057554, "grad_norm": 0.22900022566318512, "learning_rate": 9.490865059411427e-05, "loss": 0.011, "step": 11260 }, { "epoch": 10.134892086330936, "grad_norm": 0.2128586769104004, "learning_rate": 9.489652818928863e-05, "loss": 0.0135, "step": 11270 }, { "epoch": 10.14388489208633, "grad_norm": 0.23380453884601593, "learning_rate": 9.488439214608315e-05, "loss": 0.0211, "step": 11280 }, { "epoch": 10.152877697841726, "grad_norm": 0.3042012155056, "learning_rate": 9.487224246818444e-05, "loss": 0.01, "step": 11290 }, { "epoch": 10.161870503597122, "grad_norm": 0.2634100317955017, "learning_rate": 9.486007915928325e-05, "loss": 0.0093, "step": 11300 }, { "epoch": 10.170863309352518, "grad_norm": 0.2132919877767563, "learning_rate": 9.484790222307448e-05, "loss": 0.0111, "step": 11310 }, { "epoch": 10.179856115107913, "grad_norm": 0.2544447183609009, "learning_rate": 9.483571166325716e-05, "loss": 0.0136, "step": 11320 }, { "epoch": 10.18884892086331, "grad_norm": 0.5414437055587769, "learning_rate": 9.482350748353444e-05, "loss": 0.0141, "step": 11330 }, { "epoch": 10.197841726618705, "grad_norm": 0.3394736647605896, "learning_rate": 9.481128968761363e-05, "loss": 0.0118, "step": 11340 }, { "epoch": 10.206834532374101, "grad_norm": 0.26428869366645813, "learning_rate": 9.479905827920621e-05, "loss": 0.0113, "step": 11350 }, { "epoch": 10.215827338129497, "grad_norm": 0.39524462819099426, "learning_rate": 9.478681326202773e-05, "loss": 0.0129, "step": 11360 }, { "epoch": 10.224820143884893, "grad_norm": 0.18053320050239563, "learning_rate": 9.477455463979791e-05, "loss": 0.0107, "step": 11370 }, { "epoch": 10.233812949640289, "grad_norm": 0.29521289467811584, "learning_rate": 9.476228241624059e-05, "loss": 0.0185, "step": 11380 }, { "epoch": 10.242805755395683, "grad_norm": 0.29301753640174866, "learning_rate": 9.474999659508374e-05, "loss": 0.0107, "step": 11390 }, { "epoch": 10.251798561151078, "grad_norm": 0.17323842644691467, "learning_rate": 9.47376971800595e-05, "loss": 0.0079, "step": 11400 }, { "epoch": 10.260791366906474, "grad_norm": 0.2893944978713989, "learning_rate": 9.472538417490409e-05, "loss": 0.0143, "step": 11410 }, { "epoch": 10.26978417266187, "grad_norm": 0.35254666209220886, "learning_rate": 9.471305758335784e-05, "loss": 0.0129, "step": 11420 }, { "epoch": 10.278776978417266, "grad_norm": 0.3011517822742462, "learning_rate": 9.47007174091653e-05, "loss": 0.0178, "step": 11430 }, { "epoch": 10.287769784172662, "grad_norm": 0.33603015542030334, "learning_rate": 9.468836365607507e-05, "loss": 0.0153, "step": 11440 }, { "epoch": 10.296762589928058, "grad_norm": 0.3576168417930603, "learning_rate": 9.467599632783988e-05, "loss": 0.0161, "step": 11450 }, { "epoch": 10.305755395683454, "grad_norm": 0.22006964683532715, "learning_rate": 9.466361542821662e-05, "loss": 0.0151, "step": 11460 }, { "epoch": 10.31474820143885, "grad_norm": 0.2499951422214508, "learning_rate": 9.465122096096625e-05, "loss": 0.0102, "step": 11470 }, { "epoch": 10.323741007194245, "grad_norm": 0.284951776266098, "learning_rate": 9.463881292985391e-05, "loss": 0.0155, "step": 11480 }, { "epoch": 10.332733812949641, "grad_norm": 0.29241612553596497, "learning_rate": 9.462639133864881e-05, "loss": 0.0112, "step": 11490 }, { "epoch": 10.341726618705035, "grad_norm": 0.2576180398464203, "learning_rate": 9.461395619112432e-05, "loss": 0.0131, "step": 11500 }, { "epoch": 10.350719424460431, "grad_norm": 0.26346394419670105, "learning_rate": 9.460150749105791e-05, "loss": 0.0108, "step": 11510 }, { "epoch": 10.359712230215827, "grad_norm": 0.33523935079574585, "learning_rate": 9.458904524223116e-05, "loss": 0.0123, "step": 11520 }, { "epoch": 10.368705035971223, "grad_norm": 0.34431949257850647, "learning_rate": 9.457656944842976e-05, "loss": 0.0156, "step": 11530 }, { "epoch": 10.377697841726619, "grad_norm": 0.27424386143684387, "learning_rate": 9.456408011344353e-05, "loss": 0.0109, "step": 11540 }, { "epoch": 10.386690647482014, "grad_norm": 0.34835588932037354, "learning_rate": 9.455157724106643e-05, "loss": 0.0136, "step": 11550 }, { "epoch": 10.39568345323741, "grad_norm": 0.19564810395240784, "learning_rate": 9.453906083509647e-05, "loss": 0.015, "step": 11560 }, { "epoch": 10.404676258992806, "grad_norm": 0.2526419162750244, "learning_rate": 9.45265308993358e-05, "loss": 0.0104, "step": 11570 }, { "epoch": 10.413669064748202, "grad_norm": 0.30502068996429443, "learning_rate": 9.451398743759071e-05, "loss": 0.018, "step": 11580 }, { "epoch": 10.422661870503598, "grad_norm": 0.2093430757522583, "learning_rate": 9.450143045367156e-05, "loss": 0.0108, "step": 11590 }, { "epoch": 10.431654676258994, "grad_norm": 0.22534649074077606, "learning_rate": 9.448885995139283e-05, "loss": 0.0122, "step": 11600 }, { "epoch": 10.440647482014388, "grad_norm": 0.24782954156398773, "learning_rate": 9.44762759345731e-05, "loss": 0.0098, "step": 11610 }, { "epoch": 10.449640287769784, "grad_norm": 0.24284401535987854, "learning_rate": 9.446367840703509e-05, "loss": 0.0116, "step": 11620 }, { "epoch": 10.45863309352518, "grad_norm": 0.2705310881137848, "learning_rate": 9.445106737260556e-05, "loss": 0.0108, "step": 11630 }, { "epoch": 10.467625899280575, "grad_norm": 0.32981443405151367, "learning_rate": 9.443844283511543e-05, "loss": 0.0127, "step": 11640 }, { "epoch": 10.476618705035971, "grad_norm": 0.31409549713134766, "learning_rate": 9.442580479839968e-05, "loss": 0.0163, "step": 11650 }, { "epoch": 10.485611510791367, "grad_norm": 0.37936270236968994, "learning_rate": 9.441315326629745e-05, "loss": 0.0143, "step": 11660 }, { "epoch": 10.494604316546763, "grad_norm": 0.2747897803783417, "learning_rate": 9.44004882426519e-05, "loss": 0.0127, "step": 11670 }, { "epoch": 10.503597122302159, "grad_norm": 0.24813611805438995, "learning_rate": 9.438780973131037e-05, "loss": 0.012, "step": 11680 }, { "epoch": 10.512589928057555, "grad_norm": 0.27431565523147583, "learning_rate": 9.437511773612423e-05, "loss": 0.0156, "step": 11690 }, { "epoch": 10.52158273381295, "grad_norm": 0.23535150289535522, "learning_rate": 9.436241226094896e-05, "loss": 0.0186, "step": 11700 }, { "epoch": 10.530575539568346, "grad_norm": 0.16883379220962524, "learning_rate": 9.434969330964418e-05, "loss": 0.0122, "step": 11710 }, { "epoch": 10.53956834532374, "grad_norm": 0.16631333529949188, "learning_rate": 9.433696088607356e-05, "loss": 0.0123, "step": 11720 }, { "epoch": 10.548561151079136, "grad_norm": 0.20950153470039368, "learning_rate": 9.432421499410486e-05, "loss": 0.0125, "step": 11730 }, { "epoch": 10.557553956834532, "grad_norm": 0.19244195520877838, "learning_rate": 9.431145563760998e-05, "loss": 0.0117, "step": 11740 }, { "epoch": 10.566546762589928, "grad_norm": 0.251476913690567, "learning_rate": 9.429868282046484e-05, "loss": 0.0155, "step": 11750 }, { "epoch": 10.575539568345324, "grad_norm": 0.22861024737358093, "learning_rate": 9.428589654654951e-05, "loss": 0.0177, "step": 11760 }, { "epoch": 10.58453237410072, "grad_norm": 0.2843776047229767, "learning_rate": 9.42730968197481e-05, "loss": 0.011, "step": 11770 }, { "epoch": 10.593525179856115, "grad_norm": 0.371252179145813, "learning_rate": 9.426028364394883e-05, "loss": 0.0109, "step": 11780 }, { "epoch": 10.602517985611511, "grad_norm": 0.26733922958374023, "learning_rate": 9.424745702304402e-05, "loss": 0.0105, "step": 11790 }, { "epoch": 10.611510791366907, "grad_norm": 0.2177855670452118, "learning_rate": 9.423461696093006e-05, "loss": 0.0159, "step": 11800 }, { "epoch": 10.620503597122303, "grad_norm": 0.24670732021331787, "learning_rate": 9.422176346150741e-05, "loss": 0.0114, "step": 11810 }, { "epoch": 10.629496402877697, "grad_norm": 0.28919199109077454, "learning_rate": 9.420889652868063e-05, "loss": 0.0129, "step": 11820 }, { "epoch": 10.638489208633093, "grad_norm": 0.20299310982227325, "learning_rate": 9.419601616635836e-05, "loss": 0.0113, "step": 11830 }, { "epoch": 10.647482014388489, "grad_norm": 0.21988487243652344, "learning_rate": 9.418312237845331e-05, "loss": 0.0127, "step": 11840 }, { "epoch": 10.656474820143885, "grad_norm": 0.3286900222301483, "learning_rate": 9.417021516888225e-05, "loss": 0.0113, "step": 11850 }, { "epoch": 10.66546762589928, "grad_norm": 0.35677340626716614, "learning_rate": 9.415729454156608e-05, "loss": 0.0111, "step": 11860 }, { "epoch": 10.674460431654676, "grad_norm": 0.21054509282112122, "learning_rate": 9.414436050042973e-05, "loss": 0.0129, "step": 11870 }, { "epoch": 10.683453237410072, "grad_norm": 0.2161281406879425, "learning_rate": 9.413141304940223e-05, "loss": 0.0104, "step": 11880 }, { "epoch": 10.692446043165468, "grad_norm": 0.19273941218852997, "learning_rate": 9.411845219241666e-05, "loss": 0.0097, "step": 11890 }, { "epoch": 10.701438848920864, "grad_norm": 0.2533096373081207, "learning_rate": 9.410547793341021e-05, "loss": 0.0111, "step": 11900 }, { "epoch": 10.71043165467626, "grad_norm": 0.1987481564283371, "learning_rate": 9.409249027632408e-05, "loss": 0.0107, "step": 11910 }, { "epoch": 10.719424460431654, "grad_norm": 0.231144517660141, "learning_rate": 9.407948922510362e-05, "loss": 0.0109, "step": 11920 }, { "epoch": 10.72841726618705, "grad_norm": 0.189524844288826, "learning_rate": 9.406647478369817e-05, "loss": 0.0088, "step": 11930 }, { "epoch": 10.737410071942445, "grad_norm": 0.31613436341285706, "learning_rate": 9.405344695606118e-05, "loss": 0.0111, "step": 11940 }, { "epoch": 10.746402877697841, "grad_norm": 0.25611191987991333, "learning_rate": 9.404040574615018e-05, "loss": 0.0109, "step": 11950 }, { "epoch": 10.755395683453237, "grad_norm": 0.3949185013771057, "learning_rate": 9.402735115792674e-05, "loss": 0.0132, "step": 11960 }, { "epoch": 10.764388489208633, "grad_norm": 0.2318660318851471, "learning_rate": 9.401428319535649e-05, "loss": 0.0107, "step": 11970 }, { "epoch": 10.773381294964029, "grad_norm": 0.2407703548669815, "learning_rate": 9.400120186240912e-05, "loss": 0.0135, "step": 11980 }, { "epoch": 10.782374100719425, "grad_norm": 0.19971679151058197, "learning_rate": 9.398810716305844e-05, "loss": 0.0099, "step": 11990 }, { "epoch": 10.79136690647482, "grad_norm": 0.29861655831336975, "learning_rate": 9.397499910128222e-05, "loss": 0.0118, "step": 12000 }, { "epoch": 10.800359712230216, "grad_norm": 0.22986899316310883, "learning_rate": 9.396187768106237e-05, "loss": 0.0087, "step": 12010 }, { "epoch": 10.809352517985612, "grad_norm": 0.2802552282810211, "learning_rate": 9.394874290638482e-05, "loss": 0.0124, "step": 12020 }, { "epoch": 10.818345323741006, "grad_norm": 0.34848418831825256, "learning_rate": 9.393559478123959e-05, "loss": 0.0139, "step": 12030 }, { "epoch": 10.827338129496402, "grad_norm": 0.2801781892776489, "learning_rate": 9.39224333096207e-05, "loss": 0.0118, "step": 12040 }, { "epoch": 10.836330935251798, "grad_norm": 0.32038965821266174, "learning_rate": 9.390925849552629e-05, "loss": 0.0105, "step": 12050 }, { "epoch": 10.845323741007194, "grad_norm": 0.2585030198097229, "learning_rate": 9.389607034295849e-05, "loss": 0.0101, "step": 12060 }, { "epoch": 10.85431654676259, "grad_norm": 0.1850133091211319, "learning_rate": 9.388286885592355e-05, "loss": 0.015, "step": 12070 }, { "epoch": 10.863309352517986, "grad_norm": 0.17078320682048798, "learning_rate": 9.386965403843168e-05, "loss": 0.0113, "step": 12080 }, { "epoch": 10.872302158273381, "grad_norm": 0.2015753537416458, "learning_rate": 9.385642589449726e-05, "loss": 0.0098, "step": 12090 }, { "epoch": 10.881294964028777, "grad_norm": 0.23777903616428375, "learning_rate": 9.38431844281386e-05, "loss": 0.0088, "step": 12100 }, { "epoch": 10.890287769784173, "grad_norm": 0.25391480326652527, "learning_rate": 9.38299296433781e-05, "loss": 0.0105, "step": 12110 }, { "epoch": 10.899280575539569, "grad_norm": 0.14192743599414825, "learning_rate": 9.381666154424226e-05, "loss": 0.0111, "step": 12120 }, { "epoch": 10.908273381294965, "grad_norm": 0.3183654248714447, "learning_rate": 9.380338013476157e-05, "loss": 0.0102, "step": 12130 }, { "epoch": 10.917266187050359, "grad_norm": 0.315866619348526, "learning_rate": 9.379008541897054e-05, "loss": 0.0124, "step": 12140 }, { "epoch": 10.926258992805755, "grad_norm": 0.1814686805009842, "learning_rate": 9.377677740090777e-05, "loss": 0.014, "step": 12150 }, { "epoch": 10.93525179856115, "grad_norm": 0.22197580337524414, "learning_rate": 9.376345608461588e-05, "loss": 0.0113, "step": 12160 }, { "epoch": 10.944244604316546, "grad_norm": 0.20767511427402496, "learning_rate": 9.375012147414155e-05, "loss": 0.01, "step": 12170 }, { "epoch": 10.953237410071942, "grad_norm": 0.24444420635700226, "learning_rate": 9.373677357353545e-05, "loss": 0.011, "step": 12180 }, { "epoch": 10.962230215827338, "grad_norm": 0.17644494771957397, "learning_rate": 9.372341238685237e-05, "loss": 0.0082, "step": 12190 }, { "epoch": 10.971223021582734, "grad_norm": 0.29753535985946655, "learning_rate": 9.371003791815102e-05, "loss": 0.0097, "step": 12200 }, { "epoch": 10.98021582733813, "grad_norm": 0.3099006414413452, "learning_rate": 9.369665017149429e-05, "loss": 0.0085, "step": 12210 }, { "epoch": 10.989208633093526, "grad_norm": 0.1122778132557869, "learning_rate": 9.368324915094895e-05, "loss": 0.0084, "step": 12220 }, { "epoch": 10.998201438848922, "grad_norm": 0.18985335528850555, "learning_rate": 9.366983486058591e-05, "loss": 0.0117, "step": 12230 }, { "epoch": 11.007194244604317, "grad_norm": 0.29135796427726746, "learning_rate": 9.365640730448009e-05, "loss": 0.0125, "step": 12240 }, { "epoch": 11.016187050359711, "grad_norm": 0.3251189887523651, "learning_rate": 9.36429664867104e-05, "loss": 0.0127, "step": 12250 }, { "epoch": 11.025179856115107, "grad_norm": 0.3399899899959564, "learning_rate": 9.362951241135982e-05, "loss": 0.0126, "step": 12260 }, { "epoch": 11.034172661870503, "grad_norm": 0.3336639404296875, "learning_rate": 9.361604508251534e-05, "loss": 0.0111, "step": 12270 }, { "epoch": 11.043165467625899, "grad_norm": 0.23579061031341553, "learning_rate": 9.360256450426799e-05, "loss": 0.0107, "step": 12280 }, { "epoch": 11.052158273381295, "grad_norm": 0.2653978168964386, "learning_rate": 9.358907068071279e-05, "loss": 0.0101, "step": 12290 }, { "epoch": 11.06115107913669, "grad_norm": 0.22985270619392395, "learning_rate": 9.357556361594882e-05, "loss": 0.0096, "step": 12300 }, { "epoch": 11.070143884892087, "grad_norm": 0.211660698056221, "learning_rate": 9.356204331407917e-05, "loss": 0.0121, "step": 12310 }, { "epoch": 11.079136690647482, "grad_norm": 0.17352068424224854, "learning_rate": 9.354850977921094e-05, "loss": 0.0147, "step": 12320 }, { "epoch": 11.088129496402878, "grad_norm": 0.16236570477485657, "learning_rate": 9.353496301545529e-05, "loss": 0.0116, "step": 12330 }, { "epoch": 11.097122302158274, "grad_norm": 0.25066229701042175, "learning_rate": 9.352140302692733e-05, "loss": 0.0143, "step": 12340 }, { "epoch": 11.10611510791367, "grad_norm": 0.3426693379878998, "learning_rate": 9.350782981774627e-05, "loss": 0.0123, "step": 12350 }, { "epoch": 11.115107913669064, "grad_norm": 0.24417756497859955, "learning_rate": 9.349424339203526e-05, "loss": 0.012, "step": 12360 }, { "epoch": 11.12410071942446, "grad_norm": 0.31147927045822144, "learning_rate": 9.34806437539215e-05, "loss": 0.0135, "step": 12370 }, { "epoch": 11.133093525179856, "grad_norm": 0.22756804525852203, "learning_rate": 9.346703090753622e-05, "loss": 0.015, "step": 12380 }, { "epoch": 11.142086330935252, "grad_norm": 0.37909024953842163, "learning_rate": 9.345340485701461e-05, "loss": 0.014, "step": 12390 }, { "epoch": 11.151079136690647, "grad_norm": 0.2645686864852905, "learning_rate": 9.343976560649595e-05, "loss": 0.0111, "step": 12400 }, { "epoch": 11.160071942446043, "grad_norm": 0.18217377364635468, "learning_rate": 9.342611316012344e-05, "loss": 0.0112, "step": 12410 }, { "epoch": 11.16906474820144, "grad_norm": 0.3018819987773895, "learning_rate": 9.341244752204437e-05, "loss": 0.0127, "step": 12420 }, { "epoch": 11.178057553956835, "grad_norm": 0.2810276746749878, "learning_rate": 9.339876869640995e-05, "loss": 0.0105, "step": 12430 }, { "epoch": 11.18705035971223, "grad_norm": 0.3484347462654114, "learning_rate": 9.33850766873755e-05, "loss": 0.013, "step": 12440 }, { "epoch": 11.196043165467627, "grad_norm": 0.3491142988204956, "learning_rate": 9.337137149910028e-05, "loss": 0.0121, "step": 12450 }, { "epoch": 11.20503597122302, "grad_norm": 0.28387948870658875, "learning_rate": 9.335765313574753e-05, "loss": 0.0096, "step": 12460 }, { "epoch": 11.214028776978417, "grad_norm": 0.2699604630470276, "learning_rate": 9.334392160148457e-05, "loss": 0.0108, "step": 12470 }, { "epoch": 11.223021582733812, "grad_norm": 0.28079381585121155, "learning_rate": 9.333017690048264e-05, "loss": 0.0116, "step": 12480 }, { "epoch": 11.232014388489208, "grad_norm": 0.21289914846420288, "learning_rate": 9.331641903691706e-05, "loss": 0.0106, "step": 12490 }, { "epoch": 11.241007194244604, "grad_norm": 0.24884949624538422, "learning_rate": 9.330264801496707e-05, "loss": 0.0175, "step": 12500 }, { "epoch": 11.25, "grad_norm": 0.3031194508075714, "learning_rate": 9.328886383881594e-05, "loss": 0.0118, "step": 12510 }, { "epoch": 11.258992805755396, "grad_norm": 0.19542022049427032, "learning_rate": 9.327506651265095e-05, "loss": 0.0165, "step": 12520 }, { "epoch": 11.267985611510792, "grad_norm": 0.229074165225029, "learning_rate": 9.326125604066338e-05, "loss": 0.0144, "step": 12530 }, { "epoch": 11.276978417266188, "grad_norm": 0.2973995804786682, "learning_rate": 9.324743242704847e-05, "loss": 0.012, "step": 12540 }, { "epoch": 11.285971223021583, "grad_norm": 0.2860524654388428, "learning_rate": 9.323359567600546e-05, "loss": 0.0136, "step": 12550 }, { "epoch": 11.29496402877698, "grad_norm": 0.286653608083725, "learning_rate": 9.321974579173761e-05, "loss": 0.0108, "step": 12560 }, { "epoch": 11.303956834532373, "grad_norm": 0.23838846385478973, "learning_rate": 9.320588277845213e-05, "loss": 0.0176, "step": 12570 }, { "epoch": 11.31294964028777, "grad_norm": 0.19658106565475464, "learning_rate": 9.319200664036026e-05, "loss": 0.0114, "step": 12580 }, { "epoch": 11.321942446043165, "grad_norm": 0.22345292568206787, "learning_rate": 9.31781173816772e-05, "loss": 0.0109, "step": 12590 }, { "epoch": 11.33093525179856, "grad_norm": 0.1615925431251526, "learning_rate": 9.316421500662212e-05, "loss": 0.013, "step": 12600 }, { "epoch": 11.339928057553957, "grad_norm": 0.167790025472641, "learning_rate": 9.31502995194182e-05, "loss": 0.0106, "step": 12610 }, { "epoch": 11.348920863309353, "grad_norm": 0.24855217337608337, "learning_rate": 9.31363709242926e-05, "loss": 0.0117, "step": 12620 }, { "epoch": 11.357913669064748, "grad_norm": 0.24448968470096588, "learning_rate": 9.312242922547647e-05, "loss": 0.009, "step": 12630 }, { "epoch": 11.366906474820144, "grad_norm": 0.33086052536964417, "learning_rate": 9.310847442720492e-05, "loss": 0.0116, "step": 12640 }, { "epoch": 11.37589928057554, "grad_norm": 0.3119520843029022, "learning_rate": 9.309450653371706e-05, "loss": 0.0114, "step": 12650 }, { "epoch": 11.384892086330936, "grad_norm": 0.26332196593284607, "learning_rate": 9.308052554925595e-05, "loss": 0.0102, "step": 12660 }, { "epoch": 11.39388489208633, "grad_norm": 0.22860994935035706, "learning_rate": 9.306653147806867e-05, "loss": 0.0139, "step": 12670 }, { "epoch": 11.402877697841726, "grad_norm": 0.2101936936378479, "learning_rate": 9.305252432440622e-05, "loss": 0.0099, "step": 12680 }, { "epoch": 11.411870503597122, "grad_norm": 0.16255390644073486, "learning_rate": 9.303850409252361e-05, "loss": 0.0099, "step": 12690 }, { "epoch": 11.420863309352518, "grad_norm": 0.2157665193080902, "learning_rate": 9.302447078667985e-05, "loss": 0.0083, "step": 12700 }, { "epoch": 11.429856115107913, "grad_norm": 0.33020299673080444, "learning_rate": 9.301042441113783e-05, "loss": 0.0101, "step": 12710 }, { "epoch": 11.43884892086331, "grad_norm": 0.2113770693540573, "learning_rate": 9.299636497016451e-05, "loss": 0.0127, "step": 12720 }, { "epoch": 11.447841726618705, "grad_norm": 0.25366657972335815, "learning_rate": 9.298229246803076e-05, "loss": 0.0115, "step": 12730 }, { "epoch": 11.456834532374101, "grad_norm": 0.27692487835884094, "learning_rate": 9.296820690901144e-05, "loss": 0.0117, "step": 12740 }, { "epoch": 11.465827338129497, "grad_norm": 0.3290925621986389, "learning_rate": 9.295410829738539e-05, "loss": 0.0117, "step": 12750 }, { "epoch": 11.474820143884893, "grad_norm": 0.19961003959178925, "learning_rate": 9.293999663743535e-05, "loss": 0.0108, "step": 12760 }, { "epoch": 11.483812949640289, "grad_norm": 0.24511009454727173, "learning_rate": 9.292587193344813e-05, "loss": 0.0155, "step": 12770 }, { "epoch": 11.492805755395683, "grad_norm": 0.33234649896621704, "learning_rate": 9.291173418971437e-05, "loss": 0.0121, "step": 12780 }, { "epoch": 11.501798561151078, "grad_norm": 0.2192455679178238, "learning_rate": 9.28975834105288e-05, "loss": 0.0125, "step": 12790 }, { "epoch": 11.510791366906474, "grad_norm": 0.2737628221511841, "learning_rate": 9.288341960019004e-05, "loss": 0.0128, "step": 12800 }, { "epoch": 11.51978417266187, "grad_norm": 0.2267358899116516, "learning_rate": 9.286924276300067e-05, "loss": 0.014, "step": 12810 }, { "epoch": 11.528776978417266, "grad_norm": 0.21181468665599823, "learning_rate": 9.285505290326726e-05, "loss": 0.0111, "step": 12820 }, { "epoch": 11.537769784172662, "grad_norm": 0.22628723084926605, "learning_rate": 9.284085002530027e-05, "loss": 0.0091, "step": 12830 }, { "epoch": 11.546762589928058, "grad_norm": 0.2200169861316681, "learning_rate": 9.282663413341422e-05, "loss": 0.0135, "step": 12840 }, { "epoch": 11.555755395683454, "grad_norm": 0.19303393363952637, "learning_rate": 9.281240523192747e-05, "loss": 0.0096, "step": 12850 }, { "epoch": 11.56474820143885, "grad_norm": 0.24148491024971008, "learning_rate": 9.279816332516242e-05, "loss": 0.0104, "step": 12860 }, { "epoch": 11.573741007194245, "grad_norm": 0.20118117332458496, "learning_rate": 9.278390841744536e-05, "loss": 0.0125, "step": 12870 }, { "epoch": 11.582733812949641, "grad_norm": 0.22388987243175507, "learning_rate": 9.276964051310658e-05, "loss": 0.0114, "step": 12880 }, { "epoch": 11.591726618705035, "grad_norm": 0.20980286598205566, "learning_rate": 9.275535961648027e-05, "loss": 0.0089, "step": 12890 }, { "epoch": 11.600719424460431, "grad_norm": 0.22444649040699005, "learning_rate": 9.274106573190459e-05, "loss": 0.01, "step": 12900 }, { "epoch": 11.609712230215827, "grad_norm": 0.31055182218551636, "learning_rate": 9.272675886372168e-05, "loss": 0.0084, "step": 12910 }, { "epoch": 11.618705035971223, "grad_norm": 0.2704773545265198, "learning_rate": 9.271243901627754e-05, "loss": 0.012, "step": 12920 }, { "epoch": 11.627697841726619, "grad_norm": 0.2735306918621063, "learning_rate": 9.269810619392219e-05, "loss": 0.011, "step": 12930 }, { "epoch": 11.636690647482014, "grad_norm": 0.30948126316070557, "learning_rate": 9.268376040100955e-05, "loss": 0.0159, "step": 12940 }, { "epoch": 11.64568345323741, "grad_norm": 0.24070248007774353, "learning_rate": 9.266940164189752e-05, "loss": 0.0098, "step": 12950 }, { "epoch": 11.654676258992806, "grad_norm": 0.3356586694717407, "learning_rate": 9.265502992094787e-05, "loss": 0.0109, "step": 12960 }, { "epoch": 11.663669064748202, "grad_norm": 0.20583730936050415, "learning_rate": 9.264064524252638e-05, "loss": 0.0092, "step": 12970 }, { "epoch": 11.672661870503598, "grad_norm": 0.2096441388130188, "learning_rate": 9.262624761100271e-05, "loss": 0.0135, "step": 12980 }, { "epoch": 11.681654676258994, "grad_norm": 0.25416824221611023, "learning_rate": 9.261183703075051e-05, "loss": 0.0116, "step": 12990 }, { "epoch": 11.690647482014388, "grad_norm": 0.4353386163711548, "learning_rate": 9.259741350614733e-05, "loss": 0.0118, "step": 13000 }, { "epoch": 11.699640287769784, "grad_norm": 0.3577336370944977, "learning_rate": 9.258297704157464e-05, "loss": 0.0116, "step": 13010 }, { "epoch": 11.70863309352518, "grad_norm": 0.22919116914272308, "learning_rate": 9.256852764141786e-05, "loss": 0.014, "step": 13020 }, { "epoch": 11.717625899280575, "grad_norm": 0.22853630781173706, "learning_rate": 9.255406531006634e-05, "loss": 0.0097, "step": 13030 }, { "epoch": 11.726618705035971, "grad_norm": 0.3798763155937195, "learning_rate": 9.253959005191335e-05, "loss": 0.0163, "step": 13040 }, { "epoch": 11.735611510791367, "grad_norm": 0.2964405119419098, "learning_rate": 9.25251018713561e-05, "loss": 0.0148, "step": 13050 }, { "epoch": 11.744604316546763, "grad_norm": 0.22174900770187378, "learning_rate": 9.251060077279571e-05, "loss": 0.0097, "step": 13060 }, { "epoch": 11.753597122302159, "grad_norm": 0.21656674146652222, "learning_rate": 9.249608676063724e-05, "loss": 0.0113, "step": 13070 }, { "epoch": 11.762589928057555, "grad_norm": 0.20484080910682678, "learning_rate": 9.248155983928964e-05, "loss": 0.0137, "step": 13080 }, { "epoch": 11.77158273381295, "grad_norm": 0.3100835084915161, "learning_rate": 9.246702001316583e-05, "loss": 0.0116, "step": 13090 }, { "epoch": 11.780575539568346, "grad_norm": 0.3022490441799164, "learning_rate": 9.245246728668262e-05, "loss": 0.0128, "step": 13100 }, { "epoch": 11.78956834532374, "grad_norm": 0.36203402280807495, "learning_rate": 9.243790166426073e-05, "loss": 0.0139, "step": 13110 }, { "epoch": 11.798561151079136, "grad_norm": 0.22752994298934937, "learning_rate": 9.242332315032484e-05, "loss": 0.0103, "step": 13120 }, { "epoch": 11.807553956834532, "grad_norm": 0.25339561700820923, "learning_rate": 9.240873174930349e-05, "loss": 0.0163, "step": 13130 }, { "epoch": 11.816546762589928, "grad_norm": 0.3081503212451935, "learning_rate": 9.239412746562917e-05, "loss": 0.0171, "step": 13140 }, { "epoch": 11.825539568345324, "grad_norm": 0.19274716079235077, "learning_rate": 9.237951030373828e-05, "loss": 0.0192, "step": 13150 }, { "epoch": 11.83453237410072, "grad_norm": 0.33813968300819397, "learning_rate": 9.236488026807113e-05, "loss": 0.0178, "step": 13160 }, { "epoch": 11.843525179856115, "grad_norm": 0.29009801149368286, "learning_rate": 9.235023736307193e-05, "loss": 0.012, "step": 13170 }, { "epoch": 11.852517985611511, "grad_norm": 0.4432103633880615, "learning_rate": 9.233558159318881e-05, "loss": 0.0123, "step": 13180 }, { "epoch": 11.861510791366907, "grad_norm": 0.3403881788253784, "learning_rate": 9.232091296287382e-05, "loss": 0.0105, "step": 13190 }, { "epoch": 11.870503597122303, "grad_norm": 0.1895306259393692, "learning_rate": 9.230623147658288e-05, "loss": 0.0116, "step": 13200 }, { "epoch": 11.879496402877697, "grad_norm": 0.12024836987257004, "learning_rate": 9.229153713877586e-05, "loss": 0.0091, "step": 13210 }, { "epoch": 11.888489208633093, "grad_norm": 0.2791457772254944, "learning_rate": 9.227682995391649e-05, "loss": 0.0116, "step": 13220 }, { "epoch": 11.897482014388489, "grad_norm": 0.2697121500968933, "learning_rate": 9.226210992647243e-05, "loss": 0.013, "step": 13230 }, { "epoch": 11.906474820143885, "grad_norm": 0.35669729113578796, "learning_rate": 9.224737706091525e-05, "loss": 0.0097, "step": 13240 }, { "epoch": 11.91546762589928, "grad_norm": 0.17821015417575836, "learning_rate": 9.223263136172039e-05, "loss": 0.0122, "step": 13250 }, { "epoch": 11.924460431654676, "grad_norm": 0.273267924785614, "learning_rate": 9.22178728333672e-05, "loss": 0.0152, "step": 13260 }, { "epoch": 11.933453237410072, "grad_norm": 0.3050313889980316, "learning_rate": 9.220310148033897e-05, "loss": 0.0131, "step": 13270 }, { "epoch": 11.942446043165468, "grad_norm": 0.24271784722805023, "learning_rate": 9.21883173071228e-05, "loss": 0.012, "step": 13280 }, { "epoch": 11.951438848920864, "grad_norm": 0.21442359685897827, "learning_rate": 9.217352031820976e-05, "loss": 0.0083, "step": 13290 }, { "epoch": 11.96043165467626, "grad_norm": 0.2601500451564789, "learning_rate": 9.215871051809477e-05, "loss": 0.0109, "step": 13300 }, { "epoch": 11.969424460431654, "grad_norm": 0.36562857031822205, "learning_rate": 9.214388791127666e-05, "loss": 0.0118, "step": 13310 }, { "epoch": 11.97841726618705, "grad_norm": 0.24409112334251404, "learning_rate": 9.212905250225814e-05, "loss": 0.011, "step": 13320 }, { "epoch": 11.987410071942445, "grad_norm": 0.2648206949234009, "learning_rate": 9.211420429554583e-05, "loss": 0.0103, "step": 13330 }, { "epoch": 11.996402877697841, "grad_norm": 0.24376776814460754, "learning_rate": 9.209934329565022e-05, "loss": 0.0094, "step": 13340 }, { "epoch": 12.005395683453237, "grad_norm": 0.2743682861328125, "learning_rate": 9.208446950708568e-05, "loss": 0.0145, "step": 13350 }, { "epoch": 12.014388489208633, "grad_norm": 0.21054396033287048, "learning_rate": 9.20695829343705e-05, "loss": 0.0105, "step": 13360 }, { "epoch": 12.023381294964029, "grad_norm": 0.1729123592376709, "learning_rate": 9.205468358202678e-05, "loss": 0.0089, "step": 13370 }, { "epoch": 12.032374100719425, "grad_norm": 0.2703857719898224, "learning_rate": 9.203977145458059e-05, "loss": 0.0137, "step": 13380 }, { "epoch": 12.04136690647482, "grad_norm": 0.25312456488609314, "learning_rate": 9.202484655656182e-05, "loss": 0.011, "step": 13390 }, { "epoch": 12.050359712230216, "grad_norm": 0.27564093470573425, "learning_rate": 9.200990889250427e-05, "loss": 0.0124, "step": 13400 }, { "epoch": 12.059352517985612, "grad_norm": 0.27306339144706726, "learning_rate": 9.19949584669456e-05, "loss": 0.0098, "step": 13410 }, { "epoch": 12.068345323741006, "grad_norm": 0.41827744245529175, "learning_rate": 9.197999528442738e-05, "loss": 0.0144, "step": 13420 }, { "epoch": 12.077338129496402, "grad_norm": 0.18380193412303925, "learning_rate": 9.196501934949499e-05, "loss": 0.0117, "step": 13430 }, { "epoch": 12.086330935251798, "grad_norm": 0.2917230427265167, "learning_rate": 9.195003066669776e-05, "loss": 0.0117, "step": 13440 }, { "epoch": 12.095323741007194, "grad_norm": 0.23030340671539307, "learning_rate": 9.193502924058884e-05, "loss": 0.0123, "step": 13450 }, { "epoch": 12.10431654676259, "grad_norm": 0.2477351576089859, "learning_rate": 9.192001507572526e-05, "loss": 0.0132, "step": 13460 }, { "epoch": 12.113309352517986, "grad_norm": 0.3518655300140381, "learning_rate": 9.190498817666793e-05, "loss": 0.0144, "step": 13470 }, { "epoch": 12.122302158273381, "grad_norm": 0.24782630801200867, "learning_rate": 9.188994854798163e-05, "loss": 0.0132, "step": 13480 }, { "epoch": 12.131294964028777, "grad_norm": 0.1617719978094101, "learning_rate": 9.187489619423499e-05, "loss": 0.0107, "step": 13490 }, { "epoch": 12.140287769784173, "grad_norm": 0.231026753783226, "learning_rate": 9.185983112000056e-05, "loss": 0.0089, "step": 13500 }, { "epoch": 12.149280575539569, "grad_norm": 0.20609772205352783, "learning_rate": 9.184475332985464e-05, "loss": 0.0077, "step": 13510 }, { "epoch": 12.158273381294965, "grad_norm": 0.2774205207824707, "learning_rate": 9.182966282837754e-05, "loss": 0.0113, "step": 13520 }, { "epoch": 12.167266187050359, "grad_norm": 0.2027207612991333, "learning_rate": 9.18145596201533e-05, "loss": 0.0072, "step": 13530 }, { "epoch": 12.176258992805755, "grad_norm": 0.16503570973873138, "learning_rate": 9.179944370976991e-05, "loss": 0.0085, "step": 13540 }, { "epoch": 12.18525179856115, "grad_norm": 0.20800024271011353, "learning_rate": 9.178431510181918e-05, "loss": 0.008, "step": 13550 }, { "epoch": 12.194244604316546, "grad_norm": 0.2012498527765274, "learning_rate": 9.176917380089675e-05, "loss": 0.0114, "step": 13560 }, { "epoch": 12.203237410071942, "grad_norm": 0.3094259202480316, "learning_rate": 9.175401981160219e-05, "loss": 0.0118, "step": 13570 }, { "epoch": 12.212230215827338, "grad_norm": 0.22109389305114746, "learning_rate": 9.173885313853885e-05, "loss": 0.0101, "step": 13580 }, { "epoch": 12.221223021582734, "grad_norm": 0.298629492521286, "learning_rate": 9.172367378631398e-05, "loss": 0.0141, "step": 13590 }, { "epoch": 12.23021582733813, "grad_norm": 0.22415338456630707, "learning_rate": 9.170848175953866e-05, "loss": 0.0158, "step": 13600 }, { "epoch": 12.239208633093526, "grad_norm": 0.1911367028951645, "learning_rate": 9.169327706282784e-05, "loss": 0.0097, "step": 13610 }, { "epoch": 12.248201438848922, "grad_norm": 0.27094152569770813, "learning_rate": 9.167805970080029e-05, "loss": 0.0083, "step": 13620 }, { "epoch": 12.257194244604317, "grad_norm": 0.20860126614570618, "learning_rate": 9.166282967807864e-05, "loss": 0.0105, "step": 13630 }, { "epoch": 12.266187050359711, "grad_norm": 0.21129123866558075, "learning_rate": 9.16475869992894e-05, "loss": 0.0075, "step": 13640 }, { "epoch": 12.275179856115107, "grad_norm": 0.1886698305606842, "learning_rate": 9.163233166906284e-05, "loss": 0.008, "step": 13650 }, { "epoch": 12.284172661870503, "grad_norm": 0.2760466933250427, "learning_rate": 9.161706369203317e-05, "loss": 0.0103, "step": 13660 }, { "epoch": 12.293165467625899, "grad_norm": 0.19162878394126892, "learning_rate": 9.16017830728384e-05, "loss": 0.0074, "step": 13670 }, { "epoch": 12.302158273381295, "grad_norm": 0.271524041891098, "learning_rate": 9.158648981612035e-05, "loss": 0.011, "step": 13680 }, { "epoch": 12.31115107913669, "grad_norm": 0.2698982059955597, "learning_rate": 9.157118392652472e-05, "loss": 0.0094, "step": 13690 }, { "epoch": 12.320143884892087, "grad_norm": 0.2710948884487152, "learning_rate": 9.155586540870104e-05, "loss": 0.0082, "step": 13700 }, { "epoch": 12.329136690647482, "grad_norm": 0.3507538437843323, "learning_rate": 9.154053426730267e-05, "loss": 0.0117, "step": 13710 }, { "epoch": 12.338129496402878, "grad_norm": 0.21325108408927917, "learning_rate": 9.15251905069868e-05, "loss": 0.0132, "step": 13720 }, { "epoch": 12.347122302158274, "grad_norm": 0.1651911586523056, "learning_rate": 9.150983413241446e-05, "loss": 0.0099, "step": 13730 }, { "epoch": 12.35611510791367, "grad_norm": 0.22695432603359222, "learning_rate": 9.149446514825051e-05, "loss": 0.014, "step": 13740 }, { "epoch": 12.365107913669064, "grad_norm": 0.41052332520484924, "learning_rate": 9.147908355916365e-05, "loss": 0.0133, "step": 13750 }, { "epoch": 12.37410071942446, "grad_norm": 0.22086571156978607, "learning_rate": 9.146368936982642e-05, "loss": 0.0145, "step": 13760 }, { "epoch": 12.383093525179856, "grad_norm": 0.2676173448562622, "learning_rate": 9.144828258491511e-05, "loss": 0.0119, "step": 13770 }, { "epoch": 12.392086330935252, "grad_norm": 0.19554154574871063, "learning_rate": 9.143286320910996e-05, "loss": 0.0113, "step": 13780 }, { "epoch": 12.401079136690647, "grad_norm": 0.24883292615413666, "learning_rate": 9.141743124709491e-05, "loss": 0.0113, "step": 13790 }, { "epoch": 12.410071942446043, "grad_norm": 0.236687570810318, "learning_rate": 9.140198670355784e-05, "loss": 0.0121, "step": 13800 }, { "epoch": 12.41906474820144, "grad_norm": 0.25778916478157043, "learning_rate": 9.138652958319034e-05, "loss": 0.0142, "step": 13810 }, { "epoch": 12.428057553956835, "grad_norm": 0.2465975433588028, "learning_rate": 9.137105989068791e-05, "loss": 0.0105, "step": 13820 }, { "epoch": 12.43705035971223, "grad_norm": 0.3302468955516815, "learning_rate": 9.135557763074983e-05, "loss": 0.02, "step": 13830 }, { "epoch": 12.446043165467627, "grad_norm": 0.30142948031425476, "learning_rate": 9.13400828080792e-05, "loss": 0.0137, "step": 13840 }, { "epoch": 12.45503597122302, "grad_norm": 0.27832353115081787, "learning_rate": 9.132457542738292e-05, "loss": 0.0105, "step": 13850 }, { "epoch": 12.464028776978417, "grad_norm": 0.17222201824188232, "learning_rate": 9.130905549337174e-05, "loss": 0.0093, "step": 13860 }, { "epoch": 12.473021582733812, "grad_norm": 0.2190890908241272, "learning_rate": 9.129352301076021e-05, "loss": 0.0096, "step": 13870 }, { "epoch": 12.482014388489208, "grad_norm": 0.21937359869480133, "learning_rate": 9.127797798426668e-05, "loss": 0.0092, "step": 13880 }, { "epoch": 12.491007194244604, "grad_norm": 0.20400205254554749, "learning_rate": 9.126242041861333e-05, "loss": 0.0131, "step": 13890 }, { "epoch": 12.5, "grad_norm": 0.24970640242099762, "learning_rate": 9.124685031852611e-05, "loss": 0.0132, "step": 13900 }, { "epoch": 12.508992805755396, "grad_norm": 0.2680847942829132, "learning_rate": 9.123126768873482e-05, "loss": 0.0106, "step": 13910 }, { "epoch": 12.517985611510792, "grad_norm": 0.3549495041370392, "learning_rate": 9.121567253397308e-05, "loss": 0.0129, "step": 13920 }, { "epoch": 12.526978417266188, "grad_norm": 0.18843591213226318, "learning_rate": 9.120006485897824e-05, "loss": 0.0091, "step": 13930 }, { "epoch": 12.535971223021583, "grad_norm": 0.3775230646133423, "learning_rate": 9.118444466849152e-05, "loss": 0.0146, "step": 13940 }, { "epoch": 12.54496402877698, "grad_norm": 0.2280072569847107, "learning_rate": 9.116881196725793e-05, "loss": 0.01, "step": 13950 }, { "epoch": 12.553956834532373, "grad_norm": 0.1957143098115921, "learning_rate": 9.115316676002627e-05, "loss": 0.0108, "step": 13960 }, { "epoch": 12.56294964028777, "grad_norm": 0.21143287420272827, "learning_rate": 9.113750905154911e-05, "loss": 0.011, "step": 13970 }, { "epoch": 12.571942446043165, "grad_norm": 0.24879960715770721, "learning_rate": 9.112183884658289e-05, "loss": 0.0137, "step": 13980 }, { "epoch": 12.58093525179856, "grad_norm": 0.1526785045862198, "learning_rate": 9.11061561498878e-05, "loss": 0.0068, "step": 13990 }, { "epoch": 12.589928057553957, "grad_norm": 0.248024582862854, "learning_rate": 9.109046096622779e-05, "loss": 0.0123, "step": 14000 }, { "epoch": 12.598920863309353, "grad_norm": 0.36253681778907776, "learning_rate": 9.107475330037069e-05, "loss": 0.0109, "step": 14010 }, { "epoch": 12.607913669064748, "grad_norm": 0.21254277229309082, "learning_rate": 9.105903315708806e-05, "loss": 0.0107, "step": 14020 }, { "epoch": 12.616906474820144, "grad_norm": 0.24280430376529694, "learning_rate": 9.104330054115524e-05, "loss": 0.0092, "step": 14030 }, { "epoch": 12.62589928057554, "grad_norm": 0.2645450234413147, "learning_rate": 9.102755545735141e-05, "loss": 0.0088, "step": 14040 }, { "epoch": 12.634892086330936, "grad_norm": 0.19426457583904266, "learning_rate": 9.10117979104595e-05, "loss": 0.0105, "step": 14050 }, { "epoch": 12.64388489208633, "grad_norm": 0.23139739036560059, "learning_rate": 9.099602790526624e-05, "loss": 0.0123, "step": 14060 }, { "epoch": 12.652877697841726, "grad_norm": 0.22734524309635162, "learning_rate": 9.098024544656212e-05, "loss": 0.0095, "step": 14070 }, { "epoch": 12.661870503597122, "grad_norm": 0.2666371166706085, "learning_rate": 9.096445053914148e-05, "loss": 0.0148, "step": 14080 }, { "epoch": 12.670863309352518, "grad_norm": 0.20420897006988525, "learning_rate": 9.094864318780236e-05, "loss": 0.0123, "step": 14090 }, { "epoch": 12.679856115107913, "grad_norm": 0.3342059254646301, "learning_rate": 9.093282339734663e-05, "loss": 0.0202, "step": 14100 }, { "epoch": 12.68884892086331, "grad_norm": 0.17243489623069763, "learning_rate": 9.091699117257992e-05, "loss": 0.0109, "step": 14110 }, { "epoch": 12.697841726618705, "grad_norm": 0.2850978970527649, "learning_rate": 9.090114651831163e-05, "loss": 0.0096, "step": 14120 }, { "epoch": 12.706834532374101, "grad_norm": 0.3151565194129944, "learning_rate": 9.088528943935497e-05, "loss": 0.0159, "step": 14130 }, { "epoch": 12.715827338129497, "grad_norm": 0.18361997604370117, "learning_rate": 9.086941994052689e-05, "loss": 0.01, "step": 14140 }, { "epoch": 12.724820143884893, "grad_norm": 0.22996453940868378, "learning_rate": 9.085353802664813e-05, "loss": 0.0091, "step": 14150 }, { "epoch": 12.733812949640289, "grad_norm": 0.20914024114608765, "learning_rate": 9.08376437025432e-05, "loss": 0.0104, "step": 14160 }, { "epoch": 12.742805755395683, "grad_norm": 0.29819056391716003, "learning_rate": 9.082173697304035e-05, "loss": 0.012, "step": 14170 }, { "epoch": 12.751798561151078, "grad_norm": 0.17086073756217957, "learning_rate": 9.080581784297166e-05, "loss": 0.0079, "step": 14180 }, { "epoch": 12.760791366906474, "grad_norm": 0.2433379590511322, "learning_rate": 9.078988631717291e-05, "loss": 0.0115, "step": 14190 }, { "epoch": 12.76978417266187, "grad_norm": 0.2407350093126297, "learning_rate": 9.077394240048369e-05, "loss": 0.0097, "step": 14200 }, { "epoch": 12.778776978417266, "grad_norm": 0.2824952006340027, "learning_rate": 9.075798609774736e-05, "loss": 0.0106, "step": 14210 }, { "epoch": 12.787769784172662, "grad_norm": 0.20797856152057648, "learning_rate": 9.0742017413811e-05, "loss": 0.0081, "step": 14220 }, { "epoch": 12.796762589928058, "grad_norm": 0.265487939119339, "learning_rate": 9.072603635352548e-05, "loss": 0.01, "step": 14230 }, { "epoch": 12.805755395683454, "grad_norm": 0.41181719303131104, "learning_rate": 9.071004292174541e-05, "loss": 0.0127, "step": 14240 }, { "epoch": 12.81474820143885, "grad_norm": 0.2312803715467453, "learning_rate": 9.06940371233292e-05, "loss": 0.0099, "step": 14250 }, { "epoch": 12.823741007194245, "grad_norm": 0.19818851351737976, "learning_rate": 9.067801896313898e-05, "loss": 0.0129, "step": 14260 }, { "epoch": 12.832733812949641, "grad_norm": 0.1524214893579483, "learning_rate": 9.066198844604064e-05, "loss": 0.0114, "step": 14270 }, { "epoch": 12.841726618705035, "grad_norm": 0.2697843909263611, "learning_rate": 9.06459455769038e-05, "loss": 0.0145, "step": 14280 }, { "epoch": 12.850719424460431, "grad_norm": 0.3688749670982361, "learning_rate": 9.062989036060193e-05, "loss": 0.0182, "step": 14290 }, { "epoch": 12.859712230215827, "grad_norm": 0.13475191593170166, "learning_rate": 9.061382280201212e-05, "loss": 0.0095, "step": 14300 }, { "epoch": 12.868705035971223, "grad_norm": 0.16598422825336456, "learning_rate": 9.059774290601528e-05, "loss": 0.0161, "step": 14310 }, { "epoch": 12.877697841726619, "grad_norm": 0.18007928133010864, "learning_rate": 9.058165067749606e-05, "loss": 0.0109, "step": 14320 }, { "epoch": 12.886690647482014, "grad_norm": 0.30204856395721436, "learning_rate": 9.056554612134288e-05, "loss": 0.0114, "step": 14330 }, { "epoch": 12.89568345323741, "grad_norm": 0.17585065960884094, "learning_rate": 9.054942924244785e-05, "loss": 0.0108, "step": 14340 }, { "epoch": 12.904676258992806, "grad_norm": 0.2805998921394348, "learning_rate": 9.053330004570686e-05, "loss": 0.0126, "step": 14350 }, { "epoch": 12.913669064748202, "grad_norm": 0.1955961287021637, "learning_rate": 9.051715853601955e-05, "loss": 0.0104, "step": 14360 }, { "epoch": 12.922661870503598, "grad_norm": 0.22856222093105316, "learning_rate": 9.050100471828926e-05, "loss": 0.0152, "step": 14370 }, { "epoch": 12.931654676258994, "grad_norm": 0.21125560998916626, "learning_rate": 9.048483859742311e-05, "loss": 0.0164, "step": 14380 }, { "epoch": 12.940647482014388, "grad_norm": 0.30177733302116394, "learning_rate": 9.046866017833193e-05, "loss": 0.0115, "step": 14390 }, { "epoch": 12.949640287769784, "grad_norm": 0.320056289434433, "learning_rate": 9.045246946593029e-05, "loss": 0.013, "step": 14400 }, { "epoch": 12.95863309352518, "grad_norm": 0.2607099413871765, "learning_rate": 9.043626646513652e-05, "loss": 0.0113, "step": 14410 }, { "epoch": 12.967625899280575, "grad_norm": 0.18773777782917023, "learning_rate": 9.042005118087267e-05, "loss": 0.009, "step": 14420 }, { "epoch": 12.976618705035971, "grad_norm": 0.19817867875099182, "learning_rate": 9.040382361806448e-05, "loss": 0.0131, "step": 14430 }, { "epoch": 12.985611510791367, "grad_norm": 0.12172901630401611, "learning_rate": 9.038758378164148e-05, "loss": 0.0138, "step": 14440 }, { "epoch": 12.994604316546763, "grad_norm": 0.21699270606040955, "learning_rate": 9.037133167653691e-05, "loss": 0.0094, "step": 14450 }, { "epoch": 13.003597122302159, "grad_norm": 0.22456544637680054, "learning_rate": 9.035506730768771e-05, "loss": 0.0094, "step": 14460 }, { "epoch": 13.012589928057555, "grad_norm": 0.2735946774482727, "learning_rate": 9.033879068003458e-05, "loss": 0.0107, "step": 14470 }, { "epoch": 13.02158273381295, "grad_norm": 0.14928172528743744, "learning_rate": 9.032250179852193e-05, "loss": 0.0115, "step": 14480 }, { "epoch": 13.030575539568344, "grad_norm": 0.22018547356128693, "learning_rate": 9.030620066809787e-05, "loss": 0.0092, "step": 14490 }, { "epoch": 13.03956834532374, "grad_norm": 0.3109798729419708, "learning_rate": 9.028988729371428e-05, "loss": 0.0098, "step": 14500 }, { "epoch": 13.048561151079136, "grad_norm": 0.14190156757831573, "learning_rate": 9.027356168032673e-05, "loss": 0.01, "step": 14510 }, { "epoch": 13.057553956834532, "grad_norm": 0.3585223853588104, "learning_rate": 9.02572238328945e-05, "loss": 0.0133, "step": 14520 }, { "epoch": 13.066546762589928, "grad_norm": 0.22552068531513214, "learning_rate": 9.02408737563806e-05, "loss": 0.0118, "step": 14530 }, { "epoch": 13.075539568345324, "grad_norm": 0.19305174052715302, "learning_rate": 9.022451145575174e-05, "loss": 0.0108, "step": 14540 }, { "epoch": 13.08453237410072, "grad_norm": 0.29998359084129333, "learning_rate": 9.02081369359784e-05, "loss": 0.0104, "step": 14550 }, { "epoch": 13.093525179856115, "grad_norm": 0.28513994812965393, "learning_rate": 9.019175020203465e-05, "loss": 0.0092, "step": 14560 }, { "epoch": 13.102517985611511, "grad_norm": 0.25784236192703247, "learning_rate": 9.017535125889842e-05, "loss": 0.0081, "step": 14570 }, { "epoch": 13.111510791366907, "grad_norm": 0.21355178952217102, "learning_rate": 9.015894011155124e-05, "loss": 0.0122, "step": 14580 }, { "epoch": 13.120503597122303, "grad_norm": 0.2458760291337967, "learning_rate": 9.014251676497838e-05, "loss": 0.0149, "step": 14590 }, { "epoch": 13.129496402877697, "grad_norm": 0.20571498572826385, "learning_rate": 9.012608122416884e-05, "loss": 0.009, "step": 14600 }, { "epoch": 13.138489208633093, "grad_norm": 0.2010141760110855, "learning_rate": 9.010963349411529e-05, "loss": 0.0125, "step": 14610 }, { "epoch": 13.147482014388489, "grad_norm": 0.15296240150928497, "learning_rate": 9.00931735798141e-05, "loss": 0.014, "step": 14620 }, { "epoch": 13.156474820143885, "grad_norm": 0.23267903923988342, "learning_rate": 9.00767014862654e-05, "loss": 0.009, "step": 14630 }, { "epoch": 13.16546762589928, "grad_norm": 0.23899300396442413, "learning_rate": 9.006021721847295e-05, "loss": 0.0107, "step": 14640 }, { "epoch": 13.174460431654676, "grad_norm": 0.31596964597702026, "learning_rate": 9.004372078144423e-05, "loss": 0.013, "step": 14650 }, { "epoch": 13.183453237410072, "grad_norm": 0.30362096428871155, "learning_rate": 9.002721218019043e-05, "loss": 0.0146, "step": 14660 }, { "epoch": 13.192446043165468, "grad_norm": 0.23348034918308258, "learning_rate": 9.001069141972642e-05, "loss": 0.0093, "step": 14670 }, { "epoch": 13.201438848920864, "grad_norm": 0.23769429326057434, "learning_rate": 8.99941585050708e-05, "loss": 0.0117, "step": 14680 }, { "epoch": 13.21043165467626, "grad_norm": 0.26894906163215637, "learning_rate": 8.997761344124578e-05, "loss": 0.0104, "step": 14690 }, { "epoch": 13.219424460431656, "grad_norm": 0.17734076082706451, "learning_rate": 8.996105623327737e-05, "loss": 0.0078, "step": 14700 }, { "epoch": 13.22841726618705, "grad_norm": 0.24068190157413483, "learning_rate": 8.994448688619517e-05, "loss": 0.0092, "step": 14710 }, { "epoch": 13.237410071942445, "grad_norm": 0.1661675125360489, "learning_rate": 8.992790540503253e-05, "loss": 0.0097, "step": 14720 }, { "epoch": 13.246402877697841, "grad_norm": 0.2924773097038269, "learning_rate": 8.991131179482648e-05, "loss": 0.0085, "step": 14730 }, { "epoch": 13.255395683453237, "grad_norm": 0.23348107933998108, "learning_rate": 8.989470606061768e-05, "loss": 0.0104, "step": 14740 }, { "epoch": 13.264388489208633, "grad_norm": 0.21529029309749603, "learning_rate": 8.987808820745056e-05, "loss": 0.0137, "step": 14750 }, { "epoch": 13.273381294964029, "grad_norm": 0.2496749460697174, "learning_rate": 8.986145824037315e-05, "loss": 0.0091, "step": 14760 }, { "epoch": 13.282374100719425, "grad_norm": 0.21816065907478333, "learning_rate": 8.984481616443721e-05, "loss": 0.0162, "step": 14770 }, { "epoch": 13.29136690647482, "grad_norm": 0.21290165185928345, "learning_rate": 8.982816198469815e-05, "loss": 0.0079, "step": 14780 }, { "epoch": 13.300359712230216, "grad_norm": 0.21746376156806946, "learning_rate": 8.98114957062151e-05, "loss": 0.0119, "step": 14790 }, { "epoch": 13.309352517985612, "grad_norm": 0.1789768636226654, "learning_rate": 8.97948173340508e-05, "loss": 0.0093, "step": 14800 }, { "epoch": 13.318345323741006, "grad_norm": 0.2018897980451584, "learning_rate": 8.977812687327172e-05, "loss": 0.0077, "step": 14810 }, { "epoch": 13.327338129496402, "grad_norm": 0.2707889676094055, "learning_rate": 8.976142432894798e-05, "loss": 0.0103, "step": 14820 }, { "epoch": 13.336330935251798, "grad_norm": 0.20224887132644653, "learning_rate": 8.974470970615336e-05, "loss": 0.0112, "step": 14830 }, { "epoch": 13.345323741007194, "grad_norm": 0.22354251146316528, "learning_rate": 8.972798300996534e-05, "loss": 0.0117, "step": 14840 }, { "epoch": 13.35431654676259, "grad_norm": 0.21821460127830505, "learning_rate": 8.971124424546504e-05, "loss": 0.0082, "step": 14850 }, { "epoch": 13.363309352517986, "grad_norm": 0.24182917177677155, "learning_rate": 8.969449341773724e-05, "loss": 0.012, "step": 14860 }, { "epoch": 13.372302158273381, "grad_norm": 0.15081559121608734, "learning_rate": 8.967773053187042e-05, "loss": 0.0149, "step": 14870 }, { "epoch": 13.381294964028777, "grad_norm": 0.17408177256584167, "learning_rate": 8.966095559295668e-05, "loss": 0.0095, "step": 14880 }, { "epoch": 13.390287769784173, "grad_norm": 0.20570339262485504, "learning_rate": 8.964416860609184e-05, "loss": 0.0116, "step": 14890 }, { "epoch": 13.399280575539569, "grad_norm": 0.24083878099918365, "learning_rate": 8.962736957637532e-05, "loss": 0.0081, "step": 14900 }, { "epoch": 13.408273381294965, "grad_norm": 0.15825434029102325, "learning_rate": 8.96105585089102e-05, "loss": 0.0077, "step": 14910 }, { "epoch": 13.417266187050359, "grad_norm": 0.19344258308410645, "learning_rate": 8.959373540880329e-05, "loss": 0.0157, "step": 14920 }, { "epoch": 13.426258992805755, "grad_norm": 0.2402864545583725, "learning_rate": 8.957690028116495e-05, "loss": 0.0117, "step": 14930 }, { "epoch": 13.43525179856115, "grad_norm": 0.26676592230796814, "learning_rate": 8.956005313110928e-05, "loss": 0.0102, "step": 14940 }, { "epoch": 13.444244604316546, "grad_norm": 0.22831247746944427, "learning_rate": 8.9543193963754e-05, "loss": 0.0129, "step": 14950 }, { "epoch": 13.453237410071942, "grad_norm": 0.3596191108226776, "learning_rate": 8.952632278422048e-05, "loss": 0.0099, "step": 14960 }, { "epoch": 13.462230215827338, "grad_norm": 0.21380792558193207, "learning_rate": 8.95094395976337e-05, "loss": 0.0133, "step": 14970 }, { "epoch": 13.471223021582734, "grad_norm": 0.29379627108573914, "learning_rate": 8.949254440912239e-05, "loss": 0.0102, "step": 14980 }, { "epoch": 13.48021582733813, "grad_norm": 0.37093135714530945, "learning_rate": 8.94756372238188e-05, "loss": 0.0128, "step": 14990 }, { "epoch": 13.489208633093526, "grad_norm": 0.288646399974823, "learning_rate": 8.945871804685892e-05, "loss": 0.0105, "step": 15000 }, { "epoch": 13.498201438848922, "grad_norm": 0.17932194471359253, "learning_rate": 8.944178688338236e-05, "loss": 0.0098, "step": 15010 }, { "epoch": 13.507194244604317, "grad_norm": 0.2859274744987488, "learning_rate": 8.942484373853233e-05, "loss": 0.0101, "step": 15020 }, { "epoch": 13.516187050359711, "grad_norm": 0.24770952761173248, "learning_rate": 8.940788861745572e-05, "loss": 0.015, "step": 15030 }, { "epoch": 13.525179856115107, "grad_norm": 0.2924429178237915, "learning_rate": 8.939092152530308e-05, "loss": 0.0118, "step": 15040 }, { "epoch": 13.534172661870503, "grad_norm": 0.23677320778369904, "learning_rate": 8.937394246722853e-05, "loss": 0.0088, "step": 15050 }, { "epoch": 13.543165467625899, "grad_norm": 0.30752697587013245, "learning_rate": 8.935695144838984e-05, "loss": 0.0142, "step": 15060 }, { "epoch": 13.552158273381295, "grad_norm": 0.26452726125717163, "learning_rate": 8.933994847394849e-05, "loss": 0.0105, "step": 15070 }, { "epoch": 13.56115107913669, "grad_norm": 0.22731105983257294, "learning_rate": 8.932293354906949e-05, "loss": 0.0099, "step": 15080 }, { "epoch": 13.570143884892087, "grad_norm": 0.21999333798885345, "learning_rate": 8.930590667892153e-05, "loss": 0.0098, "step": 15090 }, { "epoch": 13.579136690647482, "grad_norm": 0.3491087555885315, "learning_rate": 8.928886786867696e-05, "loss": 0.0093, "step": 15100 }, { "epoch": 13.588129496402878, "grad_norm": 0.17552609741687775, "learning_rate": 8.927181712351168e-05, "loss": 0.0082, "step": 15110 }, { "epoch": 13.597122302158274, "grad_norm": 0.24263985455036163, "learning_rate": 8.925475444860527e-05, "loss": 0.0094, "step": 15120 }, { "epoch": 13.60611510791367, "grad_norm": 0.20188076794147491, "learning_rate": 8.923767984914092e-05, "loss": 0.0067, "step": 15130 }, { "epoch": 13.615107913669064, "grad_norm": 0.2270517349243164, "learning_rate": 8.922059333030545e-05, "loss": 0.0134, "step": 15140 }, { "epoch": 13.62410071942446, "grad_norm": 0.16194631159305573, "learning_rate": 8.920349489728928e-05, "loss": 0.0084, "step": 15150 }, { "epoch": 13.633093525179856, "grad_norm": 0.27757498621940613, "learning_rate": 8.918638455528646e-05, "loss": 0.0131, "step": 15160 }, { "epoch": 13.642086330935252, "grad_norm": 0.11738858371973038, "learning_rate": 8.916926230949468e-05, "loss": 0.0101, "step": 15170 }, { "epoch": 13.651079136690647, "grad_norm": 0.26244214177131653, "learning_rate": 8.915212816511522e-05, "loss": 0.0102, "step": 15180 }, { "epoch": 13.660071942446043, "grad_norm": 0.2899591624736786, "learning_rate": 8.913498212735296e-05, "loss": 0.0121, "step": 15190 }, { "epoch": 13.66906474820144, "grad_norm": 0.12153500318527222, "learning_rate": 8.911782420141643e-05, "loss": 0.0106, "step": 15200 }, { "epoch": 13.678057553956835, "grad_norm": 0.2553713619709015, "learning_rate": 8.910065439251775e-05, "loss": 0.0083, "step": 15210 }, { "epoch": 13.68705035971223, "grad_norm": 0.3894456923007965, "learning_rate": 8.908347270587268e-05, "loss": 0.0091, "step": 15220 }, { "epoch": 13.696043165467627, "grad_norm": 0.2350134402513504, "learning_rate": 8.906627914670054e-05, "loss": 0.0089, "step": 15230 }, { "epoch": 13.70503597122302, "grad_norm": 0.22562094032764435, "learning_rate": 8.904907372022427e-05, "loss": 0.0085, "step": 15240 }, { "epoch": 13.714028776978417, "grad_norm": 0.3145650029182434, "learning_rate": 8.903185643167042e-05, "loss": 0.0107, "step": 15250 }, { "epoch": 13.723021582733812, "grad_norm": 0.19472414255142212, "learning_rate": 8.901462728626919e-05, "loss": 0.0097, "step": 15260 }, { "epoch": 13.732014388489208, "grad_norm": 0.2883903682231903, "learning_rate": 8.899738628925429e-05, "loss": 0.0111, "step": 15270 }, { "epoch": 13.741007194244604, "grad_norm": 0.2494482398033142, "learning_rate": 8.898013344586312e-05, "loss": 0.0082, "step": 15280 }, { "epoch": 13.75, "grad_norm": 0.2047293782234192, "learning_rate": 8.896286876133661e-05, "loss": 0.016, "step": 15290 }, { "epoch": 13.758992805755396, "grad_norm": 0.36113241314888, "learning_rate": 8.894559224091933e-05, "loss": 0.011, "step": 15300 }, { "epoch": 13.767985611510792, "grad_norm": 0.2125655561685562, "learning_rate": 8.892830388985942e-05, "loss": 0.012, "step": 15310 }, { "epoch": 13.776978417266188, "grad_norm": 0.19207368791103363, "learning_rate": 8.891100371340864e-05, "loss": 0.0086, "step": 15320 }, { "epoch": 13.785971223021583, "grad_norm": 0.24318797886371613, "learning_rate": 8.889369171682231e-05, "loss": 0.0143, "step": 15330 }, { "epoch": 13.79496402877698, "grad_norm": 0.226509690284729, "learning_rate": 8.887636790535936e-05, "loss": 0.0094, "step": 15340 }, { "epoch": 13.803956834532373, "grad_norm": 0.21386075019836426, "learning_rate": 8.885903228428231e-05, "loss": 0.0091, "step": 15350 }, { "epoch": 13.81294964028777, "grad_norm": 0.2872220277786255, "learning_rate": 8.884168485885727e-05, "loss": 0.0107, "step": 15360 }, { "epoch": 13.821942446043165, "grad_norm": 0.11672324687242508, "learning_rate": 8.882432563435393e-05, "loss": 0.0106, "step": 15370 }, { "epoch": 13.83093525179856, "grad_norm": 0.1854148954153061, "learning_rate": 8.880695461604556e-05, "loss": 0.0088, "step": 15380 }, { "epoch": 13.839928057553957, "grad_norm": 0.23022009432315826, "learning_rate": 8.878957180920901e-05, "loss": 0.0094, "step": 15390 }, { "epoch": 13.848920863309353, "grad_norm": 0.2468973696231842, "learning_rate": 8.877217721912473e-05, "loss": 0.0093, "step": 15400 }, { "epoch": 13.857913669064748, "grad_norm": 0.1798933744430542, "learning_rate": 8.875477085107673e-05, "loss": 0.0092, "step": 15410 }, { "epoch": 13.866906474820144, "grad_norm": 0.3874712586402893, "learning_rate": 8.87373527103526e-05, "loss": 0.0128, "step": 15420 }, { "epoch": 13.87589928057554, "grad_norm": 0.2809624969959259, "learning_rate": 8.871992280224353e-05, "loss": 0.0085, "step": 15430 }, { "epoch": 13.884892086330936, "grad_norm": 0.17906266450881958, "learning_rate": 8.870248113204422e-05, "loss": 0.009, "step": 15440 }, { "epoch": 13.89388489208633, "grad_norm": 0.27327960729599, "learning_rate": 8.868502770505306e-05, "loss": 0.0124, "step": 15450 }, { "epoch": 13.902877697841726, "grad_norm": 0.15610255300998688, "learning_rate": 8.86675625265719e-05, "loss": 0.0122, "step": 15460 }, { "epoch": 13.911870503597122, "grad_norm": 0.20549115538597107, "learning_rate": 8.865008560190618e-05, "loss": 0.0092, "step": 15470 }, { "epoch": 13.920863309352518, "grad_norm": 0.22228257358074188, "learning_rate": 8.863259693636496e-05, "loss": 0.0111, "step": 15480 }, { "epoch": 13.929856115107913, "grad_norm": 0.21533739566802979, "learning_rate": 8.861509653526083e-05, "loss": 0.0098, "step": 15490 }, { "epoch": 13.93884892086331, "grad_norm": 0.1858481764793396, "learning_rate": 8.859758440390993e-05, "loss": 0.0103, "step": 15500 }, { "epoch": 13.947841726618705, "grad_norm": 0.1716640442609787, "learning_rate": 8.858006054763202e-05, "loss": 0.0088, "step": 15510 }, { "epoch": 13.956834532374101, "grad_norm": 0.2869868576526642, "learning_rate": 8.856252497175035e-05, "loss": 0.0108, "step": 15520 }, { "epoch": 13.965827338129497, "grad_norm": 0.1952451914548874, "learning_rate": 8.854497768159178e-05, "loss": 0.0063, "step": 15530 }, { "epoch": 13.974820143884893, "grad_norm": 0.29497194290161133, "learning_rate": 8.852741868248671e-05, "loss": 0.0082, "step": 15540 }, { "epoch": 13.983812949640289, "grad_norm": 0.2870565354824066, "learning_rate": 8.85098479797691e-05, "loss": 0.0098, "step": 15550 }, { "epoch": 13.992805755395683, "grad_norm": 0.29583585262298584, "learning_rate": 8.849226557877646e-05, "loss": 0.0089, "step": 15560 }, { "epoch": 14.001798561151078, "grad_norm": 0.2944290339946747, "learning_rate": 8.84746714848499e-05, "loss": 0.0094, "step": 15570 }, { "epoch": 14.010791366906474, "grad_norm": 0.2579784095287323, "learning_rate": 8.845706570333397e-05, "loss": 0.0082, "step": 15580 }, { "epoch": 14.01978417266187, "grad_norm": 0.14952094852924347, "learning_rate": 8.84394482395769e-05, "loss": 0.0136, "step": 15590 }, { "epoch": 14.028776978417266, "grad_norm": 0.25061914324760437, "learning_rate": 8.842181909893038e-05, "loss": 0.0087, "step": 15600 }, { "epoch": 14.037769784172662, "grad_norm": 0.23977071046829224, "learning_rate": 8.840417828674969e-05, "loss": 0.0098, "step": 15610 }, { "epoch": 14.046762589928058, "grad_norm": 0.22438737750053406, "learning_rate": 8.838652580839364e-05, "loss": 0.0124, "step": 15620 }, { "epoch": 14.055755395683454, "grad_norm": 0.305894672870636, "learning_rate": 8.836886166922458e-05, "loss": 0.0178, "step": 15630 }, { "epoch": 14.06474820143885, "grad_norm": 0.2361755520105362, "learning_rate": 8.835118587460844e-05, "loss": 0.0096, "step": 15640 }, { "epoch": 14.073741007194245, "grad_norm": 0.3202846646308899, "learning_rate": 8.83334984299146e-05, "loss": 0.015, "step": 15650 }, { "epoch": 14.082733812949641, "grad_norm": 0.22945845127105713, "learning_rate": 8.83157993405161e-05, "loss": 0.0175, "step": 15660 }, { "epoch": 14.091726618705035, "grad_norm": 0.2539166212081909, "learning_rate": 8.829808861178943e-05, "loss": 0.0098, "step": 15670 }, { "epoch": 14.100719424460431, "grad_norm": 0.28999248147010803, "learning_rate": 8.828036624911464e-05, "loss": 0.0142, "step": 15680 }, { "epoch": 14.109712230215827, "grad_norm": 0.3064739406108856, "learning_rate": 8.826263225787532e-05, "loss": 0.0108, "step": 15690 }, { "epoch": 14.118705035971223, "grad_norm": 0.30878397822380066, "learning_rate": 8.824488664345858e-05, "loss": 0.0107, "step": 15700 }, { "epoch": 14.127697841726619, "grad_norm": 0.3342963457107544, "learning_rate": 8.822712941125508e-05, "loss": 0.0109, "step": 15710 }, { "epoch": 14.136690647482014, "grad_norm": 0.3043003976345062, "learning_rate": 8.820936056665898e-05, "loss": 0.0117, "step": 15720 }, { "epoch": 14.14568345323741, "grad_norm": 0.27247154712677, "learning_rate": 8.819158011506801e-05, "loss": 0.0114, "step": 15730 }, { "epoch": 14.154676258992806, "grad_norm": 0.23193004727363586, "learning_rate": 8.81737880618834e-05, "loss": 0.0093, "step": 15740 }, { "epoch": 14.163669064748202, "grad_norm": 0.22341786324977875, "learning_rate": 8.815598441250987e-05, "loss": 0.0124, "step": 15750 }, { "epoch": 14.172661870503598, "grad_norm": 0.17934224009513855, "learning_rate": 8.813816917235576e-05, "loss": 0.0093, "step": 15760 }, { "epoch": 14.181654676258994, "grad_norm": 0.1953420490026474, "learning_rate": 8.812034234683282e-05, "loss": 0.011, "step": 15770 }, { "epoch": 14.190647482014388, "grad_norm": 0.21563628315925598, "learning_rate": 8.810250394135637e-05, "loss": 0.0088, "step": 15780 }, { "epoch": 14.199640287769784, "grad_norm": 0.16159741580486298, "learning_rate": 8.808465396134529e-05, "loss": 0.0108, "step": 15790 }, { "epoch": 14.20863309352518, "grad_norm": 0.2393561154603958, "learning_rate": 8.806679241222189e-05, "loss": 0.0082, "step": 15800 }, { "epoch": 14.217625899280575, "grad_norm": 0.4141492545604706, "learning_rate": 8.804891929941203e-05, "loss": 0.0137, "step": 15810 }, { "epoch": 14.226618705035971, "grad_norm": 0.28106510639190674, "learning_rate": 8.803103462834514e-05, "loss": 0.0128, "step": 15820 }, { "epoch": 14.235611510791367, "grad_norm": 0.21820348501205444, "learning_rate": 8.801313840445408e-05, "loss": 0.0095, "step": 15830 }, { "epoch": 14.244604316546763, "grad_norm": 0.14486849308013916, "learning_rate": 8.799523063317524e-05, "loss": 0.0085, "step": 15840 }, { "epoch": 14.253597122302159, "grad_norm": 0.29054418206214905, "learning_rate": 8.797731131994854e-05, "loss": 0.0087, "step": 15850 }, { "epoch": 14.262589928057555, "grad_norm": 0.31967201828956604, "learning_rate": 8.795938047021739e-05, "loss": 0.0117, "step": 15860 }, { "epoch": 14.27158273381295, "grad_norm": 0.22367221117019653, "learning_rate": 8.794143808942872e-05, "loss": 0.0108, "step": 15870 }, { "epoch": 14.280575539568344, "grad_norm": 0.34833523631095886, "learning_rate": 8.792348418303296e-05, "loss": 0.0113, "step": 15880 }, { "epoch": 14.28956834532374, "grad_norm": 0.2411731779575348, "learning_rate": 8.790551875648398e-05, "loss": 0.0145, "step": 15890 }, { "epoch": 14.298561151079136, "grad_norm": 0.2111077457666397, "learning_rate": 8.788754181523926e-05, "loss": 0.0155, "step": 15900 }, { "epoch": 14.307553956834532, "grad_norm": 0.35236358642578125, "learning_rate": 8.78695533647597e-05, "loss": 0.0148, "step": 15910 }, { "epoch": 14.316546762589928, "grad_norm": 0.32358598709106445, "learning_rate": 8.785155341050972e-05, "loss": 0.0174, "step": 15920 }, { "epoch": 14.325539568345324, "grad_norm": 0.2156430333852768, "learning_rate": 8.783354195795721e-05, "loss": 0.0119, "step": 15930 }, { "epoch": 14.33453237410072, "grad_norm": 0.4186049699783325, "learning_rate": 8.78155190125736e-05, "loss": 0.013, "step": 15940 }, { "epoch": 14.343525179856115, "grad_norm": 0.2547266185283661, "learning_rate": 8.779748457983378e-05, "loss": 0.0141, "step": 15950 }, { "epoch": 14.352517985611511, "grad_norm": 0.21285440027713776, "learning_rate": 8.777943866521612e-05, "loss": 0.0188, "step": 15960 }, { "epoch": 14.361510791366907, "grad_norm": 0.3008649945259094, "learning_rate": 8.77613812742025e-05, "loss": 0.0075, "step": 15970 }, { "epoch": 14.370503597122303, "grad_norm": 0.3220824897289276, "learning_rate": 8.774331241227829e-05, "loss": 0.0133, "step": 15980 }, { "epoch": 14.379496402877697, "grad_norm": 0.19007839262485504, "learning_rate": 8.772523208493232e-05, "loss": 0.0078, "step": 15990 }, { "epoch": 14.388489208633093, "grad_norm": 0.21430204808712006, "learning_rate": 8.770714029765692e-05, "loss": 0.0108, "step": 16000 }, { "epoch": 14.397482014388489, "grad_norm": 0.25413382053375244, "learning_rate": 8.768903705594789e-05, "loss": 0.0118, "step": 16010 }, { "epoch": 14.406474820143885, "grad_norm": 0.25049853324890137, "learning_rate": 8.767092236530453e-05, "loss": 0.0116, "step": 16020 }, { "epoch": 14.41546762589928, "grad_norm": 0.3005725145339966, "learning_rate": 8.76527962312296e-05, "loss": 0.0123, "step": 16030 }, { "epoch": 14.424460431654676, "grad_norm": 0.2540719509124756, "learning_rate": 8.763465865922934e-05, "loss": 0.0085, "step": 16040 }, { "epoch": 14.433453237410072, "grad_norm": 0.34110090136528015, "learning_rate": 8.761650965481347e-05, "loss": 0.0128, "step": 16050 }, { "epoch": 14.442446043165468, "grad_norm": 0.17483095824718475, "learning_rate": 8.759834922349516e-05, "loss": 0.0088, "step": 16060 }, { "epoch": 14.451438848920864, "grad_norm": 0.18936604261398315, "learning_rate": 8.758017737079108e-05, "loss": 0.01, "step": 16070 }, { "epoch": 14.46043165467626, "grad_norm": 0.20103982090950012, "learning_rate": 8.756199410222137e-05, "loss": 0.0067, "step": 16080 }, { "epoch": 14.469424460431656, "grad_norm": 0.19742275774478912, "learning_rate": 8.754379942330963e-05, "loss": 0.0085, "step": 16090 }, { "epoch": 14.47841726618705, "grad_norm": 0.175361767411232, "learning_rate": 8.75255933395829e-05, "loss": 0.0084, "step": 16100 }, { "epoch": 14.487410071942445, "grad_norm": 0.2532597780227661, "learning_rate": 8.750737585657171e-05, "loss": 0.0104, "step": 16110 }, { "epoch": 14.496402877697841, "grad_norm": 0.2781008780002594, "learning_rate": 8.748914697981008e-05, "loss": 0.0096, "step": 16120 }, { "epoch": 14.505395683453237, "grad_norm": 0.27127864956855774, "learning_rate": 8.747090671483542e-05, "loss": 0.0103, "step": 16130 }, { "epoch": 14.514388489208633, "grad_norm": 0.15573066473007202, "learning_rate": 8.745265506718869e-05, "loss": 0.0064, "step": 16140 }, { "epoch": 14.523381294964029, "grad_norm": 0.21429665386676788, "learning_rate": 8.74343920424142e-05, "loss": 0.0092, "step": 16150 }, { "epoch": 14.532374100719425, "grad_norm": 0.22140882909297943, "learning_rate": 8.741611764605982e-05, "loss": 0.0079, "step": 16160 }, { "epoch": 14.54136690647482, "grad_norm": 0.14445729553699493, "learning_rate": 8.739783188367682e-05, "loss": 0.0091, "step": 16170 }, { "epoch": 14.550359712230216, "grad_norm": 0.16977271437644958, "learning_rate": 8.737953476081991e-05, "loss": 0.0071, "step": 16180 }, { "epoch": 14.559352517985612, "grad_norm": 0.19994422793388367, "learning_rate": 8.73612262830473e-05, "loss": 0.0082, "step": 16190 }, { "epoch": 14.568345323741006, "grad_norm": 0.18389743566513062, "learning_rate": 8.734290645592061e-05, "loss": 0.008, "step": 16200 }, { "epoch": 14.577338129496402, "grad_norm": 0.2270323485136032, "learning_rate": 8.732457528500493e-05, "loss": 0.0092, "step": 16210 }, { "epoch": 14.586330935251798, "grad_norm": 0.174245685338974, "learning_rate": 8.730623277586875e-05, "loss": 0.0102, "step": 16220 }, { "epoch": 14.595323741007194, "grad_norm": 0.20942485332489014, "learning_rate": 8.72878789340841e-05, "loss": 0.0079, "step": 16230 }, { "epoch": 14.60431654676259, "grad_norm": 0.16292549669742584, "learning_rate": 8.726951376522635e-05, "loss": 0.0116, "step": 16240 }, { "epoch": 14.613309352517986, "grad_norm": 0.2612450122833252, "learning_rate": 8.725113727487435e-05, "loss": 0.0097, "step": 16250 }, { "epoch": 14.622302158273381, "grad_norm": 0.12932074069976807, "learning_rate": 8.723274946861042e-05, "loss": 0.0075, "step": 16260 }, { "epoch": 14.631294964028777, "grad_norm": 0.23403342068195343, "learning_rate": 8.721435035202026e-05, "loss": 0.008, "step": 16270 }, { "epoch": 14.640287769784173, "grad_norm": 0.24784354865550995, "learning_rate": 8.719593993069306e-05, "loss": 0.0123, "step": 16280 }, { "epoch": 14.649280575539569, "grad_norm": 0.22112242877483368, "learning_rate": 8.717751821022139e-05, "loss": 0.0095, "step": 16290 }, { "epoch": 14.658273381294965, "grad_norm": 0.2011929154396057, "learning_rate": 8.715908519620134e-05, "loss": 0.0096, "step": 16300 }, { "epoch": 14.667266187050359, "grad_norm": 0.1911080777645111, "learning_rate": 8.71406408942323e-05, "loss": 0.0104, "step": 16310 }, { "epoch": 14.676258992805755, "grad_norm": 0.1798175722360611, "learning_rate": 8.712218530991723e-05, "loss": 0.0075, "step": 16320 }, { "epoch": 14.68525179856115, "grad_norm": 0.4935576915740967, "learning_rate": 8.710371844886241e-05, "loss": 0.0093, "step": 16330 }, { "epoch": 14.694244604316546, "grad_norm": 0.2817740738391876, "learning_rate": 8.708524031667758e-05, "loss": 0.0094, "step": 16340 }, { "epoch": 14.703237410071942, "grad_norm": 0.22346408665180206, "learning_rate": 8.706675091897592e-05, "loss": 0.0113, "step": 16350 }, { "epoch": 14.712230215827338, "grad_norm": 0.2520354688167572, "learning_rate": 8.704825026137404e-05, "loss": 0.012, "step": 16360 }, { "epoch": 14.721223021582734, "grad_norm": 0.17065365612506866, "learning_rate": 8.702973834949192e-05, "loss": 0.011, "step": 16370 }, { "epoch": 14.73021582733813, "grad_norm": 0.20864053070545197, "learning_rate": 8.701121518895301e-05, "loss": 0.0092, "step": 16380 }, { "epoch": 14.739208633093526, "grad_norm": 0.2590331435203552, "learning_rate": 8.699268078538414e-05, "loss": 0.0112, "step": 16390 }, { "epoch": 14.748201438848922, "grad_norm": 0.2743064761161804, "learning_rate": 8.69741351444156e-05, "loss": 0.0079, "step": 16400 }, { "epoch": 14.757194244604317, "grad_norm": 0.32096463441848755, "learning_rate": 8.695557827168101e-05, "loss": 0.0095, "step": 16410 }, { "epoch": 14.766187050359711, "grad_norm": 0.14506079256534576, "learning_rate": 8.693701017281753e-05, "loss": 0.0119, "step": 16420 }, { "epoch": 14.775179856115107, "grad_norm": 0.20023350417613983, "learning_rate": 8.691843085346563e-05, "loss": 0.0113, "step": 16430 }, { "epoch": 14.784172661870503, "grad_norm": 0.19229848682880402, "learning_rate": 8.689984031926919e-05, "loss": 0.0076, "step": 16440 }, { "epoch": 14.793165467625899, "grad_norm": 0.24695101380348206, "learning_rate": 8.688123857587555e-05, "loss": 0.0082, "step": 16450 }, { "epoch": 14.802158273381295, "grad_norm": 0.31226834654808044, "learning_rate": 8.686262562893544e-05, "loss": 0.0113, "step": 16460 }, { "epoch": 14.81115107913669, "grad_norm": 0.2853970527648926, "learning_rate": 8.684400148410294e-05, "loss": 0.0089, "step": 16470 }, { "epoch": 14.820143884892087, "grad_norm": 0.24486105144023895, "learning_rate": 8.682536614703562e-05, "loss": 0.009, "step": 16480 }, { "epoch": 14.829136690647482, "grad_norm": 0.2436089962720871, "learning_rate": 8.680671962339437e-05, "loss": 0.0084, "step": 16490 }, { "epoch": 14.838129496402878, "grad_norm": 0.1951298713684082, "learning_rate": 8.678806191884352e-05, "loss": 0.008, "step": 16500 }, { "epoch": 14.847122302158274, "grad_norm": 0.14315788447856903, "learning_rate": 8.67693930390508e-05, "loss": 0.0083, "step": 16510 }, { "epoch": 14.85611510791367, "grad_norm": 0.23264971375465393, "learning_rate": 8.67507129896873e-05, "loss": 0.0074, "step": 16520 }, { "epoch": 14.865107913669064, "grad_norm": 0.33328935503959656, "learning_rate": 8.673202177642757e-05, "loss": 0.0141, "step": 16530 }, { "epoch": 14.87410071942446, "grad_norm": 0.3342328667640686, "learning_rate": 8.671331940494945e-05, "loss": 0.0176, "step": 16540 }, { "epoch": 14.883093525179856, "grad_norm": 0.2776394784450531, "learning_rate": 8.669460588093427e-05, "loss": 0.0114, "step": 16550 }, { "epoch": 14.892086330935252, "grad_norm": 0.16570916771888733, "learning_rate": 8.667588121006667e-05, "loss": 0.0082, "step": 16560 }, { "epoch": 14.901079136690647, "grad_norm": 0.2653316557407379, "learning_rate": 8.665714539803475e-05, "loss": 0.0131, "step": 16570 }, { "epoch": 14.910071942446043, "grad_norm": 0.29213666915893555, "learning_rate": 8.663839845052993e-05, "loss": 0.0082, "step": 16580 }, { "epoch": 14.91906474820144, "grad_norm": 0.35285186767578125, "learning_rate": 8.661964037324703e-05, "loss": 0.01, "step": 16590 }, { "epoch": 14.928057553956835, "grad_norm": 0.2571507692337036, "learning_rate": 8.660087117188427e-05, "loss": 0.0067, "step": 16600 }, { "epoch": 14.93705035971223, "grad_norm": 0.21609552204608917, "learning_rate": 8.658209085214325e-05, "loss": 0.0119, "step": 16610 }, { "epoch": 14.946043165467627, "grad_norm": 0.28172141313552856, "learning_rate": 8.656329941972891e-05, "loss": 0.0154, "step": 16620 }, { "epoch": 14.95503597122302, "grad_norm": 0.13497298955917358, "learning_rate": 8.654449688034963e-05, "loss": 0.0104, "step": 16630 }, { "epoch": 14.964028776978417, "grad_norm": 0.2215050309896469, "learning_rate": 8.652568323971706e-05, "loss": 0.0109, "step": 16640 }, { "epoch": 14.973021582733812, "grad_norm": 0.314174085855484, "learning_rate": 8.650685850354636e-05, "loss": 0.0097, "step": 16650 }, { "epoch": 14.982014388489208, "grad_norm": 0.2582972049713135, "learning_rate": 8.648802267755593e-05, "loss": 0.0116, "step": 16660 }, { "epoch": 14.991007194244604, "grad_norm": 0.17763130366802216, "learning_rate": 8.646917576746764e-05, "loss": 0.0101, "step": 16670 }, { "epoch": 15.0, "grad_norm": 0.23598304390907288, "learning_rate": 8.645031777900666e-05, "loss": 0.0081, "step": 16680 }, { "epoch": 15.008992805755396, "grad_norm": 0.23135341703891754, "learning_rate": 8.643144871790154e-05, "loss": 0.0122, "step": 16690 }, { "epoch": 15.017985611510792, "grad_norm": 0.23390547931194305, "learning_rate": 8.641256858988424e-05, "loss": 0.0083, "step": 16700 }, { "epoch": 15.026978417266188, "grad_norm": 0.2893403470516205, "learning_rate": 8.639367740069e-05, "loss": 0.0084, "step": 16710 }, { "epoch": 15.035971223021583, "grad_norm": 0.25736114382743835, "learning_rate": 8.63747751560575e-05, "loss": 0.0097, "step": 16720 }, { "epoch": 15.04496402877698, "grad_norm": 0.16018222272396088, "learning_rate": 8.635586186172871e-05, "loss": 0.0071, "step": 16730 }, { "epoch": 15.053956834532373, "grad_norm": 0.22338375449180603, "learning_rate": 8.633693752344902e-05, "loss": 0.007, "step": 16740 }, { "epoch": 15.06294964028777, "grad_norm": 0.17009106278419495, "learning_rate": 8.631800214696713e-05, "loss": 0.0098, "step": 16750 }, { "epoch": 15.071942446043165, "grad_norm": 0.23991821706295013, "learning_rate": 8.629905573803511e-05, "loss": 0.0106, "step": 16760 }, { "epoch": 15.08093525179856, "grad_norm": 0.19353508949279785, "learning_rate": 8.628009830240839e-05, "loss": 0.0105, "step": 16770 }, { "epoch": 15.089928057553957, "grad_norm": 0.12838749587535858, "learning_rate": 8.626112984584571e-05, "loss": 0.0108, "step": 16780 }, { "epoch": 15.098920863309353, "grad_norm": 0.19683590531349182, "learning_rate": 8.62421503741092e-05, "loss": 0.0101, "step": 16790 }, { "epoch": 15.107913669064748, "grad_norm": 0.13332000374794006, "learning_rate": 8.622315989296432e-05, "loss": 0.0073, "step": 16800 }, { "epoch": 15.116906474820144, "grad_norm": 0.3228794038295746, "learning_rate": 8.62041584081799e-05, "loss": 0.0109, "step": 16810 }, { "epoch": 15.12589928057554, "grad_norm": 0.2336861938238144, "learning_rate": 8.618514592552807e-05, "loss": 0.0102, "step": 16820 }, { "epoch": 15.134892086330936, "grad_norm": 0.15914547443389893, "learning_rate": 8.616612245078431e-05, "loss": 0.0092, "step": 16830 }, { "epoch": 15.14388489208633, "grad_norm": 0.17249666154384613, "learning_rate": 8.614708798972746e-05, "loss": 0.0099, "step": 16840 }, { "epoch": 15.152877697841726, "grad_norm": 0.21927478909492493, "learning_rate": 8.61280425481397e-05, "loss": 0.0081, "step": 16850 }, { "epoch": 15.161870503597122, "grad_norm": 0.14603641629219055, "learning_rate": 8.61089861318065e-05, "loss": 0.0092, "step": 16860 }, { "epoch": 15.170863309352518, "grad_norm": 0.300249308347702, "learning_rate": 8.608991874651673e-05, "loss": 0.0077, "step": 16870 }, { "epoch": 15.179856115107913, "grad_norm": 0.2740229666233063, "learning_rate": 8.607084039806255e-05, "loss": 0.01, "step": 16880 }, { "epoch": 15.18884892086331, "grad_norm": 0.35262489318847656, "learning_rate": 8.605175109223944e-05, "loss": 0.0084, "step": 16890 }, { "epoch": 15.197841726618705, "grad_norm": 0.252310574054718, "learning_rate": 8.603265083484624e-05, "loss": 0.0087, "step": 16900 }, { "epoch": 15.206834532374101, "grad_norm": 0.14634770154953003, "learning_rate": 8.60135396316851e-05, "loss": 0.0073, "step": 16910 }, { "epoch": 15.215827338129497, "grad_norm": 0.18353427946567535, "learning_rate": 8.599441748856152e-05, "loss": 0.0083, "step": 16920 }, { "epoch": 15.224820143884893, "grad_norm": 0.1386830359697342, "learning_rate": 8.597528441128427e-05, "loss": 0.0087, "step": 16930 }, { "epoch": 15.233812949640289, "grad_norm": 0.2259155958890915, "learning_rate": 8.595614040566549e-05, "loss": 0.0072, "step": 16940 }, { "epoch": 15.242805755395683, "grad_norm": 0.23133037984371185, "learning_rate": 8.593698547752063e-05, "loss": 0.0087, "step": 16950 }, { "epoch": 15.251798561151078, "grad_norm": 0.28230321407318115, "learning_rate": 8.591781963266843e-05, "loss": 0.0178, "step": 16960 }, { "epoch": 15.260791366906474, "grad_norm": 0.18553322553634644, "learning_rate": 8.5898642876931e-05, "loss": 0.0071, "step": 16970 }, { "epoch": 15.26978417266187, "grad_norm": 0.31828123331069946, "learning_rate": 8.587945521613369e-05, "loss": 0.0149, "step": 16980 }, { "epoch": 15.278776978417266, "grad_norm": 0.30777212977409363, "learning_rate": 8.586025665610524e-05, "loss": 0.0085, "step": 16990 }, { "epoch": 15.287769784172662, "grad_norm": 0.2510148286819458, "learning_rate": 8.584104720267765e-05, "loss": 0.0102, "step": 17000 }, { "epoch": 15.296762589928058, "grad_norm": 0.13826167583465576, "learning_rate": 8.582182686168625e-05, "loss": 0.0088, "step": 17010 }, { "epoch": 15.305755395683454, "grad_norm": 0.15985709428787231, "learning_rate": 8.580259563896967e-05, "loss": 0.0099, "step": 17020 }, { "epoch": 15.31474820143885, "grad_norm": 0.29900023341178894, "learning_rate": 8.578335354036983e-05, "loss": 0.0093, "step": 17030 }, { "epoch": 15.323741007194245, "grad_norm": 0.18648387491703033, "learning_rate": 8.576410057173201e-05, "loss": 0.0088, "step": 17040 }, { "epoch": 15.332733812949641, "grad_norm": 0.20739565789699554, "learning_rate": 8.574483673890474e-05, "loss": 0.009, "step": 17050 }, { "epoch": 15.341726618705035, "grad_norm": 0.22807294130325317, "learning_rate": 8.572556204773983e-05, "loss": 0.0083, "step": 17060 }, { "epoch": 15.350719424460431, "grad_norm": 0.21090683341026306, "learning_rate": 8.570627650409246e-05, "loss": 0.0075, "step": 17070 }, { "epoch": 15.359712230215827, "grad_norm": 0.1569303423166275, "learning_rate": 8.568698011382107e-05, "loss": 0.0087, "step": 17080 }, { "epoch": 15.368705035971223, "grad_norm": 0.17533865571022034, "learning_rate": 8.566767288278738e-05, "loss": 0.0114, "step": 17090 }, { "epoch": 15.377697841726619, "grad_norm": 0.1812097281217575, "learning_rate": 8.56483548168564e-05, "loss": 0.0083, "step": 17100 }, { "epoch": 15.386690647482014, "grad_norm": 0.23628661036491394, "learning_rate": 8.562902592189648e-05, "loss": 0.0097, "step": 17110 }, { "epoch": 15.39568345323741, "grad_norm": 0.19653306901454926, "learning_rate": 8.560968620377921e-05, "loss": 0.0076, "step": 17120 }, { "epoch": 15.404676258992806, "grad_norm": 0.19465294480323792, "learning_rate": 8.559033566837951e-05, "loss": 0.0111, "step": 17130 }, { "epoch": 15.413669064748202, "grad_norm": 0.41369280219078064, "learning_rate": 8.557097432157551e-05, "loss": 0.0128, "step": 17140 }, { "epoch": 15.422661870503598, "grad_norm": 0.25346115231513977, "learning_rate": 8.555160216924872e-05, "loss": 0.0094, "step": 17150 }, { "epoch": 15.431654676258994, "grad_norm": 0.260539710521698, "learning_rate": 8.55322192172839e-05, "loss": 0.0106, "step": 17160 }, { "epoch": 15.440647482014388, "grad_norm": 0.16566987335681915, "learning_rate": 8.551282547156902e-05, "loss": 0.0071, "step": 17170 }, { "epoch": 15.449640287769784, "grad_norm": 0.27605193853378296, "learning_rate": 8.549342093799544e-05, "loss": 0.0107, "step": 17180 }, { "epoch": 15.45863309352518, "grad_norm": 0.35362035036087036, "learning_rate": 8.547400562245773e-05, "loss": 0.0125, "step": 17190 }, { "epoch": 15.467625899280575, "grad_norm": 0.20987387001514435, "learning_rate": 8.545457953085374e-05, "loss": 0.0107, "step": 17200 }, { "epoch": 15.476618705035971, "grad_norm": 0.24252741038799286, "learning_rate": 8.543514266908463e-05, "loss": 0.0094, "step": 17210 }, { "epoch": 15.485611510791367, "grad_norm": 0.2581281363964081, "learning_rate": 8.541569504305478e-05, "loss": 0.0119, "step": 17220 }, { "epoch": 15.494604316546763, "grad_norm": 0.2655697464942932, "learning_rate": 8.539623665867187e-05, "loss": 0.0108, "step": 17230 }, { "epoch": 15.503597122302159, "grad_norm": 0.15335489809513092, "learning_rate": 8.537676752184685e-05, "loss": 0.0113, "step": 17240 }, { "epoch": 15.512589928057555, "grad_norm": 0.16203831136226654, "learning_rate": 8.53572876384939e-05, "loss": 0.0083, "step": 17250 }, { "epoch": 15.52158273381295, "grad_norm": 0.2960353195667267, "learning_rate": 8.533779701453056e-05, "loss": 0.0098, "step": 17260 }, { "epoch": 15.530575539568346, "grad_norm": 0.13893818855285645, "learning_rate": 8.53182956558775e-05, "loss": 0.0094, "step": 17270 }, { "epoch": 15.53956834532374, "grad_norm": 0.20552976429462433, "learning_rate": 8.529878356845877e-05, "loss": 0.007, "step": 17280 }, { "epoch": 15.548561151079136, "grad_norm": 0.2589678466320038, "learning_rate": 8.527926075820158e-05, "loss": 0.0103, "step": 17290 }, { "epoch": 15.557553956834532, "grad_norm": 0.22273637354373932, "learning_rate": 8.525972723103648e-05, "loss": 0.0121, "step": 17300 }, { "epoch": 15.566546762589928, "grad_norm": 0.17171558737754822, "learning_rate": 8.524018299289722e-05, "loss": 0.0076, "step": 17310 }, { "epoch": 15.575539568345324, "grad_norm": 0.3476008474826813, "learning_rate": 8.522062804972083e-05, "loss": 0.0102, "step": 17320 }, { "epoch": 15.58453237410072, "grad_norm": 0.2601185441017151, "learning_rate": 8.520106240744759e-05, "loss": 0.0086, "step": 17330 }, { "epoch": 15.593525179856115, "grad_norm": 0.3161734640598297, "learning_rate": 8.518148607202102e-05, "loss": 0.011, "step": 17340 }, { "epoch": 15.602517985611511, "grad_norm": 0.2871564030647278, "learning_rate": 8.51618990493879e-05, "loss": 0.0116, "step": 17350 }, { "epoch": 15.611510791366907, "grad_norm": 0.26741018891334534, "learning_rate": 8.514230134549823e-05, "loss": 0.0128, "step": 17360 }, { "epoch": 15.620503597122303, "grad_norm": 0.1719665378332138, "learning_rate": 8.51226929663053e-05, "loss": 0.0099, "step": 17370 }, { "epoch": 15.629496402877697, "grad_norm": 0.30885568261146545, "learning_rate": 8.51030739177656e-05, "loss": 0.0101, "step": 17380 }, { "epoch": 15.638489208633093, "grad_norm": 0.2685886025428772, "learning_rate": 8.508344420583889e-05, "loss": 0.0142, "step": 17390 }, { "epoch": 15.647482014388489, "grad_norm": 0.20034073293209076, "learning_rate": 8.506380383648816e-05, "loss": 0.0099, "step": 17400 }, { "epoch": 15.656474820143885, "grad_norm": 0.1917337328195572, "learning_rate": 8.504415281567963e-05, "loss": 0.0086, "step": 17410 }, { "epoch": 15.66546762589928, "grad_norm": 0.25447142124176025, "learning_rate": 8.502449114938275e-05, "loss": 0.008, "step": 17420 }, { "epoch": 15.674460431654676, "grad_norm": 0.26326337456703186, "learning_rate": 8.500481884357025e-05, "loss": 0.0097, "step": 17430 }, { "epoch": 15.683453237410072, "grad_norm": 0.22156952321529388, "learning_rate": 8.498513590421801e-05, "loss": 0.0085, "step": 17440 }, { "epoch": 15.692446043165468, "grad_norm": 0.24909541010856628, "learning_rate": 8.496544233730522e-05, "loss": 0.0118, "step": 17450 }, { "epoch": 15.701438848920864, "grad_norm": 0.27627086639404297, "learning_rate": 8.494573814881426e-05, "loss": 0.0106, "step": 17460 }, { "epoch": 15.71043165467626, "grad_norm": 0.18228572607040405, "learning_rate": 8.492602334473074e-05, "loss": 0.0079, "step": 17470 }, { "epoch": 15.719424460431654, "grad_norm": 0.2420186847448349, "learning_rate": 8.49062979310435e-05, "loss": 0.0118, "step": 17480 }, { "epoch": 15.72841726618705, "grad_norm": 0.14009611308574677, "learning_rate": 8.488656191374458e-05, "loss": 0.0108, "step": 17490 }, { "epoch": 15.737410071942445, "grad_norm": 0.2587183117866516, "learning_rate": 8.48668152988293e-05, "loss": 0.0089, "step": 17500 }, { "epoch": 15.746402877697841, "grad_norm": 0.19581790268421173, "learning_rate": 8.484705809229612e-05, "loss": 0.0094, "step": 17510 }, { "epoch": 15.755395683453237, "grad_norm": 0.2984723746776581, "learning_rate": 8.482729030014677e-05, "loss": 0.0119, "step": 17520 }, { "epoch": 15.764388489208633, "grad_norm": 0.18281982839107513, "learning_rate": 8.48075119283862e-05, "loss": 0.0084, "step": 17530 }, { "epoch": 15.773381294964029, "grad_norm": 0.24509994685649872, "learning_rate": 8.478772298302254e-05, "loss": 0.0093, "step": 17540 }, { "epoch": 15.782374100719425, "grad_norm": 0.2637336254119873, "learning_rate": 8.476792347006716e-05, "loss": 0.0124, "step": 17550 }, { "epoch": 15.79136690647482, "grad_norm": 0.27172547578811646, "learning_rate": 8.474811339553462e-05, "loss": 0.0085, "step": 17560 }, { "epoch": 15.800359712230216, "grad_norm": 0.341156005859375, "learning_rate": 8.47282927654427e-05, "loss": 0.0144, "step": 17570 }, { "epoch": 15.809352517985612, "grad_norm": 0.23841482400894165, "learning_rate": 8.470846158581238e-05, "loss": 0.0077, "step": 17580 }, { "epoch": 15.818345323741006, "grad_norm": 0.2929033637046814, "learning_rate": 8.468861986266787e-05, "loss": 0.0094, "step": 17590 }, { "epoch": 15.827338129496402, "grad_norm": 0.24552804231643677, "learning_rate": 8.466876760203654e-05, "loss": 0.0093, "step": 17600 }, { "epoch": 15.836330935251798, "grad_norm": 0.2948184311389923, "learning_rate": 8.464890480994898e-05, "loss": 0.0129, "step": 17610 }, { "epoch": 15.845323741007194, "grad_norm": 0.18068663775920868, "learning_rate": 8.462903149243899e-05, "loss": 0.0086, "step": 17620 }, { "epoch": 15.85431654676259, "grad_norm": 0.2252768576145172, "learning_rate": 8.460914765554357e-05, "loss": 0.007, "step": 17630 }, { "epoch": 15.863309352517986, "grad_norm": 0.22862623631954193, "learning_rate": 8.458925330530288e-05, "loss": 0.0135, "step": 17640 }, { "epoch": 15.872302158273381, "grad_norm": 0.2439860999584198, "learning_rate": 8.456934844776032e-05, "loss": 0.0092, "step": 17650 }, { "epoch": 15.881294964028777, "grad_norm": 0.3764236569404602, "learning_rate": 8.454943308896246e-05, "loss": 0.0088, "step": 17660 }, { "epoch": 15.890287769784173, "grad_norm": 0.1889955997467041, "learning_rate": 8.452950723495905e-05, "loss": 0.0074, "step": 17670 }, { "epoch": 15.899280575539569, "grad_norm": 0.2490379810333252, "learning_rate": 8.450957089180303e-05, "loss": 0.0088, "step": 17680 }, { "epoch": 15.908273381294965, "grad_norm": 0.3010033667087555, "learning_rate": 8.448962406555055e-05, "loss": 0.0094, "step": 17690 }, { "epoch": 15.917266187050359, "grad_norm": 0.20050841569900513, "learning_rate": 8.446966676226093e-05, "loss": 0.012, "step": 17700 }, { "epoch": 15.926258992805755, "grad_norm": 0.24586272239685059, "learning_rate": 8.444969898799667e-05, "loss": 0.0111, "step": 17710 }, { "epoch": 15.93525179856115, "grad_norm": 0.21537694334983826, "learning_rate": 8.442972074882343e-05, "loss": 0.0103, "step": 17720 }, { "epoch": 15.944244604316546, "grad_norm": 0.20452576875686646, "learning_rate": 8.44097320508101e-05, "loss": 0.0091, "step": 17730 }, { "epoch": 15.953237410071942, "grad_norm": 0.3364766538143158, "learning_rate": 8.43897329000287e-05, "loss": 0.007, "step": 17740 }, { "epoch": 15.962230215827338, "grad_norm": 0.2742777168750763, "learning_rate": 8.436972330255448e-05, "loss": 0.01, "step": 17750 }, { "epoch": 15.971223021582734, "grad_norm": 0.25032860040664673, "learning_rate": 8.434970326446579e-05, "loss": 0.0094, "step": 17760 }, { "epoch": 15.98021582733813, "grad_norm": 0.16874240338802338, "learning_rate": 8.432967279184418e-05, "loss": 0.0086, "step": 17770 }, { "epoch": 15.989208633093526, "grad_norm": 0.2687866687774658, "learning_rate": 8.430963189077441e-05, "loss": 0.0078, "step": 17780 }, { "epoch": 15.998201438848922, "grad_norm": 0.22682049870491028, "learning_rate": 8.428958056734437e-05, "loss": 0.0073, "step": 17790 }, { "epoch": 16.007194244604317, "grad_norm": 0.2280644029378891, "learning_rate": 8.426951882764513e-05, "loss": 0.0162, "step": 17800 }, { "epoch": 16.01618705035971, "grad_norm": 0.13538292050361633, "learning_rate": 8.424944667777089e-05, "loss": 0.0075, "step": 17810 }, { "epoch": 16.02517985611511, "grad_norm": 0.2588070333003998, "learning_rate": 8.422936412381905e-05, "loss": 0.0083, "step": 17820 }, { "epoch": 16.034172661870503, "grad_norm": 0.1999845653772354, "learning_rate": 8.420927117189017e-05, "loss": 0.0074, "step": 17830 }, { "epoch": 16.0431654676259, "grad_norm": 0.1875191628932953, "learning_rate": 8.418916782808795e-05, "loss": 0.0072, "step": 17840 }, { "epoch": 16.052158273381295, "grad_norm": 0.30026715993881226, "learning_rate": 8.416905409851926e-05, "loss": 0.0087, "step": 17850 }, { "epoch": 16.06115107913669, "grad_norm": 0.20805159211158752, "learning_rate": 8.41489299892941e-05, "loss": 0.0077, "step": 17860 }, { "epoch": 16.070143884892087, "grad_norm": 0.16514109075069427, "learning_rate": 8.412879550652566e-05, "loss": 0.0082, "step": 17870 }, { "epoch": 16.07913669064748, "grad_norm": 0.2793535888195038, "learning_rate": 8.410865065633029e-05, "loss": 0.0091, "step": 17880 }, { "epoch": 16.08812949640288, "grad_norm": 0.26290756464004517, "learning_rate": 8.408849544482742e-05, "loss": 0.0113, "step": 17890 }, { "epoch": 16.097122302158272, "grad_norm": 0.2616439759731293, "learning_rate": 8.406832987813968e-05, "loss": 0.0093, "step": 17900 }, { "epoch": 16.10611510791367, "grad_norm": 0.22558055818080902, "learning_rate": 8.404815396239286e-05, "loss": 0.0097, "step": 17910 }, { "epoch": 16.115107913669064, "grad_norm": 0.24364352226257324, "learning_rate": 8.402796770371587e-05, "loss": 0.0091, "step": 17920 }, { "epoch": 16.12410071942446, "grad_norm": 0.19516511261463165, "learning_rate": 8.400777110824071e-05, "loss": 0.0078, "step": 17930 }, { "epoch": 16.133093525179856, "grad_norm": 0.21218717098236084, "learning_rate": 8.398756418210263e-05, "loss": 0.0086, "step": 17940 }, { "epoch": 16.142086330935253, "grad_norm": 0.216374471783638, "learning_rate": 8.396734693143993e-05, "loss": 0.0078, "step": 17950 }, { "epoch": 16.151079136690647, "grad_norm": 0.22714188694953918, "learning_rate": 8.39471193623941e-05, "loss": 0.0157, "step": 17960 }, { "epoch": 16.16007194244604, "grad_norm": 0.3339462876319885, "learning_rate": 8.392688148110974e-05, "loss": 0.01, "step": 17970 }, { "epoch": 16.16906474820144, "grad_norm": 0.24232205748558044, "learning_rate": 8.390663329373456e-05, "loss": 0.0089, "step": 17980 }, { "epoch": 16.178057553956833, "grad_norm": 0.2666022479534149, "learning_rate": 8.388637480641944e-05, "loss": 0.0124, "step": 17990 }, { "epoch": 16.18705035971223, "grad_norm": 0.18815158307552338, "learning_rate": 8.386610602531837e-05, "loss": 0.0111, "step": 18000 }, { "epoch": 16.196043165467625, "grad_norm": 0.30359891057014465, "learning_rate": 8.384582695658847e-05, "loss": 0.0088, "step": 18010 }, { "epoch": 16.205035971223023, "grad_norm": 0.20025666058063507, "learning_rate": 8.382553760638999e-05, "loss": 0.0088, "step": 18020 }, { "epoch": 16.214028776978417, "grad_norm": 0.1412167102098465, "learning_rate": 8.380523798088631e-05, "loss": 0.0069, "step": 18030 }, { "epoch": 16.223021582733814, "grad_norm": 0.1952935755252838, "learning_rate": 8.378492808624389e-05, "loss": 0.0079, "step": 18040 }, { "epoch": 16.23201438848921, "grad_norm": 0.19464755058288574, "learning_rate": 8.376460792863237e-05, "loss": 0.0113, "step": 18050 }, { "epoch": 16.241007194244606, "grad_norm": 0.2284800410270691, "learning_rate": 8.374427751422444e-05, "loss": 0.0063, "step": 18060 }, { "epoch": 16.25, "grad_norm": 0.15881352126598358, "learning_rate": 8.3723936849196e-05, "loss": 0.0073, "step": 18070 }, { "epoch": 16.258992805755394, "grad_norm": 0.15748518705368042, "learning_rate": 8.370358593972595e-05, "loss": 0.0074, "step": 18080 }, { "epoch": 16.26798561151079, "grad_norm": 0.20313070714473724, "learning_rate": 8.36832247919964e-05, "loss": 0.0082, "step": 18090 }, { "epoch": 16.276978417266186, "grad_norm": 0.259963721036911, "learning_rate": 8.36628534121925e-05, "loss": 0.0108, "step": 18100 }, { "epoch": 16.285971223021583, "grad_norm": 0.29478415846824646, "learning_rate": 8.364247180650254e-05, "loss": 0.0131, "step": 18110 }, { "epoch": 16.294964028776977, "grad_norm": 0.13105881214141846, "learning_rate": 8.362207998111794e-05, "loss": 0.0071, "step": 18120 }, { "epoch": 16.303956834532375, "grad_norm": 0.20223690569400787, "learning_rate": 8.360167794223318e-05, "loss": 0.0093, "step": 18130 }, { "epoch": 16.31294964028777, "grad_norm": 0.28364333510398865, "learning_rate": 8.358126569604586e-05, "loss": 0.0105, "step": 18140 }, { "epoch": 16.321942446043167, "grad_norm": 0.1426386833190918, "learning_rate": 8.356084324875668e-05, "loss": 0.012, "step": 18150 }, { "epoch": 16.33093525179856, "grad_norm": 0.1875537633895874, "learning_rate": 8.354041060656945e-05, "loss": 0.0074, "step": 18160 }, { "epoch": 16.33992805755396, "grad_norm": 0.1768856793642044, "learning_rate": 8.351996777569106e-05, "loss": 0.0067, "step": 18170 }, { "epoch": 16.348920863309353, "grad_norm": 0.15300393104553223, "learning_rate": 8.349951476233148e-05, "loss": 0.0092, "step": 18180 }, { "epoch": 16.357913669064747, "grad_norm": 0.16664627194404602, "learning_rate": 8.347905157270386e-05, "loss": 0.007, "step": 18190 }, { "epoch": 16.366906474820144, "grad_norm": 0.19490045309066772, "learning_rate": 8.345857821302432e-05, "loss": 0.0093, "step": 18200 }, { "epoch": 16.37589928057554, "grad_norm": 0.21971145272254944, "learning_rate": 8.343809468951213e-05, "loss": 0.0103, "step": 18210 }, { "epoch": 16.384892086330936, "grad_norm": 0.21249350905418396, "learning_rate": 8.341760100838965e-05, "loss": 0.0113, "step": 18220 }, { "epoch": 16.39388489208633, "grad_norm": 0.19109812378883362, "learning_rate": 8.339709717588233e-05, "loss": 0.0101, "step": 18230 }, { "epoch": 16.402877697841728, "grad_norm": 0.2381245344877243, "learning_rate": 8.33765831982187e-05, "loss": 0.0082, "step": 18240 }, { "epoch": 16.41187050359712, "grad_norm": 0.18082746863365173, "learning_rate": 8.335605908163035e-05, "loss": 0.0062, "step": 18250 }, { "epoch": 16.42086330935252, "grad_norm": 0.2029227316379547, "learning_rate": 8.333552483235196e-05, "loss": 0.0069, "step": 18260 }, { "epoch": 16.429856115107913, "grad_norm": 0.21490541100502014, "learning_rate": 8.33149804566213e-05, "loss": 0.0085, "step": 18270 }, { "epoch": 16.43884892086331, "grad_norm": 0.20536912977695465, "learning_rate": 8.329442596067921e-05, "loss": 0.0086, "step": 18280 }, { "epoch": 16.447841726618705, "grad_norm": 0.24999071657657623, "learning_rate": 8.32738613507696e-05, "loss": 0.0107, "step": 18290 }, { "epoch": 16.4568345323741, "grad_norm": 0.14665527641773224, "learning_rate": 8.325328663313946e-05, "loss": 0.0103, "step": 18300 }, { "epoch": 16.465827338129497, "grad_norm": 0.21603940427303314, "learning_rate": 8.323270181403884e-05, "loss": 0.0089, "step": 18310 }, { "epoch": 16.47482014388489, "grad_norm": 0.15400370955467224, "learning_rate": 8.321210689972086e-05, "loss": 0.0101, "step": 18320 }, { "epoch": 16.48381294964029, "grad_norm": 0.1755083203315735, "learning_rate": 8.319150189644174e-05, "loss": 0.0087, "step": 18330 }, { "epoch": 16.492805755395683, "grad_norm": 0.25029850006103516, "learning_rate": 8.31708868104607e-05, "loss": 0.0143, "step": 18340 }, { "epoch": 16.50179856115108, "grad_norm": 0.21640685200691223, "learning_rate": 8.315026164804007e-05, "loss": 0.0087, "step": 18350 }, { "epoch": 16.510791366906474, "grad_norm": 0.38940268754959106, "learning_rate": 8.312962641544524e-05, "loss": 0.0123, "step": 18360 }, { "epoch": 16.519784172661872, "grad_norm": 0.2333754599094391, "learning_rate": 8.310898111894465e-05, "loss": 0.0082, "step": 18370 }, { "epoch": 16.528776978417266, "grad_norm": 0.23502220213413239, "learning_rate": 8.308832576480977e-05, "loss": 0.0098, "step": 18380 }, { "epoch": 16.53776978417266, "grad_norm": 0.2883624732494354, "learning_rate": 8.306766035931519e-05, "loss": 0.0113, "step": 18390 }, { "epoch": 16.546762589928058, "grad_norm": 0.14418868720531464, "learning_rate": 8.304698490873847e-05, "loss": 0.0086, "step": 18400 }, { "epoch": 16.555755395683452, "grad_norm": 0.196679949760437, "learning_rate": 8.30262994193603e-05, "loss": 0.0102, "step": 18410 }, { "epoch": 16.56474820143885, "grad_norm": 0.23871399462223053, "learning_rate": 8.300560389746438e-05, "loss": 0.0081, "step": 18420 }, { "epoch": 16.573741007194243, "grad_norm": 0.2755158543586731, "learning_rate": 8.298489834933745e-05, "loss": 0.0112, "step": 18430 }, { "epoch": 16.58273381294964, "grad_norm": 0.17013143002986908, "learning_rate": 8.296418278126934e-05, "loss": 0.0115, "step": 18440 }, { "epoch": 16.591726618705035, "grad_norm": 0.12944157421588898, "learning_rate": 8.294345719955284e-05, "loss": 0.0096, "step": 18450 }, { "epoch": 16.600719424460433, "grad_norm": 0.2215220034122467, "learning_rate": 8.29227216104839e-05, "loss": 0.0085, "step": 18460 }, { "epoch": 16.609712230215827, "grad_norm": 0.26697710156440735, "learning_rate": 8.290197602036137e-05, "loss": 0.0103, "step": 18470 }, { "epoch": 16.618705035971225, "grad_norm": 0.20161058008670807, "learning_rate": 8.288122043548725e-05, "loss": 0.0076, "step": 18480 }, { "epoch": 16.62769784172662, "grad_norm": 0.19910353422164917, "learning_rate": 8.286045486216657e-05, "loss": 0.0123, "step": 18490 }, { "epoch": 16.636690647482013, "grad_norm": 0.290787011384964, "learning_rate": 8.283967930670733e-05, "loss": 0.0084, "step": 18500 }, { "epoch": 16.64568345323741, "grad_norm": 0.3183029592037201, "learning_rate": 8.281889377542058e-05, "loss": 0.0091, "step": 18510 }, { "epoch": 16.654676258992804, "grad_norm": 0.3543996512889862, "learning_rate": 8.279809827462045e-05, "loss": 0.0097, "step": 18520 }, { "epoch": 16.663669064748202, "grad_norm": 0.21686862409114838, "learning_rate": 8.277729281062402e-05, "loss": 0.011, "step": 18530 }, { "epoch": 16.672661870503596, "grad_norm": 0.1728285700082779, "learning_rate": 8.27564773897515e-05, "loss": 0.0088, "step": 18540 }, { "epoch": 16.681654676258994, "grad_norm": 0.1874157339334488, "learning_rate": 8.273565201832602e-05, "loss": 0.0066, "step": 18550 }, { "epoch": 16.690647482014388, "grad_norm": 0.15565705299377441, "learning_rate": 8.27148167026738e-05, "loss": 0.0085, "step": 18560 }, { "epoch": 16.699640287769785, "grad_norm": 0.21052099764347076, "learning_rate": 8.269397144912405e-05, "loss": 0.0093, "step": 18570 }, { "epoch": 16.70863309352518, "grad_norm": 0.18187624216079712, "learning_rate": 8.267311626400899e-05, "loss": 0.0085, "step": 18580 }, { "epoch": 16.717625899280577, "grad_norm": 0.32336121797561646, "learning_rate": 8.26522511536639e-05, "loss": 0.012, "step": 18590 }, { "epoch": 16.72661870503597, "grad_norm": 0.4249754250049591, "learning_rate": 8.263137612442706e-05, "loss": 0.0109, "step": 18600 }, { "epoch": 16.735611510791365, "grad_norm": 0.3697386085987091, "learning_rate": 8.261049118263971e-05, "loss": 0.01, "step": 18610 }, { "epoch": 16.744604316546763, "grad_norm": 0.23664885759353638, "learning_rate": 8.258959633464619e-05, "loss": 0.0107, "step": 18620 }, { "epoch": 16.753597122302157, "grad_norm": 0.19303658604621887, "learning_rate": 8.256869158679377e-05, "loss": 0.0085, "step": 18630 }, { "epoch": 16.762589928057555, "grad_norm": 0.12246458232402802, "learning_rate": 8.254777694543278e-05, "loss": 0.0102, "step": 18640 }, { "epoch": 16.77158273381295, "grad_norm": 0.17197053134441376, "learning_rate": 8.252685241691651e-05, "loss": 0.0085, "step": 18650 }, { "epoch": 16.780575539568346, "grad_norm": 0.20055337250232697, "learning_rate": 8.250591800760133e-05, "loss": 0.011, "step": 18660 }, { "epoch": 16.78956834532374, "grad_norm": 0.36057600378990173, "learning_rate": 8.248497372384649e-05, "loss": 0.0105, "step": 18670 }, { "epoch": 16.798561151079138, "grad_norm": 0.2978443503379822, "learning_rate": 8.246401957201437e-05, "loss": 0.0103, "step": 18680 }, { "epoch": 16.807553956834532, "grad_norm": 0.22030824422836304, "learning_rate": 8.244305555847027e-05, "loss": 0.0081, "step": 18690 }, { "epoch": 16.81654676258993, "grad_norm": 0.10684169828891754, "learning_rate": 8.24220816895825e-05, "loss": 0.007, "step": 18700 }, { "epoch": 16.825539568345324, "grad_norm": 0.20010808110237122, "learning_rate": 8.240109797172237e-05, "loss": 0.0104, "step": 18710 }, { "epoch": 16.834532374100718, "grad_norm": 0.24672919511795044, "learning_rate": 8.238010441126416e-05, "loss": 0.0145, "step": 18720 }, { "epoch": 16.843525179856115, "grad_norm": 0.2127103954553604, "learning_rate": 8.23591010145852e-05, "loss": 0.0138, "step": 18730 }, { "epoch": 16.85251798561151, "grad_norm": 0.2684765160083771, "learning_rate": 8.233808778806571e-05, "loss": 0.0117, "step": 18740 }, { "epoch": 16.861510791366907, "grad_norm": 0.22615520656108856, "learning_rate": 8.231706473808903e-05, "loss": 0.0094, "step": 18750 }, { "epoch": 16.8705035971223, "grad_norm": 0.2343394160270691, "learning_rate": 8.229603187104133e-05, "loss": 0.0122, "step": 18760 }, { "epoch": 16.8794964028777, "grad_norm": 0.34216371178627014, "learning_rate": 8.22749891933119e-05, "loss": 0.0094, "step": 18770 }, { "epoch": 16.888489208633093, "grad_norm": 0.20182324945926666, "learning_rate": 8.225393671129291e-05, "loss": 0.0116, "step": 18780 }, { "epoch": 16.89748201438849, "grad_norm": 0.1360066682100296, "learning_rate": 8.223287443137957e-05, "loss": 0.0065, "step": 18790 }, { "epoch": 16.906474820143885, "grad_norm": 0.15457922220230103, "learning_rate": 8.221180235997004e-05, "loss": 0.0087, "step": 18800 }, { "epoch": 16.915467625899282, "grad_norm": 0.1548563688993454, "learning_rate": 8.219072050346544e-05, "loss": 0.0094, "step": 18810 }, { "epoch": 16.924460431654676, "grad_norm": 0.3175719082355499, "learning_rate": 8.216962886826992e-05, "loss": 0.0088, "step": 18820 }, { "epoch": 16.93345323741007, "grad_norm": 0.3054463863372803, "learning_rate": 8.214852746079054e-05, "loss": 0.0117, "step": 18830 }, { "epoch": 16.942446043165468, "grad_norm": 0.19069208204746246, "learning_rate": 8.212741628743732e-05, "loss": 0.0079, "step": 18840 }, { "epoch": 16.951438848920862, "grad_norm": 0.16523689031600952, "learning_rate": 8.210629535462333e-05, "loss": 0.0088, "step": 18850 }, { "epoch": 16.96043165467626, "grad_norm": 0.21013031899929047, "learning_rate": 8.208516466876453e-05, "loss": 0.0088, "step": 18860 }, { "epoch": 16.969424460431654, "grad_norm": 0.30330947041511536, "learning_rate": 8.206402423627986e-05, "loss": 0.0062, "step": 18870 }, { "epoch": 16.97841726618705, "grad_norm": 0.2225397229194641, "learning_rate": 8.204287406359124e-05, "loss": 0.0171, "step": 18880 }, { "epoch": 16.987410071942445, "grad_norm": 0.21543481945991516, "learning_rate": 8.20217141571235e-05, "loss": 0.0093, "step": 18890 }, { "epoch": 16.996402877697843, "grad_norm": 0.19775204360485077, "learning_rate": 8.200054452330449e-05, "loss": 0.0073, "step": 18900 }, { "epoch": 17.005395683453237, "grad_norm": 0.2569115161895752, "learning_rate": 8.197936516856499e-05, "loss": 0.011, "step": 18910 }, { "epoch": 17.014388489208635, "grad_norm": 0.2682529091835022, "learning_rate": 8.195817609933871e-05, "loss": 0.0109, "step": 18920 }, { "epoch": 17.02338129496403, "grad_norm": 0.26387184858322144, "learning_rate": 8.193697732206233e-05, "loss": 0.0112, "step": 18930 }, { "epoch": 17.032374100719423, "grad_norm": 0.33818188309669495, "learning_rate": 8.19157688431755e-05, "loss": 0.0118, "step": 18940 }, { "epoch": 17.04136690647482, "grad_norm": 0.3060305118560791, "learning_rate": 8.189455066912077e-05, "loss": 0.009, "step": 18950 }, { "epoch": 17.050359712230215, "grad_norm": 0.2275174856185913, "learning_rate": 8.187332280634369e-05, "loss": 0.0099, "step": 18960 }, { "epoch": 17.059352517985612, "grad_norm": 0.21299146115779877, "learning_rate": 8.18520852612927e-05, "loss": 0.0099, "step": 18970 }, { "epoch": 17.068345323741006, "grad_norm": 0.26105913519859314, "learning_rate": 8.183083804041921e-05, "loss": 0.0117, "step": 18980 }, { "epoch": 17.077338129496404, "grad_norm": 0.2947919964790344, "learning_rate": 8.180958115017757e-05, "loss": 0.0095, "step": 18990 }, { "epoch": 17.086330935251798, "grad_norm": 0.3421371281147003, "learning_rate": 8.178831459702505e-05, "loss": 0.0168, "step": 19000 }, { "epoch": 17.095323741007196, "grad_norm": 0.3538321852684021, "learning_rate": 8.17670383874219e-05, "loss": 0.0127, "step": 19010 }, { "epoch": 17.10431654676259, "grad_norm": 0.28438252210617065, "learning_rate": 8.174575252783124e-05, "loss": 0.0095, "step": 19020 }, { "epoch": 17.113309352517987, "grad_norm": 0.17756663262844086, "learning_rate": 8.172445702471914e-05, "loss": 0.0132, "step": 19030 }, { "epoch": 17.12230215827338, "grad_norm": 0.26068392395973206, "learning_rate": 8.170315188455466e-05, "loss": 0.0111, "step": 19040 }, { "epoch": 17.131294964028775, "grad_norm": 0.21353381872177124, "learning_rate": 8.168183711380969e-05, "loss": 0.0093, "step": 19050 }, { "epoch": 17.140287769784173, "grad_norm": 0.1767028421163559, "learning_rate": 8.166051271895913e-05, "loss": 0.0098, "step": 19060 }, { "epoch": 17.149280575539567, "grad_norm": 0.2206844836473465, "learning_rate": 8.163917870648075e-05, "loss": 0.0161, "step": 19070 }, { "epoch": 17.158273381294965, "grad_norm": 0.2665724754333496, "learning_rate": 8.161783508285526e-05, "loss": 0.0091, "step": 19080 }, { "epoch": 17.16726618705036, "grad_norm": 0.2820550203323364, "learning_rate": 8.159648185456628e-05, "loss": 0.0097, "step": 19090 }, { "epoch": 17.176258992805757, "grad_norm": 0.17055144906044006, "learning_rate": 8.157511902810038e-05, "loss": 0.012, "step": 19100 }, { "epoch": 17.18525179856115, "grad_norm": 0.2562007009983063, "learning_rate": 8.155374660994701e-05, "loss": 0.0081, "step": 19110 }, { "epoch": 17.194244604316548, "grad_norm": 0.24457935988903046, "learning_rate": 8.153236460659857e-05, "loss": 0.012, "step": 19120 }, { "epoch": 17.203237410071942, "grad_norm": 0.26084309816360474, "learning_rate": 8.151097302455031e-05, "loss": 0.0079, "step": 19130 }, { "epoch": 17.21223021582734, "grad_norm": 0.23870083689689636, "learning_rate": 8.148957187030044e-05, "loss": 0.012, "step": 19140 }, { "epoch": 17.221223021582734, "grad_norm": 0.22679510712623596, "learning_rate": 8.146816115035006e-05, "loss": 0.0098, "step": 19150 }, { "epoch": 17.230215827338128, "grad_norm": 0.20509688556194305, "learning_rate": 8.14467408712032e-05, "loss": 0.0115, "step": 19160 }, { "epoch": 17.239208633093526, "grad_norm": 0.2567571699619293, "learning_rate": 8.142531103936678e-05, "loss": 0.0098, "step": 19170 }, { "epoch": 17.24820143884892, "grad_norm": 0.24167242646217346, "learning_rate": 8.14038716613506e-05, "loss": 0.0054, "step": 19180 }, { "epoch": 17.257194244604317, "grad_norm": 0.1755676418542862, "learning_rate": 8.138242274366736e-05, "loss": 0.0085, "step": 19190 }, { "epoch": 17.26618705035971, "grad_norm": 0.27926406264305115, "learning_rate": 8.136096429283271e-05, "loss": 0.0117, "step": 19200 }, { "epoch": 17.27517985611511, "grad_norm": 0.22775708138942719, "learning_rate": 8.133949631536515e-05, "loss": 0.0125, "step": 19210 }, { "epoch": 17.284172661870503, "grad_norm": 0.22902646660804749, "learning_rate": 8.131801881778607e-05, "loss": 0.0121, "step": 19220 }, { "epoch": 17.2931654676259, "grad_norm": 0.17221777141094208, "learning_rate": 8.129653180661978e-05, "loss": 0.0094, "step": 19230 }, { "epoch": 17.302158273381295, "grad_norm": 0.2716238796710968, "learning_rate": 8.127503528839346e-05, "loss": 0.0106, "step": 19240 }, { "epoch": 17.31115107913669, "grad_norm": 0.18426914513111115, "learning_rate": 8.125352926963721e-05, "loss": 0.0139, "step": 19250 }, { "epoch": 17.320143884892087, "grad_norm": 0.29052069783210754, "learning_rate": 8.123201375688395e-05, "loss": 0.0093, "step": 19260 }, { "epoch": 17.32913669064748, "grad_norm": 0.20958445966243744, "learning_rate": 8.121048875666954e-05, "loss": 0.01, "step": 19270 }, { "epoch": 17.33812949640288, "grad_norm": 0.2144363969564438, "learning_rate": 8.118895427553274e-05, "loss": 0.0068, "step": 19280 }, { "epoch": 17.347122302158272, "grad_norm": 0.27888011932373047, "learning_rate": 8.116741032001511e-05, "loss": 0.0103, "step": 19290 }, { "epoch": 17.35611510791367, "grad_norm": 0.2824777364730835, "learning_rate": 8.114585689666114e-05, "loss": 0.0102, "step": 19300 }, { "epoch": 17.365107913669064, "grad_norm": 0.1990499198436737, "learning_rate": 8.112429401201821e-05, "loss": 0.008, "step": 19310 }, { "epoch": 17.37410071942446, "grad_norm": 0.21967598795890808, "learning_rate": 8.110272167263656e-05, "loss": 0.0113, "step": 19320 }, { "epoch": 17.383093525179856, "grad_norm": 0.2858216464519501, "learning_rate": 8.108113988506929e-05, "loss": 0.0091, "step": 19330 }, { "epoch": 17.392086330935253, "grad_norm": 0.19498732686042786, "learning_rate": 8.105954865587235e-05, "loss": 0.0103, "step": 19340 }, { "epoch": 17.401079136690647, "grad_norm": 0.24620568752288818, "learning_rate": 8.103794799160463e-05, "loss": 0.0094, "step": 19350 }, { "epoch": 17.41007194244604, "grad_norm": 0.2984185218811035, "learning_rate": 8.101633789882781e-05, "loss": 0.0117, "step": 19360 }, { "epoch": 17.41906474820144, "grad_norm": 0.23898689448833466, "learning_rate": 8.099471838410648e-05, "loss": 0.0076, "step": 19370 }, { "epoch": 17.428057553956833, "grad_norm": 0.30906108021736145, "learning_rate": 8.097308945400806e-05, "loss": 0.0084, "step": 19380 }, { "epoch": 17.43705035971223, "grad_norm": 0.22638580203056335, "learning_rate": 8.095145111510288e-05, "loss": 0.0084, "step": 19390 }, { "epoch": 17.446043165467625, "grad_norm": 0.18623857200145721, "learning_rate": 8.092980337396406e-05, "loss": 0.0071, "step": 19400 }, { "epoch": 17.455035971223023, "grad_norm": 0.24615254998207092, "learning_rate": 8.090814623716763e-05, "loss": 0.0111, "step": 19410 }, { "epoch": 17.464028776978417, "grad_norm": 0.1820739209651947, "learning_rate": 8.088647971129246e-05, "loss": 0.0106, "step": 19420 }, { "epoch": 17.473021582733814, "grad_norm": 0.14808206260204315, "learning_rate": 8.086480380292026e-05, "loss": 0.0115, "step": 19430 }, { "epoch": 17.48201438848921, "grad_norm": 0.21218548715114594, "learning_rate": 8.084311851863562e-05, "loss": 0.0101, "step": 19440 }, { "epoch": 17.491007194244606, "grad_norm": 0.2601390480995178, "learning_rate": 8.082142386502591e-05, "loss": 0.0084, "step": 19450 }, { "epoch": 17.5, "grad_norm": 0.17285151779651642, "learning_rate": 8.079971984868145e-05, "loss": 0.0114, "step": 19460 }, { "epoch": 17.508992805755394, "grad_norm": 0.1563260704278946, "learning_rate": 8.077800647619532e-05, "loss": 0.0093, "step": 19470 }, { "epoch": 17.51798561151079, "grad_norm": 0.2025204747915268, "learning_rate": 8.075628375416345e-05, "loss": 0.0079, "step": 19480 }, { "epoch": 17.526978417266186, "grad_norm": 0.21678927540779114, "learning_rate": 8.073455168918464e-05, "loss": 0.0104, "step": 19490 }, { "epoch": 17.535971223021583, "grad_norm": 0.17705991864204407, "learning_rate": 8.071281028786055e-05, "loss": 0.0074, "step": 19500 }, { "epoch": 17.544964028776977, "grad_norm": 0.2621769905090332, "learning_rate": 8.069105955679562e-05, "loss": 0.0082, "step": 19510 }, { "epoch": 17.553956834532375, "grad_norm": 0.25834956765174866, "learning_rate": 8.066929950259713e-05, "loss": 0.0084, "step": 19520 }, { "epoch": 17.56294964028777, "grad_norm": 0.14309963583946228, "learning_rate": 8.064753013187522e-05, "loss": 0.009, "step": 19530 }, { "epoch": 17.571942446043167, "grad_norm": 0.15136873722076416, "learning_rate": 8.062575145124289e-05, "loss": 0.007, "step": 19540 }, { "epoch": 17.58093525179856, "grad_norm": 0.1570838838815689, "learning_rate": 8.060396346731587e-05, "loss": 0.0088, "step": 19550 }, { "epoch": 17.58992805755396, "grad_norm": 0.2085190862417221, "learning_rate": 8.058216618671281e-05, "loss": 0.0074, "step": 19560 }, { "epoch": 17.598920863309353, "grad_norm": 0.16283133625984192, "learning_rate": 8.056035961605514e-05, "loss": 0.006, "step": 19570 }, { "epoch": 17.607913669064747, "grad_norm": 0.1707983762025833, "learning_rate": 8.05385437619671e-05, "loss": 0.0136, "step": 19580 }, { "epoch": 17.616906474820144, "grad_norm": 0.21083706617355347, "learning_rate": 8.05167186310758e-05, "loss": 0.0065, "step": 19590 }, { "epoch": 17.62589928057554, "grad_norm": 0.19817592203617096, "learning_rate": 8.049488423001113e-05, "loss": 0.0074, "step": 19600 }, { "epoch": 17.634892086330936, "grad_norm": 0.24058941006660461, "learning_rate": 8.047304056540581e-05, "loss": 0.0093, "step": 19610 }, { "epoch": 17.64388489208633, "grad_norm": 0.27994367480278015, "learning_rate": 8.045118764389534e-05, "loss": 0.0104, "step": 19620 }, { "epoch": 17.652877697841728, "grad_norm": 0.16247953474521637, "learning_rate": 8.042932547211809e-05, "loss": 0.0058, "step": 19630 }, { "epoch": 17.66187050359712, "grad_norm": 0.2920841574668884, "learning_rate": 8.04074540567152e-05, "loss": 0.0081, "step": 19640 }, { "epoch": 17.67086330935252, "grad_norm": 0.19620846211910248, "learning_rate": 8.038557340433063e-05, "loss": 0.0151, "step": 19650 }, { "epoch": 17.679856115107913, "grad_norm": 0.12669476866722107, "learning_rate": 8.036368352161115e-05, "loss": 0.0075, "step": 19660 }, { "epoch": 17.68884892086331, "grad_norm": 0.2717382609844208, "learning_rate": 8.034178441520633e-05, "loss": 0.0086, "step": 19670 }, { "epoch": 17.697841726618705, "grad_norm": 0.31017059087753296, "learning_rate": 8.031987609176852e-05, "loss": 0.0109, "step": 19680 }, { "epoch": 17.7068345323741, "grad_norm": 0.14785514771938324, "learning_rate": 8.02979585579529e-05, "loss": 0.0098, "step": 19690 }, { "epoch": 17.715827338129497, "grad_norm": 0.2353411316871643, "learning_rate": 8.027603182041745e-05, "loss": 0.0121, "step": 19700 }, { "epoch": 17.72482014388489, "grad_norm": 0.17294760048389435, "learning_rate": 8.025409588582292e-05, "loss": 0.0127, "step": 19710 }, { "epoch": 17.73381294964029, "grad_norm": 0.2522541880607605, "learning_rate": 8.023215076083288e-05, "loss": 0.0088, "step": 19720 }, { "epoch": 17.742805755395683, "grad_norm": 0.23890849947929382, "learning_rate": 8.021019645211367e-05, "loss": 0.0072, "step": 19730 }, { "epoch": 17.75179856115108, "grad_norm": 0.23531025648117065, "learning_rate": 8.018823296633441e-05, "loss": 0.0078, "step": 19740 }, { "epoch": 17.760791366906474, "grad_norm": 0.19123706221580505, "learning_rate": 8.016626031016708e-05, "loss": 0.0122, "step": 19750 }, { "epoch": 17.769784172661872, "grad_norm": 0.2541978359222412, "learning_rate": 8.014427849028636e-05, "loss": 0.0083, "step": 19760 }, { "epoch": 17.778776978417266, "grad_norm": 0.19967658817768097, "learning_rate": 8.012228751336974e-05, "loss": 0.0099, "step": 19770 }, { "epoch": 17.78776978417266, "grad_norm": 0.2136913537979126, "learning_rate": 8.01002873860975e-05, "loss": 0.0142, "step": 19780 }, { "epoch": 17.796762589928058, "grad_norm": 0.2240915596485138, "learning_rate": 8.00782781151527e-05, "loss": 0.0086, "step": 19790 }, { "epoch": 17.805755395683452, "grad_norm": 0.19286037981510162, "learning_rate": 8.005625970722119e-05, "loss": 0.0132, "step": 19800 }, { "epoch": 17.81474820143885, "grad_norm": 0.2202056348323822, "learning_rate": 8.003423216899158e-05, "loss": 0.0085, "step": 19810 }, { "epoch": 17.823741007194243, "grad_norm": 0.19655784964561462, "learning_rate": 8.001219550715522e-05, "loss": 0.0128, "step": 19820 }, { "epoch": 17.83273381294964, "grad_norm": 0.11697965115308762, "learning_rate": 7.999014972840632e-05, "loss": 0.0148, "step": 19830 }, { "epoch": 17.841726618705035, "grad_norm": 0.25134092569351196, "learning_rate": 7.996809483944174e-05, "loss": 0.0082, "step": 19840 }, { "epoch": 17.850719424460433, "grad_norm": 0.18941907584667206, "learning_rate": 7.994603084696124e-05, "loss": 0.0059, "step": 19850 }, { "epoch": 17.859712230215827, "grad_norm": 0.14385409653186798, "learning_rate": 7.992395775766724e-05, "loss": 0.0074, "step": 19860 }, { "epoch": 17.868705035971225, "grad_norm": 0.16521021723747253, "learning_rate": 7.990187557826497e-05, "loss": 0.011, "step": 19870 }, { "epoch": 17.87769784172662, "grad_norm": 0.18218575417995453, "learning_rate": 7.987978431546242e-05, "loss": 0.0099, "step": 19880 }, { "epoch": 17.886690647482013, "grad_norm": 0.17116142809391022, "learning_rate": 7.985768397597031e-05, "loss": 0.0098, "step": 19890 }, { "epoch": 17.89568345323741, "grad_norm": 0.22747501730918884, "learning_rate": 7.983557456650216e-05, "loss": 0.0114, "step": 19900 }, { "epoch": 17.904676258992804, "grad_norm": 0.27424824237823486, "learning_rate": 7.981345609377422e-05, "loss": 0.0124, "step": 19910 }, { "epoch": 17.913669064748202, "grad_norm": 0.259092777967453, "learning_rate": 7.97913285645055e-05, "loss": 0.0074, "step": 19920 }, { "epoch": 17.922661870503596, "grad_norm": 0.1875603348016739, "learning_rate": 7.976919198541776e-05, "loss": 0.008, "step": 19930 }, { "epoch": 17.931654676258994, "grad_norm": 0.24115003645420074, "learning_rate": 7.974704636323548e-05, "loss": 0.0109, "step": 19940 }, { "epoch": 17.940647482014388, "grad_norm": 0.23098403215408325, "learning_rate": 7.972489170468597e-05, "loss": 0.0077, "step": 19950 }, { "epoch": 17.949640287769785, "grad_norm": 0.2762037515640259, "learning_rate": 7.970272801649918e-05, "loss": 0.0118, "step": 19960 }, { "epoch": 17.95863309352518, "grad_norm": 0.15233728289604187, "learning_rate": 7.96805553054079e-05, "loss": 0.0101, "step": 19970 }, { "epoch": 17.967625899280577, "grad_norm": 0.2526564598083496, "learning_rate": 7.965837357814756e-05, "loss": 0.0098, "step": 19980 }, { "epoch": 17.97661870503597, "grad_norm": 0.23537757992744446, "learning_rate": 7.963618284145643e-05, "loss": 0.0131, "step": 19990 }, { "epoch": 17.985611510791365, "grad_norm": 0.21834048628807068, "learning_rate": 7.961398310207544e-05, "loss": 0.0107, "step": 20000 }, { "epoch": 17.994604316546763, "grad_norm": 0.18336622416973114, "learning_rate": 7.95917743667483e-05, "loss": 0.0085, "step": 20010 }, { "epoch": 18.003597122302157, "grad_norm": 0.20830973982810974, "learning_rate": 7.956955664222144e-05, "loss": 0.0118, "step": 20020 }, { "epoch": 18.012589928057555, "grad_norm": 0.18180298805236816, "learning_rate": 7.954732993524399e-05, "loss": 0.01, "step": 20030 }, { "epoch": 18.02158273381295, "grad_norm": 0.17440424859523773, "learning_rate": 7.952509425256786e-05, "loss": 0.0097, "step": 20040 }, { "epoch": 18.030575539568346, "grad_norm": 0.21332691609859467, "learning_rate": 7.950284960094767e-05, "loss": 0.0095, "step": 20050 }, { "epoch": 18.03956834532374, "grad_norm": 0.20942890644073486, "learning_rate": 7.948059598714076e-05, "loss": 0.0088, "step": 20060 }, { "epoch": 18.048561151079138, "grad_norm": 0.17362816631793976, "learning_rate": 7.945833341790717e-05, "loss": 0.0113, "step": 20070 }, { "epoch": 18.057553956834532, "grad_norm": 0.36073073744773865, "learning_rate": 7.94360619000097e-05, "loss": 0.0117, "step": 20080 }, { "epoch": 18.06654676258993, "grad_norm": 0.15441137552261353, "learning_rate": 7.941378144021381e-05, "loss": 0.009, "step": 20090 }, { "epoch": 18.075539568345324, "grad_norm": 0.1627844274044037, "learning_rate": 7.939149204528777e-05, "loss": 0.0091, "step": 20100 }, { "epoch": 18.084532374100718, "grad_norm": 0.31060096621513367, "learning_rate": 7.936919372200246e-05, "loss": 0.011, "step": 20110 }, { "epoch": 18.093525179856115, "grad_norm": 0.19829502701759338, "learning_rate": 7.934688647713158e-05, "loss": 0.0095, "step": 20120 }, { "epoch": 18.10251798561151, "grad_norm": 0.16822485625743866, "learning_rate": 7.932457031745143e-05, "loss": 0.0083, "step": 20130 }, { "epoch": 18.111510791366907, "grad_norm": 0.18011747300624847, "learning_rate": 7.930224524974108e-05, "loss": 0.0099, "step": 20140 }, { "epoch": 18.1205035971223, "grad_norm": 0.20448604226112366, "learning_rate": 7.927991128078232e-05, "loss": 0.0095, "step": 20150 }, { "epoch": 18.1294964028777, "grad_norm": 0.16844958066940308, "learning_rate": 7.925756841735958e-05, "loss": 0.0074, "step": 20160 }, { "epoch": 18.138489208633093, "grad_norm": 0.1841575652360916, "learning_rate": 7.923521666626008e-05, "loss": 0.0074, "step": 20170 }, { "epoch": 18.14748201438849, "grad_norm": 0.16481657326221466, "learning_rate": 7.921285603427366e-05, "loss": 0.0069, "step": 20180 }, { "epoch": 18.156474820143885, "grad_norm": 0.26711320877075195, "learning_rate": 7.91904865281929e-05, "loss": 0.0079, "step": 20190 }, { "epoch": 18.165467625899282, "grad_norm": 0.17442145943641663, "learning_rate": 7.916810815481307e-05, "loss": 0.008, "step": 20200 }, { "epoch": 18.174460431654676, "grad_norm": 0.1935424953699112, "learning_rate": 7.914572092093211e-05, "loss": 0.0082, "step": 20210 }, { "epoch": 18.18345323741007, "grad_norm": 0.16565677523612976, "learning_rate": 7.912332483335068e-05, "loss": 0.0082, "step": 20220 }, { "epoch": 18.192446043165468, "grad_norm": 0.20707614719867706, "learning_rate": 7.910091989887213e-05, "loss": 0.0075, "step": 20230 }, { "epoch": 18.201438848920862, "grad_norm": 0.19892056286334991, "learning_rate": 7.907850612430248e-05, "loss": 0.0086, "step": 20240 }, { "epoch": 18.21043165467626, "grad_norm": 0.2215111255645752, "learning_rate": 7.905608351645044e-05, "loss": 0.0074, "step": 20250 }, { "epoch": 18.219424460431654, "grad_norm": 0.18182168900966644, "learning_rate": 7.90336520821274e-05, "loss": 0.0081, "step": 20260 }, { "epoch": 18.22841726618705, "grad_norm": 0.09125512838363647, "learning_rate": 7.901121182814746e-05, "loss": 0.0064, "step": 20270 }, { "epoch": 18.237410071942445, "grad_norm": 0.1662946194410324, "learning_rate": 7.898876276132736e-05, "loss": 0.0086, "step": 20280 }, { "epoch": 18.246402877697843, "grad_norm": 0.18437865376472473, "learning_rate": 7.896630488848654e-05, "loss": 0.0066, "step": 20290 }, { "epoch": 18.255395683453237, "grad_norm": 0.2457776665687561, "learning_rate": 7.89438382164471e-05, "loss": 0.0074, "step": 20300 }, { "epoch": 18.264388489208635, "grad_norm": 0.19666531682014465, "learning_rate": 7.892136275203383e-05, "loss": 0.0078, "step": 20310 }, { "epoch": 18.27338129496403, "grad_norm": 0.2960019111633301, "learning_rate": 7.889887850207418e-05, "loss": 0.0081, "step": 20320 }, { "epoch": 18.282374100719423, "grad_norm": 0.19132447242736816, "learning_rate": 7.887638547339827e-05, "loss": 0.0106, "step": 20330 }, { "epoch": 18.29136690647482, "grad_norm": 0.12847855687141418, "learning_rate": 7.885388367283891e-05, "loss": 0.0074, "step": 20340 }, { "epoch": 18.300359712230215, "grad_norm": 0.3025101125240326, "learning_rate": 7.88313731072315e-05, "loss": 0.007, "step": 20350 }, { "epoch": 18.309352517985612, "grad_norm": 0.267711341381073, "learning_rate": 7.88088537834142e-05, "loss": 0.0091, "step": 20360 }, { "epoch": 18.318345323741006, "grad_norm": 0.21356989443302155, "learning_rate": 7.878632570822778e-05, "loss": 0.0083, "step": 20370 }, { "epoch": 18.327338129496404, "grad_norm": 0.22357633709907532, "learning_rate": 7.876378888851567e-05, "loss": 0.0083, "step": 20380 }, { "epoch": 18.336330935251798, "grad_norm": 0.20435622334480286, "learning_rate": 7.874124333112396e-05, "loss": 0.0091, "step": 20390 }, { "epoch": 18.345323741007196, "grad_norm": 0.12184172868728638, "learning_rate": 7.871868904290138e-05, "loss": 0.008, "step": 20400 }, { "epoch": 18.35431654676259, "grad_norm": 0.11380337923765182, "learning_rate": 7.869612603069935e-05, "loss": 0.0104, "step": 20410 }, { "epoch": 18.363309352517987, "grad_norm": 0.171237051486969, "learning_rate": 7.867355430137192e-05, "loss": 0.0079, "step": 20420 }, { "epoch": 18.37230215827338, "grad_norm": 0.1914692521095276, "learning_rate": 7.865097386177577e-05, "loss": 0.007, "step": 20430 }, { "epoch": 18.381294964028775, "grad_norm": 0.2893975079059601, "learning_rate": 7.862838471877023e-05, "loss": 0.0116, "step": 20440 }, { "epoch": 18.390287769784173, "grad_norm": 0.19974933564662933, "learning_rate": 7.860578687921731e-05, "loss": 0.0096, "step": 20450 }, { "epoch": 18.399280575539567, "grad_norm": 0.22937534749507904, "learning_rate": 7.858318034998164e-05, "loss": 0.0092, "step": 20460 }, { "epoch": 18.408273381294965, "grad_norm": 0.18049094080924988, "learning_rate": 7.856056513793046e-05, "loss": 0.0066, "step": 20470 }, { "epoch": 18.41726618705036, "grad_norm": 0.1615358293056488, "learning_rate": 7.85379412499337e-05, "loss": 0.0091, "step": 20480 }, { "epoch": 18.426258992805757, "grad_norm": 0.18805940449237823, "learning_rate": 7.851530869286389e-05, "loss": 0.0079, "step": 20490 }, { "epoch": 18.43525179856115, "grad_norm": 0.32721519470214844, "learning_rate": 7.849266747359619e-05, "loss": 0.0086, "step": 20500 }, { "epoch": 18.444244604316548, "grad_norm": 0.2136598378419876, "learning_rate": 7.847001759900843e-05, "loss": 0.0056, "step": 20510 }, { "epoch": 18.453237410071942, "grad_norm": 0.22056017816066742, "learning_rate": 7.844735907598102e-05, "loss": 0.0134, "step": 20520 }, { "epoch": 18.46223021582734, "grad_norm": 0.11842656135559082, "learning_rate": 7.842469191139703e-05, "loss": 0.0078, "step": 20530 }, { "epoch": 18.471223021582734, "grad_norm": 0.13975954055786133, "learning_rate": 7.840201611214215e-05, "loss": 0.0081, "step": 20540 }, { "epoch": 18.480215827338128, "grad_norm": 0.17096863687038422, "learning_rate": 7.837933168510469e-05, "loss": 0.0131, "step": 20550 }, { "epoch": 18.489208633093526, "grad_norm": 0.2908725440502167, "learning_rate": 7.835663863717559e-05, "loss": 0.0097, "step": 20560 }, { "epoch": 18.49820143884892, "grad_norm": 0.21602898836135864, "learning_rate": 7.833393697524838e-05, "loss": 0.0109, "step": 20570 }, { "epoch": 18.507194244604317, "grad_norm": 0.22537001967430115, "learning_rate": 7.831122670621922e-05, "loss": 0.0085, "step": 20580 }, { "epoch": 18.51618705035971, "grad_norm": 0.16096647083759308, "learning_rate": 7.82885078369869e-05, "loss": 0.0089, "step": 20590 }, { "epoch": 18.52517985611511, "grad_norm": 0.18708816170692444, "learning_rate": 7.826578037445283e-05, "loss": 0.0066, "step": 20600 }, { "epoch": 18.534172661870503, "grad_norm": 0.16687354445457458, "learning_rate": 7.824304432552097e-05, "loss": 0.0058, "step": 20610 }, { "epoch": 18.5431654676259, "grad_norm": 0.167221799492836, "learning_rate": 7.822029969709798e-05, "loss": 0.0102, "step": 20620 }, { "epoch": 18.552158273381295, "grad_norm": 0.19178546965122223, "learning_rate": 7.819754649609306e-05, "loss": 0.0139, "step": 20630 }, { "epoch": 18.56115107913669, "grad_norm": 0.18236054480075836, "learning_rate": 7.817478472941802e-05, "loss": 0.0107, "step": 20640 }, { "epoch": 18.570143884892087, "grad_norm": 0.19788235425949097, "learning_rate": 7.815201440398727e-05, "loss": 0.0062, "step": 20650 }, { "epoch": 18.57913669064748, "grad_norm": 0.2454807460308075, "learning_rate": 7.812923552671789e-05, "loss": 0.0106, "step": 20660 }, { "epoch": 18.58812949640288, "grad_norm": 0.17556864023208618, "learning_rate": 7.810644810452945e-05, "loss": 0.0066, "step": 20670 }, { "epoch": 18.597122302158272, "grad_norm": 0.2681271433830261, "learning_rate": 7.808365214434417e-05, "loss": 0.008, "step": 20680 }, { "epoch": 18.60611510791367, "grad_norm": 0.21881914138793945, "learning_rate": 7.80608476530869e-05, "loss": 0.0088, "step": 20690 }, { "epoch": 18.615107913669064, "grad_norm": 0.24794340133666992, "learning_rate": 7.8038034637685e-05, "loss": 0.0066, "step": 20700 }, { "epoch": 18.62410071942446, "grad_norm": 0.22637996077537537, "learning_rate": 7.801521310506848e-05, "loss": 0.0074, "step": 20710 }, { "epoch": 18.633093525179856, "grad_norm": 0.26179373264312744, "learning_rate": 7.799238306216994e-05, "loss": 0.0058, "step": 20720 }, { "epoch": 18.642086330935253, "grad_norm": 0.199912428855896, "learning_rate": 7.796954451592448e-05, "loss": 0.0093, "step": 20730 }, { "epoch": 18.651079136690647, "grad_norm": 0.2172669768333435, "learning_rate": 7.794669747326992e-05, "loss": 0.0109, "step": 20740 }, { "epoch": 18.66007194244604, "grad_norm": 0.2731919586658478, "learning_rate": 7.792384194114654e-05, "loss": 0.0081, "step": 20750 }, { "epoch": 18.66906474820144, "grad_norm": 0.3763325810432434, "learning_rate": 7.790097792649729e-05, "loss": 0.0091, "step": 20760 }, { "epoch": 18.678057553956833, "grad_norm": 0.1889001429080963, "learning_rate": 7.787810543626762e-05, "loss": 0.0078, "step": 20770 }, { "epoch": 18.68705035971223, "grad_norm": 0.2521246075630188, "learning_rate": 7.785522447740558e-05, "loss": 0.0073, "step": 20780 }, { "epoch": 18.696043165467625, "grad_norm": 0.2379106879234314, "learning_rate": 7.783233505686182e-05, "loss": 0.0139, "step": 20790 }, { "epoch": 18.705035971223023, "grad_norm": 0.12632471323013306, "learning_rate": 7.780943718158955e-05, "loss": 0.0065, "step": 20800 }, { "epoch": 18.714028776978417, "grad_norm": 0.30058223009109497, "learning_rate": 7.778653085854453e-05, "loss": 0.0107, "step": 20810 }, { "epoch": 18.723021582733814, "grad_norm": 0.31949692964553833, "learning_rate": 7.77636160946851e-05, "loss": 0.0081, "step": 20820 }, { "epoch": 18.73201438848921, "grad_norm": 0.28292199969291687, "learning_rate": 7.774069289697215e-05, "loss": 0.0107, "step": 20830 }, { "epoch": 18.741007194244606, "grad_norm": 0.14474084973335266, "learning_rate": 7.771776127236913e-05, "loss": 0.0075, "step": 20840 }, { "epoch": 18.75, "grad_norm": 0.1802893877029419, "learning_rate": 7.769482122784212e-05, "loss": 0.007, "step": 20850 }, { "epoch": 18.758992805755394, "grad_norm": 0.23647889494895935, "learning_rate": 7.767187277035963e-05, "loss": 0.0088, "step": 20860 }, { "epoch": 18.76798561151079, "grad_norm": 0.24282823503017426, "learning_rate": 7.764891590689285e-05, "loss": 0.0106, "step": 20870 }, { "epoch": 18.776978417266186, "grad_norm": 0.17293626070022583, "learning_rate": 7.762595064441542e-05, "loss": 0.009, "step": 20880 }, { "epoch": 18.785971223021583, "grad_norm": 0.22529222071170807, "learning_rate": 7.760297698990362e-05, "loss": 0.0084, "step": 20890 }, { "epoch": 18.794964028776977, "grad_norm": 0.24642440676689148, "learning_rate": 7.757999495033623e-05, "loss": 0.0092, "step": 20900 }, { "epoch": 18.803956834532375, "grad_norm": 0.2042914777994156, "learning_rate": 7.755700453269456e-05, "loss": 0.0056, "step": 20910 }, { "epoch": 18.81294964028777, "grad_norm": 0.24541394412517548, "learning_rate": 7.753400574396254e-05, "loss": 0.0107, "step": 20920 }, { "epoch": 18.821942446043167, "grad_norm": 0.32857635617256165, "learning_rate": 7.751099859112655e-05, "loss": 0.0122, "step": 20930 }, { "epoch": 18.83093525179856, "grad_norm": 0.16724243760108948, "learning_rate": 7.748798308117557e-05, "loss": 0.0071, "step": 20940 }, { "epoch": 18.83992805755396, "grad_norm": 0.2385992854833603, "learning_rate": 7.746495922110112e-05, "loss": 0.0088, "step": 20950 }, { "epoch": 18.848920863309353, "grad_norm": 0.24027864634990692, "learning_rate": 7.744192701789723e-05, "loss": 0.0072, "step": 20960 }, { "epoch": 18.857913669064747, "grad_norm": 0.2354765087366104, "learning_rate": 7.741888647856046e-05, "loss": 0.0079, "step": 20970 }, { "epoch": 18.866906474820144, "grad_norm": 0.17609930038452148, "learning_rate": 7.739583761008994e-05, "loss": 0.0082, "step": 20980 }, { "epoch": 18.87589928057554, "grad_norm": 0.24497565627098083, "learning_rate": 7.73727804194873e-05, "loss": 0.0074, "step": 20990 }, { "epoch": 18.884892086330936, "grad_norm": 0.1388169676065445, "learning_rate": 7.734971491375671e-05, "loss": 0.0069, "step": 21000 }, { "epoch": 18.89388489208633, "grad_norm": 0.16142421960830688, "learning_rate": 7.732664109990485e-05, "loss": 0.0077, "step": 21010 }, { "epoch": 18.902877697841728, "grad_norm": 0.21818765997886658, "learning_rate": 7.730355898494095e-05, "loss": 0.0152, "step": 21020 }, { "epoch": 18.91187050359712, "grad_norm": 0.13303321599960327, "learning_rate": 7.728046857587673e-05, "loss": 0.0121, "step": 21030 }, { "epoch": 18.92086330935252, "grad_norm": 0.3817123472690582, "learning_rate": 7.725736987972647e-05, "loss": 0.0089, "step": 21040 }, { "epoch": 18.929856115107913, "grad_norm": 0.2710184156894684, "learning_rate": 7.723426290350691e-05, "loss": 0.0103, "step": 21050 }, { "epoch": 18.93884892086331, "grad_norm": 0.15506304800510406, "learning_rate": 7.721114765423736e-05, "loss": 0.0103, "step": 21060 }, { "epoch": 18.947841726618705, "grad_norm": 0.1582183986902237, "learning_rate": 7.718802413893963e-05, "loss": 0.0081, "step": 21070 }, { "epoch": 18.9568345323741, "grad_norm": 0.2766926884651184, "learning_rate": 7.716489236463802e-05, "loss": 0.0086, "step": 21080 }, { "epoch": 18.965827338129497, "grad_norm": 0.1575281322002411, "learning_rate": 7.714175233835936e-05, "loss": 0.0065, "step": 21090 }, { "epoch": 18.97482014388489, "grad_norm": 0.16162556409835815, "learning_rate": 7.711860406713299e-05, "loss": 0.0079, "step": 21100 }, { "epoch": 18.98381294964029, "grad_norm": 0.2542513906955719, "learning_rate": 7.70954475579907e-05, "loss": 0.0082, "step": 21110 }, { "epoch": 18.992805755395683, "grad_norm": 0.26356041431427, "learning_rate": 7.707228281796688e-05, "loss": 0.0086, "step": 21120 }, { "epoch": 19.00179856115108, "grad_norm": 0.17519281804561615, "learning_rate": 7.704910985409833e-05, "loss": 0.008, "step": 21130 }, { "epoch": 19.010791366906474, "grad_norm": 0.19148507714271545, "learning_rate": 7.702592867342439e-05, "loss": 0.0093, "step": 21140 }, { "epoch": 19.019784172661872, "grad_norm": 0.18937061727046967, "learning_rate": 7.700273928298691e-05, "loss": 0.0074, "step": 21150 }, { "epoch": 19.028776978417266, "grad_norm": 0.19391657412052155, "learning_rate": 7.697954168983021e-05, "loss": 0.0067, "step": 21160 }, { "epoch": 19.037769784172664, "grad_norm": 0.13860532641410828, "learning_rate": 7.695633590100109e-05, "loss": 0.0092, "step": 21170 }, { "epoch": 19.046762589928058, "grad_norm": 0.2669145464897156, "learning_rate": 7.693312192354886e-05, "loss": 0.0085, "step": 21180 }, { "epoch": 19.055755395683452, "grad_norm": 0.12783308327198029, "learning_rate": 7.690989976452532e-05, "loss": 0.0067, "step": 21190 }, { "epoch": 19.06474820143885, "grad_norm": 0.24108441174030304, "learning_rate": 7.688666943098475e-05, "loss": 0.0085, "step": 21200 }, { "epoch": 19.073741007194243, "grad_norm": 0.1317795217037201, "learning_rate": 7.686343092998389e-05, "loss": 0.0063, "step": 21210 }, { "epoch": 19.08273381294964, "grad_norm": 0.23457501828670502, "learning_rate": 7.684018426858202e-05, "loss": 0.0094, "step": 21220 }, { "epoch": 19.091726618705035, "grad_norm": 0.29607057571411133, "learning_rate": 7.681692945384084e-05, "loss": 0.0091, "step": 21230 }, { "epoch": 19.100719424460433, "grad_norm": 0.19760675728321075, "learning_rate": 7.679366649282456e-05, "loss": 0.0066, "step": 21240 }, { "epoch": 19.109712230215827, "grad_norm": 0.2767833471298218, "learning_rate": 7.677039539259983e-05, "loss": 0.0097, "step": 21250 }, { "epoch": 19.118705035971225, "grad_norm": 0.19448347389698029, "learning_rate": 7.674711616023581e-05, "loss": 0.0083, "step": 21260 }, { "epoch": 19.12769784172662, "grad_norm": 0.22657035291194916, "learning_rate": 7.672382880280413e-05, "loss": 0.008, "step": 21270 }, { "epoch": 19.136690647482013, "grad_norm": 0.17637509107589722, "learning_rate": 7.670053332737885e-05, "loss": 0.0075, "step": 21280 }, { "epoch": 19.14568345323741, "grad_norm": 0.3172597289085388, "learning_rate": 7.667722974103654e-05, "loss": 0.0063, "step": 21290 }, { "epoch": 19.154676258992804, "grad_norm": 0.2033046931028366, "learning_rate": 7.66539180508562e-05, "loss": 0.0071, "step": 21300 }, { "epoch": 19.163669064748202, "grad_norm": 0.1377367377281189, "learning_rate": 7.663059826391932e-05, "loss": 0.0099, "step": 21310 }, { "epoch": 19.172661870503596, "grad_norm": 0.10075319558382034, "learning_rate": 7.660727038730981e-05, "loss": 0.0071, "step": 21320 }, { "epoch": 19.181654676258994, "grad_norm": 0.24743258953094482, "learning_rate": 7.65839344281141e-05, "loss": 0.0085, "step": 21330 }, { "epoch": 19.190647482014388, "grad_norm": 0.2517131268978119, "learning_rate": 7.656059039342101e-05, "loss": 0.0067, "step": 21340 }, { "epoch": 19.199640287769785, "grad_norm": 0.17452841997146606, "learning_rate": 7.653723829032187e-05, "loss": 0.0096, "step": 21350 }, { "epoch": 19.20863309352518, "grad_norm": 0.22887882590293884, "learning_rate": 7.65138781259104e-05, "loss": 0.0128, "step": 21360 }, { "epoch": 19.217625899280577, "grad_norm": 0.26428139209747314, "learning_rate": 7.649050990728279e-05, "loss": 0.0061, "step": 21370 }, { "epoch": 19.22661870503597, "grad_norm": 0.20820632576942444, "learning_rate": 7.646713364153774e-05, "loss": 0.0084, "step": 21380 }, { "epoch": 19.235611510791365, "grad_norm": 0.19981758296489716, "learning_rate": 7.64437493357763e-05, "loss": 0.007, "step": 21390 }, { "epoch": 19.244604316546763, "grad_norm": 0.13182079792022705, "learning_rate": 7.642035699710202e-05, "loss": 0.0081, "step": 21400 }, { "epoch": 19.253597122302157, "grad_norm": 0.20174546539783478, "learning_rate": 7.639695663262089e-05, "loss": 0.0069, "step": 21410 }, { "epoch": 19.262589928057555, "grad_norm": 0.18110722303390503, "learning_rate": 7.637354824944128e-05, "loss": 0.017, "step": 21420 }, { "epoch": 19.27158273381295, "grad_norm": 0.14606614410877228, "learning_rate": 7.635013185467408e-05, "loss": 0.0065, "step": 21430 }, { "epoch": 19.280575539568346, "grad_norm": 0.10180311650037766, "learning_rate": 7.632670745543256e-05, "loss": 0.0058, "step": 21440 }, { "epoch": 19.28956834532374, "grad_norm": 0.16614070534706116, "learning_rate": 7.630327505883242e-05, "loss": 0.0081, "step": 21450 }, { "epoch": 19.298561151079138, "grad_norm": 0.20761100947856903, "learning_rate": 7.627983467199182e-05, "loss": 0.005, "step": 21460 }, { "epoch": 19.307553956834532, "grad_norm": 0.212063729763031, "learning_rate": 7.625638630203132e-05, "loss": 0.0099, "step": 21470 }, { "epoch": 19.31654676258993, "grad_norm": 0.14157988131046295, "learning_rate": 7.623292995607394e-05, "loss": 0.0074, "step": 21480 }, { "epoch": 19.325539568345324, "grad_norm": 0.2280322164297104, "learning_rate": 7.620946564124507e-05, "loss": 0.0084, "step": 21490 }, { "epoch": 19.334532374100718, "grad_norm": 0.1646229773759842, "learning_rate": 7.618599336467256e-05, "loss": 0.0075, "step": 21500 }, { "epoch": 19.343525179856115, "grad_norm": 0.21653838455677032, "learning_rate": 7.616251313348666e-05, "loss": 0.0074, "step": 21510 }, { "epoch": 19.35251798561151, "grad_norm": 0.3951760530471802, "learning_rate": 7.613902495482005e-05, "loss": 0.0109, "step": 21520 }, { "epoch": 19.361510791366907, "grad_norm": 0.2838052809238434, "learning_rate": 7.611552883580784e-05, "loss": 0.0078, "step": 21530 }, { "epoch": 19.3705035971223, "grad_norm": 0.16301803290843964, "learning_rate": 7.609202478358748e-05, "loss": 0.0091, "step": 21540 }, { "epoch": 19.3794964028777, "grad_norm": 0.2719302177429199, "learning_rate": 7.606851280529895e-05, "loss": 0.0084, "step": 21550 }, { "epoch": 19.388489208633093, "grad_norm": 0.16181887686252594, "learning_rate": 7.604499290808449e-05, "loss": 0.0077, "step": 21560 }, { "epoch": 19.39748201438849, "grad_norm": 0.16345655918121338, "learning_rate": 7.602146509908888e-05, "loss": 0.0081, "step": 21570 }, { "epoch": 19.406474820143885, "grad_norm": 0.23066361248493195, "learning_rate": 7.599792938545921e-05, "loss": 0.0145, "step": 21580 }, { "epoch": 19.415467625899282, "grad_norm": 0.19428177177906036, "learning_rate": 7.597438577434506e-05, "loss": 0.0091, "step": 21590 }, { "epoch": 19.424460431654676, "grad_norm": 0.19794225692749023, "learning_rate": 7.595083427289831e-05, "loss": 0.0069, "step": 21600 }, { "epoch": 19.43345323741007, "grad_norm": 0.1436307728290558, "learning_rate": 7.59272748882733e-05, "loss": 0.007, "step": 21610 }, { "epoch": 19.442446043165468, "grad_norm": 0.16219305992126465, "learning_rate": 7.590370762762675e-05, "loss": 0.0085, "step": 21620 }, { "epoch": 19.451438848920862, "grad_norm": 0.21749091148376465, "learning_rate": 7.588013249811777e-05, "loss": 0.0114, "step": 21630 }, { "epoch": 19.46043165467626, "grad_norm": 0.17153990268707275, "learning_rate": 7.585654950690786e-05, "loss": 0.0075, "step": 21640 }, { "epoch": 19.469424460431654, "grad_norm": 0.19174623489379883, "learning_rate": 7.583295866116091e-05, "loss": 0.0096, "step": 21650 }, { "epoch": 19.47841726618705, "grad_norm": 0.15600115060806274, "learning_rate": 7.580935996804321e-05, "loss": 0.0092, "step": 21660 }, { "epoch": 19.487410071942445, "grad_norm": 0.2633126974105835, "learning_rate": 7.57857534347234e-05, "loss": 0.0092, "step": 21670 }, { "epoch": 19.496402877697843, "grad_norm": 0.22459305822849274, "learning_rate": 7.576213906837254e-05, "loss": 0.0077, "step": 21680 }, { "epoch": 19.505395683453237, "grad_norm": 0.23170985281467438, "learning_rate": 7.573851687616403e-05, "loss": 0.0093, "step": 21690 }, { "epoch": 19.514388489208635, "grad_norm": 0.1764099895954132, "learning_rate": 7.571488686527368e-05, "loss": 0.0061, "step": 21700 }, { "epoch": 19.52338129496403, "grad_norm": 0.14184315502643585, "learning_rate": 7.569124904287968e-05, "loss": 0.0065, "step": 21710 }, { "epoch": 19.532374100719423, "grad_norm": 0.15777531266212463, "learning_rate": 7.566760341616254e-05, "loss": 0.0091, "step": 21720 }, { "epoch": 19.54136690647482, "grad_norm": 0.18472030758857727, "learning_rate": 7.564394999230519e-05, "loss": 0.0086, "step": 21730 }, { "epoch": 19.550359712230215, "grad_norm": 0.13344137370586395, "learning_rate": 7.562028877849294e-05, "loss": 0.0061, "step": 21740 }, { "epoch": 19.559352517985612, "grad_norm": 0.2171095609664917, "learning_rate": 7.559661978191341e-05, "loss": 0.008, "step": 21750 }, { "epoch": 19.568345323741006, "grad_norm": 0.1772361397743225, "learning_rate": 7.557294300975664e-05, "loss": 0.0073, "step": 21760 }, { "epoch": 19.577338129496404, "grad_norm": 0.21805430948734283, "learning_rate": 7.554925846921499e-05, "loss": 0.0073, "step": 21770 }, { "epoch": 19.586330935251798, "grad_norm": 0.23182043433189392, "learning_rate": 7.552556616748321e-05, "loss": 0.0077, "step": 21780 }, { "epoch": 19.595323741007196, "grad_norm": 0.25334665179252625, "learning_rate": 7.550186611175838e-05, "loss": 0.0112, "step": 21790 }, { "epoch": 19.60431654676259, "grad_norm": 0.17852099239826202, "learning_rate": 7.547815830923998e-05, "loss": 0.0055, "step": 21800 }, { "epoch": 19.613309352517987, "grad_norm": 0.22022029757499695, "learning_rate": 7.54544427671298e-05, "loss": 0.0085, "step": 21810 }, { "epoch": 19.62230215827338, "grad_norm": 0.275683730840683, "learning_rate": 7.543071949263198e-05, "loss": 0.0078, "step": 21820 }, { "epoch": 19.631294964028775, "grad_norm": 0.18993708491325378, "learning_rate": 7.540698849295305e-05, "loss": 0.0054, "step": 21830 }, { "epoch": 19.640287769784173, "grad_norm": 0.11253254115581512, "learning_rate": 7.538324977530183e-05, "loss": 0.0067, "step": 21840 }, { "epoch": 19.649280575539567, "grad_norm": 0.3012467920780182, "learning_rate": 7.535950334688955e-05, "loss": 0.01, "step": 21850 }, { "epoch": 19.658273381294965, "grad_norm": 0.11857690662145615, "learning_rate": 7.533574921492972e-05, "loss": 0.0081, "step": 21860 }, { "epoch": 19.66726618705036, "grad_norm": 0.10787932574748993, "learning_rate": 7.531198738663824e-05, "loss": 0.006, "step": 21870 }, { "epoch": 19.676258992805757, "grad_norm": 0.14911767840385437, "learning_rate": 7.528821786923333e-05, "loss": 0.007, "step": 21880 }, { "epoch": 19.68525179856115, "grad_norm": 0.2727572023868561, "learning_rate": 7.52644406699355e-05, "loss": 0.0093, "step": 21890 }, { "epoch": 19.694244604316548, "grad_norm": 0.2223670929670334, "learning_rate": 7.524065579596766e-05, "loss": 0.0061, "step": 21900 }, { "epoch": 19.703237410071942, "grad_norm": 0.22115330398082733, "learning_rate": 7.521686325455506e-05, "loss": 0.0066, "step": 21910 }, { "epoch": 19.71223021582734, "grad_norm": 0.2611043453216553, "learning_rate": 7.51930630529252e-05, "loss": 0.0091, "step": 21920 }, { "epoch": 19.721223021582734, "grad_norm": 0.24787011742591858, "learning_rate": 7.516925519830797e-05, "loss": 0.0077, "step": 21930 }, { "epoch": 19.730215827338128, "grad_norm": 0.31330060958862305, "learning_rate": 7.514543969793557e-05, "loss": 0.0074, "step": 21940 }, { "epoch": 19.739208633093526, "grad_norm": 0.3025539815425873, "learning_rate": 7.512161655904251e-05, "loss": 0.0106, "step": 21950 }, { "epoch": 19.74820143884892, "grad_norm": 0.3386908173561096, "learning_rate": 7.509778578886563e-05, "loss": 0.0087, "step": 21960 }, { "epoch": 19.757194244604317, "grad_norm": 0.1990545243024826, "learning_rate": 7.507394739464412e-05, "loss": 0.0081, "step": 21970 }, { "epoch": 19.76618705035971, "grad_norm": 0.1919456273317337, "learning_rate": 7.50501013836194e-05, "loss": 0.0105, "step": 21980 }, { "epoch": 19.77517985611511, "grad_norm": 0.32367846369743347, "learning_rate": 7.50262477630353e-05, "loss": 0.0097, "step": 21990 }, { "epoch": 19.784172661870503, "grad_norm": 0.2537568211555481, "learning_rate": 7.500238654013794e-05, "loss": 0.0122, "step": 22000 }, { "epoch": 19.7931654676259, "grad_norm": 0.21425388753414154, "learning_rate": 7.497851772217566e-05, "loss": 0.0076, "step": 22010 }, { "epoch": 19.802158273381295, "grad_norm": 0.2408066987991333, "learning_rate": 7.495464131639924e-05, "loss": 0.0065, "step": 22020 }, { "epoch": 19.81115107913669, "grad_norm": 0.33483439683914185, "learning_rate": 7.493075733006166e-05, "loss": 0.0086, "step": 22030 }, { "epoch": 19.820143884892087, "grad_norm": 0.26474529504776, "learning_rate": 7.490686577041828e-05, "loss": 0.0085, "step": 22040 }, { "epoch": 19.82913669064748, "grad_norm": 0.2404182404279709, "learning_rate": 7.488296664472668e-05, "loss": 0.0131, "step": 22050 }, { "epoch": 19.83812949640288, "grad_norm": 0.27171590924263, "learning_rate": 7.485905996024682e-05, "loss": 0.0084, "step": 22060 }, { "epoch": 19.847122302158272, "grad_norm": 0.3389495611190796, "learning_rate": 7.483514572424093e-05, "loss": 0.0066, "step": 22070 }, { "epoch": 19.85611510791367, "grad_norm": 0.12681709229946136, "learning_rate": 7.481122394397349e-05, "loss": 0.0072, "step": 22080 }, { "epoch": 19.865107913669064, "grad_norm": 0.17871515452861786, "learning_rate": 7.478729462671131e-05, "loss": 0.0065, "step": 22090 }, { "epoch": 19.87410071942446, "grad_norm": 0.2729225158691406, "learning_rate": 7.47633577797235e-05, "loss": 0.0104, "step": 22100 }, { "epoch": 19.883093525179856, "grad_norm": 0.2813331186771393, "learning_rate": 7.473941341028144e-05, "loss": 0.0071, "step": 22110 }, { "epoch": 19.892086330935253, "grad_norm": 0.1583283394575119, "learning_rate": 7.471546152565879e-05, "loss": 0.0063, "step": 22120 }, { "epoch": 19.901079136690647, "grad_norm": 0.21126806735992432, "learning_rate": 7.46915021331315e-05, "loss": 0.0109, "step": 22130 }, { "epoch": 19.91007194244604, "grad_norm": 0.2653857171535492, "learning_rate": 7.466753523997778e-05, "loss": 0.0096, "step": 22140 }, { "epoch": 19.91906474820144, "grad_norm": 0.30254727602005005, "learning_rate": 7.464356085347819e-05, "loss": 0.0098, "step": 22150 }, { "epoch": 19.928057553956833, "grad_norm": 0.1568879634141922, "learning_rate": 7.461957898091548e-05, "loss": 0.0079, "step": 22160 }, { "epoch": 19.93705035971223, "grad_norm": 0.22706690430641174, "learning_rate": 7.459558962957473e-05, "loss": 0.0096, "step": 22170 }, { "epoch": 19.946043165467625, "grad_norm": 0.21674518287181854, "learning_rate": 7.457159280674326e-05, "loss": 0.0076, "step": 22180 }, { "epoch": 19.955035971223023, "grad_norm": 0.21759633719921112, "learning_rate": 7.454758851971066e-05, "loss": 0.0089, "step": 22190 }, { "epoch": 19.964028776978417, "grad_norm": 0.211856409907341, "learning_rate": 7.45235767757688e-05, "loss": 0.0088, "step": 22200 }, { "epoch": 19.973021582733814, "grad_norm": 0.25624001026153564, "learning_rate": 7.449955758221183e-05, "loss": 0.0121, "step": 22210 }, { "epoch": 19.98201438848921, "grad_norm": 0.2297237664461136, "learning_rate": 7.447553094633615e-05, "loss": 0.008, "step": 22220 }, { "epoch": 19.991007194244606, "grad_norm": 0.2527174651622772, "learning_rate": 7.445149687544039e-05, "loss": 0.0075, "step": 22230 }, { "epoch": 20.0, "grad_norm": 0.2673991024494171, "learning_rate": 7.44274553768255e-05, "loss": 0.0078, "step": 22240 }, { "epoch": 20.008992805755394, "grad_norm": 0.17383833229541779, "learning_rate": 7.440340645779464e-05, "loss": 0.0079, "step": 22250 }, { "epoch": 20.01798561151079, "grad_norm": 0.10388574004173279, "learning_rate": 7.437935012565322e-05, "loss": 0.0076, "step": 22260 }, { "epoch": 20.026978417266186, "grad_norm": 0.15122608840465546, "learning_rate": 7.435528638770893e-05, "loss": 0.0097, "step": 22270 }, { "epoch": 20.035971223021583, "grad_norm": 0.2727122902870178, "learning_rate": 7.433121525127171e-05, "loss": 0.0115, "step": 22280 }, { "epoch": 20.044964028776977, "grad_norm": 0.1448647379875183, "learning_rate": 7.430713672365371e-05, "loss": 0.0199, "step": 22290 }, { "epoch": 20.053956834532375, "grad_norm": 0.19930526614189148, "learning_rate": 7.428305081216938e-05, "loss": 0.007, "step": 22300 }, { "epoch": 20.06294964028777, "grad_norm": 0.16481268405914307, "learning_rate": 7.425895752413536e-05, "loss": 0.0094, "step": 22310 }, { "epoch": 20.071942446043167, "grad_norm": 0.1897663027048111, "learning_rate": 7.423485686687057e-05, "loss": 0.0143, "step": 22320 }, { "epoch": 20.08093525179856, "grad_norm": 0.21639981865882874, "learning_rate": 7.421074884769616e-05, "loss": 0.0074, "step": 22330 }, { "epoch": 20.08992805755396, "grad_norm": 0.22091920673847198, "learning_rate": 7.418663347393548e-05, "loss": 0.0118, "step": 22340 }, { "epoch": 20.098920863309353, "grad_norm": 0.2623682916164398, "learning_rate": 7.416251075291418e-05, "loss": 0.01, "step": 22350 }, { "epoch": 20.107913669064747, "grad_norm": 0.15092960000038147, "learning_rate": 7.413838069196007e-05, "loss": 0.0053, "step": 22360 }, { "epoch": 20.116906474820144, "grad_norm": 0.16106803715229034, "learning_rate": 7.411424329840324e-05, "loss": 0.0117, "step": 22370 }, { "epoch": 20.12589928057554, "grad_norm": 0.19680990278720856, "learning_rate": 7.409009857957601e-05, "loss": 0.0145, "step": 22380 }, { "epoch": 20.134892086330936, "grad_norm": 0.26122668385505676, "learning_rate": 7.40659465428129e-05, "loss": 0.0116, "step": 22390 }, { "epoch": 20.14388489208633, "grad_norm": 0.2042994201183319, "learning_rate": 7.404178719545063e-05, "loss": 0.0094, "step": 22400 }, { "epoch": 20.152877697841728, "grad_norm": 0.2998097836971283, "learning_rate": 7.401762054482822e-05, "loss": 0.0133, "step": 22410 }, { "epoch": 20.16187050359712, "grad_norm": 0.18511314690113068, "learning_rate": 7.39934465982868e-05, "loss": 0.009, "step": 22420 }, { "epoch": 20.17086330935252, "grad_norm": 0.20212873816490173, "learning_rate": 7.396926536316984e-05, "loss": 0.0078, "step": 22430 }, { "epoch": 20.179856115107913, "grad_norm": 0.1910778433084488, "learning_rate": 7.394507684682293e-05, "loss": 0.0107, "step": 22440 }, { "epoch": 20.18884892086331, "grad_norm": 0.21444422006607056, "learning_rate": 7.392088105659393e-05, "loss": 0.009, "step": 22450 }, { "epoch": 20.197841726618705, "grad_norm": 0.26581186056137085, "learning_rate": 7.389667799983284e-05, "loss": 0.0127, "step": 22460 }, { "epoch": 20.2068345323741, "grad_norm": 0.2490559071302414, "learning_rate": 7.387246768389193e-05, "loss": 0.0084, "step": 22470 }, { "epoch": 20.215827338129497, "grad_norm": 0.26921290159225464, "learning_rate": 7.384825011612563e-05, "loss": 0.0068, "step": 22480 }, { "epoch": 20.22482014388489, "grad_norm": 0.25329455733299255, "learning_rate": 7.382402530389066e-05, "loss": 0.0131, "step": 22490 }, { "epoch": 20.23381294964029, "grad_norm": 0.21663044393062592, "learning_rate": 7.379979325454582e-05, "loss": 0.0085, "step": 22500 }, { "epoch": 20.242805755395683, "grad_norm": 0.21126553416252136, "learning_rate": 7.37755539754522e-05, "loss": 0.0088, "step": 22510 }, { "epoch": 20.25179856115108, "grad_norm": 0.17821000516414642, "learning_rate": 7.375130747397302e-05, "loss": 0.0064, "step": 22520 }, { "epoch": 20.260791366906474, "grad_norm": 0.22387447953224182, "learning_rate": 7.372705375747377e-05, "loss": 0.0078, "step": 22530 }, { "epoch": 20.269784172661872, "grad_norm": 0.1306295543909073, "learning_rate": 7.370279283332205e-05, "loss": 0.0066, "step": 22540 }, { "epoch": 20.278776978417266, "grad_norm": 0.19959305226802826, "learning_rate": 7.36785247088877e-05, "loss": 0.0071, "step": 22550 }, { "epoch": 20.28776978417266, "grad_norm": 0.1339711993932724, "learning_rate": 7.365424939154275e-05, "loss": 0.0074, "step": 22560 }, { "epoch": 20.296762589928058, "grad_norm": 0.19446898996829987, "learning_rate": 7.362996688866138e-05, "loss": 0.0075, "step": 22570 }, { "epoch": 20.305755395683452, "grad_norm": 0.17833161354064941, "learning_rate": 7.360567720761999e-05, "loss": 0.0097, "step": 22580 }, { "epoch": 20.31474820143885, "grad_norm": 0.15179751813411713, "learning_rate": 7.358138035579711e-05, "loss": 0.0076, "step": 22590 }, { "epoch": 20.323741007194243, "grad_norm": 0.17183123528957367, "learning_rate": 7.355707634057354e-05, "loss": 0.0076, "step": 22600 }, { "epoch": 20.33273381294964, "grad_norm": 0.13129879534244537, "learning_rate": 7.353276516933215e-05, "loss": 0.0047, "step": 22610 }, { "epoch": 20.341726618705035, "grad_norm": 0.12345442175865173, "learning_rate": 7.350844684945806e-05, "loss": 0.0087, "step": 22620 }, { "epoch": 20.350719424460433, "grad_norm": 0.209528848528862, "learning_rate": 7.348412138833851e-05, "loss": 0.0067, "step": 22630 }, { "epoch": 20.359712230215827, "grad_norm": 0.22729289531707764, "learning_rate": 7.345978879336295e-05, "loss": 0.0066, "step": 22640 }, { "epoch": 20.368705035971225, "grad_norm": 0.17076562345027924, "learning_rate": 7.343544907192296e-05, "loss": 0.0108, "step": 22650 }, { "epoch": 20.37769784172662, "grad_norm": 0.16400931775569916, "learning_rate": 7.341110223141235e-05, "loss": 0.0078, "step": 22660 }, { "epoch": 20.386690647482013, "grad_norm": 0.39029765129089355, "learning_rate": 7.3386748279227e-05, "loss": 0.0085, "step": 22670 }, { "epoch": 20.39568345323741, "grad_norm": 0.23960378766059875, "learning_rate": 7.336238722276501e-05, "loss": 0.0117, "step": 22680 }, { "epoch": 20.404676258992804, "grad_norm": 0.17088788747787476, "learning_rate": 7.333801906942663e-05, "loss": 0.0072, "step": 22690 }, { "epoch": 20.413669064748202, "grad_norm": 0.16069748997688293, "learning_rate": 7.331364382661428e-05, "loss": 0.0075, "step": 22700 }, { "epoch": 20.422661870503596, "grad_norm": 0.16932103037834167, "learning_rate": 7.328926150173248e-05, "loss": 0.0071, "step": 22710 }, { "epoch": 20.431654676258994, "grad_norm": 0.2200268805027008, "learning_rate": 7.326487210218795e-05, "loss": 0.0078, "step": 22720 }, { "epoch": 20.440647482014388, "grad_norm": 0.38070613145828247, "learning_rate": 7.324047563538955e-05, "loss": 0.0109, "step": 22730 }, { "epoch": 20.449640287769785, "grad_norm": 0.1601511836051941, "learning_rate": 7.321607210874828e-05, "loss": 0.009, "step": 22740 }, { "epoch": 20.45863309352518, "grad_norm": 0.2327185422182083, "learning_rate": 7.31916615296773e-05, "loss": 0.0072, "step": 22750 }, { "epoch": 20.467625899280577, "grad_norm": 0.1405697762966156, "learning_rate": 7.316724390559188e-05, "loss": 0.0072, "step": 22760 }, { "epoch": 20.47661870503597, "grad_norm": 0.1485958695411682, "learning_rate": 7.314281924390946e-05, "loss": 0.0055, "step": 22770 }, { "epoch": 20.485611510791365, "grad_norm": 0.14189259707927704, "learning_rate": 7.311838755204959e-05, "loss": 0.0087, "step": 22780 }, { "epoch": 20.494604316546763, "grad_norm": 0.19148467481136322, "learning_rate": 7.3093948837434e-05, "loss": 0.0068, "step": 22790 }, { "epoch": 20.503597122302157, "grad_norm": 0.19545897841453552, "learning_rate": 7.306950310748651e-05, "loss": 0.0058, "step": 22800 }, { "epoch": 20.512589928057555, "grad_norm": 0.24866802990436554, "learning_rate": 7.304505036963311e-05, "loss": 0.0089, "step": 22810 }, { "epoch": 20.52158273381295, "grad_norm": 0.212190642952919, "learning_rate": 7.302059063130186e-05, "loss": 0.0072, "step": 22820 }, { "epoch": 20.530575539568346, "grad_norm": 0.20032955706119537, "learning_rate": 7.2996123899923e-05, "loss": 0.0079, "step": 22830 }, { "epoch": 20.53956834532374, "grad_norm": 0.21813178062438965, "learning_rate": 7.297165018292886e-05, "loss": 0.0103, "step": 22840 }, { "epoch": 20.548561151079138, "grad_norm": 0.24549600481987, "learning_rate": 7.294716948775396e-05, "loss": 0.0089, "step": 22850 }, { "epoch": 20.557553956834532, "grad_norm": 0.2380782663822174, "learning_rate": 7.292268182183484e-05, "loss": 0.0086, "step": 22860 }, { "epoch": 20.56654676258993, "grad_norm": 0.18293079733848572, "learning_rate": 7.28981871926102e-05, "loss": 0.0059, "step": 22870 }, { "epoch": 20.575539568345324, "grad_norm": 0.19956356287002563, "learning_rate": 7.28736856075209e-05, "loss": 0.0086, "step": 22880 }, { "epoch": 20.584532374100718, "grad_norm": 0.22960864007472992, "learning_rate": 7.284917707400985e-05, "loss": 0.0066, "step": 22890 }, { "epoch": 20.593525179856115, "grad_norm": 0.1288348138332367, "learning_rate": 7.282466159952212e-05, "loss": 0.009, "step": 22900 }, { "epoch": 20.60251798561151, "grad_norm": 0.14978590607643127, "learning_rate": 7.280013919150483e-05, "loss": 0.0082, "step": 22910 }, { "epoch": 20.611510791366907, "grad_norm": 0.2105398029088974, "learning_rate": 7.277560985740728e-05, "loss": 0.0136, "step": 22920 }, { "epoch": 20.6205035971223, "grad_norm": 0.1567944586277008, "learning_rate": 7.275107360468079e-05, "loss": 0.0088, "step": 22930 }, { "epoch": 20.6294964028777, "grad_norm": 0.18537582457065582, "learning_rate": 7.272653044077885e-05, "loss": 0.0083, "step": 22940 }, { "epoch": 20.638489208633093, "grad_norm": 0.20219826698303223, "learning_rate": 7.270198037315703e-05, "loss": 0.0105, "step": 22950 }, { "epoch": 20.64748201438849, "grad_norm": 0.20157970488071442, "learning_rate": 7.267742340927297e-05, "loss": 0.0065, "step": 22960 }, { "epoch": 20.656474820143885, "grad_norm": 0.1613970547914505, "learning_rate": 7.265285955658645e-05, "loss": 0.0046, "step": 22970 }, { "epoch": 20.665467625899282, "grad_norm": 0.12806087732315063, "learning_rate": 7.26282888225593e-05, "loss": 0.0067, "step": 22980 }, { "epoch": 20.674460431654676, "grad_norm": 0.22018736600875854, "learning_rate": 7.260371121465548e-05, "loss": 0.0075, "step": 22990 }, { "epoch": 20.68345323741007, "grad_norm": 0.2725037932395935, "learning_rate": 7.2579126740341e-05, "loss": 0.0103, "step": 23000 }, { "epoch": 20.692446043165468, "grad_norm": 0.13095299899578094, "learning_rate": 7.2554535407084e-05, "loss": 0.0058, "step": 23010 }, { "epoch": 20.701438848920862, "grad_norm": 0.2235889732837677, "learning_rate": 7.252993722235464e-05, "loss": 0.0089, "step": 23020 }, { "epoch": 20.71043165467626, "grad_norm": 0.24780146777629852, "learning_rate": 7.250533219362523e-05, "loss": 0.0066, "step": 23030 }, { "epoch": 20.719424460431654, "grad_norm": 0.2097705751657486, "learning_rate": 7.248072032837012e-05, "loss": 0.0125, "step": 23040 }, { "epoch": 20.72841726618705, "grad_norm": 0.23969100415706635, "learning_rate": 7.245610163406575e-05, "loss": 0.0084, "step": 23050 }, { "epoch": 20.737410071942445, "grad_norm": 0.24534352123737335, "learning_rate": 7.243147611819061e-05, "loss": 0.0091, "step": 23060 }, { "epoch": 20.746402877697843, "grad_norm": 0.20372216403484344, "learning_rate": 7.240684378822531e-05, "loss": 0.0069, "step": 23070 }, { "epoch": 20.755395683453237, "grad_norm": 0.14360104501247406, "learning_rate": 7.238220465165248e-05, "loss": 0.0064, "step": 23080 }, { "epoch": 20.764388489208635, "grad_norm": 0.19216081500053406, "learning_rate": 7.235755871595684e-05, "loss": 0.0099, "step": 23090 }, { "epoch": 20.77338129496403, "grad_norm": 0.12954087555408478, "learning_rate": 7.233290598862517e-05, "loss": 0.0071, "step": 23100 }, { "epoch": 20.782374100719423, "grad_norm": 0.16520938277244568, "learning_rate": 7.230824647714635e-05, "loss": 0.007, "step": 23110 }, { "epoch": 20.79136690647482, "grad_norm": 0.22886797785758972, "learning_rate": 7.228358018901124e-05, "loss": 0.0074, "step": 23120 }, { "epoch": 20.800359712230215, "grad_norm": 0.33449700474739075, "learning_rate": 7.225890713171286e-05, "loss": 0.0093, "step": 23130 }, { "epoch": 20.809352517985612, "grad_norm": 0.17766764760017395, "learning_rate": 7.223422731274618e-05, "loss": 0.0059, "step": 23140 }, { "epoch": 20.818345323741006, "grad_norm": 0.13557566702365875, "learning_rate": 7.220954073960832e-05, "loss": 0.0058, "step": 23150 }, { "epoch": 20.827338129496404, "grad_norm": 0.19620490074157715, "learning_rate": 7.218484741979838e-05, "loss": 0.0066, "step": 23160 }, { "epoch": 20.836330935251798, "grad_norm": 0.19891205430030823, "learning_rate": 7.216014736081756e-05, "loss": 0.0115, "step": 23170 }, { "epoch": 20.845323741007196, "grad_norm": 0.1968846470117569, "learning_rate": 7.213544057016906e-05, "loss": 0.008, "step": 23180 }, { "epoch": 20.85431654676259, "grad_norm": 0.279606431722641, "learning_rate": 7.211072705535819e-05, "loss": 0.0075, "step": 23190 }, { "epoch": 20.863309352517987, "grad_norm": 0.2098541557788849, "learning_rate": 7.208600682389224e-05, "loss": 0.0067, "step": 23200 }, { "epoch": 20.87230215827338, "grad_norm": 0.18698015809059143, "learning_rate": 7.206127988328055e-05, "loss": 0.0086, "step": 23210 }, { "epoch": 20.881294964028775, "grad_norm": 0.21731361746788025, "learning_rate": 7.203654624103453e-05, "loss": 0.007, "step": 23220 }, { "epoch": 20.890287769784173, "grad_norm": 0.18646618723869324, "learning_rate": 7.201180590466761e-05, "loss": 0.0082, "step": 23230 }, { "epoch": 20.899280575539567, "grad_norm": 0.2551482319831848, "learning_rate": 7.198705888169523e-05, "loss": 0.0103, "step": 23240 }, { "epoch": 20.908273381294965, "grad_norm": 0.3362575173377991, "learning_rate": 7.196230517963491e-05, "loss": 0.0075, "step": 23250 }, { "epoch": 20.91726618705036, "grad_norm": 0.2144898623228073, "learning_rate": 7.193754480600615e-05, "loss": 0.0092, "step": 23260 }, { "epoch": 20.926258992805757, "grad_norm": 0.37764161825180054, "learning_rate": 7.19127777683305e-05, "loss": 0.0137, "step": 23270 }, { "epoch": 20.93525179856115, "grad_norm": 0.17299753427505493, "learning_rate": 7.188800407413156e-05, "loss": 0.0057, "step": 23280 }, { "epoch": 20.944244604316548, "grad_norm": 0.19182540476322174, "learning_rate": 7.186322373093489e-05, "loss": 0.0092, "step": 23290 }, { "epoch": 20.953237410071942, "grad_norm": 0.21629805862903595, "learning_rate": 7.18384367462681e-05, "loss": 0.0065, "step": 23300 }, { "epoch": 20.96223021582734, "grad_norm": 0.19433967769145966, "learning_rate": 7.181364312766085e-05, "loss": 0.0082, "step": 23310 }, { "epoch": 20.971223021582734, "grad_norm": 0.15025167167186737, "learning_rate": 7.178884288264477e-05, "loss": 0.0063, "step": 23320 }, { "epoch": 20.980215827338128, "grad_norm": 0.1647573858499527, "learning_rate": 7.176403601875353e-05, "loss": 0.0094, "step": 23330 }, { "epoch": 20.989208633093526, "grad_norm": 0.1435290426015854, "learning_rate": 7.173922254352279e-05, "loss": 0.0083, "step": 23340 }, { "epoch": 20.99820143884892, "grad_norm": 0.18957646191120148, "learning_rate": 7.171440246449024e-05, "loss": 0.0071, "step": 23350 }, { "epoch": 21.007194244604317, "grad_norm": 0.18103750050067902, "learning_rate": 7.168957578919555e-05, "loss": 0.006, "step": 23360 }, { "epoch": 21.01618705035971, "grad_norm": 0.10305972397327423, "learning_rate": 7.16647425251804e-05, "loss": 0.0075, "step": 23370 }, { "epoch": 21.02517985611511, "grad_norm": 0.11756797134876251, "learning_rate": 7.163990267998852e-05, "loss": 0.0084, "step": 23380 }, { "epoch": 21.034172661870503, "grad_norm": 0.1930074691772461, "learning_rate": 7.161505626116556e-05, "loss": 0.0085, "step": 23390 }, { "epoch": 21.0431654676259, "grad_norm": 0.20818951725959778, "learning_rate": 7.159020327625923e-05, "loss": 0.0077, "step": 23400 }, { "epoch": 21.052158273381295, "grad_norm": 0.27482783794403076, "learning_rate": 7.15653437328192e-05, "loss": 0.0126, "step": 23410 }, { "epoch": 21.06115107913669, "grad_norm": 0.20551590621471405, "learning_rate": 7.154047763839713e-05, "loss": 0.0075, "step": 23420 }, { "epoch": 21.070143884892087, "grad_norm": 0.22701849043369293, "learning_rate": 7.15156050005467e-05, "loss": 0.0062, "step": 23430 }, { "epoch": 21.07913669064748, "grad_norm": 0.1898394078016281, "learning_rate": 7.149072582682357e-05, "loss": 0.0062, "step": 23440 }, { "epoch": 21.08812949640288, "grad_norm": 0.1582619696855545, "learning_rate": 7.146584012478535e-05, "loss": 0.0064, "step": 23450 }, { "epoch": 21.097122302158272, "grad_norm": 0.18301399052143097, "learning_rate": 7.144094790199169e-05, "loss": 0.0089, "step": 23460 }, { "epoch": 21.10611510791367, "grad_norm": 0.15358999371528625, "learning_rate": 7.141604916600415e-05, "loss": 0.0077, "step": 23470 }, { "epoch": 21.115107913669064, "grad_norm": 0.16442975401878357, "learning_rate": 7.139114392438635e-05, "loss": 0.0058, "step": 23480 }, { "epoch": 21.12410071942446, "grad_norm": 0.1704999953508377, "learning_rate": 7.136623218470382e-05, "loss": 0.0078, "step": 23490 }, { "epoch": 21.133093525179856, "grad_norm": 0.2196054756641388, "learning_rate": 7.13413139545241e-05, "loss": 0.0069, "step": 23500 }, { "epoch": 21.142086330935253, "grad_norm": 0.20399650931358337, "learning_rate": 7.131638924141668e-05, "loss": 0.0097, "step": 23510 }, { "epoch": 21.151079136690647, "grad_norm": 0.27018505334854126, "learning_rate": 7.129145805295304e-05, "loss": 0.0099, "step": 23520 }, { "epoch": 21.16007194244604, "grad_norm": 0.15355055034160614, "learning_rate": 7.126652039670661e-05, "loss": 0.0051, "step": 23530 }, { "epoch": 21.16906474820144, "grad_norm": 0.1810114085674286, "learning_rate": 7.124157628025278e-05, "loss": 0.0074, "step": 23540 }, { "epoch": 21.178057553956833, "grad_norm": 0.2588927745819092, "learning_rate": 7.121662571116894e-05, "loss": 0.0075, "step": 23550 }, { "epoch": 21.18705035971223, "grad_norm": 0.24091245234012604, "learning_rate": 7.119166869703441e-05, "loss": 0.0051, "step": 23560 }, { "epoch": 21.196043165467625, "grad_norm": 0.17462222278118134, "learning_rate": 7.116670524543044e-05, "loss": 0.0093, "step": 23570 }, { "epoch": 21.205035971223023, "grad_norm": 0.26529350876808167, "learning_rate": 7.114173536394032e-05, "loss": 0.0077, "step": 23580 }, { "epoch": 21.214028776978417, "grad_norm": 0.1828717738389969, "learning_rate": 7.111675906014917e-05, "loss": 0.0091, "step": 23590 }, { "epoch": 21.223021582733814, "grad_norm": 0.15313392877578735, "learning_rate": 7.109177634164421e-05, "loss": 0.0059, "step": 23600 }, { "epoch": 21.23201438848921, "grad_norm": 0.13867533206939697, "learning_rate": 7.106678721601449e-05, "loss": 0.0057, "step": 23610 }, { "epoch": 21.241007194244606, "grad_norm": 0.20531444251537323, "learning_rate": 7.104179169085103e-05, "loss": 0.0081, "step": 23620 }, { "epoch": 21.25, "grad_norm": 0.2606280446052551, "learning_rate": 7.101678977374683e-05, "loss": 0.0119, "step": 23630 }, { "epoch": 21.258992805755394, "grad_norm": 0.2003364861011505, "learning_rate": 7.099178147229685e-05, "loss": 0.0064, "step": 23640 }, { "epoch": 21.26798561151079, "grad_norm": 0.10598264634609222, "learning_rate": 7.096676679409789e-05, "loss": 0.0081, "step": 23650 }, { "epoch": 21.276978417266186, "grad_norm": 0.18153834342956543, "learning_rate": 7.094174574674877e-05, "loss": 0.0075, "step": 23660 }, { "epoch": 21.285971223021583, "grad_norm": 0.1576564759016037, "learning_rate": 7.091671833785025e-05, "loss": 0.0059, "step": 23670 }, { "epoch": 21.294964028776977, "grad_norm": 0.2049456238746643, "learning_rate": 7.089168457500493e-05, "loss": 0.0068, "step": 23680 }, { "epoch": 21.303956834532375, "grad_norm": 0.1553819626569748, "learning_rate": 7.086664446581747e-05, "loss": 0.0119, "step": 23690 }, { "epoch": 21.31294964028777, "grad_norm": 0.12647266685962677, "learning_rate": 7.084159801789438e-05, "loss": 0.0061, "step": 23700 }, { "epoch": 21.321942446043167, "grad_norm": 0.14039157330989838, "learning_rate": 7.081654523884411e-05, "loss": 0.009, "step": 23710 }, { "epoch": 21.33093525179856, "grad_norm": 0.2030833661556244, "learning_rate": 7.0791486136277e-05, "loss": 0.0093, "step": 23720 }, { "epoch": 21.33992805755396, "grad_norm": 0.2685246169567108, "learning_rate": 7.07664207178054e-05, "loss": 0.0103, "step": 23730 }, { "epoch": 21.348920863309353, "grad_norm": 0.27612948417663574, "learning_rate": 7.074134899104345e-05, "loss": 0.0087, "step": 23740 }, { "epoch": 21.357913669064747, "grad_norm": 0.09129795432090759, "learning_rate": 7.071627096360735e-05, "loss": 0.0073, "step": 23750 }, { "epoch": 21.366906474820144, "grad_norm": 0.19778358936309814, "learning_rate": 7.069118664311511e-05, "loss": 0.0062, "step": 23760 }, { "epoch": 21.37589928057554, "grad_norm": 0.16528411209583282, "learning_rate": 7.06660960371867e-05, "loss": 0.0056, "step": 23770 }, { "epoch": 21.384892086330936, "grad_norm": 0.3073403537273407, "learning_rate": 7.064099915344396e-05, "loss": 0.0087, "step": 23780 }, { "epoch": 21.39388489208633, "grad_norm": 0.21721510589122772, "learning_rate": 7.061589599951066e-05, "loss": 0.0077, "step": 23790 }, { "epoch": 21.402877697841728, "grad_norm": 0.12830999493598938, "learning_rate": 7.05907865830125e-05, "loss": 0.012, "step": 23800 }, { "epoch": 21.41187050359712, "grad_norm": 0.2180357575416565, "learning_rate": 7.056567091157703e-05, "loss": 0.0064, "step": 23810 }, { "epoch": 21.42086330935252, "grad_norm": 0.1245712861418724, "learning_rate": 7.054054899283375e-05, "loss": 0.0091, "step": 23820 }, { "epoch": 21.429856115107913, "grad_norm": 0.2171310931444168, "learning_rate": 7.051542083441403e-05, "loss": 0.0063, "step": 23830 }, { "epoch": 21.43884892086331, "grad_norm": 0.1884419322013855, "learning_rate": 7.049028644395113e-05, "loss": 0.0073, "step": 23840 }, { "epoch": 21.447841726618705, "grad_norm": 0.1446194350719452, "learning_rate": 7.046514582908024e-05, "loss": 0.0054, "step": 23850 }, { "epoch": 21.4568345323741, "grad_norm": 0.23529227077960968, "learning_rate": 7.043999899743838e-05, "loss": 0.0067, "step": 23860 }, { "epoch": 21.465827338129497, "grad_norm": 0.151106059551239, "learning_rate": 7.041484595666451e-05, "loss": 0.0098, "step": 23870 }, { "epoch": 21.47482014388489, "grad_norm": 0.17628327012062073, "learning_rate": 7.038968671439948e-05, "loss": 0.0068, "step": 23880 }, { "epoch": 21.48381294964029, "grad_norm": 0.1458258181810379, "learning_rate": 7.036452127828596e-05, "loss": 0.0087, "step": 23890 }, { "epoch": 21.492805755395683, "grad_norm": 0.20773795247077942, "learning_rate": 7.033934965596859e-05, "loss": 0.0063, "step": 23900 }, { "epoch": 21.50179856115108, "grad_norm": 0.252623587846756, "learning_rate": 7.031417185509381e-05, "loss": 0.0094, "step": 23910 }, { "epoch": 21.510791366906474, "grad_norm": 0.19039547443389893, "learning_rate": 7.028898788331e-05, "loss": 0.0082, "step": 23920 }, { "epoch": 21.519784172661872, "grad_norm": 0.16971880197525024, "learning_rate": 7.026379774826736e-05, "loss": 0.0143, "step": 23930 }, { "epoch": 21.528776978417266, "grad_norm": 0.2156539261341095, "learning_rate": 7.0238601457618e-05, "loss": 0.0064, "step": 23940 }, { "epoch": 21.53776978417266, "grad_norm": 0.1423102170228958, "learning_rate": 7.02133990190159e-05, "loss": 0.007, "step": 23950 }, { "epoch": 21.546762589928058, "grad_norm": 0.16265925765037537, "learning_rate": 7.018819044011687e-05, "loss": 0.0072, "step": 23960 }, { "epoch": 21.555755395683452, "grad_norm": 0.1330670565366745, "learning_rate": 7.016297572857863e-05, "loss": 0.0054, "step": 23970 }, { "epoch": 21.56474820143885, "grad_norm": 0.12092871218919754, "learning_rate": 7.013775489206072e-05, "loss": 0.0072, "step": 23980 }, { "epoch": 21.573741007194243, "grad_norm": 0.25533249974250793, "learning_rate": 7.01125279382246e-05, "loss": 0.0053, "step": 23990 }, { "epoch": 21.58273381294964, "grad_norm": 0.18763615190982819, "learning_rate": 7.008729487473351e-05, "loss": 0.007, "step": 24000 }, { "epoch": 21.591726618705035, "grad_norm": 0.27922987937927246, "learning_rate": 7.006205570925263e-05, "loss": 0.0098, "step": 24010 }, { "epoch": 21.600719424460433, "grad_norm": 0.21830543875694275, "learning_rate": 7.003681044944892e-05, "loss": 0.0079, "step": 24020 }, { "epoch": 21.609712230215827, "grad_norm": 0.2209630310535431, "learning_rate": 7.001155910299126e-05, "loss": 0.0072, "step": 24030 }, { "epoch": 21.618705035971225, "grad_norm": 0.21059133112430573, "learning_rate": 6.99863016775503e-05, "loss": 0.0114, "step": 24040 }, { "epoch": 21.62769784172662, "grad_norm": 0.25998133420944214, "learning_rate": 6.996103818079859e-05, "loss": 0.0084, "step": 24050 }, { "epoch": 21.636690647482013, "grad_norm": 0.2778080105781555, "learning_rate": 6.993576862041054e-05, "loss": 0.0081, "step": 24060 }, { "epoch": 21.64568345323741, "grad_norm": 0.18205256760120392, "learning_rate": 6.991049300406235e-05, "loss": 0.0057, "step": 24070 }, { "epoch": 21.654676258992804, "grad_norm": 0.2668272852897644, "learning_rate": 6.988521133943209e-05, "loss": 0.0078, "step": 24080 }, { "epoch": 21.663669064748202, "grad_norm": 0.2444581389427185, "learning_rate": 6.985992363419966e-05, "loss": 0.0096, "step": 24090 }, { "epoch": 21.672661870503596, "grad_norm": 0.2771233916282654, "learning_rate": 6.983462989604682e-05, "loss": 0.0078, "step": 24100 }, { "epoch": 21.681654676258994, "grad_norm": 0.1935633420944214, "learning_rate": 6.980933013265709e-05, "loss": 0.0057, "step": 24110 }, { "epoch": 21.690647482014388, "grad_norm": 0.25823137164115906, "learning_rate": 6.978402435171592e-05, "loss": 0.0146, "step": 24120 }, { "epoch": 21.699640287769785, "grad_norm": 0.194122314453125, "learning_rate": 6.975871256091052e-05, "loss": 0.0096, "step": 24130 }, { "epoch": 21.70863309352518, "grad_norm": 0.2707781195640564, "learning_rate": 6.973339476792995e-05, "loss": 0.0065, "step": 24140 }, { "epoch": 21.717625899280577, "grad_norm": 0.23092208802700043, "learning_rate": 6.970807098046505e-05, "loss": 0.0067, "step": 24150 }, { "epoch": 21.72661870503597, "grad_norm": 0.26244932413101196, "learning_rate": 6.968274120620858e-05, "loss": 0.0084, "step": 24160 }, { "epoch": 21.735611510791365, "grad_norm": 0.20250827074050903, "learning_rate": 6.965740545285499e-05, "loss": 0.0061, "step": 24170 }, { "epoch": 21.744604316546763, "grad_norm": 0.17937368154525757, "learning_rate": 6.963206372810068e-05, "loss": 0.0073, "step": 24180 }, { "epoch": 21.753597122302157, "grad_norm": 0.17926937341690063, "learning_rate": 6.960671603964375e-05, "loss": 0.0097, "step": 24190 }, { "epoch": 21.762589928057555, "grad_norm": 0.3020070493221283, "learning_rate": 6.958136239518418e-05, "loss": 0.0093, "step": 24200 }, { "epoch": 21.77158273381295, "grad_norm": 0.17477823793888092, "learning_rate": 6.955600280242371e-05, "loss": 0.006, "step": 24210 }, { "epoch": 21.780575539568346, "grad_norm": 0.2013876885175705, "learning_rate": 6.953063726906596e-05, "loss": 0.008, "step": 24220 }, { "epoch": 21.78956834532374, "grad_norm": 0.175314262509346, "learning_rate": 6.950526580281626e-05, "loss": 0.008, "step": 24230 }, { "epoch": 21.798561151079138, "grad_norm": 0.13848651945590973, "learning_rate": 6.947988841138184e-05, "loss": 0.0086, "step": 24240 }, { "epoch": 21.807553956834532, "grad_norm": 0.30038022994995117, "learning_rate": 6.945450510247165e-05, "loss": 0.0074, "step": 24250 }, { "epoch": 21.81654676258993, "grad_norm": 0.21927566826343536, "learning_rate": 6.942911588379647e-05, "loss": 0.0075, "step": 24260 }, { "epoch": 21.825539568345324, "grad_norm": 0.19314879179000854, "learning_rate": 6.940372076306888e-05, "loss": 0.0073, "step": 24270 }, { "epoch": 21.834532374100718, "grad_norm": 0.15745970606803894, "learning_rate": 6.937831974800326e-05, "loss": 0.0079, "step": 24280 }, { "epoch": 21.843525179856115, "grad_norm": 0.27389398217201233, "learning_rate": 6.935291284631574e-05, "loss": 0.01, "step": 24290 }, { "epoch": 21.85251798561151, "grad_norm": 0.20325039327144623, "learning_rate": 6.932750006572428e-05, "loss": 0.0057, "step": 24300 }, { "epoch": 21.861510791366907, "grad_norm": 0.2895835340023041, "learning_rate": 6.930208141394863e-05, "loss": 0.019, "step": 24310 }, { "epoch": 21.8705035971223, "grad_norm": 0.30090662837028503, "learning_rate": 6.927665689871026e-05, "loss": 0.007, "step": 24320 }, { "epoch": 21.8794964028777, "grad_norm": 0.2685568332672119, "learning_rate": 6.925122652773253e-05, "loss": 0.0077, "step": 24330 }, { "epoch": 21.888489208633093, "grad_norm": 0.17758943140506744, "learning_rate": 6.922579030874046e-05, "loss": 0.0074, "step": 24340 }, { "epoch": 21.89748201438849, "grad_norm": 0.19267933070659637, "learning_rate": 6.920034824946093e-05, "loss": 0.0101, "step": 24350 }, { "epoch": 21.906474820143885, "grad_norm": 0.16041316092014313, "learning_rate": 6.917490035762255e-05, "loss": 0.0105, "step": 24360 }, { "epoch": 21.915467625899282, "grad_norm": 0.17419841885566711, "learning_rate": 6.914944664095573e-05, "loss": 0.012, "step": 24370 }, { "epoch": 21.924460431654676, "grad_norm": 0.22776322066783905, "learning_rate": 6.912398710719264e-05, "loss": 0.0069, "step": 24380 }, { "epoch": 21.93345323741007, "grad_norm": 0.18141162395477295, "learning_rate": 6.90985217640672e-05, "loss": 0.0051, "step": 24390 }, { "epoch": 21.942446043165468, "grad_norm": 0.12333383411169052, "learning_rate": 6.90730506193151e-05, "loss": 0.0066, "step": 24400 }, { "epoch": 21.951438848920862, "grad_norm": 0.15897537767887115, "learning_rate": 6.904757368067384e-05, "loss": 0.0087, "step": 24410 }, { "epoch": 21.96043165467626, "grad_norm": 0.2583451569080353, "learning_rate": 6.90220909558826e-05, "loss": 0.0098, "step": 24420 }, { "epoch": 21.969424460431654, "grad_norm": 0.1928388476371765, "learning_rate": 6.899660245268237e-05, "loss": 0.0075, "step": 24430 }, { "epoch": 21.97841726618705, "grad_norm": 0.20998996496200562, "learning_rate": 6.897110817881592e-05, "loss": 0.009, "step": 24440 }, { "epoch": 21.987410071942445, "grad_norm": 0.2144559770822525, "learning_rate": 6.894560814202769e-05, "loss": 0.0086, "step": 24450 }, { "epoch": 21.996402877697843, "grad_norm": 0.18991434574127197, "learning_rate": 6.892010235006394e-05, "loss": 0.0114, "step": 24460 }, { "epoch": 22.005395683453237, "grad_norm": 0.2051633596420288, "learning_rate": 6.889459081067264e-05, "loss": 0.0063, "step": 24470 }, { "epoch": 22.014388489208635, "grad_norm": 0.1881057620048523, "learning_rate": 6.886907353160356e-05, "loss": 0.0092, "step": 24480 }, { "epoch": 22.02338129496403, "grad_norm": 0.15087875723838806, "learning_rate": 6.884355052060814e-05, "loss": 0.0073, "step": 24490 }, { "epoch": 22.032374100719423, "grad_norm": 0.2097758799791336, "learning_rate": 6.88180217854396e-05, "loss": 0.0081, "step": 24500 }, { "epoch": 22.04136690647482, "grad_norm": 0.16699303686618805, "learning_rate": 6.87924873338529e-05, "loss": 0.0091, "step": 24510 }, { "epoch": 22.050359712230215, "grad_norm": 0.21670804917812347, "learning_rate": 6.876694717360475e-05, "loss": 0.0076, "step": 24520 }, { "epoch": 22.059352517985612, "grad_norm": 0.12896722555160522, "learning_rate": 6.874140131245355e-05, "loss": 0.0073, "step": 24530 }, { "epoch": 22.068345323741006, "grad_norm": 0.3003644049167633, "learning_rate": 6.871584975815948e-05, "loss": 0.0089, "step": 24540 }, { "epoch": 22.077338129496404, "grad_norm": 0.11861032247543335, "learning_rate": 6.86902925184844e-05, "loss": 0.0069, "step": 24550 }, { "epoch": 22.086330935251798, "grad_norm": 0.26723814010620117, "learning_rate": 6.866472960119195e-05, "loss": 0.008, "step": 24560 }, { "epoch": 22.095323741007196, "grad_norm": 0.21820564568042755, "learning_rate": 6.863916101404748e-05, "loss": 0.0073, "step": 24570 }, { "epoch": 22.10431654676259, "grad_norm": 0.22580917179584503, "learning_rate": 6.8613586764818e-05, "loss": 0.0064, "step": 24580 }, { "epoch": 22.113309352517987, "grad_norm": 0.17664559185504913, "learning_rate": 6.858800686127233e-05, "loss": 0.0057, "step": 24590 }, { "epoch": 22.12230215827338, "grad_norm": 0.15224596858024597, "learning_rate": 6.856242131118097e-05, "loss": 0.0063, "step": 24600 }, { "epoch": 22.131294964028775, "grad_norm": 0.2847544848918915, "learning_rate": 6.853683012231614e-05, "loss": 0.0069, "step": 24610 }, { "epoch": 22.140287769784173, "grad_norm": 0.2761646807193756, "learning_rate": 6.851123330245173e-05, "loss": 0.0067, "step": 24620 }, { "epoch": 22.149280575539567, "grad_norm": 0.1593773066997528, "learning_rate": 6.848563085936343e-05, "loss": 0.0071, "step": 24630 }, { "epoch": 22.158273381294965, "grad_norm": 0.1723693311214447, "learning_rate": 6.846002280082853e-05, "loss": 0.0059, "step": 24640 }, { "epoch": 22.16726618705036, "grad_norm": 0.13132010400295258, "learning_rate": 6.843440913462614e-05, "loss": 0.0054, "step": 24650 }, { "epoch": 22.176258992805757, "grad_norm": 0.20701155066490173, "learning_rate": 6.840878986853698e-05, "loss": 0.0067, "step": 24660 }, { "epoch": 22.18525179856115, "grad_norm": 0.19843943417072296, "learning_rate": 6.838316501034352e-05, "loss": 0.0073, "step": 24670 }, { "epoch": 22.194244604316548, "grad_norm": 0.10480709373950958, "learning_rate": 6.83575345678299e-05, "loss": 0.0056, "step": 24680 }, { "epoch": 22.203237410071942, "grad_norm": 0.2195524275302887, "learning_rate": 6.833189854878196e-05, "loss": 0.0075, "step": 24690 }, { "epoch": 22.21223021582734, "grad_norm": 0.19363448023796082, "learning_rate": 6.83062569609873e-05, "loss": 0.0069, "step": 24700 }, { "epoch": 22.221223021582734, "grad_norm": 0.15127499401569366, "learning_rate": 6.828060981223512e-05, "loss": 0.0053, "step": 24710 }, { "epoch": 22.230215827338128, "grad_norm": 0.14906257390975952, "learning_rate": 6.825495711031634e-05, "loss": 0.0062, "step": 24720 }, { "epoch": 22.239208633093526, "grad_norm": 0.24017536640167236, "learning_rate": 6.822929886302359e-05, "loss": 0.006, "step": 24730 }, { "epoch": 22.24820143884892, "grad_norm": 0.1355249136686325, "learning_rate": 6.820363507815116e-05, "loss": 0.0048, "step": 24740 }, { "epoch": 22.257194244604317, "grad_norm": 0.24263711273670197, "learning_rate": 6.817796576349501e-05, "loss": 0.0089, "step": 24750 }, { "epoch": 22.26618705035971, "grad_norm": 0.17202961444854736, "learning_rate": 6.815229092685285e-05, "loss": 0.0097, "step": 24760 }, { "epoch": 22.27517985611511, "grad_norm": 0.1491989940404892, "learning_rate": 6.812661057602399e-05, "loss": 0.0058, "step": 24770 }, { "epoch": 22.284172661870503, "grad_norm": 0.12727579474449158, "learning_rate": 6.810092471880943e-05, "loss": 0.0077, "step": 24780 }, { "epoch": 22.2931654676259, "grad_norm": 0.3208616375923157, "learning_rate": 6.807523336301187e-05, "loss": 0.0085, "step": 24790 }, { "epoch": 22.302158273381295, "grad_norm": 0.20244742929935455, "learning_rate": 6.804953651643566e-05, "loss": 0.0087, "step": 24800 }, { "epoch": 22.31115107913669, "grad_norm": 0.15663734078407288, "learning_rate": 6.802383418688685e-05, "loss": 0.009, "step": 24810 }, { "epoch": 22.320143884892087, "grad_norm": 0.2429014891386032, "learning_rate": 6.799812638217309e-05, "loss": 0.0094, "step": 24820 }, { "epoch": 22.32913669064748, "grad_norm": 0.2896154820919037, "learning_rate": 6.797241311010373e-05, "loss": 0.0085, "step": 24830 }, { "epoch": 22.33812949640288, "grad_norm": 0.13062892854213715, "learning_rate": 6.794669437848982e-05, "loss": 0.009, "step": 24840 }, { "epoch": 22.347122302158272, "grad_norm": 0.18188107013702393, "learning_rate": 6.792097019514402e-05, "loss": 0.008, "step": 24850 }, { "epoch": 22.35611510791367, "grad_norm": 0.2798314094543457, "learning_rate": 6.789524056788064e-05, "loss": 0.0071, "step": 24860 }, { "epoch": 22.365107913669064, "grad_norm": 0.1573629528284073, "learning_rate": 6.786950550451567e-05, "loss": 0.006, "step": 24870 }, { "epoch": 22.37410071942446, "grad_norm": 0.1362411379814148, "learning_rate": 6.784376501286676e-05, "loss": 0.0069, "step": 24880 }, { "epoch": 22.383093525179856, "grad_norm": 0.11757874488830566, "learning_rate": 6.781801910075316e-05, "loss": 0.0049, "step": 24890 }, { "epoch": 22.392086330935253, "grad_norm": 0.12416736781597137, "learning_rate": 6.779226777599581e-05, "loss": 0.0059, "step": 24900 }, { "epoch": 22.401079136690647, "grad_norm": 0.134080708026886, "learning_rate": 6.776651104641729e-05, "loss": 0.0078, "step": 24910 }, { "epoch": 22.41007194244604, "grad_norm": 0.25557905435562134, "learning_rate": 6.774074891984183e-05, "loss": 0.0076, "step": 24920 }, { "epoch": 22.41906474820144, "grad_norm": 0.1789587140083313, "learning_rate": 6.771498140409526e-05, "loss": 0.0072, "step": 24930 }, { "epoch": 22.428057553956833, "grad_norm": 0.16841350495815277, "learning_rate": 6.768920850700506e-05, "loss": 0.0051, "step": 24940 }, { "epoch": 22.43705035971223, "grad_norm": 0.18310226500034332, "learning_rate": 6.766343023640039e-05, "loss": 0.0102, "step": 24950 }, { "epoch": 22.446043165467625, "grad_norm": 0.216702401638031, "learning_rate": 6.763764660011198e-05, "loss": 0.0076, "step": 24960 }, { "epoch": 22.455035971223023, "grad_norm": 0.2696366608142853, "learning_rate": 6.761185760597223e-05, "loss": 0.0062, "step": 24970 }, { "epoch": 22.464028776978417, "grad_norm": 0.20537343621253967, "learning_rate": 6.758606326181515e-05, "loss": 0.0079, "step": 24980 }, { "epoch": 22.473021582733814, "grad_norm": 0.2021787017583847, "learning_rate": 6.75602635754764e-05, "loss": 0.0052, "step": 24990 }, { "epoch": 22.48201438848921, "grad_norm": 0.32928431034088135, "learning_rate": 6.75344585547932e-05, "loss": 0.0091, "step": 25000 }, { "epoch": 22.491007194244606, "grad_norm": 0.3478153944015503, "learning_rate": 6.750864820760449e-05, "loss": 0.0061, "step": 25010 }, { "epoch": 22.5, "grad_norm": 0.2561070919036865, "learning_rate": 6.748283254175072e-05, "loss": 0.0071, "step": 25020 }, { "epoch": 22.508992805755394, "grad_norm": 0.18090561032295227, "learning_rate": 6.745701156507404e-05, "loss": 0.0077, "step": 25030 }, { "epoch": 22.51798561151079, "grad_norm": 0.14265228807926178, "learning_rate": 6.743118528541818e-05, "loss": 0.0062, "step": 25040 }, { "epoch": 22.526978417266186, "grad_norm": 0.19372382760047913, "learning_rate": 6.740535371062846e-05, "loss": 0.0073, "step": 25050 }, { "epoch": 22.535971223021583, "grad_norm": 0.10559871047735214, "learning_rate": 6.737951684855185e-05, "loss": 0.0053, "step": 25060 }, { "epoch": 22.544964028776977, "grad_norm": 0.21911992132663727, "learning_rate": 6.735367470703691e-05, "loss": 0.0068, "step": 25070 }, { "epoch": 22.553956834532375, "grad_norm": 0.2503381669521332, "learning_rate": 6.732782729393379e-05, "loss": 0.0067, "step": 25080 }, { "epoch": 22.56294964028777, "grad_norm": 0.13851159811019897, "learning_rate": 6.730197461709425e-05, "loss": 0.0057, "step": 25090 }, { "epoch": 22.571942446043167, "grad_norm": 0.1367097645998001, "learning_rate": 6.727611668437164e-05, "loss": 0.0109, "step": 25100 }, { "epoch": 22.58093525179856, "grad_norm": 0.1331806480884552, "learning_rate": 6.725025350362094e-05, "loss": 0.006, "step": 25110 }, { "epoch": 22.58992805755396, "grad_norm": 0.15210901200771332, "learning_rate": 6.72243850826987e-05, "loss": 0.0055, "step": 25120 }, { "epoch": 22.598920863309353, "grad_norm": 0.20189796388149261, "learning_rate": 6.719851142946305e-05, "loss": 0.0058, "step": 25130 }, { "epoch": 22.607913669064747, "grad_norm": 0.38144007325172424, "learning_rate": 6.717263255177372e-05, "loss": 0.0105, "step": 25140 }, { "epoch": 22.616906474820144, "grad_norm": 0.18104280531406403, "learning_rate": 6.714674845749205e-05, "loss": 0.0062, "step": 25150 }, { "epoch": 22.62589928057554, "grad_norm": 0.1649109423160553, "learning_rate": 6.712085915448092e-05, "loss": 0.0076, "step": 25160 }, { "epoch": 22.634892086330936, "grad_norm": 0.36217039823532104, "learning_rate": 6.709496465060486e-05, "loss": 0.0076, "step": 25170 }, { "epoch": 22.64388489208633, "grad_norm": 0.1507437527179718, "learning_rate": 6.706906495372987e-05, "loss": 0.0055, "step": 25180 }, { "epoch": 22.652877697841728, "grad_norm": 0.22313013672828674, "learning_rate": 6.704316007172365e-05, "loss": 0.0063, "step": 25190 }, { "epoch": 22.66187050359712, "grad_norm": 0.17551612854003906, "learning_rate": 6.701725001245539e-05, "loss": 0.0087, "step": 25200 }, { "epoch": 22.67086330935252, "grad_norm": 0.1374734491109848, "learning_rate": 6.699133478379588e-05, "loss": 0.006, "step": 25210 }, { "epoch": 22.679856115107913, "grad_norm": 0.13685105741024017, "learning_rate": 6.69654143936175e-05, "loss": 0.0061, "step": 25220 }, { "epoch": 22.68884892086331, "grad_norm": 0.1384391039609909, "learning_rate": 6.693948884979419e-05, "loss": 0.0081, "step": 25230 }, { "epoch": 22.697841726618705, "grad_norm": 0.1426679641008377, "learning_rate": 6.691355816020142e-05, "loss": 0.0049, "step": 25240 }, { "epoch": 22.7068345323741, "grad_norm": 0.1111745685338974, "learning_rate": 6.688762233271624e-05, "loss": 0.0063, "step": 25250 }, { "epoch": 22.715827338129497, "grad_norm": 0.17632897198200226, "learning_rate": 6.68616813752173e-05, "loss": 0.009, "step": 25260 }, { "epoch": 22.72482014388489, "grad_norm": 0.15665659308433533, "learning_rate": 6.683573529558477e-05, "loss": 0.0058, "step": 25270 }, { "epoch": 22.73381294964029, "grad_norm": 0.14568965137004852, "learning_rate": 6.680978410170037e-05, "loss": 0.0057, "step": 25280 }, { "epoch": 22.742805755395683, "grad_norm": 0.12054117023944855, "learning_rate": 6.678382780144741e-05, "loss": 0.006, "step": 25290 }, { "epoch": 22.75179856115108, "grad_norm": 0.15646293759346008, "learning_rate": 6.675786640271071e-05, "loss": 0.006, "step": 25300 }, { "epoch": 22.760791366906474, "grad_norm": 0.1629125326871872, "learning_rate": 6.673189991337665e-05, "loss": 0.0062, "step": 25310 }, { "epoch": 22.769784172661872, "grad_norm": 0.17799417674541473, "learning_rate": 6.670592834133317e-05, "loss": 0.0094, "step": 25320 }, { "epoch": 22.778776978417266, "grad_norm": 0.18210183084011078, "learning_rate": 6.667995169446979e-05, "loss": 0.0057, "step": 25330 }, { "epoch": 22.78776978417266, "grad_norm": 0.1375424712896347, "learning_rate": 6.665396998067747e-05, "loss": 0.011, "step": 25340 }, { "epoch": 22.796762589928058, "grad_norm": 0.1770014464855194, "learning_rate": 6.66279832078488e-05, "loss": 0.0055, "step": 25350 }, { "epoch": 22.805755395683452, "grad_norm": 0.15527352690696716, "learning_rate": 6.660199138387786e-05, "loss": 0.0051, "step": 25360 }, { "epoch": 22.81474820143885, "grad_norm": 0.2765344977378845, "learning_rate": 6.65759945166603e-05, "loss": 0.0101, "step": 25370 }, { "epoch": 22.823741007194243, "grad_norm": 0.20583675801753998, "learning_rate": 6.654999261409326e-05, "loss": 0.0103, "step": 25380 }, { "epoch": 22.83273381294964, "grad_norm": 0.33298856019973755, "learning_rate": 6.652398568407544e-05, "loss": 0.0089, "step": 25390 }, { "epoch": 22.841726618705035, "grad_norm": 0.2756043076515198, "learning_rate": 6.649797373450707e-05, "loss": 0.01, "step": 25400 }, { "epoch": 22.850719424460433, "grad_norm": 0.16589416563510895, "learning_rate": 6.647195677328988e-05, "loss": 0.007, "step": 25410 }, { "epoch": 22.859712230215827, "grad_norm": 0.19862031936645508, "learning_rate": 6.644593480832712e-05, "loss": 0.0064, "step": 25420 }, { "epoch": 22.868705035971225, "grad_norm": 0.17948709428310394, "learning_rate": 6.641990784752363e-05, "loss": 0.0086, "step": 25430 }, { "epoch": 22.87769784172662, "grad_norm": 0.2757781147956848, "learning_rate": 6.639387589878566e-05, "loss": 0.008, "step": 25440 }, { "epoch": 22.886690647482013, "grad_norm": 0.18118411302566528, "learning_rate": 6.636783897002103e-05, "loss": 0.0067, "step": 25450 }, { "epoch": 22.89568345323741, "grad_norm": 0.18148255348205566, "learning_rate": 6.63417970691391e-05, "loss": 0.0076, "step": 25460 }, { "epoch": 22.904676258992804, "grad_norm": 0.18144090473651886, "learning_rate": 6.63157502040507e-05, "loss": 0.0058, "step": 25470 }, { "epoch": 22.913669064748202, "grad_norm": 0.1513368934392929, "learning_rate": 6.628969838266819e-05, "loss": 0.0087, "step": 25480 }, { "epoch": 22.922661870503596, "grad_norm": 0.11800356209278107, "learning_rate": 6.626364161290541e-05, "loss": 0.0073, "step": 25490 }, { "epoch": 22.931654676258994, "grad_norm": 0.14338426291942596, "learning_rate": 6.623757990267774e-05, "loss": 0.0066, "step": 25500 }, { "epoch": 22.940647482014388, "grad_norm": 0.23645760118961334, "learning_rate": 6.621151325990201e-05, "loss": 0.0058, "step": 25510 }, { "epoch": 22.949640287769785, "grad_norm": 0.1861431747674942, "learning_rate": 6.618544169249657e-05, "loss": 0.0078, "step": 25520 }, { "epoch": 22.95863309352518, "grad_norm": 0.1816338747739792, "learning_rate": 6.615936520838133e-05, "loss": 0.0074, "step": 25530 }, { "epoch": 22.967625899280577, "grad_norm": 0.15043193101882935, "learning_rate": 6.613328381547759e-05, "loss": 0.0045, "step": 25540 }, { "epoch": 22.97661870503597, "grad_norm": 0.27944204211235046, "learning_rate": 6.610719752170821e-05, "loss": 0.0149, "step": 25550 }, { "epoch": 22.985611510791365, "grad_norm": 0.1848621368408203, "learning_rate": 6.60811063349975e-05, "loss": 0.0064, "step": 25560 }, { "epoch": 22.994604316546763, "grad_norm": 0.3161589503288269, "learning_rate": 6.605501026327127e-05, "loss": 0.0063, "step": 25570 }, { "epoch": 23.003597122302157, "grad_norm": 0.14073428511619568, "learning_rate": 6.602890931445685e-05, "loss": 0.0106, "step": 25580 }, { "epoch": 23.012589928057555, "grad_norm": 0.24671363830566406, "learning_rate": 6.6002803496483e-05, "loss": 0.0087, "step": 25590 }, { "epoch": 23.02158273381295, "grad_norm": 0.1906801462173462, "learning_rate": 6.597669281727997e-05, "loss": 0.0086, "step": 25600 }, { "epoch": 23.030575539568346, "grad_norm": 0.19133326411247253, "learning_rate": 6.595057728477949e-05, "loss": 0.0102, "step": 25610 }, { "epoch": 23.03956834532374, "grad_norm": 0.26323235034942627, "learning_rate": 6.59244569069148e-05, "loss": 0.0074, "step": 25620 }, { "epoch": 23.048561151079138, "grad_norm": 0.25128602981567383, "learning_rate": 6.589833169162054e-05, "loss": 0.0069, "step": 25630 }, { "epoch": 23.057553956834532, "grad_norm": 0.18908916413784027, "learning_rate": 6.587220164683291e-05, "loss": 0.0089, "step": 25640 }, { "epoch": 23.06654676258993, "grad_norm": 0.21027907729148865, "learning_rate": 6.58460667804895e-05, "loss": 0.0063, "step": 25650 }, { "epoch": 23.075539568345324, "grad_norm": 0.2236495018005371, "learning_rate": 6.581992710052938e-05, "loss": 0.0085, "step": 25660 }, { "epoch": 23.084532374100718, "grad_norm": 0.3790237605571747, "learning_rate": 6.579378261489311e-05, "loss": 0.0083, "step": 25670 }, { "epoch": 23.093525179856115, "grad_norm": 0.17857371270656586, "learning_rate": 6.576763333152268e-05, "loss": 0.0081, "step": 25680 }, { "epoch": 23.10251798561151, "grad_norm": 0.14880013465881348, "learning_rate": 6.574147925836159e-05, "loss": 0.0053, "step": 25690 }, { "epoch": 23.111510791366907, "grad_norm": 0.24416889250278473, "learning_rate": 6.571532040335472e-05, "loss": 0.007, "step": 25700 }, { "epoch": 23.1205035971223, "grad_norm": 0.1440621018409729, "learning_rate": 6.568915677444845e-05, "loss": 0.0062, "step": 25710 }, { "epoch": 23.1294964028777, "grad_norm": 0.2326250821352005, "learning_rate": 6.56629883795906e-05, "loss": 0.0075, "step": 25720 }, { "epoch": 23.138489208633093, "grad_norm": 0.14511360228061676, "learning_rate": 6.563681522673043e-05, "loss": 0.006, "step": 25730 }, { "epoch": 23.14748201438849, "grad_norm": 0.2504035532474518, "learning_rate": 6.561063732381867e-05, "loss": 0.0089, "step": 25740 }, { "epoch": 23.156474820143885, "grad_norm": 0.1863662302494049, "learning_rate": 6.558445467880745e-05, "loss": 0.0071, "step": 25750 }, { "epoch": 23.165467625899282, "grad_norm": 0.19774459302425385, "learning_rate": 6.55582672996504e-05, "loss": 0.0075, "step": 25760 }, { "epoch": 23.174460431654676, "grad_norm": 0.21148592233657837, "learning_rate": 6.553207519430253e-05, "loss": 0.0074, "step": 25770 }, { "epoch": 23.18345323741007, "grad_norm": 0.13089913129806519, "learning_rate": 6.550587837072032e-05, "loss": 0.0082, "step": 25780 }, { "epoch": 23.192446043165468, "grad_norm": 0.15332163870334625, "learning_rate": 6.547967683686166e-05, "loss": 0.0075, "step": 25790 }, { "epoch": 23.201438848920862, "grad_norm": 0.10031285881996155, "learning_rate": 6.545347060068591e-05, "loss": 0.0054, "step": 25800 }, { "epoch": 23.21043165467626, "grad_norm": 0.22961485385894775, "learning_rate": 6.542725967015382e-05, "loss": 0.0075, "step": 25810 }, { "epoch": 23.219424460431654, "grad_norm": 0.25289830565452576, "learning_rate": 6.540104405322757e-05, "loss": 0.0055, "step": 25820 }, { "epoch": 23.22841726618705, "grad_norm": 0.19237257540225983, "learning_rate": 6.537482375787077e-05, "loss": 0.0081, "step": 25830 }, { "epoch": 23.237410071942445, "grad_norm": 0.23317882418632507, "learning_rate": 6.534859879204845e-05, "loss": 0.0077, "step": 25840 }, { "epoch": 23.246402877697843, "grad_norm": 0.21947437524795532, "learning_rate": 6.532236916372709e-05, "loss": 0.0063, "step": 25850 }, { "epoch": 23.255395683453237, "grad_norm": 0.2092047482728958, "learning_rate": 6.529613488087454e-05, "loss": 0.0058, "step": 25860 }, { "epoch": 23.264388489208635, "grad_norm": 0.1510646790266037, "learning_rate": 6.526989595146009e-05, "loss": 0.0048, "step": 25870 }, { "epoch": 23.27338129496403, "grad_norm": 0.12800753116607666, "learning_rate": 6.524365238345441e-05, "loss": 0.0051, "step": 25880 }, { "epoch": 23.282374100719423, "grad_norm": 0.20205998420715332, "learning_rate": 6.521740418482964e-05, "loss": 0.0071, "step": 25890 }, { "epoch": 23.29136690647482, "grad_norm": 0.13706208765506744, "learning_rate": 6.519115136355925e-05, "loss": 0.0067, "step": 25900 }, { "epoch": 23.300359712230215, "grad_norm": 0.13595454394817352, "learning_rate": 6.51648939276182e-05, "loss": 0.0075, "step": 25910 }, { "epoch": 23.309352517985612, "grad_norm": 0.15856662392616272, "learning_rate": 6.513863188498277e-05, "loss": 0.0071, "step": 25920 }, { "epoch": 23.318345323741006, "grad_norm": 0.11881467700004578, "learning_rate": 6.511236524363068e-05, "loss": 0.0064, "step": 25930 }, { "epoch": 23.327338129496404, "grad_norm": 0.25111910700798035, "learning_rate": 6.508609401154104e-05, "loss": 0.0086, "step": 25940 }, { "epoch": 23.336330935251798, "grad_norm": 0.15315183997154236, "learning_rate": 6.505981819669439e-05, "loss": 0.009, "step": 25950 }, { "epoch": 23.345323741007196, "grad_norm": 0.159446582198143, "learning_rate": 6.503353780707258e-05, "loss": 0.0096, "step": 25960 }, { "epoch": 23.35431654676259, "grad_norm": 0.19314564764499664, "learning_rate": 6.500725285065895e-05, "loss": 0.008, "step": 25970 }, { "epoch": 23.363309352517987, "grad_norm": 0.4017908573150635, "learning_rate": 6.498096333543813e-05, "loss": 0.0089, "step": 25980 }, { "epoch": 23.37230215827338, "grad_norm": 0.17630931735038757, "learning_rate": 6.49546692693962e-05, "loss": 0.0052, "step": 25990 }, { "epoch": 23.381294964028775, "grad_norm": 0.2723309099674225, "learning_rate": 6.492837066052059e-05, "loss": 0.0105, "step": 26000 }, { "epoch": 23.390287769784173, "grad_norm": 0.14616867899894714, "learning_rate": 6.490206751680014e-05, "loss": 0.0096, "step": 26010 }, { "epoch": 23.399280575539567, "grad_norm": 0.20272952318191528, "learning_rate": 6.487575984622505e-05, "loss": 0.0099, "step": 26020 }, { "epoch": 23.408273381294965, "grad_norm": 0.10520059615373611, "learning_rate": 6.484944765678689e-05, "loss": 0.0067, "step": 26030 }, { "epoch": 23.41726618705036, "grad_norm": 0.48352065682411194, "learning_rate": 6.482313095647861e-05, "loss": 0.0071, "step": 26040 }, { "epoch": 23.426258992805757, "grad_norm": 0.3357718884944916, "learning_rate": 6.479680975329451e-05, "loss": 0.0096, "step": 26050 }, { "epoch": 23.43525179856115, "grad_norm": 0.2901657223701477, "learning_rate": 6.477048405523031e-05, "loss": 0.0067, "step": 26060 }, { "epoch": 23.444244604316548, "grad_norm": 0.18459759652614594, "learning_rate": 6.474415387028304e-05, "loss": 0.0079, "step": 26070 }, { "epoch": 23.453237410071942, "grad_norm": 0.2038591355085373, "learning_rate": 6.471781920645114e-05, "loss": 0.005, "step": 26080 }, { "epoch": 23.46223021582734, "grad_norm": 0.25626707077026367, "learning_rate": 6.469148007173434e-05, "loss": 0.0086, "step": 26090 }, { "epoch": 23.471223021582734, "grad_norm": 0.17704123258590698, "learning_rate": 6.466513647413381e-05, "loss": 0.0075, "step": 26100 }, { "epoch": 23.480215827338128, "grad_norm": 0.23391221463680267, "learning_rate": 6.463878842165203e-05, "loss": 0.0072, "step": 26110 }, { "epoch": 23.489208633093526, "grad_norm": 0.1458730399608612, "learning_rate": 6.461243592229286e-05, "loss": 0.0072, "step": 26120 }, { "epoch": 23.49820143884892, "grad_norm": 0.15742690861225128, "learning_rate": 6.458607898406146e-05, "loss": 0.0045, "step": 26130 }, { "epoch": 23.507194244604317, "grad_norm": 0.18708443641662598, "learning_rate": 6.455971761496439e-05, "loss": 0.0046, "step": 26140 }, { "epoch": 23.51618705035971, "grad_norm": 0.212814599275589, "learning_rate": 6.453335182300953e-05, "loss": 0.0052, "step": 26150 }, { "epoch": 23.52517985611511, "grad_norm": 0.2414463758468628, "learning_rate": 6.450698161620612e-05, "loss": 0.0065, "step": 26160 }, { "epoch": 23.534172661870503, "grad_norm": 0.2749905288219452, "learning_rate": 6.448060700256473e-05, "loss": 0.0121, "step": 26170 }, { "epoch": 23.5431654676259, "grad_norm": 0.17520064115524292, "learning_rate": 6.445422799009726e-05, "loss": 0.0057, "step": 26180 }, { "epoch": 23.552158273381295, "grad_norm": 0.3478931784629822, "learning_rate": 6.442784458681699e-05, "loss": 0.0084, "step": 26190 }, { "epoch": 23.56115107913669, "grad_norm": 0.26101386547088623, "learning_rate": 6.440145680073847e-05, "loss": 0.0069, "step": 26200 }, { "epoch": 23.570143884892087, "grad_norm": 0.13973654806613922, "learning_rate": 6.437506463987762e-05, "loss": 0.0087, "step": 26210 }, { "epoch": 23.57913669064748, "grad_norm": 0.11446481943130493, "learning_rate": 6.434866811225168e-05, "loss": 0.005, "step": 26220 }, { "epoch": 23.58812949640288, "grad_norm": 0.14279060065746307, "learning_rate": 6.432226722587923e-05, "loss": 0.009, "step": 26230 }, { "epoch": 23.597122302158272, "grad_norm": 0.2088620811700821, "learning_rate": 6.429586198878015e-05, "loss": 0.007, "step": 26240 }, { "epoch": 23.60611510791367, "grad_norm": 0.27354928851127625, "learning_rate": 6.426945240897566e-05, "loss": 0.0135, "step": 26250 }, { "epoch": 23.615107913669064, "grad_norm": 0.17760923504829407, "learning_rate": 6.424303849448829e-05, "loss": 0.0065, "step": 26260 }, { "epoch": 23.62410071942446, "grad_norm": 0.2706226110458374, "learning_rate": 6.42166202533419e-05, "loss": 0.0071, "step": 26270 }, { "epoch": 23.633093525179856, "grad_norm": 0.16586323082447052, "learning_rate": 6.419019769356164e-05, "loss": 0.0063, "step": 26280 }, { "epoch": 23.642086330935253, "grad_norm": 0.22564707696437836, "learning_rate": 6.416377082317398e-05, "loss": 0.0069, "step": 26290 }, { "epoch": 23.651079136690647, "grad_norm": 0.19878174364566803, "learning_rate": 6.413733965020674e-05, "loss": 0.0063, "step": 26300 }, { "epoch": 23.66007194244604, "grad_norm": 0.17607808113098145, "learning_rate": 6.411090418268896e-05, "loss": 0.0052, "step": 26310 }, { "epoch": 23.66906474820144, "grad_norm": 0.14652352035045624, "learning_rate": 6.408446442865109e-05, "loss": 0.0053, "step": 26320 }, { "epoch": 23.678057553956833, "grad_norm": 0.17579865455627441, "learning_rate": 6.405802039612479e-05, "loss": 0.0075, "step": 26330 }, { "epoch": 23.68705035971223, "grad_norm": 0.22758135199546814, "learning_rate": 6.403157209314308e-05, "loss": 0.0078, "step": 26340 }, { "epoch": 23.696043165467625, "grad_norm": 0.20382003486156464, "learning_rate": 6.400511952774024e-05, "loss": 0.0073, "step": 26350 }, { "epoch": 23.705035971223023, "grad_norm": 0.15831315517425537, "learning_rate": 6.397866270795187e-05, "loss": 0.0068, "step": 26360 }, { "epoch": 23.714028776978417, "grad_norm": 0.21666809916496277, "learning_rate": 6.395220164181489e-05, "loss": 0.005, "step": 26370 }, { "epoch": 23.723021582733814, "grad_norm": 0.19216574728488922, "learning_rate": 6.39257363373674e-05, "loss": 0.0051, "step": 26380 }, { "epoch": 23.73201438848921, "grad_norm": 0.15805286169052124, "learning_rate": 6.389926680264892e-05, "loss": 0.0091, "step": 26390 }, { "epoch": 23.741007194244606, "grad_norm": 0.2101830691099167, "learning_rate": 6.387279304570017e-05, "loss": 0.0117, "step": 26400 }, { "epoch": 23.75, "grad_norm": 0.1660936325788498, "learning_rate": 6.384631507456319e-05, "loss": 0.0074, "step": 26410 }, { "epoch": 23.758992805755394, "grad_norm": 0.21272364258766174, "learning_rate": 6.381983289728126e-05, "loss": 0.0068, "step": 26420 }, { "epoch": 23.76798561151079, "grad_norm": 0.1385766863822937, "learning_rate": 6.3793346521899e-05, "loss": 0.0068, "step": 26430 }, { "epoch": 23.776978417266186, "grad_norm": 0.15894818305969238, "learning_rate": 6.376685595646226e-05, "loss": 0.0069, "step": 26440 }, { "epoch": 23.785971223021583, "grad_norm": 0.2907315194606781, "learning_rate": 6.374036120901816e-05, "loss": 0.009, "step": 26450 }, { "epoch": 23.794964028776977, "grad_norm": 0.22830186784267426, "learning_rate": 6.371386228761514e-05, "loss": 0.0068, "step": 26460 }, { "epoch": 23.803956834532375, "grad_norm": 0.26390495896339417, "learning_rate": 6.368735920030283e-05, "loss": 0.0106, "step": 26470 }, { "epoch": 23.81294964028777, "grad_norm": 0.09685473889112473, "learning_rate": 6.366085195513218e-05, "loss": 0.0092, "step": 26480 }, { "epoch": 23.821942446043167, "grad_norm": 0.1776161789894104, "learning_rate": 6.363434056015543e-05, "loss": 0.0129, "step": 26490 }, { "epoch": 23.83093525179856, "grad_norm": 0.12572427093982697, "learning_rate": 6.360782502342599e-05, "loss": 0.0057, "step": 26500 }, { "epoch": 23.83992805755396, "grad_norm": 0.1843932420015335, "learning_rate": 6.358130535299862e-05, "loss": 0.0087, "step": 26510 }, { "epoch": 23.848920863309353, "grad_norm": 0.26497378945350647, "learning_rate": 6.355478155692926e-05, "loss": 0.0091, "step": 26520 }, { "epoch": 23.857913669064747, "grad_norm": 0.21331404149532318, "learning_rate": 6.352825364327517e-05, "loss": 0.0062, "step": 26530 }, { "epoch": 23.866906474820144, "grad_norm": 0.2162286341190338, "learning_rate": 6.350172162009482e-05, "loss": 0.0119, "step": 26540 }, { "epoch": 23.87589928057554, "grad_norm": 0.28049737215042114, "learning_rate": 6.347518549544793e-05, "loss": 0.0087, "step": 26550 }, { "epoch": 23.884892086330936, "grad_norm": 0.2332017719745636, "learning_rate": 6.344864527739547e-05, "loss": 0.006, "step": 26560 }, { "epoch": 23.89388489208633, "grad_norm": 0.18533667922019958, "learning_rate": 6.342210097399966e-05, "loss": 0.0073, "step": 26570 }, { "epoch": 23.902877697841728, "grad_norm": 0.20297561585903168, "learning_rate": 6.339555259332398e-05, "loss": 0.0137, "step": 26580 }, { "epoch": 23.91187050359712, "grad_norm": 0.3228927254676819, "learning_rate": 6.33690001434331e-05, "loss": 0.0124, "step": 26590 }, { "epoch": 23.92086330935252, "grad_norm": 0.21087822318077087, "learning_rate": 6.334244363239296e-05, "loss": 0.0079, "step": 26600 }, { "epoch": 23.929856115107913, "grad_norm": 0.1486770361661911, "learning_rate": 6.331588306827073e-05, "loss": 0.007, "step": 26610 }, { "epoch": 23.93884892086331, "grad_norm": 0.14848268032073975, "learning_rate": 6.328931845913483e-05, "loss": 0.0058, "step": 26620 }, { "epoch": 23.947841726618705, "grad_norm": 0.20527763664722443, "learning_rate": 6.326274981305484e-05, "loss": 0.0057, "step": 26630 }, { "epoch": 23.9568345323741, "grad_norm": 0.26840731501579285, "learning_rate": 6.323617713810166e-05, "loss": 0.0083, "step": 26640 }, { "epoch": 23.965827338129497, "grad_norm": 0.15019084513187408, "learning_rate": 6.320960044234734e-05, "loss": 0.0069, "step": 26650 }, { "epoch": 23.97482014388489, "grad_norm": 0.26800814270973206, "learning_rate": 6.318301973386518e-05, "loss": 0.0113, "step": 26660 }, { "epoch": 23.98381294964029, "grad_norm": 0.22283291816711426, "learning_rate": 6.315643502072971e-05, "loss": 0.0081, "step": 26670 }, { "epoch": 23.992805755395683, "grad_norm": 0.15310679376125336, "learning_rate": 6.312984631101667e-05, "loss": 0.0054, "step": 26680 }, { "epoch": 24.00179856115108, "grad_norm": 0.16804300248622894, "learning_rate": 6.310325361280297e-05, "loss": 0.0053, "step": 26690 }, { "epoch": 24.010791366906474, "grad_norm": 0.12831439077854156, "learning_rate": 6.30766569341668e-05, "loss": 0.0051, "step": 26700 }, { "epoch": 24.019784172661872, "grad_norm": 0.1418512761592865, "learning_rate": 6.305005628318753e-05, "loss": 0.0074, "step": 26710 }, { "epoch": 24.028776978417266, "grad_norm": 0.10307572782039642, "learning_rate": 6.302345166794572e-05, "loss": 0.0054, "step": 26720 }, { "epoch": 24.037769784172664, "grad_norm": 0.14776986837387085, "learning_rate": 6.299684309652316e-05, "loss": 0.0065, "step": 26730 }, { "epoch": 24.046762589928058, "grad_norm": 0.15830323100090027, "learning_rate": 6.297023057700283e-05, "loss": 0.0052, "step": 26740 }, { "epoch": 24.055755395683452, "grad_norm": 0.16873276233673096, "learning_rate": 6.294361411746891e-05, "loss": 0.0067, "step": 26750 }, { "epoch": 24.06474820143885, "grad_norm": 0.1512424647808075, "learning_rate": 6.291699372600677e-05, "loss": 0.0068, "step": 26760 }, { "epoch": 24.073741007194243, "grad_norm": 0.2288951575756073, "learning_rate": 6.2890369410703e-05, "loss": 0.0081, "step": 26770 }, { "epoch": 24.08273381294964, "grad_norm": 0.1558353751897812, "learning_rate": 6.286374117964534e-05, "loss": 0.0067, "step": 26780 }, { "epoch": 24.091726618705035, "grad_norm": 0.09837435185909271, "learning_rate": 6.283710904092277e-05, "loss": 0.0053, "step": 26790 }, { "epoch": 24.100719424460433, "grad_norm": 0.28206029534339905, "learning_rate": 6.281047300262542e-05, "loss": 0.0061, "step": 26800 }, { "epoch": 24.109712230215827, "grad_norm": 0.28128477931022644, "learning_rate": 6.278383307284461e-05, "loss": 0.0067, "step": 26810 }, { "epoch": 24.118705035971225, "grad_norm": 0.15941990911960602, "learning_rate": 6.275718925967284e-05, "loss": 0.0064, "step": 26820 }, { "epoch": 24.12769784172662, "grad_norm": 0.1567220389842987, "learning_rate": 6.273054157120382e-05, "loss": 0.0063, "step": 26830 }, { "epoch": 24.136690647482013, "grad_norm": 0.23184865713119507, "learning_rate": 6.270389001553238e-05, "loss": 0.0074, "step": 26840 }, { "epoch": 24.14568345323741, "grad_norm": 0.20232561230659485, "learning_rate": 6.26772346007546e-05, "loss": 0.009, "step": 26850 }, { "epoch": 24.154676258992804, "grad_norm": 0.19401253759860992, "learning_rate": 6.265057533496767e-05, "loss": 0.0074, "step": 26860 }, { "epoch": 24.163669064748202, "grad_norm": 0.24394579231739044, "learning_rate": 6.262391222626997e-05, "loss": 0.0089, "step": 26870 }, { "epoch": 24.172661870503596, "grad_norm": 0.17188675701618195, "learning_rate": 6.259724528276106e-05, "loss": 0.0125, "step": 26880 }, { "epoch": 24.181654676258994, "grad_norm": 0.26484525203704834, "learning_rate": 6.257057451254162e-05, "loss": 0.0071, "step": 26890 }, { "epoch": 24.190647482014388, "grad_norm": 0.08891928941011429, "learning_rate": 6.254389992371357e-05, "loss": 0.0072, "step": 26900 }, { "epoch": 24.199640287769785, "grad_norm": 0.11444924771785736, "learning_rate": 6.25172215243799e-05, "loss": 0.0074, "step": 26910 }, { "epoch": 24.20863309352518, "grad_norm": 0.14699991047382355, "learning_rate": 6.249053932264486e-05, "loss": 0.009, "step": 26920 }, { "epoch": 24.217625899280577, "grad_norm": 0.1503024697303772, "learning_rate": 6.246385332661376e-05, "loss": 0.0074, "step": 26930 }, { "epoch": 24.22661870503597, "grad_norm": 0.1536182165145874, "learning_rate": 6.24371635443931e-05, "loss": 0.0086, "step": 26940 }, { "epoch": 24.235611510791365, "grad_norm": 0.15683214366436005, "learning_rate": 6.241046998409054e-05, "loss": 0.0037, "step": 26950 }, { "epoch": 24.244604316546763, "grad_norm": 0.12906165421009064, "learning_rate": 6.238377265381489e-05, "loss": 0.0063, "step": 26960 }, { "epoch": 24.253597122302157, "grad_norm": 0.23295213282108307, "learning_rate": 6.235707156167607e-05, "loss": 0.0119, "step": 26970 }, { "epoch": 24.262589928057555, "grad_norm": 0.18819467723369598, "learning_rate": 6.233036671578519e-05, "loss": 0.0098, "step": 26980 }, { "epoch": 24.27158273381295, "grad_norm": 0.12769514322280884, "learning_rate": 6.230365812425445e-05, "loss": 0.008, "step": 26990 }, { "epoch": 24.280575539568346, "grad_norm": 0.19931042194366455, "learning_rate": 6.227694579519724e-05, "loss": 0.0089, "step": 27000 }, { "epoch": 24.28956834532374, "grad_norm": 0.1390257328748703, "learning_rate": 6.225022973672805e-05, "loss": 0.0061, "step": 27010 }, { "epoch": 24.298561151079138, "grad_norm": 0.15013477206230164, "learning_rate": 6.222350995696253e-05, "loss": 0.0103, "step": 27020 }, { "epoch": 24.307553956834532, "grad_norm": 0.21422505378723145, "learning_rate": 6.21967864640174e-05, "loss": 0.0056, "step": 27030 }, { "epoch": 24.31654676258993, "grad_norm": 0.1568661630153656, "learning_rate": 6.217005926601059e-05, "loss": 0.0046, "step": 27040 }, { "epoch": 24.325539568345324, "grad_norm": 0.18199457228183746, "learning_rate": 6.214332837106111e-05, "loss": 0.0054, "step": 27050 }, { "epoch": 24.334532374100718, "grad_norm": 0.14438752830028534, "learning_rate": 6.21165937872891e-05, "loss": 0.0058, "step": 27060 }, { "epoch": 24.343525179856115, "grad_norm": 0.20538084208965302, "learning_rate": 6.208985552281582e-05, "loss": 0.0077, "step": 27070 }, { "epoch": 24.35251798561151, "grad_norm": 0.3290236294269562, "learning_rate": 6.206311358576364e-05, "loss": 0.0123, "step": 27080 }, { "epoch": 24.361510791366907, "grad_norm": 0.18382839858531952, "learning_rate": 6.203636798425608e-05, "loss": 0.006, "step": 27090 }, { "epoch": 24.3705035971223, "grad_norm": 0.25300490856170654, "learning_rate": 6.20096187264177e-05, "loss": 0.0093, "step": 27100 }, { "epoch": 24.3794964028777, "grad_norm": 0.18936243653297424, "learning_rate": 6.198286582037425e-05, "loss": 0.0075, "step": 27110 }, { "epoch": 24.388489208633093, "grad_norm": 0.16165630519390106, "learning_rate": 6.195610927425256e-05, "loss": 0.0097, "step": 27120 }, { "epoch": 24.39748201438849, "grad_norm": 0.17116044461727142, "learning_rate": 6.192934909618056e-05, "loss": 0.004, "step": 27130 }, { "epoch": 24.406474820143885, "grad_norm": 0.15629717707633972, "learning_rate": 6.190258529428728e-05, "loss": 0.0061, "step": 27140 }, { "epoch": 24.415467625899282, "grad_norm": 0.16115422546863556, "learning_rate": 6.187581787670285e-05, "loss": 0.0148, "step": 27150 }, { "epoch": 24.424460431654676, "grad_norm": 0.19356130063533783, "learning_rate": 6.184904685155852e-05, "loss": 0.0066, "step": 27160 }, { "epoch": 24.43345323741007, "grad_norm": 0.1741614043712616, "learning_rate": 6.18222722269866e-05, "loss": 0.0098, "step": 27170 }, { "epoch": 24.442446043165468, "grad_norm": 0.1975606232881546, "learning_rate": 6.179549401112053e-05, "loss": 0.0049, "step": 27180 }, { "epoch": 24.451438848920862, "grad_norm": 0.1349312961101532, "learning_rate": 6.176871221209482e-05, "loss": 0.009, "step": 27190 }, { "epoch": 24.46043165467626, "grad_norm": 0.15844416618347168, "learning_rate": 6.174192683804508e-05, "loss": 0.0071, "step": 27200 }, { "epoch": 24.469424460431654, "grad_norm": 0.15487584471702576, "learning_rate": 6.1715137897108e-05, "loss": 0.0055, "step": 27210 }, { "epoch": 24.47841726618705, "grad_norm": 0.1362638771533966, "learning_rate": 6.168834539742134e-05, "loss": 0.0094, "step": 27220 }, { "epoch": 24.487410071942445, "grad_norm": 0.21179376542568207, "learning_rate": 6.166154934712397e-05, "loss": 0.0077, "step": 27230 }, { "epoch": 24.496402877697843, "grad_norm": 0.23161131143569946, "learning_rate": 6.163474975435581e-05, "loss": 0.0058, "step": 27240 }, { "epoch": 24.505395683453237, "grad_norm": 0.21260462701320648, "learning_rate": 6.160794662725787e-05, "loss": 0.0088, "step": 27250 }, { "epoch": 24.514388489208635, "grad_norm": 0.23092950880527496, "learning_rate": 6.158113997397222e-05, "loss": 0.0075, "step": 27260 }, { "epoch": 24.52338129496403, "grad_norm": 0.22751058638095856, "learning_rate": 6.155432980264205e-05, "loss": 0.0069, "step": 27270 }, { "epoch": 24.532374100719423, "grad_norm": 0.1616983264684677, "learning_rate": 6.152751612141156e-05, "loss": 0.0099, "step": 27280 }, { "epoch": 24.54136690647482, "grad_norm": 0.13193482160568237, "learning_rate": 6.150069893842602e-05, "loss": 0.007, "step": 27290 }, { "epoch": 24.550359712230215, "grad_norm": 0.28294143080711365, "learning_rate": 6.147387826183182e-05, "loss": 0.0059, "step": 27300 }, { "epoch": 24.559352517985612, "grad_norm": 0.26173582673072815, "learning_rate": 6.144705409977635e-05, "loss": 0.0117, "step": 27310 }, { "epoch": 24.568345323741006, "grad_norm": 0.16996510326862335, "learning_rate": 6.142022646040808e-05, "loss": 0.0063, "step": 27320 }, { "epoch": 24.577338129496404, "grad_norm": 0.12086814641952515, "learning_rate": 6.139339535187653e-05, "loss": 0.0061, "step": 27330 }, { "epoch": 24.586330935251798, "grad_norm": 0.16722819209098816, "learning_rate": 6.136656078233232e-05, "loss": 0.0047, "step": 27340 }, { "epoch": 24.595323741007196, "grad_norm": 0.16757091879844666, "learning_rate": 6.133972275992707e-05, "loss": 0.0078, "step": 27350 }, { "epoch": 24.60431654676259, "grad_norm": 0.14977942407131195, "learning_rate": 6.131288129281342e-05, "loss": 0.01, "step": 27360 }, { "epoch": 24.613309352517987, "grad_norm": 0.1902356892824173, "learning_rate": 6.128603638914516e-05, "loss": 0.0076, "step": 27370 }, { "epoch": 24.62230215827338, "grad_norm": 0.26360607147216797, "learning_rate": 6.125918805707704e-05, "loss": 0.0056, "step": 27380 }, { "epoch": 24.631294964028775, "grad_norm": 0.13585945963859558, "learning_rate": 6.123233630476485e-05, "loss": 0.0075, "step": 27390 }, { "epoch": 24.640287769784173, "grad_norm": 0.14883165061473846, "learning_rate": 6.120548114036547e-05, "loss": 0.006, "step": 27400 }, { "epoch": 24.649280575539567, "grad_norm": 0.34925132989883423, "learning_rate": 6.117862257203679e-05, "loss": 0.0071, "step": 27410 }, { "epoch": 24.658273381294965, "grad_norm": 0.26078736782073975, "learning_rate": 6.115176060793771e-05, "loss": 0.0086, "step": 27420 }, { "epoch": 24.66726618705036, "grad_norm": 0.1562698781490326, "learning_rate": 6.112489525622822e-05, "loss": 0.0041, "step": 27430 }, { "epoch": 24.676258992805757, "grad_norm": 0.2791453003883362, "learning_rate": 6.109802652506928e-05, "loss": 0.0062, "step": 27440 }, { "epoch": 24.68525179856115, "grad_norm": 0.25097930431365967, "learning_rate": 6.107115442262291e-05, "loss": 0.0064, "step": 27450 }, { "epoch": 24.694244604316548, "grad_norm": 0.2092629373073578, "learning_rate": 6.104427895705214e-05, "loss": 0.007, "step": 27460 }, { "epoch": 24.703237410071942, "grad_norm": 0.1289272904396057, "learning_rate": 6.101740013652103e-05, "loss": 0.0042, "step": 27470 }, { "epoch": 24.71223021582734, "grad_norm": 0.26602703332901, "learning_rate": 6.099051796919465e-05, "loss": 0.0078, "step": 27480 }, { "epoch": 24.721223021582734, "grad_norm": 0.19239863753318787, "learning_rate": 6.096363246323911e-05, "loss": 0.0113, "step": 27490 }, { "epoch": 24.730215827338128, "grad_norm": 0.2421208769083023, "learning_rate": 6.0936743626821504e-05, "loss": 0.0091, "step": 27500 }, { "epoch": 24.739208633093526, "grad_norm": 0.21951213479042053, "learning_rate": 6.090985146810996e-05, "loss": 0.006, "step": 27510 }, { "epoch": 24.74820143884892, "grad_norm": 0.19143535196781158, "learning_rate": 6.088295599527357e-05, "loss": 0.0078, "step": 27520 }, { "epoch": 24.757194244604317, "grad_norm": 0.23687666654586792, "learning_rate": 6.085605721648252e-05, "loss": 0.0059, "step": 27530 }, { "epoch": 24.76618705035971, "grad_norm": 0.2875826358795166, "learning_rate": 6.082915513990792e-05, "loss": 0.0064, "step": 27540 }, { "epoch": 24.77517985611511, "grad_norm": 0.11219276487827301, "learning_rate": 6.080224977372192e-05, "loss": 0.0061, "step": 27550 }, { "epoch": 24.784172661870503, "grad_norm": 0.22875627875328064, "learning_rate": 6.0775341126097666e-05, "loss": 0.0067, "step": 27560 }, { "epoch": 24.7931654676259, "grad_norm": 0.18363720178604126, "learning_rate": 6.074842920520926e-05, "loss": 0.0085, "step": 27570 }, { "epoch": 24.802158273381295, "grad_norm": 0.19081436097621918, "learning_rate": 6.072151401923186e-05, "loss": 0.0056, "step": 27580 }, { "epoch": 24.81115107913669, "grad_norm": 0.1986910104751587, "learning_rate": 6.069459557634159e-05, "loss": 0.0084, "step": 27590 }, { "epoch": 24.820143884892087, "grad_norm": 0.30283570289611816, "learning_rate": 6.066767388471557e-05, "loss": 0.0079, "step": 27600 }, { "epoch": 24.82913669064748, "grad_norm": 0.20376287400722504, "learning_rate": 6.064074895253188e-05, "loss": 0.0083, "step": 27610 }, { "epoch": 24.83812949640288, "grad_norm": 0.15135259926319122, "learning_rate": 6.061382078796961e-05, "loss": 0.0112, "step": 27620 }, { "epoch": 24.847122302158272, "grad_norm": 0.15173113346099854, "learning_rate": 6.0586889399208814e-05, "loss": 0.0046, "step": 27630 }, { "epoch": 24.85611510791367, "grad_norm": 0.2635461986064911, "learning_rate": 6.0559954794430565e-05, "loss": 0.0076, "step": 27640 }, { "epoch": 24.865107913669064, "grad_norm": 0.20962361991405487, "learning_rate": 6.053301698181687e-05, "loss": 0.0094, "step": 27650 }, { "epoch": 24.87410071942446, "grad_norm": 0.27897801995277405, "learning_rate": 6.0506075969550725e-05, "loss": 0.0075, "step": 27660 }, { "epoch": 24.883093525179856, "grad_norm": 0.08955704420804977, "learning_rate": 6.047913176581609e-05, "loss": 0.0072, "step": 27670 }, { "epoch": 24.892086330935253, "grad_norm": 0.25077512860298157, "learning_rate": 6.0452184378797904e-05, "loss": 0.0074, "step": 27680 }, { "epoch": 24.901079136690647, "grad_norm": 0.1417132019996643, "learning_rate": 6.042523381668209e-05, "loss": 0.0068, "step": 27690 }, { "epoch": 24.91007194244604, "grad_norm": 0.2192545235157013, "learning_rate": 6.03982800876555e-05, "loss": 0.0058, "step": 27700 }, { "epoch": 24.91906474820144, "grad_norm": 0.15473748743534088, "learning_rate": 6.0371323199905975e-05, "loss": 0.0118, "step": 27710 }, { "epoch": 24.928057553956833, "grad_norm": 0.22532284259796143, "learning_rate": 6.03443631616223e-05, "loss": 0.008, "step": 27720 }, { "epoch": 24.93705035971223, "grad_norm": 0.21295033395290375, "learning_rate": 6.031739998099421e-05, "loss": 0.0051, "step": 27730 }, { "epoch": 24.946043165467625, "grad_norm": 0.16548573970794678, "learning_rate": 6.029043366621243e-05, "loss": 0.0062, "step": 27740 }, { "epoch": 24.955035971223023, "grad_norm": 0.2114981859922409, "learning_rate": 6.0263464225468615e-05, "loss": 0.0068, "step": 27750 }, { "epoch": 24.964028776978417, "grad_norm": 0.16481715440750122, "learning_rate": 6.023649166695534e-05, "loss": 0.0048, "step": 27760 }, { "epoch": 24.973021582733814, "grad_norm": 0.16796591877937317, "learning_rate": 6.0209515998866186e-05, "loss": 0.0067, "step": 27770 }, { "epoch": 24.98201438848921, "grad_norm": 0.1676366627216339, "learning_rate": 6.018253722939563e-05, "loss": 0.0063, "step": 27780 }, { "epoch": 24.991007194244606, "grad_norm": 0.22673571109771729, "learning_rate": 6.015555536673914e-05, "loss": 0.0065, "step": 27790 }, { "epoch": 25.0, "grad_norm": 0.20212966203689575, "learning_rate": 6.0128570419093054e-05, "loss": 0.0071, "step": 27800 }, { "epoch": 25.008992805755394, "grad_norm": 0.1746504008769989, "learning_rate": 6.010158239465471e-05, "loss": 0.0045, "step": 27810 }, { "epoch": 25.01798561151079, "grad_norm": 0.18650616705417633, "learning_rate": 6.007459130162235e-05, "loss": 0.0068, "step": 27820 }, { "epoch": 25.026978417266186, "grad_norm": 0.17777326703071594, "learning_rate": 6.004759714819516e-05, "loss": 0.0071, "step": 27830 }, { "epoch": 25.035971223021583, "grad_norm": 0.18039408326148987, "learning_rate": 6.002059994257323e-05, "loss": 0.0083, "step": 27840 }, { "epoch": 25.044964028776977, "grad_norm": 0.1937517672777176, "learning_rate": 5.999359969295764e-05, "loss": 0.0067, "step": 27850 }, { "epoch": 25.053956834532375, "grad_norm": 0.1649021953344345, "learning_rate": 5.9966596407550314e-05, "loss": 0.0046, "step": 27860 }, { "epoch": 25.06294964028777, "grad_norm": 0.27440178394317627, "learning_rate": 5.993959009455416e-05, "loss": 0.0108, "step": 27870 }, { "epoch": 25.071942446043167, "grad_norm": 0.2390398532152176, "learning_rate": 5.991258076217298e-05, "loss": 0.0054, "step": 27880 }, { "epoch": 25.08093525179856, "grad_norm": 0.15511967241764069, "learning_rate": 5.988556841861147e-05, "loss": 0.0075, "step": 27890 }, { "epoch": 25.08992805755396, "grad_norm": 0.19304533302783966, "learning_rate": 5.985855307207531e-05, "loss": 0.0084, "step": 27900 }, { "epoch": 25.098920863309353, "grad_norm": 0.15904508531093597, "learning_rate": 5.9831534730771e-05, "loss": 0.0059, "step": 27910 }, { "epoch": 25.107913669064747, "grad_norm": 0.19949626922607422, "learning_rate": 5.980451340290605e-05, "loss": 0.0072, "step": 27920 }, { "epoch": 25.116906474820144, "grad_norm": 0.18776145577430725, "learning_rate": 5.97774890966888e-05, "loss": 0.005, "step": 27930 }, { "epoch": 25.12589928057554, "grad_norm": 0.17499293386936188, "learning_rate": 5.975046182032851e-05, "loss": 0.0068, "step": 27940 }, { "epoch": 25.134892086330936, "grad_norm": 0.19242428243160248, "learning_rate": 5.972343158203537e-05, "loss": 0.0069, "step": 27950 }, { "epoch": 25.14388489208633, "grad_norm": 0.13506488502025604, "learning_rate": 5.969639839002045e-05, "loss": 0.0058, "step": 27960 }, { "epoch": 25.152877697841728, "grad_norm": 0.13532759249210358, "learning_rate": 5.966936225249572e-05, "loss": 0.0072, "step": 27970 }, { "epoch": 25.16187050359712, "grad_norm": 0.14794452488422394, "learning_rate": 5.9642323177674044e-05, "loss": 0.0063, "step": 27980 }, { "epoch": 25.17086330935252, "grad_norm": 0.3247672915458679, "learning_rate": 5.9615281173769154e-05, "loss": 0.0062, "step": 27990 }, { "epoch": 25.179856115107913, "grad_norm": 0.11832495033740997, "learning_rate": 5.958823624899574e-05, "loss": 0.0058, "step": 28000 }, { "epoch": 25.18884892086331, "grad_norm": 0.15757109224796295, "learning_rate": 5.956118841156933e-05, "loss": 0.006, "step": 28010 }, { "epoch": 25.197841726618705, "grad_norm": 0.10346778482198715, "learning_rate": 5.953413766970631e-05, "loss": 0.0069, "step": 28020 }, { "epoch": 25.2068345323741, "grad_norm": 0.19851090013980865, "learning_rate": 5.9507084031624e-05, "loss": 0.0082, "step": 28030 }, { "epoch": 25.215827338129497, "grad_norm": 0.24682755768299103, "learning_rate": 5.948002750554058e-05, "loss": 0.0089, "step": 28040 }, { "epoch": 25.22482014388489, "grad_norm": 0.20365683734416962, "learning_rate": 5.9452968099675124e-05, "loss": 0.0057, "step": 28050 }, { "epoch": 25.23381294964029, "grad_norm": 0.17023731768131256, "learning_rate": 5.9425905822247527e-05, "loss": 0.0062, "step": 28060 }, { "epoch": 25.242805755395683, "grad_norm": 0.28733715415000916, "learning_rate": 5.939884068147864e-05, "loss": 0.007, "step": 28070 }, { "epoch": 25.25179856115108, "grad_norm": 0.12749285995960236, "learning_rate": 5.937177268559011e-05, "loss": 0.0053, "step": 28080 }, { "epoch": 25.260791366906474, "grad_norm": 0.18783038854599, "learning_rate": 5.934470184280448e-05, "loss": 0.0047, "step": 28090 }, { "epoch": 25.269784172661872, "grad_norm": 0.16727297008037567, "learning_rate": 5.931762816134516e-05, "loss": 0.0058, "step": 28100 }, { "epoch": 25.278776978417266, "grad_norm": 0.2150467038154602, "learning_rate": 5.9290551649436434e-05, "loss": 0.0054, "step": 28110 }, { "epoch": 25.28776978417266, "grad_norm": 0.27036675810813904, "learning_rate": 5.9263472315303416e-05, "loss": 0.0073, "step": 28120 }, { "epoch": 25.296762589928058, "grad_norm": 0.3089682459831238, "learning_rate": 5.9236390167172096e-05, "loss": 0.0049, "step": 28130 }, { "epoch": 25.305755395683452, "grad_norm": 0.24450194835662842, "learning_rate": 5.920930521326932e-05, "loss": 0.0086, "step": 28140 }, { "epoch": 25.31474820143885, "grad_norm": 0.17669200897216797, "learning_rate": 5.918221746182276e-05, "loss": 0.0068, "step": 28150 }, { "epoch": 25.323741007194243, "grad_norm": 0.1260657012462616, "learning_rate": 5.9155126921061e-05, "loss": 0.0062, "step": 28160 }, { "epoch": 25.33273381294964, "grad_norm": 0.19453807175159454, "learning_rate": 5.91280335992134e-05, "loss": 0.0044, "step": 28170 }, { "epoch": 25.341726618705035, "grad_norm": 0.12137271463871002, "learning_rate": 5.91009375045102e-05, "loss": 0.0061, "step": 28180 }, { "epoch": 25.350719424460433, "grad_norm": 0.1369408220052719, "learning_rate": 5.9073838645182476e-05, "loss": 0.005, "step": 28190 }, { "epoch": 25.359712230215827, "grad_norm": 0.16925187408924103, "learning_rate": 5.904673702946217e-05, "loss": 0.0077, "step": 28200 }, { "epoch": 25.368705035971225, "grad_norm": 0.16107138991355896, "learning_rate": 5.9019632665582004e-05, "loss": 0.004, "step": 28210 }, { "epoch": 25.37769784172662, "grad_norm": 0.19644039869308472, "learning_rate": 5.899252556177559e-05, "loss": 0.0053, "step": 28220 }, { "epoch": 25.386690647482013, "grad_norm": 0.17508180439472198, "learning_rate": 5.896541572627735e-05, "loss": 0.0055, "step": 28230 }, { "epoch": 25.39568345323741, "grad_norm": 0.17449603974819183, "learning_rate": 5.893830316732253e-05, "loss": 0.0059, "step": 28240 }, { "epoch": 25.404676258992804, "grad_norm": 0.14008885622024536, "learning_rate": 5.8911187893147214e-05, "loss": 0.0083, "step": 28250 }, { "epoch": 25.413669064748202, "grad_norm": 0.1866237372159958, "learning_rate": 5.888406991198828e-05, "loss": 0.0072, "step": 28260 }, { "epoch": 25.422661870503596, "grad_norm": 0.21662594377994537, "learning_rate": 5.885694923208349e-05, "loss": 0.0118, "step": 28270 }, { "epoch": 25.431654676258994, "grad_norm": 0.20380759239196777, "learning_rate": 5.882982586167138e-05, "loss": 0.0075, "step": 28280 }, { "epoch": 25.440647482014388, "grad_norm": 0.13675053417682648, "learning_rate": 5.880269980899131e-05, "loss": 0.0062, "step": 28290 }, { "epoch": 25.449640287769785, "grad_norm": 0.1352764368057251, "learning_rate": 5.8775571082283465e-05, "loss": 0.0048, "step": 28300 }, { "epoch": 25.45863309352518, "grad_norm": 0.11910674721002579, "learning_rate": 5.8748439689788824e-05, "loss": 0.0085, "step": 28310 }, { "epoch": 25.467625899280577, "grad_norm": 0.10746945440769196, "learning_rate": 5.87213056397492e-05, "loss": 0.0054, "step": 28320 }, { "epoch": 25.47661870503597, "grad_norm": 0.14848598837852478, "learning_rate": 5.869416894040719e-05, "loss": 0.0095, "step": 28330 }, { "epoch": 25.485611510791365, "grad_norm": 0.13931459188461304, "learning_rate": 5.866702960000621e-05, "loss": 0.0056, "step": 28340 }, { "epoch": 25.494604316546763, "grad_norm": 0.22272823750972748, "learning_rate": 5.863988762679048e-05, "loss": 0.0075, "step": 28350 }, { "epoch": 25.503597122302157, "grad_norm": 0.23193036019802094, "learning_rate": 5.8612743029005e-05, "loss": 0.0055, "step": 28360 }, { "epoch": 25.512589928057555, "grad_norm": 0.2054457813501358, "learning_rate": 5.858559581489561e-05, "loss": 0.0056, "step": 28370 }, { "epoch": 25.52158273381295, "grad_norm": 0.1851906180381775, "learning_rate": 5.85584459927089e-05, "loss": 0.008, "step": 28380 }, { "epoch": 25.530575539568346, "grad_norm": 0.2321723848581314, "learning_rate": 5.853129357069227e-05, "loss": 0.0058, "step": 28390 }, { "epoch": 25.53956834532374, "grad_norm": 0.19401982426643372, "learning_rate": 5.8504138557093913e-05, "loss": 0.0067, "step": 28400 }, { "epoch": 25.548561151079138, "grad_norm": 0.17503796517848969, "learning_rate": 5.8476980960162784e-05, "loss": 0.0073, "step": 28410 }, { "epoch": 25.557553956834532, "grad_norm": 0.2828863561153412, "learning_rate": 5.844982078814868e-05, "loss": 0.0072, "step": 28420 }, { "epoch": 25.56654676258993, "grad_norm": 0.4041643440723419, "learning_rate": 5.842265804930211e-05, "loss": 0.0103, "step": 28430 }, { "epoch": 25.575539568345324, "grad_norm": 0.1785629838705063, "learning_rate": 5.839549275187444e-05, "loss": 0.0062, "step": 28440 }, { "epoch": 25.584532374100718, "grad_norm": 0.21938374638557434, "learning_rate": 5.836832490411771e-05, "loss": 0.007, "step": 28450 }, { "epoch": 25.593525179856115, "grad_norm": 0.27240484952926636, "learning_rate": 5.834115451428485e-05, "loss": 0.0083, "step": 28460 }, { "epoch": 25.60251798561151, "grad_norm": 0.17861460149288177, "learning_rate": 5.831398159062946e-05, "loss": 0.0055, "step": 28470 }, { "epoch": 25.611510791366907, "grad_norm": 0.2578239440917969, "learning_rate": 5.828680614140599e-05, "loss": 0.0054, "step": 28480 }, { "epoch": 25.6205035971223, "grad_norm": 0.18912149965763092, "learning_rate": 5.825962817486962e-05, "loss": 0.0075, "step": 28490 }, { "epoch": 25.6294964028777, "grad_norm": 0.11097251623868942, "learning_rate": 5.823244769927629e-05, "loss": 0.0062, "step": 28500 }, { "epoch": 25.638489208633093, "grad_norm": 0.24130874872207642, "learning_rate": 5.8205264722882716e-05, "loss": 0.0105, "step": 28510 }, { "epoch": 25.64748201438849, "grad_norm": 0.2058960497379303, "learning_rate": 5.817807925394636e-05, "loss": 0.0046, "step": 28520 }, { "epoch": 25.656474820143885, "grad_norm": 0.12753643095493317, "learning_rate": 5.815089130072546e-05, "loss": 0.0068, "step": 28530 }, { "epoch": 25.665467625899282, "grad_norm": 0.14661084115505219, "learning_rate": 5.8123700871479e-05, "loss": 0.0045, "step": 28540 }, { "epoch": 25.674460431654676, "grad_norm": 0.1261759102344513, "learning_rate": 5.809650797446671e-05, "loss": 0.0044, "step": 28550 }, { "epoch": 25.68345323741007, "grad_norm": 0.12134291231632233, "learning_rate": 5.806931261794907e-05, "loss": 0.0073, "step": 28560 }, { "epoch": 25.692446043165468, "grad_norm": 0.13826051354408264, "learning_rate": 5.804211481018731e-05, "loss": 0.0039, "step": 28570 }, { "epoch": 25.701438848920862, "grad_norm": 0.09997893124818802, "learning_rate": 5.801491455944341e-05, "loss": 0.0043, "step": 28580 }, { "epoch": 25.71043165467626, "grad_norm": 0.12796296179294586, "learning_rate": 5.79877118739801e-05, "loss": 0.0052, "step": 28590 }, { "epoch": 25.719424460431654, "grad_norm": 0.22541621327400208, "learning_rate": 5.7960506762060816e-05, "loss": 0.0072, "step": 28600 }, { "epoch": 25.72841726618705, "grad_norm": 0.34165725111961365, "learning_rate": 5.793329923194977e-05, "loss": 0.0054, "step": 28610 }, { "epoch": 25.737410071942445, "grad_norm": 0.20831666886806488, "learning_rate": 5.790608929191187e-05, "loss": 0.0059, "step": 28620 }, { "epoch": 25.746402877697843, "grad_norm": 0.13987083733081818, "learning_rate": 5.78788769502128e-05, "loss": 0.0104, "step": 28630 }, { "epoch": 25.755395683453237, "grad_norm": 0.22738029062747955, "learning_rate": 5.785166221511894e-05, "loss": 0.0072, "step": 28640 }, { "epoch": 25.764388489208635, "grad_norm": 0.12659136950969696, "learning_rate": 5.7824445094897415e-05, "loss": 0.0083, "step": 28650 }, { "epoch": 25.77338129496403, "grad_norm": 0.19532017409801483, "learning_rate": 5.7797225597816065e-05, "loss": 0.0073, "step": 28660 }, { "epoch": 25.782374100719423, "grad_norm": 0.2025105208158493, "learning_rate": 5.777000373214345e-05, "loss": 0.005, "step": 28670 }, { "epoch": 25.79136690647482, "grad_norm": 0.275947630405426, "learning_rate": 5.774277950614885e-05, "loss": 0.0086, "step": 28680 }, { "epoch": 25.800359712230215, "grad_norm": 0.2836247980594635, "learning_rate": 5.771555292810227e-05, "loss": 0.0065, "step": 28690 }, { "epoch": 25.809352517985612, "grad_norm": 0.1573379784822464, "learning_rate": 5.768832400627444e-05, "loss": 0.0061, "step": 28700 }, { "epoch": 25.818345323741006, "grad_norm": 0.24729467928409576, "learning_rate": 5.7661092748936775e-05, "loss": 0.0076, "step": 28710 }, { "epoch": 25.827338129496404, "grad_norm": 0.141748309135437, "learning_rate": 5.76338591643614e-05, "loss": 0.0101, "step": 28720 }, { "epoch": 25.836330935251798, "grad_norm": 0.22974085807800293, "learning_rate": 5.760662326082118e-05, "loss": 0.0085, "step": 28730 }, { "epoch": 25.845323741007196, "grad_norm": 0.20804443955421448, "learning_rate": 5.757938504658965e-05, "loss": 0.0065, "step": 28740 }, { "epoch": 25.85431654676259, "grad_norm": 0.17103342711925507, "learning_rate": 5.755214452994107e-05, "loss": 0.0074, "step": 28750 }, { "epoch": 25.863309352517987, "grad_norm": 0.13403983414173126, "learning_rate": 5.752490171915039e-05, "loss": 0.0077, "step": 28760 }, { "epoch": 25.87230215827338, "grad_norm": 0.20772719383239746, "learning_rate": 5.749765662249324e-05, "loss": 0.0097, "step": 28770 }, { "epoch": 25.881294964028775, "grad_norm": 0.24767421185970306, "learning_rate": 5.747040924824596e-05, "loss": 0.0039, "step": 28780 }, { "epoch": 25.890287769784173, "grad_norm": 0.17923374474048615, "learning_rate": 5.7443159604685613e-05, "loss": 0.0053, "step": 28790 }, { "epoch": 25.899280575539567, "grad_norm": 0.1814834475517273, "learning_rate": 5.74159077000899e-05, "loss": 0.0078, "step": 28800 }, { "epoch": 25.908273381294965, "grad_norm": 0.18174999952316284, "learning_rate": 5.7388653542737235e-05, "loss": 0.0068, "step": 28810 }, { "epoch": 25.91726618705036, "grad_norm": 0.15340077877044678, "learning_rate": 5.736139714090672e-05, "loss": 0.0101, "step": 28820 }, { "epoch": 25.926258992805757, "grad_norm": 0.1476346254348755, "learning_rate": 5.73341385028781e-05, "loss": 0.0074, "step": 28830 }, { "epoch": 25.93525179856115, "grad_norm": 0.095841184258461, "learning_rate": 5.7306877636931855e-05, "loss": 0.0051, "step": 28840 }, { "epoch": 25.944244604316548, "grad_norm": 0.2618107199668884, "learning_rate": 5.7279614551349125e-05, "loss": 0.0063, "step": 28850 }, { "epoch": 25.953237410071942, "grad_norm": 0.21700593829154968, "learning_rate": 5.725234925441169e-05, "loss": 0.0045, "step": 28860 }, { "epoch": 25.96223021582734, "grad_norm": 0.18304471671581268, "learning_rate": 5.7225081754402044e-05, "loss": 0.0054, "step": 28870 }, { "epoch": 25.971223021582734, "grad_norm": 0.14098183810710907, "learning_rate": 5.7197812059603326e-05, "loss": 0.0061, "step": 28880 }, { "epoch": 25.980215827338128, "grad_norm": 0.21495763957500458, "learning_rate": 5.717054017829934e-05, "loss": 0.0066, "step": 28890 }, { "epoch": 25.989208633093526, "grad_norm": 0.18292920291423798, "learning_rate": 5.7143266118774584e-05, "loss": 0.0106, "step": 28900 }, { "epoch": 25.99820143884892, "grad_norm": 0.25421252846717834, "learning_rate": 5.711598988931418e-05, "loss": 0.0055, "step": 28910 }, { "epoch": 26.007194244604317, "grad_norm": 0.20004208385944366, "learning_rate": 5.7088711498203954e-05, "loss": 0.006, "step": 28920 }, { "epoch": 26.01618705035971, "grad_norm": 0.15711890161037445, "learning_rate": 5.706143095373033e-05, "loss": 0.0108, "step": 28930 }, { "epoch": 26.02517985611511, "grad_norm": 0.22860503196716309, "learning_rate": 5.703414826418042e-05, "loss": 0.0062, "step": 28940 }, { "epoch": 26.034172661870503, "grad_norm": 0.1417461335659027, "learning_rate": 5.7006863437842007e-05, "loss": 0.0063, "step": 28950 }, { "epoch": 26.0431654676259, "grad_norm": 0.12102147936820984, "learning_rate": 5.697957648300348e-05, "loss": 0.0132, "step": 28960 }, { "epoch": 26.052158273381295, "grad_norm": 0.1567157506942749, "learning_rate": 5.695228740795391e-05, "loss": 0.0064, "step": 28970 }, { "epoch": 26.06115107913669, "grad_norm": 0.2730419933795929, "learning_rate": 5.6924996220982985e-05, "loss": 0.0074, "step": 28980 }, { "epoch": 26.070143884892087, "grad_norm": 0.14896653592586517, "learning_rate": 5.6897702930381045e-05, "loss": 0.0051, "step": 28990 }, { "epoch": 26.07913669064748, "grad_norm": 0.11505764722824097, "learning_rate": 5.687040754443908e-05, "loss": 0.0043, "step": 29000 }, { "epoch": 26.08812949640288, "grad_norm": 0.13559813797473907, "learning_rate": 5.6843110071448725e-05, "loss": 0.0064, "step": 29010 }, { "epoch": 26.097122302158272, "grad_norm": 0.15125572681427002, "learning_rate": 5.6815810519702194e-05, "loss": 0.0066, "step": 29020 }, { "epoch": 26.10611510791367, "grad_norm": 0.18235976994037628, "learning_rate": 5.6788508897492396e-05, "loss": 0.0041, "step": 29030 }, { "epoch": 26.115107913669064, "grad_norm": 0.2509373724460602, "learning_rate": 5.676120521311282e-05, "loss": 0.0056, "step": 29040 }, { "epoch": 26.12410071942446, "grad_norm": 0.12027518451213837, "learning_rate": 5.6733899474857634e-05, "loss": 0.0085, "step": 29050 }, { "epoch": 26.133093525179856, "grad_norm": 0.17338693141937256, "learning_rate": 5.670659169102157e-05, "loss": 0.0057, "step": 29060 }, { "epoch": 26.142086330935253, "grad_norm": 0.21177440881729126, "learning_rate": 5.6679281869900044e-05, "loss": 0.0067, "step": 29070 }, { "epoch": 26.151079136690647, "grad_norm": 0.17362351715564728, "learning_rate": 5.6651970019789045e-05, "loss": 0.006, "step": 29080 }, { "epoch": 26.16007194244604, "grad_norm": 0.2849655747413635, "learning_rate": 5.662465614898519e-05, "loss": 0.0048, "step": 29090 }, { "epoch": 26.16906474820144, "grad_norm": 0.17158235609531403, "learning_rate": 5.6597340265785695e-05, "loss": 0.0109, "step": 29100 }, { "epoch": 26.178057553956833, "grad_norm": 0.1381220668554306, "learning_rate": 5.657002237848843e-05, "loss": 0.0066, "step": 29110 }, { "epoch": 26.18705035971223, "grad_norm": 0.1734568178653717, "learning_rate": 5.654270249539183e-05, "loss": 0.0055, "step": 29120 }, { "epoch": 26.196043165467625, "grad_norm": 0.18830499053001404, "learning_rate": 5.651538062479498e-05, "loss": 0.0088, "step": 29130 }, { "epoch": 26.205035971223023, "grad_norm": 0.29989179968833923, "learning_rate": 5.648805677499751e-05, "loss": 0.0065, "step": 29140 }, { "epoch": 26.214028776978417, "grad_norm": 0.1434948742389679, "learning_rate": 5.646073095429969e-05, "loss": 0.0079, "step": 29150 }, { "epoch": 26.223021582733814, "grad_norm": 0.18283826112747192, "learning_rate": 5.643340317100241e-05, "loss": 0.0059, "step": 29160 }, { "epoch": 26.23201438848921, "grad_norm": 0.24819408357143402, "learning_rate": 5.64060734334071e-05, "loss": 0.0059, "step": 29170 }, { "epoch": 26.241007194244606, "grad_norm": 0.1708010584115982, "learning_rate": 5.637874174981583e-05, "loss": 0.0045, "step": 29180 }, { "epoch": 26.25, "grad_norm": 0.15270595252513885, "learning_rate": 5.635140812853124e-05, "loss": 0.0061, "step": 29190 }, { "epoch": 26.258992805755394, "grad_norm": 0.11934954673051834, "learning_rate": 5.6324072577856544e-05, "loss": 0.0052, "step": 29200 }, { "epoch": 26.26798561151079, "grad_norm": 0.26876863837242126, "learning_rate": 5.629673510609559e-05, "loss": 0.0065, "step": 29210 }, { "epoch": 26.276978417266186, "grad_norm": 0.1503201425075531, "learning_rate": 5.626939572155276e-05, "loss": 0.0082, "step": 29220 }, { "epoch": 26.285971223021583, "grad_norm": 0.1716649830341339, "learning_rate": 5.6242054432533054e-05, "loss": 0.0058, "step": 29230 }, { "epoch": 26.294964028776977, "grad_norm": 0.20248842239379883, "learning_rate": 5.621471124734201e-05, "loss": 0.0054, "step": 29240 }, { "epoch": 26.303956834532375, "grad_norm": 0.2338636964559555, "learning_rate": 5.6187366174285794e-05, "loss": 0.0115, "step": 29250 }, { "epoch": 26.31294964028777, "grad_norm": 0.2013469785451889, "learning_rate": 5.616001922167109e-05, "loss": 0.0075, "step": 29260 }, { "epoch": 26.321942446043167, "grad_norm": 0.16577298939228058, "learning_rate": 5.61326703978052e-05, "loss": 0.0087, "step": 29270 }, { "epoch": 26.33093525179856, "grad_norm": 0.3641568422317505, "learning_rate": 5.6105319710995964e-05, "loss": 0.0064, "step": 29280 }, { "epoch": 26.33992805755396, "grad_norm": 0.17689627408981323, "learning_rate": 5.60779671695518e-05, "loss": 0.007, "step": 29290 }, { "epoch": 26.348920863309353, "grad_norm": 0.2202010601758957, "learning_rate": 5.6050612781781684e-05, "loss": 0.005, "step": 29300 }, { "epoch": 26.357913669064747, "grad_norm": 0.15275342762470245, "learning_rate": 5.602325655599516e-05, "loss": 0.0053, "step": 29310 }, { "epoch": 26.366906474820144, "grad_norm": 0.12229560315608978, "learning_rate": 5.599589850050234e-05, "loss": 0.0044, "step": 29320 }, { "epoch": 26.37589928057554, "grad_norm": 0.1275167316198349, "learning_rate": 5.5968538623613874e-05, "loss": 0.005, "step": 29330 }, { "epoch": 26.384892086330936, "grad_norm": 0.17273807525634766, "learning_rate": 5.594117693364095e-05, "loss": 0.0051, "step": 29340 }, { "epoch": 26.39388489208633, "grad_norm": 0.13791784644126892, "learning_rate": 5.591381343889535e-05, "loss": 0.0065, "step": 29350 }, { "epoch": 26.402877697841728, "grad_norm": 0.11277294903993607, "learning_rate": 5.5886448147689355e-05, "loss": 0.0063, "step": 29360 }, { "epoch": 26.41187050359712, "grad_norm": 0.1173112690448761, "learning_rate": 5.585908106833585e-05, "loss": 0.0044, "step": 29370 }, { "epoch": 26.42086330935252, "grad_norm": 0.12376224249601364, "learning_rate": 5.5831712209148226e-05, "loss": 0.0058, "step": 29380 }, { "epoch": 26.429856115107913, "grad_norm": 0.17072290182113647, "learning_rate": 5.58043415784404e-05, "loss": 0.0075, "step": 29390 }, { "epoch": 26.43884892086331, "grad_norm": 0.1826130598783493, "learning_rate": 5.577696918452686e-05, "loss": 0.0048, "step": 29400 }, { "epoch": 26.447841726618705, "grad_norm": 0.10627509653568268, "learning_rate": 5.5749595035722604e-05, "loss": 0.0187, "step": 29410 }, { "epoch": 26.4568345323741, "grad_norm": 0.10839961469173431, "learning_rate": 5.5722219140343193e-05, "loss": 0.0112, "step": 29420 }, { "epoch": 26.465827338129497, "grad_norm": 0.23787203431129456, "learning_rate": 5.56948415067047e-05, "loss": 0.0071, "step": 29430 }, { "epoch": 26.47482014388489, "grad_norm": 0.2852526307106018, "learning_rate": 5.5667462143123704e-05, "loss": 0.0086, "step": 29440 }, { "epoch": 26.48381294964029, "grad_norm": 0.1405961662530899, "learning_rate": 5.564008105791737e-05, "loss": 0.0057, "step": 29450 }, { "epoch": 26.492805755395683, "grad_norm": 0.20126870274543762, "learning_rate": 5.5612698259403316e-05, "loss": 0.0097, "step": 29460 }, { "epoch": 26.50179856115108, "grad_norm": 0.17822569608688354, "learning_rate": 5.5585313755899724e-05, "loss": 0.0081, "step": 29470 }, { "epoch": 26.510791366906474, "grad_norm": 0.27274879813194275, "learning_rate": 5.5557927555725285e-05, "loss": 0.0105, "step": 29480 }, { "epoch": 26.519784172661872, "grad_norm": 0.10084344446659088, "learning_rate": 5.55305396671992e-05, "loss": 0.0051, "step": 29490 }, { "epoch": 26.528776978417266, "grad_norm": 0.19738633930683136, "learning_rate": 5.55031500986412e-05, "loss": 0.0099, "step": 29500 }, { "epoch": 26.53776978417266, "grad_norm": 0.25565725564956665, "learning_rate": 5.547575885837149e-05, "loss": 0.0086, "step": 29510 }, { "epoch": 26.546762589928058, "grad_norm": 0.10778793692588806, "learning_rate": 5.5448365954710825e-05, "loss": 0.0061, "step": 29520 }, { "epoch": 26.555755395683452, "grad_norm": 0.2022084891796112, "learning_rate": 5.5420971395980446e-05, "loss": 0.0089, "step": 29530 }, { "epoch": 26.56474820143885, "grad_norm": 0.34032702445983887, "learning_rate": 5.539357519050209e-05, "loss": 0.0091, "step": 29540 }, { "epoch": 26.573741007194243, "grad_norm": 0.12440850585699081, "learning_rate": 5.536617734659799e-05, "loss": 0.0079, "step": 29550 }, { "epoch": 26.58273381294964, "grad_norm": 0.22457706928253174, "learning_rate": 5.533877787259091e-05, "loss": 0.0059, "step": 29560 }, { "epoch": 26.591726618705035, "grad_norm": 0.20270079374313354, "learning_rate": 5.5311376776804044e-05, "loss": 0.0062, "step": 29570 }, { "epoch": 26.600719424460433, "grad_norm": 0.24442392587661743, "learning_rate": 5.528397406756118e-05, "loss": 0.0089, "step": 29580 }, { "epoch": 26.609712230215827, "grad_norm": 0.21804559230804443, "learning_rate": 5.525656975318652e-05, "loss": 0.0056, "step": 29590 }, { "epoch": 26.618705035971225, "grad_norm": 0.11011067777872086, "learning_rate": 5.522916384200474e-05, "loss": 0.0036, "step": 29600 }, { "epoch": 26.62769784172662, "grad_norm": 0.10670044273138046, "learning_rate": 5.520175634234106e-05, "loss": 0.0064, "step": 29610 }, { "epoch": 26.636690647482013, "grad_norm": 0.23709097504615784, "learning_rate": 5.517434726252113e-05, "loss": 0.0055, "step": 29620 }, { "epoch": 26.64568345323741, "grad_norm": 0.1859293431043625, "learning_rate": 5.514693661087113e-05, "loss": 0.0077, "step": 29630 }, { "epoch": 26.654676258992804, "grad_norm": 0.22783033549785614, "learning_rate": 5.511952439571769e-05, "loss": 0.0052, "step": 29640 }, { "epoch": 26.663669064748202, "grad_norm": 0.17663319408893585, "learning_rate": 5.509211062538791e-05, "loss": 0.0048, "step": 29650 }, { "epoch": 26.672661870503596, "grad_norm": 0.2042214572429657, "learning_rate": 5.506469530820939e-05, "loss": 0.0095, "step": 29660 }, { "epoch": 26.681654676258994, "grad_norm": 0.17841225862503052, "learning_rate": 5.503727845251014e-05, "loss": 0.0047, "step": 29670 }, { "epoch": 26.690647482014388, "grad_norm": 0.11539847403764725, "learning_rate": 5.50098600666187e-05, "loss": 0.0067, "step": 29680 }, { "epoch": 26.699640287769785, "grad_norm": 0.16289064288139343, "learning_rate": 5.498244015886406e-05, "loss": 0.0066, "step": 29690 }, { "epoch": 26.70863309352518, "grad_norm": 0.18486282229423523, "learning_rate": 5.495501873757565e-05, "loss": 0.0056, "step": 29700 }, { "epoch": 26.717625899280577, "grad_norm": 0.16270902752876282, "learning_rate": 5.492759581108336e-05, "loss": 0.0059, "step": 29710 }, { "epoch": 26.72661870503597, "grad_norm": 0.1928163319826126, "learning_rate": 5.490017138771759e-05, "loss": 0.0075, "step": 29720 }, { "epoch": 26.735611510791365, "grad_norm": 0.19285179674625397, "learning_rate": 5.487274547580912e-05, "loss": 0.0073, "step": 29730 }, { "epoch": 26.744604316546763, "grad_norm": 0.23380529880523682, "learning_rate": 5.484531808368923e-05, "loss": 0.0057, "step": 29740 }, { "epoch": 26.753597122302157, "grad_norm": 0.2051352858543396, "learning_rate": 5.4817889219689656e-05, "loss": 0.0072, "step": 29750 }, { "epoch": 26.762589928057555, "grad_norm": 0.29587289690971375, "learning_rate": 5.4790458892142536e-05, "loss": 0.0053, "step": 29760 }, { "epoch": 26.77158273381295, "grad_norm": 0.1648315191268921, "learning_rate": 5.476302710938048e-05, "loss": 0.0051, "step": 29770 }, { "epoch": 26.780575539568346, "grad_norm": 0.2088617980480194, "learning_rate": 5.473559387973657e-05, "loss": 0.0071, "step": 29780 }, { "epoch": 26.78956834532374, "grad_norm": 0.18436530232429504, "learning_rate": 5.470815921154425e-05, "loss": 0.0073, "step": 29790 }, { "epoch": 26.798561151079138, "grad_norm": 0.0957002267241478, "learning_rate": 5.468072311313749e-05, "loss": 0.0039, "step": 29800 }, { "epoch": 26.807553956834532, "grad_norm": 0.14233945310115814, "learning_rate": 5.465328559285063e-05, "loss": 0.006, "step": 29810 }, { "epoch": 26.81654676258993, "grad_norm": 0.17077228426933289, "learning_rate": 5.462584665901849e-05, "loss": 0.005, "step": 29820 }, { "epoch": 26.825539568345324, "grad_norm": 0.22198648750782013, "learning_rate": 5.4598406319976235e-05, "loss": 0.007, "step": 29830 }, { "epoch": 26.834532374100718, "grad_norm": 0.19979974627494812, "learning_rate": 5.457096458405958e-05, "loss": 0.0101, "step": 29840 }, { "epoch": 26.843525179856115, "grad_norm": 0.1363685131072998, "learning_rate": 5.454352145960457e-05, "loss": 0.0061, "step": 29850 }, { "epoch": 26.85251798561151, "grad_norm": 0.10141397267580032, "learning_rate": 5.4516076954947715e-05, "loss": 0.0048, "step": 29860 }, { "epoch": 26.861510791366907, "grad_norm": 0.2660808265209198, "learning_rate": 5.448863107842591e-05, "loss": 0.0062, "step": 29870 }, { "epoch": 26.8705035971223, "grad_norm": 0.21517479419708252, "learning_rate": 5.446118383837651e-05, "loss": 0.006, "step": 29880 }, { "epoch": 26.8794964028777, "grad_norm": 0.2695595324039459, "learning_rate": 5.443373524313722e-05, "loss": 0.0093, "step": 29890 }, { "epoch": 26.888489208633093, "grad_norm": 0.21296542882919312, "learning_rate": 5.440628530104626e-05, "loss": 0.0048, "step": 29900 }, { "epoch": 26.89748201438849, "grad_norm": 0.20556853711605072, "learning_rate": 5.4378834020442146e-05, "loss": 0.0059, "step": 29910 }, { "epoch": 26.906474820143885, "grad_norm": 0.1583034098148346, "learning_rate": 5.4351381409663884e-05, "loss": 0.0044, "step": 29920 }, { "epoch": 26.915467625899282, "grad_norm": 0.2184469848871231, "learning_rate": 5.432392747705084e-05, "loss": 0.0048, "step": 29930 }, { "epoch": 26.924460431654676, "grad_norm": 0.137073814868927, "learning_rate": 5.429647223094278e-05, "loss": 0.0048, "step": 29940 }, { "epoch": 26.93345323741007, "grad_norm": 0.1425035297870636, "learning_rate": 5.4269015679679924e-05, "loss": 0.0062, "step": 29950 }, { "epoch": 26.942446043165468, "grad_norm": 0.12292914092540741, "learning_rate": 5.424155783160281e-05, "loss": 0.004, "step": 29960 }, { "epoch": 26.951438848920862, "grad_norm": 0.17451541125774384, "learning_rate": 5.4214098695052415e-05, "loss": 0.0078, "step": 29970 }, { "epoch": 26.96043165467626, "grad_norm": 0.13674312829971313, "learning_rate": 5.418663827837012e-05, "loss": 0.0075, "step": 29980 }, { "epoch": 26.969424460431654, "grad_norm": 0.11516491323709488, "learning_rate": 5.415917658989763e-05, "loss": 0.0045, "step": 29990 }, { "epoch": 26.97841726618705, "grad_norm": 0.14940856397151947, "learning_rate": 5.413171363797713e-05, "loss": 0.0074, "step": 30000 } ], "logging_steps": 10, "max_steps": 60000, "num_input_tokens_seen": 0, "num_train_epochs": 54, "save_steps": 10000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }