{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1315.7894736842106, "eval_steps": 500, "global_step": 200000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06578947368421052, "grad_norm": 165.546875, "learning_rate": 0.0001, "loss": 21.3446, "step": 10 }, { "epoch": 0.13157894736842105, "grad_norm": 92.7155990600586, "learning_rate": 0.0001, "loss": 4.7673, "step": 20 }, { "epoch": 0.19736842105263158, "grad_norm": 64.37399291992188, "learning_rate": 0.0001, "loss": 3.561, "step": 30 }, { "epoch": 0.2631578947368421, "grad_norm": 55.39424514770508, "learning_rate": 0.0001, "loss": 3.1393, "step": 40 }, { "epoch": 0.32894736842105265, "grad_norm": 55.13695526123047, "learning_rate": 0.0001, "loss": 2.8728, "step": 50 }, { "epoch": 0.39473684210526316, "grad_norm": 51.248966217041016, "learning_rate": 0.0001, "loss": 2.6172, "step": 60 }, { "epoch": 0.4605263157894737, "grad_norm": 39.10142135620117, "learning_rate": 0.0001, "loss": 2.415, "step": 70 }, { "epoch": 0.5263157894736842, "grad_norm": 47.69247055053711, "learning_rate": 0.0001, "loss": 2.2343, "step": 80 }, { "epoch": 0.5921052631578947, "grad_norm": 49.21741485595703, "learning_rate": 0.0001, "loss": 2.0767, "step": 90 }, { "epoch": 0.6578947368421053, "grad_norm": 44.604793548583984, "learning_rate": 0.0001, "loss": 2.0309, "step": 100 }, { "epoch": 0.7236842105263158, "grad_norm": 51.46554183959961, "learning_rate": 0.0001, "loss": 1.8886, "step": 110 }, { "epoch": 0.7894736842105263, "grad_norm": 46.2479133605957, "learning_rate": 0.0001, "loss": 1.7757, "step": 120 }, { "epoch": 0.8552631578947368, "grad_norm": 49.21684646606445, "learning_rate": 0.0001, "loss": 1.6854, "step": 130 }, { "epoch": 0.9210526315789473, "grad_norm": 49.37424087524414, "learning_rate": 0.0001, "loss": 1.614, "step": 140 }, { "epoch": 0.9868421052631579, "grad_norm": 40.79842758178711, "learning_rate": 0.0001, "loss": 1.494, "step": 150 }, { "epoch": 1.0526315789473684, "grad_norm": 43.07915115356445, "learning_rate": 0.0001, "loss": 1.4474, "step": 160 }, { "epoch": 1.118421052631579, "grad_norm": 37.799560546875, "learning_rate": 0.0001, "loss": 1.3618, "step": 170 }, { "epoch": 1.1842105263157894, "grad_norm": 37.14927291870117, "learning_rate": 0.0001, "loss": 1.3081, "step": 180 }, { "epoch": 1.25, "grad_norm": 35.22480010986328, "learning_rate": 0.0001, "loss": 1.2341, "step": 190 }, { "epoch": 1.3157894736842106, "grad_norm": 33.31498718261719, "learning_rate": 0.0001, "loss": 1.169, "step": 200 }, { "epoch": 1.381578947368421, "grad_norm": 27.80243492126465, "learning_rate": 0.0001, "loss": 1.0758, "step": 210 }, { "epoch": 1.4473684210526316, "grad_norm": 35.85008239746094, "learning_rate": 0.0001, "loss": 1.0106, "step": 220 }, { "epoch": 1.513157894736842, "grad_norm": 29.738868713378906, "learning_rate": 0.0001, "loss": 0.9601, "step": 230 }, { "epoch": 1.5789473684210527, "grad_norm": 29.869279861450195, "learning_rate": 0.0001, "loss": 0.9334, "step": 240 }, { "epoch": 1.6447368421052633, "grad_norm": 23.091772079467773, "learning_rate": 0.0001, "loss": 0.8842, "step": 250 }, { "epoch": 1.7105263157894737, "grad_norm": 31.321971893310547, "learning_rate": 0.0001, "loss": 0.8304, "step": 260 }, { "epoch": 1.776315789473684, "grad_norm": 31.007795333862305, "learning_rate": 0.0001, "loss": 0.7835, "step": 270 }, { "epoch": 1.8421052631578947, "grad_norm": 23.761444091796875, "learning_rate": 0.0001, "loss": 0.741, "step": 280 }, { "epoch": 1.9078947368421053, "grad_norm": 29.322738647460938, "learning_rate": 0.0001, "loss": 0.7034, "step": 290 }, { "epoch": 1.973684210526316, "grad_norm": 24.82952308654785, "learning_rate": 0.0001, "loss": 0.6807, "step": 300 }, { "epoch": 2.039473684210526, "grad_norm": 20.884836196899414, "learning_rate": 0.0001, "loss": 0.6234, "step": 310 }, { "epoch": 2.1052631578947367, "grad_norm": 22.192235946655273, "learning_rate": 0.0001, "loss": 0.5987, "step": 320 }, { "epoch": 2.1710526315789473, "grad_norm": 22.435510635375977, "learning_rate": 0.0001, "loss": 0.5711, "step": 330 }, { "epoch": 2.236842105263158, "grad_norm": 20.451364517211914, "learning_rate": 0.0001, "loss": 0.5399, "step": 340 }, { "epoch": 2.3026315789473686, "grad_norm": 22.708984375, "learning_rate": 0.0001, "loss": 0.5199, "step": 350 }, { "epoch": 2.3684210526315788, "grad_norm": 20.40320587158203, "learning_rate": 0.0001, "loss": 0.4973, "step": 360 }, { "epoch": 2.4342105263157894, "grad_norm": 17.256328582763672, "learning_rate": 0.0001, "loss": 0.4806, "step": 370 }, { "epoch": 2.5, "grad_norm": 19.842121124267578, "learning_rate": 0.0001, "loss": 0.4759, "step": 380 }, { "epoch": 2.5657894736842106, "grad_norm": 17.68121337890625, "learning_rate": 0.0001, "loss": 0.4528, "step": 390 }, { "epoch": 2.6315789473684212, "grad_norm": 19.98098373413086, "learning_rate": 0.0001, "loss": 0.4489, "step": 400 }, { "epoch": 2.6973684210526314, "grad_norm": 18.062952041625977, "learning_rate": 0.0001, "loss": 0.4187, "step": 410 }, { "epoch": 2.763157894736842, "grad_norm": 15.641585350036621, "learning_rate": 0.0001, "loss": 0.4068, "step": 420 }, { "epoch": 2.8289473684210527, "grad_norm": 16.796161651611328, "learning_rate": 0.0001, "loss": 0.3884, "step": 430 }, { "epoch": 2.8947368421052633, "grad_norm": 15.872922897338867, "learning_rate": 0.0001, "loss": 0.3997, "step": 440 }, { "epoch": 2.9605263157894735, "grad_norm": 15.037050247192383, "learning_rate": 0.0001, "loss": 0.3687, "step": 450 }, { "epoch": 3.026315789473684, "grad_norm": 18.485353469848633, "learning_rate": 0.0001, "loss": 0.3606, "step": 460 }, { "epoch": 3.0921052631578947, "grad_norm": 17.94466209411621, "learning_rate": 0.0001, "loss": 0.3568, "step": 470 }, { "epoch": 3.1578947368421053, "grad_norm": 15.609881401062012, "learning_rate": 0.0001, "loss": 0.3631, "step": 480 }, { "epoch": 3.223684210526316, "grad_norm": 15.842222213745117, "learning_rate": 0.0001, "loss": 0.3499, "step": 490 }, { "epoch": 3.2894736842105265, "grad_norm": 14.633220672607422, "learning_rate": 0.0001, "loss": 0.3442, "step": 500 }, { "epoch": 3.3552631578947367, "grad_norm": 16.64904022216797, "learning_rate": 0.0001, "loss": 0.3311, "step": 510 }, { "epoch": 3.4210526315789473, "grad_norm": 15.60223388671875, "learning_rate": 0.0001, "loss": 0.3356, "step": 520 }, { "epoch": 3.486842105263158, "grad_norm": 14.84737491607666, "learning_rate": 0.0001, "loss": 0.3172, "step": 530 }, { "epoch": 3.5526315789473686, "grad_norm": 15.952341079711914, "learning_rate": 0.0001, "loss": 0.3196, "step": 540 }, { "epoch": 3.6184210526315788, "grad_norm": 14.867337226867676, "learning_rate": 0.0001, "loss": 0.3137, "step": 550 }, { "epoch": 3.6842105263157894, "grad_norm": 12.948201179504395, "learning_rate": 0.0001, "loss": 0.2949, "step": 560 }, { "epoch": 3.75, "grad_norm": 13.009255409240723, "learning_rate": 0.0001, "loss": 0.2955, "step": 570 }, { "epoch": 3.8157894736842106, "grad_norm": 13.705317497253418, "learning_rate": 0.0001, "loss": 0.2905, "step": 580 }, { "epoch": 3.8815789473684212, "grad_norm": 17.22597885131836, "learning_rate": 0.0001, "loss": 0.2974, "step": 590 }, { "epoch": 3.9473684210526314, "grad_norm": 14.919693946838379, "learning_rate": 0.0001, "loss": 0.2898, "step": 600 }, { "epoch": 4.0131578947368425, "grad_norm": 18.780914306640625, "learning_rate": 0.0001, "loss": 0.2816, "step": 610 }, { "epoch": 4.078947368421052, "grad_norm": 16.027746200561523, "learning_rate": 0.0001, "loss": 0.2761, "step": 620 }, { "epoch": 4.144736842105263, "grad_norm": 13.083127975463867, "learning_rate": 0.0001, "loss": 0.2688, "step": 630 }, { "epoch": 4.2105263157894735, "grad_norm": 13.55245590209961, "learning_rate": 0.0001, "loss": 0.2667, "step": 640 }, { "epoch": 4.276315789473684, "grad_norm": 11.151076316833496, "learning_rate": 0.0001, "loss": 0.2677, "step": 650 }, { "epoch": 4.342105263157895, "grad_norm": 12.883306503295898, "learning_rate": 0.0001, "loss": 0.2619, "step": 660 }, { "epoch": 4.407894736842105, "grad_norm": 13.993839263916016, "learning_rate": 0.0001, "loss": 0.2626, "step": 670 }, { "epoch": 4.473684210526316, "grad_norm": 13.596793174743652, "learning_rate": 0.0001, "loss": 0.253, "step": 680 }, { "epoch": 4.5394736842105265, "grad_norm": 12.375000953674316, "learning_rate": 0.0001, "loss": 0.2449, "step": 690 }, { "epoch": 4.605263157894737, "grad_norm": 12.333367347717285, "learning_rate": 0.0001, "loss": 0.2449, "step": 700 }, { "epoch": 4.671052631578947, "grad_norm": 14.13685131072998, "learning_rate": 0.0001, "loss": 0.2363, "step": 710 }, { "epoch": 4.7368421052631575, "grad_norm": 12.23291015625, "learning_rate": 0.0001, "loss": 0.2374, "step": 720 }, { "epoch": 4.802631578947368, "grad_norm": 11.502880096435547, "learning_rate": 0.0001, "loss": 0.2289, "step": 730 }, { "epoch": 4.868421052631579, "grad_norm": 10.85533332824707, "learning_rate": 0.0001, "loss": 0.2266, "step": 740 }, { "epoch": 4.934210526315789, "grad_norm": 12.013697624206543, "learning_rate": 0.0001, "loss": 0.2188, "step": 750 }, { "epoch": 5.0, "grad_norm": 13.170636177062988, "learning_rate": 0.0001, "loss": 0.2321, "step": 760 }, { "epoch": 5.065789473684211, "grad_norm": 12.095258712768555, "learning_rate": 0.0001, "loss": 0.2229, "step": 770 }, { "epoch": 5.131578947368421, "grad_norm": 10.180475234985352, "learning_rate": 0.0001, "loss": 0.2231, "step": 780 }, { "epoch": 5.197368421052632, "grad_norm": 13.165190696716309, "learning_rate": 0.0001, "loss": 0.223, "step": 790 }, { "epoch": 5.2631578947368425, "grad_norm": 10.063823699951172, "learning_rate": 0.0001, "loss": 0.2118, "step": 800 }, { "epoch": 5.328947368421053, "grad_norm": 12.834861755371094, "learning_rate": 0.0001, "loss": 0.217, "step": 810 }, { "epoch": 5.394736842105263, "grad_norm": 12.119063377380371, "learning_rate": 0.0001, "loss": 0.2053, "step": 820 }, { "epoch": 5.4605263157894735, "grad_norm": 12.132641792297363, "learning_rate": 0.0001, "loss": 0.206, "step": 830 }, { "epoch": 5.526315789473684, "grad_norm": 11.347237586975098, "learning_rate": 0.0001, "loss": 0.201, "step": 840 }, { "epoch": 5.592105263157895, "grad_norm": 11.609687805175781, "learning_rate": 0.0001, "loss": 0.2039, "step": 850 }, { "epoch": 5.657894736842105, "grad_norm": 10.659272193908691, "learning_rate": 0.0001, "loss": 0.203, "step": 860 }, { "epoch": 5.723684210526316, "grad_norm": 12.028623580932617, "learning_rate": 0.0001, "loss": 0.21, "step": 870 }, { "epoch": 5.7894736842105265, "grad_norm": 10.963968276977539, "learning_rate": 0.0001, "loss": 0.2019, "step": 880 }, { "epoch": 5.855263157894737, "grad_norm": 10.573838233947754, "learning_rate": 0.0001, "loss": 0.206, "step": 890 }, { "epoch": 5.921052631578947, "grad_norm": 10.971415519714355, "learning_rate": 0.0001, "loss": 0.1975, "step": 900 }, { "epoch": 5.9868421052631575, "grad_norm": 12.391592025756836, "learning_rate": 0.0001, "loss": 0.2002, "step": 910 }, { "epoch": 6.052631578947368, "grad_norm": 11.677957534790039, "learning_rate": 0.0001, "loss": 0.1949, "step": 920 }, { "epoch": 6.118421052631579, "grad_norm": 9.757022857666016, "learning_rate": 0.0001, "loss": 0.1887, "step": 930 }, { "epoch": 6.184210526315789, "grad_norm": 10.96141529083252, "learning_rate": 0.0001, "loss": 0.1888, "step": 940 }, { "epoch": 6.25, "grad_norm": 11.207174301147461, "learning_rate": 0.0001, "loss": 0.1881, "step": 950 }, { "epoch": 6.315789473684211, "grad_norm": 9.526222229003906, "learning_rate": 0.0001, "loss": 0.19, "step": 960 }, { "epoch": 6.381578947368421, "grad_norm": 10.266683578491211, "learning_rate": 0.0001, "loss": 0.18, "step": 970 }, { "epoch": 6.447368421052632, "grad_norm": 10.702817916870117, "learning_rate": 0.0001, "loss": 0.1877, "step": 980 }, { "epoch": 6.5131578947368425, "grad_norm": 9.538935661315918, "learning_rate": 0.0001, "loss": 0.1879, "step": 990 }, { "epoch": 6.578947368421053, "grad_norm": 9.56125545501709, "learning_rate": 0.0001, "loss": 0.1785, "step": 1000 }, { "epoch": 6.644736842105263, "grad_norm": 9.874727249145508, "learning_rate": 0.0001, "loss": 0.1856, "step": 1010 }, { "epoch": 6.7105263157894735, "grad_norm": 9.713470458984375, "learning_rate": 0.0001, "loss": 0.1788, "step": 1020 }, { "epoch": 6.776315789473684, "grad_norm": 9.037981986999512, "learning_rate": 0.0001, "loss": 0.1694, "step": 1030 }, { "epoch": 6.842105263157895, "grad_norm": 10.102965354919434, "learning_rate": 0.0001, "loss": 0.1689, "step": 1040 }, { "epoch": 6.907894736842105, "grad_norm": 9.202942848205566, "learning_rate": 0.0001, "loss": 0.18, "step": 1050 }, { "epoch": 6.973684210526316, "grad_norm": 10.504417419433594, "learning_rate": 0.0001, "loss": 0.1721, "step": 1060 }, { "epoch": 7.0394736842105265, "grad_norm": 8.70566463470459, "learning_rate": 0.0001, "loss": 0.1724, "step": 1070 }, { "epoch": 7.105263157894737, "grad_norm": 10.730690956115723, "learning_rate": 0.0001, "loss": 0.1737, "step": 1080 }, { "epoch": 7.171052631578948, "grad_norm": 9.544865608215332, "learning_rate": 0.0001, "loss": 0.1752, "step": 1090 }, { "epoch": 7.2368421052631575, "grad_norm": 10.796236038208008, "learning_rate": 0.0001, "loss": 0.174, "step": 1100 }, { "epoch": 7.302631578947368, "grad_norm": 9.174856185913086, "learning_rate": 0.0001, "loss": 0.1705, "step": 1110 }, { "epoch": 7.368421052631579, "grad_norm": 10.11459732055664, "learning_rate": 0.0001, "loss": 0.1612, "step": 1120 }, { "epoch": 7.434210526315789, "grad_norm": 10.245781898498535, "learning_rate": 0.0001, "loss": 0.1604, "step": 1130 }, { "epoch": 7.5, "grad_norm": 9.092036247253418, "learning_rate": 0.0001, "loss": 0.1582, "step": 1140 }, { "epoch": 7.565789473684211, "grad_norm": 8.524528503417969, "learning_rate": 0.0001, "loss": 0.1528, "step": 1150 }, { "epoch": 7.631578947368421, "grad_norm": 9.26682186126709, "learning_rate": 0.0001, "loss": 0.1543, "step": 1160 }, { "epoch": 7.697368421052632, "grad_norm": 10.105231285095215, "learning_rate": 0.0001, "loss": 0.1629, "step": 1170 }, { "epoch": 7.7631578947368425, "grad_norm": 9.100499153137207, "learning_rate": 0.0001, "loss": 0.1575, "step": 1180 }, { "epoch": 7.828947368421053, "grad_norm": 7.819246768951416, "learning_rate": 0.0001, "loss": 0.1586, "step": 1190 }, { "epoch": 7.894736842105263, "grad_norm": 8.291773796081543, "learning_rate": 0.0001, "loss": 0.1518, "step": 1200 }, { "epoch": 7.9605263157894735, "grad_norm": 8.024109840393066, "learning_rate": 0.0001, "loss": 0.1551, "step": 1210 }, { "epoch": 8.026315789473685, "grad_norm": 8.904583930969238, "learning_rate": 0.0001, "loss": 0.1506, "step": 1220 }, { "epoch": 8.092105263157896, "grad_norm": 7.31992769241333, "learning_rate": 0.0001, "loss": 0.1453, "step": 1230 }, { "epoch": 8.157894736842104, "grad_norm": 9.026426315307617, "learning_rate": 0.0001, "loss": 0.1496, "step": 1240 }, { "epoch": 8.223684210526315, "grad_norm": 9.364782333374023, "learning_rate": 0.0001, "loss": 0.1504, "step": 1250 }, { "epoch": 8.289473684210526, "grad_norm": 8.736329078674316, "learning_rate": 0.0001, "loss": 0.1445, "step": 1260 }, { "epoch": 8.355263157894736, "grad_norm": 11.340313911437988, "learning_rate": 0.0001, "loss": 0.1469, "step": 1270 }, { "epoch": 8.421052631578947, "grad_norm": 10.251652717590332, "learning_rate": 0.0001, "loss": 0.1559, "step": 1280 }, { "epoch": 8.486842105263158, "grad_norm": 9.239073753356934, "learning_rate": 0.0001, "loss": 0.1456, "step": 1290 }, { "epoch": 8.552631578947368, "grad_norm": 8.867827415466309, "learning_rate": 0.0001, "loss": 0.1507, "step": 1300 }, { "epoch": 8.618421052631579, "grad_norm": 9.638751983642578, "learning_rate": 0.0001, "loss": 0.1521, "step": 1310 }, { "epoch": 8.68421052631579, "grad_norm": 9.176225662231445, "learning_rate": 0.0001, "loss": 0.1474, "step": 1320 }, { "epoch": 8.75, "grad_norm": 8.754321098327637, "learning_rate": 0.0001, "loss": 0.1423, "step": 1330 }, { "epoch": 8.81578947368421, "grad_norm": 9.146409034729004, "learning_rate": 0.0001, "loss": 0.1397, "step": 1340 }, { "epoch": 8.881578947368421, "grad_norm": 9.756692886352539, "learning_rate": 0.0001, "loss": 0.1339, "step": 1350 }, { "epoch": 8.947368421052632, "grad_norm": 8.44552230834961, "learning_rate": 0.0001, "loss": 0.1433, "step": 1360 }, { "epoch": 9.013157894736842, "grad_norm": 9.708316802978516, "learning_rate": 0.0001, "loss": 0.1443, "step": 1370 }, { "epoch": 9.078947368421053, "grad_norm": 10.184338569641113, "learning_rate": 0.0001, "loss": 0.1397, "step": 1380 }, { "epoch": 9.144736842105264, "grad_norm": 8.404060363769531, "learning_rate": 0.0001, "loss": 0.1458, "step": 1390 }, { "epoch": 9.210526315789474, "grad_norm": 6.799143314361572, "learning_rate": 0.0001, "loss": 0.1391, "step": 1400 }, { "epoch": 9.276315789473685, "grad_norm": 6.993669033050537, "learning_rate": 0.0001, "loss": 0.1356, "step": 1410 }, { "epoch": 9.342105263157896, "grad_norm": 7.0708537101745605, "learning_rate": 0.0001, "loss": 0.1348, "step": 1420 }, { "epoch": 9.407894736842104, "grad_norm": 7.106939792633057, "learning_rate": 0.0001, "loss": 0.1395, "step": 1430 }, { "epoch": 9.473684210526315, "grad_norm": 8.091696739196777, "learning_rate": 0.0001, "loss": 0.1347, "step": 1440 }, { "epoch": 9.539473684210526, "grad_norm": 7.712802410125732, "learning_rate": 0.0001, "loss": 0.136, "step": 1450 }, { "epoch": 9.605263157894736, "grad_norm": 7.227226734161377, "learning_rate": 0.0001, "loss": 0.1302, "step": 1460 }, { "epoch": 9.671052631578947, "grad_norm": 7.4794464111328125, "learning_rate": 0.0001, "loss": 0.1238, "step": 1470 }, { "epoch": 9.736842105263158, "grad_norm": 8.05570125579834, "learning_rate": 0.0001, "loss": 0.134, "step": 1480 }, { "epoch": 9.802631578947368, "grad_norm": 8.51727294921875, "learning_rate": 0.0001, "loss": 0.1262, "step": 1490 }, { "epoch": 9.868421052631579, "grad_norm": 8.46773910522461, "learning_rate": 0.0001, "loss": 0.133, "step": 1500 }, { "epoch": 9.93421052631579, "grad_norm": 8.314281463623047, "learning_rate": 0.0001, "loss": 0.1297, "step": 1510 }, { "epoch": 10.0, "grad_norm": 8.289139747619629, "learning_rate": 0.0001, "loss": 0.131, "step": 1520 }, { "epoch": 10.06578947368421, "grad_norm": 8.90967082977295, "learning_rate": 0.0001, "loss": 0.1232, "step": 1530 }, { "epoch": 10.131578947368421, "grad_norm": 8.999959945678711, "learning_rate": 0.0001, "loss": 0.1273, "step": 1540 }, { "epoch": 10.197368421052632, "grad_norm": 8.188413619995117, "learning_rate": 0.0001, "loss": 0.1263, "step": 1550 }, { "epoch": 10.263157894736842, "grad_norm": 8.14466381072998, "learning_rate": 0.0001, "loss": 0.1204, "step": 1560 }, { "epoch": 10.328947368421053, "grad_norm": 8.005374908447266, "learning_rate": 0.0001, "loss": 0.1232, "step": 1570 }, { "epoch": 10.394736842105264, "grad_norm": 8.354129791259766, "learning_rate": 0.0001, "loss": 0.1164, "step": 1580 }, { "epoch": 10.460526315789474, "grad_norm": 7.809772491455078, "learning_rate": 0.0001, "loss": 0.1256, "step": 1590 }, { "epoch": 10.526315789473685, "grad_norm": 7.232692241668701, "learning_rate": 0.0001, "loss": 0.1226, "step": 1600 }, { "epoch": 10.592105263157894, "grad_norm": 7.019472122192383, "learning_rate": 0.0001, "loss": 0.1184, "step": 1610 }, { "epoch": 10.657894736842106, "grad_norm": 6.870980262756348, "learning_rate": 0.0001, "loss": 0.1196, "step": 1620 }, { "epoch": 10.723684210526315, "grad_norm": 7.686590671539307, "learning_rate": 0.0001, "loss": 0.123, "step": 1630 }, { "epoch": 10.789473684210526, "grad_norm": 6.786712169647217, "learning_rate": 0.0001, "loss": 0.119, "step": 1640 }, { "epoch": 10.855263157894736, "grad_norm": 8.563821792602539, "learning_rate": 0.0001, "loss": 0.1186, "step": 1650 }, { "epoch": 10.921052631578947, "grad_norm": 8.261466026306152, "learning_rate": 0.0001, "loss": 0.1246, "step": 1660 }, { "epoch": 10.986842105263158, "grad_norm": 6.972947120666504, "learning_rate": 0.0001, "loss": 0.123, "step": 1670 }, { "epoch": 11.052631578947368, "grad_norm": 8.273945808410645, "learning_rate": 0.0001, "loss": 0.1213, "step": 1680 }, { "epoch": 11.118421052631579, "grad_norm": 6.411956310272217, "learning_rate": 0.0001, "loss": 0.1123, "step": 1690 }, { "epoch": 11.18421052631579, "grad_norm": 7.9852166175842285, "learning_rate": 0.0001, "loss": 0.1125, "step": 1700 }, { "epoch": 11.25, "grad_norm": 6.184416770935059, "learning_rate": 0.0001, "loss": 0.1113, "step": 1710 }, { "epoch": 11.31578947368421, "grad_norm": 7.070769786834717, "learning_rate": 0.0001, "loss": 0.115, "step": 1720 }, { "epoch": 11.381578947368421, "grad_norm": 6.914794921875, "learning_rate": 0.0001, "loss": 0.111, "step": 1730 }, { "epoch": 11.447368421052632, "grad_norm": 7.74068546295166, "learning_rate": 0.0001, "loss": 0.113, "step": 1740 }, { "epoch": 11.513157894736842, "grad_norm": 6.931029319763184, "learning_rate": 0.0001, "loss": 0.1152, "step": 1750 }, { "epoch": 11.578947368421053, "grad_norm": 5.994091510772705, "learning_rate": 0.0001, "loss": 0.1119, "step": 1760 }, { "epoch": 11.644736842105264, "grad_norm": 6.253146648406982, "learning_rate": 0.0001, "loss": 0.1093, "step": 1770 }, { "epoch": 11.710526315789474, "grad_norm": 5.8790974617004395, "learning_rate": 0.0001, "loss": 0.1122, "step": 1780 }, { "epoch": 11.776315789473685, "grad_norm": 6.187469959259033, "learning_rate": 0.0001, "loss": 0.1122, "step": 1790 }, { "epoch": 11.842105263157894, "grad_norm": 5.537177562713623, "learning_rate": 0.0001, "loss": 0.1123, "step": 1800 }, { "epoch": 11.907894736842106, "grad_norm": 7.146965503692627, "learning_rate": 0.0001, "loss": 0.1118, "step": 1810 }, { "epoch": 11.973684210526315, "grad_norm": 6.597342491149902, "learning_rate": 0.0001, "loss": 0.1053, "step": 1820 }, { "epoch": 12.039473684210526, "grad_norm": 6.711949825286865, "learning_rate": 0.0001, "loss": 0.1098, "step": 1830 }, { "epoch": 12.105263157894736, "grad_norm": 5.853811264038086, "learning_rate": 0.0001, "loss": 0.1072, "step": 1840 }, { "epoch": 12.171052631578947, "grad_norm": 5.994247913360596, "learning_rate": 0.0001, "loss": 0.1058, "step": 1850 }, { "epoch": 12.236842105263158, "grad_norm": 4.701684951782227, "learning_rate": 0.0001, "loss": 0.1044, "step": 1860 }, { "epoch": 12.302631578947368, "grad_norm": 6.289170742034912, "learning_rate": 0.0001, "loss": 0.1025, "step": 1870 }, { "epoch": 12.368421052631579, "grad_norm": 4.761482238769531, "learning_rate": 0.0001, "loss": 0.1028, "step": 1880 }, { "epoch": 12.43421052631579, "grad_norm": 7.5639448165893555, "learning_rate": 0.0001, "loss": 0.1028, "step": 1890 }, { "epoch": 12.5, "grad_norm": 5.687307357788086, "learning_rate": 0.0001, "loss": 0.1056, "step": 1900 }, { "epoch": 12.56578947368421, "grad_norm": 6.268471717834473, "learning_rate": 0.0001, "loss": 0.0987, "step": 1910 }, { "epoch": 12.631578947368421, "grad_norm": 7.3990254402160645, "learning_rate": 0.0001, "loss": 0.0981, "step": 1920 }, { "epoch": 12.697368421052632, "grad_norm": 6.721970558166504, "learning_rate": 0.0001, "loss": 0.1024, "step": 1930 }, { "epoch": 12.763157894736842, "grad_norm": 6.922434329986572, "learning_rate": 0.0001, "loss": 0.1034, "step": 1940 }, { "epoch": 12.828947368421053, "grad_norm": 6.5644097328186035, "learning_rate": 0.0001, "loss": 0.1012, "step": 1950 }, { "epoch": 12.894736842105264, "grad_norm": 5.8850908279418945, "learning_rate": 0.0001, "loss": 0.1011, "step": 1960 }, { "epoch": 12.960526315789474, "grad_norm": 7.03394889831543, "learning_rate": 0.0001, "loss": 0.1031, "step": 1970 }, { "epoch": 13.026315789473685, "grad_norm": 7.228178024291992, "learning_rate": 0.0001, "loss": 0.1021, "step": 1980 }, { "epoch": 13.092105263157896, "grad_norm": 6.708334922790527, "learning_rate": 0.0001, "loss": 0.1003, "step": 1990 }, { "epoch": 13.157894736842104, "grad_norm": 6.730384826660156, "learning_rate": 0.0001, "loss": 0.1034, "step": 2000 }, { "epoch": 13.223684210526315, "grad_norm": 6.330638408660889, "learning_rate": 0.0001, "loss": 0.0981, "step": 2010 }, { "epoch": 13.289473684210526, "grad_norm": 6.243671417236328, "learning_rate": 0.0001, "loss": 0.1005, "step": 2020 }, { "epoch": 13.355263157894736, "grad_norm": 7.014003276824951, "learning_rate": 0.0001, "loss": 0.0937, "step": 2030 }, { "epoch": 13.421052631578947, "grad_norm": 6.188398361206055, "learning_rate": 0.0001, "loss": 0.0967, "step": 2040 }, { "epoch": 13.486842105263158, "grad_norm": 6.1966938972473145, "learning_rate": 0.0001, "loss": 0.1026, "step": 2050 }, { "epoch": 13.552631578947368, "grad_norm": 5.604138374328613, "learning_rate": 0.0001, "loss": 0.1022, "step": 2060 }, { "epoch": 13.618421052631579, "grad_norm": 6.079825401306152, "learning_rate": 0.0001, "loss": 0.096, "step": 2070 }, { "epoch": 13.68421052631579, "grad_norm": 6.047208786010742, "learning_rate": 0.0001, "loss": 0.0943, "step": 2080 }, { "epoch": 13.75, "grad_norm": 6.325393199920654, "learning_rate": 0.0001, "loss": 0.0975, "step": 2090 }, { "epoch": 13.81578947368421, "grad_norm": 5.832712650299072, "learning_rate": 0.0001, "loss": 0.0944, "step": 2100 }, { "epoch": 13.881578947368421, "grad_norm": 5.263245105743408, "learning_rate": 0.0001, "loss": 0.0933, "step": 2110 }, { "epoch": 13.947368421052632, "grad_norm": 5.7324934005737305, "learning_rate": 0.0001, "loss": 0.0919, "step": 2120 }, { "epoch": 14.013157894736842, "grad_norm": 4.923053741455078, "learning_rate": 0.0001, "loss": 0.0895, "step": 2130 }, { "epoch": 14.078947368421053, "grad_norm": 5.948666095733643, "learning_rate": 0.0001, "loss": 0.0914, "step": 2140 }, { "epoch": 14.144736842105264, "grad_norm": 5.863275051116943, "learning_rate": 0.0001, "loss": 0.0898, "step": 2150 }, { "epoch": 14.210526315789474, "grad_norm": 5.876058101654053, "learning_rate": 0.0001, "loss": 0.0926, "step": 2160 }, { "epoch": 14.276315789473685, "grad_norm": 5.606838703155518, "learning_rate": 0.0001, "loss": 0.0927, "step": 2170 }, { "epoch": 14.342105263157896, "grad_norm": 5.575480937957764, "learning_rate": 0.0001, "loss": 0.0929, "step": 2180 }, { "epoch": 14.407894736842104, "grad_norm": 5.6978440284729, "learning_rate": 0.0001, "loss": 0.0933, "step": 2190 }, { "epoch": 14.473684210526315, "grad_norm": 6.567765712738037, "learning_rate": 0.0001, "loss": 0.09, "step": 2200 }, { "epoch": 14.539473684210526, "grad_norm": 6.093446254730225, "learning_rate": 0.0001, "loss": 0.0933, "step": 2210 }, { "epoch": 14.605263157894736, "grad_norm": 5.184361457824707, "learning_rate": 0.0001, "loss": 0.0942, "step": 2220 }, { "epoch": 14.671052631578947, "grad_norm": 5.555819511413574, "learning_rate": 0.0001, "loss": 0.086, "step": 2230 }, { "epoch": 14.736842105263158, "grad_norm": 5.339320182800293, "learning_rate": 0.0001, "loss": 0.0918, "step": 2240 }, { "epoch": 14.802631578947368, "grad_norm": 5.596290588378906, "learning_rate": 0.0001, "loss": 0.0912, "step": 2250 }, { "epoch": 14.868421052631579, "grad_norm": 4.995247840881348, "learning_rate": 0.0001, "loss": 0.0881, "step": 2260 }, { "epoch": 14.93421052631579, "grad_norm": 5.6531548500061035, "learning_rate": 0.0001, "loss": 0.0871, "step": 2270 }, { "epoch": 15.0, "grad_norm": 5.662561893463135, "learning_rate": 0.0001, "loss": 0.0852, "step": 2280 }, { "epoch": 15.06578947368421, "grad_norm": 5.818594932556152, "learning_rate": 0.0001, "loss": 0.087, "step": 2290 }, { "epoch": 15.131578947368421, "grad_norm": 6.480447769165039, "learning_rate": 0.0001, "loss": 0.0856, "step": 2300 }, { "epoch": 15.197368421052632, "grad_norm": 5.37237548828125, "learning_rate": 0.0001, "loss": 0.0855, "step": 2310 }, { "epoch": 15.263157894736842, "grad_norm": 5.8733696937561035, "learning_rate": 0.0001, "loss": 0.0835, "step": 2320 }, { "epoch": 15.328947368421053, "grad_norm": 6.209512710571289, "learning_rate": 0.0001, "loss": 0.0856, "step": 2330 }, { "epoch": 15.394736842105264, "grad_norm": 6.127361297607422, "learning_rate": 0.0001, "loss": 0.0864, "step": 2340 }, { "epoch": 15.460526315789474, "grad_norm": 5.621913433074951, "learning_rate": 0.0001, "loss": 0.0818, "step": 2350 }, { "epoch": 15.526315789473685, "grad_norm": 5.90310525894165, "learning_rate": 0.0001, "loss": 0.0811, "step": 2360 }, { "epoch": 15.592105263157894, "grad_norm": 5.0243239402771, "learning_rate": 0.0001, "loss": 0.0819, "step": 2370 }, { "epoch": 15.657894736842106, "grad_norm": 6.154341697692871, "learning_rate": 0.0001, "loss": 0.0839, "step": 2380 }, { "epoch": 15.723684210526315, "grad_norm": 5.303261756896973, "learning_rate": 0.0001, "loss": 0.0798, "step": 2390 }, { "epoch": 15.789473684210526, "grad_norm": 5.788863658905029, "learning_rate": 0.0001, "loss": 0.0804, "step": 2400 }, { "epoch": 15.855263157894736, "grad_norm": 5.377126693725586, "learning_rate": 0.0001, "loss": 0.0812, "step": 2410 }, { "epoch": 15.921052631578947, "grad_norm": 5.7089972496032715, "learning_rate": 0.0001, "loss": 0.0777, "step": 2420 }, { "epoch": 15.986842105263158, "grad_norm": 5.824652671813965, "learning_rate": 0.0001, "loss": 0.0834, "step": 2430 }, { "epoch": 16.05263157894737, "grad_norm": 4.787572383880615, "learning_rate": 0.0001, "loss": 0.078, "step": 2440 }, { "epoch": 16.11842105263158, "grad_norm": 6.0126800537109375, "learning_rate": 0.0001, "loss": 0.0794, "step": 2450 }, { "epoch": 16.18421052631579, "grad_norm": 6.0084919929504395, "learning_rate": 0.0001, "loss": 0.08, "step": 2460 }, { "epoch": 16.25, "grad_norm": 5.7310380935668945, "learning_rate": 0.0001, "loss": 0.0795, "step": 2470 }, { "epoch": 16.31578947368421, "grad_norm": 5.645249366760254, "learning_rate": 0.0001, "loss": 0.0772, "step": 2480 }, { "epoch": 16.38157894736842, "grad_norm": 4.670828342437744, "learning_rate": 0.0001, "loss": 0.0838, "step": 2490 }, { "epoch": 16.44736842105263, "grad_norm": 4.980633735656738, "learning_rate": 0.0001, "loss": 0.0772, "step": 2500 }, { "epoch": 16.513157894736842, "grad_norm": 4.4033660888671875, "learning_rate": 0.0001, "loss": 0.0819, "step": 2510 }, { "epoch": 16.57894736842105, "grad_norm": 5.503461837768555, "learning_rate": 0.0001, "loss": 0.0792, "step": 2520 }, { "epoch": 16.644736842105264, "grad_norm": 4.978002071380615, "learning_rate": 0.0001, "loss": 0.0797, "step": 2530 }, { "epoch": 16.710526315789473, "grad_norm": 4.877954006195068, "learning_rate": 0.0001, "loss": 0.0792, "step": 2540 }, { "epoch": 16.776315789473685, "grad_norm": 4.718324184417725, "learning_rate": 0.0001, "loss": 0.0781, "step": 2550 }, { "epoch": 16.842105263157894, "grad_norm": 4.839942455291748, "learning_rate": 0.0001, "loss": 0.0768, "step": 2560 }, { "epoch": 16.907894736842106, "grad_norm": 4.971443176269531, "learning_rate": 0.0001, "loss": 0.081, "step": 2570 }, { "epoch": 16.973684210526315, "grad_norm": 6.21569299697876, "learning_rate": 0.0001, "loss": 0.0835, "step": 2580 }, { "epoch": 17.039473684210527, "grad_norm": 5.315542697906494, "learning_rate": 0.0001, "loss": 0.0771, "step": 2590 }, { "epoch": 17.105263157894736, "grad_norm": 5.427717685699463, "learning_rate": 0.0001, "loss": 0.0807, "step": 2600 }, { "epoch": 17.17105263157895, "grad_norm": 4.329200267791748, "learning_rate": 0.0001, "loss": 0.0737, "step": 2610 }, { "epoch": 17.236842105263158, "grad_norm": 4.520540714263916, "learning_rate": 0.0001, "loss": 0.0826, "step": 2620 }, { "epoch": 17.30263157894737, "grad_norm": 4.716585636138916, "learning_rate": 0.0001, "loss": 0.0765, "step": 2630 }, { "epoch": 17.36842105263158, "grad_norm": 5.242930889129639, "learning_rate": 0.0001, "loss": 0.0739, "step": 2640 }, { "epoch": 17.43421052631579, "grad_norm": 4.814362525939941, "learning_rate": 0.0001, "loss": 0.0766, "step": 2650 }, { "epoch": 17.5, "grad_norm": 4.858095645904541, "learning_rate": 0.0001, "loss": 0.0761, "step": 2660 }, { "epoch": 17.56578947368421, "grad_norm": 4.718443393707275, "learning_rate": 0.0001, "loss": 0.073, "step": 2670 }, { "epoch": 17.63157894736842, "grad_norm": 4.559201240539551, "learning_rate": 0.0001, "loss": 0.0749, "step": 2680 }, { "epoch": 17.69736842105263, "grad_norm": 3.9688594341278076, "learning_rate": 0.0001, "loss": 0.0782, "step": 2690 }, { "epoch": 17.763157894736842, "grad_norm": 4.480958938598633, "learning_rate": 0.0001, "loss": 0.0776, "step": 2700 }, { "epoch": 17.82894736842105, "grad_norm": 5.901112079620361, "learning_rate": 0.0001, "loss": 0.0756, "step": 2710 }, { "epoch": 17.894736842105264, "grad_norm": 5.043224811553955, "learning_rate": 0.0001, "loss": 0.0784, "step": 2720 }, { "epoch": 17.960526315789473, "grad_norm": 4.380178451538086, "learning_rate": 0.0001, "loss": 0.0725, "step": 2730 }, { "epoch": 18.026315789473685, "grad_norm": 3.8729310035705566, "learning_rate": 0.0001, "loss": 0.0751, "step": 2740 }, { "epoch": 18.092105263157894, "grad_norm": 4.4962639808654785, "learning_rate": 0.0001, "loss": 0.0727, "step": 2750 }, { "epoch": 18.157894736842106, "grad_norm": 4.605258941650391, "learning_rate": 0.0001, "loss": 0.0745, "step": 2760 }, { "epoch": 18.223684210526315, "grad_norm": 4.574273586273193, "learning_rate": 0.0001, "loss": 0.0758, "step": 2770 }, { "epoch": 18.289473684210527, "grad_norm": 4.572812557220459, "learning_rate": 0.0001, "loss": 0.0755, "step": 2780 }, { "epoch": 18.355263157894736, "grad_norm": 4.3042378425598145, "learning_rate": 0.0001, "loss": 0.0707, "step": 2790 }, { "epoch": 18.42105263157895, "grad_norm": 4.969744682312012, "learning_rate": 0.0001, "loss": 0.0726, "step": 2800 }, { "epoch": 18.486842105263158, "grad_norm": 5.771834850311279, "learning_rate": 0.0001, "loss": 0.0723, "step": 2810 }, { "epoch": 18.55263157894737, "grad_norm": 4.393187046051025, "learning_rate": 0.0001, "loss": 0.0732, "step": 2820 }, { "epoch": 18.61842105263158, "grad_norm": 4.077311038970947, "learning_rate": 0.0001, "loss": 0.0703, "step": 2830 }, { "epoch": 18.68421052631579, "grad_norm": 3.5009090900421143, "learning_rate": 0.0001, "loss": 0.0705, "step": 2840 }, { "epoch": 18.75, "grad_norm": 4.456014156341553, "learning_rate": 0.0001, "loss": 0.0685, "step": 2850 }, { "epoch": 18.81578947368421, "grad_norm": 4.8032307624816895, "learning_rate": 0.0001, "loss": 0.0707, "step": 2860 }, { "epoch": 18.88157894736842, "grad_norm": 4.6024603843688965, "learning_rate": 0.0001, "loss": 0.0754, "step": 2870 }, { "epoch": 18.94736842105263, "grad_norm": 5.125010013580322, "learning_rate": 0.0001, "loss": 0.0707, "step": 2880 }, { "epoch": 19.013157894736842, "grad_norm": 4.631536483764648, "learning_rate": 0.0001, "loss": 0.0743, "step": 2890 }, { "epoch": 19.07894736842105, "grad_norm": 4.642434120178223, "learning_rate": 0.0001, "loss": 0.0679, "step": 2900 }, { "epoch": 19.144736842105264, "grad_norm": 4.4804911613464355, "learning_rate": 0.0001, "loss": 0.0688, "step": 2910 }, { "epoch": 19.210526315789473, "grad_norm": 3.7083773612976074, "learning_rate": 0.0001, "loss": 0.0698, "step": 2920 }, { "epoch": 19.276315789473685, "grad_norm": 4.256147384643555, "learning_rate": 0.0001, "loss": 0.0725, "step": 2930 }, { "epoch": 19.342105263157894, "grad_norm": 4.125244140625, "learning_rate": 0.0001, "loss": 0.0674, "step": 2940 }, { "epoch": 19.407894736842106, "grad_norm": 4.567220211029053, "learning_rate": 0.0001, "loss": 0.0683, "step": 2950 }, { "epoch": 19.473684210526315, "grad_norm": 4.298739433288574, "learning_rate": 0.0001, "loss": 0.0674, "step": 2960 }, { "epoch": 19.539473684210527, "grad_norm": 4.303213596343994, "learning_rate": 0.0001, "loss": 0.0704, "step": 2970 }, { "epoch": 19.605263157894736, "grad_norm": 4.842376232147217, "learning_rate": 0.0001, "loss": 0.0687, "step": 2980 }, { "epoch": 19.67105263157895, "grad_norm": 4.336226940155029, "learning_rate": 0.0001, "loss": 0.0693, "step": 2990 }, { "epoch": 19.736842105263158, "grad_norm": 4.425253868103027, "learning_rate": 0.0001, "loss": 0.0674, "step": 3000 }, { "epoch": 19.80263157894737, "grad_norm": 4.074360370635986, "learning_rate": 0.0001, "loss": 0.0622, "step": 3010 }, { "epoch": 19.86842105263158, "grad_norm": 4.191122531890869, "learning_rate": 0.0001, "loss": 0.0692, "step": 3020 }, { "epoch": 19.93421052631579, "grad_norm": 4.672008514404297, "learning_rate": 0.0001, "loss": 0.0653, "step": 3030 }, { "epoch": 20.0, "grad_norm": 4.377945423126221, "learning_rate": 0.0001, "loss": 0.0627, "step": 3040 }, { "epoch": 20.06578947368421, "grad_norm": 4.234347820281982, "learning_rate": 0.0001, "loss": 0.0642, "step": 3050 }, { "epoch": 20.13157894736842, "grad_norm": 4.540591716766357, "learning_rate": 0.0001, "loss": 0.0683, "step": 3060 }, { "epoch": 20.19736842105263, "grad_norm": 4.472556114196777, "learning_rate": 0.0001, "loss": 0.0631, "step": 3070 }, { "epoch": 20.263157894736842, "grad_norm": 4.038685321807861, "learning_rate": 0.0001, "loss": 0.0646, "step": 3080 }, { "epoch": 20.32894736842105, "grad_norm": 4.310518741607666, "learning_rate": 0.0001, "loss": 0.066, "step": 3090 }, { "epoch": 20.394736842105264, "grad_norm": 3.681676149368286, "learning_rate": 0.0001, "loss": 0.0628, "step": 3100 }, { "epoch": 20.460526315789473, "grad_norm": 4.144742012023926, "learning_rate": 0.0001, "loss": 0.0632, "step": 3110 }, { "epoch": 20.526315789473685, "grad_norm": 3.517277479171753, "learning_rate": 0.0001, "loss": 0.0645, "step": 3120 }, { "epoch": 20.592105263157894, "grad_norm": 3.710193395614624, "learning_rate": 0.0001, "loss": 0.0645, "step": 3130 }, { "epoch": 20.657894736842106, "grad_norm": 4.236420631408691, "learning_rate": 0.0001, "loss": 0.0644, "step": 3140 }, { "epoch": 20.723684210526315, "grad_norm": 3.9308488368988037, "learning_rate": 0.0001, "loss": 0.0672, "step": 3150 }, { "epoch": 20.789473684210527, "grad_norm": 4.319930553436279, "learning_rate": 0.0001, "loss": 0.0642, "step": 3160 }, { "epoch": 20.855263157894736, "grad_norm": 4.539423942565918, "learning_rate": 0.0001, "loss": 0.0676, "step": 3170 }, { "epoch": 20.92105263157895, "grad_norm": 4.0434956550598145, "learning_rate": 0.0001, "loss": 0.0683, "step": 3180 }, { "epoch": 20.986842105263158, "grad_norm": 3.5958993434906006, "learning_rate": 0.0001, "loss": 0.0699, "step": 3190 }, { "epoch": 21.05263157894737, "grad_norm": 3.827382802963257, "learning_rate": 0.0001, "loss": 0.0679, "step": 3200 }, { "epoch": 21.11842105263158, "grad_norm": 4.15602445602417, "learning_rate": 0.0001, "loss": 0.0655, "step": 3210 }, { "epoch": 21.18421052631579, "grad_norm": 4.118391036987305, "learning_rate": 0.0001, "loss": 0.0676, "step": 3220 }, { "epoch": 21.25, "grad_norm": 4.27808141708374, "learning_rate": 0.0001, "loss": 0.0624, "step": 3230 }, { "epoch": 21.31578947368421, "grad_norm": 4.284823894500732, "learning_rate": 0.0001, "loss": 0.0621, "step": 3240 }, { "epoch": 21.38157894736842, "grad_norm": 3.516188621520996, "learning_rate": 0.0001, "loss": 0.0621, "step": 3250 }, { "epoch": 21.44736842105263, "grad_norm": 3.9423298835754395, "learning_rate": 0.0001, "loss": 0.0633, "step": 3260 }, { "epoch": 21.513157894736842, "grad_norm": 4.444387435913086, "learning_rate": 0.0001, "loss": 0.0624, "step": 3270 }, { "epoch": 21.57894736842105, "grad_norm": 4.271636486053467, "learning_rate": 0.0001, "loss": 0.0638, "step": 3280 }, { "epoch": 21.644736842105264, "grad_norm": 4.359874725341797, "learning_rate": 0.0001, "loss": 0.0678, "step": 3290 }, { "epoch": 21.710526315789473, "grad_norm": 4.128505229949951, "learning_rate": 0.0001, "loss": 0.0632, "step": 3300 }, { "epoch": 21.776315789473685, "grad_norm": 3.6902530193328857, "learning_rate": 0.0001, "loss": 0.0611, "step": 3310 }, { "epoch": 21.842105263157894, "grad_norm": 4.0358805656433105, "learning_rate": 0.0001, "loss": 0.059, "step": 3320 }, { "epoch": 21.907894736842106, "grad_norm": 3.965226173400879, "learning_rate": 0.0001, "loss": 0.062, "step": 3330 }, { "epoch": 21.973684210526315, "grad_norm": 3.338334083557129, "learning_rate": 0.0001, "loss": 0.0588, "step": 3340 }, { "epoch": 22.039473684210527, "grad_norm": 4.442704200744629, "learning_rate": 0.0001, "loss": 0.0591, "step": 3350 }, { "epoch": 22.105263157894736, "grad_norm": 4.312819480895996, "learning_rate": 0.0001, "loss": 0.057, "step": 3360 }, { "epoch": 22.17105263157895, "grad_norm": 3.8974449634552, "learning_rate": 0.0001, "loss": 0.0603, "step": 3370 }, { "epoch": 22.236842105263158, "grad_norm": 3.41093111038208, "learning_rate": 0.0001, "loss": 0.0557, "step": 3380 }, { "epoch": 22.30263157894737, "grad_norm": 3.5688576698303223, "learning_rate": 0.0001, "loss": 0.0561, "step": 3390 }, { "epoch": 22.36842105263158, "grad_norm": 3.9463613033294678, "learning_rate": 0.0001, "loss": 0.0595, "step": 3400 }, { "epoch": 22.43421052631579, "grad_norm": 4.227533340454102, "learning_rate": 0.0001, "loss": 0.0583, "step": 3410 }, { "epoch": 22.5, "grad_norm": 3.651512861251831, "learning_rate": 0.0001, "loss": 0.0568, "step": 3420 }, { "epoch": 22.56578947368421, "grad_norm": 3.8400089740753174, "learning_rate": 0.0001, "loss": 0.0603, "step": 3430 }, { "epoch": 22.63157894736842, "grad_norm": 3.567005157470703, "learning_rate": 0.0001, "loss": 0.0592, "step": 3440 }, { "epoch": 22.69736842105263, "grad_norm": 3.756096363067627, "learning_rate": 0.0001, "loss": 0.0565, "step": 3450 }, { "epoch": 22.763157894736842, "grad_norm": 3.8833200931549072, "learning_rate": 0.0001, "loss": 0.0575, "step": 3460 }, { "epoch": 22.82894736842105, "grad_norm": 3.5916409492492676, "learning_rate": 0.0001, "loss": 0.0604, "step": 3470 }, { "epoch": 22.894736842105264, "grad_norm": 3.6396472454071045, "learning_rate": 0.0001, "loss": 0.0621, "step": 3480 }, { "epoch": 22.960526315789473, "grad_norm": 4.026757717132568, "learning_rate": 0.0001, "loss": 0.0609, "step": 3490 }, { "epoch": 23.026315789473685, "grad_norm": 3.412045955657959, "learning_rate": 0.0001, "loss": 0.0635, "step": 3500 }, { "epoch": 23.092105263157894, "grad_norm": 3.8278250694274902, "learning_rate": 0.0001, "loss": 0.0607, "step": 3510 }, { "epoch": 23.157894736842106, "grad_norm": 4.461212635040283, "learning_rate": 0.0001, "loss": 0.0583, "step": 3520 }, { "epoch": 23.223684210526315, "grad_norm": 3.1517691612243652, "learning_rate": 0.0001, "loss": 0.0564, "step": 3530 }, { "epoch": 23.289473684210527, "grad_norm": 3.9116435050964355, "learning_rate": 0.0001, "loss": 0.0614, "step": 3540 }, { "epoch": 23.355263157894736, "grad_norm": 3.633558511734009, "learning_rate": 0.0001, "loss": 0.0562, "step": 3550 }, { "epoch": 23.42105263157895, "grad_norm": 3.1927671432495117, "learning_rate": 0.0001, "loss": 0.0592, "step": 3560 }, { "epoch": 23.486842105263158, "grad_norm": 3.9745633602142334, "learning_rate": 0.0001, "loss": 0.0603, "step": 3570 }, { "epoch": 23.55263157894737, "grad_norm": 4.017717361450195, "learning_rate": 0.0001, "loss": 0.0599, "step": 3580 }, { "epoch": 23.61842105263158, "grad_norm": 3.3736987113952637, "learning_rate": 0.0001, "loss": 0.0561, "step": 3590 }, { "epoch": 23.68421052631579, "grad_norm": 3.3666791915893555, "learning_rate": 0.0001, "loss": 0.0589, "step": 3600 }, { "epoch": 23.75, "grad_norm": 3.2723591327667236, "learning_rate": 0.0001, "loss": 0.0588, "step": 3610 }, { "epoch": 23.81578947368421, "grad_norm": 3.429962396621704, "learning_rate": 0.0001, "loss": 0.0572, "step": 3620 }, { "epoch": 23.88157894736842, "grad_norm": 4.048157691955566, "learning_rate": 0.0001, "loss": 0.0558, "step": 3630 }, { "epoch": 23.94736842105263, "grad_norm": 4.095017910003662, "learning_rate": 0.0001, "loss": 0.0532, "step": 3640 }, { "epoch": 24.013157894736842, "grad_norm": 3.7212188243865967, "learning_rate": 0.0001, "loss": 0.0527, "step": 3650 }, { "epoch": 24.07894736842105, "grad_norm": 3.424227237701416, "learning_rate": 0.0001, "loss": 0.0525, "step": 3660 }, { "epoch": 24.144736842105264, "grad_norm": 3.869912624359131, "learning_rate": 0.0001, "loss": 0.0529, "step": 3670 }, { "epoch": 24.210526315789473, "grad_norm": 3.593370199203491, "learning_rate": 0.0001, "loss": 0.0547, "step": 3680 }, { "epoch": 24.276315789473685, "grad_norm": 3.430244207382202, "learning_rate": 0.0001, "loss": 0.0546, "step": 3690 }, { "epoch": 24.342105263157894, "grad_norm": 3.3279993534088135, "learning_rate": 0.0001, "loss": 0.0561, "step": 3700 }, { "epoch": 24.407894736842106, "grad_norm": 3.5217323303222656, "learning_rate": 0.0001, "loss": 0.0588, "step": 3710 }, { "epoch": 24.473684210526315, "grad_norm": 3.2047455310821533, "learning_rate": 0.0001, "loss": 0.0528, "step": 3720 }, { "epoch": 24.539473684210527, "grad_norm": 3.5667405128479004, "learning_rate": 0.0001, "loss": 0.0565, "step": 3730 }, { "epoch": 24.605263157894736, "grad_norm": 3.1612064838409424, "learning_rate": 0.0001, "loss": 0.0575, "step": 3740 }, { "epoch": 24.67105263157895, "grad_norm": 3.379948616027832, "learning_rate": 0.0001, "loss": 0.0536, "step": 3750 }, { "epoch": 24.736842105263158, "grad_norm": 3.004784107208252, "learning_rate": 0.0001, "loss": 0.0534, "step": 3760 }, { "epoch": 24.80263157894737, "grad_norm": 3.648918867111206, "learning_rate": 0.0001, "loss": 0.054, "step": 3770 }, { "epoch": 24.86842105263158, "grad_norm": 3.0311264991760254, "learning_rate": 0.0001, "loss": 0.06, "step": 3780 }, { "epoch": 24.93421052631579, "grad_norm": 3.015455722808838, "learning_rate": 0.0001, "loss": 0.0618, "step": 3790 }, { "epoch": 25.0, "grad_norm": 3.084538698196411, "learning_rate": 0.0001, "loss": 0.0562, "step": 3800 }, { "epoch": 25.06578947368421, "grad_norm": 3.5729260444641113, "learning_rate": 0.0001, "loss": 0.056, "step": 3810 }, { "epoch": 25.13157894736842, "grad_norm": 3.5942578315734863, "learning_rate": 0.0001, "loss": 0.0569, "step": 3820 }, { "epoch": 25.19736842105263, "grad_norm": 3.8373870849609375, "learning_rate": 0.0001, "loss": 0.0578, "step": 3830 }, { "epoch": 25.263157894736842, "grad_norm": 3.1174817085266113, "learning_rate": 0.0001, "loss": 0.0539, "step": 3840 }, { "epoch": 25.32894736842105, "grad_norm": 2.9916906356811523, "learning_rate": 0.0001, "loss": 0.0545, "step": 3850 }, { "epoch": 25.394736842105264, "grad_norm": 3.7236101627349854, "learning_rate": 0.0001, "loss": 0.0531, "step": 3860 }, { "epoch": 25.460526315789473, "grad_norm": 3.196599006652832, "learning_rate": 0.0001, "loss": 0.0613, "step": 3870 }, { "epoch": 25.526315789473685, "grad_norm": 3.3245861530303955, "learning_rate": 0.0001, "loss": 0.0595, "step": 3880 }, { "epoch": 25.592105263157894, "grad_norm": 3.1229844093322754, "learning_rate": 0.0001, "loss": 0.0581, "step": 3890 }, { "epoch": 25.657894736842106, "grad_norm": 3.090834617614746, "learning_rate": 0.0001, "loss": 0.057, "step": 3900 }, { "epoch": 25.723684210526315, "grad_norm": 3.7173891067504883, "learning_rate": 0.0001, "loss": 0.0553, "step": 3910 }, { "epoch": 25.789473684210527, "grad_norm": 3.4529900550842285, "learning_rate": 0.0001, "loss": 0.06, "step": 3920 }, { "epoch": 25.855263157894736, "grad_norm": 3.41372013092041, "learning_rate": 0.0001, "loss": 0.0534, "step": 3930 }, { "epoch": 25.92105263157895, "grad_norm": 3.5578503608703613, "learning_rate": 0.0001, "loss": 0.0579, "step": 3940 }, { "epoch": 25.986842105263158, "grad_norm": 2.5233664512634277, "learning_rate": 0.0001, "loss": 0.0571, "step": 3950 }, { "epoch": 26.05263157894737, "grad_norm": 3.310196876525879, "learning_rate": 0.0001, "loss": 0.0505, "step": 3960 }, { "epoch": 26.11842105263158, "grad_norm": 3.0970399379730225, "learning_rate": 0.0001, "loss": 0.0532, "step": 3970 }, { "epoch": 26.18421052631579, "grad_norm": 2.9945688247680664, "learning_rate": 0.0001, "loss": 0.05, "step": 3980 }, { "epoch": 26.25, "grad_norm": 2.966948986053467, "learning_rate": 0.0001, "loss": 0.051, "step": 3990 }, { "epoch": 26.31578947368421, "grad_norm": 3.1473228931427, "learning_rate": 0.0001, "loss": 0.0548, "step": 4000 }, { "epoch": 26.38157894736842, "grad_norm": 3.781965494155884, "learning_rate": 0.0001, "loss": 0.0513, "step": 4010 }, { "epoch": 26.44736842105263, "grad_norm": 3.1109609603881836, "learning_rate": 0.0001, "loss": 0.0551, "step": 4020 }, { "epoch": 26.513157894736842, "grad_norm": 3.153053045272827, "learning_rate": 0.0001, "loss": 0.0601, "step": 4030 }, { "epoch": 26.57894736842105, "grad_norm": 2.897550582885742, "learning_rate": 0.0001, "loss": 0.0563, "step": 4040 }, { "epoch": 26.644736842105264, "grad_norm": 3.540456771850586, "learning_rate": 0.0001, "loss": 0.0529, "step": 4050 }, { "epoch": 26.710526315789473, "grad_norm": 3.526008129119873, "learning_rate": 0.0001, "loss": 0.0529, "step": 4060 }, { "epoch": 26.776315789473685, "grad_norm": 2.84975004196167, "learning_rate": 0.0001, "loss": 0.0546, "step": 4070 }, { "epoch": 26.842105263157894, "grad_norm": 2.8743505477905273, "learning_rate": 0.0001, "loss": 0.0516, "step": 4080 }, { "epoch": 26.907894736842106, "grad_norm": 2.8977339267730713, "learning_rate": 0.0001, "loss": 0.0531, "step": 4090 }, { "epoch": 26.973684210526315, "grad_norm": 2.9389920234680176, "learning_rate": 0.0001, "loss": 0.054, "step": 4100 }, { "epoch": 27.039473684210527, "grad_norm": 3.0356078147888184, "learning_rate": 0.0001, "loss": 0.0549, "step": 4110 }, { "epoch": 27.105263157894736, "grad_norm": 2.933394193649292, "learning_rate": 0.0001, "loss": 0.0495, "step": 4120 }, { "epoch": 27.17105263157895, "grad_norm": 3.342971086502075, "learning_rate": 0.0001, "loss": 0.0512, "step": 4130 }, { "epoch": 27.236842105263158, "grad_norm": 3.144812822341919, "learning_rate": 0.0001, "loss": 0.0526, "step": 4140 }, { "epoch": 27.30263157894737, "grad_norm": 2.595964193344116, "learning_rate": 0.0001, "loss": 0.0513, "step": 4150 }, { "epoch": 27.36842105263158, "grad_norm": 3.5423874855041504, "learning_rate": 0.0001, "loss": 0.0504, "step": 4160 }, { "epoch": 27.43421052631579, "grad_norm": 3.1405887603759766, "learning_rate": 0.0001, "loss": 0.0499, "step": 4170 }, { "epoch": 27.5, "grad_norm": 3.289807081222534, "learning_rate": 0.0001, "loss": 0.0499, "step": 4180 }, { "epoch": 27.56578947368421, "grad_norm": 3.1032474040985107, "learning_rate": 0.0001, "loss": 0.0528, "step": 4190 }, { "epoch": 27.63157894736842, "grad_norm": 2.8093903064727783, "learning_rate": 0.0001, "loss": 0.0496, "step": 4200 }, { "epoch": 27.69736842105263, "grad_norm": 2.896862268447876, "learning_rate": 0.0001, "loss": 0.0486, "step": 4210 }, { "epoch": 27.763157894736842, "grad_norm": 3.2978949546813965, "learning_rate": 0.0001, "loss": 0.0462, "step": 4220 }, { "epoch": 27.82894736842105, "grad_norm": 3.006734609603882, "learning_rate": 0.0001, "loss": 0.0477, "step": 4230 }, { "epoch": 27.894736842105264, "grad_norm": 3.1355373859405518, "learning_rate": 0.0001, "loss": 0.0491, "step": 4240 }, { "epoch": 27.960526315789473, "grad_norm": 2.898782253265381, "learning_rate": 0.0001, "loss": 0.0476, "step": 4250 }, { "epoch": 28.026315789473685, "grad_norm": 2.8229269981384277, "learning_rate": 0.0001, "loss": 0.0495, "step": 4260 }, { "epoch": 28.092105263157894, "grad_norm": 2.915052890777588, "learning_rate": 0.0001, "loss": 0.0515, "step": 4270 }, { "epoch": 28.157894736842106, "grad_norm": 3.278546094894409, "learning_rate": 0.0001, "loss": 0.0483, "step": 4280 }, { "epoch": 28.223684210526315, "grad_norm": 3.0418665409088135, "learning_rate": 0.0001, "loss": 0.046, "step": 4290 }, { "epoch": 28.289473684210527, "grad_norm": 3.2005770206451416, "learning_rate": 0.0001, "loss": 0.0459, "step": 4300 }, { "epoch": 28.355263157894736, "grad_norm": 3.2774760723114014, "learning_rate": 0.0001, "loss": 0.0503, "step": 4310 }, { "epoch": 28.42105263157895, "grad_norm": 3.155532121658325, "learning_rate": 0.0001, "loss": 0.0469, "step": 4320 }, { "epoch": 28.486842105263158, "grad_norm": 3.1832993030548096, "learning_rate": 0.0001, "loss": 0.0483, "step": 4330 }, { "epoch": 28.55263157894737, "grad_norm": 3.1676037311553955, "learning_rate": 0.0001, "loss": 0.0497, "step": 4340 }, { "epoch": 28.61842105263158, "grad_norm": 3.4129903316497803, "learning_rate": 0.0001, "loss": 0.0495, "step": 4350 }, { "epoch": 28.68421052631579, "grad_norm": 3.1655561923980713, "learning_rate": 0.0001, "loss": 0.0461, "step": 4360 }, { "epoch": 28.75, "grad_norm": 2.708021640777588, "learning_rate": 0.0001, "loss": 0.0466, "step": 4370 }, { "epoch": 28.81578947368421, "grad_norm": 2.68147873878479, "learning_rate": 0.0001, "loss": 0.0499, "step": 4380 }, { "epoch": 28.88157894736842, "grad_norm": 3.428018808364868, "learning_rate": 0.0001, "loss": 0.0489, "step": 4390 }, { "epoch": 28.94736842105263, "grad_norm": 2.94804048538208, "learning_rate": 0.0001, "loss": 0.0513, "step": 4400 }, { "epoch": 29.013157894736842, "grad_norm": 2.679884910583496, "learning_rate": 0.0001, "loss": 0.0524, "step": 4410 }, { "epoch": 29.07894736842105, "grad_norm": 2.392934560775757, "learning_rate": 0.0001, "loss": 0.0483, "step": 4420 }, { "epoch": 29.144736842105264, "grad_norm": 2.7461166381835938, "learning_rate": 0.0001, "loss": 0.0474, "step": 4430 }, { "epoch": 29.210526315789473, "grad_norm": 2.788827896118164, "learning_rate": 0.0001, "loss": 0.0489, "step": 4440 }, { "epoch": 29.276315789473685, "grad_norm": 3.0062499046325684, "learning_rate": 0.0001, "loss": 0.0495, "step": 4450 }, { "epoch": 29.342105263157894, "grad_norm": 3.0573859214782715, "learning_rate": 0.0001, "loss": 0.05, "step": 4460 }, { "epoch": 29.407894736842106, "grad_norm": 2.665456533432007, "learning_rate": 0.0001, "loss": 0.051, "step": 4470 }, { "epoch": 29.473684210526315, "grad_norm": 2.522658586502075, "learning_rate": 0.0001, "loss": 0.0467, "step": 4480 }, { "epoch": 29.539473684210527, "grad_norm": 2.600645065307617, "learning_rate": 0.0001, "loss": 0.0489, "step": 4490 }, { "epoch": 29.605263157894736, "grad_norm": 2.6609113216400146, "learning_rate": 0.0001, "loss": 0.0476, "step": 4500 }, { "epoch": 29.67105263157895, "grad_norm": 2.691256284713745, "learning_rate": 0.0001, "loss": 0.0455, "step": 4510 }, { "epoch": 29.736842105263158, "grad_norm": 2.8132810592651367, "learning_rate": 0.0001, "loss": 0.0458, "step": 4520 }, { "epoch": 29.80263157894737, "grad_norm": 2.914437770843506, "learning_rate": 0.0001, "loss": 0.0486, "step": 4530 }, { "epoch": 29.86842105263158, "grad_norm": 2.497580051422119, "learning_rate": 0.0001, "loss": 0.0462, "step": 4540 }, { "epoch": 29.93421052631579, "grad_norm": 2.6248748302459717, "learning_rate": 0.0001, "loss": 0.0456, "step": 4550 }, { "epoch": 30.0, "grad_norm": 2.8012468814849854, "learning_rate": 0.0001, "loss": 0.0457, "step": 4560 }, { "epoch": 30.06578947368421, "grad_norm": 2.505524158477783, "learning_rate": 0.0001, "loss": 0.047, "step": 4570 }, { "epoch": 30.13157894736842, "grad_norm": 2.6117448806762695, "learning_rate": 0.0001, "loss": 0.0503, "step": 4580 }, { "epoch": 30.19736842105263, "grad_norm": 2.7365365028381348, "learning_rate": 0.0001, "loss": 0.0424, "step": 4590 }, { "epoch": 30.263157894736842, "grad_norm": 2.8292315006256104, "learning_rate": 0.0001, "loss": 0.0467, "step": 4600 }, { "epoch": 30.32894736842105, "grad_norm": 2.5775272846221924, "learning_rate": 0.0001, "loss": 0.0455, "step": 4610 }, { "epoch": 30.394736842105264, "grad_norm": 2.720210313796997, "learning_rate": 0.0001, "loss": 0.0455, "step": 4620 }, { "epoch": 30.460526315789473, "grad_norm": 2.5070748329162598, "learning_rate": 0.0001, "loss": 0.0499, "step": 4630 }, { "epoch": 30.526315789473685, "grad_norm": 2.8825769424438477, "learning_rate": 0.0001, "loss": 0.0495, "step": 4640 }, { "epoch": 30.592105263157894, "grad_norm": 2.553520441055298, "learning_rate": 0.0001, "loss": 0.0458, "step": 4650 }, { "epoch": 30.657894736842106, "grad_norm": 2.818341016769409, "learning_rate": 0.0001, "loss": 0.0488, "step": 4660 }, { "epoch": 30.723684210526315, "grad_norm": 3.132817268371582, "learning_rate": 0.0001, "loss": 0.0455, "step": 4670 }, { "epoch": 30.789473684210527, "grad_norm": 2.931819200515747, "learning_rate": 0.0001, "loss": 0.0488, "step": 4680 }, { "epoch": 30.855263157894736, "grad_norm": 2.6675987243652344, "learning_rate": 0.0001, "loss": 0.0449, "step": 4690 }, { "epoch": 30.92105263157895, "grad_norm": 3.1334962844848633, "learning_rate": 0.0001, "loss": 0.0458, "step": 4700 }, { "epoch": 30.986842105263158, "grad_norm": 2.9134418964385986, "learning_rate": 0.0001, "loss": 0.0438, "step": 4710 }, { "epoch": 31.05263157894737, "grad_norm": 2.820773124694824, "learning_rate": 0.0001, "loss": 0.0437, "step": 4720 }, { "epoch": 31.11842105263158, "grad_norm": 2.4287455081939697, "learning_rate": 0.0001, "loss": 0.0434, "step": 4730 }, { "epoch": 31.18421052631579, "grad_norm": 2.476181745529175, "learning_rate": 0.0001, "loss": 0.044, "step": 4740 }, { "epoch": 31.25, "grad_norm": 2.6724979877471924, "learning_rate": 0.0001, "loss": 0.043, "step": 4750 }, { "epoch": 31.31578947368421, "grad_norm": 2.832930326461792, "learning_rate": 0.0001, "loss": 0.0455, "step": 4760 }, { "epoch": 31.38157894736842, "grad_norm": 2.3849406242370605, "learning_rate": 0.0001, "loss": 0.0481, "step": 4770 }, { "epoch": 31.44736842105263, "grad_norm": 2.4562463760375977, "learning_rate": 0.0001, "loss": 0.0444, "step": 4780 }, { "epoch": 31.513157894736842, "grad_norm": 2.278359889984131, "learning_rate": 0.0001, "loss": 0.0423, "step": 4790 }, { "epoch": 31.57894736842105, "grad_norm": 2.701538562774658, "learning_rate": 0.0001, "loss": 0.0441, "step": 4800 }, { "epoch": 31.644736842105264, "grad_norm": 3.181910276412964, "learning_rate": 0.0001, "loss": 0.0451, "step": 4810 }, { "epoch": 31.710526315789473, "grad_norm": 2.4333291053771973, "learning_rate": 0.0001, "loss": 0.0464, "step": 4820 }, { "epoch": 31.776315789473685, "grad_norm": 2.6533899307250977, "learning_rate": 0.0001, "loss": 0.0448, "step": 4830 }, { "epoch": 31.842105263157894, "grad_norm": 2.8121790885925293, "learning_rate": 0.0001, "loss": 0.0476, "step": 4840 }, { "epoch": 31.907894736842106, "grad_norm": 2.6064651012420654, "learning_rate": 0.0001, "loss": 0.0479, "step": 4850 }, { "epoch": 31.973684210526315, "grad_norm": 2.4942471981048584, "learning_rate": 0.0001, "loss": 0.0494, "step": 4860 }, { "epoch": 32.03947368421053, "grad_norm": 2.7892401218414307, "learning_rate": 0.0001, "loss": 0.0495, "step": 4870 }, { "epoch": 32.10526315789474, "grad_norm": 2.6274139881134033, "learning_rate": 0.0001, "loss": 0.0454, "step": 4880 }, { "epoch": 32.171052631578945, "grad_norm": 2.729278802871704, "learning_rate": 0.0001, "loss": 0.0437, "step": 4890 }, { "epoch": 32.23684210526316, "grad_norm": 2.4070258140563965, "learning_rate": 0.0001, "loss": 0.0448, "step": 4900 }, { "epoch": 32.30263157894737, "grad_norm": 2.6891207695007324, "learning_rate": 0.0001, "loss": 0.0415, "step": 4910 }, { "epoch": 32.36842105263158, "grad_norm": 2.2059195041656494, "learning_rate": 0.0001, "loss": 0.0466, "step": 4920 }, { "epoch": 32.43421052631579, "grad_norm": 2.6894748210906982, "learning_rate": 0.0001, "loss": 0.0456, "step": 4930 }, { "epoch": 32.5, "grad_norm": 2.526822805404663, "learning_rate": 0.0001, "loss": 0.0422, "step": 4940 }, { "epoch": 32.56578947368421, "grad_norm": 2.509107828140259, "learning_rate": 0.0001, "loss": 0.0439, "step": 4950 }, { "epoch": 32.63157894736842, "grad_norm": 2.662522792816162, "learning_rate": 0.0001, "loss": 0.0468, "step": 4960 }, { "epoch": 32.69736842105263, "grad_norm": 2.4000043869018555, "learning_rate": 0.0001, "loss": 0.0448, "step": 4970 }, { "epoch": 32.76315789473684, "grad_norm": 2.2575981616973877, "learning_rate": 0.0001, "loss": 0.0427, "step": 4980 }, { "epoch": 32.828947368421055, "grad_norm": 2.4728028774261475, "learning_rate": 0.0001, "loss": 0.0459, "step": 4990 }, { "epoch": 32.89473684210526, "grad_norm": 2.4107658863067627, "learning_rate": 0.0001, "loss": 0.0419, "step": 5000 }, { "epoch": 32.96052631578947, "grad_norm": 2.1474099159240723, "learning_rate": 0.0001, "loss": 0.045, "step": 5010 }, { "epoch": 33.026315789473685, "grad_norm": 2.5930263996124268, "learning_rate": 0.0001, "loss": 0.0428, "step": 5020 }, { "epoch": 33.0921052631579, "grad_norm": 2.652590036392212, "learning_rate": 0.0001, "loss": 0.0416, "step": 5030 }, { "epoch": 33.1578947368421, "grad_norm": 2.6589486598968506, "learning_rate": 0.0001, "loss": 0.041, "step": 5040 }, { "epoch": 33.223684210526315, "grad_norm": 2.6637887954711914, "learning_rate": 0.0001, "loss": 0.0413, "step": 5050 }, { "epoch": 33.28947368421053, "grad_norm": 2.2335708141326904, "learning_rate": 0.0001, "loss": 0.0417, "step": 5060 }, { "epoch": 33.35526315789474, "grad_norm": 2.3757381439208984, "learning_rate": 0.0001, "loss": 0.0414, "step": 5070 }, { "epoch": 33.421052631578945, "grad_norm": 2.569889545440674, "learning_rate": 0.0001, "loss": 0.0414, "step": 5080 }, { "epoch": 33.48684210526316, "grad_norm": 2.7300987243652344, "learning_rate": 0.0001, "loss": 0.0438, "step": 5090 }, { "epoch": 33.55263157894737, "grad_norm": 2.591104745864868, "learning_rate": 0.0001, "loss": 0.0446, "step": 5100 }, { "epoch": 33.61842105263158, "grad_norm": 2.4056169986724854, "learning_rate": 0.0001, "loss": 0.0443, "step": 5110 }, { "epoch": 33.68421052631579, "grad_norm": 2.4610161781311035, "learning_rate": 0.0001, "loss": 0.041, "step": 5120 }, { "epoch": 33.75, "grad_norm": 2.3633317947387695, "learning_rate": 0.0001, "loss": 0.0457, "step": 5130 }, { "epoch": 33.81578947368421, "grad_norm": 2.227242946624756, "learning_rate": 0.0001, "loss": 0.0451, "step": 5140 }, { "epoch": 33.88157894736842, "grad_norm": 2.552421808242798, "learning_rate": 0.0001, "loss": 0.0453, "step": 5150 }, { "epoch": 33.94736842105263, "grad_norm": 2.2416305541992188, "learning_rate": 0.0001, "loss": 0.0462, "step": 5160 }, { "epoch": 34.01315789473684, "grad_norm": 2.6720187664031982, "learning_rate": 0.0001, "loss": 0.0449, "step": 5170 }, { "epoch": 34.078947368421055, "grad_norm": 2.6561310291290283, "learning_rate": 0.0001, "loss": 0.0471, "step": 5180 }, { "epoch": 34.14473684210526, "grad_norm": 2.019381523132324, "learning_rate": 0.0001, "loss": 0.0406, "step": 5190 }, { "epoch": 34.21052631578947, "grad_norm": 2.435896873474121, "learning_rate": 0.0001, "loss": 0.0441, "step": 5200 }, { "epoch": 34.276315789473685, "grad_norm": 2.5385663509368896, "learning_rate": 0.0001, "loss": 0.0443, "step": 5210 }, { "epoch": 34.3421052631579, "grad_norm": 2.449340343475342, "learning_rate": 0.0001, "loss": 0.0447, "step": 5220 }, { "epoch": 34.4078947368421, "grad_norm": 2.6000161170959473, "learning_rate": 0.0001, "loss": 0.044, "step": 5230 }, { "epoch": 34.473684210526315, "grad_norm": 2.6699042320251465, "learning_rate": 0.0001, "loss": 0.0446, "step": 5240 }, { "epoch": 34.53947368421053, "grad_norm": 2.4187822341918945, "learning_rate": 0.0001, "loss": 0.0442, "step": 5250 }, { "epoch": 34.60526315789474, "grad_norm": 2.450216770172119, "learning_rate": 0.0001, "loss": 0.0413, "step": 5260 }, { "epoch": 34.671052631578945, "grad_norm": 2.4974207878112793, "learning_rate": 0.0001, "loss": 0.042, "step": 5270 }, { "epoch": 34.73684210526316, "grad_norm": 2.525740623474121, "learning_rate": 0.0001, "loss": 0.0414, "step": 5280 }, { "epoch": 34.80263157894737, "grad_norm": 2.640798568725586, "learning_rate": 0.0001, "loss": 0.0428, "step": 5290 }, { "epoch": 34.86842105263158, "grad_norm": 2.456847667694092, "learning_rate": 0.0001, "loss": 0.0441, "step": 5300 }, { "epoch": 34.93421052631579, "grad_norm": 2.523120164871216, "learning_rate": 0.0001, "loss": 0.0423, "step": 5310 }, { "epoch": 35.0, "grad_norm": 1.9672609567642212, "learning_rate": 0.0001, "loss": 0.0419, "step": 5320 }, { "epoch": 35.06578947368421, "grad_norm": 2.4729208946228027, "learning_rate": 0.0001, "loss": 0.0423, "step": 5330 }, { "epoch": 35.13157894736842, "grad_norm": 2.3115689754486084, "learning_rate": 0.0001, "loss": 0.0391, "step": 5340 }, { "epoch": 35.19736842105263, "grad_norm": 2.2606678009033203, "learning_rate": 0.0001, "loss": 0.039, "step": 5350 }, { "epoch": 35.26315789473684, "grad_norm": 2.3886468410491943, "learning_rate": 0.0001, "loss": 0.0404, "step": 5360 }, { "epoch": 35.328947368421055, "grad_norm": 2.1863503456115723, "learning_rate": 0.0001, "loss": 0.041, "step": 5370 }, { "epoch": 35.39473684210526, "grad_norm": 2.3488121032714844, "learning_rate": 0.0001, "loss": 0.0428, "step": 5380 }, { "epoch": 35.46052631578947, "grad_norm": 2.360938310623169, "learning_rate": 0.0001, "loss": 0.0409, "step": 5390 }, { "epoch": 35.526315789473685, "grad_norm": 2.5234484672546387, "learning_rate": 0.0001, "loss": 0.0428, "step": 5400 }, { "epoch": 35.5921052631579, "grad_norm": 2.612522602081299, "learning_rate": 0.0001, "loss": 0.0421, "step": 5410 }, { "epoch": 35.6578947368421, "grad_norm": 2.419776678085327, "learning_rate": 0.0001, "loss": 0.0415, "step": 5420 }, { "epoch": 35.723684210526315, "grad_norm": 2.3333466053009033, "learning_rate": 0.0001, "loss": 0.0411, "step": 5430 }, { "epoch": 35.78947368421053, "grad_norm": 2.3613359928131104, "learning_rate": 0.0001, "loss": 0.0461, "step": 5440 }, { "epoch": 35.85526315789474, "grad_norm": 1.962768316268921, "learning_rate": 0.0001, "loss": 0.0409, "step": 5450 }, { "epoch": 35.921052631578945, "grad_norm": 2.3597044944763184, "learning_rate": 0.0001, "loss": 0.0404, "step": 5460 }, { "epoch": 35.98684210526316, "grad_norm": 2.2614336013793945, "learning_rate": 0.0001, "loss": 0.044, "step": 5470 }, { "epoch": 36.05263157894737, "grad_norm": 2.350679397583008, "learning_rate": 0.0001, "loss": 0.0433, "step": 5480 }, { "epoch": 36.11842105263158, "grad_norm": 2.374591588973999, "learning_rate": 0.0001, "loss": 0.0433, "step": 5490 }, { "epoch": 36.18421052631579, "grad_norm": 2.3889739513397217, "learning_rate": 0.0001, "loss": 0.0422, "step": 5500 }, { "epoch": 36.25, "grad_norm": 2.4520390033721924, "learning_rate": 0.0001, "loss": 0.0417, "step": 5510 }, { "epoch": 36.31578947368421, "grad_norm": 2.349119186401367, "learning_rate": 0.0001, "loss": 0.0387, "step": 5520 }, { "epoch": 36.38157894736842, "grad_norm": 2.202120780944824, "learning_rate": 0.0001, "loss": 0.0401, "step": 5530 }, { "epoch": 36.44736842105263, "grad_norm": 2.2714016437530518, "learning_rate": 0.0001, "loss": 0.0379, "step": 5540 }, { "epoch": 36.51315789473684, "grad_norm": 2.2632577419281006, "learning_rate": 0.0001, "loss": 0.0381, "step": 5550 }, { "epoch": 36.578947368421055, "grad_norm": 2.608222007751465, "learning_rate": 0.0001, "loss": 0.0388, "step": 5560 }, { "epoch": 36.64473684210526, "grad_norm": 2.298335552215576, "learning_rate": 0.0001, "loss": 0.0407, "step": 5570 }, { "epoch": 36.71052631578947, "grad_norm": 2.498187303543091, "learning_rate": 0.0001, "loss": 0.0381, "step": 5580 }, { "epoch": 36.776315789473685, "grad_norm": 2.62980580329895, "learning_rate": 0.0001, "loss": 0.0393, "step": 5590 }, { "epoch": 36.8421052631579, "grad_norm": 2.6925511360168457, "learning_rate": 0.0001, "loss": 0.0376, "step": 5600 }, { "epoch": 36.9078947368421, "grad_norm": 2.493591070175171, "learning_rate": 0.0001, "loss": 0.039, "step": 5610 }, { "epoch": 36.973684210526315, "grad_norm": 2.0936026573181152, "learning_rate": 0.0001, "loss": 0.0396, "step": 5620 }, { "epoch": 37.03947368421053, "grad_norm": 2.3660683631896973, "learning_rate": 0.0001, "loss": 0.039, "step": 5630 }, { "epoch": 37.10526315789474, "grad_norm": 2.5689046382904053, "learning_rate": 0.0001, "loss": 0.0374, "step": 5640 }, { "epoch": 37.171052631578945, "grad_norm": 2.339864730834961, "learning_rate": 0.0001, "loss": 0.0373, "step": 5650 }, { "epoch": 37.23684210526316, "grad_norm": 1.9542019367218018, "learning_rate": 0.0001, "loss": 0.0375, "step": 5660 }, { "epoch": 37.30263157894737, "grad_norm": 2.236436128616333, "learning_rate": 0.0001, "loss": 0.0381, "step": 5670 }, { "epoch": 37.36842105263158, "grad_norm": 2.5332870483398438, "learning_rate": 0.0001, "loss": 0.0395, "step": 5680 }, { "epoch": 37.43421052631579, "grad_norm": 2.4465630054473877, "learning_rate": 0.0001, "loss": 0.0406, "step": 5690 }, { "epoch": 37.5, "grad_norm": 2.6095876693725586, "learning_rate": 0.0001, "loss": 0.039, "step": 5700 }, { "epoch": 37.56578947368421, "grad_norm": 2.1583876609802246, "learning_rate": 0.0001, "loss": 0.0374, "step": 5710 }, { "epoch": 37.63157894736842, "grad_norm": 2.2866389751434326, "learning_rate": 0.0001, "loss": 0.0388, "step": 5720 }, { "epoch": 37.69736842105263, "grad_norm": 2.5979535579681396, "learning_rate": 0.0001, "loss": 0.0421, "step": 5730 }, { "epoch": 37.76315789473684, "grad_norm": 2.4028756618499756, "learning_rate": 0.0001, "loss": 0.0405, "step": 5740 }, { "epoch": 37.828947368421055, "grad_norm": 2.268890619277954, "learning_rate": 0.0001, "loss": 0.0388, "step": 5750 }, { "epoch": 37.89473684210526, "grad_norm": 2.3834378719329834, "learning_rate": 0.0001, "loss": 0.0403, "step": 5760 }, { "epoch": 37.96052631578947, "grad_norm": 2.4863436222076416, "learning_rate": 0.0001, "loss": 0.0416, "step": 5770 }, { "epoch": 38.026315789473685, "grad_norm": 2.2150402069091797, "learning_rate": 0.0001, "loss": 0.0428, "step": 5780 }, { "epoch": 38.0921052631579, "grad_norm": 2.104177474975586, "learning_rate": 0.0001, "loss": 0.0407, "step": 5790 }, { "epoch": 38.1578947368421, "grad_norm": 2.027252674102783, "learning_rate": 0.0001, "loss": 0.0367, "step": 5800 }, { "epoch": 38.223684210526315, "grad_norm": 2.5156586170196533, "learning_rate": 0.0001, "loss": 0.0394, "step": 5810 }, { "epoch": 38.28947368421053, "grad_norm": 2.186910629272461, "learning_rate": 0.0001, "loss": 0.0391, "step": 5820 }, { "epoch": 38.35526315789474, "grad_norm": 2.3481905460357666, "learning_rate": 0.0001, "loss": 0.0391, "step": 5830 }, { "epoch": 38.421052631578945, "grad_norm": 2.13398814201355, "learning_rate": 0.0001, "loss": 0.0372, "step": 5840 }, { "epoch": 38.48684210526316, "grad_norm": 2.2620978355407715, "learning_rate": 0.0001, "loss": 0.0377, "step": 5850 }, { "epoch": 38.55263157894737, "grad_norm": 2.134782075881958, "learning_rate": 0.0001, "loss": 0.0385, "step": 5860 }, { "epoch": 38.61842105263158, "grad_norm": 1.88546884059906, "learning_rate": 0.0001, "loss": 0.0398, "step": 5870 }, { "epoch": 38.68421052631579, "grad_norm": 2.0278677940368652, "learning_rate": 0.0001, "loss": 0.0396, "step": 5880 }, { "epoch": 38.75, "grad_norm": 2.1986711025238037, "learning_rate": 0.0001, "loss": 0.0391, "step": 5890 }, { "epoch": 38.81578947368421, "grad_norm": 2.5093231201171875, "learning_rate": 0.0001, "loss": 0.0394, "step": 5900 }, { "epoch": 38.88157894736842, "grad_norm": 2.205291509628296, "learning_rate": 0.0001, "loss": 0.041, "step": 5910 }, { "epoch": 38.94736842105263, "grad_norm": 2.2630720138549805, "learning_rate": 0.0001, "loss": 0.042, "step": 5920 }, { "epoch": 39.01315789473684, "grad_norm": 2.1553659439086914, "learning_rate": 0.0001, "loss": 0.0405, "step": 5930 }, { "epoch": 39.078947368421055, "grad_norm": 2.4210903644561768, "learning_rate": 0.0001, "loss": 0.037, "step": 5940 }, { "epoch": 39.14473684210526, "grad_norm": 2.5638651847839355, "learning_rate": 0.0001, "loss": 0.0381, "step": 5950 }, { "epoch": 39.21052631578947, "grad_norm": 2.3888773918151855, "learning_rate": 0.0001, "loss": 0.0391, "step": 5960 }, { "epoch": 39.276315789473685, "grad_norm": 2.2515876293182373, "learning_rate": 0.0001, "loss": 0.0364, "step": 5970 }, { "epoch": 39.3421052631579, "grad_norm": 2.554680347442627, "learning_rate": 0.0001, "loss": 0.0373, "step": 5980 }, { "epoch": 39.4078947368421, "grad_norm": 2.454371690750122, "learning_rate": 0.0001, "loss": 0.0358, "step": 5990 }, { "epoch": 39.473684210526315, "grad_norm": 2.179824113845825, "learning_rate": 0.0001, "loss": 0.0363, "step": 6000 }, { "epoch": 39.53947368421053, "grad_norm": 2.5795116424560547, "learning_rate": 0.0001, "loss": 0.0376, "step": 6010 }, { "epoch": 39.60526315789474, "grad_norm": 2.0184319019317627, "learning_rate": 0.0001, "loss": 0.038, "step": 6020 }, { "epoch": 39.671052631578945, "grad_norm": 2.221646547317505, "learning_rate": 0.0001, "loss": 0.0373, "step": 6030 }, { "epoch": 39.73684210526316, "grad_norm": 2.4176130294799805, "learning_rate": 0.0001, "loss": 0.0343, "step": 6040 }, { "epoch": 39.80263157894737, "grad_norm": 2.258746385574341, "learning_rate": 0.0001, "loss": 0.0375, "step": 6050 }, { "epoch": 39.86842105263158, "grad_norm": 2.2716710567474365, "learning_rate": 0.0001, "loss": 0.036, "step": 6060 }, { "epoch": 39.93421052631579, "grad_norm": 2.2062830924987793, "learning_rate": 0.0001, "loss": 0.0341, "step": 6070 }, { "epoch": 40.0, "grad_norm": 2.2521860599517822, "learning_rate": 0.0001, "loss": 0.0364, "step": 6080 }, { "epoch": 40.06578947368421, "grad_norm": 2.0055341720581055, "learning_rate": 0.0001, "loss": 0.0347, "step": 6090 }, { "epoch": 40.13157894736842, "grad_norm": 2.273428201675415, "learning_rate": 0.0001, "loss": 0.0363, "step": 6100 }, { "epoch": 40.19736842105263, "grad_norm": 2.1523613929748535, "learning_rate": 0.0001, "loss": 0.0363, "step": 6110 }, { "epoch": 40.26315789473684, "grad_norm": 2.2679247856140137, "learning_rate": 0.0001, "loss": 0.039, "step": 6120 }, { "epoch": 40.328947368421055, "grad_norm": 2.01351261138916, "learning_rate": 0.0001, "loss": 0.0373, "step": 6130 }, { "epoch": 40.39473684210526, "grad_norm": 2.1786999702453613, "learning_rate": 0.0001, "loss": 0.0361, "step": 6140 }, { "epoch": 40.46052631578947, "grad_norm": 2.424417734146118, "learning_rate": 0.0001, "loss": 0.0347, "step": 6150 }, { "epoch": 40.526315789473685, "grad_norm": 1.980931043624878, "learning_rate": 0.0001, "loss": 0.0409, "step": 6160 }, { "epoch": 40.5921052631579, "grad_norm": 2.2606894969940186, "learning_rate": 0.0001, "loss": 0.0359, "step": 6170 }, { "epoch": 40.6578947368421, "grad_norm": 2.0380303859710693, "learning_rate": 0.0001, "loss": 0.0382, "step": 6180 }, { "epoch": 40.723684210526315, "grad_norm": 2.143332004547119, "learning_rate": 0.0001, "loss": 0.0415, "step": 6190 }, { "epoch": 40.78947368421053, "grad_norm": 2.299978017807007, "learning_rate": 0.0001, "loss": 0.0405, "step": 6200 }, { "epoch": 40.85526315789474, "grad_norm": 2.454505205154419, "learning_rate": 0.0001, "loss": 0.0393, "step": 6210 }, { "epoch": 40.921052631578945, "grad_norm": 2.5504910945892334, "learning_rate": 0.0001, "loss": 0.0359, "step": 6220 }, { "epoch": 40.98684210526316, "grad_norm": 2.2120444774627686, "learning_rate": 0.0001, "loss": 0.0363, "step": 6230 }, { "epoch": 41.05263157894737, "grad_norm": 2.1109986305236816, "learning_rate": 0.0001, "loss": 0.0386, "step": 6240 }, { "epoch": 41.11842105263158, "grad_norm": 2.4341702461242676, "learning_rate": 0.0001, "loss": 0.0383, "step": 6250 }, { "epoch": 41.18421052631579, "grad_norm": 2.1559791564941406, "learning_rate": 0.0001, "loss": 0.0398, "step": 6260 }, { "epoch": 41.25, "grad_norm": 2.233827590942383, "learning_rate": 0.0001, "loss": 0.0377, "step": 6270 }, { "epoch": 41.31578947368421, "grad_norm": 2.0615148544311523, "learning_rate": 0.0001, "loss": 0.0369, "step": 6280 }, { "epoch": 41.38157894736842, "grad_norm": 2.319382667541504, "learning_rate": 0.0001, "loss": 0.0365, "step": 6290 }, { "epoch": 41.44736842105263, "grad_norm": 2.304764986038208, "learning_rate": 0.0001, "loss": 0.0381, "step": 6300 }, { "epoch": 41.51315789473684, "grad_norm": 2.4223990440368652, "learning_rate": 0.0001, "loss": 0.0388, "step": 6310 }, { "epoch": 41.578947368421055, "grad_norm": 2.1475377082824707, "learning_rate": 0.0001, "loss": 0.0381, "step": 6320 }, { "epoch": 41.64473684210526, "grad_norm": 2.036505699157715, "learning_rate": 0.0001, "loss": 0.0372, "step": 6330 }, { "epoch": 41.71052631578947, "grad_norm": 1.9919437170028687, "learning_rate": 0.0001, "loss": 0.0339, "step": 6340 }, { "epoch": 41.776315789473685, "grad_norm": 2.1360974311828613, "learning_rate": 0.0001, "loss": 0.0362, "step": 6350 }, { "epoch": 41.8421052631579, "grad_norm": 2.1707985401153564, "learning_rate": 0.0001, "loss": 0.0343, "step": 6360 }, { "epoch": 41.9078947368421, "grad_norm": 2.1546218395233154, "learning_rate": 0.0001, "loss": 0.0359, "step": 6370 }, { "epoch": 41.973684210526315, "grad_norm": 1.8794069290161133, "learning_rate": 0.0001, "loss": 0.0363, "step": 6380 }, { "epoch": 42.03947368421053, "grad_norm": 2.156665086746216, "learning_rate": 0.0001, "loss": 0.0365, "step": 6390 }, { "epoch": 42.10526315789474, "grad_norm": 2.243816614151001, "learning_rate": 0.0001, "loss": 0.0378, "step": 6400 }, { "epoch": 42.171052631578945, "grad_norm": 2.2223100662231445, "learning_rate": 0.0001, "loss": 0.0363, "step": 6410 }, { "epoch": 42.23684210526316, "grad_norm": 2.0650203227996826, "learning_rate": 0.0001, "loss": 0.0338, "step": 6420 }, { "epoch": 42.30263157894737, "grad_norm": 2.1891047954559326, "learning_rate": 0.0001, "loss": 0.0359, "step": 6430 }, { "epoch": 42.36842105263158, "grad_norm": 2.168792963027954, "learning_rate": 0.0001, "loss": 0.0358, "step": 6440 }, { "epoch": 42.43421052631579, "grad_norm": 2.247410774230957, "learning_rate": 0.0001, "loss": 0.0357, "step": 6450 }, { "epoch": 42.5, "grad_norm": 1.878936529159546, "learning_rate": 0.0001, "loss": 0.0329, "step": 6460 }, { "epoch": 42.56578947368421, "grad_norm": 2.108396530151367, "learning_rate": 0.0001, "loss": 0.0357, "step": 6470 }, { "epoch": 42.63157894736842, "grad_norm": 1.6226104497909546, "learning_rate": 0.0001, "loss": 0.0335, "step": 6480 }, { "epoch": 42.69736842105263, "grad_norm": 2.6258723735809326, "learning_rate": 0.0001, "loss": 0.0325, "step": 6490 }, { "epoch": 42.76315789473684, "grad_norm": 2.1555979251861572, "learning_rate": 0.0001, "loss": 0.0364, "step": 6500 }, { "epoch": 42.828947368421055, "grad_norm": 2.5804638862609863, "learning_rate": 0.0001, "loss": 0.0366, "step": 6510 }, { "epoch": 42.89473684210526, "grad_norm": 2.313135862350464, "learning_rate": 0.0001, "loss": 0.0361, "step": 6520 }, { "epoch": 42.96052631578947, "grad_norm": 2.659311056137085, "learning_rate": 0.0001, "loss": 0.0362, "step": 6530 }, { "epoch": 43.026315789473685, "grad_norm": 2.1776773929595947, "learning_rate": 0.0001, "loss": 0.0363, "step": 6540 }, { "epoch": 43.0921052631579, "grad_norm": 2.1495778560638428, "learning_rate": 0.0001, "loss": 0.0382, "step": 6550 }, { "epoch": 43.1578947368421, "grad_norm": 2.05283522605896, "learning_rate": 0.0001, "loss": 0.0371, "step": 6560 }, { "epoch": 43.223684210526315, "grad_norm": 2.5983822345733643, "learning_rate": 0.0001, "loss": 0.0345, "step": 6570 }, { "epoch": 43.28947368421053, "grad_norm": 2.5054686069488525, "learning_rate": 0.0001, "loss": 0.0351, "step": 6580 }, { "epoch": 43.35526315789474, "grad_norm": 1.933837890625, "learning_rate": 0.0001, "loss": 0.0359, "step": 6590 }, { "epoch": 43.421052631578945, "grad_norm": 2.3833420276641846, "learning_rate": 0.0001, "loss": 0.0376, "step": 6600 }, { "epoch": 43.48684210526316, "grad_norm": 2.4395833015441895, "learning_rate": 0.0001, "loss": 0.0347, "step": 6610 }, { "epoch": 43.55263157894737, "grad_norm": 2.5590438842773438, "learning_rate": 0.0001, "loss": 0.0383, "step": 6620 }, { "epoch": 43.61842105263158, "grad_norm": 2.5415804386138916, "learning_rate": 0.0001, "loss": 0.0334, "step": 6630 }, { "epoch": 43.68421052631579, "grad_norm": 1.9072297811508179, "learning_rate": 0.0001, "loss": 0.0343, "step": 6640 }, { "epoch": 43.75, "grad_norm": 1.8998295068740845, "learning_rate": 0.0001, "loss": 0.0346, "step": 6650 }, { "epoch": 43.81578947368421, "grad_norm": 1.905118465423584, "learning_rate": 0.0001, "loss": 0.0333, "step": 6660 }, { "epoch": 43.88157894736842, "grad_norm": 2.132871627807617, "learning_rate": 0.0001, "loss": 0.0332, "step": 6670 }, { "epoch": 43.94736842105263, "grad_norm": 2.0413613319396973, "learning_rate": 0.0001, "loss": 0.0344, "step": 6680 }, { "epoch": 44.01315789473684, "grad_norm": 2.2114999294281006, "learning_rate": 0.0001, "loss": 0.0337, "step": 6690 }, { "epoch": 44.078947368421055, "grad_norm": 2.3088066577911377, "learning_rate": 0.0001, "loss": 0.0378, "step": 6700 }, { "epoch": 44.14473684210526, "grad_norm": 1.784862756729126, "learning_rate": 0.0001, "loss": 0.0364, "step": 6710 }, { "epoch": 44.21052631578947, "grad_norm": 2.099011182785034, "learning_rate": 0.0001, "loss": 0.0359, "step": 6720 }, { "epoch": 44.276315789473685, "grad_norm": 1.912520170211792, "learning_rate": 0.0001, "loss": 0.0352, "step": 6730 }, { "epoch": 44.3421052631579, "grad_norm": 1.9815537929534912, "learning_rate": 0.0001, "loss": 0.0366, "step": 6740 }, { "epoch": 44.4078947368421, "grad_norm": 2.396345853805542, "learning_rate": 0.0001, "loss": 0.0363, "step": 6750 }, { "epoch": 44.473684210526315, "grad_norm": 2.2811830043792725, "learning_rate": 0.0001, "loss": 0.0344, "step": 6760 }, { "epoch": 44.53947368421053, "grad_norm": 2.1888914108276367, "learning_rate": 0.0001, "loss": 0.0353, "step": 6770 }, { "epoch": 44.60526315789474, "grad_norm": 2.2141542434692383, "learning_rate": 0.0001, "loss": 0.0362, "step": 6780 }, { "epoch": 44.671052631578945, "grad_norm": 1.9573568105697632, "learning_rate": 0.0001, "loss": 0.0356, "step": 6790 }, { "epoch": 44.73684210526316, "grad_norm": 2.2900731563568115, "learning_rate": 0.0001, "loss": 0.0371, "step": 6800 }, { "epoch": 44.80263157894737, "grad_norm": 2.2001209259033203, "learning_rate": 0.0001, "loss": 0.0351, "step": 6810 }, { "epoch": 44.86842105263158, "grad_norm": 2.0125033855438232, "learning_rate": 0.0001, "loss": 0.0344, "step": 6820 }, { "epoch": 44.93421052631579, "grad_norm": 1.908859372138977, "learning_rate": 0.0001, "loss": 0.034, "step": 6830 }, { "epoch": 45.0, "grad_norm": 2.0976343154907227, "learning_rate": 0.0001, "loss": 0.034, "step": 6840 }, { "epoch": 45.06578947368421, "grad_norm": 2.10421085357666, "learning_rate": 0.0001, "loss": 0.0343, "step": 6850 }, { "epoch": 45.13157894736842, "grad_norm": 1.7706947326660156, "learning_rate": 0.0001, "loss": 0.0327, "step": 6860 }, { "epoch": 45.19736842105263, "grad_norm": 2.1556997299194336, "learning_rate": 0.0001, "loss": 0.0328, "step": 6870 }, { "epoch": 45.26315789473684, "grad_norm": 2.2130556106567383, "learning_rate": 0.0001, "loss": 0.0345, "step": 6880 }, { "epoch": 45.328947368421055, "grad_norm": 2.1554789543151855, "learning_rate": 0.0001, "loss": 0.0327, "step": 6890 }, { "epoch": 45.39473684210526, "grad_norm": 1.984050989151001, "learning_rate": 0.0001, "loss": 0.0343, "step": 6900 }, { "epoch": 45.46052631578947, "grad_norm": 1.7056621313095093, "learning_rate": 0.0001, "loss": 0.0333, "step": 6910 }, { "epoch": 45.526315789473685, "grad_norm": 1.9881770610809326, "learning_rate": 0.0001, "loss": 0.0318, "step": 6920 }, { "epoch": 45.5921052631579, "grad_norm": 1.829092264175415, "learning_rate": 0.0001, "loss": 0.0333, "step": 6930 }, { "epoch": 45.6578947368421, "grad_norm": 1.8903696537017822, "learning_rate": 0.0001, "loss": 0.0299, "step": 6940 }, { "epoch": 45.723684210526315, "grad_norm": 2.089012861251831, "learning_rate": 0.0001, "loss": 0.035, "step": 6950 }, { "epoch": 45.78947368421053, "grad_norm": 2.2835586071014404, "learning_rate": 0.0001, "loss": 0.0324, "step": 6960 }, { "epoch": 45.85526315789474, "grad_norm": 2.3608603477478027, "learning_rate": 0.0001, "loss": 0.0363, "step": 6970 }, { "epoch": 45.921052631578945, "grad_norm": 2.000751495361328, "learning_rate": 0.0001, "loss": 0.0344, "step": 6980 }, { "epoch": 45.98684210526316, "grad_norm": 2.0586018562316895, "learning_rate": 0.0001, "loss": 0.0369, "step": 6990 }, { "epoch": 46.05263157894737, "grad_norm": 2.077038526535034, "learning_rate": 0.0001, "loss": 0.0344, "step": 7000 }, { "epoch": 46.11842105263158, "grad_norm": 2.457906484603882, "learning_rate": 0.0001, "loss": 0.0353, "step": 7010 }, { "epoch": 46.18421052631579, "grad_norm": 2.286679267883301, "learning_rate": 0.0001, "loss": 0.0348, "step": 7020 }, { "epoch": 46.25, "grad_norm": 2.5469555854797363, "learning_rate": 0.0001, "loss": 0.0343, "step": 7030 }, { "epoch": 46.31578947368421, "grad_norm": 2.1791422367095947, "learning_rate": 0.0001, "loss": 0.0321, "step": 7040 }, { "epoch": 46.38157894736842, "grad_norm": 3.0373752117156982, "learning_rate": 0.0001, "loss": 0.0351, "step": 7050 }, { "epoch": 46.44736842105263, "grad_norm": 2.291721820831299, "learning_rate": 0.0001, "loss": 0.031, "step": 7060 }, { "epoch": 46.51315789473684, "grad_norm": 2.208719253540039, "learning_rate": 0.0001, "loss": 0.0308, "step": 7070 }, { "epoch": 46.578947368421055, "grad_norm": 2.194161891937256, "learning_rate": 0.0001, "loss": 0.0345, "step": 7080 }, { "epoch": 46.64473684210526, "grad_norm": 2.2741446495056152, "learning_rate": 0.0001, "loss": 0.033, "step": 7090 }, { "epoch": 46.71052631578947, "grad_norm": 2.3912644386291504, "learning_rate": 0.0001, "loss": 0.0355, "step": 7100 }, { "epoch": 46.776315789473685, "grad_norm": 2.1767451763153076, "learning_rate": 0.0001, "loss": 0.0329, "step": 7110 }, { "epoch": 46.8421052631579, "grad_norm": 2.3903443813323975, "learning_rate": 0.0001, "loss": 0.0332, "step": 7120 }, { "epoch": 46.9078947368421, "grad_norm": 2.259274482727051, "learning_rate": 0.0001, "loss": 0.0335, "step": 7130 }, { "epoch": 46.973684210526315, "grad_norm": 2.3009188175201416, "learning_rate": 0.0001, "loss": 0.0327, "step": 7140 }, { "epoch": 47.03947368421053, "grad_norm": 2.3467867374420166, "learning_rate": 0.0001, "loss": 0.0318, "step": 7150 }, { "epoch": 47.10526315789474, "grad_norm": 2.426922082901001, "learning_rate": 0.0001, "loss": 0.0324, "step": 7160 }, { "epoch": 47.171052631578945, "grad_norm": 2.1517276763916016, "learning_rate": 0.0001, "loss": 0.0337, "step": 7170 }, { "epoch": 47.23684210526316, "grad_norm": 2.0987422466278076, "learning_rate": 0.0001, "loss": 0.0281, "step": 7180 }, { "epoch": 47.30263157894737, "grad_norm": 1.8270368576049805, "learning_rate": 0.0001, "loss": 0.0329, "step": 7190 }, { "epoch": 47.36842105263158, "grad_norm": 2.406790018081665, "learning_rate": 0.0001, "loss": 0.0296, "step": 7200 }, { "epoch": 47.43421052631579, "grad_norm": 1.9560794830322266, "learning_rate": 0.0001, "loss": 0.032, "step": 7210 }, { "epoch": 47.5, "grad_norm": 1.9922142028808594, "learning_rate": 0.0001, "loss": 0.0318, "step": 7220 }, { "epoch": 47.56578947368421, "grad_norm": 1.9082542657852173, "learning_rate": 0.0001, "loss": 0.032, "step": 7230 }, { "epoch": 47.63157894736842, "grad_norm": 2.251033306121826, "learning_rate": 0.0001, "loss": 0.0326, "step": 7240 }, { "epoch": 47.69736842105263, "grad_norm": 1.8770537376403809, "learning_rate": 0.0001, "loss": 0.0344, "step": 7250 }, { "epoch": 47.76315789473684, "grad_norm": 2.259347438812256, "learning_rate": 0.0001, "loss": 0.0332, "step": 7260 }, { "epoch": 47.828947368421055, "grad_norm": 2.3631973266601562, "learning_rate": 0.0001, "loss": 0.0336, "step": 7270 }, { "epoch": 47.89473684210526, "grad_norm": 2.6666479110717773, "learning_rate": 0.0001, "loss": 0.0331, "step": 7280 }, { "epoch": 47.96052631578947, "grad_norm": 2.482814073562622, "learning_rate": 0.0001, "loss": 0.0354, "step": 7290 }, { "epoch": 48.026315789473685, "grad_norm": 2.0329630374908447, "learning_rate": 0.0001, "loss": 0.0357, "step": 7300 }, { "epoch": 48.0921052631579, "grad_norm": 2.2136952877044678, "learning_rate": 0.0001, "loss": 0.0319, "step": 7310 }, { "epoch": 48.1578947368421, "grad_norm": 2.1611454486846924, "learning_rate": 0.0001, "loss": 0.0326, "step": 7320 }, { "epoch": 48.223684210526315, "grad_norm": 2.0240912437438965, "learning_rate": 0.0001, "loss": 0.0317, "step": 7330 }, { "epoch": 48.28947368421053, "grad_norm": 2.6601762771606445, "learning_rate": 0.0001, "loss": 0.0338, "step": 7340 }, { "epoch": 48.35526315789474, "grad_norm": 2.2921714782714844, "learning_rate": 0.0001, "loss": 0.0305, "step": 7350 }, { "epoch": 48.421052631578945, "grad_norm": 2.367628812789917, "learning_rate": 0.0001, "loss": 0.0322, "step": 7360 }, { "epoch": 48.48684210526316, "grad_norm": 2.152510166168213, "learning_rate": 0.0001, "loss": 0.0337, "step": 7370 }, { "epoch": 48.55263157894737, "grad_norm": 2.1306285858154297, "learning_rate": 0.0001, "loss": 0.0329, "step": 7380 }, { "epoch": 48.61842105263158, "grad_norm": 2.3841469287872314, "learning_rate": 0.0001, "loss": 0.0313, "step": 7390 }, { "epoch": 48.68421052631579, "grad_norm": 2.2278404235839844, "learning_rate": 0.0001, "loss": 0.0317, "step": 7400 }, { "epoch": 48.75, "grad_norm": 2.0181496143341064, "learning_rate": 0.0001, "loss": 0.0339, "step": 7410 }, { "epoch": 48.81578947368421, "grad_norm": 1.9607185125350952, "learning_rate": 0.0001, "loss": 0.0349, "step": 7420 }, { "epoch": 48.88157894736842, "grad_norm": 2.1952409744262695, "learning_rate": 0.0001, "loss": 0.0347, "step": 7430 }, { "epoch": 48.94736842105263, "grad_norm": 1.9687610864639282, "learning_rate": 0.0001, "loss": 0.0341, "step": 7440 }, { "epoch": 49.01315789473684, "grad_norm": 2.2068471908569336, "learning_rate": 0.0001, "loss": 0.0333, "step": 7450 }, { "epoch": 49.078947368421055, "grad_norm": 2.1997668743133545, "learning_rate": 0.0001, "loss": 0.0339, "step": 7460 }, { "epoch": 49.14473684210526, "grad_norm": 2.183655261993408, "learning_rate": 0.0001, "loss": 0.033, "step": 7470 }, { "epoch": 49.21052631578947, "grad_norm": 1.942799687385559, "learning_rate": 0.0001, "loss": 0.0336, "step": 7480 }, { "epoch": 49.276315789473685, "grad_norm": 1.9380285739898682, "learning_rate": 0.0001, "loss": 0.0334, "step": 7490 }, { "epoch": 49.3421052631579, "grad_norm": 2.13240122795105, "learning_rate": 0.0001, "loss": 0.0337, "step": 7500 }, { "epoch": 49.4078947368421, "grad_norm": 2.2831900119781494, "learning_rate": 0.0001, "loss": 0.031, "step": 7510 }, { "epoch": 49.473684210526315, "grad_norm": 2.127020835876465, "learning_rate": 0.0001, "loss": 0.031, "step": 7520 }, { "epoch": 49.53947368421053, "grad_norm": 2.251577138900757, "learning_rate": 0.0001, "loss": 0.0301, "step": 7530 }, { "epoch": 49.60526315789474, "grad_norm": 2.340786933898926, "learning_rate": 0.0001, "loss": 0.0322, "step": 7540 }, { "epoch": 49.671052631578945, "grad_norm": 2.267075300216675, "learning_rate": 0.0001, "loss": 0.0346, "step": 7550 }, { "epoch": 49.73684210526316, "grad_norm": 2.334462881088257, "learning_rate": 0.0001, "loss": 0.0346, "step": 7560 }, { "epoch": 49.80263157894737, "grad_norm": 2.038785696029663, "learning_rate": 0.0001, "loss": 0.0333, "step": 7570 }, { "epoch": 49.86842105263158, "grad_norm": 2.192054510116577, "learning_rate": 0.0001, "loss": 0.0321, "step": 7580 }, { "epoch": 49.93421052631579, "grad_norm": 2.1006948947906494, "learning_rate": 0.0001, "loss": 0.0337, "step": 7590 }, { "epoch": 50.0, "grad_norm": 1.9702825546264648, "learning_rate": 0.0001, "loss": 0.0324, "step": 7600 }, { "epoch": 50.06578947368421, "grad_norm": 1.8587983846664429, "learning_rate": 0.0001, "loss": 0.0297, "step": 7610 }, { "epoch": 50.13157894736842, "grad_norm": 2.164700984954834, "learning_rate": 0.0001, "loss": 0.0321, "step": 7620 }, { "epoch": 50.19736842105263, "grad_norm": 2.0936055183410645, "learning_rate": 0.0001, "loss": 0.0328, "step": 7630 }, { "epoch": 50.26315789473684, "grad_norm": 2.155484914779663, "learning_rate": 0.0001, "loss": 0.0332, "step": 7640 }, { "epoch": 50.328947368421055, "grad_norm": 2.1522960662841797, "learning_rate": 0.0001, "loss": 0.035, "step": 7650 }, { "epoch": 50.39473684210526, "grad_norm": 1.9459351301193237, "learning_rate": 0.0001, "loss": 0.0312, "step": 7660 }, { "epoch": 50.46052631578947, "grad_norm": 2.1670145988464355, "learning_rate": 0.0001, "loss": 0.0305, "step": 7670 }, { "epoch": 50.526315789473685, "grad_norm": 2.069352626800537, "learning_rate": 0.0001, "loss": 0.0344, "step": 7680 }, { "epoch": 50.5921052631579, "grad_norm": 1.8716386556625366, "learning_rate": 0.0001, "loss": 0.03, "step": 7690 }, { "epoch": 50.6578947368421, "grad_norm": 2.056234836578369, "learning_rate": 0.0001, "loss": 0.0303, "step": 7700 }, { "epoch": 50.723684210526315, "grad_norm": 2.560983896255493, "learning_rate": 0.0001, "loss": 0.0327, "step": 7710 }, { "epoch": 50.78947368421053, "grad_norm": 2.280230760574341, "learning_rate": 0.0001, "loss": 0.0354, "step": 7720 }, { "epoch": 50.85526315789474, "grad_norm": 2.542572498321533, "learning_rate": 0.0001, "loss": 0.0322, "step": 7730 }, { "epoch": 50.921052631578945, "grad_norm": 2.2160181999206543, "learning_rate": 0.0001, "loss": 0.033, "step": 7740 }, { "epoch": 50.98684210526316, "grad_norm": 1.9975266456604004, "learning_rate": 0.0001, "loss": 0.0314, "step": 7750 }, { "epoch": 51.05263157894737, "grad_norm": 2.1476669311523438, "learning_rate": 0.0001, "loss": 0.0341, "step": 7760 }, { "epoch": 51.11842105263158, "grad_norm": 1.9486243724822998, "learning_rate": 0.0001, "loss": 0.0319, "step": 7770 }, { "epoch": 51.18421052631579, "grad_norm": 1.993624210357666, "learning_rate": 0.0001, "loss": 0.0322, "step": 7780 }, { "epoch": 51.25, "grad_norm": 2.0168542861938477, "learning_rate": 0.0001, "loss": 0.0321, "step": 7790 }, { "epoch": 51.31578947368421, "grad_norm": 2.1617634296417236, "learning_rate": 0.0001, "loss": 0.0312, "step": 7800 }, { "epoch": 51.38157894736842, "grad_norm": 1.82743239402771, "learning_rate": 0.0001, "loss": 0.0334, "step": 7810 }, { "epoch": 51.44736842105263, "grad_norm": 1.7217556238174438, "learning_rate": 0.0001, "loss": 0.031, "step": 7820 }, { "epoch": 51.51315789473684, "grad_norm": 1.9004185199737549, "learning_rate": 0.0001, "loss": 0.0361, "step": 7830 }, { "epoch": 51.578947368421055, "grad_norm": 2.210575580596924, "learning_rate": 0.0001, "loss": 0.0335, "step": 7840 }, { "epoch": 51.64473684210526, "grad_norm": 1.8974437713623047, "learning_rate": 0.0001, "loss": 0.0336, "step": 7850 }, { "epoch": 51.71052631578947, "grad_norm": 1.8807051181793213, "learning_rate": 0.0001, "loss": 0.0341, "step": 7860 }, { "epoch": 51.776315789473685, "grad_norm": 1.9475070238113403, "learning_rate": 0.0001, "loss": 0.0337, "step": 7870 }, { "epoch": 51.8421052631579, "grad_norm": 1.9319655895233154, "learning_rate": 0.0001, "loss": 0.0364, "step": 7880 }, { "epoch": 51.9078947368421, "grad_norm": 1.9801864624023438, "learning_rate": 0.0001, "loss": 0.0328, "step": 7890 }, { "epoch": 51.973684210526315, "grad_norm": 1.8490853309631348, "learning_rate": 0.0001, "loss": 0.0323, "step": 7900 }, { "epoch": 52.03947368421053, "grad_norm": 1.844796061515808, "learning_rate": 0.0001, "loss": 0.0326, "step": 7910 }, { "epoch": 52.10526315789474, "grad_norm": 2.170541763305664, "learning_rate": 0.0001, "loss": 0.0304, "step": 7920 }, { "epoch": 52.171052631578945, "grad_norm": 1.8265526294708252, "learning_rate": 0.0001, "loss": 0.032, "step": 7930 }, { "epoch": 52.23684210526316, "grad_norm": 1.8391716480255127, "learning_rate": 0.0001, "loss": 0.0324, "step": 7940 }, { "epoch": 52.30263157894737, "grad_norm": 2.0495717525482178, "learning_rate": 0.0001, "loss": 0.0336, "step": 7950 }, { "epoch": 52.36842105263158, "grad_norm": 2.0973620414733887, "learning_rate": 0.0001, "loss": 0.0328, "step": 7960 }, { "epoch": 52.43421052631579, "grad_norm": 2.0247726440429688, "learning_rate": 0.0001, "loss": 0.0307, "step": 7970 }, { "epoch": 52.5, "grad_norm": 2.3339807987213135, "learning_rate": 0.0001, "loss": 0.032, "step": 7980 }, { "epoch": 52.56578947368421, "grad_norm": 2.4479763507843018, "learning_rate": 0.0001, "loss": 0.0342, "step": 7990 }, { "epoch": 52.63157894736842, "grad_norm": 2.1954760551452637, "learning_rate": 0.0001, "loss": 0.03, "step": 8000 }, { "epoch": 52.69736842105263, "grad_norm": 2.01871919631958, "learning_rate": 0.0001, "loss": 0.0322, "step": 8010 }, { "epoch": 52.76315789473684, "grad_norm": 1.8598896265029907, "learning_rate": 0.0001, "loss": 0.0304, "step": 8020 }, { "epoch": 52.828947368421055, "grad_norm": 2.171518087387085, "learning_rate": 0.0001, "loss": 0.0301, "step": 8030 }, { "epoch": 52.89473684210526, "grad_norm": 2.029583692550659, "learning_rate": 0.0001, "loss": 0.0308, "step": 8040 }, { "epoch": 52.96052631578947, "grad_norm": 1.7323065996170044, "learning_rate": 0.0001, "loss": 0.0322, "step": 8050 }, { "epoch": 53.026315789473685, "grad_norm": 2.0981595516204834, "learning_rate": 0.0001, "loss": 0.03, "step": 8060 }, { "epoch": 53.0921052631579, "grad_norm": 1.671781063079834, "learning_rate": 0.0001, "loss": 0.031, "step": 8070 }, { "epoch": 53.1578947368421, "grad_norm": 1.8050312995910645, "learning_rate": 0.0001, "loss": 0.0324, "step": 8080 }, { "epoch": 53.223684210526315, "grad_norm": 2.084514617919922, "learning_rate": 0.0001, "loss": 0.0377, "step": 8090 }, { "epoch": 53.28947368421053, "grad_norm": 1.9709466695785522, "learning_rate": 0.0001, "loss": 0.0304, "step": 8100 }, { "epoch": 53.35526315789474, "grad_norm": 1.856992244720459, "learning_rate": 0.0001, "loss": 0.0303, "step": 8110 }, { "epoch": 53.421052631578945, "grad_norm": 1.892453670501709, "learning_rate": 0.0001, "loss": 0.0307, "step": 8120 }, { "epoch": 53.48684210526316, "grad_norm": 1.9388136863708496, "learning_rate": 0.0001, "loss": 0.0303, "step": 8130 }, { "epoch": 53.55263157894737, "grad_norm": 1.8416920900344849, "learning_rate": 0.0001, "loss": 0.0307, "step": 8140 }, { "epoch": 53.61842105263158, "grad_norm": 2.1083364486694336, "learning_rate": 0.0001, "loss": 0.0316, "step": 8150 }, { "epoch": 53.68421052631579, "grad_norm": 2.071626663208008, "learning_rate": 0.0001, "loss": 0.028, "step": 8160 }, { "epoch": 53.75, "grad_norm": 1.8106697797775269, "learning_rate": 0.0001, "loss": 0.0311, "step": 8170 }, { "epoch": 53.81578947368421, "grad_norm": 1.9116227626800537, "learning_rate": 0.0001, "loss": 0.0316, "step": 8180 }, { "epoch": 53.88157894736842, "grad_norm": 1.6516140699386597, "learning_rate": 0.0001, "loss": 0.0303, "step": 8190 }, { "epoch": 53.94736842105263, "grad_norm": 2.0792529582977295, "learning_rate": 0.0001, "loss": 0.0331, "step": 8200 }, { "epoch": 54.01315789473684, "grad_norm": 2.022188425064087, "learning_rate": 0.0001, "loss": 0.0321, "step": 8210 }, { "epoch": 54.078947368421055, "grad_norm": 1.862682819366455, "learning_rate": 0.0001, "loss": 0.029, "step": 8220 }, { "epoch": 54.14473684210526, "grad_norm": 1.7206437587738037, "learning_rate": 0.0001, "loss": 0.0311, "step": 8230 }, { "epoch": 54.21052631578947, "grad_norm": 1.6664676666259766, "learning_rate": 0.0001, "loss": 0.0306, "step": 8240 }, { "epoch": 54.276315789473685, "grad_norm": 2.4094207286834717, "learning_rate": 0.0001, "loss": 0.0314, "step": 8250 }, { "epoch": 54.3421052631579, "grad_norm": 1.9840387105941772, "learning_rate": 0.0001, "loss": 0.0316, "step": 8260 }, { "epoch": 54.4078947368421, "grad_norm": 2.449998140335083, "learning_rate": 0.0001, "loss": 0.0323, "step": 8270 }, { "epoch": 54.473684210526315, "grad_norm": 2.195594072341919, "learning_rate": 0.0001, "loss": 0.0317, "step": 8280 }, { "epoch": 54.53947368421053, "grad_norm": 2.2626521587371826, "learning_rate": 0.0001, "loss": 0.0319, "step": 8290 }, { "epoch": 54.60526315789474, "grad_norm": 1.693280816078186, "learning_rate": 0.0001, "loss": 0.032, "step": 8300 }, { "epoch": 54.671052631578945, "grad_norm": 1.8923887014389038, "learning_rate": 0.0001, "loss": 0.0296, "step": 8310 }, { "epoch": 54.73684210526316, "grad_norm": 1.639238953590393, "learning_rate": 0.0001, "loss": 0.0289, "step": 8320 }, { "epoch": 54.80263157894737, "grad_norm": 1.9876776933670044, "learning_rate": 0.0001, "loss": 0.0323, "step": 8330 }, { "epoch": 54.86842105263158, "grad_norm": 1.686047077178955, "learning_rate": 0.0001, "loss": 0.0316, "step": 8340 }, { "epoch": 54.93421052631579, "grad_norm": 2.186433792114258, "learning_rate": 0.0001, "loss": 0.0323, "step": 8350 }, { "epoch": 55.0, "grad_norm": 1.9404265880584717, "learning_rate": 0.0001, "loss": 0.0302, "step": 8360 }, { "epoch": 55.06578947368421, "grad_norm": 2.0368666648864746, "learning_rate": 0.0001, "loss": 0.031, "step": 8370 }, { "epoch": 55.13157894736842, "grad_norm": 1.385018229484558, "learning_rate": 0.0001, "loss": 0.0301, "step": 8380 }, { "epoch": 55.19736842105263, "grad_norm": 2.025214433670044, "learning_rate": 0.0001, "loss": 0.0291, "step": 8390 }, { "epoch": 55.26315789473684, "grad_norm": 1.7362509965896606, "learning_rate": 0.0001, "loss": 0.0302, "step": 8400 }, { "epoch": 55.328947368421055, "grad_norm": 1.9747296571731567, "learning_rate": 0.0001, "loss": 0.0317, "step": 8410 }, { "epoch": 55.39473684210526, "grad_norm": 1.8088208436965942, "learning_rate": 0.0001, "loss": 0.0293, "step": 8420 }, { "epoch": 55.46052631578947, "grad_norm": 1.8630869388580322, "learning_rate": 0.0001, "loss": 0.0318, "step": 8430 }, { "epoch": 55.526315789473685, "grad_norm": 1.8169368505477905, "learning_rate": 0.0001, "loss": 0.0294, "step": 8440 }, { "epoch": 55.5921052631579, "grad_norm": 1.717041254043579, "learning_rate": 0.0001, "loss": 0.0304, "step": 8450 }, { "epoch": 55.6578947368421, "grad_norm": 2.054370164871216, "learning_rate": 0.0001, "loss": 0.0293, "step": 8460 }, { "epoch": 55.723684210526315, "grad_norm": 1.8718016147613525, "learning_rate": 0.0001, "loss": 0.0294, "step": 8470 }, { "epoch": 55.78947368421053, "grad_norm": 2.4793713092803955, "learning_rate": 0.0001, "loss": 0.0302, "step": 8480 }, { "epoch": 55.85526315789474, "grad_norm": 2.1499078273773193, "learning_rate": 0.0001, "loss": 0.0313, "step": 8490 }, { "epoch": 55.921052631578945, "grad_norm": 2.1316916942596436, "learning_rate": 0.0001, "loss": 0.032, "step": 8500 }, { "epoch": 55.98684210526316, "grad_norm": 2.108400583267212, "learning_rate": 0.0001, "loss": 0.0279, "step": 8510 }, { "epoch": 56.05263157894737, "grad_norm": 1.7546727657318115, "learning_rate": 0.0001, "loss": 0.0305, "step": 8520 }, { "epoch": 56.11842105263158, "grad_norm": 1.8155990839004517, "learning_rate": 0.0001, "loss": 0.0285, "step": 8530 }, { "epoch": 56.18421052631579, "grad_norm": 1.8450534343719482, "learning_rate": 0.0001, "loss": 0.0299, "step": 8540 }, { "epoch": 56.25, "grad_norm": 1.9651875495910645, "learning_rate": 0.0001, "loss": 0.0308, "step": 8550 }, { "epoch": 56.31578947368421, "grad_norm": 1.9930384159088135, "learning_rate": 0.0001, "loss": 0.0278, "step": 8560 }, { "epoch": 56.38157894736842, "grad_norm": 2.035289764404297, "learning_rate": 0.0001, "loss": 0.0296, "step": 8570 }, { "epoch": 56.44736842105263, "grad_norm": 2.206793785095215, "learning_rate": 0.0001, "loss": 0.0295, "step": 8580 }, { "epoch": 56.51315789473684, "grad_norm": 2.2738990783691406, "learning_rate": 0.0001, "loss": 0.0295, "step": 8590 }, { "epoch": 56.578947368421055, "grad_norm": 1.6985399723052979, "learning_rate": 0.0001, "loss": 0.0293, "step": 8600 }, { "epoch": 56.64473684210526, "grad_norm": 1.7506465911865234, "learning_rate": 0.0001, "loss": 0.026, "step": 8610 }, { "epoch": 56.71052631578947, "grad_norm": 1.8177592754364014, "learning_rate": 0.0001, "loss": 0.0299, "step": 8620 }, { "epoch": 56.776315789473685, "grad_norm": 2.0824978351593018, "learning_rate": 0.0001, "loss": 0.0332, "step": 8630 }, { "epoch": 56.8421052631579, "grad_norm": 1.8708689212799072, "learning_rate": 0.0001, "loss": 0.0294, "step": 8640 }, { "epoch": 56.9078947368421, "grad_norm": 1.536291480064392, "learning_rate": 0.0001, "loss": 0.0301, "step": 8650 }, { "epoch": 56.973684210526315, "grad_norm": 1.5639556646347046, "learning_rate": 0.0001, "loss": 0.0292, "step": 8660 }, { "epoch": 57.03947368421053, "grad_norm": 1.836817979812622, "learning_rate": 0.0001, "loss": 0.0284, "step": 8670 }, { "epoch": 57.10526315789474, "grad_norm": 1.6977314949035645, "learning_rate": 0.0001, "loss": 0.028, "step": 8680 }, { "epoch": 57.171052631578945, "grad_norm": 1.9491312503814697, "learning_rate": 0.0001, "loss": 0.0301, "step": 8690 }, { "epoch": 57.23684210526316, "grad_norm": 1.9810190200805664, "learning_rate": 0.0001, "loss": 0.0299, "step": 8700 }, { "epoch": 57.30263157894737, "grad_norm": 3.380073308944702, "learning_rate": 0.0001, "loss": 0.0305, "step": 8710 }, { "epoch": 57.36842105263158, "grad_norm": 2.4485232830047607, "learning_rate": 0.0001, "loss": 0.0324, "step": 8720 }, { "epoch": 57.43421052631579, "grad_norm": 2.817413330078125, "learning_rate": 0.0001, "loss": 0.0293, "step": 8730 }, { "epoch": 57.5, "grad_norm": 2.479456901550293, "learning_rate": 0.0001, "loss": 0.0293, "step": 8740 }, { "epoch": 57.56578947368421, "grad_norm": 2.279740571975708, "learning_rate": 0.0001, "loss": 0.0278, "step": 8750 }, { "epoch": 57.63157894736842, "grad_norm": 2.4175727367401123, "learning_rate": 0.0001, "loss": 0.0283, "step": 8760 }, { "epoch": 57.69736842105263, "grad_norm": 1.8929423093795776, "learning_rate": 0.0001, "loss": 0.0269, "step": 8770 }, { "epoch": 57.76315789473684, "grad_norm": 1.9583559036254883, "learning_rate": 0.0001, "loss": 0.0271, "step": 8780 }, { "epoch": 57.828947368421055, "grad_norm": 2.2703166007995605, "learning_rate": 0.0001, "loss": 0.0274, "step": 8790 }, { "epoch": 57.89473684210526, "grad_norm": 2.0460057258605957, "learning_rate": 0.0001, "loss": 0.027, "step": 8800 }, { "epoch": 57.96052631578947, "grad_norm": 2.1975557804107666, "learning_rate": 0.0001, "loss": 0.0315, "step": 8810 }, { "epoch": 58.026315789473685, "grad_norm": 1.971722960472107, "learning_rate": 0.0001, "loss": 0.0272, "step": 8820 }, { "epoch": 58.0921052631579, "grad_norm": 2.0381031036376953, "learning_rate": 0.0001, "loss": 0.0289, "step": 8830 }, { "epoch": 58.1578947368421, "grad_norm": 1.650619626045227, "learning_rate": 0.0001, "loss": 0.0301, "step": 8840 }, { "epoch": 58.223684210526315, "grad_norm": 2.122473955154419, "learning_rate": 0.0001, "loss": 0.0284, "step": 8850 }, { "epoch": 58.28947368421053, "grad_norm": 1.7196934223175049, "learning_rate": 0.0001, "loss": 0.0272, "step": 8860 }, { "epoch": 58.35526315789474, "grad_norm": 1.6575008630752563, "learning_rate": 0.0001, "loss": 0.0277, "step": 8870 }, { "epoch": 58.421052631578945, "grad_norm": 1.7115286588668823, "learning_rate": 0.0001, "loss": 0.0312, "step": 8880 }, { "epoch": 58.48684210526316, "grad_norm": 1.908919095993042, "learning_rate": 0.0001, "loss": 0.0283, "step": 8890 }, { "epoch": 58.55263157894737, "grad_norm": 1.7055505514144897, "learning_rate": 0.0001, "loss": 0.0299, "step": 8900 }, { "epoch": 58.61842105263158, "grad_norm": 1.72140371799469, "learning_rate": 0.0001, "loss": 0.03, "step": 8910 }, { "epoch": 58.68421052631579, "grad_norm": 1.752676248550415, "learning_rate": 0.0001, "loss": 0.0288, "step": 8920 }, { "epoch": 58.75, "grad_norm": 1.886494517326355, "learning_rate": 0.0001, "loss": 0.0301, "step": 8930 }, { "epoch": 58.81578947368421, "grad_norm": 1.7200045585632324, "learning_rate": 0.0001, "loss": 0.0308, "step": 8940 }, { "epoch": 58.88157894736842, "grad_norm": 1.7977442741394043, "learning_rate": 0.0001, "loss": 0.0323, "step": 8950 }, { "epoch": 58.94736842105263, "grad_norm": 1.9197369813919067, "learning_rate": 0.0001, "loss": 0.0353, "step": 8960 }, { "epoch": 59.01315789473684, "grad_norm": 1.717230200767517, "learning_rate": 0.0001, "loss": 0.0335, "step": 8970 }, { "epoch": 59.078947368421055, "grad_norm": 1.8992480039596558, "learning_rate": 0.0001, "loss": 0.0305, "step": 8980 }, { "epoch": 59.14473684210526, "grad_norm": 1.9328705072402954, "learning_rate": 0.0001, "loss": 0.0312, "step": 8990 }, { "epoch": 59.21052631578947, "grad_norm": 1.7343662977218628, "learning_rate": 0.0001, "loss": 0.0295, "step": 9000 }, { "epoch": 59.276315789473685, "grad_norm": 1.6927893161773682, "learning_rate": 0.0001, "loss": 0.0326, "step": 9010 }, { "epoch": 59.3421052631579, "grad_norm": 1.746718406677246, "learning_rate": 0.0001, "loss": 0.0308, "step": 9020 }, { "epoch": 59.4078947368421, "grad_norm": 2.095132827758789, "learning_rate": 0.0001, "loss": 0.0296, "step": 9030 }, { "epoch": 59.473684210526315, "grad_norm": 1.837533950805664, "learning_rate": 0.0001, "loss": 0.0346, "step": 9040 }, { "epoch": 59.53947368421053, "grad_norm": 1.8166559934616089, "learning_rate": 0.0001, "loss": 0.0295, "step": 9050 }, { "epoch": 59.60526315789474, "grad_norm": 1.6477857828140259, "learning_rate": 0.0001, "loss": 0.0301, "step": 9060 }, { "epoch": 59.671052631578945, "grad_norm": 1.8890045881271362, "learning_rate": 0.0001, "loss": 0.0294, "step": 9070 }, { "epoch": 59.73684210526316, "grad_norm": 2.0278661251068115, "learning_rate": 0.0001, "loss": 0.0334, "step": 9080 }, { "epoch": 59.80263157894737, "grad_norm": 2.0316243171691895, "learning_rate": 0.0001, "loss": 0.0266, "step": 9090 }, { "epoch": 59.86842105263158, "grad_norm": 1.8405317068099976, "learning_rate": 0.0001, "loss": 0.0282, "step": 9100 }, { "epoch": 59.93421052631579, "grad_norm": 2.397641897201538, "learning_rate": 0.0001, "loss": 0.0295, "step": 9110 }, { "epoch": 60.0, "grad_norm": 1.7151198387145996, "learning_rate": 0.0001, "loss": 0.0278, "step": 9120 }, { "epoch": 60.06578947368421, "grad_norm": 2.201002359390259, "learning_rate": 0.0001, "loss": 0.0294, "step": 9130 }, { "epoch": 60.13157894736842, "grad_norm": 1.911597490310669, "learning_rate": 0.0001, "loss": 0.0289, "step": 9140 }, { "epoch": 60.19736842105263, "grad_norm": 2.3099772930145264, "learning_rate": 0.0001, "loss": 0.0279, "step": 9150 }, { "epoch": 60.26315789473684, "grad_norm": 2.0865139961242676, "learning_rate": 0.0001, "loss": 0.0269, "step": 9160 }, { "epoch": 60.328947368421055, "grad_norm": 2.1491951942443848, "learning_rate": 0.0001, "loss": 0.0293, "step": 9170 }, { "epoch": 60.39473684210526, "grad_norm": 1.7886714935302734, "learning_rate": 0.0001, "loss": 0.0286, "step": 9180 }, { "epoch": 60.46052631578947, "grad_norm": 2.1274571418762207, "learning_rate": 0.0001, "loss": 0.0293, "step": 9190 }, { "epoch": 60.526315789473685, "grad_norm": 2.1661720275878906, "learning_rate": 0.0001, "loss": 0.0251, "step": 9200 }, { "epoch": 60.5921052631579, "grad_norm": 1.6278198957443237, "learning_rate": 0.0001, "loss": 0.0283, "step": 9210 }, { "epoch": 60.6578947368421, "grad_norm": 1.8151003122329712, "learning_rate": 0.0001, "loss": 0.0259, "step": 9220 }, { "epoch": 60.723684210526315, "grad_norm": 1.9003932476043701, "learning_rate": 0.0001, "loss": 0.0291, "step": 9230 }, { "epoch": 60.78947368421053, "grad_norm": 1.6437238454818726, "learning_rate": 0.0001, "loss": 0.0274, "step": 9240 }, { "epoch": 60.85526315789474, "grad_norm": 1.418359637260437, "learning_rate": 0.0001, "loss": 0.0285, "step": 9250 }, { "epoch": 60.921052631578945, "grad_norm": 1.5304903984069824, "learning_rate": 0.0001, "loss": 0.0293, "step": 9260 }, { "epoch": 60.98684210526316, "grad_norm": 1.7247731685638428, "learning_rate": 0.0001, "loss": 0.0301, "step": 9270 }, { "epoch": 61.05263157894737, "grad_norm": 1.9069534540176392, "learning_rate": 0.0001, "loss": 0.0318, "step": 9280 }, { "epoch": 61.11842105263158, "grad_norm": 1.6469271183013916, "learning_rate": 0.0001, "loss": 0.0325, "step": 9290 }, { "epoch": 61.18421052631579, "grad_norm": 1.5234088897705078, "learning_rate": 0.0001, "loss": 0.028, "step": 9300 }, { "epoch": 61.25, "grad_norm": 1.9155454635620117, "learning_rate": 0.0001, "loss": 0.0279, "step": 9310 }, { "epoch": 61.31578947368421, "grad_norm": 1.3420042991638184, "learning_rate": 0.0001, "loss": 0.0301, "step": 9320 }, { "epoch": 61.38157894736842, "grad_norm": 1.73533296585083, "learning_rate": 0.0001, "loss": 0.0323, "step": 9330 }, { "epoch": 61.44736842105263, "grad_norm": 2.0660176277160645, "learning_rate": 0.0001, "loss": 0.0302, "step": 9340 }, { "epoch": 61.51315789473684, "grad_norm": 1.8069857358932495, "learning_rate": 0.0001, "loss": 0.0314, "step": 9350 }, { "epoch": 61.578947368421055, "grad_norm": 2.0904011726379395, "learning_rate": 0.0001, "loss": 0.0303, "step": 9360 }, { "epoch": 61.64473684210526, "grad_norm": 1.8158031702041626, "learning_rate": 0.0001, "loss": 0.0301, "step": 9370 }, { "epoch": 61.71052631578947, "grad_norm": 1.410555362701416, "learning_rate": 0.0001, "loss": 0.0307, "step": 9380 }, { "epoch": 61.776315789473685, "grad_norm": 1.4610170125961304, "learning_rate": 0.0001, "loss": 0.0333, "step": 9390 }, { "epoch": 61.8421052631579, "grad_norm": 1.5231578350067139, "learning_rate": 0.0001, "loss": 0.0307, "step": 9400 }, { "epoch": 61.9078947368421, "grad_norm": 1.829412817955017, "learning_rate": 0.0001, "loss": 0.0315, "step": 9410 }, { "epoch": 61.973684210526315, "grad_norm": 1.9624525308609009, "learning_rate": 0.0001, "loss": 0.0302, "step": 9420 }, { "epoch": 62.03947368421053, "grad_norm": 1.5953247547149658, "learning_rate": 0.0001, "loss": 0.0293, "step": 9430 }, { "epoch": 62.10526315789474, "grad_norm": 1.8386650085449219, "learning_rate": 0.0001, "loss": 0.0321, "step": 9440 }, { "epoch": 62.171052631578945, "grad_norm": 2.2038257122039795, "learning_rate": 0.0001, "loss": 0.0312, "step": 9450 }, { "epoch": 62.23684210526316, "grad_norm": 2.08797550201416, "learning_rate": 0.0001, "loss": 0.0288, "step": 9460 }, { "epoch": 62.30263157894737, "grad_norm": 1.9139946699142456, "learning_rate": 0.0001, "loss": 0.0272, "step": 9470 }, { "epoch": 62.36842105263158, "grad_norm": 2.331393241882324, "learning_rate": 0.0001, "loss": 0.0292, "step": 9480 }, { "epoch": 62.43421052631579, "grad_norm": 1.5585893392562866, "learning_rate": 0.0001, "loss": 0.0285, "step": 9490 }, { "epoch": 62.5, "grad_norm": 1.67988920211792, "learning_rate": 0.0001, "loss": 0.0283, "step": 9500 }, { "epoch": 62.56578947368421, "grad_norm": 1.9753636121749878, "learning_rate": 0.0001, "loss": 0.0278, "step": 9510 }, { "epoch": 62.63157894736842, "grad_norm": 1.5838857889175415, "learning_rate": 0.0001, "loss": 0.0268, "step": 9520 }, { "epoch": 62.69736842105263, "grad_norm": 1.9455825090408325, "learning_rate": 0.0001, "loss": 0.0261, "step": 9530 }, { "epoch": 62.76315789473684, "grad_norm": 2.0940582752227783, "learning_rate": 0.0001, "loss": 0.029, "step": 9540 }, { "epoch": 62.828947368421055, "grad_norm": 2.082871198654175, "learning_rate": 0.0001, "loss": 0.0271, "step": 9550 }, { "epoch": 62.89473684210526, "grad_norm": 1.8131487369537354, "learning_rate": 0.0001, "loss": 0.0265, "step": 9560 }, { "epoch": 62.96052631578947, "grad_norm": 1.8822680711746216, "learning_rate": 0.0001, "loss": 0.0268, "step": 9570 }, { "epoch": 63.026315789473685, "grad_norm": 1.9734561443328857, "learning_rate": 0.0001, "loss": 0.0283, "step": 9580 }, { "epoch": 63.0921052631579, "grad_norm": 1.6446675062179565, "learning_rate": 0.0001, "loss": 0.0285, "step": 9590 }, { "epoch": 63.1578947368421, "grad_norm": 1.587969422340393, "learning_rate": 0.0001, "loss": 0.0283, "step": 9600 }, { "epoch": 63.223684210526315, "grad_norm": 2.2087881565093994, "learning_rate": 0.0001, "loss": 0.03, "step": 9610 }, { "epoch": 63.28947368421053, "grad_norm": 2.1909730434417725, "learning_rate": 0.0001, "loss": 0.0296, "step": 9620 }, { "epoch": 63.35526315789474, "grad_norm": 1.4683645963668823, "learning_rate": 0.0001, "loss": 0.027, "step": 9630 }, { "epoch": 63.421052631578945, "grad_norm": 1.7564547061920166, "learning_rate": 0.0001, "loss": 0.0273, "step": 9640 }, { "epoch": 63.48684210526316, "grad_norm": 1.922224998474121, "learning_rate": 0.0001, "loss": 0.03, "step": 9650 }, { "epoch": 63.55263157894737, "grad_norm": 1.8191381692886353, "learning_rate": 0.0001, "loss": 0.0262, "step": 9660 }, { "epoch": 63.61842105263158, "grad_norm": 1.686124324798584, "learning_rate": 0.0001, "loss": 0.0307, "step": 9670 }, { "epoch": 63.68421052631579, "grad_norm": 1.717603325843811, "learning_rate": 0.0001, "loss": 0.0285, "step": 9680 }, { "epoch": 63.75, "grad_norm": 1.855259656906128, "learning_rate": 0.0001, "loss": 0.0276, "step": 9690 }, { "epoch": 63.81578947368421, "grad_norm": 1.6836774349212646, "learning_rate": 0.0001, "loss": 0.0275, "step": 9700 }, { "epoch": 63.88157894736842, "grad_norm": 1.776174545288086, "learning_rate": 0.0001, "loss": 0.0288, "step": 9710 }, { "epoch": 63.94736842105263, "grad_norm": 1.868103265762329, "learning_rate": 0.0001, "loss": 0.0272, "step": 9720 }, { "epoch": 64.01315789473684, "grad_norm": 1.6347148418426514, "learning_rate": 0.0001, "loss": 0.0286, "step": 9730 }, { "epoch": 64.07894736842105, "grad_norm": 1.8645349740982056, "learning_rate": 0.0001, "loss": 0.0279, "step": 9740 }, { "epoch": 64.14473684210526, "grad_norm": 2.106388807296753, "learning_rate": 0.0001, "loss": 0.0289, "step": 9750 }, { "epoch": 64.21052631578948, "grad_norm": 1.4443974494934082, "learning_rate": 0.0001, "loss": 0.0289, "step": 9760 }, { "epoch": 64.27631578947368, "grad_norm": 1.8115508556365967, "learning_rate": 0.0001, "loss": 0.0298, "step": 9770 }, { "epoch": 64.34210526315789, "grad_norm": 1.6376737356185913, "learning_rate": 0.0001, "loss": 0.0275, "step": 9780 }, { "epoch": 64.40789473684211, "grad_norm": 1.7943278551101685, "learning_rate": 0.0001, "loss": 0.0297, "step": 9790 }, { "epoch": 64.47368421052632, "grad_norm": 1.6659159660339355, "learning_rate": 0.0001, "loss": 0.027, "step": 9800 }, { "epoch": 64.53947368421052, "grad_norm": 1.792837381362915, "learning_rate": 0.0001, "loss": 0.0265, "step": 9810 }, { "epoch": 64.60526315789474, "grad_norm": 1.9182575941085815, "learning_rate": 0.0001, "loss": 0.0293, "step": 9820 }, { "epoch": 64.67105263157895, "grad_norm": 1.6181617975234985, "learning_rate": 0.0001, "loss": 0.0284, "step": 9830 }, { "epoch": 64.73684210526316, "grad_norm": 1.5407286882400513, "learning_rate": 0.0001, "loss": 0.0298, "step": 9840 }, { "epoch": 64.80263157894737, "grad_norm": 1.621099591255188, "learning_rate": 0.0001, "loss": 0.0295, "step": 9850 }, { "epoch": 64.86842105263158, "grad_norm": 1.7339770793914795, "learning_rate": 0.0001, "loss": 0.0306, "step": 9860 }, { "epoch": 64.9342105263158, "grad_norm": 1.9840919971466064, "learning_rate": 0.0001, "loss": 0.0291, "step": 9870 }, { "epoch": 65.0, "grad_norm": 1.8245599269866943, "learning_rate": 0.0001, "loss": 0.0316, "step": 9880 }, { "epoch": 65.0657894736842, "grad_norm": 1.6021732091903687, "learning_rate": 0.0001, "loss": 0.0311, "step": 9890 }, { "epoch": 65.13157894736842, "grad_norm": 2.1346256732940674, "learning_rate": 0.0001, "loss": 0.0266, "step": 9900 }, { "epoch": 65.19736842105263, "grad_norm": 1.537832498550415, "learning_rate": 0.0001, "loss": 0.028, "step": 9910 }, { "epoch": 65.26315789473684, "grad_norm": 1.70017671585083, "learning_rate": 0.0001, "loss": 0.0269, "step": 9920 }, { "epoch": 65.32894736842105, "grad_norm": 2.058269500732422, "learning_rate": 0.0001, "loss": 0.0289, "step": 9930 }, { "epoch": 65.39473684210526, "grad_norm": 1.592274785041809, "learning_rate": 0.0001, "loss": 0.0301, "step": 9940 }, { "epoch": 65.46052631578948, "grad_norm": 1.6225742101669312, "learning_rate": 0.0001, "loss": 0.0305, "step": 9950 }, { "epoch": 65.52631578947368, "grad_norm": 1.3477301597595215, "learning_rate": 0.0001, "loss": 0.0272, "step": 9960 }, { "epoch": 65.59210526315789, "grad_norm": 1.6625304222106934, "learning_rate": 0.0001, "loss": 0.0301, "step": 9970 }, { "epoch": 65.65789473684211, "grad_norm": 1.5278533697128296, "learning_rate": 0.0001, "loss": 0.0269, "step": 9980 }, { "epoch": 65.72368421052632, "grad_norm": 1.9545159339904785, "learning_rate": 0.0001, "loss": 0.0279, "step": 9990 }, { "epoch": 65.78947368421052, "grad_norm": 2.0630109310150146, "learning_rate": 0.0001, "loss": 0.0284, "step": 10000 }, { "epoch": 65.85526315789474, "grad_norm": 2.0734527111053467, "learning_rate": 0.0001, "loss": 0.0304, "step": 10010 }, { "epoch": 65.92105263157895, "grad_norm": 1.8130292892456055, "learning_rate": 0.0001, "loss": 0.031, "step": 10020 }, { "epoch": 65.98684210526316, "grad_norm": 1.9439656734466553, "learning_rate": 0.0001, "loss": 0.03, "step": 10030 }, { "epoch": 66.05263157894737, "grad_norm": 1.825518012046814, "learning_rate": 0.0001, "loss": 0.0306, "step": 10040 }, { "epoch": 66.11842105263158, "grad_norm": 1.7734483480453491, "learning_rate": 0.0001, "loss": 0.0288, "step": 10050 }, { "epoch": 66.1842105263158, "grad_norm": 1.8506808280944824, "learning_rate": 0.0001, "loss": 0.0272, "step": 10060 }, { "epoch": 66.25, "grad_norm": 1.4367343187332153, "learning_rate": 0.0001, "loss": 0.0301, "step": 10070 }, { "epoch": 66.3157894736842, "grad_norm": 2.0297582149505615, "learning_rate": 0.0001, "loss": 0.0265, "step": 10080 }, { "epoch": 66.38157894736842, "grad_norm": 2.1654398441314697, "learning_rate": 0.0001, "loss": 0.0282, "step": 10090 }, { "epoch": 66.44736842105263, "grad_norm": 1.8896561861038208, "learning_rate": 0.0001, "loss": 0.0276, "step": 10100 }, { "epoch": 66.51315789473684, "grad_norm": 2.0529367923736572, "learning_rate": 0.0001, "loss": 0.0283, "step": 10110 }, { "epoch": 66.57894736842105, "grad_norm": 2.187924385070801, "learning_rate": 0.0001, "loss": 0.029, "step": 10120 }, { "epoch": 66.64473684210526, "grad_norm": 1.9992048740386963, "learning_rate": 0.0001, "loss": 0.0277, "step": 10130 }, { "epoch": 66.71052631578948, "grad_norm": 1.930832862854004, "learning_rate": 0.0001, "loss": 0.0274, "step": 10140 }, { "epoch": 66.77631578947368, "grad_norm": 2.112302780151367, "learning_rate": 0.0001, "loss": 0.0292, "step": 10150 }, { "epoch": 66.84210526315789, "grad_norm": 2.1016273498535156, "learning_rate": 0.0001, "loss": 0.0235, "step": 10160 }, { "epoch": 66.90789473684211, "grad_norm": 1.839139699935913, "learning_rate": 0.0001, "loss": 0.025, "step": 10170 }, { "epoch": 66.97368421052632, "grad_norm": 2.132181406021118, "learning_rate": 0.0001, "loss": 0.0264, "step": 10180 }, { "epoch": 67.03947368421052, "grad_norm": 1.6180822849273682, "learning_rate": 0.0001, "loss": 0.0251, "step": 10190 }, { "epoch": 67.10526315789474, "grad_norm": 2.0724172592163086, "learning_rate": 0.0001, "loss": 0.0255, "step": 10200 }, { "epoch": 67.17105263157895, "grad_norm": 1.807228446006775, "learning_rate": 0.0001, "loss": 0.0264, "step": 10210 }, { "epoch": 67.23684210526316, "grad_norm": 1.818137764930725, "learning_rate": 0.0001, "loss": 0.0266, "step": 10220 }, { "epoch": 67.30263157894737, "grad_norm": 1.9353524446487427, "learning_rate": 0.0001, "loss": 0.0247, "step": 10230 }, { "epoch": 67.36842105263158, "grad_norm": 1.9978927373886108, "learning_rate": 0.0001, "loss": 0.0259, "step": 10240 }, { "epoch": 67.4342105263158, "grad_norm": 1.6602277755737305, "learning_rate": 0.0001, "loss": 0.0287, "step": 10250 }, { "epoch": 67.5, "grad_norm": 1.6295214891433716, "learning_rate": 0.0001, "loss": 0.0263, "step": 10260 }, { "epoch": 67.5657894736842, "grad_norm": 1.5134141445159912, "learning_rate": 0.0001, "loss": 0.0311, "step": 10270 }, { "epoch": 67.63157894736842, "grad_norm": 1.7062309980392456, "learning_rate": 0.0001, "loss": 0.0288, "step": 10280 }, { "epoch": 67.69736842105263, "grad_norm": 2.065563678741455, "learning_rate": 0.0001, "loss": 0.0268, "step": 10290 }, { "epoch": 67.76315789473684, "grad_norm": 1.7206897735595703, "learning_rate": 0.0001, "loss": 0.0304, "step": 10300 }, { "epoch": 67.82894736842105, "grad_norm": 1.927570104598999, "learning_rate": 0.0001, "loss": 0.0283, "step": 10310 }, { "epoch": 67.89473684210526, "grad_norm": 1.7813613414764404, "learning_rate": 0.0001, "loss": 0.0303, "step": 10320 }, { "epoch": 67.96052631578948, "grad_norm": 1.7587003707885742, "learning_rate": 0.0001, "loss": 0.0264, "step": 10330 }, { "epoch": 68.02631578947368, "grad_norm": 1.5077179670333862, "learning_rate": 0.0001, "loss": 0.0282, "step": 10340 }, { "epoch": 68.09210526315789, "grad_norm": 1.6387265920639038, "learning_rate": 0.0001, "loss": 0.0308, "step": 10350 }, { "epoch": 68.15789473684211, "grad_norm": 1.3568955659866333, "learning_rate": 0.0001, "loss": 0.0303, "step": 10360 }, { "epoch": 68.22368421052632, "grad_norm": 1.4115484952926636, "learning_rate": 0.0001, "loss": 0.0267, "step": 10370 }, { "epoch": 68.28947368421052, "grad_norm": 1.726250171661377, "learning_rate": 0.0001, "loss": 0.0264, "step": 10380 }, { "epoch": 68.35526315789474, "grad_norm": 1.825256586074829, "learning_rate": 0.0001, "loss": 0.0284, "step": 10390 }, { "epoch": 68.42105263157895, "grad_norm": 1.7078465223312378, "learning_rate": 0.0001, "loss": 0.028, "step": 10400 }, { "epoch": 68.48684210526316, "grad_norm": 1.6383626461029053, "learning_rate": 0.0001, "loss": 0.0277, "step": 10410 }, { "epoch": 68.55263157894737, "grad_norm": 1.635048270225525, "learning_rate": 0.0001, "loss": 0.0295, "step": 10420 }, { "epoch": 68.61842105263158, "grad_norm": 1.7052724361419678, "learning_rate": 0.0001, "loss": 0.0272, "step": 10430 }, { "epoch": 68.6842105263158, "grad_norm": 1.6822491884231567, "learning_rate": 0.0001, "loss": 0.027, "step": 10440 }, { "epoch": 68.75, "grad_norm": 1.7057929039001465, "learning_rate": 0.0001, "loss": 0.0278, "step": 10450 }, { "epoch": 68.8157894736842, "grad_norm": 1.5500861406326294, "learning_rate": 0.0001, "loss": 0.0282, "step": 10460 }, { "epoch": 68.88157894736842, "grad_norm": 1.7019840478897095, "learning_rate": 0.0001, "loss": 0.0296, "step": 10470 }, { "epoch": 68.94736842105263, "grad_norm": 1.6525379419326782, "learning_rate": 0.0001, "loss": 0.0266, "step": 10480 }, { "epoch": 69.01315789473684, "grad_norm": 2.0071771144866943, "learning_rate": 0.0001, "loss": 0.029, "step": 10490 }, { "epoch": 69.07894736842105, "grad_norm": 2.170626640319824, "learning_rate": 0.0001, "loss": 0.0286, "step": 10500 }, { "epoch": 69.14473684210526, "grad_norm": 1.7918614149093628, "learning_rate": 0.0001, "loss": 0.0265, "step": 10510 }, { "epoch": 69.21052631578948, "grad_norm": 1.633731722831726, "learning_rate": 0.0001, "loss": 0.0277, "step": 10520 }, { "epoch": 69.27631578947368, "grad_norm": 1.8682153224945068, "learning_rate": 0.0001, "loss": 0.0268, "step": 10530 }, { "epoch": 69.34210526315789, "grad_norm": 1.8960853815078735, "learning_rate": 0.0001, "loss": 0.0255, "step": 10540 }, { "epoch": 69.40789473684211, "grad_norm": 1.7412015199661255, "learning_rate": 0.0001, "loss": 0.0267, "step": 10550 }, { "epoch": 69.47368421052632, "grad_norm": 1.7792584896087646, "learning_rate": 0.0001, "loss": 0.027, "step": 10560 }, { "epoch": 69.53947368421052, "grad_norm": 1.578399896621704, "learning_rate": 0.0001, "loss": 0.0274, "step": 10570 }, { "epoch": 69.60526315789474, "grad_norm": 1.7477600574493408, "learning_rate": 0.0001, "loss": 0.0257, "step": 10580 }, { "epoch": 69.67105263157895, "grad_norm": 1.8707166910171509, "learning_rate": 0.0001, "loss": 0.0264, "step": 10590 }, { "epoch": 69.73684210526316, "grad_norm": 1.953047275543213, "learning_rate": 0.0001, "loss": 0.0268, "step": 10600 }, { "epoch": 69.80263157894737, "grad_norm": 1.7453070878982544, "learning_rate": 0.0001, "loss": 0.0253, "step": 10610 }, { "epoch": 69.86842105263158, "grad_norm": 1.3676609992980957, "learning_rate": 0.0001, "loss": 0.0262, "step": 10620 }, { "epoch": 69.9342105263158, "grad_norm": 1.8007850646972656, "learning_rate": 0.0001, "loss": 0.0266, "step": 10630 }, { "epoch": 70.0, "grad_norm": 1.5974959135055542, "learning_rate": 0.0001, "loss": 0.0247, "step": 10640 }, { "epoch": 70.0657894736842, "grad_norm": 1.7148112058639526, "learning_rate": 0.0001, "loss": 0.0263, "step": 10650 }, { "epoch": 70.13157894736842, "grad_norm": 1.8458138704299927, "learning_rate": 0.0001, "loss": 0.0262, "step": 10660 }, { "epoch": 70.19736842105263, "grad_norm": 1.8393112421035767, "learning_rate": 0.0001, "loss": 0.0295, "step": 10670 }, { "epoch": 70.26315789473684, "grad_norm": 2.173353433609009, "learning_rate": 0.0001, "loss": 0.0265, "step": 10680 }, { "epoch": 70.32894736842105, "grad_norm": 1.9548739194869995, "learning_rate": 0.0001, "loss": 0.0265, "step": 10690 }, { "epoch": 70.39473684210526, "grad_norm": 2.02528715133667, "learning_rate": 0.0001, "loss": 0.0279, "step": 10700 }, { "epoch": 70.46052631578948, "grad_norm": 1.9165359735488892, "learning_rate": 0.0001, "loss": 0.0233, "step": 10710 }, { "epoch": 70.52631578947368, "grad_norm": 1.8980177640914917, "learning_rate": 0.0001, "loss": 0.0262, "step": 10720 }, { "epoch": 70.59210526315789, "grad_norm": 1.5036553144454956, "learning_rate": 0.0001, "loss": 0.0272, "step": 10730 }, { "epoch": 70.65789473684211, "grad_norm": 1.9608731269836426, "learning_rate": 0.0001, "loss": 0.027, "step": 10740 }, { "epoch": 70.72368421052632, "grad_norm": 1.665083885192871, "learning_rate": 0.0001, "loss": 0.0269, "step": 10750 }, { "epoch": 70.78947368421052, "grad_norm": 1.6562914848327637, "learning_rate": 0.0001, "loss": 0.0244, "step": 10760 }, { "epoch": 70.85526315789474, "grad_norm": 1.5882655382156372, "learning_rate": 0.0001, "loss": 0.0235, "step": 10770 }, { "epoch": 70.92105263157895, "grad_norm": 1.7006151676177979, "learning_rate": 0.0001, "loss": 0.0254, "step": 10780 }, { "epoch": 70.98684210526316, "grad_norm": 1.5319530963897705, "learning_rate": 0.0001, "loss": 0.0281, "step": 10790 }, { "epoch": 71.05263157894737, "grad_norm": 1.9043638706207275, "learning_rate": 0.0001, "loss": 0.0284, "step": 10800 }, { "epoch": 71.11842105263158, "grad_norm": 1.5125269889831543, "learning_rate": 0.0001, "loss": 0.0267, "step": 10810 }, { "epoch": 71.1842105263158, "grad_norm": 1.8229715824127197, "learning_rate": 0.0001, "loss": 0.0299, "step": 10820 }, { "epoch": 71.25, "grad_norm": 1.796139121055603, "learning_rate": 0.0001, "loss": 0.0264, "step": 10830 }, { "epoch": 71.3157894736842, "grad_norm": 1.8449422121047974, "learning_rate": 0.0001, "loss": 0.0278, "step": 10840 }, { "epoch": 71.38157894736842, "grad_norm": 1.7261402606964111, "learning_rate": 0.0001, "loss": 0.0287, "step": 10850 }, { "epoch": 71.44736842105263, "grad_norm": 1.5883243083953857, "learning_rate": 0.0001, "loss": 0.0298, "step": 10860 }, { "epoch": 71.51315789473684, "grad_norm": 1.3273059129714966, "learning_rate": 0.0001, "loss": 0.0277, "step": 10870 }, { "epoch": 71.57894736842105, "grad_norm": 1.3199368715286255, "learning_rate": 0.0001, "loss": 0.0254, "step": 10880 }, { "epoch": 71.64473684210526, "grad_norm": 1.3597865104675293, "learning_rate": 0.0001, "loss": 0.0278, "step": 10890 }, { "epoch": 71.71052631578948, "grad_norm": 1.6409027576446533, "learning_rate": 0.0001, "loss": 0.0274, "step": 10900 }, { "epoch": 71.77631578947368, "grad_norm": 1.7969907522201538, "learning_rate": 0.0001, "loss": 0.0265, "step": 10910 }, { "epoch": 71.84210526315789, "grad_norm": 1.7779749631881714, "learning_rate": 0.0001, "loss": 0.0285, "step": 10920 }, { "epoch": 71.90789473684211, "grad_norm": 1.8789730072021484, "learning_rate": 0.0001, "loss": 0.0281, "step": 10930 }, { "epoch": 71.97368421052632, "grad_norm": 1.832434058189392, "learning_rate": 0.0001, "loss": 0.0272, "step": 10940 }, { "epoch": 72.03947368421052, "grad_norm": 1.6766541004180908, "learning_rate": 0.0001, "loss": 0.0284, "step": 10950 }, { "epoch": 72.10526315789474, "grad_norm": 1.7368793487548828, "learning_rate": 0.0001, "loss": 0.0286, "step": 10960 }, { "epoch": 72.17105263157895, "grad_norm": 1.8270950317382812, "learning_rate": 0.0001, "loss": 0.0313, "step": 10970 }, { "epoch": 72.23684210526316, "grad_norm": 1.5050655603408813, "learning_rate": 0.0001, "loss": 0.0286, "step": 10980 }, { "epoch": 72.30263157894737, "grad_norm": 1.7033870220184326, "learning_rate": 0.0001, "loss": 0.0283, "step": 10990 }, { "epoch": 72.36842105263158, "grad_norm": 1.559373378753662, "learning_rate": 0.0001, "loss": 0.0272, "step": 11000 }, { "epoch": 72.4342105263158, "grad_norm": 1.8907803297042847, "learning_rate": 0.0001, "loss": 0.027, "step": 11010 }, { "epoch": 72.5, "grad_norm": 1.5523408651351929, "learning_rate": 0.0001, "loss": 0.0245, "step": 11020 }, { "epoch": 72.5657894736842, "grad_norm": 1.4263194799423218, "learning_rate": 0.0001, "loss": 0.0266, "step": 11030 }, { "epoch": 72.63157894736842, "grad_norm": 1.5710784196853638, "learning_rate": 0.0001, "loss": 0.0262, "step": 11040 }, { "epoch": 72.69736842105263, "grad_norm": 1.9038443565368652, "learning_rate": 0.0001, "loss": 0.0266, "step": 11050 }, { "epoch": 72.76315789473684, "grad_norm": 1.9687570333480835, "learning_rate": 0.0001, "loss": 0.0271, "step": 11060 }, { "epoch": 72.82894736842105, "grad_norm": 1.8713347911834717, "learning_rate": 0.0001, "loss": 0.0251, "step": 11070 }, { "epoch": 72.89473684210526, "grad_norm": 1.827601671218872, "learning_rate": 0.0001, "loss": 0.0265, "step": 11080 }, { "epoch": 72.96052631578948, "grad_norm": 1.7612308263778687, "learning_rate": 0.0001, "loss": 0.0269, "step": 11090 }, { "epoch": 73.02631578947368, "grad_norm": 1.6651338338851929, "learning_rate": 0.0001, "loss": 0.0265, "step": 11100 }, { "epoch": 73.09210526315789, "grad_norm": 1.7223801612854004, "learning_rate": 0.0001, "loss": 0.0278, "step": 11110 }, { "epoch": 73.15789473684211, "grad_norm": 2.2473108768463135, "learning_rate": 0.0001, "loss": 0.0256, "step": 11120 }, { "epoch": 73.22368421052632, "grad_norm": 1.6378623247146606, "learning_rate": 0.0001, "loss": 0.0283, "step": 11130 }, { "epoch": 73.28947368421052, "grad_norm": 1.7509886026382446, "learning_rate": 0.0001, "loss": 0.0274, "step": 11140 }, { "epoch": 73.35526315789474, "grad_norm": 1.7899543046951294, "learning_rate": 0.0001, "loss": 0.0263, "step": 11150 }, { "epoch": 73.42105263157895, "grad_norm": 1.7126433849334717, "learning_rate": 0.0001, "loss": 0.026, "step": 11160 }, { "epoch": 73.48684210526316, "grad_norm": 1.624640941619873, "learning_rate": 0.0001, "loss": 0.0258, "step": 11170 }, { "epoch": 73.55263157894737, "grad_norm": 1.6922892332077026, "learning_rate": 0.0001, "loss": 0.0261, "step": 11180 }, { "epoch": 73.61842105263158, "grad_norm": 1.6018348932266235, "learning_rate": 0.0001, "loss": 0.0265, "step": 11190 }, { "epoch": 73.6842105263158, "grad_norm": 1.7257733345031738, "learning_rate": 0.0001, "loss": 0.0293, "step": 11200 }, { "epoch": 73.75, "grad_norm": 1.5125445127487183, "learning_rate": 0.0001, "loss": 0.0274, "step": 11210 }, { "epoch": 73.8157894736842, "grad_norm": 2.309644937515259, "learning_rate": 0.0001, "loss": 0.0312, "step": 11220 }, { "epoch": 73.88157894736842, "grad_norm": 2.2980353832244873, "learning_rate": 0.0001, "loss": 0.028, "step": 11230 }, { "epoch": 73.94736842105263, "grad_norm": 2.155663251876831, "learning_rate": 0.0001, "loss": 0.0271, "step": 11240 }, { "epoch": 74.01315789473684, "grad_norm": 1.5754151344299316, "learning_rate": 0.0001, "loss": 0.0253, "step": 11250 }, { "epoch": 74.07894736842105, "grad_norm": 1.6232088804244995, "learning_rate": 0.0001, "loss": 0.0257, "step": 11260 }, { "epoch": 74.14473684210526, "grad_norm": 2.0059304237365723, "learning_rate": 0.0001, "loss": 0.0272, "step": 11270 }, { "epoch": 74.21052631578948, "grad_norm": 2.166524648666382, "learning_rate": 0.0001, "loss": 0.0241, "step": 11280 }, { "epoch": 74.27631578947368, "grad_norm": 1.8639678955078125, "learning_rate": 0.0001, "loss": 0.027, "step": 11290 }, { "epoch": 74.34210526315789, "grad_norm": 2.540382146835327, "learning_rate": 0.0001, "loss": 0.0269, "step": 11300 }, { "epoch": 74.40789473684211, "grad_norm": 1.8720356225967407, "learning_rate": 0.0001, "loss": 0.0251, "step": 11310 }, { "epoch": 74.47368421052632, "grad_norm": 1.6244946718215942, "learning_rate": 0.0001, "loss": 0.025, "step": 11320 }, { "epoch": 74.53947368421052, "grad_norm": 1.4546319246292114, "learning_rate": 0.0001, "loss": 0.0237, "step": 11330 }, { "epoch": 74.60526315789474, "grad_norm": 1.374340534210205, "learning_rate": 0.0001, "loss": 0.024, "step": 11340 }, { "epoch": 74.67105263157895, "grad_norm": 1.5473732948303223, "learning_rate": 0.0001, "loss": 0.0242, "step": 11350 }, { "epoch": 74.73684210526316, "grad_norm": 1.646817922592163, "learning_rate": 0.0001, "loss": 0.0263, "step": 11360 }, { "epoch": 74.80263157894737, "grad_norm": 1.6465305089950562, "learning_rate": 0.0001, "loss": 0.0283, "step": 11370 }, { "epoch": 74.86842105263158, "grad_norm": 1.6561059951782227, "learning_rate": 0.0001, "loss": 0.0256, "step": 11380 }, { "epoch": 74.9342105263158, "grad_norm": 1.7500685453414917, "learning_rate": 0.0001, "loss": 0.0276, "step": 11390 }, { "epoch": 75.0, "grad_norm": 1.3135260343551636, "learning_rate": 0.0001, "loss": 0.0262, "step": 11400 }, { "epoch": 75.0657894736842, "grad_norm": 1.6338132619857788, "learning_rate": 0.0001, "loss": 0.0259, "step": 11410 }, { "epoch": 75.13157894736842, "grad_norm": 1.5177695751190186, "learning_rate": 0.0001, "loss": 0.0293, "step": 11420 }, { "epoch": 75.19736842105263, "grad_norm": 1.5215574502944946, "learning_rate": 0.0001, "loss": 0.0261, "step": 11430 }, { "epoch": 75.26315789473684, "grad_norm": 1.4262157678604126, "learning_rate": 0.0001, "loss": 0.024, "step": 11440 }, { "epoch": 75.32894736842105, "grad_norm": 1.7465966939926147, "learning_rate": 0.0001, "loss": 0.0266, "step": 11450 }, { "epoch": 75.39473684210526, "grad_norm": 1.7781189680099487, "learning_rate": 0.0001, "loss": 0.0258, "step": 11460 }, { "epoch": 75.46052631578948, "grad_norm": 1.861718773841858, "learning_rate": 0.0001, "loss": 0.0265, "step": 11470 }, { "epoch": 75.52631578947368, "grad_norm": 1.34523606300354, "learning_rate": 0.0001, "loss": 0.0255, "step": 11480 }, { "epoch": 75.59210526315789, "grad_norm": 1.6540141105651855, "learning_rate": 0.0001, "loss": 0.0277, "step": 11490 }, { "epoch": 75.65789473684211, "grad_norm": 1.8050261735916138, "learning_rate": 0.0001, "loss": 0.0258, "step": 11500 }, { "epoch": 75.72368421052632, "grad_norm": 1.8646867275238037, "learning_rate": 0.0001, "loss": 0.0285, "step": 11510 }, { "epoch": 75.78947368421052, "grad_norm": 1.4578933715820312, "learning_rate": 0.0001, "loss": 0.0261, "step": 11520 }, { "epoch": 75.85526315789474, "grad_norm": 1.5991803407669067, "learning_rate": 0.0001, "loss": 0.0249, "step": 11530 }, { "epoch": 75.92105263157895, "grad_norm": 1.9933110475540161, "learning_rate": 0.0001, "loss": 0.025, "step": 11540 }, { "epoch": 75.98684210526316, "grad_norm": 1.5704941749572754, "learning_rate": 0.0001, "loss": 0.0263, "step": 11550 }, { "epoch": 76.05263157894737, "grad_norm": 1.813393235206604, "learning_rate": 0.0001, "loss": 0.0259, "step": 11560 }, { "epoch": 76.11842105263158, "grad_norm": 2.1795897483825684, "learning_rate": 0.0001, "loss": 0.0263, "step": 11570 }, { "epoch": 76.1842105263158, "grad_norm": 1.719844102859497, "learning_rate": 0.0001, "loss": 0.0257, "step": 11580 }, { "epoch": 76.25, "grad_norm": 1.964848518371582, "learning_rate": 0.0001, "loss": 0.0278, "step": 11590 }, { "epoch": 76.3157894736842, "grad_norm": 2.0262434482574463, "learning_rate": 0.0001, "loss": 0.0276, "step": 11600 }, { "epoch": 76.38157894736842, "grad_norm": 2.0098633766174316, "learning_rate": 0.0001, "loss": 0.0245, "step": 11610 }, { "epoch": 76.44736842105263, "grad_norm": 1.8454413414001465, "learning_rate": 0.0001, "loss": 0.0243, "step": 11620 }, { "epoch": 76.51315789473684, "grad_norm": 1.8662750720977783, "learning_rate": 0.0001, "loss": 0.0272, "step": 11630 }, { "epoch": 76.57894736842105, "grad_norm": 1.9370334148406982, "learning_rate": 0.0001, "loss": 0.0251, "step": 11640 }, { "epoch": 76.64473684210526, "grad_norm": 1.6803852319717407, "learning_rate": 0.0001, "loss": 0.0224, "step": 11650 }, { "epoch": 76.71052631578948, "grad_norm": 1.9592006206512451, "learning_rate": 0.0001, "loss": 0.0242, "step": 11660 }, { "epoch": 76.77631578947368, "grad_norm": 1.9452179670333862, "learning_rate": 0.0001, "loss": 0.0247, "step": 11670 }, { "epoch": 76.84210526315789, "grad_norm": 1.9002512693405151, "learning_rate": 0.0001, "loss": 0.0235, "step": 11680 }, { "epoch": 76.90789473684211, "grad_norm": 1.9148516654968262, "learning_rate": 0.0001, "loss": 0.0262, "step": 11690 }, { "epoch": 76.97368421052632, "grad_norm": 1.7390122413635254, "learning_rate": 0.0001, "loss": 0.0256, "step": 11700 }, { "epoch": 77.03947368421052, "grad_norm": 1.5007272958755493, "learning_rate": 0.0001, "loss": 0.0254, "step": 11710 }, { "epoch": 77.10526315789474, "grad_norm": 1.6768064498901367, "learning_rate": 0.0001, "loss": 0.0257, "step": 11720 }, { "epoch": 77.17105263157895, "grad_norm": 1.8090136051177979, "learning_rate": 0.0001, "loss": 0.0249, "step": 11730 }, { "epoch": 77.23684210526316, "grad_norm": 1.8841345310211182, "learning_rate": 0.0001, "loss": 0.0253, "step": 11740 }, { "epoch": 77.30263157894737, "grad_norm": 1.282132863998413, "learning_rate": 0.0001, "loss": 0.0235, "step": 11750 }, { "epoch": 77.36842105263158, "grad_norm": 1.8869783878326416, "learning_rate": 0.0001, "loss": 0.0253, "step": 11760 }, { "epoch": 77.4342105263158, "grad_norm": 1.700897216796875, "learning_rate": 0.0001, "loss": 0.0247, "step": 11770 }, { "epoch": 77.5, "grad_norm": 1.763547420501709, "learning_rate": 0.0001, "loss": 0.0229, "step": 11780 }, { "epoch": 77.5657894736842, "grad_norm": 1.661258339881897, "learning_rate": 0.0001, "loss": 0.0262, "step": 11790 }, { "epoch": 77.63157894736842, "grad_norm": 1.2386846542358398, "learning_rate": 0.0001, "loss": 0.0256, "step": 11800 }, { "epoch": 77.69736842105263, "grad_norm": 1.4917902946472168, "learning_rate": 0.0001, "loss": 0.0262, "step": 11810 }, { "epoch": 77.76315789473684, "grad_norm": 1.4909294843673706, "learning_rate": 0.0001, "loss": 0.0262, "step": 11820 }, { "epoch": 77.82894736842105, "grad_norm": 1.6174029111862183, "learning_rate": 0.0001, "loss": 0.0282, "step": 11830 }, { "epoch": 77.89473684210526, "grad_norm": 1.75030517578125, "learning_rate": 0.0001, "loss": 0.026, "step": 11840 }, { "epoch": 77.96052631578948, "grad_norm": 1.3512479066848755, "learning_rate": 0.0001, "loss": 0.0269, "step": 11850 }, { "epoch": 78.02631578947368, "grad_norm": 1.5821315050125122, "learning_rate": 0.0001, "loss": 0.028, "step": 11860 }, { "epoch": 78.09210526315789, "grad_norm": 1.8378440141677856, "learning_rate": 0.0001, "loss": 0.0252, "step": 11870 }, { "epoch": 78.15789473684211, "grad_norm": 1.536332130432129, "learning_rate": 0.0001, "loss": 0.0265, "step": 11880 }, { "epoch": 78.22368421052632, "grad_norm": 1.5582396984100342, "learning_rate": 0.0001, "loss": 0.0263, "step": 11890 }, { "epoch": 78.28947368421052, "grad_norm": 1.637697458267212, "learning_rate": 0.0001, "loss": 0.0269, "step": 11900 }, { "epoch": 78.35526315789474, "grad_norm": 1.7529138326644897, "learning_rate": 0.0001, "loss": 0.0248, "step": 11910 }, { "epoch": 78.42105263157895, "grad_norm": 1.9082077741622925, "learning_rate": 0.0001, "loss": 0.0257, "step": 11920 }, { "epoch": 78.48684210526316, "grad_norm": 1.5302765369415283, "learning_rate": 0.0001, "loss": 0.0248, "step": 11930 }, { "epoch": 78.55263157894737, "grad_norm": 1.9385207891464233, "learning_rate": 0.0001, "loss": 0.0298, "step": 11940 }, { "epoch": 78.61842105263158, "grad_norm": 1.6783729791641235, "learning_rate": 0.0001, "loss": 0.0251, "step": 11950 }, { "epoch": 78.6842105263158, "grad_norm": 1.7037731409072876, "learning_rate": 0.0001, "loss": 0.0251, "step": 11960 }, { "epoch": 78.75, "grad_norm": 1.3808066844940186, "learning_rate": 0.0001, "loss": 0.0273, "step": 11970 }, { "epoch": 78.8157894736842, "grad_norm": 1.4401555061340332, "learning_rate": 0.0001, "loss": 0.0261, "step": 11980 }, { "epoch": 78.88157894736842, "grad_norm": 1.8740235567092896, "learning_rate": 0.0001, "loss": 0.025, "step": 11990 }, { "epoch": 78.94736842105263, "grad_norm": 1.5552549362182617, "learning_rate": 0.0001, "loss": 0.0247, "step": 12000 }, { "epoch": 79.01315789473684, "grad_norm": 1.5589462518692017, "learning_rate": 0.0001, "loss": 0.0258, "step": 12010 }, { "epoch": 79.07894736842105, "grad_norm": 1.641662359237671, "learning_rate": 0.0001, "loss": 0.0243, "step": 12020 }, { "epoch": 79.14473684210526, "grad_norm": 1.6106501817703247, "learning_rate": 0.0001, "loss": 0.0244, "step": 12030 }, { "epoch": 79.21052631578948, "grad_norm": 1.9273698329925537, "learning_rate": 0.0001, "loss": 0.0263, "step": 12040 }, { "epoch": 79.27631578947368, "grad_norm": 1.7661243677139282, "learning_rate": 0.0001, "loss": 0.0236, "step": 12050 }, { "epoch": 79.34210526315789, "grad_norm": 1.7702202796936035, "learning_rate": 0.0001, "loss": 0.0248, "step": 12060 }, { "epoch": 79.40789473684211, "grad_norm": 1.2317006587982178, "learning_rate": 0.0001, "loss": 0.0221, "step": 12070 }, { "epoch": 79.47368421052632, "grad_norm": 1.6304638385772705, "learning_rate": 0.0001, "loss": 0.0262, "step": 12080 }, { "epoch": 79.53947368421052, "grad_norm": 1.764953851699829, "learning_rate": 0.0001, "loss": 0.0243, "step": 12090 }, { "epoch": 79.60526315789474, "grad_norm": 1.6987632513046265, "learning_rate": 0.0001, "loss": 0.0281, "step": 12100 }, { "epoch": 79.67105263157895, "grad_norm": 1.7687889337539673, "learning_rate": 0.0001, "loss": 0.0242, "step": 12110 }, { "epoch": 79.73684210526316, "grad_norm": 1.714768648147583, "learning_rate": 0.0001, "loss": 0.0261, "step": 12120 }, { "epoch": 79.80263157894737, "grad_norm": 1.5168180465698242, "learning_rate": 0.0001, "loss": 0.0238, "step": 12130 }, { "epoch": 79.86842105263158, "grad_norm": 1.6321282386779785, "learning_rate": 0.0001, "loss": 0.0252, "step": 12140 }, { "epoch": 79.9342105263158, "grad_norm": 1.8023918867111206, "learning_rate": 0.0001, "loss": 0.0248, "step": 12150 }, { "epoch": 80.0, "grad_norm": 1.475740909576416, "learning_rate": 0.0001, "loss": 0.026, "step": 12160 }, { "epoch": 80.0657894736842, "grad_norm": 1.8622595071792603, "learning_rate": 0.0001, "loss": 0.0236, "step": 12170 }, { "epoch": 80.13157894736842, "grad_norm": 1.6295769214630127, "learning_rate": 0.0001, "loss": 0.0232, "step": 12180 }, { "epoch": 80.19736842105263, "grad_norm": 1.6116666793823242, "learning_rate": 0.0001, "loss": 0.0242, "step": 12190 }, { "epoch": 80.26315789473684, "grad_norm": 1.599142074584961, "learning_rate": 0.0001, "loss": 0.026, "step": 12200 }, { "epoch": 80.32894736842105, "grad_norm": 1.5806418657302856, "learning_rate": 0.0001, "loss": 0.0259, "step": 12210 }, { "epoch": 80.39473684210526, "grad_norm": 1.4835152626037598, "learning_rate": 0.0001, "loss": 0.0276, "step": 12220 }, { "epoch": 80.46052631578948, "grad_norm": 1.2303491830825806, "learning_rate": 0.0001, "loss": 0.0274, "step": 12230 }, { "epoch": 80.52631578947368, "grad_norm": 1.4486466646194458, "learning_rate": 0.0001, "loss": 0.026, "step": 12240 }, { "epoch": 80.59210526315789, "grad_norm": 1.5165975093841553, "learning_rate": 0.0001, "loss": 0.028, "step": 12250 }, { "epoch": 80.65789473684211, "grad_norm": 1.4321902990341187, "learning_rate": 0.0001, "loss": 0.0262, "step": 12260 }, { "epoch": 80.72368421052632, "grad_norm": 1.3290172815322876, "learning_rate": 0.0001, "loss": 0.0284, "step": 12270 }, { "epoch": 80.78947368421052, "grad_norm": 1.4528899192810059, "learning_rate": 0.0001, "loss": 0.0272, "step": 12280 }, { "epoch": 80.85526315789474, "grad_norm": 1.4954471588134766, "learning_rate": 0.0001, "loss": 0.0263, "step": 12290 }, { "epoch": 80.92105263157895, "grad_norm": 1.686185359954834, "learning_rate": 0.0001, "loss": 0.0268, "step": 12300 }, { "epoch": 80.98684210526316, "grad_norm": 1.8050355911254883, "learning_rate": 0.0001, "loss": 0.0273, "step": 12310 }, { "epoch": 81.05263157894737, "grad_norm": 1.667278528213501, "learning_rate": 0.0001, "loss": 0.026, "step": 12320 }, { "epoch": 81.11842105263158, "grad_norm": 1.8313591480255127, "learning_rate": 0.0001, "loss": 0.0275, "step": 12330 }, { "epoch": 81.1842105263158, "grad_norm": 1.5865910053253174, "learning_rate": 0.0001, "loss": 0.0265, "step": 12340 }, { "epoch": 81.25, "grad_norm": 1.4617630243301392, "learning_rate": 0.0001, "loss": 0.0263, "step": 12350 }, { "epoch": 81.3157894736842, "grad_norm": 1.6927311420440674, "learning_rate": 0.0001, "loss": 0.023, "step": 12360 }, { "epoch": 81.38157894736842, "grad_norm": 1.7742199897766113, "learning_rate": 0.0001, "loss": 0.0249, "step": 12370 }, { "epoch": 81.44736842105263, "grad_norm": 1.5240896940231323, "learning_rate": 0.0001, "loss": 0.0273, "step": 12380 }, { "epoch": 81.51315789473684, "grad_norm": 1.980691909790039, "learning_rate": 0.0001, "loss": 0.0265, "step": 12390 }, { "epoch": 81.57894736842105, "grad_norm": 1.9031286239624023, "learning_rate": 0.0001, "loss": 0.0254, "step": 12400 }, { "epoch": 81.64473684210526, "grad_norm": 1.8978111743927002, "learning_rate": 0.0001, "loss": 0.026, "step": 12410 }, { "epoch": 81.71052631578948, "grad_norm": 2.1107935905456543, "learning_rate": 0.0001, "loss": 0.025, "step": 12420 }, { "epoch": 81.77631578947368, "grad_norm": 1.7692242860794067, "learning_rate": 0.0001, "loss": 0.024, "step": 12430 }, { "epoch": 81.84210526315789, "grad_norm": 1.9986506700515747, "learning_rate": 0.0001, "loss": 0.0238, "step": 12440 }, { "epoch": 81.90789473684211, "grad_norm": 2.076988935470581, "learning_rate": 0.0001, "loss": 0.0263, "step": 12450 }, { "epoch": 81.97368421052632, "grad_norm": 1.7011905908584595, "learning_rate": 0.0001, "loss": 0.0231, "step": 12460 }, { "epoch": 82.03947368421052, "grad_norm": 1.8305109739303589, "learning_rate": 0.0001, "loss": 0.0264, "step": 12470 }, { "epoch": 82.10526315789474, "grad_norm": 1.6795586347579956, "learning_rate": 0.0001, "loss": 0.0243, "step": 12480 }, { "epoch": 82.17105263157895, "grad_norm": 1.4467089176177979, "learning_rate": 0.0001, "loss": 0.0233, "step": 12490 }, { "epoch": 82.23684210526316, "grad_norm": 1.618145227432251, "learning_rate": 0.0001, "loss": 0.0258, "step": 12500 }, { "epoch": 82.30263157894737, "grad_norm": 1.5068567991256714, "learning_rate": 0.0001, "loss": 0.0247, "step": 12510 }, { "epoch": 82.36842105263158, "grad_norm": 1.7931199073791504, "learning_rate": 0.0001, "loss": 0.0242, "step": 12520 }, { "epoch": 82.4342105263158, "grad_norm": 1.7545214891433716, "learning_rate": 0.0001, "loss": 0.0248, "step": 12530 }, { "epoch": 82.5, "grad_norm": 1.677007794380188, "learning_rate": 0.0001, "loss": 0.0234, "step": 12540 }, { "epoch": 82.5657894736842, "grad_norm": 1.8432847261428833, "learning_rate": 0.0001, "loss": 0.0275, "step": 12550 }, { "epoch": 82.63157894736842, "grad_norm": 1.6245819330215454, "learning_rate": 0.0001, "loss": 0.0265, "step": 12560 }, { "epoch": 82.69736842105263, "grad_norm": 1.837830901145935, "learning_rate": 0.0001, "loss": 0.0268, "step": 12570 }, { "epoch": 82.76315789473684, "grad_norm": 1.7958941459655762, "learning_rate": 0.0001, "loss": 0.0256, "step": 12580 }, { "epoch": 82.82894736842105, "grad_norm": 1.5123578310012817, "learning_rate": 0.0001, "loss": 0.0246, "step": 12590 }, { "epoch": 82.89473684210526, "grad_norm": 1.5450695753097534, "learning_rate": 0.0001, "loss": 0.0254, "step": 12600 }, { "epoch": 82.96052631578948, "grad_norm": 1.3524283170700073, "learning_rate": 0.0001, "loss": 0.0238, "step": 12610 }, { "epoch": 83.02631578947368, "grad_norm": 1.588718295097351, "learning_rate": 0.0001, "loss": 0.0252, "step": 12620 }, { "epoch": 83.09210526315789, "grad_norm": 1.4721720218658447, "learning_rate": 0.0001, "loss": 0.0251, "step": 12630 }, { "epoch": 83.15789473684211, "grad_norm": 1.411702036857605, "learning_rate": 0.0001, "loss": 0.0253, "step": 12640 }, { "epoch": 83.22368421052632, "grad_norm": 1.5033427476882935, "learning_rate": 0.0001, "loss": 0.0268, "step": 12650 }, { "epoch": 83.28947368421052, "grad_norm": 1.7150914669036865, "learning_rate": 0.0001, "loss": 0.025, "step": 12660 }, { "epoch": 83.35526315789474, "grad_norm": 1.4532182216644287, "learning_rate": 0.0001, "loss": 0.0275, "step": 12670 }, { "epoch": 83.42105263157895, "grad_norm": 1.678092360496521, "learning_rate": 0.0001, "loss": 0.025, "step": 12680 }, { "epoch": 83.48684210526316, "grad_norm": 1.542473554611206, "learning_rate": 0.0001, "loss": 0.0255, "step": 12690 }, { "epoch": 83.55263157894737, "grad_norm": 1.6771373748779297, "learning_rate": 0.0001, "loss": 0.0255, "step": 12700 }, { "epoch": 83.61842105263158, "grad_norm": 1.5920690298080444, "learning_rate": 0.0001, "loss": 0.0237, "step": 12710 }, { "epoch": 83.6842105263158, "grad_norm": 1.29483962059021, "learning_rate": 0.0001, "loss": 0.0225, "step": 12720 }, { "epoch": 83.75, "grad_norm": 1.575247883796692, "learning_rate": 0.0001, "loss": 0.0241, "step": 12730 }, { "epoch": 83.8157894736842, "grad_norm": 1.4278053045272827, "learning_rate": 0.0001, "loss": 0.0263, "step": 12740 }, { "epoch": 83.88157894736842, "grad_norm": 1.0531659126281738, "learning_rate": 0.0001, "loss": 0.0252, "step": 12750 }, { "epoch": 83.94736842105263, "grad_norm": 1.2762305736541748, "learning_rate": 0.0001, "loss": 0.0248, "step": 12760 }, { "epoch": 84.01315789473684, "grad_norm": 1.573049545288086, "learning_rate": 0.0001, "loss": 0.0253, "step": 12770 }, { "epoch": 84.07894736842105, "grad_norm": 1.2428479194641113, "learning_rate": 0.0001, "loss": 0.0239, "step": 12780 }, { "epoch": 84.14473684210526, "grad_norm": 1.6974382400512695, "learning_rate": 0.0001, "loss": 0.0252, "step": 12790 }, { "epoch": 84.21052631578948, "grad_norm": 1.1859573125839233, "learning_rate": 0.0001, "loss": 0.024, "step": 12800 }, { "epoch": 84.27631578947368, "grad_norm": 1.5613579750061035, "learning_rate": 0.0001, "loss": 0.0242, "step": 12810 }, { "epoch": 84.34210526315789, "grad_norm": 1.359305739402771, "learning_rate": 0.0001, "loss": 0.025, "step": 12820 }, { "epoch": 84.40789473684211, "grad_norm": 1.6808149814605713, "learning_rate": 0.0001, "loss": 0.0237, "step": 12830 }, { "epoch": 84.47368421052632, "grad_norm": 1.8556708097457886, "learning_rate": 0.0001, "loss": 0.0263, "step": 12840 }, { "epoch": 84.53947368421052, "grad_norm": 1.8008233308792114, "learning_rate": 0.0001, "loss": 0.0242, "step": 12850 }, { "epoch": 84.60526315789474, "grad_norm": 1.5086830854415894, "learning_rate": 0.0001, "loss": 0.024, "step": 12860 }, { "epoch": 84.67105263157895, "grad_norm": 1.6331901550292969, "learning_rate": 0.0001, "loss": 0.0238, "step": 12870 }, { "epoch": 84.73684210526316, "grad_norm": 1.1920050382614136, "learning_rate": 0.0001, "loss": 0.0245, "step": 12880 }, { "epoch": 84.80263157894737, "grad_norm": 1.910048484802246, "learning_rate": 0.0001, "loss": 0.0237, "step": 12890 }, { "epoch": 84.86842105263158, "grad_norm": 1.4449541568756104, "learning_rate": 0.0001, "loss": 0.0265, "step": 12900 }, { "epoch": 84.9342105263158, "grad_norm": 1.3787635564804077, "learning_rate": 0.0001, "loss": 0.0251, "step": 12910 }, { "epoch": 85.0, "grad_norm": 1.3375296592712402, "learning_rate": 0.0001, "loss": 0.0247, "step": 12920 }, { "epoch": 85.0657894736842, "grad_norm": 1.44197416305542, "learning_rate": 0.0001, "loss": 0.0246, "step": 12930 }, { "epoch": 85.13157894736842, "grad_norm": 1.2884734869003296, "learning_rate": 0.0001, "loss": 0.0266, "step": 12940 }, { "epoch": 85.19736842105263, "grad_norm": 1.1492596864700317, "learning_rate": 0.0001, "loss": 0.0229, "step": 12950 }, { "epoch": 85.26315789473684, "grad_norm": 1.6913357973098755, "learning_rate": 0.0001, "loss": 0.0278, "step": 12960 }, { "epoch": 85.32894736842105, "grad_norm": 1.591509461402893, "learning_rate": 0.0001, "loss": 0.0254, "step": 12970 }, { "epoch": 85.39473684210526, "grad_norm": 1.9279398918151855, "learning_rate": 0.0001, "loss": 0.0256, "step": 12980 }, { "epoch": 85.46052631578948, "grad_norm": 1.8467780351638794, "learning_rate": 0.0001, "loss": 0.0237, "step": 12990 }, { "epoch": 85.52631578947368, "grad_norm": 1.6500190496444702, "learning_rate": 0.0001, "loss": 0.0242, "step": 13000 }, { "epoch": 85.59210526315789, "grad_norm": 2.0222458839416504, "learning_rate": 0.0001, "loss": 0.0217, "step": 13010 }, { "epoch": 85.65789473684211, "grad_norm": 1.445722222328186, "learning_rate": 0.0001, "loss": 0.0251, "step": 13020 }, { "epoch": 85.72368421052632, "grad_norm": 1.3406612873077393, "learning_rate": 0.0001, "loss": 0.0249, "step": 13030 }, { "epoch": 85.78947368421052, "grad_norm": 1.2125831842422485, "learning_rate": 0.0001, "loss": 0.0251, "step": 13040 }, { "epoch": 85.85526315789474, "grad_norm": 1.3329436779022217, "learning_rate": 0.0001, "loss": 0.0241, "step": 13050 }, { "epoch": 85.92105263157895, "grad_norm": 1.7026501893997192, "learning_rate": 0.0001, "loss": 0.0244, "step": 13060 }, { "epoch": 85.98684210526316, "grad_norm": 1.6232529878616333, "learning_rate": 0.0001, "loss": 0.0229, "step": 13070 }, { "epoch": 86.05263157894737, "grad_norm": 1.4894771575927734, "learning_rate": 0.0001, "loss": 0.0232, "step": 13080 }, { "epoch": 86.11842105263158, "grad_norm": 1.3056237697601318, "learning_rate": 0.0001, "loss": 0.0257, "step": 13090 }, { "epoch": 86.1842105263158, "grad_norm": 1.318498134613037, "learning_rate": 0.0001, "loss": 0.0248, "step": 13100 }, { "epoch": 86.25, "grad_norm": 1.4497151374816895, "learning_rate": 0.0001, "loss": 0.0243, "step": 13110 }, { "epoch": 86.3157894736842, "grad_norm": 1.6557412147521973, "learning_rate": 0.0001, "loss": 0.0249, "step": 13120 }, { "epoch": 86.38157894736842, "grad_norm": 1.569457769393921, "learning_rate": 0.0001, "loss": 0.0268, "step": 13130 }, { "epoch": 86.44736842105263, "grad_norm": 1.7060179710388184, "learning_rate": 0.0001, "loss": 0.0261, "step": 13140 }, { "epoch": 86.51315789473684, "grad_norm": 1.4572563171386719, "learning_rate": 0.0001, "loss": 0.0236, "step": 13150 }, { "epoch": 86.57894736842105, "grad_norm": 1.4536716938018799, "learning_rate": 0.0001, "loss": 0.024, "step": 13160 }, { "epoch": 86.64473684210526, "grad_norm": 1.6783726215362549, "learning_rate": 0.0001, "loss": 0.0227, "step": 13170 }, { "epoch": 86.71052631578948, "grad_norm": 1.4602693319320679, "learning_rate": 0.0001, "loss": 0.0249, "step": 13180 }, { "epoch": 86.77631578947368, "grad_norm": 1.6970704793930054, "learning_rate": 0.0001, "loss": 0.0241, "step": 13190 }, { "epoch": 86.84210526315789, "grad_norm": 1.3816075325012207, "learning_rate": 0.0001, "loss": 0.0243, "step": 13200 }, { "epoch": 86.90789473684211, "grad_norm": 1.6789470911026, "learning_rate": 0.0001, "loss": 0.0245, "step": 13210 }, { "epoch": 86.97368421052632, "grad_norm": 1.5384844541549683, "learning_rate": 0.0001, "loss": 0.0219, "step": 13220 }, { "epoch": 87.03947368421052, "grad_norm": 1.5757942199707031, "learning_rate": 0.0001, "loss": 0.0233, "step": 13230 }, { "epoch": 87.10526315789474, "grad_norm": 1.7311173677444458, "learning_rate": 0.0001, "loss": 0.0239, "step": 13240 }, { "epoch": 87.17105263157895, "grad_norm": 1.5452516078948975, "learning_rate": 0.0001, "loss": 0.0241, "step": 13250 }, { "epoch": 87.23684210526316, "grad_norm": 1.495526909828186, "learning_rate": 0.0001, "loss": 0.0244, "step": 13260 }, { "epoch": 87.30263157894737, "grad_norm": 1.5925183296203613, "learning_rate": 0.0001, "loss": 0.0229, "step": 13270 }, { "epoch": 87.36842105263158, "grad_norm": 1.58163321018219, "learning_rate": 0.0001, "loss": 0.0268, "step": 13280 }, { "epoch": 87.4342105263158, "grad_norm": 1.8590866327285767, "learning_rate": 0.0001, "loss": 0.0235, "step": 13290 }, { "epoch": 87.5, "grad_norm": 1.5774991512298584, "learning_rate": 0.0001, "loss": 0.024, "step": 13300 }, { "epoch": 87.5657894736842, "grad_norm": 1.7061364650726318, "learning_rate": 0.0001, "loss": 0.0236, "step": 13310 }, { "epoch": 87.63157894736842, "grad_norm": 1.4900331497192383, "learning_rate": 0.0001, "loss": 0.022, "step": 13320 }, { "epoch": 87.69736842105263, "grad_norm": 1.8623180389404297, "learning_rate": 0.0001, "loss": 0.025, "step": 13330 }, { "epoch": 87.76315789473684, "grad_norm": 1.6893125772476196, "learning_rate": 0.0001, "loss": 0.0243, "step": 13340 }, { "epoch": 87.82894736842105, "grad_norm": 1.8551734685897827, "learning_rate": 0.0001, "loss": 0.022, "step": 13350 }, { "epoch": 87.89473684210526, "grad_norm": 1.7966662645339966, "learning_rate": 0.0001, "loss": 0.0231, "step": 13360 }, { "epoch": 87.96052631578948, "grad_norm": 1.7751268148422241, "learning_rate": 0.0001, "loss": 0.0235, "step": 13370 }, { "epoch": 88.02631578947368, "grad_norm": 1.7870049476623535, "learning_rate": 0.0001, "loss": 0.0226, "step": 13380 }, { "epoch": 88.09210526315789, "grad_norm": 1.6607742309570312, "learning_rate": 0.0001, "loss": 0.0225, "step": 13390 }, { "epoch": 88.15789473684211, "grad_norm": 1.734802007675171, "learning_rate": 0.0001, "loss": 0.0207, "step": 13400 }, { "epoch": 88.22368421052632, "grad_norm": 2.1333210468292236, "learning_rate": 0.0001, "loss": 0.0222, "step": 13410 }, { "epoch": 88.28947368421052, "grad_norm": 1.473213791847229, "learning_rate": 0.0001, "loss": 0.0244, "step": 13420 }, { "epoch": 88.35526315789474, "grad_norm": 1.6745879650115967, "learning_rate": 0.0001, "loss": 0.0242, "step": 13430 }, { "epoch": 88.42105263157895, "grad_norm": 1.3163137435913086, "learning_rate": 0.0001, "loss": 0.0248, "step": 13440 }, { "epoch": 88.48684210526316, "grad_norm": 1.573905110359192, "learning_rate": 0.0001, "loss": 0.0266, "step": 13450 }, { "epoch": 88.55263157894737, "grad_norm": 1.672709345817566, "learning_rate": 0.0001, "loss": 0.0238, "step": 13460 }, { "epoch": 88.61842105263158, "grad_norm": 1.473910927772522, "learning_rate": 0.0001, "loss": 0.0262, "step": 13470 }, { "epoch": 88.6842105263158, "grad_norm": 1.4131242036819458, "learning_rate": 0.0001, "loss": 0.0248, "step": 13480 }, { "epoch": 88.75, "grad_norm": 1.6035963296890259, "learning_rate": 0.0001, "loss": 0.0227, "step": 13490 }, { "epoch": 88.8157894736842, "grad_norm": 1.489512324333191, "learning_rate": 0.0001, "loss": 0.0238, "step": 13500 }, { "epoch": 88.88157894736842, "grad_norm": 1.45292329788208, "learning_rate": 0.0001, "loss": 0.0223, "step": 13510 }, { "epoch": 88.94736842105263, "grad_norm": 1.6451809406280518, "learning_rate": 0.0001, "loss": 0.024, "step": 13520 }, { "epoch": 89.01315789473684, "grad_norm": 1.826170563697815, "learning_rate": 0.0001, "loss": 0.024, "step": 13530 }, { "epoch": 89.07894736842105, "grad_norm": 1.5260390043258667, "learning_rate": 0.0001, "loss": 0.0223, "step": 13540 }, { "epoch": 89.14473684210526, "grad_norm": 1.5344808101654053, "learning_rate": 0.0001, "loss": 0.0241, "step": 13550 }, { "epoch": 89.21052631578948, "grad_norm": 1.3127132654190063, "learning_rate": 0.0001, "loss": 0.023, "step": 13560 }, { "epoch": 89.27631578947368, "grad_norm": 1.71023428440094, "learning_rate": 0.0001, "loss": 0.0221, "step": 13570 }, { "epoch": 89.34210526315789, "grad_norm": 1.7097973823547363, "learning_rate": 0.0001, "loss": 0.0249, "step": 13580 }, { "epoch": 89.40789473684211, "grad_norm": 1.8148266077041626, "learning_rate": 0.0001, "loss": 0.0258, "step": 13590 }, { "epoch": 89.47368421052632, "grad_norm": 1.3944507837295532, "learning_rate": 0.0001, "loss": 0.0237, "step": 13600 }, { "epoch": 89.53947368421052, "grad_norm": 1.421279788017273, "learning_rate": 0.0001, "loss": 0.0234, "step": 13610 }, { "epoch": 89.60526315789474, "grad_norm": 1.6334819793701172, "learning_rate": 0.0001, "loss": 0.0257, "step": 13620 }, { "epoch": 89.67105263157895, "grad_norm": 1.9979926347732544, "learning_rate": 0.0001, "loss": 0.0249, "step": 13630 }, { "epoch": 89.73684210526316, "grad_norm": 1.5678966045379639, "learning_rate": 0.0001, "loss": 0.025, "step": 13640 }, { "epoch": 89.80263157894737, "grad_norm": 1.6031328439712524, "learning_rate": 0.0001, "loss": 0.0241, "step": 13650 }, { "epoch": 89.86842105263158, "grad_norm": 1.8089882135391235, "learning_rate": 0.0001, "loss": 0.0244, "step": 13660 }, { "epoch": 89.9342105263158, "grad_norm": 1.2982338666915894, "learning_rate": 0.0001, "loss": 0.0233, "step": 13670 }, { "epoch": 90.0, "grad_norm": 1.3500252962112427, "learning_rate": 0.0001, "loss": 0.025, "step": 13680 }, { "epoch": 90.0657894736842, "grad_norm": 1.7127699851989746, "learning_rate": 0.0001, "loss": 0.0221, "step": 13690 }, { "epoch": 90.13157894736842, "grad_norm": 1.1409753561019897, "learning_rate": 0.0001, "loss": 0.023, "step": 13700 }, { "epoch": 90.19736842105263, "grad_norm": 1.4835864305496216, "learning_rate": 0.0001, "loss": 0.0255, "step": 13710 }, { "epoch": 90.26315789473684, "grad_norm": 1.676373839378357, "learning_rate": 0.0001, "loss": 0.0236, "step": 13720 }, { "epoch": 90.32894736842105, "grad_norm": 1.247381329536438, "learning_rate": 0.0001, "loss": 0.0219, "step": 13730 }, { "epoch": 90.39473684210526, "grad_norm": 1.1744779348373413, "learning_rate": 0.0001, "loss": 0.0234, "step": 13740 }, { "epoch": 90.46052631578948, "grad_norm": 1.8650901317596436, "learning_rate": 0.0001, "loss": 0.025, "step": 13750 }, { "epoch": 90.52631578947368, "grad_norm": 1.7591873407363892, "learning_rate": 0.0001, "loss": 0.0249, "step": 13760 }, { "epoch": 90.59210526315789, "grad_norm": 1.6825592517852783, "learning_rate": 0.0001, "loss": 0.0221, "step": 13770 }, { "epoch": 90.65789473684211, "grad_norm": 1.285715937614441, "learning_rate": 0.0001, "loss": 0.0221, "step": 13780 }, { "epoch": 90.72368421052632, "grad_norm": 1.576466679573059, "learning_rate": 0.0001, "loss": 0.0256, "step": 13790 }, { "epoch": 90.78947368421052, "grad_norm": 1.6532832384109497, "learning_rate": 0.0001, "loss": 0.024, "step": 13800 }, { "epoch": 90.85526315789474, "grad_norm": 1.4761638641357422, "learning_rate": 0.0001, "loss": 0.023, "step": 13810 }, { "epoch": 90.92105263157895, "grad_norm": 1.490132451057434, "learning_rate": 0.0001, "loss": 0.0248, "step": 13820 }, { "epoch": 90.98684210526316, "grad_norm": 1.9364569187164307, "learning_rate": 0.0001, "loss": 0.0219, "step": 13830 }, { "epoch": 91.05263157894737, "grad_norm": 1.6803781986236572, "learning_rate": 0.0001, "loss": 0.0254, "step": 13840 }, { "epoch": 91.11842105263158, "grad_norm": 1.5090696811676025, "learning_rate": 0.0001, "loss": 0.024, "step": 13850 }, { "epoch": 91.1842105263158, "grad_norm": 1.7960859537124634, "learning_rate": 0.0001, "loss": 0.0252, "step": 13860 }, { "epoch": 91.25, "grad_norm": 1.7104315757751465, "learning_rate": 0.0001, "loss": 0.022, "step": 13870 }, { "epoch": 91.3157894736842, "grad_norm": 1.793373703956604, "learning_rate": 0.0001, "loss": 0.0236, "step": 13880 }, { "epoch": 91.38157894736842, "grad_norm": 2.0539634227752686, "learning_rate": 0.0001, "loss": 0.0248, "step": 13890 }, { "epoch": 91.44736842105263, "grad_norm": 2.1168859004974365, "learning_rate": 0.0001, "loss": 0.0233, "step": 13900 }, { "epoch": 91.51315789473684, "grad_norm": 1.8195390701293945, "learning_rate": 0.0001, "loss": 0.0236, "step": 13910 }, { "epoch": 91.57894736842105, "grad_norm": 1.714077353477478, "learning_rate": 0.0001, "loss": 0.0225, "step": 13920 }, { "epoch": 91.64473684210526, "grad_norm": 1.7049275636672974, "learning_rate": 0.0001, "loss": 0.0226, "step": 13930 }, { "epoch": 91.71052631578948, "grad_norm": 2.158938407897949, "learning_rate": 0.0001, "loss": 0.023, "step": 13940 }, { "epoch": 91.77631578947368, "grad_norm": 2.1923866271972656, "learning_rate": 0.0001, "loss": 0.0236, "step": 13950 }, { "epoch": 91.84210526315789, "grad_norm": 1.7512110471725464, "learning_rate": 0.0001, "loss": 0.0222, "step": 13960 }, { "epoch": 91.90789473684211, "grad_norm": 1.8669230937957764, "learning_rate": 0.0001, "loss": 0.023, "step": 13970 }, { "epoch": 91.97368421052632, "grad_norm": 1.6305128335952759, "learning_rate": 0.0001, "loss": 0.0225, "step": 13980 }, { "epoch": 92.03947368421052, "grad_norm": 1.5910475254058838, "learning_rate": 0.0001, "loss": 0.0229, "step": 13990 }, { "epoch": 92.10526315789474, "grad_norm": 1.5273504257202148, "learning_rate": 0.0001, "loss": 0.024, "step": 14000 }, { "epoch": 92.17105263157895, "grad_norm": 1.7885582447052002, "learning_rate": 0.0001, "loss": 0.0236, "step": 14010 }, { "epoch": 92.23684210526316, "grad_norm": 2.1910252571105957, "learning_rate": 0.0001, "loss": 0.0244, "step": 14020 }, { "epoch": 92.30263157894737, "grad_norm": 1.755102515220642, "learning_rate": 0.0001, "loss": 0.0253, "step": 14030 }, { "epoch": 92.36842105263158, "grad_norm": 1.6435348987579346, "learning_rate": 0.0001, "loss": 0.0266, "step": 14040 }, { "epoch": 92.4342105263158, "grad_norm": 2.323174238204956, "learning_rate": 0.0001, "loss": 0.0234, "step": 14050 }, { "epoch": 92.5, "grad_norm": 3.1456828117370605, "learning_rate": 0.0001, "loss": 0.0257, "step": 14060 }, { "epoch": 92.5657894736842, "grad_norm": 2.456490993499756, "learning_rate": 0.0001, "loss": 0.0248, "step": 14070 }, { "epoch": 92.63157894736842, "grad_norm": 1.9856231212615967, "learning_rate": 0.0001, "loss": 0.0221, "step": 14080 }, { "epoch": 92.69736842105263, "grad_norm": 1.8175798654556274, "learning_rate": 0.0001, "loss": 0.0202, "step": 14090 }, { "epoch": 92.76315789473684, "grad_norm": 1.905171513557434, "learning_rate": 0.0001, "loss": 0.0224, "step": 14100 }, { "epoch": 92.82894736842105, "grad_norm": 1.7334188222885132, "learning_rate": 0.0001, "loss": 0.0221, "step": 14110 }, { "epoch": 92.89473684210526, "grad_norm": 1.6266708374023438, "learning_rate": 0.0001, "loss": 0.0185, "step": 14120 }, { "epoch": 92.96052631578948, "grad_norm": 1.4972394704818726, "learning_rate": 0.0001, "loss": 0.0201, "step": 14130 }, { "epoch": 93.02631578947368, "grad_norm": 1.655664086341858, "learning_rate": 0.0001, "loss": 0.024, "step": 14140 }, { "epoch": 93.09210526315789, "grad_norm": 1.6464513540267944, "learning_rate": 0.0001, "loss": 0.0227, "step": 14150 }, { "epoch": 93.15789473684211, "grad_norm": 1.4033807516098022, "learning_rate": 0.0001, "loss": 0.0238, "step": 14160 }, { "epoch": 93.22368421052632, "grad_norm": 1.5057168006896973, "learning_rate": 0.0001, "loss": 0.0244, "step": 14170 }, { "epoch": 93.28947368421052, "grad_norm": 1.735034465789795, "learning_rate": 0.0001, "loss": 0.0223, "step": 14180 }, { "epoch": 93.35526315789474, "grad_norm": 1.6805418729782104, "learning_rate": 0.0001, "loss": 0.0248, "step": 14190 }, { "epoch": 93.42105263157895, "grad_norm": 1.3809490203857422, "learning_rate": 0.0001, "loss": 0.0243, "step": 14200 }, { "epoch": 93.48684210526316, "grad_norm": 1.5541129112243652, "learning_rate": 0.0001, "loss": 0.0241, "step": 14210 }, { "epoch": 93.55263157894737, "grad_norm": 1.6721612215042114, "learning_rate": 0.0001, "loss": 0.0226, "step": 14220 }, { "epoch": 93.61842105263158, "grad_norm": 1.4749832153320312, "learning_rate": 0.0001, "loss": 0.023, "step": 14230 }, { "epoch": 93.6842105263158, "grad_norm": 1.229355812072754, "learning_rate": 0.0001, "loss": 0.0221, "step": 14240 }, { "epoch": 93.75, "grad_norm": 1.3654001951217651, "learning_rate": 0.0001, "loss": 0.0249, "step": 14250 }, { "epoch": 93.8157894736842, "grad_norm": 1.3455557823181152, "learning_rate": 0.0001, "loss": 0.0245, "step": 14260 }, { "epoch": 93.88157894736842, "grad_norm": 1.2095595598220825, "learning_rate": 0.0001, "loss": 0.0233, "step": 14270 }, { "epoch": 93.94736842105263, "grad_norm": 1.2949072122573853, "learning_rate": 0.0001, "loss": 0.0252, "step": 14280 }, { "epoch": 94.01315789473684, "grad_norm": 1.7537165880203247, "learning_rate": 0.0001, "loss": 0.0238, "step": 14290 }, { "epoch": 94.07894736842105, "grad_norm": 1.4385466575622559, "learning_rate": 0.0001, "loss": 0.0282, "step": 14300 }, { "epoch": 94.14473684210526, "grad_norm": 1.1989967823028564, "learning_rate": 0.0001, "loss": 0.0238, "step": 14310 }, { "epoch": 94.21052631578948, "grad_norm": 1.478391408920288, "learning_rate": 0.0001, "loss": 0.0218, "step": 14320 }, { "epoch": 94.27631578947368, "grad_norm": 1.5552846193313599, "learning_rate": 0.0001, "loss": 0.0229, "step": 14330 }, { "epoch": 94.34210526315789, "grad_norm": 1.5184147357940674, "learning_rate": 0.0001, "loss": 0.0245, "step": 14340 }, { "epoch": 94.40789473684211, "grad_norm": 1.4583368301391602, "learning_rate": 0.0001, "loss": 0.0225, "step": 14350 }, { "epoch": 94.47368421052632, "grad_norm": 1.5319230556488037, "learning_rate": 0.0001, "loss": 0.0243, "step": 14360 }, { "epoch": 94.53947368421052, "grad_norm": 1.505807876586914, "learning_rate": 0.0001, "loss": 0.0216, "step": 14370 }, { "epoch": 94.60526315789474, "grad_norm": 1.2531390190124512, "learning_rate": 0.0001, "loss": 0.0243, "step": 14380 }, { "epoch": 94.67105263157895, "grad_norm": 1.9696604013442993, "learning_rate": 0.0001, "loss": 0.0247, "step": 14390 }, { "epoch": 94.73684210526316, "grad_norm": 1.7615699768066406, "learning_rate": 0.0001, "loss": 0.0225, "step": 14400 }, { "epoch": 94.80263157894737, "grad_norm": 1.9439195394515991, "learning_rate": 0.0001, "loss": 0.0255, "step": 14410 }, { "epoch": 94.86842105263158, "grad_norm": 2.943202018737793, "learning_rate": 0.0001, "loss": 0.0236, "step": 14420 }, { "epoch": 94.9342105263158, "grad_norm": 1.8300869464874268, "learning_rate": 0.0001, "loss": 0.0239, "step": 14430 }, { "epoch": 95.0, "grad_norm": 2.4264817237854004, "learning_rate": 0.0001, "loss": 0.0246, "step": 14440 }, { "epoch": 95.0657894736842, "grad_norm": 2.130911350250244, "learning_rate": 0.0001, "loss": 0.0236, "step": 14450 }, { "epoch": 95.13157894736842, "grad_norm": 2.132805109024048, "learning_rate": 0.0001, "loss": 0.0219, "step": 14460 }, { "epoch": 95.19736842105263, "grad_norm": 2.0448882579803467, "learning_rate": 0.0001, "loss": 0.0216, "step": 14470 }, { "epoch": 95.26315789473684, "grad_norm": 1.6117432117462158, "learning_rate": 0.0001, "loss": 0.0208, "step": 14480 }, { "epoch": 95.32894736842105, "grad_norm": 1.5426193475723267, "learning_rate": 0.0001, "loss": 0.0226, "step": 14490 }, { "epoch": 95.39473684210526, "grad_norm": 1.6238819360733032, "learning_rate": 0.0001, "loss": 0.0206, "step": 14500 }, { "epoch": 95.46052631578948, "grad_norm": 1.608115553855896, "learning_rate": 0.0001, "loss": 0.0236, "step": 14510 }, { "epoch": 95.52631578947368, "grad_norm": 1.6221609115600586, "learning_rate": 0.0001, "loss": 0.0223, "step": 14520 }, { "epoch": 95.59210526315789, "grad_norm": 1.772195816040039, "learning_rate": 0.0001, "loss": 0.0225, "step": 14530 }, { "epoch": 95.65789473684211, "grad_norm": 1.5958181619644165, "learning_rate": 0.0001, "loss": 0.0254, "step": 14540 }, { "epoch": 95.72368421052632, "grad_norm": 1.7433152198791504, "learning_rate": 0.0001, "loss": 0.0249, "step": 14550 }, { "epoch": 95.78947368421052, "grad_norm": 1.7404011487960815, "learning_rate": 0.0001, "loss": 0.0214, "step": 14560 }, { "epoch": 95.85526315789474, "grad_norm": 1.8106964826583862, "learning_rate": 0.0001, "loss": 0.0252, "step": 14570 }, { "epoch": 95.92105263157895, "grad_norm": 1.8747131824493408, "learning_rate": 0.0001, "loss": 0.0235, "step": 14580 }, { "epoch": 95.98684210526316, "grad_norm": 1.9325027465820312, "learning_rate": 0.0001, "loss": 0.0235, "step": 14590 }, { "epoch": 96.05263157894737, "grad_norm": 1.6936956644058228, "learning_rate": 0.0001, "loss": 0.023, "step": 14600 }, { "epoch": 96.11842105263158, "grad_norm": 1.6250704526901245, "learning_rate": 0.0001, "loss": 0.0235, "step": 14610 }, { "epoch": 96.1842105263158, "grad_norm": 2.4609410762786865, "learning_rate": 0.0001, "loss": 0.0249, "step": 14620 }, { "epoch": 96.25, "grad_norm": 2.147636890411377, "learning_rate": 0.0001, "loss": 0.0224, "step": 14630 }, { "epoch": 96.3157894736842, "grad_norm": 1.605124831199646, "learning_rate": 0.0001, "loss": 0.0213, "step": 14640 }, { "epoch": 96.38157894736842, "grad_norm": 1.8906571865081787, "learning_rate": 0.0001, "loss": 0.0232, "step": 14650 }, { "epoch": 96.44736842105263, "grad_norm": 1.9277275800704956, "learning_rate": 0.0001, "loss": 0.0216, "step": 14660 }, { "epoch": 96.51315789473684, "grad_norm": 1.7315579652786255, "learning_rate": 0.0001, "loss": 0.0215, "step": 14670 }, { "epoch": 96.57894736842105, "grad_norm": 1.8327442407608032, "learning_rate": 0.0001, "loss": 0.0236, "step": 14680 }, { "epoch": 96.64473684210526, "grad_norm": 1.5039429664611816, "learning_rate": 0.0001, "loss": 0.0231, "step": 14690 }, { "epoch": 96.71052631578948, "grad_norm": 1.958951473236084, "learning_rate": 0.0001, "loss": 0.0229, "step": 14700 }, { "epoch": 96.77631578947368, "grad_norm": 1.7764480113983154, "learning_rate": 0.0001, "loss": 0.0222, "step": 14710 }, { "epoch": 96.84210526315789, "grad_norm": 1.4950709342956543, "learning_rate": 0.0001, "loss": 0.0243, "step": 14720 }, { "epoch": 96.90789473684211, "grad_norm": 2.5329701900482178, "learning_rate": 0.0001, "loss": 0.0242, "step": 14730 }, { "epoch": 96.97368421052632, "grad_norm": 1.7658509016036987, "learning_rate": 0.0001, "loss": 0.0226, "step": 14740 }, { "epoch": 97.03947368421052, "grad_norm": 1.8286138772964478, "learning_rate": 0.0001, "loss": 0.0239, "step": 14750 }, { "epoch": 97.10526315789474, "grad_norm": 1.5775742530822754, "learning_rate": 0.0001, "loss": 0.0227, "step": 14760 }, { "epoch": 97.17105263157895, "grad_norm": 1.4077131748199463, "learning_rate": 0.0001, "loss": 0.0218, "step": 14770 }, { "epoch": 97.23684210526316, "grad_norm": 1.6287063360214233, "learning_rate": 0.0001, "loss": 0.0229, "step": 14780 }, { "epoch": 97.30263157894737, "grad_norm": 1.9090032577514648, "learning_rate": 0.0001, "loss": 0.0231, "step": 14790 }, { "epoch": 97.36842105263158, "grad_norm": 1.7820619344711304, "learning_rate": 0.0001, "loss": 0.0213, "step": 14800 }, { "epoch": 97.4342105263158, "grad_norm": 1.4791796207427979, "learning_rate": 0.0001, "loss": 0.0237, "step": 14810 }, { "epoch": 97.5, "grad_norm": 1.6727066040039062, "learning_rate": 0.0001, "loss": 0.0229, "step": 14820 }, { "epoch": 97.5657894736842, "grad_norm": 1.720504641532898, "learning_rate": 0.0001, "loss": 0.0191, "step": 14830 }, { "epoch": 97.63157894736842, "grad_norm": 2.1600022315979004, "learning_rate": 0.0001, "loss": 0.0229, "step": 14840 }, { "epoch": 97.69736842105263, "grad_norm": 2.139911413192749, "learning_rate": 0.0001, "loss": 0.0218, "step": 14850 }, { "epoch": 97.76315789473684, "grad_norm": 1.762117624282837, "learning_rate": 0.0001, "loss": 0.021, "step": 14860 }, { "epoch": 97.82894736842105, "grad_norm": 1.6421223878860474, "learning_rate": 0.0001, "loss": 0.0193, "step": 14870 }, { "epoch": 97.89473684210526, "grad_norm": 1.2390879392623901, "learning_rate": 0.0001, "loss": 0.0215, "step": 14880 }, { "epoch": 97.96052631578948, "grad_norm": 1.3071866035461426, "learning_rate": 0.0001, "loss": 0.0218, "step": 14890 }, { "epoch": 98.02631578947368, "grad_norm": 1.6224156618118286, "learning_rate": 0.0001, "loss": 0.0208, "step": 14900 }, { "epoch": 98.09210526315789, "grad_norm": 1.703869342803955, "learning_rate": 0.0001, "loss": 0.0232, "step": 14910 }, { "epoch": 98.15789473684211, "grad_norm": 1.475963830947876, "learning_rate": 0.0001, "loss": 0.0212, "step": 14920 }, { "epoch": 98.22368421052632, "grad_norm": 1.451790690422058, "learning_rate": 0.0001, "loss": 0.0217, "step": 14930 }, { "epoch": 98.28947368421052, "grad_norm": 1.3597410917282104, "learning_rate": 0.0001, "loss": 0.0225, "step": 14940 }, { "epoch": 98.35526315789474, "grad_norm": 1.047415018081665, "learning_rate": 0.0001, "loss": 0.0212, "step": 14950 }, { "epoch": 98.42105263157895, "grad_norm": 1.6275583505630493, "learning_rate": 0.0001, "loss": 0.0233, "step": 14960 }, { "epoch": 98.48684210526316, "grad_norm": 1.3515644073486328, "learning_rate": 0.0001, "loss": 0.0232, "step": 14970 }, { "epoch": 98.55263157894737, "grad_norm": 1.5167436599731445, "learning_rate": 0.0001, "loss": 0.0245, "step": 14980 }, { "epoch": 98.61842105263158, "grad_norm": 1.5004242658615112, "learning_rate": 0.0001, "loss": 0.0246, "step": 14990 }, { "epoch": 98.6842105263158, "grad_norm": 1.2739217281341553, "learning_rate": 0.0001, "loss": 0.0234, "step": 15000 }, { "epoch": 98.75, "grad_norm": 1.6664555072784424, "learning_rate": 0.0001, "loss": 0.0252, "step": 15010 }, { "epoch": 98.8157894736842, "grad_norm": 1.635536789894104, "learning_rate": 0.0001, "loss": 0.0245, "step": 15020 }, { "epoch": 98.88157894736842, "grad_norm": 1.3948959112167358, "learning_rate": 0.0001, "loss": 0.023, "step": 15030 }, { "epoch": 98.94736842105263, "grad_norm": 1.6810851097106934, "learning_rate": 0.0001, "loss": 0.0249, "step": 15040 }, { "epoch": 99.01315789473684, "grad_norm": 1.4952811002731323, "learning_rate": 0.0001, "loss": 0.024, "step": 15050 }, { "epoch": 99.07894736842105, "grad_norm": 1.319809913635254, "learning_rate": 0.0001, "loss": 0.023, "step": 15060 }, { "epoch": 99.14473684210526, "grad_norm": 1.7210019826889038, "learning_rate": 0.0001, "loss": 0.0233, "step": 15070 }, { "epoch": 99.21052631578948, "grad_norm": 1.4392873048782349, "learning_rate": 0.0001, "loss": 0.0234, "step": 15080 }, { "epoch": 99.27631578947368, "grad_norm": 1.3925666809082031, "learning_rate": 0.0001, "loss": 0.0235, "step": 15090 }, { "epoch": 99.34210526315789, "grad_norm": 1.5854908227920532, "learning_rate": 0.0001, "loss": 0.0234, "step": 15100 }, { "epoch": 99.40789473684211, "grad_norm": 1.3266748189926147, "learning_rate": 0.0001, "loss": 0.0228, "step": 15110 }, { "epoch": 99.47368421052632, "grad_norm": 1.3466399908065796, "learning_rate": 0.0001, "loss": 0.0261, "step": 15120 }, { "epoch": 99.53947368421052, "grad_norm": 1.1351549625396729, "learning_rate": 0.0001, "loss": 0.0236, "step": 15130 }, { "epoch": 99.60526315789474, "grad_norm": 1.2604902982711792, "learning_rate": 0.0001, "loss": 0.0239, "step": 15140 }, { "epoch": 99.67105263157895, "grad_norm": 1.4526969194412231, "learning_rate": 0.0001, "loss": 0.0282, "step": 15150 }, { "epoch": 99.73684210526316, "grad_norm": 1.7058517932891846, "learning_rate": 0.0001, "loss": 0.0252, "step": 15160 }, { "epoch": 99.80263157894737, "grad_norm": 1.7141302824020386, "learning_rate": 0.0001, "loss": 0.0235, "step": 15170 }, { "epoch": 99.86842105263158, "grad_norm": 1.6191295385360718, "learning_rate": 0.0001, "loss": 0.0247, "step": 15180 }, { "epoch": 99.9342105263158, "grad_norm": 1.258483648300171, "learning_rate": 0.0001, "loss": 0.0236, "step": 15190 }, { "epoch": 100.0, "grad_norm": 1.4220373630523682, "learning_rate": 0.0001, "loss": 0.0234, "step": 15200 }, { "epoch": 100.0657894736842, "grad_norm": 1.295554518699646, "learning_rate": 0.0001, "loss": 0.0235, "step": 15210 }, { "epoch": 100.13157894736842, "grad_norm": 1.5223777294158936, "learning_rate": 0.0001, "loss": 0.0235, "step": 15220 }, { "epoch": 100.19736842105263, "grad_norm": 0.9295150637626648, "learning_rate": 0.0001, "loss": 0.0232, "step": 15230 }, { "epoch": 100.26315789473684, "grad_norm": 1.522321105003357, "learning_rate": 0.0001, "loss": 0.023, "step": 15240 }, { "epoch": 100.32894736842105, "grad_norm": 1.380570650100708, "learning_rate": 0.0001, "loss": 0.0234, "step": 15250 }, { "epoch": 100.39473684210526, "grad_norm": 1.745607614517212, "learning_rate": 0.0001, "loss": 0.0237, "step": 15260 }, { "epoch": 100.46052631578948, "grad_norm": 1.6587855815887451, "learning_rate": 0.0001, "loss": 0.0236, "step": 15270 }, { "epoch": 100.52631578947368, "grad_norm": 1.592822551727295, "learning_rate": 0.0001, "loss": 0.0232, "step": 15280 }, { "epoch": 100.59210526315789, "grad_norm": 1.649880290031433, "learning_rate": 0.0001, "loss": 0.0226, "step": 15290 }, { "epoch": 100.65789473684211, "grad_norm": 1.350723385810852, "learning_rate": 0.0001, "loss": 0.0236, "step": 15300 }, { "epoch": 100.72368421052632, "grad_norm": 1.5590640306472778, "learning_rate": 0.0001, "loss": 0.0244, "step": 15310 }, { "epoch": 100.78947368421052, "grad_norm": 1.4502315521240234, "learning_rate": 0.0001, "loss": 0.0237, "step": 15320 }, { "epoch": 100.85526315789474, "grad_norm": 1.206421136856079, "learning_rate": 0.0001, "loss": 0.0256, "step": 15330 }, { "epoch": 100.92105263157895, "grad_norm": 1.6681444644927979, "learning_rate": 0.0001, "loss": 0.0237, "step": 15340 }, { "epoch": 100.98684210526316, "grad_norm": 1.7128868103027344, "learning_rate": 0.0001, "loss": 0.0277, "step": 15350 }, { "epoch": 101.05263157894737, "grad_norm": 1.7957628965377808, "learning_rate": 0.0001, "loss": 0.0261, "step": 15360 }, { "epoch": 101.11842105263158, "grad_norm": 1.8973162174224854, "learning_rate": 0.0001, "loss": 0.0247, "step": 15370 }, { "epoch": 101.1842105263158, "grad_norm": 1.4725027084350586, "learning_rate": 0.0001, "loss": 0.024, "step": 15380 }, { "epoch": 101.25, "grad_norm": 1.2609061002731323, "learning_rate": 0.0001, "loss": 0.0253, "step": 15390 }, { "epoch": 101.3157894736842, "grad_norm": 1.3644044399261475, "learning_rate": 0.0001, "loss": 0.0218, "step": 15400 }, { "epoch": 101.38157894736842, "grad_norm": 1.4843460321426392, "learning_rate": 0.0001, "loss": 0.0232, "step": 15410 }, { "epoch": 101.44736842105263, "grad_norm": 1.5529197454452515, "learning_rate": 0.0001, "loss": 0.0215, "step": 15420 }, { "epoch": 101.51315789473684, "grad_norm": 1.5046693086624146, "learning_rate": 0.0001, "loss": 0.0243, "step": 15430 }, { "epoch": 101.57894736842105, "grad_norm": 1.6078213453292847, "learning_rate": 0.0001, "loss": 0.0223, "step": 15440 }, { "epoch": 101.64473684210526, "grad_norm": 1.9121206998825073, "learning_rate": 0.0001, "loss": 0.0241, "step": 15450 }, { "epoch": 101.71052631578948, "grad_norm": 1.69439697265625, "learning_rate": 0.0001, "loss": 0.0215, "step": 15460 }, { "epoch": 101.77631578947368, "grad_norm": 1.3743191957473755, "learning_rate": 0.0001, "loss": 0.0217, "step": 15470 }, { "epoch": 101.84210526315789, "grad_norm": 1.4057716131210327, "learning_rate": 0.0001, "loss": 0.0238, "step": 15480 }, { "epoch": 101.90789473684211, "grad_norm": 1.2741807699203491, "learning_rate": 0.0001, "loss": 0.0246, "step": 15490 }, { "epoch": 101.97368421052632, "grad_norm": 1.209885597229004, "learning_rate": 0.0001, "loss": 0.024, "step": 15500 }, { "epoch": 102.03947368421052, "grad_norm": 1.3488867282867432, "learning_rate": 0.0001, "loss": 0.024, "step": 15510 }, { "epoch": 102.10526315789474, "grad_norm": 1.4938287734985352, "learning_rate": 0.0001, "loss": 0.025, "step": 15520 }, { "epoch": 102.17105263157895, "grad_norm": 1.3326101303100586, "learning_rate": 0.0001, "loss": 0.0245, "step": 15530 }, { "epoch": 102.23684210526316, "grad_norm": 1.462449550628662, "learning_rate": 0.0001, "loss": 0.0244, "step": 15540 }, { "epoch": 102.30263157894737, "grad_norm": 1.2210637331008911, "learning_rate": 0.0001, "loss": 0.0244, "step": 15550 }, { "epoch": 102.36842105263158, "grad_norm": 1.4737353324890137, "learning_rate": 0.0001, "loss": 0.0225, "step": 15560 }, { "epoch": 102.4342105263158, "grad_norm": 1.4356292486190796, "learning_rate": 0.0001, "loss": 0.0237, "step": 15570 }, { "epoch": 102.5, "grad_norm": 1.472893238067627, "learning_rate": 0.0001, "loss": 0.0218, "step": 15580 }, { "epoch": 102.5657894736842, "grad_norm": 1.5573779344558716, "learning_rate": 0.0001, "loss": 0.0214, "step": 15590 }, { "epoch": 102.63157894736842, "grad_norm": 1.5440502166748047, "learning_rate": 0.0001, "loss": 0.024, "step": 15600 }, { "epoch": 102.69736842105263, "grad_norm": 1.5142933130264282, "learning_rate": 0.0001, "loss": 0.0224, "step": 15610 }, { "epoch": 102.76315789473684, "grad_norm": 1.6982897520065308, "learning_rate": 0.0001, "loss": 0.0217, "step": 15620 }, { "epoch": 102.82894736842105, "grad_norm": 1.8263667821884155, "learning_rate": 0.0001, "loss": 0.0208, "step": 15630 }, { "epoch": 102.89473684210526, "grad_norm": 1.2521591186523438, "learning_rate": 0.0001, "loss": 0.0246, "step": 15640 }, { "epoch": 102.96052631578948, "grad_norm": 1.5161454677581787, "learning_rate": 0.0001, "loss": 0.0217, "step": 15650 }, { "epoch": 103.02631578947368, "grad_norm": 1.4727702140808105, "learning_rate": 0.0001, "loss": 0.0224, "step": 15660 }, { "epoch": 103.09210526315789, "grad_norm": 1.3148939609527588, "learning_rate": 0.0001, "loss": 0.0221, "step": 15670 }, { "epoch": 103.15789473684211, "grad_norm": 1.3606066703796387, "learning_rate": 0.0001, "loss": 0.0221, "step": 15680 }, { "epoch": 103.22368421052632, "grad_norm": 1.4023685455322266, "learning_rate": 0.0001, "loss": 0.0225, "step": 15690 }, { "epoch": 103.28947368421052, "grad_norm": 1.424401879310608, "learning_rate": 0.0001, "loss": 0.0208, "step": 15700 }, { "epoch": 103.35526315789474, "grad_norm": 1.1225404739379883, "learning_rate": 0.0001, "loss": 0.0228, "step": 15710 }, { "epoch": 103.42105263157895, "grad_norm": 1.4364515542984009, "learning_rate": 0.0001, "loss": 0.0202, "step": 15720 }, { "epoch": 103.48684210526316, "grad_norm": 1.8251726627349854, "learning_rate": 0.0001, "loss": 0.025, "step": 15730 }, { "epoch": 103.55263157894737, "grad_norm": 1.6310752630233765, "learning_rate": 0.0001, "loss": 0.0222, "step": 15740 }, { "epoch": 103.61842105263158, "grad_norm": 1.3311368227005005, "learning_rate": 0.0001, "loss": 0.0229, "step": 15750 }, { "epoch": 103.6842105263158, "grad_norm": 1.7061165571212769, "learning_rate": 0.0001, "loss": 0.0233, "step": 15760 }, { "epoch": 103.75, "grad_norm": 1.737336277961731, "learning_rate": 0.0001, "loss": 0.0231, "step": 15770 }, { "epoch": 103.8157894736842, "grad_norm": 1.429793119430542, "learning_rate": 0.0001, "loss": 0.0224, "step": 15780 }, { "epoch": 103.88157894736842, "grad_norm": 1.2859523296356201, "learning_rate": 0.0001, "loss": 0.0249, "step": 15790 }, { "epoch": 103.94736842105263, "grad_norm": 1.5139811038970947, "learning_rate": 0.0001, "loss": 0.0203, "step": 15800 }, { "epoch": 104.01315789473684, "grad_norm": 1.5388349294662476, "learning_rate": 0.0001, "loss": 0.0227, "step": 15810 }, { "epoch": 104.07894736842105, "grad_norm": 1.2440307140350342, "learning_rate": 0.0001, "loss": 0.021, "step": 15820 }, { "epoch": 104.14473684210526, "grad_norm": 1.590995192527771, "learning_rate": 0.0001, "loss": 0.0219, "step": 15830 }, { "epoch": 104.21052631578948, "grad_norm": 1.5027881860733032, "learning_rate": 0.0001, "loss": 0.0227, "step": 15840 }, { "epoch": 104.27631578947368, "grad_norm": 1.2226101160049438, "learning_rate": 0.0001, "loss": 0.0244, "step": 15850 }, { "epoch": 104.34210526315789, "grad_norm": 1.3308379650115967, "learning_rate": 0.0001, "loss": 0.0227, "step": 15860 }, { "epoch": 104.40789473684211, "grad_norm": 1.2079100608825684, "learning_rate": 0.0001, "loss": 0.0218, "step": 15870 }, { "epoch": 104.47368421052632, "grad_norm": 1.1981853246688843, "learning_rate": 0.0001, "loss": 0.0221, "step": 15880 }, { "epoch": 104.53947368421052, "grad_norm": 1.3664216995239258, "learning_rate": 0.0001, "loss": 0.022, "step": 15890 }, { "epoch": 104.60526315789474, "grad_norm": 1.8628100156784058, "learning_rate": 0.0001, "loss": 0.0234, "step": 15900 }, { "epoch": 104.67105263157895, "grad_norm": 1.2317039966583252, "learning_rate": 0.0001, "loss": 0.0255, "step": 15910 }, { "epoch": 104.73684210526316, "grad_norm": 1.513042688369751, "learning_rate": 0.0001, "loss": 0.0236, "step": 15920 }, { "epoch": 104.80263157894737, "grad_norm": 1.6402066946029663, "learning_rate": 0.0001, "loss": 0.0226, "step": 15930 }, { "epoch": 104.86842105263158, "grad_norm": 1.7423036098480225, "learning_rate": 0.0001, "loss": 0.0221, "step": 15940 }, { "epoch": 104.9342105263158, "grad_norm": 1.2021863460540771, "learning_rate": 0.0001, "loss": 0.022, "step": 15950 }, { "epoch": 105.0, "grad_norm": 1.5771297216415405, "learning_rate": 0.0001, "loss": 0.0243, "step": 15960 }, { "epoch": 105.0657894736842, "grad_norm": 1.2136021852493286, "learning_rate": 0.0001, "loss": 0.0233, "step": 15970 }, { "epoch": 105.13157894736842, "grad_norm": 1.7530934810638428, "learning_rate": 0.0001, "loss": 0.0232, "step": 15980 }, { "epoch": 105.19736842105263, "grad_norm": 1.727021336555481, "learning_rate": 0.0001, "loss": 0.0202, "step": 15990 }, { "epoch": 105.26315789473684, "grad_norm": 1.689032793045044, "learning_rate": 0.0001, "loss": 0.0233, "step": 16000 }, { "epoch": 105.32894736842105, "grad_norm": 2.0343551635742188, "learning_rate": 0.0001, "loss": 0.0215, "step": 16010 }, { "epoch": 105.39473684210526, "grad_norm": 1.865867257118225, "learning_rate": 0.0001, "loss": 0.0246, "step": 16020 }, { "epoch": 105.46052631578948, "grad_norm": 1.5226266384124756, "learning_rate": 0.0001, "loss": 0.0223, "step": 16030 }, { "epoch": 105.52631578947368, "grad_norm": 1.8167226314544678, "learning_rate": 0.0001, "loss": 0.0231, "step": 16040 }, { "epoch": 105.59210526315789, "grad_norm": 1.2684314250946045, "learning_rate": 0.0001, "loss": 0.0226, "step": 16050 }, { "epoch": 105.65789473684211, "grad_norm": 1.7727292776107788, "learning_rate": 0.0001, "loss": 0.0221, "step": 16060 }, { "epoch": 105.72368421052632, "grad_norm": 1.3268436193466187, "learning_rate": 0.0001, "loss": 0.0225, "step": 16070 }, { "epoch": 105.78947368421052, "grad_norm": 1.5590864419937134, "learning_rate": 0.0001, "loss": 0.0207, "step": 16080 }, { "epoch": 105.85526315789474, "grad_norm": 1.4346693754196167, "learning_rate": 0.0001, "loss": 0.0203, "step": 16090 }, { "epoch": 105.92105263157895, "grad_norm": 1.5737860202789307, "learning_rate": 0.0001, "loss": 0.021, "step": 16100 }, { "epoch": 105.98684210526316, "grad_norm": 1.5371108055114746, "learning_rate": 0.0001, "loss": 0.0228, "step": 16110 }, { "epoch": 106.05263157894737, "grad_norm": 1.2796305418014526, "learning_rate": 0.0001, "loss": 0.0222, "step": 16120 }, { "epoch": 106.11842105263158, "grad_norm": 1.506816029548645, "learning_rate": 0.0001, "loss": 0.0211, "step": 16130 }, { "epoch": 106.1842105263158, "grad_norm": 1.4355484247207642, "learning_rate": 0.0001, "loss": 0.0221, "step": 16140 }, { "epoch": 106.25, "grad_norm": 1.6774417161941528, "learning_rate": 0.0001, "loss": 0.022, "step": 16150 }, { "epoch": 106.3157894736842, "grad_norm": 1.5477979183197021, "learning_rate": 0.0001, "loss": 0.0215, "step": 16160 }, { "epoch": 106.38157894736842, "grad_norm": 1.6077028512954712, "learning_rate": 0.0001, "loss": 0.0212, "step": 16170 }, { "epoch": 106.44736842105263, "grad_norm": 1.9327597618103027, "learning_rate": 0.0001, "loss": 0.0203, "step": 16180 }, { "epoch": 106.51315789473684, "grad_norm": 1.2858346700668335, "learning_rate": 0.0001, "loss": 0.0217, "step": 16190 }, { "epoch": 106.57894736842105, "grad_norm": 1.3751716613769531, "learning_rate": 0.0001, "loss": 0.0206, "step": 16200 }, { "epoch": 106.64473684210526, "grad_norm": 1.340836763381958, "learning_rate": 0.0001, "loss": 0.0236, "step": 16210 }, { "epoch": 106.71052631578948, "grad_norm": 1.5455154180526733, "learning_rate": 0.0001, "loss": 0.0207, "step": 16220 }, { "epoch": 106.77631578947368, "grad_norm": 1.066322922706604, "learning_rate": 0.0001, "loss": 0.0219, "step": 16230 }, { "epoch": 106.84210526315789, "grad_norm": 1.5077837705612183, "learning_rate": 0.0001, "loss": 0.022, "step": 16240 }, { "epoch": 106.90789473684211, "grad_norm": 1.211094856262207, "learning_rate": 0.0001, "loss": 0.0209, "step": 16250 }, { "epoch": 106.97368421052632, "grad_norm": 2.008754014968872, "learning_rate": 0.0001, "loss": 0.0208, "step": 16260 }, { "epoch": 107.03947368421052, "grad_norm": 1.3147034645080566, "learning_rate": 0.0001, "loss": 0.02, "step": 16270 }, { "epoch": 107.10526315789474, "grad_norm": 1.1280360221862793, "learning_rate": 0.0001, "loss": 0.0212, "step": 16280 }, { "epoch": 107.17105263157895, "grad_norm": 1.3991897106170654, "learning_rate": 0.0001, "loss": 0.0217, "step": 16290 }, { "epoch": 107.23684210526316, "grad_norm": 1.2809622287750244, "learning_rate": 0.0001, "loss": 0.0206, "step": 16300 }, { "epoch": 107.30263157894737, "grad_norm": 1.4164490699768066, "learning_rate": 0.0001, "loss": 0.0211, "step": 16310 }, { "epoch": 107.36842105263158, "grad_norm": 1.374470829963684, "learning_rate": 0.0001, "loss": 0.0212, "step": 16320 }, { "epoch": 107.4342105263158, "grad_norm": 1.3295577764511108, "learning_rate": 0.0001, "loss": 0.0227, "step": 16330 }, { "epoch": 107.5, "grad_norm": 1.4546465873718262, "learning_rate": 0.0001, "loss": 0.0211, "step": 16340 }, { "epoch": 107.5657894736842, "grad_norm": 1.6725012063980103, "learning_rate": 0.0001, "loss": 0.0218, "step": 16350 }, { "epoch": 107.63157894736842, "grad_norm": 1.4987870454788208, "learning_rate": 0.0001, "loss": 0.0231, "step": 16360 }, { "epoch": 107.69736842105263, "grad_norm": 1.4768329858779907, "learning_rate": 0.0001, "loss": 0.0237, "step": 16370 }, { "epoch": 107.76315789473684, "grad_norm": 1.509992003440857, "learning_rate": 0.0001, "loss": 0.0194, "step": 16380 }, { "epoch": 107.82894736842105, "grad_norm": 1.5696868896484375, "learning_rate": 0.0001, "loss": 0.0218, "step": 16390 }, { "epoch": 107.89473684210526, "grad_norm": 1.9608714580535889, "learning_rate": 0.0001, "loss": 0.022, "step": 16400 }, { "epoch": 107.96052631578948, "grad_norm": 1.597530722618103, "learning_rate": 0.0001, "loss": 0.021, "step": 16410 }, { "epoch": 108.02631578947368, "grad_norm": 1.544816493988037, "learning_rate": 0.0001, "loss": 0.0237, "step": 16420 }, { "epoch": 108.09210526315789, "grad_norm": 1.8198386430740356, "learning_rate": 0.0001, "loss": 0.0233, "step": 16430 }, { "epoch": 108.15789473684211, "grad_norm": 1.1581134796142578, "learning_rate": 0.0001, "loss": 0.0214, "step": 16440 }, { "epoch": 108.22368421052632, "grad_norm": 1.273421049118042, "learning_rate": 0.0001, "loss": 0.022, "step": 16450 }, { "epoch": 108.28947368421052, "grad_norm": 1.438262939453125, "learning_rate": 0.0001, "loss": 0.0237, "step": 16460 }, { "epoch": 108.35526315789474, "grad_norm": 1.5786799192428589, "learning_rate": 0.0001, "loss": 0.0234, "step": 16470 }, { "epoch": 108.42105263157895, "grad_norm": 1.8563624620437622, "learning_rate": 0.0001, "loss": 0.0251, "step": 16480 }, { "epoch": 108.48684210526316, "grad_norm": 1.6424920558929443, "learning_rate": 0.0001, "loss": 0.0231, "step": 16490 }, { "epoch": 108.55263157894737, "grad_norm": 1.5354336500167847, "learning_rate": 0.0001, "loss": 0.0238, "step": 16500 }, { "epoch": 108.61842105263158, "grad_norm": 1.7186359167099, "learning_rate": 0.0001, "loss": 0.025, "step": 16510 }, { "epoch": 108.6842105263158, "grad_norm": 1.3204076290130615, "learning_rate": 0.0001, "loss": 0.0232, "step": 16520 }, { "epoch": 108.75, "grad_norm": 1.1582022905349731, "learning_rate": 0.0001, "loss": 0.0227, "step": 16530 }, { "epoch": 108.8157894736842, "grad_norm": 1.2648552656173706, "learning_rate": 0.0001, "loss": 0.0242, "step": 16540 }, { "epoch": 108.88157894736842, "grad_norm": 1.2796598672866821, "learning_rate": 0.0001, "loss": 0.0241, "step": 16550 }, { "epoch": 108.94736842105263, "grad_norm": 1.9214965105056763, "learning_rate": 0.0001, "loss": 0.0244, "step": 16560 }, { "epoch": 109.01315789473684, "grad_norm": 1.449998140335083, "learning_rate": 0.0001, "loss": 0.0218, "step": 16570 }, { "epoch": 109.07894736842105, "grad_norm": 1.1618794202804565, "learning_rate": 0.0001, "loss": 0.0249, "step": 16580 }, { "epoch": 109.14473684210526, "grad_norm": 1.4211252927780151, "learning_rate": 0.0001, "loss": 0.0224, "step": 16590 }, { "epoch": 109.21052631578948, "grad_norm": 1.3328765630722046, "learning_rate": 0.0001, "loss": 0.0218, "step": 16600 }, { "epoch": 109.27631578947368, "grad_norm": 1.715414047241211, "learning_rate": 0.0001, "loss": 0.0233, "step": 16610 }, { "epoch": 109.34210526315789, "grad_norm": 1.2124247550964355, "learning_rate": 0.0001, "loss": 0.0217, "step": 16620 }, { "epoch": 109.40789473684211, "grad_norm": 1.3251482248306274, "learning_rate": 0.0001, "loss": 0.0229, "step": 16630 }, { "epoch": 109.47368421052632, "grad_norm": 1.4357656240463257, "learning_rate": 0.0001, "loss": 0.0237, "step": 16640 }, { "epoch": 109.53947368421052, "grad_norm": 1.2938634157180786, "learning_rate": 0.0001, "loss": 0.0248, "step": 16650 }, { "epoch": 109.60526315789474, "grad_norm": 1.2677600383758545, "learning_rate": 0.0001, "loss": 0.0233, "step": 16660 }, { "epoch": 109.67105263157895, "grad_norm": 1.1516252756118774, "learning_rate": 0.0001, "loss": 0.0245, "step": 16670 }, { "epoch": 109.73684210526316, "grad_norm": 1.4747333526611328, "learning_rate": 0.0001, "loss": 0.0202, "step": 16680 }, { "epoch": 109.80263157894737, "grad_norm": 1.3867018222808838, "learning_rate": 0.0001, "loss": 0.0214, "step": 16690 }, { "epoch": 109.86842105263158, "grad_norm": 1.8688561916351318, "learning_rate": 0.0001, "loss": 0.0233, "step": 16700 }, { "epoch": 109.9342105263158, "grad_norm": 1.686368465423584, "learning_rate": 0.0001, "loss": 0.0246, "step": 16710 }, { "epoch": 110.0, "grad_norm": 1.311707615852356, "learning_rate": 0.0001, "loss": 0.021, "step": 16720 }, { "epoch": 110.0657894736842, "grad_norm": 1.1597585678100586, "learning_rate": 0.0001, "loss": 0.0218, "step": 16730 }, { "epoch": 110.13157894736842, "grad_norm": 1.41483736038208, "learning_rate": 0.0001, "loss": 0.0239, "step": 16740 }, { "epoch": 110.19736842105263, "grad_norm": 1.4122625589370728, "learning_rate": 0.0001, "loss": 0.0224, "step": 16750 }, { "epoch": 110.26315789473684, "grad_norm": 1.336828589439392, "learning_rate": 0.0001, "loss": 0.0248, "step": 16760 }, { "epoch": 110.32894736842105, "grad_norm": 1.5870128870010376, "learning_rate": 0.0001, "loss": 0.0215, "step": 16770 }, { "epoch": 110.39473684210526, "grad_norm": 1.2915987968444824, "learning_rate": 0.0001, "loss": 0.022, "step": 16780 }, { "epoch": 110.46052631578948, "grad_norm": 1.4546549320220947, "learning_rate": 0.0001, "loss": 0.0228, "step": 16790 }, { "epoch": 110.52631578947368, "grad_norm": 1.3749148845672607, "learning_rate": 0.0001, "loss": 0.0206, "step": 16800 }, { "epoch": 110.59210526315789, "grad_norm": 1.8686497211456299, "learning_rate": 0.0001, "loss": 0.0221, "step": 16810 }, { "epoch": 110.65789473684211, "grad_norm": 1.5140857696533203, "learning_rate": 0.0001, "loss": 0.0229, "step": 16820 }, { "epoch": 110.72368421052632, "grad_norm": 1.222466230392456, "learning_rate": 0.0001, "loss": 0.0221, "step": 16830 }, { "epoch": 110.78947368421052, "grad_norm": 1.4809081554412842, "learning_rate": 0.0001, "loss": 0.0203, "step": 16840 }, { "epoch": 110.85526315789474, "grad_norm": 1.4631025791168213, "learning_rate": 0.0001, "loss": 0.0219, "step": 16850 }, { "epoch": 110.92105263157895, "grad_norm": 1.5033537149429321, "learning_rate": 0.0001, "loss": 0.0193, "step": 16860 }, { "epoch": 110.98684210526316, "grad_norm": 1.5242358446121216, "learning_rate": 0.0001, "loss": 0.0219, "step": 16870 }, { "epoch": 111.05263157894737, "grad_norm": 1.4843236207962036, "learning_rate": 0.0001, "loss": 0.024, "step": 16880 }, { "epoch": 111.11842105263158, "grad_norm": 1.1458847522735596, "learning_rate": 0.0001, "loss": 0.0202, "step": 16890 }, { "epoch": 111.1842105263158, "grad_norm": 1.1731573343276978, "learning_rate": 0.0001, "loss": 0.0198, "step": 16900 }, { "epoch": 111.25, "grad_norm": 1.4374542236328125, "learning_rate": 0.0001, "loss": 0.0204, "step": 16910 }, { "epoch": 111.3157894736842, "grad_norm": 1.523205041885376, "learning_rate": 0.0001, "loss": 0.0217, "step": 16920 }, { "epoch": 111.38157894736842, "grad_norm": 1.5758098363876343, "learning_rate": 0.0001, "loss": 0.0198, "step": 16930 }, { "epoch": 111.44736842105263, "grad_norm": 1.42531156539917, "learning_rate": 0.0001, "loss": 0.0202, "step": 16940 }, { "epoch": 111.51315789473684, "grad_norm": 1.332920789718628, "learning_rate": 0.0001, "loss": 0.0221, "step": 16950 }, { "epoch": 111.57894736842105, "grad_norm": 1.5037161111831665, "learning_rate": 0.0001, "loss": 0.0223, "step": 16960 }, { "epoch": 111.64473684210526, "grad_norm": 1.2761225700378418, "learning_rate": 0.0001, "loss": 0.0203, "step": 16970 }, { "epoch": 111.71052631578948, "grad_norm": 1.315170168876648, "learning_rate": 0.0001, "loss": 0.0194, "step": 16980 }, { "epoch": 111.77631578947368, "grad_norm": 1.6541320085525513, "learning_rate": 0.0001, "loss": 0.0221, "step": 16990 }, { "epoch": 111.84210526315789, "grad_norm": 1.4604840278625488, "learning_rate": 0.0001, "loss": 0.0199, "step": 17000 }, { "epoch": 111.90789473684211, "grad_norm": 1.7079929113388062, "learning_rate": 0.0001, "loss": 0.0207, "step": 17010 }, { "epoch": 111.97368421052632, "grad_norm": 1.5881198644638062, "learning_rate": 0.0001, "loss": 0.021, "step": 17020 }, { "epoch": 112.03947368421052, "grad_norm": 1.1928815841674805, "learning_rate": 0.0001, "loss": 0.024, "step": 17030 }, { "epoch": 112.10526315789474, "grad_norm": 1.283767819404602, "learning_rate": 0.0001, "loss": 0.0212, "step": 17040 }, { "epoch": 112.17105263157895, "grad_norm": 1.0753800868988037, "learning_rate": 0.0001, "loss": 0.0209, "step": 17050 }, { "epoch": 112.23684210526316, "grad_norm": 1.3679052591323853, "learning_rate": 0.0001, "loss": 0.0214, "step": 17060 }, { "epoch": 112.30263157894737, "grad_norm": 1.5422084331512451, "learning_rate": 0.0001, "loss": 0.0209, "step": 17070 }, { "epoch": 112.36842105263158, "grad_norm": 1.0112746953964233, "learning_rate": 0.0001, "loss": 0.0206, "step": 17080 }, { "epoch": 112.4342105263158, "grad_norm": 1.7527899742126465, "learning_rate": 0.0001, "loss": 0.0216, "step": 17090 }, { "epoch": 112.5, "grad_norm": 2.344182252883911, "learning_rate": 0.0001, "loss": 0.0312, "step": 17100 }, { "epoch": 112.5657894736842, "grad_norm": 2.5137453079223633, "learning_rate": 0.0001, "loss": 0.0225, "step": 17110 }, { "epoch": 112.63157894736842, "grad_norm": 2.357712507247925, "learning_rate": 0.0001, "loss": 0.0208, "step": 17120 }, { "epoch": 112.69736842105263, "grad_norm": 2.287658929824829, "learning_rate": 0.0001, "loss": 0.0207, "step": 17130 }, { "epoch": 112.76315789473684, "grad_norm": 1.9697659015655518, "learning_rate": 0.0001, "loss": 0.0203, "step": 17140 }, { "epoch": 112.82894736842105, "grad_norm": 1.8423213958740234, "learning_rate": 0.0001, "loss": 0.0204, "step": 17150 }, { "epoch": 112.89473684210526, "grad_norm": 1.7179113626480103, "learning_rate": 0.0001, "loss": 0.0204, "step": 17160 }, { "epoch": 112.96052631578948, "grad_norm": 1.2885282039642334, "learning_rate": 0.0001, "loss": 0.0187, "step": 17170 }, { "epoch": 113.02631578947368, "grad_norm": 2.470750331878662, "learning_rate": 0.0001, "loss": 0.0205, "step": 17180 }, { "epoch": 113.09210526315789, "grad_norm": 1.9972500801086426, "learning_rate": 0.0001, "loss": 0.0206, "step": 17190 }, { "epoch": 113.15789473684211, "grad_norm": 1.7662476301193237, "learning_rate": 0.0001, "loss": 0.0193, "step": 17200 }, { "epoch": 113.22368421052632, "grad_norm": 2.526029586791992, "learning_rate": 0.0001, "loss": 0.0221, "step": 17210 }, { "epoch": 113.28947368421052, "grad_norm": 1.8449680805206299, "learning_rate": 0.0001, "loss": 0.0195, "step": 17220 }, { "epoch": 113.35526315789474, "grad_norm": 1.9079487323760986, "learning_rate": 0.0001, "loss": 0.0186, "step": 17230 }, { "epoch": 113.42105263157895, "grad_norm": 1.5968166589736938, "learning_rate": 0.0001, "loss": 0.0209, "step": 17240 }, { "epoch": 113.48684210526316, "grad_norm": 1.7774184942245483, "learning_rate": 0.0001, "loss": 0.0209, "step": 17250 }, { "epoch": 113.55263157894737, "grad_norm": 1.8195269107818604, "learning_rate": 0.0001, "loss": 0.0192, "step": 17260 }, { "epoch": 113.61842105263158, "grad_norm": 2.0075738430023193, "learning_rate": 0.0001, "loss": 0.0199, "step": 17270 }, { "epoch": 113.6842105263158, "grad_norm": 1.6541519165039062, "learning_rate": 0.0001, "loss": 0.0176, "step": 17280 }, { "epoch": 113.75, "grad_norm": 1.6574792861938477, "learning_rate": 0.0001, "loss": 0.0169, "step": 17290 }, { "epoch": 113.8157894736842, "grad_norm": 1.459970235824585, "learning_rate": 0.0001, "loss": 0.0197, "step": 17300 }, { "epoch": 113.88157894736842, "grad_norm": 1.072644829750061, "learning_rate": 0.0001, "loss": 0.0186, "step": 17310 }, { "epoch": 113.94736842105263, "grad_norm": 1.4994428157806396, "learning_rate": 0.0001, "loss": 0.0193, "step": 17320 }, { "epoch": 114.01315789473684, "grad_norm": 1.1813583374023438, "learning_rate": 0.0001, "loss": 0.0204, "step": 17330 }, { "epoch": 114.07894736842105, "grad_norm": 1.115692377090454, "learning_rate": 0.0001, "loss": 0.0197, "step": 17340 }, { "epoch": 114.14473684210526, "grad_norm": 1.6557120084762573, "learning_rate": 0.0001, "loss": 0.0201, "step": 17350 }, { "epoch": 114.21052631578948, "grad_norm": 1.287843942642212, "learning_rate": 0.0001, "loss": 0.0198, "step": 17360 }, { "epoch": 114.27631578947368, "grad_norm": 1.5140693187713623, "learning_rate": 0.0001, "loss": 0.0193, "step": 17370 }, { "epoch": 114.34210526315789, "grad_norm": 1.3632303476333618, "learning_rate": 0.0001, "loss": 0.0223, "step": 17380 }, { "epoch": 114.40789473684211, "grad_norm": 1.5650956630706787, "learning_rate": 0.0001, "loss": 0.0215, "step": 17390 }, { "epoch": 114.47368421052632, "grad_norm": 1.413570761680603, "learning_rate": 0.0001, "loss": 0.0201, "step": 17400 }, { "epoch": 114.53947368421052, "grad_norm": 1.2383382320404053, "learning_rate": 0.0001, "loss": 0.019, "step": 17410 }, { "epoch": 114.60526315789474, "grad_norm": 1.6793911457061768, "learning_rate": 0.0001, "loss": 0.0207, "step": 17420 }, { "epoch": 114.67105263157895, "grad_norm": 1.5027910470962524, "learning_rate": 0.0001, "loss": 0.0223, "step": 17430 }, { "epoch": 114.73684210526316, "grad_norm": 1.4984204769134521, "learning_rate": 0.0001, "loss": 0.0223, "step": 17440 }, { "epoch": 114.80263157894737, "grad_norm": 1.712581992149353, "learning_rate": 0.0001, "loss": 0.0207, "step": 17450 }, { "epoch": 114.86842105263158, "grad_norm": 1.507750153541565, "learning_rate": 0.0001, "loss": 0.0202, "step": 17460 }, { "epoch": 114.9342105263158, "grad_norm": 1.515345573425293, "learning_rate": 0.0001, "loss": 0.0194, "step": 17470 }, { "epoch": 115.0, "grad_norm": 1.804507851600647, "learning_rate": 0.0001, "loss": 0.0204, "step": 17480 }, { "epoch": 115.0657894736842, "grad_norm": 1.6192446947097778, "learning_rate": 0.0001, "loss": 0.0206, "step": 17490 }, { "epoch": 115.13157894736842, "grad_norm": 1.3027291297912598, "learning_rate": 0.0001, "loss": 0.0228, "step": 17500 }, { "epoch": 115.19736842105263, "grad_norm": 1.5303033590316772, "learning_rate": 0.0001, "loss": 0.0211, "step": 17510 }, { "epoch": 115.26315789473684, "grad_norm": 1.245597004890442, "learning_rate": 0.0001, "loss": 0.0235, "step": 17520 }, { "epoch": 115.32894736842105, "grad_norm": 1.048466444015503, "learning_rate": 0.0001, "loss": 0.0212, "step": 17530 }, { "epoch": 115.39473684210526, "grad_norm": 1.5736318826675415, "learning_rate": 0.0001, "loss": 0.0209, "step": 17540 }, { "epoch": 115.46052631578948, "grad_norm": 1.8937121629714966, "learning_rate": 0.0001, "loss": 0.0218, "step": 17550 }, { "epoch": 115.52631578947368, "grad_norm": 1.350262999534607, "learning_rate": 0.0001, "loss": 0.0217, "step": 17560 }, { "epoch": 115.59210526315789, "grad_norm": 1.4281275272369385, "learning_rate": 0.0001, "loss": 0.0206, "step": 17570 }, { "epoch": 115.65789473684211, "grad_norm": 1.6036561727523804, "learning_rate": 0.0001, "loss": 0.0209, "step": 17580 }, { "epoch": 115.72368421052632, "grad_norm": 1.1876840591430664, "learning_rate": 0.0001, "loss": 0.024, "step": 17590 }, { "epoch": 115.78947368421052, "grad_norm": 1.1769388914108276, "learning_rate": 0.0001, "loss": 0.0222, "step": 17600 }, { "epoch": 115.85526315789474, "grad_norm": 1.621465802192688, "learning_rate": 0.0001, "loss": 0.0219, "step": 17610 }, { "epoch": 115.92105263157895, "grad_norm": 3.2968623638153076, "learning_rate": 0.0001, "loss": 0.0258, "step": 17620 }, { "epoch": 115.98684210526316, "grad_norm": 1.8271244764328003, "learning_rate": 0.0001, "loss": 0.0246, "step": 17630 }, { "epoch": 116.05263157894737, "grad_norm": 1.8423553705215454, "learning_rate": 0.0001, "loss": 0.0229, "step": 17640 }, { "epoch": 116.11842105263158, "grad_norm": 1.65046226978302, "learning_rate": 0.0001, "loss": 0.0215, "step": 17650 }, { "epoch": 116.1842105263158, "grad_norm": 1.7557287216186523, "learning_rate": 0.0001, "loss": 0.0202, "step": 17660 }, { "epoch": 116.25, "grad_norm": 2.067875385284424, "learning_rate": 0.0001, "loss": 0.0204, "step": 17670 }, { "epoch": 116.3157894736842, "grad_norm": 1.5921411514282227, "learning_rate": 0.0001, "loss": 0.0246, "step": 17680 }, { "epoch": 116.38157894736842, "grad_norm": 1.153791069984436, "learning_rate": 0.0001, "loss": 0.0209, "step": 17690 }, { "epoch": 116.44736842105263, "grad_norm": 1.2742387056350708, "learning_rate": 0.0001, "loss": 0.02, "step": 17700 }, { "epoch": 116.51315789473684, "grad_norm": 1.384451150894165, "learning_rate": 0.0001, "loss": 0.0204, "step": 17710 }, { "epoch": 116.57894736842105, "grad_norm": 1.3240984678268433, "learning_rate": 0.0001, "loss": 0.0215, "step": 17720 }, { "epoch": 116.64473684210526, "grad_norm": 1.3543736934661865, "learning_rate": 0.0001, "loss": 0.0212, "step": 17730 }, { "epoch": 116.71052631578948, "grad_norm": 1.4543614387512207, "learning_rate": 0.0001, "loss": 0.0232, "step": 17740 }, { "epoch": 116.77631578947368, "grad_norm": 1.2755897045135498, "learning_rate": 0.0001, "loss": 0.0211, "step": 17750 }, { "epoch": 116.84210526315789, "grad_norm": 1.4515208005905151, "learning_rate": 0.0001, "loss": 0.02, "step": 17760 }, { "epoch": 116.90789473684211, "grad_norm": 1.4449197053909302, "learning_rate": 0.0001, "loss": 0.0234, "step": 17770 }, { "epoch": 116.97368421052632, "grad_norm": 1.1386222839355469, "learning_rate": 0.0001, "loss": 0.0217, "step": 17780 }, { "epoch": 117.03947368421052, "grad_norm": 1.416370153427124, "learning_rate": 0.0001, "loss": 0.0238, "step": 17790 }, { "epoch": 117.10526315789474, "grad_norm": 1.1340827941894531, "learning_rate": 0.0001, "loss": 0.0227, "step": 17800 }, { "epoch": 117.17105263157895, "grad_norm": 1.6249243021011353, "learning_rate": 0.0001, "loss": 0.0206, "step": 17810 }, { "epoch": 117.23684210526316, "grad_norm": 1.2724114656448364, "learning_rate": 0.0001, "loss": 0.0212, "step": 17820 }, { "epoch": 117.30263157894737, "grad_norm": 1.7152994871139526, "learning_rate": 0.0001, "loss": 0.0226, "step": 17830 }, { "epoch": 117.36842105263158, "grad_norm": 1.4186722040176392, "learning_rate": 0.0001, "loss": 0.0201, "step": 17840 }, { "epoch": 117.4342105263158, "grad_norm": 1.5989038944244385, "learning_rate": 0.0001, "loss": 0.019, "step": 17850 }, { "epoch": 117.5, "grad_norm": 1.5071998834609985, "learning_rate": 0.0001, "loss": 0.0215, "step": 17860 }, { "epoch": 117.5657894736842, "grad_norm": 1.528257131576538, "learning_rate": 0.0001, "loss": 0.0212, "step": 17870 }, { "epoch": 117.63157894736842, "grad_norm": 1.3654208183288574, "learning_rate": 0.0001, "loss": 0.0192, "step": 17880 }, { "epoch": 117.69736842105263, "grad_norm": 2.163045883178711, "learning_rate": 0.0001, "loss": 0.0228, "step": 17890 }, { "epoch": 117.76315789473684, "grad_norm": 1.3992512226104736, "learning_rate": 0.0001, "loss": 0.0196, "step": 17900 }, { "epoch": 117.82894736842105, "grad_norm": 1.3927537202835083, "learning_rate": 0.0001, "loss": 0.0204, "step": 17910 }, { "epoch": 117.89473684210526, "grad_norm": 1.443617343902588, "learning_rate": 0.0001, "loss": 0.0218, "step": 17920 }, { "epoch": 117.96052631578948, "grad_norm": 1.7115437984466553, "learning_rate": 0.0001, "loss": 0.0217, "step": 17930 }, { "epoch": 118.02631578947368, "grad_norm": 1.743646502494812, "learning_rate": 0.0001, "loss": 0.0212, "step": 17940 }, { "epoch": 118.09210526315789, "grad_norm": 1.8455554246902466, "learning_rate": 0.0001, "loss": 0.0202, "step": 17950 }, { "epoch": 118.15789473684211, "grad_norm": 1.5267865657806396, "learning_rate": 0.0001, "loss": 0.0209, "step": 17960 }, { "epoch": 118.22368421052632, "grad_norm": 1.3369451761245728, "learning_rate": 0.0001, "loss": 0.0205, "step": 17970 }, { "epoch": 118.28947368421052, "grad_norm": 1.7073959112167358, "learning_rate": 0.0001, "loss": 0.0221, "step": 17980 }, { "epoch": 118.35526315789474, "grad_norm": 1.3288969993591309, "learning_rate": 0.0001, "loss": 0.0216, "step": 17990 }, { "epoch": 118.42105263157895, "grad_norm": 1.353158950805664, "learning_rate": 0.0001, "loss": 0.0216, "step": 18000 }, { "epoch": 118.48684210526316, "grad_norm": 1.260098934173584, "learning_rate": 0.0001, "loss": 0.0214, "step": 18010 }, { "epoch": 118.55263157894737, "grad_norm": 1.0010889768600464, "learning_rate": 0.0001, "loss": 0.0216, "step": 18020 }, { "epoch": 118.61842105263158, "grad_norm": 1.4269499778747559, "learning_rate": 0.0001, "loss": 0.0201, "step": 18030 }, { "epoch": 118.6842105263158, "grad_norm": 1.6440515518188477, "learning_rate": 0.0001, "loss": 0.0205, "step": 18040 }, { "epoch": 118.75, "grad_norm": 1.2828301191329956, "learning_rate": 0.0001, "loss": 0.0225, "step": 18050 }, { "epoch": 118.8157894736842, "grad_norm": 1.632926106452942, "learning_rate": 0.0001, "loss": 0.0212, "step": 18060 }, { "epoch": 118.88157894736842, "grad_norm": 1.6261667013168335, "learning_rate": 0.0001, "loss": 0.0198, "step": 18070 }, { "epoch": 118.94736842105263, "grad_norm": 1.401406168937683, "learning_rate": 0.0001, "loss": 0.0209, "step": 18080 }, { "epoch": 119.01315789473684, "grad_norm": 1.7001771926879883, "learning_rate": 0.0001, "loss": 0.0205, "step": 18090 }, { "epoch": 119.07894736842105, "grad_norm": 1.5548099279403687, "learning_rate": 0.0001, "loss": 0.0203, "step": 18100 }, { "epoch": 119.14473684210526, "grad_norm": 1.1037205457687378, "learning_rate": 0.0001, "loss": 0.0218, "step": 18110 }, { "epoch": 119.21052631578948, "grad_norm": 1.5918753147125244, "learning_rate": 0.0001, "loss": 0.0221, "step": 18120 }, { "epoch": 119.27631578947368, "grad_norm": 1.6971482038497925, "learning_rate": 0.0001, "loss": 0.0235, "step": 18130 }, { "epoch": 119.34210526315789, "grad_norm": 2.4634158611297607, "learning_rate": 0.0001, "loss": 0.0238, "step": 18140 }, { "epoch": 119.40789473684211, "grad_norm": 1.9838364124298096, "learning_rate": 0.0001, "loss": 0.0207, "step": 18150 }, { "epoch": 119.47368421052632, "grad_norm": 1.578308343887329, "learning_rate": 0.0001, "loss": 0.0207, "step": 18160 }, { "epoch": 119.53947368421052, "grad_norm": 1.6289037466049194, "learning_rate": 0.0001, "loss": 0.0209, "step": 18170 }, { "epoch": 119.60526315789474, "grad_norm": 1.2893215417861938, "learning_rate": 0.0001, "loss": 0.0189, "step": 18180 }, { "epoch": 119.67105263157895, "grad_norm": 1.372152328491211, "learning_rate": 0.0001, "loss": 0.0213, "step": 18190 }, { "epoch": 119.73684210526316, "grad_norm": 1.3960975408554077, "learning_rate": 0.0001, "loss": 0.0202, "step": 18200 }, { "epoch": 119.80263157894737, "grad_norm": 2.3912997245788574, "learning_rate": 0.0001, "loss": 0.0238, "step": 18210 }, { "epoch": 119.86842105263158, "grad_norm": 2.194430112838745, "learning_rate": 0.0001, "loss": 0.0216, "step": 18220 }, { "epoch": 119.9342105263158, "grad_norm": 1.995544195175171, "learning_rate": 0.0001, "loss": 0.0204, "step": 18230 }, { "epoch": 120.0, "grad_norm": 2.018141031265259, "learning_rate": 0.0001, "loss": 0.0209, "step": 18240 }, { "epoch": 120.0657894736842, "grad_norm": 2.009296178817749, "learning_rate": 0.0001, "loss": 0.0206, "step": 18250 }, { "epoch": 120.13157894736842, "grad_norm": 1.7988462448120117, "learning_rate": 0.0001, "loss": 0.0211, "step": 18260 }, { "epoch": 120.19736842105263, "grad_norm": 1.3400589227676392, "learning_rate": 0.0001, "loss": 0.0183, "step": 18270 }, { "epoch": 120.26315789473684, "grad_norm": 1.8497594594955444, "learning_rate": 0.0001, "loss": 0.0176, "step": 18280 }, { "epoch": 120.32894736842105, "grad_norm": 1.826569676399231, "learning_rate": 0.0001, "loss": 0.0188, "step": 18290 }, { "epoch": 120.39473684210526, "grad_norm": 1.4031181335449219, "learning_rate": 0.0001, "loss": 0.0184, "step": 18300 }, { "epoch": 120.46052631578948, "grad_norm": 1.3137474060058594, "learning_rate": 0.0001, "loss": 0.0207, "step": 18310 }, { "epoch": 120.52631578947368, "grad_norm": 1.4643906354904175, "learning_rate": 0.0001, "loss": 0.0194, "step": 18320 }, { "epoch": 120.59210526315789, "grad_norm": 1.4734902381896973, "learning_rate": 0.0001, "loss": 0.0209, "step": 18330 }, { "epoch": 120.65789473684211, "grad_norm": 1.2963664531707764, "learning_rate": 0.0001, "loss": 0.0196, "step": 18340 }, { "epoch": 120.72368421052632, "grad_norm": 1.8689401149749756, "learning_rate": 0.0001, "loss": 0.0193, "step": 18350 }, { "epoch": 120.78947368421052, "grad_norm": 1.442025899887085, "learning_rate": 0.0001, "loss": 0.0197, "step": 18360 }, { "epoch": 120.85526315789474, "grad_norm": 1.0878459215164185, "learning_rate": 0.0001, "loss": 0.0204, "step": 18370 }, { "epoch": 120.92105263157895, "grad_norm": 1.1373950242996216, "learning_rate": 0.0001, "loss": 0.0205, "step": 18380 }, { "epoch": 120.98684210526316, "grad_norm": 1.359013319015503, "learning_rate": 0.0001, "loss": 0.0234, "step": 18390 }, { "epoch": 121.05263157894737, "grad_norm": 1.154805302619934, "learning_rate": 0.0001, "loss": 0.0204, "step": 18400 }, { "epoch": 121.11842105263158, "grad_norm": 1.2236026525497437, "learning_rate": 0.0001, "loss": 0.0222, "step": 18410 }, { "epoch": 121.1842105263158, "grad_norm": 1.6635630130767822, "learning_rate": 0.0001, "loss": 0.0207, "step": 18420 }, { "epoch": 121.25, "grad_norm": 1.2726155519485474, "learning_rate": 0.0001, "loss": 0.0211, "step": 18430 }, { "epoch": 121.3157894736842, "grad_norm": 1.6328818798065186, "learning_rate": 0.0001, "loss": 0.0198, "step": 18440 }, { "epoch": 121.38157894736842, "grad_norm": 1.1731946468353271, "learning_rate": 0.0001, "loss": 0.0227, "step": 18450 }, { "epoch": 121.44736842105263, "grad_norm": 1.302308440208435, "learning_rate": 0.0001, "loss": 0.0232, "step": 18460 }, { "epoch": 121.51315789473684, "grad_norm": 1.287872076034546, "learning_rate": 0.0001, "loss": 0.0182, "step": 18470 }, { "epoch": 121.57894736842105, "grad_norm": 1.315284013748169, "learning_rate": 0.0001, "loss": 0.0218, "step": 18480 }, { "epoch": 121.64473684210526, "grad_norm": 1.6699914932250977, "learning_rate": 0.0001, "loss": 0.0202, "step": 18490 }, { "epoch": 121.71052631578948, "grad_norm": 1.2884074449539185, "learning_rate": 0.0001, "loss": 0.0204, "step": 18500 }, { "epoch": 121.77631578947368, "grad_norm": 1.6029454469680786, "learning_rate": 0.0001, "loss": 0.0206, "step": 18510 }, { "epoch": 121.84210526315789, "grad_norm": 1.4443784952163696, "learning_rate": 0.0001, "loss": 0.0188, "step": 18520 }, { "epoch": 121.90789473684211, "grad_norm": 1.3697898387908936, "learning_rate": 0.0001, "loss": 0.02, "step": 18530 }, { "epoch": 121.97368421052632, "grad_norm": 1.4503991603851318, "learning_rate": 0.0001, "loss": 0.0195, "step": 18540 }, { "epoch": 122.03947368421052, "grad_norm": 2.0889556407928467, "learning_rate": 0.0001, "loss": 0.0341, "step": 18550 }, { "epoch": 122.10526315789474, "grad_norm": 2.1557774543762207, "learning_rate": 0.0001, "loss": 0.0215, "step": 18560 }, { "epoch": 122.17105263157895, "grad_norm": 1.9375776052474976, "learning_rate": 0.0001, "loss": 0.0194, "step": 18570 }, { "epoch": 122.23684210526316, "grad_norm": 1.9048449993133545, "learning_rate": 0.0001, "loss": 0.02, "step": 18580 }, { "epoch": 122.30263157894737, "grad_norm": 1.9082547426223755, "learning_rate": 0.0001, "loss": 0.0205, "step": 18590 }, { "epoch": 122.36842105263158, "grad_norm": 1.3321175575256348, "learning_rate": 0.0001, "loss": 0.0213, "step": 18600 }, { "epoch": 122.4342105263158, "grad_norm": 1.7989875078201294, "learning_rate": 0.0001, "loss": 0.0201, "step": 18610 }, { "epoch": 122.5, "grad_norm": 1.6709932088851929, "learning_rate": 0.0001, "loss": 0.021, "step": 18620 }, { "epoch": 122.5657894736842, "grad_norm": 2.0253422260284424, "learning_rate": 0.0001, "loss": 0.022, "step": 18630 }, { "epoch": 122.63157894736842, "grad_norm": 1.6843767166137695, "learning_rate": 0.0001, "loss": 0.0193, "step": 18640 }, { "epoch": 122.69736842105263, "grad_norm": 1.6629420518875122, "learning_rate": 0.0001, "loss": 0.0193, "step": 18650 }, { "epoch": 122.76315789473684, "grad_norm": 1.5332101583480835, "learning_rate": 0.0001, "loss": 0.0206, "step": 18660 }, { "epoch": 122.82894736842105, "grad_norm": 1.3796052932739258, "learning_rate": 0.0001, "loss": 0.0222, "step": 18670 }, { "epoch": 122.89473684210526, "grad_norm": 1.4094196557998657, "learning_rate": 0.0001, "loss": 0.0216, "step": 18680 }, { "epoch": 122.96052631578948, "grad_norm": 1.7053511142730713, "learning_rate": 0.0001, "loss": 0.0253, "step": 18690 }, { "epoch": 123.02631578947368, "grad_norm": 1.2467565536499023, "learning_rate": 0.0001, "loss": 0.0232, "step": 18700 }, { "epoch": 123.09210526315789, "grad_norm": 1.2807624340057373, "learning_rate": 0.0001, "loss": 0.0229, "step": 18710 }, { "epoch": 123.15789473684211, "grad_norm": 2.067713499069214, "learning_rate": 0.0001, "loss": 0.023, "step": 18720 }, { "epoch": 123.22368421052632, "grad_norm": 1.4443409442901611, "learning_rate": 0.0001, "loss": 0.0235, "step": 18730 }, { "epoch": 123.28947368421052, "grad_norm": 1.0428988933563232, "learning_rate": 0.0001, "loss": 0.0211, "step": 18740 }, { "epoch": 123.35526315789474, "grad_norm": 1.4765625, "learning_rate": 0.0001, "loss": 0.0206, "step": 18750 }, { "epoch": 123.42105263157895, "grad_norm": 1.2120792865753174, "learning_rate": 0.0001, "loss": 0.0213, "step": 18760 }, { "epoch": 123.48684210526316, "grad_norm": 1.4466508626937866, "learning_rate": 0.0001, "loss": 0.0194, "step": 18770 }, { "epoch": 123.55263157894737, "grad_norm": 1.5829192399978638, "learning_rate": 0.0001, "loss": 0.0206, "step": 18780 }, { "epoch": 123.61842105263158, "grad_norm": 1.7138676643371582, "learning_rate": 0.0001, "loss": 0.0191, "step": 18790 }, { "epoch": 123.6842105263158, "grad_norm": 1.3385616540908813, "learning_rate": 0.0001, "loss": 0.0208, "step": 18800 }, { "epoch": 123.75, "grad_norm": 1.5545923709869385, "learning_rate": 0.0001, "loss": 0.0224, "step": 18810 }, { "epoch": 123.8157894736842, "grad_norm": 1.66548752784729, "learning_rate": 0.0001, "loss": 0.0234, "step": 18820 }, { "epoch": 123.88157894736842, "grad_norm": 1.5020532608032227, "learning_rate": 0.0001, "loss": 0.0222, "step": 18830 }, { "epoch": 123.94736842105263, "grad_norm": 1.4988902807235718, "learning_rate": 0.0001, "loss": 0.0198, "step": 18840 }, { "epoch": 124.01315789473684, "grad_norm": 1.415207862854004, "learning_rate": 0.0001, "loss": 0.0202, "step": 18850 }, { "epoch": 124.07894736842105, "grad_norm": 1.9516119956970215, "learning_rate": 0.0001, "loss": 0.0218, "step": 18860 }, { "epoch": 124.14473684210526, "grad_norm": 1.5451509952545166, "learning_rate": 0.0001, "loss": 0.0199, "step": 18870 }, { "epoch": 124.21052631578948, "grad_norm": 2.0787911415100098, "learning_rate": 0.0001, "loss": 0.0222, "step": 18880 }, { "epoch": 124.27631578947368, "grad_norm": 1.574526071548462, "learning_rate": 0.0001, "loss": 0.0214, "step": 18890 }, { "epoch": 124.34210526315789, "grad_norm": 1.3802706003189087, "learning_rate": 0.0001, "loss": 0.0207, "step": 18900 }, { "epoch": 124.40789473684211, "grad_norm": 1.6244707107543945, "learning_rate": 0.0001, "loss": 0.0215, "step": 18910 }, { "epoch": 124.47368421052632, "grad_norm": 2.145286798477173, "learning_rate": 0.0001, "loss": 0.0203, "step": 18920 }, { "epoch": 124.53947368421052, "grad_norm": 1.4865652322769165, "learning_rate": 0.0001, "loss": 0.0208, "step": 18930 }, { "epoch": 124.60526315789474, "grad_norm": 1.6736758947372437, "learning_rate": 0.0001, "loss": 0.0186, "step": 18940 }, { "epoch": 124.67105263157895, "grad_norm": 1.779492735862732, "learning_rate": 0.0001, "loss": 0.0186, "step": 18950 }, { "epoch": 124.73684210526316, "grad_norm": 1.4367070198059082, "learning_rate": 0.0001, "loss": 0.0199, "step": 18960 }, { "epoch": 124.80263157894737, "grad_norm": 2.134930372238159, "learning_rate": 0.0001, "loss": 0.019, "step": 18970 }, { "epoch": 124.86842105263158, "grad_norm": 1.5714412927627563, "learning_rate": 0.0001, "loss": 0.0199, "step": 18980 }, { "epoch": 124.9342105263158, "grad_norm": 1.3601255416870117, "learning_rate": 0.0001, "loss": 0.0213, "step": 18990 }, { "epoch": 125.0, "grad_norm": 1.4811025857925415, "learning_rate": 0.0001, "loss": 0.0199, "step": 19000 }, { "epoch": 125.0657894736842, "grad_norm": 1.156876564025879, "learning_rate": 0.0001, "loss": 0.0208, "step": 19010 }, { "epoch": 125.13157894736842, "grad_norm": 1.284725546836853, "learning_rate": 0.0001, "loss": 0.0195, "step": 19020 }, { "epoch": 125.19736842105263, "grad_norm": 1.6561628580093384, "learning_rate": 0.0001, "loss": 0.0222, "step": 19030 }, { "epoch": 125.26315789473684, "grad_norm": 1.505050778388977, "learning_rate": 0.0001, "loss": 0.0202, "step": 19040 }, { "epoch": 125.32894736842105, "grad_norm": 1.6309056282043457, "learning_rate": 0.0001, "loss": 0.0244, "step": 19050 }, { "epoch": 125.39473684210526, "grad_norm": 1.736699104309082, "learning_rate": 0.0001, "loss": 0.0205, "step": 19060 }, { "epoch": 125.46052631578948, "grad_norm": 1.9428766965866089, "learning_rate": 0.0001, "loss": 0.0195, "step": 19070 }, { "epoch": 125.52631578947368, "grad_norm": 1.7686620950698853, "learning_rate": 0.0001, "loss": 0.0211, "step": 19080 }, { "epoch": 125.59210526315789, "grad_norm": 1.7622973918914795, "learning_rate": 0.0001, "loss": 0.0198, "step": 19090 }, { "epoch": 125.65789473684211, "grad_norm": 1.5690042972564697, "learning_rate": 0.0001, "loss": 0.0191, "step": 19100 }, { "epoch": 125.72368421052632, "grad_norm": 1.8227565288543701, "learning_rate": 0.0001, "loss": 0.0212, "step": 19110 }, { "epoch": 125.78947368421052, "grad_norm": 1.6870558261871338, "learning_rate": 0.0001, "loss": 0.0199, "step": 19120 }, { "epoch": 125.85526315789474, "grad_norm": 1.479076862335205, "learning_rate": 0.0001, "loss": 0.0192, "step": 19130 }, { "epoch": 125.92105263157895, "grad_norm": 1.2277092933654785, "learning_rate": 0.0001, "loss": 0.0191, "step": 19140 }, { "epoch": 125.98684210526316, "grad_norm": 1.489528775215149, "learning_rate": 0.0001, "loss": 0.0211, "step": 19150 }, { "epoch": 126.05263157894737, "grad_norm": 1.0452980995178223, "learning_rate": 0.0001, "loss": 0.022, "step": 19160 }, { "epoch": 126.11842105263158, "grad_norm": 1.3360462188720703, "learning_rate": 0.0001, "loss": 0.0202, "step": 19170 }, { "epoch": 126.1842105263158, "grad_norm": 1.3254673480987549, "learning_rate": 0.0001, "loss": 0.0197, "step": 19180 }, { "epoch": 126.25, "grad_norm": 1.2667726278305054, "learning_rate": 0.0001, "loss": 0.0204, "step": 19190 }, { "epoch": 126.3157894736842, "grad_norm": 1.4161853790283203, "learning_rate": 0.0001, "loss": 0.0199, "step": 19200 }, { "epoch": 126.38157894736842, "grad_norm": 1.2914543151855469, "learning_rate": 0.0001, "loss": 0.0212, "step": 19210 }, { "epoch": 126.44736842105263, "grad_norm": 1.231687307357788, "learning_rate": 0.0001, "loss": 0.0195, "step": 19220 }, { "epoch": 126.51315789473684, "grad_norm": 1.5462013483047485, "learning_rate": 0.0001, "loss": 0.0211, "step": 19230 }, { "epoch": 126.57894736842105, "grad_norm": 1.4174609184265137, "learning_rate": 0.0001, "loss": 0.0211, "step": 19240 }, { "epoch": 126.64473684210526, "grad_norm": 1.4570950269699097, "learning_rate": 0.0001, "loss": 0.0204, "step": 19250 }, { "epoch": 126.71052631578948, "grad_norm": 1.3737624883651733, "learning_rate": 0.0001, "loss": 0.0201, "step": 19260 }, { "epoch": 126.77631578947368, "grad_norm": 1.2217376232147217, "learning_rate": 0.0001, "loss": 0.0218, "step": 19270 }, { "epoch": 126.84210526315789, "grad_norm": 1.2423151731491089, "learning_rate": 0.0001, "loss": 0.0219, "step": 19280 }, { "epoch": 126.90789473684211, "grad_norm": 0.9811283349990845, "learning_rate": 0.0001, "loss": 0.0209, "step": 19290 }, { "epoch": 126.97368421052632, "grad_norm": 1.0705347061157227, "learning_rate": 0.0001, "loss": 0.0221, "step": 19300 }, { "epoch": 127.03947368421052, "grad_norm": 1.479683518409729, "learning_rate": 0.0001, "loss": 0.0213, "step": 19310 }, { "epoch": 127.10526315789474, "grad_norm": 1.0638930797576904, "learning_rate": 0.0001, "loss": 0.0225, "step": 19320 }, { "epoch": 127.17105263157895, "grad_norm": 1.0353878736495972, "learning_rate": 0.0001, "loss": 0.0235, "step": 19330 }, { "epoch": 127.23684210526316, "grad_norm": 1.5880337953567505, "learning_rate": 0.0001, "loss": 0.0205, "step": 19340 }, { "epoch": 127.30263157894737, "grad_norm": 1.4664833545684814, "learning_rate": 0.0001, "loss": 0.0215, "step": 19350 }, { "epoch": 127.36842105263158, "grad_norm": 1.302016019821167, "learning_rate": 0.0001, "loss": 0.023, "step": 19360 }, { "epoch": 127.4342105263158, "grad_norm": 1.1787173748016357, "learning_rate": 0.0001, "loss": 0.0225, "step": 19370 }, { "epoch": 127.5, "grad_norm": 1.5672588348388672, "learning_rate": 0.0001, "loss": 0.0219, "step": 19380 }, { "epoch": 127.5657894736842, "grad_norm": 1.434388518333435, "learning_rate": 0.0001, "loss": 0.0213, "step": 19390 }, { "epoch": 127.63157894736842, "grad_norm": 1.571679949760437, "learning_rate": 0.0001, "loss": 0.0212, "step": 19400 }, { "epoch": 127.69736842105263, "grad_norm": 1.2628552913665771, "learning_rate": 0.0001, "loss": 0.0204, "step": 19410 }, { "epoch": 127.76315789473684, "grad_norm": 1.4900308847427368, "learning_rate": 0.0001, "loss": 0.0195, "step": 19420 }, { "epoch": 127.82894736842105, "grad_norm": 1.2159069776535034, "learning_rate": 0.0001, "loss": 0.0197, "step": 19430 }, { "epoch": 127.89473684210526, "grad_norm": 1.3993160724639893, "learning_rate": 0.0001, "loss": 0.0217, "step": 19440 }, { "epoch": 127.96052631578948, "grad_norm": 1.1536054611206055, "learning_rate": 0.0001, "loss": 0.0201, "step": 19450 }, { "epoch": 128.02631578947367, "grad_norm": 1.333641767501831, "learning_rate": 0.0001, "loss": 0.0212, "step": 19460 }, { "epoch": 128.0921052631579, "grad_norm": 1.1968632936477661, "learning_rate": 0.0001, "loss": 0.0211, "step": 19470 }, { "epoch": 128.1578947368421, "grad_norm": 1.3755112886428833, "learning_rate": 0.0001, "loss": 0.0215, "step": 19480 }, { "epoch": 128.22368421052633, "grad_norm": 1.2440341711044312, "learning_rate": 0.0001, "loss": 0.0231, "step": 19490 }, { "epoch": 128.28947368421052, "grad_norm": 1.4638385772705078, "learning_rate": 0.0001, "loss": 0.0224, "step": 19500 }, { "epoch": 128.35526315789474, "grad_norm": 1.5661355257034302, "learning_rate": 0.0001, "loss": 0.0204, "step": 19510 }, { "epoch": 128.42105263157896, "grad_norm": 1.3939542770385742, "learning_rate": 0.0001, "loss": 0.0218, "step": 19520 }, { "epoch": 128.48684210526315, "grad_norm": 1.1770873069763184, "learning_rate": 0.0001, "loss": 0.02, "step": 19530 }, { "epoch": 128.55263157894737, "grad_norm": 1.2070765495300293, "learning_rate": 0.0001, "loss": 0.0216, "step": 19540 }, { "epoch": 128.6184210526316, "grad_norm": 1.6263254880905151, "learning_rate": 0.0001, "loss": 0.0205, "step": 19550 }, { "epoch": 128.68421052631578, "grad_norm": 1.2368308305740356, "learning_rate": 0.0001, "loss": 0.0218, "step": 19560 }, { "epoch": 128.75, "grad_norm": 1.4440819025039673, "learning_rate": 0.0001, "loss": 0.022, "step": 19570 }, { "epoch": 128.81578947368422, "grad_norm": 1.1472845077514648, "learning_rate": 0.0001, "loss": 0.0199, "step": 19580 }, { "epoch": 128.8815789473684, "grad_norm": 1.410331130027771, "learning_rate": 0.0001, "loss": 0.0219, "step": 19590 }, { "epoch": 128.94736842105263, "grad_norm": 1.265761137008667, "learning_rate": 0.0001, "loss": 0.0217, "step": 19600 }, { "epoch": 129.01315789473685, "grad_norm": 1.2967408895492554, "learning_rate": 0.0001, "loss": 0.0208, "step": 19610 }, { "epoch": 129.07894736842104, "grad_norm": 1.5309879779815674, "learning_rate": 0.0001, "loss": 0.0215, "step": 19620 }, { "epoch": 129.14473684210526, "grad_norm": 1.5370832681655884, "learning_rate": 0.0001, "loss": 0.0224, "step": 19630 }, { "epoch": 129.21052631578948, "grad_norm": 1.3466033935546875, "learning_rate": 0.0001, "loss": 0.0226, "step": 19640 }, { "epoch": 129.27631578947367, "grad_norm": 1.4539897441864014, "learning_rate": 0.0001, "loss": 0.0224, "step": 19650 }, { "epoch": 129.3421052631579, "grad_norm": 1.633927583694458, "learning_rate": 0.0001, "loss": 0.022, "step": 19660 }, { "epoch": 129.4078947368421, "grad_norm": 1.246416449546814, "learning_rate": 0.0001, "loss": 0.023, "step": 19670 }, { "epoch": 129.47368421052633, "grad_norm": 1.3420946598052979, "learning_rate": 0.0001, "loss": 0.0241, "step": 19680 }, { "epoch": 129.53947368421052, "grad_norm": 1.3653072118759155, "learning_rate": 0.0001, "loss": 0.023, "step": 19690 }, { "epoch": 129.60526315789474, "grad_norm": 1.0461795330047607, "learning_rate": 0.0001, "loss": 0.0236, "step": 19700 }, { "epoch": 129.67105263157896, "grad_norm": 1.4482468366622925, "learning_rate": 0.0001, "loss": 0.0241, "step": 19710 }, { "epoch": 129.73684210526315, "grad_norm": 1.0564301013946533, "learning_rate": 0.0001, "loss": 0.0222, "step": 19720 }, { "epoch": 129.80263157894737, "grad_norm": 1.5804928541183472, "learning_rate": 0.0001, "loss": 0.0237, "step": 19730 }, { "epoch": 129.8684210526316, "grad_norm": 1.6412838697433472, "learning_rate": 0.0001, "loss": 0.0237, "step": 19740 }, { "epoch": 129.93421052631578, "grad_norm": 1.5836437940597534, "learning_rate": 0.0001, "loss": 0.0223, "step": 19750 }, { "epoch": 130.0, "grad_norm": 1.197196125984192, "learning_rate": 0.0001, "loss": 0.0202, "step": 19760 }, { "epoch": 130.06578947368422, "grad_norm": 0.96909499168396, "learning_rate": 0.0001, "loss": 0.0204, "step": 19770 }, { "epoch": 130.1315789473684, "grad_norm": 1.4088032245635986, "learning_rate": 0.0001, "loss": 0.0214, "step": 19780 }, { "epoch": 130.19736842105263, "grad_norm": 1.3101266622543335, "learning_rate": 0.0001, "loss": 0.0208, "step": 19790 }, { "epoch": 130.26315789473685, "grad_norm": 1.375200867652893, "learning_rate": 0.0001, "loss": 0.022, "step": 19800 }, { "epoch": 130.32894736842104, "grad_norm": 1.243313193321228, "learning_rate": 0.0001, "loss": 0.0197, "step": 19810 }, { "epoch": 130.39473684210526, "grad_norm": 1.121595025062561, "learning_rate": 0.0001, "loss": 0.0202, "step": 19820 }, { "epoch": 130.46052631578948, "grad_norm": 2.108215093612671, "learning_rate": 0.0001, "loss": 0.0236, "step": 19830 }, { "epoch": 130.52631578947367, "grad_norm": 2.1370813846588135, "learning_rate": 0.0001, "loss": 0.0208, "step": 19840 }, { "epoch": 130.5921052631579, "grad_norm": 1.728285789489746, "learning_rate": 0.0001, "loss": 0.0219, "step": 19850 }, { "epoch": 130.6578947368421, "grad_norm": 2.018031358718872, "learning_rate": 0.0001, "loss": 0.0202, "step": 19860 }, { "epoch": 130.72368421052633, "grad_norm": 1.6380020380020142, "learning_rate": 0.0001, "loss": 0.026, "step": 19870 }, { "epoch": 130.78947368421052, "grad_norm": 1.7799023389816284, "learning_rate": 0.0001, "loss": 0.0221, "step": 19880 }, { "epoch": 130.85526315789474, "grad_norm": 1.419378638267517, "learning_rate": 0.0001, "loss": 0.0186, "step": 19890 }, { "epoch": 130.92105263157896, "grad_norm": 1.769496202468872, "learning_rate": 0.0001, "loss": 0.0189, "step": 19900 }, { "epoch": 130.98684210526315, "grad_norm": 1.9092391729354858, "learning_rate": 0.0001, "loss": 0.0212, "step": 19910 }, { "epoch": 131.05263157894737, "grad_norm": 1.5821865797042847, "learning_rate": 0.0001, "loss": 0.0204, "step": 19920 }, { "epoch": 131.1184210526316, "grad_norm": 1.5526002645492554, "learning_rate": 0.0001, "loss": 0.0171, "step": 19930 }, { "epoch": 131.18421052631578, "grad_norm": 1.4830267429351807, "learning_rate": 0.0001, "loss": 0.0205, "step": 19940 }, { "epoch": 131.25, "grad_norm": 1.3607596158981323, "learning_rate": 0.0001, "loss": 0.0178, "step": 19950 }, { "epoch": 131.31578947368422, "grad_norm": 1.4321061372756958, "learning_rate": 0.0001, "loss": 0.0185, "step": 19960 }, { "epoch": 131.3815789473684, "grad_norm": 1.343908667564392, "learning_rate": 0.0001, "loss": 0.0195, "step": 19970 }, { "epoch": 131.44736842105263, "grad_norm": 1.047028660774231, "learning_rate": 0.0001, "loss": 0.0211, "step": 19980 }, { "epoch": 131.51315789473685, "grad_norm": 1.2941601276397705, "learning_rate": 0.0001, "loss": 0.0199, "step": 19990 }, { "epoch": 131.57894736842104, "grad_norm": 1.480362892150879, "learning_rate": 0.0001, "loss": 0.021, "step": 20000 }, { "epoch": 131.64473684210526, "grad_norm": 1.8121306896209717, "learning_rate": 0.0001, "loss": 0.0195, "step": 20010 }, { "epoch": 131.71052631578948, "grad_norm": 1.2839956283569336, "learning_rate": 0.0001, "loss": 0.019, "step": 20020 }, { "epoch": 131.77631578947367, "grad_norm": 1.1678411960601807, "learning_rate": 0.0001, "loss": 0.0221, "step": 20030 }, { "epoch": 131.8421052631579, "grad_norm": 1.2261030673980713, "learning_rate": 0.0001, "loss": 0.0187, "step": 20040 }, { "epoch": 131.9078947368421, "grad_norm": 1.1341450214385986, "learning_rate": 0.0001, "loss": 0.0192, "step": 20050 }, { "epoch": 131.97368421052633, "grad_norm": 1.0404419898986816, "learning_rate": 0.0001, "loss": 0.0197, "step": 20060 }, { "epoch": 132.03947368421052, "grad_norm": 1.3259118795394897, "learning_rate": 0.0001, "loss": 0.0204, "step": 20070 }, { "epoch": 132.10526315789474, "grad_norm": 1.6572375297546387, "learning_rate": 0.0001, "loss": 0.0239, "step": 20080 }, { "epoch": 132.17105263157896, "grad_norm": 1.5282135009765625, "learning_rate": 0.0001, "loss": 0.0214, "step": 20090 }, { "epoch": 132.23684210526315, "grad_norm": 1.3130202293395996, "learning_rate": 0.0001, "loss": 0.0204, "step": 20100 }, { "epoch": 132.30263157894737, "grad_norm": 1.7606295347213745, "learning_rate": 0.0001, "loss": 0.0208, "step": 20110 }, { "epoch": 132.3684210526316, "grad_norm": 1.5770694017410278, "learning_rate": 0.0001, "loss": 0.0196, "step": 20120 }, { "epoch": 132.43421052631578, "grad_norm": 1.3118009567260742, "learning_rate": 0.0001, "loss": 0.0198, "step": 20130 }, { "epoch": 132.5, "grad_norm": 1.220281958580017, "learning_rate": 0.0001, "loss": 0.0214, "step": 20140 }, { "epoch": 132.56578947368422, "grad_norm": 1.5257964134216309, "learning_rate": 0.0001, "loss": 0.0218, "step": 20150 }, { "epoch": 132.6315789473684, "grad_norm": 1.567521333694458, "learning_rate": 0.0001, "loss": 0.0192, "step": 20160 }, { "epoch": 132.69736842105263, "grad_norm": 1.1980758905410767, "learning_rate": 0.0001, "loss": 0.0207, "step": 20170 }, { "epoch": 132.76315789473685, "grad_norm": 1.453986644744873, "learning_rate": 0.0001, "loss": 0.0198, "step": 20180 }, { "epoch": 132.82894736842104, "grad_norm": 1.4468498229980469, "learning_rate": 0.0001, "loss": 0.0182, "step": 20190 }, { "epoch": 132.89473684210526, "grad_norm": 1.3943188190460205, "learning_rate": 0.0001, "loss": 0.022, "step": 20200 }, { "epoch": 132.96052631578948, "grad_norm": 1.7826237678527832, "learning_rate": 0.0001, "loss": 0.02, "step": 20210 }, { "epoch": 133.02631578947367, "grad_norm": 1.3166335821151733, "learning_rate": 0.0001, "loss": 0.0216, "step": 20220 }, { "epoch": 133.0921052631579, "grad_norm": 1.3654383420944214, "learning_rate": 0.0001, "loss": 0.0183, "step": 20230 }, { "epoch": 133.1578947368421, "grad_norm": 1.322871208190918, "learning_rate": 0.0001, "loss": 0.0201, "step": 20240 }, { "epoch": 133.22368421052633, "grad_norm": 1.2936899662017822, "learning_rate": 0.0001, "loss": 0.0197, "step": 20250 }, { "epoch": 133.28947368421052, "grad_norm": 0.9831916093826294, "learning_rate": 0.0001, "loss": 0.0222, "step": 20260 }, { "epoch": 133.35526315789474, "grad_norm": 1.345542311668396, "learning_rate": 0.0001, "loss": 0.0212, "step": 20270 }, { "epoch": 133.42105263157896, "grad_norm": 1.4100401401519775, "learning_rate": 0.0001, "loss": 0.0196, "step": 20280 }, { "epoch": 133.48684210526315, "grad_norm": 1.1066662073135376, "learning_rate": 0.0001, "loss": 0.0218, "step": 20290 }, { "epoch": 133.55263157894737, "grad_norm": 1.3817609548568726, "learning_rate": 0.0001, "loss": 0.0228, "step": 20300 }, { "epoch": 133.6184210526316, "grad_norm": 1.107347846031189, "learning_rate": 0.0001, "loss": 0.0203, "step": 20310 }, { "epoch": 133.68421052631578, "grad_norm": 0.8152048587799072, "learning_rate": 0.0001, "loss": 0.0214, "step": 20320 }, { "epoch": 133.75, "grad_norm": 1.1018624305725098, "learning_rate": 0.0001, "loss": 0.0207, "step": 20330 }, { "epoch": 133.81578947368422, "grad_norm": 1.5733144283294678, "learning_rate": 0.0001, "loss": 0.0211, "step": 20340 }, { "epoch": 133.8815789473684, "grad_norm": 1.2820268869400024, "learning_rate": 0.0001, "loss": 0.0206, "step": 20350 }, { "epoch": 133.94736842105263, "grad_norm": 1.4256007671356201, "learning_rate": 0.0001, "loss": 0.0238, "step": 20360 }, { "epoch": 134.01315789473685, "grad_norm": 1.2284027338027954, "learning_rate": 0.0001, "loss": 0.0204, "step": 20370 }, { "epoch": 134.07894736842104, "grad_norm": 1.1770983934402466, "learning_rate": 0.0001, "loss": 0.02, "step": 20380 }, { "epoch": 134.14473684210526, "grad_norm": 1.1191388368606567, "learning_rate": 0.0001, "loss": 0.0199, "step": 20390 }, { "epoch": 134.21052631578948, "grad_norm": 1.1628490686416626, "learning_rate": 0.0001, "loss": 0.022, "step": 20400 }, { "epoch": 134.27631578947367, "grad_norm": 1.2943140268325806, "learning_rate": 0.0001, "loss": 0.0208, "step": 20410 }, { "epoch": 134.3421052631579, "grad_norm": 1.0791293382644653, "learning_rate": 0.0001, "loss": 0.0194, "step": 20420 }, { "epoch": 134.4078947368421, "grad_norm": 1.308792233467102, "learning_rate": 0.0001, "loss": 0.02, "step": 20430 }, { "epoch": 134.47368421052633, "grad_norm": 1.2937871217727661, "learning_rate": 0.0001, "loss": 0.0221, "step": 20440 }, { "epoch": 134.53947368421052, "grad_norm": 1.2588351964950562, "learning_rate": 0.0001, "loss": 0.0198, "step": 20450 }, { "epoch": 134.60526315789474, "grad_norm": 1.3284668922424316, "learning_rate": 0.0001, "loss": 0.0208, "step": 20460 }, { "epoch": 134.67105263157896, "grad_norm": 1.3896305561065674, "learning_rate": 0.0001, "loss": 0.0191, "step": 20470 }, { "epoch": 134.73684210526315, "grad_norm": 1.4380180835723877, "learning_rate": 0.0001, "loss": 0.0212, "step": 20480 }, { "epoch": 134.80263157894737, "grad_norm": 1.1442885398864746, "learning_rate": 0.0001, "loss": 0.0187, "step": 20490 }, { "epoch": 134.8684210526316, "grad_norm": 1.3140195608139038, "learning_rate": 0.0001, "loss": 0.0205, "step": 20500 }, { "epoch": 134.93421052631578, "grad_norm": 1.2796006202697754, "learning_rate": 0.0001, "loss": 0.0209, "step": 20510 }, { "epoch": 135.0, "grad_norm": 1.2553552389144897, "learning_rate": 0.0001, "loss": 0.0189, "step": 20520 }, { "epoch": 135.06578947368422, "grad_norm": 1.573137640953064, "learning_rate": 0.0001, "loss": 0.0205, "step": 20530 }, { "epoch": 135.1315789473684, "grad_norm": 1.4506351947784424, "learning_rate": 0.0001, "loss": 0.0204, "step": 20540 }, { "epoch": 135.19736842105263, "grad_norm": 1.219154953956604, "learning_rate": 0.0001, "loss": 0.0184, "step": 20550 }, { "epoch": 135.26315789473685, "grad_norm": 1.235091209411621, "learning_rate": 0.0001, "loss": 0.0199, "step": 20560 }, { "epoch": 135.32894736842104, "grad_norm": 1.7018022537231445, "learning_rate": 0.0001, "loss": 0.0219, "step": 20570 }, { "epoch": 135.39473684210526, "grad_norm": 1.2762569189071655, "learning_rate": 0.0001, "loss": 0.0205, "step": 20580 }, { "epoch": 135.46052631578948, "grad_norm": 1.5831774473190308, "learning_rate": 0.0001, "loss": 0.0206, "step": 20590 }, { "epoch": 135.52631578947367, "grad_norm": 1.3347238302230835, "learning_rate": 0.0001, "loss": 0.0192, "step": 20600 }, { "epoch": 135.5921052631579, "grad_norm": 1.5127307176589966, "learning_rate": 0.0001, "loss": 0.0197, "step": 20610 }, { "epoch": 135.6578947368421, "grad_norm": 1.3495934009552002, "learning_rate": 0.0001, "loss": 0.0195, "step": 20620 }, { "epoch": 135.72368421052633, "grad_norm": 1.6535933017730713, "learning_rate": 0.0001, "loss": 0.0176, "step": 20630 }, { "epoch": 135.78947368421052, "grad_norm": 1.341562032699585, "learning_rate": 0.0001, "loss": 0.0177, "step": 20640 }, { "epoch": 135.85526315789474, "grad_norm": 1.2143185138702393, "learning_rate": 0.0001, "loss": 0.0187, "step": 20650 }, { "epoch": 135.92105263157896, "grad_norm": 1.1850231885910034, "learning_rate": 0.0001, "loss": 0.0196, "step": 20660 }, { "epoch": 135.98684210526315, "grad_norm": 1.3171666860580444, "learning_rate": 0.0001, "loss": 0.0204, "step": 20670 }, { "epoch": 136.05263157894737, "grad_norm": 1.7448344230651855, "learning_rate": 0.0001, "loss": 0.0197, "step": 20680 }, { "epoch": 136.1184210526316, "grad_norm": 1.0638881921768188, "learning_rate": 0.0001, "loss": 0.0193, "step": 20690 }, { "epoch": 136.18421052631578, "grad_norm": 0.8317880034446716, "learning_rate": 0.0001, "loss": 0.02, "step": 20700 }, { "epoch": 136.25, "grad_norm": 1.029839277267456, "learning_rate": 0.0001, "loss": 0.0194, "step": 20710 }, { "epoch": 136.31578947368422, "grad_norm": 0.9803492426872253, "learning_rate": 0.0001, "loss": 0.0186, "step": 20720 }, { "epoch": 136.3815789473684, "grad_norm": 1.7811894416809082, "learning_rate": 0.0001, "loss": 0.0205, "step": 20730 }, { "epoch": 136.44736842105263, "grad_norm": 1.0440852642059326, "learning_rate": 0.0001, "loss": 0.0191, "step": 20740 }, { "epoch": 136.51315789473685, "grad_norm": 1.3539479970932007, "learning_rate": 0.0001, "loss": 0.019, "step": 20750 }, { "epoch": 136.57894736842104, "grad_norm": 1.2563800811767578, "learning_rate": 0.0001, "loss": 0.0219, "step": 20760 }, { "epoch": 136.64473684210526, "grad_norm": 0.9620851874351501, "learning_rate": 0.0001, "loss": 0.0194, "step": 20770 }, { "epoch": 136.71052631578948, "grad_norm": 1.340428352355957, "learning_rate": 0.0001, "loss": 0.0206, "step": 20780 }, { "epoch": 136.77631578947367, "grad_norm": 1.2113215923309326, "learning_rate": 0.0001, "loss": 0.0207, "step": 20790 }, { "epoch": 136.8421052631579, "grad_norm": 1.3657537698745728, "learning_rate": 0.0001, "loss": 0.0202, "step": 20800 }, { "epoch": 136.9078947368421, "grad_norm": 1.3240277767181396, "learning_rate": 0.0001, "loss": 0.0182, "step": 20810 }, { "epoch": 136.97368421052633, "grad_norm": 1.2962788343429565, "learning_rate": 0.0001, "loss": 0.0213, "step": 20820 }, { "epoch": 137.03947368421052, "grad_norm": 0.9829322695732117, "learning_rate": 0.0001, "loss": 0.0182, "step": 20830 }, { "epoch": 137.10526315789474, "grad_norm": 1.1034486293792725, "learning_rate": 0.0001, "loss": 0.0184, "step": 20840 }, { "epoch": 137.17105263157896, "grad_norm": 1.254267692565918, "learning_rate": 0.0001, "loss": 0.0206, "step": 20850 }, { "epoch": 137.23684210526315, "grad_norm": 1.059151530265808, "learning_rate": 0.0001, "loss": 0.0201, "step": 20860 }, { "epoch": 137.30263157894737, "grad_norm": 1.5754255056381226, "learning_rate": 0.0001, "loss": 0.0213, "step": 20870 }, { "epoch": 137.3684210526316, "grad_norm": 1.3503801822662354, "learning_rate": 0.0001, "loss": 0.0202, "step": 20880 }, { "epoch": 137.43421052631578, "grad_norm": 1.3268914222717285, "learning_rate": 0.0001, "loss": 0.02, "step": 20890 }, { "epoch": 137.5, "grad_norm": 1.3608646392822266, "learning_rate": 0.0001, "loss": 0.0216, "step": 20900 }, { "epoch": 137.56578947368422, "grad_norm": 1.1551671028137207, "learning_rate": 0.0001, "loss": 0.0212, "step": 20910 }, { "epoch": 137.6315789473684, "grad_norm": 0.8847004175186157, "learning_rate": 0.0001, "loss": 0.0191, "step": 20920 }, { "epoch": 137.69736842105263, "grad_norm": 1.4831230640411377, "learning_rate": 0.0001, "loss": 0.0216, "step": 20930 }, { "epoch": 137.76315789473685, "grad_norm": 1.1870131492614746, "learning_rate": 0.0001, "loss": 0.0181, "step": 20940 }, { "epoch": 137.82894736842104, "grad_norm": 1.3143157958984375, "learning_rate": 0.0001, "loss": 0.0184, "step": 20950 }, { "epoch": 137.89473684210526, "grad_norm": 1.1769039630889893, "learning_rate": 0.0001, "loss": 0.0187, "step": 20960 }, { "epoch": 137.96052631578948, "grad_norm": 1.852351427078247, "learning_rate": 0.0001, "loss": 0.0211, "step": 20970 }, { "epoch": 138.02631578947367, "grad_norm": 1.6631145477294922, "learning_rate": 0.0001, "loss": 0.0191, "step": 20980 }, { "epoch": 138.0921052631579, "grad_norm": 1.4467190504074097, "learning_rate": 0.0001, "loss": 0.0184, "step": 20990 }, { "epoch": 138.1578947368421, "grad_norm": 1.4761419296264648, "learning_rate": 0.0001, "loss": 0.0177, "step": 21000 }, { "epoch": 138.22368421052633, "grad_norm": 1.160988211631775, "learning_rate": 0.0001, "loss": 0.0203, "step": 21010 }, { "epoch": 138.28947368421052, "grad_norm": 1.591023325920105, "learning_rate": 0.0001, "loss": 0.0179, "step": 21020 }, { "epoch": 138.35526315789474, "grad_norm": 1.4847773313522339, "learning_rate": 0.0001, "loss": 0.0203, "step": 21030 }, { "epoch": 138.42105263157896, "grad_norm": 1.3351836204528809, "learning_rate": 0.0001, "loss": 0.0197, "step": 21040 }, { "epoch": 138.48684210526315, "grad_norm": 1.6248340606689453, "learning_rate": 0.0001, "loss": 0.0191, "step": 21050 }, { "epoch": 138.55263157894737, "grad_norm": 1.3487075567245483, "learning_rate": 0.0001, "loss": 0.0179, "step": 21060 }, { "epoch": 138.6184210526316, "grad_norm": 1.291077733039856, "learning_rate": 0.0001, "loss": 0.0192, "step": 21070 }, { "epoch": 138.68421052631578, "grad_norm": 1.6398788690567017, "learning_rate": 0.0001, "loss": 0.0181, "step": 21080 }, { "epoch": 138.75, "grad_norm": 1.8707720041275024, "learning_rate": 0.0001, "loss": 0.018, "step": 21090 }, { "epoch": 138.81578947368422, "grad_norm": 1.5530805587768555, "learning_rate": 0.0001, "loss": 0.0178, "step": 21100 }, { "epoch": 138.8815789473684, "grad_norm": 1.3435490131378174, "learning_rate": 0.0001, "loss": 0.0199, "step": 21110 }, { "epoch": 138.94736842105263, "grad_norm": 1.51068115234375, "learning_rate": 0.0001, "loss": 0.0197, "step": 21120 }, { "epoch": 139.01315789473685, "grad_norm": 1.293702244758606, "learning_rate": 0.0001, "loss": 0.0197, "step": 21130 }, { "epoch": 139.07894736842104, "grad_norm": 1.422736406326294, "learning_rate": 0.0001, "loss": 0.0202, "step": 21140 }, { "epoch": 139.14473684210526, "grad_norm": 1.303194522857666, "learning_rate": 0.0001, "loss": 0.019, "step": 21150 }, { "epoch": 139.21052631578948, "grad_norm": 1.724423885345459, "learning_rate": 0.0001, "loss": 0.0176, "step": 21160 }, { "epoch": 139.27631578947367, "grad_norm": 1.5041424036026, "learning_rate": 0.0001, "loss": 0.0198, "step": 21170 }, { "epoch": 139.3421052631579, "grad_norm": 1.4980031251907349, "learning_rate": 0.0001, "loss": 0.0199, "step": 21180 }, { "epoch": 139.4078947368421, "grad_norm": 1.6551101207733154, "learning_rate": 0.0001, "loss": 0.0178, "step": 21190 }, { "epoch": 139.47368421052633, "grad_norm": 1.4190785884857178, "learning_rate": 0.0001, "loss": 0.0191, "step": 21200 }, { "epoch": 139.53947368421052, "grad_norm": 1.5306510925292969, "learning_rate": 0.0001, "loss": 0.019, "step": 21210 }, { "epoch": 139.60526315789474, "grad_norm": 1.395179033279419, "learning_rate": 0.0001, "loss": 0.0195, "step": 21220 }, { "epoch": 139.67105263157896, "grad_norm": 1.9223673343658447, "learning_rate": 0.0001, "loss": 0.0175, "step": 21230 }, { "epoch": 139.73684210526315, "grad_norm": 1.42075777053833, "learning_rate": 0.0001, "loss": 0.017, "step": 21240 }, { "epoch": 139.80263157894737, "grad_norm": 1.3706669807434082, "learning_rate": 0.0001, "loss": 0.0186, "step": 21250 }, { "epoch": 139.8684210526316, "grad_norm": 1.5455049276351929, "learning_rate": 0.0001, "loss": 0.0167, "step": 21260 }, { "epoch": 139.93421052631578, "grad_norm": 1.6650147438049316, "learning_rate": 0.0001, "loss": 0.0195, "step": 21270 }, { "epoch": 140.0, "grad_norm": 1.3898757696151733, "learning_rate": 0.0001, "loss": 0.021, "step": 21280 }, { "epoch": 140.06578947368422, "grad_norm": 1.516356110572815, "learning_rate": 0.0001, "loss": 0.0171, "step": 21290 }, { "epoch": 140.1315789473684, "grad_norm": 1.162001609802246, "learning_rate": 0.0001, "loss": 0.0171, "step": 21300 }, { "epoch": 140.19736842105263, "grad_norm": 1.5784013271331787, "learning_rate": 0.0001, "loss": 0.0184, "step": 21310 }, { "epoch": 140.26315789473685, "grad_norm": 1.3628367185592651, "learning_rate": 0.0001, "loss": 0.0187, "step": 21320 }, { "epoch": 140.32894736842104, "grad_norm": 1.2204854488372803, "learning_rate": 0.0001, "loss": 0.0191, "step": 21330 }, { "epoch": 140.39473684210526, "grad_norm": 1.3196606636047363, "learning_rate": 0.0001, "loss": 0.019, "step": 21340 }, { "epoch": 140.46052631578948, "grad_norm": 1.3891950845718384, "learning_rate": 0.0001, "loss": 0.0204, "step": 21350 }, { "epoch": 140.52631578947367, "grad_norm": 0.994315505027771, "learning_rate": 0.0001, "loss": 0.0171, "step": 21360 }, { "epoch": 140.5921052631579, "grad_norm": 1.3768020868301392, "learning_rate": 0.0001, "loss": 0.019, "step": 21370 }, { "epoch": 140.6578947368421, "grad_norm": 0.8438733816146851, "learning_rate": 0.0001, "loss": 0.0191, "step": 21380 }, { "epoch": 140.72368421052633, "grad_norm": 1.5164715051651, "learning_rate": 0.0001, "loss": 0.0175, "step": 21390 }, { "epoch": 140.78947368421052, "grad_norm": 1.3153681755065918, "learning_rate": 0.0001, "loss": 0.017, "step": 21400 }, { "epoch": 140.85526315789474, "grad_norm": 1.4804083108901978, "learning_rate": 0.0001, "loss": 0.0185, "step": 21410 }, { "epoch": 140.92105263157896, "grad_norm": 1.6807725429534912, "learning_rate": 0.0001, "loss": 0.0187, "step": 21420 }, { "epoch": 140.98684210526315, "grad_norm": 1.5503164529800415, "learning_rate": 0.0001, "loss": 0.0202, "step": 21430 }, { "epoch": 141.05263157894737, "grad_norm": 1.3315825462341309, "learning_rate": 0.0001, "loss": 0.0196, "step": 21440 }, { "epoch": 141.1184210526316, "grad_norm": 1.2883803844451904, "learning_rate": 0.0001, "loss": 0.0184, "step": 21450 }, { "epoch": 141.18421052631578, "grad_norm": 1.409468173980713, "learning_rate": 0.0001, "loss": 0.0184, "step": 21460 }, { "epoch": 141.25, "grad_norm": 1.7659132480621338, "learning_rate": 0.0001, "loss": 0.0199, "step": 21470 }, { "epoch": 141.31578947368422, "grad_norm": 1.2036762237548828, "learning_rate": 0.0001, "loss": 0.0188, "step": 21480 }, { "epoch": 141.3815789473684, "grad_norm": 1.3691455125808716, "learning_rate": 0.0001, "loss": 0.0191, "step": 21490 }, { "epoch": 141.44736842105263, "grad_norm": 1.2026904821395874, "learning_rate": 0.0001, "loss": 0.0201, "step": 21500 }, { "epoch": 141.51315789473685, "grad_norm": 1.3784390687942505, "learning_rate": 0.0001, "loss": 0.0193, "step": 21510 }, { "epoch": 141.57894736842104, "grad_norm": 1.1604245901107788, "learning_rate": 0.0001, "loss": 0.0175, "step": 21520 }, { "epoch": 141.64473684210526, "grad_norm": 1.6574337482452393, "learning_rate": 0.0001, "loss": 0.0179, "step": 21530 }, { "epoch": 141.71052631578948, "grad_norm": 1.6064780950546265, "learning_rate": 0.0001, "loss": 0.0195, "step": 21540 }, { "epoch": 141.77631578947367, "grad_norm": 1.4753048419952393, "learning_rate": 0.0001, "loss": 0.0195, "step": 21550 }, { "epoch": 141.8421052631579, "grad_norm": 1.6422748565673828, "learning_rate": 0.0001, "loss": 0.0191, "step": 21560 }, { "epoch": 141.9078947368421, "grad_norm": 1.4057989120483398, "learning_rate": 0.0001, "loss": 0.0184, "step": 21570 }, { "epoch": 141.97368421052633, "grad_norm": 1.7741460800170898, "learning_rate": 0.0001, "loss": 0.018, "step": 21580 }, { "epoch": 142.03947368421052, "grad_norm": 1.278627872467041, "learning_rate": 0.0001, "loss": 0.0203, "step": 21590 }, { "epoch": 142.10526315789474, "grad_norm": 1.6472762823104858, "learning_rate": 0.0001, "loss": 0.0174, "step": 21600 }, { "epoch": 142.17105263157896, "grad_norm": 1.5067228078842163, "learning_rate": 0.0001, "loss": 0.0198, "step": 21610 }, { "epoch": 142.23684210526315, "grad_norm": 1.04597008228302, "learning_rate": 0.0001, "loss": 0.0178, "step": 21620 }, { "epoch": 142.30263157894737, "grad_norm": 1.3342257738113403, "learning_rate": 0.0001, "loss": 0.0199, "step": 21630 }, { "epoch": 142.3684210526316, "grad_norm": 1.3006070852279663, "learning_rate": 0.0001, "loss": 0.0207, "step": 21640 }, { "epoch": 142.43421052631578, "grad_norm": 1.0908511877059937, "learning_rate": 0.0001, "loss": 0.0195, "step": 21650 }, { "epoch": 142.5, "grad_norm": 1.156730055809021, "learning_rate": 0.0001, "loss": 0.0182, "step": 21660 }, { "epoch": 142.56578947368422, "grad_norm": 1.5827045440673828, "learning_rate": 0.0001, "loss": 0.0181, "step": 21670 }, { "epoch": 142.6315789473684, "grad_norm": 1.2614614963531494, "learning_rate": 0.0001, "loss": 0.0183, "step": 21680 }, { "epoch": 142.69736842105263, "grad_norm": 1.386438012123108, "learning_rate": 0.0001, "loss": 0.0206, "step": 21690 }, { "epoch": 142.76315789473685, "grad_norm": 1.0151652097702026, "learning_rate": 0.0001, "loss": 0.0212, "step": 21700 }, { "epoch": 142.82894736842104, "grad_norm": 1.2075785398483276, "learning_rate": 0.0001, "loss": 0.0184, "step": 21710 }, { "epoch": 142.89473684210526, "grad_norm": 1.2344067096710205, "learning_rate": 0.0001, "loss": 0.0212, "step": 21720 }, { "epoch": 142.96052631578948, "grad_norm": 1.3653433322906494, "learning_rate": 0.0001, "loss": 0.0201, "step": 21730 }, { "epoch": 143.02631578947367, "grad_norm": 1.1140528917312622, "learning_rate": 0.0001, "loss": 0.0192, "step": 21740 }, { "epoch": 143.0921052631579, "grad_norm": 1.543173909187317, "learning_rate": 0.0001, "loss": 0.0198, "step": 21750 }, { "epoch": 143.1578947368421, "grad_norm": 1.6480592489242554, "learning_rate": 0.0001, "loss": 0.0189, "step": 21760 }, { "epoch": 143.22368421052633, "grad_norm": 1.5367249250411987, "learning_rate": 0.0001, "loss": 0.0192, "step": 21770 }, { "epoch": 143.28947368421052, "grad_norm": 1.3302656412124634, "learning_rate": 0.0001, "loss": 0.018, "step": 21780 }, { "epoch": 143.35526315789474, "grad_norm": 1.3698561191558838, "learning_rate": 0.0001, "loss": 0.0206, "step": 21790 }, { "epoch": 143.42105263157896, "grad_norm": 1.6688646078109741, "learning_rate": 0.0001, "loss": 0.0176, "step": 21800 }, { "epoch": 143.48684210526315, "grad_norm": 0.9735197424888611, "learning_rate": 0.0001, "loss": 0.0201, "step": 21810 }, { "epoch": 143.55263157894737, "grad_norm": 1.394163727760315, "learning_rate": 0.0001, "loss": 0.0201, "step": 21820 }, { "epoch": 143.6184210526316, "grad_norm": 1.4685237407684326, "learning_rate": 0.0001, "loss": 0.0197, "step": 21830 }, { "epoch": 143.68421052631578, "grad_norm": 1.2259248495101929, "learning_rate": 0.0001, "loss": 0.0196, "step": 21840 }, { "epoch": 143.75, "grad_norm": 1.0960712432861328, "learning_rate": 0.0001, "loss": 0.0202, "step": 21850 }, { "epoch": 143.81578947368422, "grad_norm": 1.4755326509475708, "learning_rate": 0.0001, "loss": 0.0172, "step": 21860 }, { "epoch": 143.8815789473684, "grad_norm": 1.1997923851013184, "learning_rate": 0.0001, "loss": 0.0173, "step": 21870 }, { "epoch": 143.94736842105263, "grad_norm": 1.0807325839996338, "learning_rate": 0.0001, "loss": 0.0201, "step": 21880 }, { "epoch": 144.01315789473685, "grad_norm": 1.120715618133545, "learning_rate": 0.0001, "loss": 0.0175, "step": 21890 }, { "epoch": 144.07894736842104, "grad_norm": 1.401428461074829, "learning_rate": 0.0001, "loss": 0.0195, "step": 21900 }, { "epoch": 144.14473684210526, "grad_norm": 1.0058794021606445, "learning_rate": 0.0001, "loss": 0.0196, "step": 21910 }, { "epoch": 144.21052631578948, "grad_norm": 1.4190456867218018, "learning_rate": 0.0001, "loss": 0.0193, "step": 21920 }, { "epoch": 144.27631578947367, "grad_norm": 1.0206199884414673, "learning_rate": 0.0001, "loss": 0.0193, "step": 21930 }, { "epoch": 144.3421052631579, "grad_norm": 1.3440083265304565, "learning_rate": 0.0001, "loss": 0.0212, "step": 21940 }, { "epoch": 144.4078947368421, "grad_norm": 1.0611833333969116, "learning_rate": 0.0001, "loss": 0.0189, "step": 21950 }, { "epoch": 144.47368421052633, "grad_norm": 1.4812649488449097, "learning_rate": 0.0001, "loss": 0.0217, "step": 21960 }, { "epoch": 144.53947368421052, "grad_norm": 1.3473953008651733, "learning_rate": 0.0001, "loss": 0.0184, "step": 21970 }, { "epoch": 144.60526315789474, "grad_norm": 1.229573130607605, "learning_rate": 0.0001, "loss": 0.0189, "step": 21980 }, { "epoch": 144.67105263157896, "grad_norm": 1.0524625778198242, "learning_rate": 0.0001, "loss": 0.0188, "step": 21990 }, { "epoch": 144.73684210526315, "grad_norm": 1.317591905593872, "learning_rate": 0.0001, "loss": 0.0191, "step": 22000 }, { "epoch": 144.80263157894737, "grad_norm": 1.3425631523132324, "learning_rate": 0.0001, "loss": 0.0205, "step": 22010 }, { "epoch": 144.8684210526316, "grad_norm": 1.0790326595306396, "learning_rate": 0.0001, "loss": 0.0189, "step": 22020 }, { "epoch": 144.93421052631578, "grad_norm": 1.0952657461166382, "learning_rate": 0.0001, "loss": 0.0211, "step": 22030 }, { "epoch": 145.0, "grad_norm": 1.3941048383712769, "learning_rate": 0.0001, "loss": 0.0207, "step": 22040 }, { "epoch": 145.06578947368422, "grad_norm": 1.4520479440689087, "learning_rate": 0.0001, "loss": 0.0203, "step": 22050 }, { "epoch": 145.1315789473684, "grad_norm": 1.3955209255218506, "learning_rate": 0.0001, "loss": 0.0201, "step": 22060 }, { "epoch": 145.19736842105263, "grad_norm": 1.3725961446762085, "learning_rate": 0.0001, "loss": 0.0204, "step": 22070 }, { "epoch": 145.26315789473685, "grad_norm": 1.536729335784912, "learning_rate": 0.0001, "loss": 0.0196, "step": 22080 }, { "epoch": 145.32894736842104, "grad_norm": 1.2806124687194824, "learning_rate": 0.0001, "loss": 0.0197, "step": 22090 }, { "epoch": 145.39473684210526, "grad_norm": 1.2393194437026978, "learning_rate": 0.0001, "loss": 0.0171, "step": 22100 }, { "epoch": 145.46052631578948, "grad_norm": 1.6960039138793945, "learning_rate": 0.0001, "loss": 0.018, "step": 22110 }, { "epoch": 145.52631578947367, "grad_norm": 1.5707310438156128, "learning_rate": 0.0001, "loss": 0.0174, "step": 22120 }, { "epoch": 145.5921052631579, "grad_norm": 1.2192633152008057, "learning_rate": 0.0001, "loss": 0.0174, "step": 22130 }, { "epoch": 145.6578947368421, "grad_norm": 1.4482849836349487, "learning_rate": 0.0001, "loss": 0.0196, "step": 22140 }, { "epoch": 145.72368421052633, "grad_norm": 1.1989715099334717, "learning_rate": 0.0001, "loss": 0.0183, "step": 22150 }, { "epoch": 145.78947368421052, "grad_norm": 1.3403609991073608, "learning_rate": 0.0001, "loss": 0.0216, "step": 22160 }, { "epoch": 145.85526315789474, "grad_norm": 1.599914789199829, "learning_rate": 0.0001, "loss": 0.0186, "step": 22170 }, { "epoch": 145.92105263157896, "grad_norm": 1.602010726928711, "learning_rate": 0.0001, "loss": 0.0189, "step": 22180 }, { "epoch": 145.98684210526315, "grad_norm": 1.2780475616455078, "learning_rate": 0.0001, "loss": 0.0206, "step": 22190 }, { "epoch": 146.05263157894737, "grad_norm": 1.600659728050232, "learning_rate": 0.0001, "loss": 0.0206, "step": 22200 }, { "epoch": 146.1184210526316, "grad_norm": 1.0254467725753784, "learning_rate": 0.0001, "loss": 0.0195, "step": 22210 }, { "epoch": 146.18421052631578, "grad_norm": 1.2700848579406738, "learning_rate": 0.0001, "loss": 0.0166, "step": 22220 }, { "epoch": 146.25, "grad_norm": 1.2667077779769897, "learning_rate": 0.0001, "loss": 0.0173, "step": 22230 }, { "epoch": 146.31578947368422, "grad_norm": 1.057997703552246, "learning_rate": 0.0001, "loss": 0.0222, "step": 22240 }, { "epoch": 146.3815789473684, "grad_norm": 1.3093483448028564, "learning_rate": 0.0001, "loss": 0.0193, "step": 22250 }, { "epoch": 146.44736842105263, "grad_norm": 1.0066701173782349, "learning_rate": 0.0001, "loss": 0.0166, "step": 22260 }, { "epoch": 146.51315789473685, "grad_norm": 1.212918758392334, "learning_rate": 0.0001, "loss": 0.0179, "step": 22270 }, { "epoch": 146.57894736842104, "grad_norm": 1.3692690134048462, "learning_rate": 0.0001, "loss": 0.0236, "step": 22280 }, { "epoch": 146.64473684210526, "grad_norm": 1.1842776536941528, "learning_rate": 0.0001, "loss": 0.02, "step": 22290 }, { "epoch": 146.71052631578948, "grad_norm": 1.0390976667404175, "learning_rate": 0.0001, "loss": 0.0179, "step": 22300 }, { "epoch": 146.77631578947367, "grad_norm": 0.8841727375984192, "learning_rate": 0.0001, "loss": 0.018, "step": 22310 }, { "epoch": 146.8421052631579, "grad_norm": 1.2545442581176758, "learning_rate": 0.0001, "loss": 0.0176, "step": 22320 }, { "epoch": 146.9078947368421, "grad_norm": 1.3116580247879028, "learning_rate": 0.0001, "loss": 0.0201, "step": 22330 }, { "epoch": 146.97368421052633, "grad_norm": 0.9272478222846985, "learning_rate": 0.0001, "loss": 0.0173, "step": 22340 }, { "epoch": 147.03947368421052, "grad_norm": 1.2526774406433105, "learning_rate": 0.0001, "loss": 0.0189, "step": 22350 }, { "epoch": 147.10526315789474, "grad_norm": 1.276404857635498, "learning_rate": 0.0001, "loss": 0.0182, "step": 22360 }, { "epoch": 147.17105263157896, "grad_norm": 1.035636305809021, "learning_rate": 0.0001, "loss": 0.0209, "step": 22370 }, { "epoch": 147.23684210526315, "grad_norm": 1.1112178564071655, "learning_rate": 0.0001, "loss": 0.0177, "step": 22380 }, { "epoch": 147.30263157894737, "grad_norm": 1.3240015506744385, "learning_rate": 0.0001, "loss": 0.019, "step": 22390 }, { "epoch": 147.3684210526316, "grad_norm": 1.081715703010559, "learning_rate": 0.0001, "loss": 0.0193, "step": 22400 }, { "epoch": 147.43421052631578, "grad_norm": 1.243489384651184, "learning_rate": 0.0001, "loss": 0.0198, "step": 22410 }, { "epoch": 147.5, "grad_norm": 1.5017218589782715, "learning_rate": 0.0001, "loss": 0.019, "step": 22420 }, { "epoch": 147.56578947368422, "grad_norm": 0.9764308929443359, "learning_rate": 0.0001, "loss": 0.0211, "step": 22430 }, { "epoch": 147.6315789473684, "grad_norm": 1.4389960765838623, "learning_rate": 0.0001, "loss": 0.0198, "step": 22440 }, { "epoch": 147.69736842105263, "grad_norm": 1.4686914682388306, "learning_rate": 0.0001, "loss": 0.0206, "step": 22450 }, { "epoch": 147.76315789473685, "grad_norm": 1.3333423137664795, "learning_rate": 0.0001, "loss": 0.0188, "step": 22460 }, { "epoch": 147.82894736842104, "grad_norm": 1.3306684494018555, "learning_rate": 0.0001, "loss": 0.0186, "step": 22470 }, { "epoch": 147.89473684210526, "grad_norm": 1.4141942262649536, "learning_rate": 0.0001, "loss": 0.0189, "step": 22480 }, { "epoch": 147.96052631578948, "grad_norm": 1.542427659034729, "learning_rate": 0.0001, "loss": 0.0188, "step": 22490 }, { "epoch": 148.02631578947367, "grad_norm": 1.7996158599853516, "learning_rate": 0.0001, "loss": 0.0196, "step": 22500 }, { "epoch": 148.0921052631579, "grad_norm": 0.9638149738311768, "learning_rate": 0.0001, "loss": 0.0183, "step": 22510 }, { "epoch": 148.1578947368421, "grad_norm": 1.074876070022583, "learning_rate": 0.0001, "loss": 0.0206, "step": 22520 }, { "epoch": 148.22368421052633, "grad_norm": 1.0280288457870483, "learning_rate": 0.0001, "loss": 0.0217, "step": 22530 }, { "epoch": 148.28947368421052, "grad_norm": 1.0511701107025146, "learning_rate": 0.0001, "loss": 0.0192, "step": 22540 }, { "epoch": 148.35526315789474, "grad_norm": 1.678219199180603, "learning_rate": 0.0001, "loss": 0.0183, "step": 22550 }, { "epoch": 148.42105263157896, "grad_norm": 1.0990034341812134, "learning_rate": 0.0001, "loss": 0.0176, "step": 22560 }, { "epoch": 148.48684210526315, "grad_norm": 1.601233720779419, "learning_rate": 0.0001, "loss": 0.0183, "step": 22570 }, { "epoch": 148.55263157894737, "grad_norm": 1.4822688102722168, "learning_rate": 0.0001, "loss": 0.0184, "step": 22580 }, { "epoch": 148.6184210526316, "grad_norm": 1.492310881614685, "learning_rate": 0.0001, "loss": 0.0192, "step": 22590 }, { "epoch": 148.68421052631578, "grad_norm": 1.3018604516983032, "learning_rate": 0.0001, "loss": 0.0175, "step": 22600 }, { "epoch": 148.75, "grad_norm": 1.4880129098892212, "learning_rate": 0.0001, "loss": 0.0189, "step": 22610 }, { "epoch": 148.81578947368422, "grad_norm": 1.6518675088882446, "learning_rate": 0.0001, "loss": 0.0183, "step": 22620 }, { "epoch": 148.8815789473684, "grad_norm": 1.6061004400253296, "learning_rate": 0.0001, "loss": 0.0193, "step": 22630 }, { "epoch": 148.94736842105263, "grad_norm": 1.5879257917404175, "learning_rate": 0.0001, "loss": 0.0182, "step": 22640 }, { "epoch": 149.01315789473685, "grad_norm": 1.7779239416122437, "learning_rate": 0.0001, "loss": 0.0177, "step": 22650 }, { "epoch": 149.07894736842104, "grad_norm": 1.654737949371338, "learning_rate": 0.0001, "loss": 0.0184, "step": 22660 }, { "epoch": 149.14473684210526, "grad_norm": 1.3545973300933838, "learning_rate": 0.0001, "loss": 0.0175, "step": 22670 }, { "epoch": 149.21052631578948, "grad_norm": 1.3350344896316528, "learning_rate": 0.0001, "loss": 0.0172, "step": 22680 }, { "epoch": 149.27631578947367, "grad_norm": 1.253635287284851, "learning_rate": 0.0001, "loss": 0.0197, "step": 22690 }, { "epoch": 149.3421052631579, "grad_norm": 1.4453550577163696, "learning_rate": 0.0001, "loss": 0.0175, "step": 22700 }, { "epoch": 149.4078947368421, "grad_norm": 1.421644926071167, "learning_rate": 0.0001, "loss": 0.0186, "step": 22710 }, { "epoch": 149.47368421052633, "grad_norm": 1.5055420398712158, "learning_rate": 0.0001, "loss": 0.0194, "step": 22720 }, { "epoch": 149.53947368421052, "grad_norm": 1.4135552644729614, "learning_rate": 0.0001, "loss": 0.0179, "step": 22730 }, { "epoch": 149.60526315789474, "grad_norm": 1.5352897644042969, "learning_rate": 0.0001, "loss": 0.0166, "step": 22740 }, { "epoch": 149.67105263157896, "grad_norm": 1.561647891998291, "learning_rate": 0.0001, "loss": 0.0197, "step": 22750 }, { "epoch": 149.73684210526315, "grad_norm": 1.3064342737197876, "learning_rate": 0.0001, "loss": 0.0172, "step": 22760 }, { "epoch": 149.80263157894737, "grad_norm": 1.1616345643997192, "learning_rate": 0.0001, "loss": 0.0174, "step": 22770 }, { "epoch": 149.8684210526316, "grad_norm": 1.3709357976913452, "learning_rate": 0.0001, "loss": 0.0186, "step": 22780 }, { "epoch": 149.93421052631578, "grad_norm": 1.3530012369155884, "learning_rate": 0.0001, "loss": 0.0198, "step": 22790 }, { "epoch": 150.0, "grad_norm": 1.3412154912948608, "learning_rate": 0.0001, "loss": 0.0176, "step": 22800 }, { "epoch": 150.06578947368422, "grad_norm": 1.2247885465621948, "learning_rate": 0.0001, "loss": 0.0181, "step": 22810 }, { "epoch": 150.1315789473684, "grad_norm": 1.5520859956741333, "learning_rate": 0.0001, "loss": 0.019, "step": 22820 }, { "epoch": 150.19736842105263, "grad_norm": 1.8670061826705933, "learning_rate": 0.0001, "loss": 0.0218, "step": 22830 }, { "epoch": 150.26315789473685, "grad_norm": 2.187220573425293, "learning_rate": 0.0001, "loss": 0.0186, "step": 22840 }, { "epoch": 150.32894736842104, "grad_norm": 1.9027713537216187, "learning_rate": 0.0001, "loss": 0.0204, "step": 22850 }, { "epoch": 150.39473684210526, "grad_norm": 1.7409223318099976, "learning_rate": 0.0001, "loss": 0.0179, "step": 22860 }, { "epoch": 150.46052631578948, "grad_norm": 1.7857537269592285, "learning_rate": 0.0001, "loss": 0.0197, "step": 22870 }, { "epoch": 150.52631578947367, "grad_norm": 1.6792347431182861, "learning_rate": 0.0001, "loss": 0.0167, "step": 22880 }, { "epoch": 150.5921052631579, "grad_norm": 1.2226988077163696, "learning_rate": 0.0001, "loss": 0.0163, "step": 22890 }, { "epoch": 150.6578947368421, "grad_norm": 2.2091164588928223, "learning_rate": 0.0001, "loss": 0.0205, "step": 22900 }, { "epoch": 150.72368421052633, "grad_norm": 2.2626655101776123, "learning_rate": 0.0001, "loss": 0.0193, "step": 22910 }, { "epoch": 150.78947368421052, "grad_norm": 2.42525315284729, "learning_rate": 0.0001, "loss": 0.0193, "step": 22920 }, { "epoch": 150.85526315789474, "grad_norm": 1.7006795406341553, "learning_rate": 0.0001, "loss": 0.0181, "step": 22930 }, { "epoch": 150.92105263157896, "grad_norm": 1.7463268041610718, "learning_rate": 0.0001, "loss": 0.0176, "step": 22940 }, { "epoch": 150.98684210526315, "grad_norm": 1.5665723085403442, "learning_rate": 0.0001, "loss": 0.0157, "step": 22950 }, { "epoch": 151.05263157894737, "grad_norm": 1.7291148900985718, "learning_rate": 0.0001, "loss": 0.0176, "step": 22960 }, { "epoch": 151.1184210526316, "grad_norm": 1.3778690099716187, "learning_rate": 0.0001, "loss": 0.0166, "step": 22970 }, { "epoch": 151.18421052631578, "grad_norm": 1.612257480621338, "learning_rate": 0.0001, "loss": 0.0177, "step": 22980 }, { "epoch": 151.25, "grad_norm": 1.3075007200241089, "learning_rate": 0.0001, "loss": 0.0183, "step": 22990 }, { "epoch": 151.31578947368422, "grad_norm": 1.2902040481567383, "learning_rate": 0.0001, "loss": 0.0162, "step": 23000 }, { "epoch": 151.3815789473684, "grad_norm": 1.3315701484680176, "learning_rate": 0.0001, "loss": 0.0209, "step": 23010 }, { "epoch": 151.44736842105263, "grad_norm": 1.0458896160125732, "learning_rate": 0.0001, "loss": 0.0178, "step": 23020 }, { "epoch": 151.51315789473685, "grad_norm": 1.1520403623580933, "learning_rate": 0.0001, "loss": 0.0205, "step": 23030 }, { "epoch": 151.57894736842104, "grad_norm": 1.5853742361068726, "learning_rate": 0.0001, "loss": 0.0196, "step": 23040 }, { "epoch": 151.64473684210526, "grad_norm": 1.2538849115371704, "learning_rate": 0.0001, "loss": 0.0185, "step": 23050 }, { "epoch": 151.71052631578948, "grad_norm": 1.6762452125549316, "learning_rate": 0.0001, "loss": 0.019, "step": 23060 }, { "epoch": 151.77631578947367, "grad_norm": 1.7446788549423218, "learning_rate": 0.0001, "loss": 0.0184, "step": 23070 }, { "epoch": 151.8421052631579, "grad_norm": 0.9342503547668457, "learning_rate": 0.0001, "loss": 0.0189, "step": 23080 }, { "epoch": 151.9078947368421, "grad_norm": 1.1515876054763794, "learning_rate": 0.0001, "loss": 0.018, "step": 23090 }, { "epoch": 151.97368421052633, "grad_norm": 1.6669334173202515, "learning_rate": 0.0001, "loss": 0.0178, "step": 23100 }, { "epoch": 152.03947368421052, "grad_norm": 1.5257030725479126, "learning_rate": 0.0001, "loss": 0.0209, "step": 23110 }, { "epoch": 152.10526315789474, "grad_norm": 1.6215834617614746, "learning_rate": 0.0001, "loss": 0.0188, "step": 23120 }, { "epoch": 152.17105263157896, "grad_norm": 1.7707445621490479, "learning_rate": 0.0001, "loss": 0.0195, "step": 23130 }, { "epoch": 152.23684210526315, "grad_norm": 1.1109079122543335, "learning_rate": 0.0001, "loss": 0.0186, "step": 23140 }, { "epoch": 152.30263157894737, "grad_norm": 1.362051010131836, "learning_rate": 0.0001, "loss": 0.0165, "step": 23150 }, { "epoch": 152.3684210526316, "grad_norm": 1.205551028251648, "learning_rate": 0.0001, "loss": 0.0176, "step": 23160 }, { "epoch": 152.43421052631578, "grad_norm": 1.1164907217025757, "learning_rate": 0.0001, "loss": 0.0177, "step": 23170 }, { "epoch": 152.5, "grad_norm": 1.4803227186203003, "learning_rate": 0.0001, "loss": 0.0198, "step": 23180 }, { "epoch": 152.56578947368422, "grad_norm": 1.282638430595398, "learning_rate": 0.0001, "loss": 0.0204, "step": 23190 }, { "epoch": 152.6315789473684, "grad_norm": 1.3923507928848267, "learning_rate": 0.0001, "loss": 0.0199, "step": 23200 }, { "epoch": 152.69736842105263, "grad_norm": 1.3010979890823364, "learning_rate": 0.0001, "loss": 0.0202, "step": 23210 }, { "epoch": 152.76315789473685, "grad_norm": 1.3184207677841187, "learning_rate": 0.0001, "loss": 0.0169, "step": 23220 }, { "epoch": 152.82894736842104, "grad_norm": 1.2217152118682861, "learning_rate": 0.0001, "loss": 0.0172, "step": 23230 }, { "epoch": 152.89473684210526, "grad_norm": 3.6102325916290283, "learning_rate": 0.0001, "loss": 0.0229, "step": 23240 }, { "epoch": 152.96052631578948, "grad_norm": 1.7692070007324219, "learning_rate": 0.0001, "loss": 0.0213, "step": 23250 }, { "epoch": 153.02631578947367, "grad_norm": 1.6135303974151611, "learning_rate": 0.0001, "loss": 0.0188, "step": 23260 }, { "epoch": 153.0921052631579, "grad_norm": 1.7957147359848022, "learning_rate": 0.0001, "loss": 0.0169, "step": 23270 }, { "epoch": 153.1578947368421, "grad_norm": 1.8844883441925049, "learning_rate": 0.0001, "loss": 0.0173, "step": 23280 }, { "epoch": 153.22368421052633, "grad_norm": 1.8668259382247925, "learning_rate": 0.0001, "loss": 0.0181, "step": 23290 }, { "epoch": 153.28947368421052, "grad_norm": 1.8679157495498657, "learning_rate": 0.0001, "loss": 0.0179, "step": 23300 }, { "epoch": 153.35526315789474, "grad_norm": 1.436536192893982, "learning_rate": 0.0001, "loss": 0.0172, "step": 23310 }, { "epoch": 153.42105263157896, "grad_norm": 1.4835171699523926, "learning_rate": 0.0001, "loss": 0.0203, "step": 23320 }, { "epoch": 153.48684210526315, "grad_norm": 1.3552966117858887, "learning_rate": 0.0001, "loss": 0.018, "step": 23330 }, { "epoch": 153.55263157894737, "grad_norm": 1.5863488912582397, "learning_rate": 0.0001, "loss": 0.018, "step": 23340 }, { "epoch": 153.6184210526316, "grad_norm": 1.2556802034378052, "learning_rate": 0.0001, "loss": 0.0165, "step": 23350 }, { "epoch": 153.68421052631578, "grad_norm": 1.7405736446380615, "learning_rate": 0.0001, "loss": 0.0168, "step": 23360 }, { "epoch": 153.75, "grad_norm": 1.4233875274658203, "learning_rate": 0.0001, "loss": 0.0176, "step": 23370 }, { "epoch": 153.81578947368422, "grad_norm": 1.3720194101333618, "learning_rate": 0.0001, "loss": 0.0165, "step": 23380 }, { "epoch": 153.8815789473684, "grad_norm": 1.481738567352295, "learning_rate": 0.0001, "loss": 0.0183, "step": 23390 }, { "epoch": 153.94736842105263, "grad_norm": 1.8668274879455566, "learning_rate": 0.0001, "loss": 0.0186, "step": 23400 }, { "epoch": 154.01315789473685, "grad_norm": 1.2795329093933105, "learning_rate": 0.0001, "loss": 0.0207, "step": 23410 }, { "epoch": 154.07894736842104, "grad_norm": 1.5393232107162476, "learning_rate": 0.0001, "loss": 0.018, "step": 23420 }, { "epoch": 154.14473684210526, "grad_norm": 1.5710614919662476, "learning_rate": 0.0001, "loss": 0.021, "step": 23430 }, { "epoch": 154.21052631578948, "grad_norm": 1.5256733894348145, "learning_rate": 0.0001, "loss": 0.0186, "step": 23440 }, { "epoch": 154.27631578947367, "grad_norm": 1.4345033168792725, "learning_rate": 0.0001, "loss": 0.0182, "step": 23450 }, { "epoch": 154.3421052631579, "grad_norm": 1.4099990129470825, "learning_rate": 0.0001, "loss": 0.0171, "step": 23460 }, { "epoch": 154.4078947368421, "grad_norm": 1.6725177764892578, "learning_rate": 0.0001, "loss": 0.0164, "step": 23470 }, { "epoch": 154.47368421052633, "grad_norm": 1.6998838186264038, "learning_rate": 0.0001, "loss": 0.0172, "step": 23480 }, { "epoch": 154.53947368421052, "grad_norm": 1.105302095413208, "learning_rate": 0.0001, "loss": 0.0181, "step": 23490 }, { "epoch": 154.60526315789474, "grad_norm": 1.0682851076126099, "learning_rate": 0.0001, "loss": 0.0172, "step": 23500 }, { "epoch": 154.67105263157896, "grad_norm": 1.4762492179870605, "learning_rate": 0.0001, "loss": 0.0188, "step": 23510 }, { "epoch": 154.73684210526315, "grad_norm": 1.3171006441116333, "learning_rate": 0.0001, "loss": 0.0171, "step": 23520 }, { "epoch": 154.80263157894737, "grad_norm": 1.1788697242736816, "learning_rate": 0.0001, "loss": 0.0179, "step": 23530 }, { "epoch": 154.8684210526316, "grad_norm": 1.425657868385315, "learning_rate": 0.0001, "loss": 0.0204, "step": 23540 }, { "epoch": 154.93421052631578, "grad_norm": 1.547235369682312, "learning_rate": 0.0001, "loss": 0.0182, "step": 23550 }, { "epoch": 155.0, "grad_norm": 1.6571357250213623, "learning_rate": 0.0001, "loss": 0.0173, "step": 23560 }, { "epoch": 155.06578947368422, "grad_norm": 3.123242139816284, "learning_rate": 0.0001, "loss": 0.0198, "step": 23570 }, { "epoch": 155.1315789473684, "grad_norm": 1.6413636207580566, "learning_rate": 0.0001, "loss": 0.0182, "step": 23580 }, { "epoch": 155.19736842105263, "grad_norm": 1.3690893650054932, "learning_rate": 0.0001, "loss": 0.0176, "step": 23590 }, { "epoch": 155.26315789473685, "grad_norm": 1.3904107809066772, "learning_rate": 0.0001, "loss": 0.0192, "step": 23600 }, { "epoch": 155.32894736842104, "grad_norm": 1.816664457321167, "learning_rate": 0.0001, "loss": 0.0201, "step": 23610 }, { "epoch": 155.39473684210526, "grad_norm": 1.4407306909561157, "learning_rate": 0.0001, "loss": 0.0175, "step": 23620 }, { "epoch": 155.46052631578948, "grad_norm": 1.6005239486694336, "learning_rate": 0.0001, "loss": 0.0175, "step": 23630 }, { "epoch": 155.52631578947367, "grad_norm": 1.4299267530441284, "learning_rate": 0.0001, "loss": 0.0173, "step": 23640 }, { "epoch": 155.5921052631579, "grad_norm": 1.0841288566589355, "learning_rate": 0.0001, "loss": 0.0198, "step": 23650 }, { "epoch": 155.6578947368421, "grad_norm": 1.3383357524871826, "learning_rate": 0.0001, "loss": 0.021, "step": 23660 }, { "epoch": 155.72368421052633, "grad_norm": 1.0255568027496338, "learning_rate": 0.0001, "loss": 0.02, "step": 23670 }, { "epoch": 155.78947368421052, "grad_norm": 1.336609959602356, "learning_rate": 0.0001, "loss": 0.0213, "step": 23680 }, { "epoch": 155.85526315789474, "grad_norm": 1.3863122463226318, "learning_rate": 0.0001, "loss": 0.0186, "step": 23690 }, { "epoch": 155.92105263157896, "grad_norm": 1.3259609937667847, "learning_rate": 0.0001, "loss": 0.0187, "step": 23700 }, { "epoch": 155.98684210526315, "grad_norm": 1.0066629648208618, "learning_rate": 0.0001, "loss": 0.0206, "step": 23710 }, { "epoch": 156.05263157894737, "grad_norm": 1.1141328811645508, "learning_rate": 0.0001, "loss": 0.0211, "step": 23720 }, { "epoch": 156.1184210526316, "grad_norm": 1.0179667472839355, "learning_rate": 0.0001, "loss": 0.0209, "step": 23730 }, { "epoch": 156.18421052631578, "grad_norm": 1.498150110244751, "learning_rate": 0.0001, "loss": 0.023, "step": 23740 }, { "epoch": 156.25, "grad_norm": 1.844730257987976, "learning_rate": 0.0001, "loss": 0.0209, "step": 23750 }, { "epoch": 156.31578947368422, "grad_norm": 1.3886311054229736, "learning_rate": 0.0001, "loss": 0.0181, "step": 23760 }, { "epoch": 156.3815789473684, "grad_norm": 1.3924232721328735, "learning_rate": 0.0001, "loss": 0.0197, "step": 23770 }, { "epoch": 156.44736842105263, "grad_norm": 1.7710256576538086, "learning_rate": 0.0001, "loss": 0.0178, "step": 23780 }, { "epoch": 156.51315789473685, "grad_norm": 1.4801042079925537, "learning_rate": 0.0001, "loss": 0.0194, "step": 23790 }, { "epoch": 156.57894736842104, "grad_norm": 1.1061052083969116, "learning_rate": 0.0001, "loss": 0.0187, "step": 23800 }, { "epoch": 156.64473684210526, "grad_norm": 1.3435893058776855, "learning_rate": 0.0001, "loss": 0.0184, "step": 23810 }, { "epoch": 156.71052631578948, "grad_norm": 1.2930083274841309, "learning_rate": 0.0001, "loss": 0.0189, "step": 23820 }, { "epoch": 156.77631578947367, "grad_norm": 1.1935070753097534, "learning_rate": 0.0001, "loss": 0.0177, "step": 23830 }, { "epoch": 156.8421052631579, "grad_norm": 1.4816370010375977, "learning_rate": 0.0001, "loss": 0.0195, "step": 23840 }, { "epoch": 156.9078947368421, "grad_norm": 1.39240300655365, "learning_rate": 0.0001, "loss": 0.0206, "step": 23850 }, { "epoch": 156.97368421052633, "grad_norm": 1.3462928533554077, "learning_rate": 0.0001, "loss": 0.0183, "step": 23860 }, { "epoch": 157.03947368421052, "grad_norm": 1.2192853689193726, "learning_rate": 0.0001, "loss": 0.0192, "step": 23870 }, { "epoch": 157.10526315789474, "grad_norm": 1.2664424180984497, "learning_rate": 0.0001, "loss": 0.0199, "step": 23880 }, { "epoch": 157.17105263157896, "grad_norm": 1.3305250406265259, "learning_rate": 0.0001, "loss": 0.0184, "step": 23890 }, { "epoch": 157.23684210526315, "grad_norm": 1.512176513671875, "learning_rate": 0.0001, "loss": 0.0198, "step": 23900 }, { "epoch": 157.30263157894737, "grad_norm": 1.5272941589355469, "learning_rate": 0.0001, "loss": 0.0184, "step": 23910 }, { "epoch": 157.3684210526316, "grad_norm": 1.281473994255066, "learning_rate": 0.0001, "loss": 0.0199, "step": 23920 }, { "epoch": 157.43421052631578, "grad_norm": 1.4534245729446411, "learning_rate": 0.0001, "loss": 0.0191, "step": 23930 }, { "epoch": 157.5, "grad_norm": 1.065725564956665, "learning_rate": 0.0001, "loss": 0.0176, "step": 23940 }, { "epoch": 157.56578947368422, "grad_norm": 1.2938498258590698, "learning_rate": 0.0001, "loss": 0.0189, "step": 23950 }, { "epoch": 157.6315789473684, "grad_norm": 1.5139905214309692, "learning_rate": 0.0001, "loss": 0.0177, "step": 23960 }, { "epoch": 157.69736842105263, "grad_norm": 1.1256234645843506, "learning_rate": 0.0001, "loss": 0.0197, "step": 23970 }, { "epoch": 157.76315789473685, "grad_norm": 1.3171970844268799, "learning_rate": 0.0001, "loss": 0.0175, "step": 23980 }, { "epoch": 157.82894736842104, "grad_norm": 1.7550626993179321, "learning_rate": 0.0001, "loss": 0.02, "step": 23990 }, { "epoch": 157.89473684210526, "grad_norm": 1.2105461359024048, "learning_rate": 0.0001, "loss": 0.0194, "step": 24000 }, { "epoch": 157.96052631578948, "grad_norm": 1.7495108842849731, "learning_rate": 0.0001, "loss": 0.0181, "step": 24010 }, { "epoch": 158.02631578947367, "grad_norm": 1.5057815313339233, "learning_rate": 0.0001, "loss": 0.0178, "step": 24020 }, { "epoch": 158.0921052631579, "grad_norm": 1.364128828048706, "learning_rate": 0.0001, "loss": 0.018, "step": 24030 }, { "epoch": 158.1578947368421, "grad_norm": 1.6452478170394897, "learning_rate": 0.0001, "loss": 0.0178, "step": 24040 }, { "epoch": 158.22368421052633, "grad_norm": 1.2079576253890991, "learning_rate": 0.0001, "loss": 0.0173, "step": 24050 }, { "epoch": 158.28947368421052, "grad_norm": 1.1615091562271118, "learning_rate": 0.0001, "loss": 0.0179, "step": 24060 }, { "epoch": 158.35526315789474, "grad_norm": 1.7258861064910889, "learning_rate": 0.0001, "loss": 0.021, "step": 24070 }, { "epoch": 158.42105263157896, "grad_norm": 1.5624957084655762, "learning_rate": 0.0001, "loss": 0.0186, "step": 24080 }, { "epoch": 158.48684210526315, "grad_norm": 1.3713676929473877, "learning_rate": 0.0001, "loss": 0.0158, "step": 24090 }, { "epoch": 158.55263157894737, "grad_norm": 0.9600792527198792, "learning_rate": 0.0001, "loss": 0.0183, "step": 24100 }, { "epoch": 158.6184210526316, "grad_norm": 1.1764682531356812, "learning_rate": 0.0001, "loss": 0.0203, "step": 24110 }, { "epoch": 158.68421052631578, "grad_norm": 0.8807045221328735, "learning_rate": 0.0001, "loss": 0.0191, "step": 24120 }, { "epoch": 158.75, "grad_norm": 1.495734691619873, "learning_rate": 0.0001, "loss": 0.0188, "step": 24130 }, { "epoch": 158.81578947368422, "grad_norm": 1.2996577024459839, "learning_rate": 0.0001, "loss": 0.0195, "step": 24140 }, { "epoch": 158.8815789473684, "grad_norm": 1.3335479497909546, "learning_rate": 0.0001, "loss": 0.0181, "step": 24150 }, { "epoch": 158.94736842105263, "grad_norm": 1.3486828804016113, "learning_rate": 0.0001, "loss": 0.0188, "step": 24160 }, { "epoch": 159.01315789473685, "grad_norm": 1.1436874866485596, "learning_rate": 0.0001, "loss": 0.0176, "step": 24170 }, { "epoch": 159.07894736842104, "grad_norm": 1.0566798448562622, "learning_rate": 0.0001, "loss": 0.02, "step": 24180 }, { "epoch": 159.14473684210526, "grad_norm": 1.3232210874557495, "learning_rate": 0.0001, "loss": 0.0182, "step": 24190 }, { "epoch": 159.21052631578948, "grad_norm": 1.1854642629623413, "learning_rate": 0.0001, "loss": 0.019, "step": 24200 }, { "epoch": 159.27631578947367, "grad_norm": 1.3037360906600952, "learning_rate": 0.0001, "loss": 0.0179, "step": 24210 }, { "epoch": 159.3421052631579, "grad_norm": 1.1624709367752075, "learning_rate": 0.0001, "loss": 0.0203, "step": 24220 }, { "epoch": 159.4078947368421, "grad_norm": 1.279187798500061, "learning_rate": 0.0001, "loss": 0.0185, "step": 24230 }, { "epoch": 159.47368421052633, "grad_norm": 1.1820547580718994, "learning_rate": 0.0001, "loss": 0.0186, "step": 24240 }, { "epoch": 159.53947368421052, "grad_norm": 1.3856960535049438, "learning_rate": 0.0001, "loss": 0.0207, "step": 24250 }, { "epoch": 159.60526315789474, "grad_norm": 1.538688063621521, "learning_rate": 0.0001, "loss": 0.0187, "step": 24260 }, { "epoch": 159.67105263157896, "grad_norm": 1.3758467435836792, "learning_rate": 0.0001, "loss": 0.0186, "step": 24270 }, { "epoch": 159.73684210526315, "grad_norm": 1.637129306793213, "learning_rate": 0.0001, "loss": 0.0191, "step": 24280 }, { "epoch": 159.80263157894737, "grad_norm": 1.3896411657333374, "learning_rate": 0.0001, "loss": 0.0168, "step": 24290 }, { "epoch": 159.8684210526316, "grad_norm": 1.1721456050872803, "learning_rate": 0.0001, "loss": 0.0169, "step": 24300 }, { "epoch": 159.93421052631578, "grad_norm": 1.9932554960250854, "learning_rate": 0.0001, "loss": 0.0173, "step": 24310 }, { "epoch": 160.0, "grad_norm": 1.3891863822937012, "learning_rate": 0.0001, "loss": 0.0175, "step": 24320 }, { "epoch": 160.06578947368422, "grad_norm": 1.4516315460205078, "learning_rate": 0.0001, "loss": 0.0182, "step": 24330 }, { "epoch": 160.1315789473684, "grad_norm": 1.426121473312378, "learning_rate": 0.0001, "loss": 0.019, "step": 24340 }, { "epoch": 160.19736842105263, "grad_norm": 1.5220839977264404, "learning_rate": 0.0001, "loss": 0.02, "step": 24350 }, { "epoch": 160.26315789473685, "grad_norm": 1.4482239484786987, "learning_rate": 0.0001, "loss": 0.0156, "step": 24360 }, { "epoch": 160.32894736842104, "grad_norm": 1.5488955974578857, "learning_rate": 0.0001, "loss": 0.0185, "step": 24370 }, { "epoch": 160.39473684210526, "grad_norm": 1.3807427883148193, "learning_rate": 0.0001, "loss": 0.0173, "step": 24380 }, { "epoch": 160.46052631578948, "grad_norm": 1.0160894393920898, "learning_rate": 0.0001, "loss": 0.0178, "step": 24390 }, { "epoch": 160.52631578947367, "grad_norm": 1.2377235889434814, "learning_rate": 0.0001, "loss": 0.0191, "step": 24400 }, { "epoch": 160.5921052631579, "grad_norm": 0.987001895904541, "learning_rate": 0.0001, "loss": 0.0166, "step": 24410 }, { "epoch": 160.6578947368421, "grad_norm": 1.317531704902649, "learning_rate": 0.0001, "loss": 0.0183, "step": 24420 }, { "epoch": 160.72368421052633, "grad_norm": 1.521753191947937, "learning_rate": 0.0001, "loss": 0.0216, "step": 24430 }, { "epoch": 160.78947368421052, "grad_norm": 1.166321039199829, "learning_rate": 0.0001, "loss": 0.0192, "step": 24440 }, { "epoch": 160.85526315789474, "grad_norm": 1.1440471410751343, "learning_rate": 0.0001, "loss": 0.0184, "step": 24450 }, { "epoch": 160.92105263157896, "grad_norm": 1.3936820030212402, "learning_rate": 0.0001, "loss": 0.0218, "step": 24460 }, { "epoch": 160.98684210526315, "grad_norm": 1.1521437168121338, "learning_rate": 0.0001, "loss": 0.0171, "step": 24470 }, { "epoch": 161.05263157894737, "grad_norm": 1.0062541961669922, "learning_rate": 0.0001, "loss": 0.0204, "step": 24480 }, { "epoch": 161.1184210526316, "grad_norm": 1.126672625541687, "learning_rate": 0.0001, "loss": 0.0188, "step": 24490 }, { "epoch": 161.18421052631578, "grad_norm": 0.8864695429801941, "learning_rate": 0.0001, "loss": 0.0206, "step": 24500 }, { "epoch": 161.25, "grad_norm": 1.3956273794174194, "learning_rate": 0.0001, "loss": 0.0188, "step": 24510 }, { "epoch": 161.31578947368422, "grad_norm": 1.4042059183120728, "learning_rate": 0.0001, "loss": 0.0183, "step": 24520 }, { "epoch": 161.3815789473684, "grad_norm": 1.1768255233764648, "learning_rate": 0.0001, "loss": 0.0221, "step": 24530 }, { "epoch": 161.44736842105263, "grad_norm": 1.468287467956543, "learning_rate": 0.0001, "loss": 0.0208, "step": 24540 }, { "epoch": 161.51315789473685, "grad_norm": 1.4978389739990234, "learning_rate": 0.0001, "loss": 0.0212, "step": 24550 }, { "epoch": 161.57894736842104, "grad_norm": 0.9304846525192261, "learning_rate": 0.0001, "loss": 0.0216, "step": 24560 }, { "epoch": 161.64473684210526, "grad_norm": 1.3616132736206055, "learning_rate": 0.0001, "loss": 0.0205, "step": 24570 }, { "epoch": 161.71052631578948, "grad_norm": 1.0822436809539795, "learning_rate": 0.0001, "loss": 0.0188, "step": 24580 }, { "epoch": 161.77631578947367, "grad_norm": 1.1985223293304443, "learning_rate": 0.0001, "loss": 0.0187, "step": 24590 }, { "epoch": 161.8421052631579, "grad_norm": 1.1661827564239502, "learning_rate": 0.0001, "loss": 0.0205, "step": 24600 }, { "epoch": 161.9078947368421, "grad_norm": 1.2504955530166626, "learning_rate": 0.0001, "loss": 0.0206, "step": 24610 }, { "epoch": 161.97368421052633, "grad_norm": 1.2397123575210571, "learning_rate": 0.0001, "loss": 0.0182, "step": 24620 }, { "epoch": 162.03947368421052, "grad_norm": 1.0715943574905396, "learning_rate": 0.0001, "loss": 0.0206, "step": 24630 }, { "epoch": 162.10526315789474, "grad_norm": 1.1244536638259888, "learning_rate": 0.0001, "loss": 0.021, "step": 24640 }, { "epoch": 162.17105263157896, "grad_norm": 1.3353065252304077, "learning_rate": 0.0001, "loss": 0.0175, "step": 24650 }, { "epoch": 162.23684210526315, "grad_norm": 1.408204436302185, "learning_rate": 0.0001, "loss": 0.0187, "step": 24660 }, { "epoch": 162.30263157894737, "grad_norm": 1.3387621641159058, "learning_rate": 0.0001, "loss": 0.0185, "step": 24670 }, { "epoch": 162.3684210526316, "grad_norm": 1.7283587455749512, "learning_rate": 0.0001, "loss": 0.0202, "step": 24680 }, { "epoch": 162.43421052631578, "grad_norm": 1.5374019145965576, "learning_rate": 0.0001, "loss": 0.019, "step": 24690 }, { "epoch": 162.5, "grad_norm": 1.2795945405960083, "learning_rate": 0.0001, "loss": 0.0213, "step": 24700 }, { "epoch": 162.56578947368422, "grad_norm": 1.537787675857544, "learning_rate": 0.0001, "loss": 0.0174, "step": 24710 }, { "epoch": 162.6315789473684, "grad_norm": 1.3099159002304077, "learning_rate": 0.0001, "loss": 0.0166, "step": 24720 }, { "epoch": 162.69736842105263, "grad_norm": 1.4521063566207886, "learning_rate": 0.0001, "loss": 0.0179, "step": 24730 }, { "epoch": 162.76315789473685, "grad_norm": 1.0471484661102295, "learning_rate": 0.0001, "loss": 0.0189, "step": 24740 }, { "epoch": 162.82894736842104, "grad_norm": 1.6101007461547852, "learning_rate": 0.0001, "loss": 0.0182, "step": 24750 }, { "epoch": 162.89473684210526, "grad_norm": 1.3530237674713135, "learning_rate": 0.0001, "loss": 0.0173, "step": 24760 }, { "epoch": 162.96052631578948, "grad_norm": 1.4511295557022095, "learning_rate": 0.0001, "loss": 0.0201, "step": 24770 }, { "epoch": 163.02631578947367, "grad_norm": 1.6022499799728394, "learning_rate": 0.0001, "loss": 0.018, "step": 24780 }, { "epoch": 163.0921052631579, "grad_norm": 1.4935600757598877, "learning_rate": 0.0001, "loss": 0.0158, "step": 24790 }, { "epoch": 163.1578947368421, "grad_norm": 1.327233910560608, "learning_rate": 0.0001, "loss": 0.0174, "step": 24800 }, { "epoch": 163.22368421052633, "grad_norm": 1.6861796379089355, "learning_rate": 0.0001, "loss": 0.0186, "step": 24810 }, { "epoch": 163.28947368421052, "grad_norm": 1.4635359048843384, "learning_rate": 0.0001, "loss": 0.0224, "step": 24820 }, { "epoch": 163.35526315789474, "grad_norm": 1.0945830345153809, "learning_rate": 0.0001, "loss": 0.018, "step": 24830 }, { "epoch": 163.42105263157896, "grad_norm": 1.359348177909851, "learning_rate": 0.0001, "loss": 0.0184, "step": 24840 }, { "epoch": 163.48684210526315, "grad_norm": 1.269120216369629, "learning_rate": 0.0001, "loss": 0.0194, "step": 24850 }, { "epoch": 163.55263157894737, "grad_norm": 1.2218973636627197, "learning_rate": 0.0001, "loss": 0.0168, "step": 24860 }, { "epoch": 163.6184210526316, "grad_norm": 1.1602321863174438, "learning_rate": 0.0001, "loss": 0.0149, "step": 24870 }, { "epoch": 163.68421052631578, "grad_norm": 1.5064598321914673, "learning_rate": 0.0001, "loss": 0.0171, "step": 24880 }, { "epoch": 163.75, "grad_norm": 1.5285717248916626, "learning_rate": 0.0001, "loss": 0.0192, "step": 24890 }, { "epoch": 163.81578947368422, "grad_norm": 1.0176746845245361, "learning_rate": 0.0001, "loss": 0.0183, "step": 24900 }, { "epoch": 163.8815789473684, "grad_norm": 1.0195715427398682, "learning_rate": 0.0001, "loss": 0.0187, "step": 24910 }, { "epoch": 163.94736842105263, "grad_norm": 1.253736972808838, "learning_rate": 0.0001, "loss": 0.0173, "step": 24920 }, { "epoch": 164.01315789473685, "grad_norm": 1.370121717453003, "learning_rate": 0.0001, "loss": 0.0192, "step": 24930 }, { "epoch": 164.07894736842104, "grad_norm": 1.2714835405349731, "learning_rate": 0.0001, "loss": 0.0178, "step": 24940 }, { "epoch": 164.14473684210526, "grad_norm": 1.4336186647415161, "learning_rate": 0.0001, "loss": 0.0181, "step": 24950 }, { "epoch": 164.21052631578948, "grad_norm": 1.3978872299194336, "learning_rate": 0.0001, "loss": 0.0184, "step": 24960 }, { "epoch": 164.27631578947367, "grad_norm": 1.19012451171875, "learning_rate": 0.0001, "loss": 0.0179, "step": 24970 }, { "epoch": 164.3421052631579, "grad_norm": 1.0551053285598755, "learning_rate": 0.0001, "loss": 0.0157, "step": 24980 }, { "epoch": 164.4078947368421, "grad_norm": 1.8229308128356934, "learning_rate": 0.0001, "loss": 0.0187, "step": 24990 }, { "epoch": 164.47368421052633, "grad_norm": 0.7936663627624512, "learning_rate": 0.0001, "loss": 0.0153, "step": 25000 }, { "epoch": 164.53947368421052, "grad_norm": 1.1978473663330078, "learning_rate": 0.0001, "loss": 0.0163, "step": 25010 }, { "epoch": 164.60526315789474, "grad_norm": 1.305327296257019, "learning_rate": 0.0001, "loss": 0.0177, "step": 25020 }, { "epoch": 164.67105263157896, "grad_norm": 1.2756356000900269, "learning_rate": 0.0001, "loss": 0.0197, "step": 25030 }, { "epoch": 164.73684210526315, "grad_norm": 0.8239328861236572, "learning_rate": 0.0001, "loss": 0.0179, "step": 25040 }, { "epoch": 164.80263157894737, "grad_norm": 1.0603528022766113, "learning_rate": 0.0001, "loss": 0.0177, "step": 25050 }, { "epoch": 164.8684210526316, "grad_norm": 1.0184378623962402, "learning_rate": 0.0001, "loss": 0.018, "step": 25060 }, { "epoch": 164.93421052631578, "grad_norm": 1.0379877090454102, "learning_rate": 0.0001, "loss": 0.0186, "step": 25070 }, { "epoch": 165.0, "grad_norm": 1.1607532501220703, "learning_rate": 0.0001, "loss": 0.0191, "step": 25080 }, { "epoch": 165.06578947368422, "grad_norm": 1.4055289030075073, "learning_rate": 0.0001, "loss": 0.0182, "step": 25090 }, { "epoch": 165.1315789473684, "grad_norm": 1.3009381294250488, "learning_rate": 0.0001, "loss": 0.0195, "step": 25100 }, { "epoch": 165.19736842105263, "grad_norm": 1.4090827703475952, "learning_rate": 0.0001, "loss": 0.0188, "step": 25110 }, { "epoch": 165.26315789473685, "grad_norm": 1.2835659980773926, "learning_rate": 0.0001, "loss": 0.0191, "step": 25120 }, { "epoch": 165.32894736842104, "grad_norm": 1.6038185358047485, "learning_rate": 0.0001, "loss": 0.0193, "step": 25130 }, { "epoch": 165.39473684210526, "grad_norm": 1.2217997312545776, "learning_rate": 0.0001, "loss": 0.0181, "step": 25140 }, { "epoch": 165.46052631578948, "grad_norm": 1.1289339065551758, "learning_rate": 0.0001, "loss": 0.0221, "step": 25150 }, { "epoch": 165.52631578947367, "grad_norm": 1.2177705764770508, "learning_rate": 0.0001, "loss": 0.0187, "step": 25160 }, { "epoch": 165.5921052631579, "grad_norm": 1.6059798002243042, "learning_rate": 0.0001, "loss": 0.017, "step": 25170 }, { "epoch": 165.6578947368421, "grad_norm": 1.921615481376648, "learning_rate": 0.0001, "loss": 0.0191, "step": 25180 }, { "epoch": 165.72368421052633, "grad_norm": 1.20645010471344, "learning_rate": 0.0001, "loss": 0.0181, "step": 25190 }, { "epoch": 165.78947368421052, "grad_norm": 1.2785398960113525, "learning_rate": 0.0001, "loss": 0.018, "step": 25200 }, { "epoch": 165.85526315789474, "grad_norm": 1.6781924962997437, "learning_rate": 0.0001, "loss": 0.0182, "step": 25210 }, { "epoch": 165.92105263157896, "grad_norm": 0.9301381707191467, "learning_rate": 0.0001, "loss": 0.0162, "step": 25220 }, { "epoch": 165.98684210526315, "grad_norm": 1.2392312288284302, "learning_rate": 0.0001, "loss": 0.0171, "step": 25230 }, { "epoch": 166.05263157894737, "grad_norm": 1.5881590843200684, "learning_rate": 0.0001, "loss": 0.0188, "step": 25240 }, { "epoch": 166.1184210526316, "grad_norm": 1.0536082983016968, "learning_rate": 0.0001, "loss": 0.0171, "step": 25250 }, { "epoch": 166.18421052631578, "grad_norm": 1.475879192352295, "learning_rate": 0.0001, "loss": 0.0175, "step": 25260 }, { "epoch": 166.25, "grad_norm": 1.0223356485366821, "learning_rate": 0.0001, "loss": 0.0172, "step": 25270 }, { "epoch": 166.31578947368422, "grad_norm": 1.4303029775619507, "learning_rate": 0.0001, "loss": 0.0175, "step": 25280 }, { "epoch": 166.3815789473684, "grad_norm": 1.3278456926345825, "learning_rate": 0.0001, "loss": 0.016, "step": 25290 }, { "epoch": 166.44736842105263, "grad_norm": 1.6226589679718018, "learning_rate": 0.0001, "loss": 0.0204, "step": 25300 }, { "epoch": 166.51315789473685, "grad_norm": 1.4535454511642456, "learning_rate": 0.0001, "loss": 0.0169, "step": 25310 }, { "epoch": 166.57894736842104, "grad_norm": 1.3479703664779663, "learning_rate": 0.0001, "loss": 0.0174, "step": 25320 }, { "epoch": 166.64473684210526, "grad_norm": 1.2640882730484009, "learning_rate": 0.0001, "loss": 0.0165, "step": 25330 }, { "epoch": 166.71052631578948, "grad_norm": 0.9760680198669434, "learning_rate": 0.0001, "loss": 0.0181, "step": 25340 }, { "epoch": 166.77631578947367, "grad_norm": 1.1063249111175537, "learning_rate": 0.0001, "loss": 0.0171, "step": 25350 }, { "epoch": 166.8421052631579, "grad_norm": 1.4282013177871704, "learning_rate": 0.0001, "loss": 0.0176, "step": 25360 }, { "epoch": 166.9078947368421, "grad_norm": 1.7794874906539917, "learning_rate": 0.0001, "loss": 0.0184, "step": 25370 }, { "epoch": 166.97368421052633, "grad_norm": 1.1673434972763062, "learning_rate": 0.0001, "loss": 0.018, "step": 25380 }, { "epoch": 167.03947368421052, "grad_norm": 1.1143444776535034, "learning_rate": 0.0001, "loss": 0.0167, "step": 25390 }, { "epoch": 167.10526315789474, "grad_norm": 1.4228686094284058, "learning_rate": 0.0001, "loss": 0.0188, "step": 25400 }, { "epoch": 167.17105263157896, "grad_norm": 1.154889464378357, "learning_rate": 0.0001, "loss": 0.0164, "step": 25410 }, { "epoch": 167.23684210526315, "grad_norm": 1.4954147338867188, "learning_rate": 0.0001, "loss": 0.0155, "step": 25420 }, { "epoch": 167.30263157894737, "grad_norm": 1.0341747999191284, "learning_rate": 0.0001, "loss": 0.0212, "step": 25430 }, { "epoch": 167.3684210526316, "grad_norm": 1.333762526512146, "learning_rate": 0.0001, "loss": 0.0173, "step": 25440 }, { "epoch": 167.43421052631578, "grad_norm": 1.4361519813537598, "learning_rate": 0.0001, "loss": 0.0189, "step": 25450 }, { "epoch": 167.5, "grad_norm": 1.2971889972686768, "learning_rate": 0.0001, "loss": 0.0182, "step": 25460 }, { "epoch": 167.56578947368422, "grad_norm": 1.3549476861953735, "learning_rate": 0.0001, "loss": 0.0177, "step": 25470 }, { "epoch": 167.6315789473684, "grad_norm": 1.283470630645752, "learning_rate": 0.0001, "loss": 0.0177, "step": 25480 }, { "epoch": 167.69736842105263, "grad_norm": 1.2950304746627808, "learning_rate": 0.0001, "loss": 0.0161, "step": 25490 }, { "epoch": 167.76315789473685, "grad_norm": 1.133094072341919, "learning_rate": 0.0001, "loss": 0.0184, "step": 25500 }, { "epoch": 167.82894736842104, "grad_norm": 1.097690463066101, "learning_rate": 0.0001, "loss": 0.0166, "step": 25510 }, { "epoch": 167.89473684210526, "grad_norm": 1.6458208560943604, "learning_rate": 0.0001, "loss": 0.0185, "step": 25520 }, { "epoch": 167.96052631578948, "grad_norm": 1.4381608963012695, "learning_rate": 0.0001, "loss": 0.0157, "step": 25530 }, { "epoch": 168.02631578947367, "grad_norm": 1.225541591644287, "learning_rate": 0.0001, "loss": 0.018, "step": 25540 }, { "epoch": 168.0921052631579, "grad_norm": 1.082775592803955, "learning_rate": 0.0001, "loss": 0.0166, "step": 25550 }, { "epoch": 168.1578947368421, "grad_norm": 1.2318446636199951, "learning_rate": 0.0001, "loss": 0.0189, "step": 25560 }, { "epoch": 168.22368421052633, "grad_norm": 1.5468242168426514, "learning_rate": 0.0001, "loss": 0.0179, "step": 25570 }, { "epoch": 168.28947368421052, "grad_norm": 1.3016541004180908, "learning_rate": 0.0001, "loss": 0.0184, "step": 25580 }, { "epoch": 168.35526315789474, "grad_norm": 1.2177584171295166, "learning_rate": 0.0001, "loss": 0.0169, "step": 25590 }, { "epoch": 168.42105263157896, "grad_norm": 1.384822130203247, "learning_rate": 0.0001, "loss": 0.0169, "step": 25600 }, { "epoch": 168.48684210526315, "grad_norm": 1.5428193807601929, "learning_rate": 0.0001, "loss": 0.0169, "step": 25610 }, { "epoch": 168.55263157894737, "grad_norm": 1.5225193500518799, "learning_rate": 0.0001, "loss": 0.0175, "step": 25620 }, { "epoch": 168.6184210526316, "grad_norm": 1.4411053657531738, "learning_rate": 0.0001, "loss": 0.0183, "step": 25630 }, { "epoch": 168.68421052631578, "grad_norm": 1.386864185333252, "learning_rate": 0.0001, "loss": 0.0169, "step": 25640 }, { "epoch": 168.75, "grad_norm": 1.2823785543441772, "learning_rate": 0.0001, "loss": 0.0174, "step": 25650 }, { "epoch": 168.81578947368422, "grad_norm": 1.1967064142227173, "learning_rate": 0.0001, "loss": 0.0183, "step": 25660 }, { "epoch": 168.8815789473684, "grad_norm": 1.3014733791351318, "learning_rate": 0.0001, "loss": 0.0183, "step": 25670 }, { "epoch": 168.94736842105263, "grad_norm": 1.2117063999176025, "learning_rate": 0.0001, "loss": 0.0168, "step": 25680 }, { "epoch": 169.01315789473685, "grad_norm": 1.3568642139434814, "learning_rate": 0.0001, "loss": 0.0175, "step": 25690 }, { "epoch": 169.07894736842104, "grad_norm": 0.8560911417007446, "learning_rate": 0.0001, "loss": 0.0202, "step": 25700 }, { "epoch": 169.14473684210526, "grad_norm": 1.223082184791565, "learning_rate": 0.0001, "loss": 0.0177, "step": 25710 }, { "epoch": 169.21052631578948, "grad_norm": 1.027945637702942, "learning_rate": 0.0001, "loss": 0.0186, "step": 25720 }, { "epoch": 169.27631578947367, "grad_norm": 1.2788870334625244, "learning_rate": 0.0001, "loss": 0.0175, "step": 25730 }, { "epoch": 169.3421052631579, "grad_norm": 1.5068094730377197, "learning_rate": 0.0001, "loss": 0.0176, "step": 25740 }, { "epoch": 169.4078947368421, "grad_norm": 1.1870827674865723, "learning_rate": 0.0001, "loss": 0.0182, "step": 25750 }, { "epoch": 169.47368421052633, "grad_norm": 1.2578105926513672, "learning_rate": 0.0001, "loss": 0.0171, "step": 25760 }, { "epoch": 169.53947368421052, "grad_norm": 1.2269151210784912, "learning_rate": 0.0001, "loss": 0.0185, "step": 25770 }, { "epoch": 169.60526315789474, "grad_norm": 1.482102870941162, "learning_rate": 0.0001, "loss": 0.0205, "step": 25780 }, { "epoch": 169.67105263157896, "grad_norm": 1.2745014429092407, "learning_rate": 0.0001, "loss": 0.018, "step": 25790 }, { "epoch": 169.73684210526315, "grad_norm": 1.4906076192855835, "learning_rate": 0.0001, "loss": 0.0154, "step": 25800 }, { "epoch": 169.80263157894737, "grad_norm": 0.9935338497161865, "learning_rate": 0.0001, "loss": 0.0156, "step": 25810 }, { "epoch": 169.8684210526316, "grad_norm": 1.6479783058166504, "learning_rate": 0.0001, "loss": 0.0183, "step": 25820 }, { "epoch": 169.93421052631578, "grad_norm": 1.0743275880813599, "learning_rate": 0.0001, "loss": 0.016, "step": 25830 }, { "epoch": 170.0, "grad_norm": 1.0243290662765503, "learning_rate": 0.0001, "loss": 0.0175, "step": 25840 }, { "epoch": 170.06578947368422, "grad_norm": 1.5142858028411865, "learning_rate": 0.0001, "loss": 0.0168, "step": 25850 }, { "epoch": 170.1315789473684, "grad_norm": 0.9342432618141174, "learning_rate": 0.0001, "loss": 0.0165, "step": 25860 }, { "epoch": 170.19736842105263, "grad_norm": 1.4976930618286133, "learning_rate": 0.0001, "loss": 0.0189, "step": 25870 }, { "epoch": 170.26315789473685, "grad_norm": 1.565292477607727, "learning_rate": 0.0001, "loss": 0.0173, "step": 25880 }, { "epoch": 170.32894736842104, "grad_norm": 1.1127831935882568, "learning_rate": 0.0001, "loss": 0.0184, "step": 25890 }, { "epoch": 170.39473684210526, "grad_norm": 1.198778748512268, "learning_rate": 0.0001, "loss": 0.0148, "step": 25900 }, { "epoch": 170.46052631578948, "grad_norm": 1.2573102712631226, "learning_rate": 0.0001, "loss": 0.0157, "step": 25910 }, { "epoch": 170.52631578947367, "grad_norm": 1.2907289266586304, "learning_rate": 0.0001, "loss": 0.0177, "step": 25920 }, { "epoch": 170.5921052631579, "grad_norm": 1.4278053045272827, "learning_rate": 0.0001, "loss": 0.0181, "step": 25930 }, { "epoch": 170.6578947368421, "grad_norm": 1.3394439220428467, "learning_rate": 0.0001, "loss": 0.0168, "step": 25940 }, { "epoch": 170.72368421052633, "grad_norm": 1.1726481914520264, "learning_rate": 0.0001, "loss": 0.0171, "step": 25950 }, { "epoch": 170.78947368421052, "grad_norm": 1.369882583618164, "learning_rate": 0.0001, "loss": 0.0188, "step": 25960 }, { "epoch": 170.85526315789474, "grad_norm": 1.3367946147918701, "learning_rate": 0.0001, "loss": 0.0179, "step": 25970 }, { "epoch": 170.92105263157896, "grad_norm": 1.3555912971496582, "learning_rate": 0.0001, "loss": 0.0184, "step": 25980 }, { "epoch": 170.98684210526315, "grad_norm": 1.1395307779312134, "learning_rate": 0.0001, "loss": 0.0187, "step": 25990 }, { "epoch": 171.05263157894737, "grad_norm": 1.1588562726974487, "learning_rate": 0.0001, "loss": 0.0164, "step": 26000 }, { "epoch": 171.1184210526316, "grad_norm": 0.930858850479126, "learning_rate": 0.0001, "loss": 0.0182, "step": 26010 }, { "epoch": 171.18421052631578, "grad_norm": 0.9874327778816223, "learning_rate": 0.0001, "loss": 0.0169, "step": 26020 }, { "epoch": 171.25, "grad_norm": 1.0993543863296509, "learning_rate": 0.0001, "loss": 0.0172, "step": 26030 }, { "epoch": 171.31578947368422, "grad_norm": 1.1052964925765991, "learning_rate": 0.0001, "loss": 0.0178, "step": 26040 }, { "epoch": 171.3815789473684, "grad_norm": 1.25705885887146, "learning_rate": 0.0001, "loss": 0.0183, "step": 26050 }, { "epoch": 171.44736842105263, "grad_norm": 0.7609035968780518, "learning_rate": 0.0001, "loss": 0.0187, "step": 26060 }, { "epoch": 171.51315789473685, "grad_norm": 0.779438316822052, "learning_rate": 0.0001, "loss": 0.0175, "step": 26070 }, { "epoch": 171.57894736842104, "grad_norm": 0.9345868229866028, "learning_rate": 0.0001, "loss": 0.0191, "step": 26080 }, { "epoch": 171.64473684210526, "grad_norm": 1.416816234588623, "learning_rate": 0.0001, "loss": 0.0176, "step": 26090 }, { "epoch": 171.71052631578948, "grad_norm": 1.5457994937896729, "learning_rate": 0.0001, "loss": 0.0185, "step": 26100 }, { "epoch": 171.77631578947367, "grad_norm": 1.3715592622756958, "learning_rate": 0.0001, "loss": 0.017, "step": 26110 }, { "epoch": 171.8421052631579, "grad_norm": 1.3556997776031494, "learning_rate": 0.0001, "loss": 0.0166, "step": 26120 }, { "epoch": 171.9078947368421, "grad_norm": 1.2203997373580933, "learning_rate": 0.0001, "loss": 0.0189, "step": 26130 }, { "epoch": 171.97368421052633, "grad_norm": 1.4606289863586426, "learning_rate": 0.0001, "loss": 0.0179, "step": 26140 }, { "epoch": 172.03947368421052, "grad_norm": 1.5704396963119507, "learning_rate": 0.0001, "loss": 0.0165, "step": 26150 }, { "epoch": 172.10526315789474, "grad_norm": 1.2381999492645264, "learning_rate": 0.0001, "loss": 0.0165, "step": 26160 }, { "epoch": 172.17105263157896, "grad_norm": 1.518921136856079, "learning_rate": 0.0001, "loss": 0.016, "step": 26170 }, { "epoch": 172.23684210526315, "grad_norm": 1.4631500244140625, "learning_rate": 0.0001, "loss": 0.0182, "step": 26180 }, { "epoch": 172.30263157894737, "grad_norm": 1.0997428894042969, "learning_rate": 0.0001, "loss": 0.0154, "step": 26190 }, { "epoch": 172.3684210526316, "grad_norm": 1.2960116863250732, "learning_rate": 0.0001, "loss": 0.0174, "step": 26200 }, { "epoch": 172.43421052631578, "grad_norm": 1.2885228395462036, "learning_rate": 0.0001, "loss": 0.0188, "step": 26210 }, { "epoch": 172.5, "grad_norm": 1.0690890550613403, "learning_rate": 0.0001, "loss": 0.0175, "step": 26220 }, { "epoch": 172.56578947368422, "grad_norm": 1.4473509788513184, "learning_rate": 0.0001, "loss": 0.0177, "step": 26230 }, { "epoch": 172.6315789473684, "grad_norm": 0.9004012942314148, "learning_rate": 0.0001, "loss": 0.0177, "step": 26240 }, { "epoch": 172.69736842105263, "grad_norm": 1.4914005994796753, "learning_rate": 0.0001, "loss": 0.0182, "step": 26250 }, { "epoch": 172.76315789473685, "grad_norm": 1.30367112159729, "learning_rate": 0.0001, "loss": 0.0171, "step": 26260 }, { "epoch": 172.82894736842104, "grad_norm": 1.3740594387054443, "learning_rate": 0.0001, "loss": 0.0161, "step": 26270 }, { "epoch": 172.89473684210526, "grad_norm": 1.5620779991149902, "learning_rate": 0.0001, "loss": 0.0178, "step": 26280 }, { "epoch": 172.96052631578948, "grad_norm": 1.559910774230957, "learning_rate": 0.0001, "loss": 0.0151, "step": 26290 }, { "epoch": 173.02631578947367, "grad_norm": 1.3926976919174194, "learning_rate": 0.0001, "loss": 0.02, "step": 26300 }, { "epoch": 173.0921052631579, "grad_norm": 1.1394861936569214, "learning_rate": 0.0001, "loss": 0.0165, "step": 26310 }, { "epoch": 173.1578947368421, "grad_norm": 1.7277958393096924, "learning_rate": 0.0001, "loss": 0.0192, "step": 26320 }, { "epoch": 173.22368421052633, "grad_norm": 1.507736325263977, "learning_rate": 0.0001, "loss": 0.0165, "step": 26330 }, { "epoch": 173.28947368421052, "grad_norm": 1.7787176370620728, "learning_rate": 0.0001, "loss": 0.0163, "step": 26340 }, { "epoch": 173.35526315789474, "grad_norm": 1.5719088315963745, "learning_rate": 0.0001, "loss": 0.0162, "step": 26350 }, { "epoch": 173.42105263157896, "grad_norm": 1.6575725078582764, "learning_rate": 0.0001, "loss": 0.0164, "step": 26360 }, { "epoch": 173.48684210526315, "grad_norm": 1.9089598655700684, "learning_rate": 0.0001, "loss": 0.0178, "step": 26370 }, { "epoch": 173.55263157894737, "grad_norm": 1.8127702474594116, "learning_rate": 0.0001, "loss": 0.0169, "step": 26380 }, { "epoch": 173.6184210526316, "grad_norm": 2.279390573501587, "learning_rate": 0.0001, "loss": 0.0167, "step": 26390 }, { "epoch": 173.68421052631578, "grad_norm": 1.7834361791610718, "learning_rate": 0.0001, "loss": 0.0161, "step": 26400 }, { "epoch": 173.75, "grad_norm": 2.3080971240997314, "learning_rate": 0.0001, "loss": 0.0178, "step": 26410 }, { "epoch": 173.81578947368422, "grad_norm": 1.2840913534164429, "learning_rate": 0.0001, "loss": 0.0172, "step": 26420 }, { "epoch": 173.8815789473684, "grad_norm": 1.8161916732788086, "learning_rate": 0.0001, "loss": 0.0162, "step": 26430 }, { "epoch": 173.94736842105263, "grad_norm": 2.031912088394165, "learning_rate": 0.0001, "loss": 0.0181, "step": 26440 }, { "epoch": 174.01315789473685, "grad_norm": 1.5057018995285034, "learning_rate": 0.0001, "loss": 0.0153, "step": 26450 }, { "epoch": 174.07894736842104, "grad_norm": 1.6998172998428345, "learning_rate": 0.0001, "loss": 0.0169, "step": 26460 }, { "epoch": 174.14473684210526, "grad_norm": 1.6755802631378174, "learning_rate": 0.0001, "loss": 0.0161, "step": 26470 }, { "epoch": 174.21052631578948, "grad_norm": 1.45081627368927, "learning_rate": 0.0001, "loss": 0.0157, "step": 26480 }, { "epoch": 174.27631578947367, "grad_norm": 1.4480472803115845, "learning_rate": 0.0001, "loss": 0.0161, "step": 26490 }, { "epoch": 174.3421052631579, "grad_norm": 1.2978633642196655, "learning_rate": 0.0001, "loss": 0.0158, "step": 26500 }, { "epoch": 174.4078947368421, "grad_norm": 1.4956743717193604, "learning_rate": 0.0001, "loss": 0.0148, "step": 26510 }, { "epoch": 174.47368421052633, "grad_norm": 1.184404969215393, "learning_rate": 0.0001, "loss": 0.0151, "step": 26520 }, { "epoch": 174.53947368421052, "grad_norm": 1.209301233291626, "learning_rate": 0.0001, "loss": 0.0177, "step": 26530 }, { "epoch": 174.60526315789474, "grad_norm": 1.2950125932693481, "learning_rate": 0.0001, "loss": 0.0168, "step": 26540 }, { "epoch": 174.67105263157896, "grad_norm": 1.8026961088180542, "learning_rate": 0.0001, "loss": 0.0158, "step": 26550 }, { "epoch": 174.73684210526315, "grad_norm": 1.2904764413833618, "learning_rate": 0.0001, "loss": 0.016, "step": 26560 }, { "epoch": 174.80263157894737, "grad_norm": 1.365148663520813, "learning_rate": 0.0001, "loss": 0.0153, "step": 26570 }, { "epoch": 174.8684210526316, "grad_norm": 1.2511751651763916, "learning_rate": 0.0001, "loss": 0.0169, "step": 26580 }, { "epoch": 174.93421052631578, "grad_norm": 1.4057785272598267, "learning_rate": 0.0001, "loss": 0.0179, "step": 26590 }, { "epoch": 175.0, "grad_norm": 1.572277545928955, "learning_rate": 0.0001, "loss": 0.0177, "step": 26600 }, { "epoch": 175.06578947368422, "grad_norm": 1.2321019172668457, "learning_rate": 0.0001, "loss": 0.017, "step": 26610 }, { "epoch": 175.1315789473684, "grad_norm": 1.2357227802276611, "learning_rate": 0.0001, "loss": 0.0163, "step": 26620 }, { "epoch": 175.19736842105263, "grad_norm": 1.3698261976242065, "learning_rate": 0.0001, "loss": 0.0154, "step": 26630 }, { "epoch": 175.26315789473685, "grad_norm": 1.2478382587432861, "learning_rate": 0.0001, "loss": 0.0174, "step": 26640 }, { "epoch": 175.32894736842104, "grad_norm": 1.074097990989685, "learning_rate": 0.0001, "loss": 0.0169, "step": 26650 }, { "epoch": 175.39473684210526, "grad_norm": 0.874796986579895, "learning_rate": 0.0001, "loss": 0.0165, "step": 26660 }, { "epoch": 175.46052631578948, "grad_norm": 1.2885454893112183, "learning_rate": 0.0001, "loss": 0.0193, "step": 26670 }, { "epoch": 175.52631578947367, "grad_norm": 1.255919337272644, "learning_rate": 0.0001, "loss": 0.0167, "step": 26680 }, { "epoch": 175.5921052631579, "grad_norm": 1.7973171472549438, "learning_rate": 0.0001, "loss": 0.0188, "step": 26690 }, { "epoch": 175.6578947368421, "grad_norm": 1.4382481575012207, "learning_rate": 0.0001, "loss": 0.018, "step": 26700 }, { "epoch": 175.72368421052633, "grad_norm": 1.3824963569641113, "learning_rate": 0.0001, "loss": 0.0186, "step": 26710 }, { "epoch": 175.78947368421052, "grad_norm": 1.2797788381576538, "learning_rate": 0.0001, "loss": 0.0183, "step": 26720 }, { "epoch": 175.85526315789474, "grad_norm": 1.3257101774215698, "learning_rate": 0.0001, "loss": 0.0185, "step": 26730 }, { "epoch": 175.92105263157896, "grad_norm": 1.3095109462738037, "learning_rate": 0.0001, "loss": 0.0161, "step": 26740 }, { "epoch": 175.98684210526315, "grad_norm": 1.2997289896011353, "learning_rate": 0.0001, "loss": 0.0159, "step": 26750 }, { "epoch": 176.05263157894737, "grad_norm": 1.637865662574768, "learning_rate": 0.0001, "loss": 0.0164, "step": 26760 }, { "epoch": 176.1184210526316, "grad_norm": 1.384114384651184, "learning_rate": 0.0001, "loss": 0.0169, "step": 26770 }, { "epoch": 176.18421052631578, "grad_norm": 1.4303343296051025, "learning_rate": 0.0001, "loss": 0.0187, "step": 26780 }, { "epoch": 176.25, "grad_norm": 1.5146687030792236, "learning_rate": 0.0001, "loss": 0.0171, "step": 26790 }, { "epoch": 176.31578947368422, "grad_norm": 1.5877227783203125, "learning_rate": 0.0001, "loss": 0.019, "step": 26800 }, { "epoch": 176.3815789473684, "grad_norm": 1.3442049026489258, "learning_rate": 0.0001, "loss": 0.0164, "step": 26810 }, { "epoch": 176.44736842105263, "grad_norm": 1.266574740409851, "learning_rate": 0.0001, "loss": 0.0174, "step": 26820 }, { "epoch": 176.51315789473685, "grad_norm": 1.4955312013626099, "learning_rate": 0.0001, "loss": 0.0172, "step": 26830 }, { "epoch": 176.57894736842104, "grad_norm": 0.9446787238121033, "learning_rate": 0.0001, "loss": 0.0184, "step": 26840 }, { "epoch": 176.64473684210526, "grad_norm": 0.9993739724159241, "learning_rate": 0.0001, "loss": 0.0173, "step": 26850 }, { "epoch": 176.71052631578948, "grad_norm": 1.365684151649475, "learning_rate": 0.0001, "loss": 0.0174, "step": 26860 }, { "epoch": 176.77631578947367, "grad_norm": 1.4473934173583984, "learning_rate": 0.0001, "loss": 0.0165, "step": 26870 }, { "epoch": 176.8421052631579, "grad_norm": 1.6463544368743896, "learning_rate": 0.0001, "loss": 0.0198, "step": 26880 }, { "epoch": 176.9078947368421, "grad_norm": 1.3166974782943726, "learning_rate": 0.0001, "loss": 0.0175, "step": 26890 }, { "epoch": 176.97368421052633, "grad_norm": 1.214211106300354, "learning_rate": 0.0001, "loss": 0.016, "step": 26900 }, { "epoch": 177.03947368421052, "grad_norm": 1.529462218284607, "learning_rate": 0.0001, "loss": 0.0177, "step": 26910 }, { "epoch": 177.10526315789474, "grad_norm": 1.0761915445327759, "learning_rate": 0.0001, "loss": 0.0169, "step": 26920 }, { "epoch": 177.17105263157896, "grad_norm": 1.2023556232452393, "learning_rate": 0.0001, "loss": 0.0159, "step": 26930 }, { "epoch": 177.23684210526315, "grad_norm": 1.5598783493041992, "learning_rate": 0.0001, "loss": 0.0196, "step": 26940 }, { "epoch": 177.30263157894737, "grad_norm": 1.2606966495513916, "learning_rate": 0.0001, "loss": 0.0166, "step": 26950 }, { "epoch": 177.3684210526316, "grad_norm": 1.6140894889831543, "learning_rate": 0.0001, "loss": 0.0165, "step": 26960 }, { "epoch": 177.43421052631578, "grad_norm": 1.3021560907363892, "learning_rate": 0.0001, "loss": 0.0171, "step": 26970 }, { "epoch": 177.5, "grad_norm": 1.0216045379638672, "learning_rate": 0.0001, "loss": 0.0169, "step": 26980 }, { "epoch": 177.56578947368422, "grad_norm": 0.9635993242263794, "learning_rate": 0.0001, "loss": 0.0166, "step": 26990 }, { "epoch": 177.6315789473684, "grad_norm": 1.2217941284179688, "learning_rate": 0.0001, "loss": 0.0178, "step": 27000 }, { "epoch": 177.69736842105263, "grad_norm": 1.3411375284194946, "learning_rate": 0.0001, "loss": 0.0161, "step": 27010 }, { "epoch": 177.76315789473685, "grad_norm": 0.7628015279769897, "learning_rate": 0.0001, "loss": 0.0167, "step": 27020 }, { "epoch": 177.82894736842104, "grad_norm": 0.9084556698799133, "learning_rate": 0.0001, "loss": 0.0188, "step": 27030 }, { "epoch": 177.89473684210526, "grad_norm": 1.1490297317504883, "learning_rate": 0.0001, "loss": 0.0166, "step": 27040 }, { "epoch": 177.96052631578948, "grad_norm": 1.4998666048049927, "learning_rate": 0.0001, "loss": 0.0183, "step": 27050 }, { "epoch": 178.02631578947367, "grad_norm": 1.2536399364471436, "learning_rate": 0.0001, "loss": 0.0185, "step": 27060 }, { "epoch": 178.0921052631579, "grad_norm": 1.217274785041809, "learning_rate": 0.0001, "loss": 0.0179, "step": 27070 }, { "epoch": 178.1578947368421, "grad_norm": 1.3491017818450928, "learning_rate": 0.0001, "loss": 0.0181, "step": 27080 }, { "epoch": 178.22368421052633, "grad_norm": 1.25252366065979, "learning_rate": 0.0001, "loss": 0.0177, "step": 27090 }, { "epoch": 178.28947368421052, "grad_norm": 1.0801442861557007, "learning_rate": 0.0001, "loss": 0.0181, "step": 27100 }, { "epoch": 178.35526315789474, "grad_norm": 1.302809238433838, "learning_rate": 0.0001, "loss": 0.0171, "step": 27110 }, { "epoch": 178.42105263157896, "grad_norm": 1.4321649074554443, "learning_rate": 0.0001, "loss": 0.0179, "step": 27120 }, { "epoch": 178.48684210526315, "grad_norm": 1.3368439674377441, "learning_rate": 0.0001, "loss": 0.0169, "step": 27130 }, { "epoch": 178.55263157894737, "grad_norm": 1.2113497257232666, "learning_rate": 0.0001, "loss": 0.0172, "step": 27140 }, { "epoch": 178.6184210526316, "grad_norm": 1.0615336894989014, "learning_rate": 0.0001, "loss": 0.0174, "step": 27150 }, { "epoch": 178.68421052631578, "grad_norm": 1.075102686882019, "learning_rate": 0.0001, "loss": 0.0198, "step": 27160 }, { "epoch": 178.75, "grad_norm": 0.9051514863967896, "learning_rate": 0.0001, "loss": 0.0175, "step": 27170 }, { "epoch": 178.81578947368422, "grad_norm": 1.019240140914917, "learning_rate": 0.0001, "loss": 0.0177, "step": 27180 }, { "epoch": 178.8815789473684, "grad_norm": 1.2126598358154297, "learning_rate": 0.0001, "loss": 0.0177, "step": 27190 }, { "epoch": 178.94736842105263, "grad_norm": 1.2346746921539307, "learning_rate": 0.0001, "loss": 0.017, "step": 27200 }, { "epoch": 179.01315789473685, "grad_norm": 1.2096296548843384, "learning_rate": 0.0001, "loss": 0.0198, "step": 27210 }, { "epoch": 179.07894736842104, "grad_norm": 0.973487913608551, "learning_rate": 0.0001, "loss": 0.0174, "step": 27220 }, { "epoch": 179.14473684210526, "grad_norm": 1.368411660194397, "learning_rate": 0.0001, "loss": 0.0188, "step": 27230 }, { "epoch": 179.21052631578948, "grad_norm": 1.2822602987289429, "learning_rate": 0.0001, "loss": 0.0171, "step": 27240 }, { "epoch": 179.27631578947367, "grad_norm": 1.2959389686584473, "learning_rate": 0.0001, "loss": 0.0172, "step": 27250 }, { "epoch": 179.3421052631579, "grad_norm": 1.3127459287643433, "learning_rate": 0.0001, "loss": 0.0187, "step": 27260 }, { "epoch": 179.4078947368421, "grad_norm": 1.122056007385254, "learning_rate": 0.0001, "loss": 0.0169, "step": 27270 }, { "epoch": 179.47368421052633, "grad_norm": 1.3028178215026855, "learning_rate": 0.0001, "loss": 0.0181, "step": 27280 }, { "epoch": 179.53947368421052, "grad_norm": 0.9754687547683716, "learning_rate": 0.0001, "loss": 0.0184, "step": 27290 }, { "epoch": 179.60526315789474, "grad_norm": 1.238944411277771, "learning_rate": 0.0001, "loss": 0.019, "step": 27300 }, { "epoch": 179.67105263157896, "grad_norm": 1.3136788606643677, "learning_rate": 0.0001, "loss": 0.0167, "step": 27310 }, { "epoch": 179.73684210526315, "grad_norm": 1.3043994903564453, "learning_rate": 0.0001, "loss": 0.0167, "step": 27320 }, { "epoch": 179.80263157894737, "grad_norm": 1.0866022109985352, "learning_rate": 0.0001, "loss": 0.0161, "step": 27330 }, { "epoch": 179.8684210526316, "grad_norm": 1.4017601013183594, "learning_rate": 0.0001, "loss": 0.017, "step": 27340 }, { "epoch": 179.93421052631578, "grad_norm": 1.2640719413757324, "learning_rate": 0.0001, "loss": 0.0184, "step": 27350 }, { "epoch": 180.0, "grad_norm": 1.5062077045440674, "learning_rate": 0.0001, "loss": 0.0189, "step": 27360 }, { "epoch": 180.06578947368422, "grad_norm": 1.3499562740325928, "learning_rate": 0.0001, "loss": 0.0177, "step": 27370 }, { "epoch": 180.1315789473684, "grad_norm": 1.028475284576416, "learning_rate": 0.0001, "loss": 0.0167, "step": 27380 }, { "epoch": 180.19736842105263, "grad_norm": 1.1006380319595337, "learning_rate": 0.0001, "loss": 0.0172, "step": 27390 }, { "epoch": 180.26315789473685, "grad_norm": 1.2591434717178345, "learning_rate": 0.0001, "loss": 0.0171, "step": 27400 }, { "epoch": 180.32894736842104, "grad_norm": 1.4151883125305176, "learning_rate": 0.0001, "loss": 0.0186, "step": 27410 }, { "epoch": 180.39473684210526, "grad_norm": 1.132414698600769, "learning_rate": 0.0001, "loss": 0.0183, "step": 27420 }, { "epoch": 180.46052631578948, "grad_norm": 1.3552273511886597, "learning_rate": 0.0001, "loss": 0.0198, "step": 27430 }, { "epoch": 180.52631578947367, "grad_norm": 0.8924891352653503, "learning_rate": 0.0001, "loss": 0.0176, "step": 27440 }, { "epoch": 180.5921052631579, "grad_norm": 1.676880955696106, "learning_rate": 0.0001, "loss": 0.0195, "step": 27450 }, { "epoch": 180.6578947368421, "grad_norm": 1.1529208421707153, "learning_rate": 0.0001, "loss": 0.0184, "step": 27460 }, { "epoch": 180.72368421052633, "grad_norm": 1.1190521717071533, "learning_rate": 0.0001, "loss": 0.0172, "step": 27470 }, { "epoch": 180.78947368421052, "grad_norm": 1.293173909187317, "learning_rate": 0.0001, "loss": 0.0192, "step": 27480 }, { "epoch": 180.85526315789474, "grad_norm": 0.9071879386901855, "learning_rate": 0.0001, "loss": 0.0168, "step": 27490 }, { "epoch": 180.92105263157896, "grad_norm": 1.2806898355484009, "learning_rate": 0.0001, "loss": 0.0164, "step": 27500 }, { "epoch": 180.98684210526315, "grad_norm": 0.9975317120552063, "learning_rate": 0.0001, "loss": 0.0152, "step": 27510 }, { "epoch": 181.05263157894737, "grad_norm": 1.2514346837997437, "learning_rate": 0.0001, "loss": 0.0154, "step": 27520 }, { "epoch": 181.1184210526316, "grad_norm": 1.361411452293396, "learning_rate": 0.0001, "loss": 0.016, "step": 27530 }, { "epoch": 181.18421052631578, "grad_norm": 0.9159877896308899, "learning_rate": 0.0001, "loss": 0.0201, "step": 27540 }, { "epoch": 181.25, "grad_norm": 1.0003042221069336, "learning_rate": 0.0001, "loss": 0.0176, "step": 27550 }, { "epoch": 181.31578947368422, "grad_norm": 1.133141040802002, "learning_rate": 0.0001, "loss": 0.0159, "step": 27560 }, { "epoch": 181.3815789473684, "grad_norm": 1.0461962223052979, "learning_rate": 0.0001, "loss": 0.0201, "step": 27570 }, { "epoch": 181.44736842105263, "grad_norm": 1.3540297746658325, "learning_rate": 0.0001, "loss": 0.0189, "step": 27580 }, { "epoch": 181.51315789473685, "grad_norm": 1.3331037759780884, "learning_rate": 0.0001, "loss": 0.018, "step": 27590 }, { "epoch": 181.57894736842104, "grad_norm": 1.4690834283828735, "learning_rate": 0.0001, "loss": 0.0176, "step": 27600 }, { "epoch": 181.64473684210526, "grad_norm": 1.3080083131790161, "learning_rate": 0.0001, "loss": 0.0182, "step": 27610 }, { "epoch": 181.71052631578948, "grad_norm": 1.1954478025436401, "learning_rate": 0.0001, "loss": 0.0165, "step": 27620 }, { "epoch": 181.77631578947367, "grad_norm": 1.5837913751602173, "learning_rate": 0.0001, "loss": 0.0177, "step": 27630 }, { "epoch": 181.8421052631579, "grad_norm": 1.9183199405670166, "learning_rate": 0.0001, "loss": 0.017, "step": 27640 }, { "epoch": 181.9078947368421, "grad_norm": 1.468712568283081, "learning_rate": 0.0001, "loss": 0.0167, "step": 27650 }, { "epoch": 181.97368421052633, "grad_norm": 1.183106780052185, "learning_rate": 0.0001, "loss": 0.0157, "step": 27660 }, { "epoch": 182.03947368421052, "grad_norm": 1.315909743309021, "learning_rate": 0.0001, "loss": 0.016, "step": 27670 }, { "epoch": 182.10526315789474, "grad_norm": 1.3399372100830078, "learning_rate": 0.0001, "loss": 0.016, "step": 27680 }, { "epoch": 182.17105263157896, "grad_norm": 1.3758108615875244, "learning_rate": 0.0001, "loss": 0.0163, "step": 27690 }, { "epoch": 182.23684210526315, "grad_norm": 1.372221827507019, "learning_rate": 0.0001, "loss": 0.0183, "step": 27700 }, { "epoch": 182.30263157894737, "grad_norm": 1.1618051528930664, "learning_rate": 0.0001, "loss": 0.018, "step": 27710 }, { "epoch": 182.3684210526316, "grad_norm": 1.5792875289916992, "learning_rate": 0.0001, "loss": 0.0162, "step": 27720 }, { "epoch": 182.43421052631578, "grad_norm": 1.4284778833389282, "learning_rate": 0.0001, "loss": 0.0172, "step": 27730 }, { "epoch": 182.5, "grad_norm": 1.1605561971664429, "learning_rate": 0.0001, "loss": 0.0172, "step": 27740 }, { "epoch": 182.56578947368422, "grad_norm": 1.394383192062378, "learning_rate": 0.0001, "loss": 0.0187, "step": 27750 }, { "epoch": 182.6315789473684, "grad_norm": 1.2640798091888428, "learning_rate": 0.0001, "loss": 0.019, "step": 27760 }, { "epoch": 182.69736842105263, "grad_norm": 1.0631877183914185, "learning_rate": 0.0001, "loss": 0.0161, "step": 27770 }, { "epoch": 182.76315789473685, "grad_norm": 1.3253459930419922, "learning_rate": 0.0001, "loss": 0.0191, "step": 27780 }, { "epoch": 182.82894736842104, "grad_norm": 1.4501514434814453, "learning_rate": 0.0001, "loss": 0.0163, "step": 27790 }, { "epoch": 182.89473684210526, "grad_norm": 1.629292607307434, "learning_rate": 0.0001, "loss": 0.0176, "step": 27800 }, { "epoch": 182.96052631578948, "grad_norm": 1.373126745223999, "learning_rate": 0.0001, "loss": 0.0166, "step": 27810 }, { "epoch": 183.02631578947367, "grad_norm": 1.4406814575195312, "learning_rate": 0.0001, "loss": 0.018, "step": 27820 }, { "epoch": 183.0921052631579, "grad_norm": 1.3722494840621948, "learning_rate": 0.0001, "loss": 0.0178, "step": 27830 }, { "epoch": 183.1578947368421, "grad_norm": 1.8163440227508545, "learning_rate": 0.0001, "loss": 0.0191, "step": 27840 }, { "epoch": 183.22368421052633, "grad_norm": 2.5917444229125977, "learning_rate": 0.0001, "loss": 0.0175, "step": 27850 }, { "epoch": 183.28947368421052, "grad_norm": 1.976516604423523, "learning_rate": 0.0001, "loss": 0.0177, "step": 27860 }, { "epoch": 183.35526315789474, "grad_norm": 1.48430335521698, "learning_rate": 0.0001, "loss": 0.0166, "step": 27870 }, { "epoch": 183.42105263157896, "grad_norm": 1.908106803894043, "learning_rate": 0.0001, "loss": 0.0143, "step": 27880 }, { "epoch": 183.48684210526315, "grad_norm": 1.8086661100387573, "learning_rate": 0.0001, "loss": 0.015, "step": 27890 }, { "epoch": 183.55263157894737, "grad_norm": 1.7461923360824585, "learning_rate": 0.0001, "loss": 0.0163, "step": 27900 }, { "epoch": 183.6184210526316, "grad_norm": 1.1997524499893188, "learning_rate": 0.0001, "loss": 0.0175, "step": 27910 }, { "epoch": 183.68421052631578, "grad_norm": 1.2785133123397827, "learning_rate": 0.0001, "loss": 0.0149, "step": 27920 }, { "epoch": 183.75, "grad_norm": 0.9899089932441711, "learning_rate": 0.0001, "loss": 0.0153, "step": 27930 }, { "epoch": 183.81578947368422, "grad_norm": 1.36931574344635, "learning_rate": 0.0001, "loss": 0.0166, "step": 27940 }, { "epoch": 183.8815789473684, "grad_norm": 1.0563100576400757, "learning_rate": 0.0001, "loss": 0.0163, "step": 27950 }, { "epoch": 183.94736842105263, "grad_norm": 1.2459518909454346, "learning_rate": 0.0001, "loss": 0.018, "step": 27960 }, { "epoch": 184.01315789473685, "grad_norm": 1.7021054029464722, "learning_rate": 0.0001, "loss": 0.0168, "step": 27970 }, { "epoch": 184.07894736842104, "grad_norm": 1.1068536043167114, "learning_rate": 0.0001, "loss": 0.015, "step": 27980 }, { "epoch": 184.14473684210526, "grad_norm": 1.299325704574585, "learning_rate": 0.0001, "loss": 0.0187, "step": 27990 }, { "epoch": 184.21052631578948, "grad_norm": 1.3241454362869263, "learning_rate": 0.0001, "loss": 0.0147, "step": 28000 }, { "epoch": 184.27631578947367, "grad_norm": 1.4273960590362549, "learning_rate": 0.0001, "loss": 0.0187, "step": 28010 }, { "epoch": 184.3421052631579, "grad_norm": 1.1758840084075928, "learning_rate": 0.0001, "loss": 0.0168, "step": 28020 }, { "epoch": 184.4078947368421, "grad_norm": 1.7336351871490479, "learning_rate": 0.0001, "loss": 0.0162, "step": 28030 }, { "epoch": 184.47368421052633, "grad_norm": 1.4646220207214355, "learning_rate": 0.0001, "loss": 0.0177, "step": 28040 }, { "epoch": 184.53947368421052, "grad_norm": 1.5751539468765259, "learning_rate": 0.0001, "loss": 0.0175, "step": 28050 }, { "epoch": 184.60526315789474, "grad_norm": 1.675756812095642, "learning_rate": 0.0001, "loss": 0.016, "step": 28060 }, { "epoch": 184.67105263157896, "grad_norm": 1.7213208675384521, "learning_rate": 0.0001, "loss": 0.0163, "step": 28070 }, { "epoch": 184.73684210526315, "grad_norm": 1.4388047456741333, "learning_rate": 0.0001, "loss": 0.0159, "step": 28080 }, { "epoch": 184.80263157894737, "grad_norm": 1.40472412109375, "learning_rate": 0.0001, "loss": 0.0192, "step": 28090 }, { "epoch": 184.8684210526316, "grad_norm": 1.187379240989685, "learning_rate": 0.0001, "loss": 0.0151, "step": 28100 }, { "epoch": 184.93421052631578, "grad_norm": 1.5341135263442993, "learning_rate": 0.0001, "loss": 0.0186, "step": 28110 }, { "epoch": 185.0, "grad_norm": 1.2194042205810547, "learning_rate": 0.0001, "loss": 0.0184, "step": 28120 }, { "epoch": 185.06578947368422, "grad_norm": 1.183134913444519, "learning_rate": 0.0001, "loss": 0.018, "step": 28130 }, { "epoch": 185.1315789473684, "grad_norm": 1.4038760662078857, "learning_rate": 0.0001, "loss": 0.0162, "step": 28140 }, { "epoch": 185.19736842105263, "grad_norm": 1.3915314674377441, "learning_rate": 0.0001, "loss": 0.0172, "step": 28150 }, { "epoch": 185.26315789473685, "grad_norm": 1.3534201383590698, "learning_rate": 0.0001, "loss": 0.0177, "step": 28160 }, { "epoch": 185.32894736842104, "grad_norm": 1.2670302391052246, "learning_rate": 0.0001, "loss": 0.0171, "step": 28170 }, { "epoch": 185.39473684210526, "grad_norm": 1.6847999095916748, "learning_rate": 0.0001, "loss": 0.0164, "step": 28180 }, { "epoch": 185.46052631578948, "grad_norm": 1.430661678314209, "learning_rate": 0.0001, "loss": 0.0181, "step": 28190 }, { "epoch": 185.52631578947367, "grad_norm": 1.600559949874878, "learning_rate": 0.0001, "loss": 0.0185, "step": 28200 }, { "epoch": 185.5921052631579, "grad_norm": 1.3401246070861816, "learning_rate": 0.0001, "loss": 0.0182, "step": 28210 }, { "epoch": 185.6578947368421, "grad_norm": 1.1347683668136597, "learning_rate": 0.0001, "loss": 0.0162, "step": 28220 }, { "epoch": 185.72368421052633, "grad_norm": 1.1868791580200195, "learning_rate": 0.0001, "loss": 0.0168, "step": 28230 }, { "epoch": 185.78947368421052, "grad_norm": 1.1938930749893188, "learning_rate": 0.0001, "loss": 0.0167, "step": 28240 }, { "epoch": 185.85526315789474, "grad_norm": 0.9007596969604492, "learning_rate": 0.0001, "loss": 0.0152, "step": 28250 }, { "epoch": 185.92105263157896, "grad_norm": 1.0700163841247559, "learning_rate": 0.0001, "loss": 0.0159, "step": 28260 }, { "epoch": 185.98684210526315, "grad_norm": 1.3188432455062866, "learning_rate": 0.0001, "loss": 0.0169, "step": 28270 }, { "epoch": 186.05263157894737, "grad_norm": 1.2520577907562256, "learning_rate": 0.0001, "loss": 0.0166, "step": 28280 }, { "epoch": 186.1184210526316, "grad_norm": 1.2639751434326172, "learning_rate": 0.0001, "loss": 0.0151, "step": 28290 }, { "epoch": 186.18421052631578, "grad_norm": 1.2425426244735718, "learning_rate": 0.0001, "loss": 0.0185, "step": 28300 }, { "epoch": 186.25, "grad_norm": 1.1979525089263916, "learning_rate": 0.0001, "loss": 0.0172, "step": 28310 }, { "epoch": 186.31578947368422, "grad_norm": 1.25445556640625, "learning_rate": 0.0001, "loss": 0.0185, "step": 28320 }, { "epoch": 186.3815789473684, "grad_norm": 1.0609337091445923, "learning_rate": 0.0001, "loss": 0.016, "step": 28330 }, { "epoch": 186.44736842105263, "grad_norm": 0.9719675183296204, "learning_rate": 0.0001, "loss": 0.0174, "step": 28340 }, { "epoch": 186.51315789473685, "grad_norm": 1.2194287776947021, "learning_rate": 0.0001, "loss": 0.0184, "step": 28350 }, { "epoch": 186.57894736842104, "grad_norm": 1.2803702354431152, "learning_rate": 0.0001, "loss": 0.0171, "step": 28360 }, { "epoch": 186.64473684210526, "grad_norm": 1.472983717918396, "learning_rate": 0.0001, "loss": 0.0165, "step": 28370 }, { "epoch": 186.71052631578948, "grad_norm": 0.7579768896102905, "learning_rate": 0.0001, "loss": 0.0169, "step": 28380 }, { "epoch": 186.77631578947367, "grad_norm": 1.4877924919128418, "learning_rate": 0.0001, "loss": 0.0185, "step": 28390 }, { "epoch": 186.8421052631579, "grad_norm": 1.1992541551589966, "learning_rate": 0.0001, "loss": 0.0166, "step": 28400 }, { "epoch": 186.9078947368421, "grad_norm": 1.5354152917861938, "learning_rate": 0.0001, "loss": 0.0172, "step": 28410 }, { "epoch": 186.97368421052633, "grad_norm": 1.009834885597229, "learning_rate": 0.0001, "loss": 0.0169, "step": 28420 }, { "epoch": 187.03947368421052, "grad_norm": 1.4068772792816162, "learning_rate": 0.0001, "loss": 0.0151, "step": 28430 }, { "epoch": 187.10526315789474, "grad_norm": 1.7721872329711914, "learning_rate": 0.0001, "loss": 0.0165, "step": 28440 }, { "epoch": 187.17105263157896, "grad_norm": 1.3178023099899292, "learning_rate": 0.0001, "loss": 0.017, "step": 28450 }, { "epoch": 187.23684210526315, "grad_norm": 2.538278341293335, "learning_rate": 0.0001, "loss": 0.0277, "step": 28460 }, { "epoch": 187.30263157894737, "grad_norm": 2.6935033798217773, "learning_rate": 0.0001, "loss": 0.0167, "step": 28470 }, { "epoch": 187.3684210526316, "grad_norm": 2.583784341812134, "learning_rate": 0.0001, "loss": 0.0189, "step": 28480 }, { "epoch": 187.43421052631578, "grad_norm": 2.7853147983551025, "learning_rate": 0.0001, "loss": 0.0183, "step": 28490 }, { "epoch": 187.5, "grad_norm": 1.8979226350784302, "learning_rate": 0.0001, "loss": 0.014, "step": 28500 }, { "epoch": 187.56578947368422, "grad_norm": 1.951745867729187, "learning_rate": 0.0001, "loss": 0.0168, "step": 28510 }, { "epoch": 187.6315789473684, "grad_norm": 1.744847059249878, "learning_rate": 0.0001, "loss": 0.0162, "step": 28520 }, { "epoch": 187.69736842105263, "grad_norm": 1.6811963319778442, "learning_rate": 0.0001, "loss": 0.0165, "step": 28530 }, { "epoch": 187.76315789473685, "grad_norm": 1.182780146598816, "learning_rate": 0.0001, "loss": 0.0151, "step": 28540 }, { "epoch": 187.82894736842104, "grad_norm": 1.3196076154708862, "learning_rate": 0.0001, "loss": 0.0156, "step": 28550 }, { "epoch": 187.89473684210526, "grad_norm": 1.376051425933838, "learning_rate": 0.0001, "loss": 0.0151, "step": 28560 }, { "epoch": 187.96052631578948, "grad_norm": 1.3644719123840332, "learning_rate": 0.0001, "loss": 0.0155, "step": 28570 }, { "epoch": 188.02631578947367, "grad_norm": 1.3114805221557617, "learning_rate": 0.0001, "loss": 0.0181, "step": 28580 }, { "epoch": 188.0921052631579, "grad_norm": 1.1968399286270142, "learning_rate": 0.0001, "loss": 0.0152, "step": 28590 }, { "epoch": 188.1578947368421, "grad_norm": 1.2592320442199707, "learning_rate": 0.0001, "loss": 0.016, "step": 28600 }, { "epoch": 188.22368421052633, "grad_norm": 1.2056187391281128, "learning_rate": 0.0001, "loss": 0.0156, "step": 28610 }, { "epoch": 188.28947368421052, "grad_norm": 1.101921558380127, "learning_rate": 0.0001, "loss": 0.0148, "step": 28620 }, { "epoch": 188.35526315789474, "grad_norm": 1.2965214252471924, "learning_rate": 0.0001, "loss": 0.0155, "step": 28630 }, { "epoch": 188.42105263157896, "grad_norm": 1.5886129140853882, "learning_rate": 0.0001, "loss": 0.0173, "step": 28640 }, { "epoch": 188.48684210526315, "grad_norm": 1.6472376585006714, "learning_rate": 0.0001, "loss": 0.0149, "step": 28650 }, { "epoch": 188.55263157894737, "grad_norm": 1.4192408323287964, "learning_rate": 0.0001, "loss": 0.0181, "step": 28660 }, { "epoch": 188.6184210526316, "grad_norm": 1.955345630645752, "learning_rate": 0.0001, "loss": 0.0186, "step": 28670 }, { "epoch": 188.68421052631578, "grad_norm": 1.7373226881027222, "learning_rate": 0.0001, "loss": 0.0172, "step": 28680 }, { "epoch": 188.75, "grad_norm": 1.6695311069488525, "learning_rate": 0.0001, "loss": 0.0167, "step": 28690 }, { "epoch": 188.81578947368422, "grad_norm": 1.6247828006744385, "learning_rate": 0.0001, "loss": 0.0169, "step": 28700 }, { "epoch": 188.8815789473684, "grad_norm": 1.587103009223938, "learning_rate": 0.0001, "loss": 0.0161, "step": 28710 }, { "epoch": 188.94736842105263, "grad_norm": 1.3761903047561646, "learning_rate": 0.0001, "loss": 0.0157, "step": 28720 }, { "epoch": 189.01315789473685, "grad_norm": 1.054154872894287, "learning_rate": 0.0001, "loss": 0.0176, "step": 28730 }, { "epoch": 189.07894736842104, "grad_norm": 1.7570017576217651, "learning_rate": 0.0001, "loss": 0.0164, "step": 28740 }, { "epoch": 189.14473684210526, "grad_norm": 1.302839994430542, "learning_rate": 0.0001, "loss": 0.0159, "step": 28750 }, { "epoch": 189.21052631578948, "grad_norm": 1.2320873737335205, "learning_rate": 0.0001, "loss": 0.0168, "step": 28760 }, { "epoch": 189.27631578947367, "grad_norm": 1.2856324911117554, "learning_rate": 0.0001, "loss": 0.0157, "step": 28770 }, { "epoch": 189.3421052631579, "grad_norm": 1.2495955228805542, "learning_rate": 0.0001, "loss": 0.016, "step": 28780 }, { "epoch": 189.4078947368421, "grad_norm": 1.2113772630691528, "learning_rate": 0.0001, "loss": 0.0163, "step": 28790 }, { "epoch": 189.47368421052633, "grad_norm": 1.1812059879302979, "learning_rate": 0.0001, "loss": 0.0175, "step": 28800 }, { "epoch": 189.53947368421052, "grad_norm": 1.3910290002822876, "learning_rate": 0.0001, "loss": 0.0172, "step": 28810 }, { "epoch": 189.60526315789474, "grad_norm": 1.076119303703308, "learning_rate": 0.0001, "loss": 0.0187, "step": 28820 }, { "epoch": 189.67105263157896, "grad_norm": 2.222041606903076, "learning_rate": 0.0001, "loss": 0.0251, "step": 28830 }, { "epoch": 189.73684210526315, "grad_norm": 2.265124559402466, "learning_rate": 0.0001, "loss": 0.0179, "step": 28840 }, { "epoch": 189.80263157894737, "grad_norm": 1.6033040285110474, "learning_rate": 0.0001, "loss": 0.016, "step": 28850 }, { "epoch": 189.8684210526316, "grad_norm": 1.4938453435897827, "learning_rate": 0.0001, "loss": 0.0187, "step": 28860 }, { "epoch": 189.93421052631578, "grad_norm": 1.3724675178527832, "learning_rate": 0.0001, "loss": 0.0157, "step": 28870 }, { "epoch": 190.0, "grad_norm": 1.3057665824890137, "learning_rate": 0.0001, "loss": 0.0188, "step": 28880 }, { "epoch": 190.06578947368422, "grad_norm": 1.1445728540420532, "learning_rate": 0.0001, "loss": 0.0174, "step": 28890 }, { "epoch": 190.1315789473684, "grad_norm": 1.2155309915542603, "learning_rate": 0.0001, "loss": 0.0152, "step": 28900 }, { "epoch": 190.19736842105263, "grad_norm": 1.1960302591323853, "learning_rate": 0.0001, "loss": 0.0187, "step": 28910 }, { "epoch": 190.26315789473685, "grad_norm": 1.3511428833007812, "learning_rate": 0.0001, "loss": 0.0178, "step": 28920 }, { "epoch": 190.32894736842104, "grad_norm": 0.9308839440345764, "learning_rate": 0.0001, "loss": 0.0163, "step": 28930 }, { "epoch": 190.39473684210526, "grad_norm": 1.409009575843811, "learning_rate": 0.0001, "loss": 0.0171, "step": 28940 }, { "epoch": 190.46052631578948, "grad_norm": 1.1859124898910522, "learning_rate": 0.0001, "loss": 0.0177, "step": 28950 }, { "epoch": 190.52631578947367, "grad_norm": 1.6667457818984985, "learning_rate": 0.0001, "loss": 0.0231, "step": 28960 }, { "epoch": 190.5921052631579, "grad_norm": 1.8634475469589233, "learning_rate": 0.0001, "loss": 0.0182, "step": 28970 }, { "epoch": 190.6578947368421, "grad_norm": 1.645556092262268, "learning_rate": 0.0001, "loss": 0.0165, "step": 28980 }, { "epoch": 190.72368421052633, "grad_norm": 1.3310197591781616, "learning_rate": 0.0001, "loss": 0.0173, "step": 28990 }, { "epoch": 190.78947368421052, "grad_norm": 1.2974885702133179, "learning_rate": 0.0001, "loss": 0.0192, "step": 29000 }, { "epoch": 190.85526315789474, "grad_norm": 1.270050048828125, "learning_rate": 0.0001, "loss": 0.0188, "step": 29010 }, { "epoch": 190.92105263157896, "grad_norm": 1.1336300373077393, "learning_rate": 0.0001, "loss": 0.0164, "step": 29020 }, { "epoch": 190.98684210526315, "grad_norm": 1.3560301065444946, "learning_rate": 0.0001, "loss": 0.0158, "step": 29030 }, { "epoch": 191.05263157894737, "grad_norm": 1.405979871749878, "learning_rate": 0.0001, "loss": 0.0153, "step": 29040 }, { "epoch": 191.1184210526316, "grad_norm": 1.3937962055206299, "learning_rate": 0.0001, "loss": 0.0155, "step": 29050 }, { "epoch": 191.18421052631578, "grad_norm": 1.3739968538284302, "learning_rate": 0.0001, "loss": 0.0168, "step": 29060 }, { "epoch": 191.25, "grad_norm": 1.259650707244873, "learning_rate": 0.0001, "loss": 0.0163, "step": 29070 }, { "epoch": 191.31578947368422, "grad_norm": 0.9018769860267639, "learning_rate": 0.0001, "loss": 0.0185, "step": 29080 }, { "epoch": 191.3815789473684, "grad_norm": 1.1630486249923706, "learning_rate": 0.0001, "loss": 0.0154, "step": 29090 }, { "epoch": 191.44736842105263, "grad_norm": 1.2646576166152954, "learning_rate": 0.0001, "loss": 0.0161, "step": 29100 }, { "epoch": 191.51315789473685, "grad_norm": 1.3670384883880615, "learning_rate": 0.0001, "loss": 0.0171, "step": 29110 }, { "epoch": 191.57894736842104, "grad_norm": 1.4089142084121704, "learning_rate": 0.0001, "loss": 0.0165, "step": 29120 }, { "epoch": 191.64473684210526, "grad_norm": 1.2056618928909302, "learning_rate": 0.0001, "loss": 0.0194, "step": 29130 }, { "epoch": 191.71052631578948, "grad_norm": 1.2704377174377441, "learning_rate": 0.0001, "loss": 0.0168, "step": 29140 }, { "epoch": 191.77631578947367, "grad_norm": 1.3765251636505127, "learning_rate": 0.0001, "loss": 0.0172, "step": 29150 }, { "epoch": 191.8421052631579, "grad_norm": 1.2344194650650024, "learning_rate": 0.0001, "loss": 0.0181, "step": 29160 }, { "epoch": 191.9078947368421, "grad_norm": 1.1715292930603027, "learning_rate": 0.0001, "loss": 0.0198, "step": 29170 }, { "epoch": 191.97368421052633, "grad_norm": 1.3913319110870361, "learning_rate": 0.0001, "loss": 0.0177, "step": 29180 }, { "epoch": 192.03947368421052, "grad_norm": 1.4549134969711304, "learning_rate": 0.0001, "loss": 0.0202, "step": 29190 }, { "epoch": 192.10526315789474, "grad_norm": 1.2978013753890991, "learning_rate": 0.0001, "loss": 0.0181, "step": 29200 }, { "epoch": 192.17105263157896, "grad_norm": 1.2289888858795166, "learning_rate": 0.0001, "loss": 0.0158, "step": 29210 }, { "epoch": 192.23684210526315, "grad_norm": 0.9477553367614746, "learning_rate": 0.0001, "loss": 0.0179, "step": 29220 }, { "epoch": 192.30263157894737, "grad_norm": 1.459433674812317, "learning_rate": 0.0001, "loss": 0.0151, "step": 29230 }, { "epoch": 192.3684210526316, "grad_norm": 1.5326049327850342, "learning_rate": 0.0001, "loss": 0.0167, "step": 29240 }, { "epoch": 192.43421052631578, "grad_norm": 1.2914258241653442, "learning_rate": 0.0001, "loss": 0.0179, "step": 29250 }, { "epoch": 192.5, "grad_norm": 1.216827630996704, "learning_rate": 0.0001, "loss": 0.0163, "step": 29260 }, { "epoch": 192.56578947368422, "grad_norm": 1.4195771217346191, "learning_rate": 0.0001, "loss": 0.0154, "step": 29270 }, { "epoch": 192.6315789473684, "grad_norm": 1.1989006996154785, "learning_rate": 0.0001, "loss": 0.0166, "step": 29280 }, { "epoch": 192.69736842105263, "grad_norm": 1.2557868957519531, "learning_rate": 0.0001, "loss": 0.0163, "step": 29290 }, { "epoch": 192.76315789473685, "grad_norm": 1.556070327758789, "learning_rate": 0.0001, "loss": 0.0164, "step": 29300 }, { "epoch": 192.82894736842104, "grad_norm": 1.3581067323684692, "learning_rate": 0.0001, "loss": 0.0208, "step": 29310 }, { "epoch": 192.89473684210526, "grad_norm": 1.4777802228927612, "learning_rate": 0.0001, "loss": 0.0157, "step": 29320 }, { "epoch": 192.96052631578948, "grad_norm": 1.00076162815094, "learning_rate": 0.0001, "loss": 0.0171, "step": 29330 }, { "epoch": 193.02631578947367, "grad_norm": 1.448732852935791, "learning_rate": 0.0001, "loss": 0.0192, "step": 29340 }, { "epoch": 193.0921052631579, "grad_norm": 1.2730473279953003, "learning_rate": 0.0001, "loss": 0.017, "step": 29350 }, { "epoch": 193.1578947368421, "grad_norm": 1.256740689277649, "learning_rate": 0.0001, "loss": 0.0173, "step": 29360 }, { "epoch": 193.22368421052633, "grad_norm": 1.0726568698883057, "learning_rate": 0.0001, "loss": 0.0198, "step": 29370 }, { "epoch": 193.28947368421052, "grad_norm": 0.9188181757926941, "learning_rate": 0.0001, "loss": 0.0176, "step": 29380 }, { "epoch": 193.35526315789474, "grad_norm": 1.2872679233551025, "learning_rate": 0.0001, "loss": 0.0188, "step": 29390 }, { "epoch": 193.42105263157896, "grad_norm": 1.520761251449585, "learning_rate": 0.0001, "loss": 0.0203, "step": 29400 }, { "epoch": 193.48684210526315, "grad_norm": 1.1832773685455322, "learning_rate": 0.0001, "loss": 0.018, "step": 29410 }, { "epoch": 193.55263157894737, "grad_norm": 1.4867194890975952, "learning_rate": 0.0001, "loss": 0.0171, "step": 29420 }, { "epoch": 193.6184210526316, "grad_norm": 1.7297718524932861, "learning_rate": 0.0001, "loss": 0.0156, "step": 29430 }, { "epoch": 193.68421052631578, "grad_norm": 1.251755952835083, "learning_rate": 0.0001, "loss": 0.0155, "step": 29440 }, { "epoch": 193.75, "grad_norm": 1.3701705932617188, "learning_rate": 0.0001, "loss": 0.0184, "step": 29450 }, { "epoch": 193.81578947368422, "grad_norm": 1.7457464933395386, "learning_rate": 0.0001, "loss": 0.0169, "step": 29460 }, { "epoch": 193.8815789473684, "grad_norm": 1.126453161239624, "learning_rate": 0.0001, "loss": 0.0155, "step": 29470 }, { "epoch": 193.94736842105263, "grad_norm": 1.2443026304244995, "learning_rate": 0.0001, "loss": 0.016, "step": 29480 }, { "epoch": 194.01315789473685, "grad_norm": 1.2029883861541748, "learning_rate": 0.0001, "loss": 0.0157, "step": 29490 }, { "epoch": 194.07894736842104, "grad_norm": 1.449203610420227, "learning_rate": 0.0001, "loss": 0.0176, "step": 29500 }, { "epoch": 194.14473684210526, "grad_norm": 1.0315684080123901, "learning_rate": 0.0001, "loss": 0.0163, "step": 29510 }, { "epoch": 194.21052631578948, "grad_norm": 1.3804000616073608, "learning_rate": 0.0001, "loss": 0.0164, "step": 29520 }, { "epoch": 194.27631578947367, "grad_norm": 1.527992844581604, "learning_rate": 0.0001, "loss": 0.0194, "step": 29530 }, { "epoch": 194.3421052631579, "grad_norm": 1.5879637002944946, "learning_rate": 0.0001, "loss": 0.0244, "step": 29540 }, { "epoch": 194.4078947368421, "grad_norm": 1.9236878156661987, "learning_rate": 0.0001, "loss": 0.0197, "step": 29550 }, { "epoch": 194.47368421052633, "grad_norm": 1.539232611656189, "learning_rate": 0.0001, "loss": 0.0203, "step": 29560 }, { "epoch": 194.53947368421052, "grad_norm": 1.3699829578399658, "learning_rate": 0.0001, "loss": 0.02, "step": 29570 }, { "epoch": 194.60526315789474, "grad_norm": 1.3466382026672363, "learning_rate": 0.0001, "loss": 0.018, "step": 29580 }, { "epoch": 194.67105263157896, "grad_norm": 1.6446095705032349, "learning_rate": 0.0001, "loss": 0.0163, "step": 29590 }, { "epoch": 194.73684210526315, "grad_norm": 1.2877060174942017, "learning_rate": 0.0001, "loss": 0.0184, "step": 29600 }, { "epoch": 194.80263157894737, "grad_norm": 1.1062644720077515, "learning_rate": 0.0001, "loss": 0.0184, "step": 29610 }, { "epoch": 194.8684210526316, "grad_norm": 0.854597270488739, "learning_rate": 0.0001, "loss": 0.0193, "step": 29620 }, { "epoch": 194.93421052631578, "grad_norm": 1.1438567638397217, "learning_rate": 0.0001, "loss": 0.0209, "step": 29630 }, { "epoch": 195.0, "grad_norm": 1.058065414428711, "learning_rate": 0.0001, "loss": 0.0212, "step": 29640 }, { "epoch": 195.06578947368422, "grad_norm": 1.3158515691757202, "learning_rate": 0.0001, "loss": 0.0212, "step": 29650 }, { "epoch": 195.1315789473684, "grad_norm": 1.3402299880981445, "learning_rate": 0.0001, "loss": 0.0199, "step": 29660 }, { "epoch": 195.19736842105263, "grad_norm": 1.5896971225738525, "learning_rate": 0.0001, "loss": 0.022, "step": 29670 }, { "epoch": 195.26315789473685, "grad_norm": 1.113895297050476, "learning_rate": 0.0001, "loss": 0.0189, "step": 29680 }, { "epoch": 195.32894736842104, "grad_norm": 0.8616934418678284, "learning_rate": 0.0001, "loss": 0.0196, "step": 29690 }, { "epoch": 195.39473684210526, "grad_norm": 1.0356990098953247, "learning_rate": 0.0001, "loss": 0.0206, "step": 29700 }, { "epoch": 195.46052631578948, "grad_norm": 1.1201450824737549, "learning_rate": 0.0001, "loss": 0.0205, "step": 29710 }, { "epoch": 195.52631578947367, "grad_norm": 1.386657476425171, "learning_rate": 0.0001, "loss": 0.0196, "step": 29720 }, { "epoch": 195.5921052631579, "grad_norm": 0.8346795439720154, "learning_rate": 0.0001, "loss": 0.0221, "step": 29730 }, { "epoch": 195.6578947368421, "grad_norm": 1.4521796703338623, "learning_rate": 0.0001, "loss": 0.0188, "step": 29740 }, { "epoch": 195.72368421052633, "grad_norm": 1.4133926630020142, "learning_rate": 0.0001, "loss": 0.0199, "step": 29750 }, { "epoch": 195.78947368421052, "grad_norm": 0.9600488543510437, "learning_rate": 0.0001, "loss": 0.021, "step": 29760 }, { "epoch": 195.85526315789474, "grad_norm": 1.1314250230789185, "learning_rate": 0.0001, "loss": 0.0173, "step": 29770 }, { "epoch": 195.92105263157896, "grad_norm": 0.8754208087921143, "learning_rate": 0.0001, "loss": 0.019, "step": 29780 }, { "epoch": 195.98684210526315, "grad_norm": 1.3597506284713745, "learning_rate": 0.0001, "loss": 0.0185, "step": 29790 }, { "epoch": 196.05263157894737, "grad_norm": 1.1357085704803467, "learning_rate": 0.0001, "loss": 0.0172, "step": 29800 }, { "epoch": 196.1184210526316, "grad_norm": 1.1614354848861694, "learning_rate": 0.0001, "loss": 0.0188, "step": 29810 }, { "epoch": 196.18421052631578, "grad_norm": 1.2745589017868042, "learning_rate": 0.0001, "loss": 0.0209, "step": 29820 }, { "epoch": 196.25, "grad_norm": 1.1572297811508179, "learning_rate": 0.0001, "loss": 0.0185, "step": 29830 }, { "epoch": 196.31578947368422, "grad_norm": 0.7867621183395386, "learning_rate": 0.0001, "loss": 0.02, "step": 29840 }, { "epoch": 196.3815789473684, "grad_norm": 1.308093786239624, "learning_rate": 0.0001, "loss": 0.0195, "step": 29850 }, { "epoch": 196.44736842105263, "grad_norm": 1.2609182596206665, "learning_rate": 0.0001, "loss": 0.0171, "step": 29860 }, { "epoch": 196.51315789473685, "grad_norm": 1.3445231914520264, "learning_rate": 0.0001, "loss": 0.0169, "step": 29870 }, { "epoch": 196.57894736842104, "grad_norm": 1.0773708820343018, "learning_rate": 0.0001, "loss": 0.0174, "step": 29880 }, { "epoch": 196.64473684210526, "grad_norm": 0.9694660902023315, "learning_rate": 0.0001, "loss": 0.0179, "step": 29890 }, { "epoch": 196.71052631578948, "grad_norm": 1.11964750289917, "learning_rate": 0.0001, "loss": 0.0177, "step": 29900 }, { "epoch": 196.77631578947367, "grad_norm": 1.1886194944381714, "learning_rate": 0.0001, "loss": 0.0182, "step": 29910 }, { "epoch": 196.8421052631579, "grad_norm": 1.4369953870773315, "learning_rate": 0.0001, "loss": 0.0165, "step": 29920 }, { "epoch": 196.9078947368421, "grad_norm": 0.9446222186088562, "learning_rate": 0.0001, "loss": 0.0186, "step": 29930 }, { "epoch": 196.97368421052633, "grad_norm": 1.308263897895813, "learning_rate": 0.0001, "loss": 0.0174, "step": 29940 }, { "epoch": 197.03947368421052, "grad_norm": 1.479591965675354, "learning_rate": 0.0001, "loss": 0.0196, "step": 29950 }, { "epoch": 197.10526315789474, "grad_norm": 1.261594295501709, "learning_rate": 0.0001, "loss": 0.0178, "step": 29960 }, { "epoch": 197.17105263157896, "grad_norm": 1.597878098487854, "learning_rate": 0.0001, "loss": 0.0178, "step": 29970 }, { "epoch": 197.23684210526315, "grad_norm": 0.9023445248603821, "learning_rate": 0.0001, "loss": 0.0179, "step": 29980 }, { "epoch": 197.30263157894737, "grad_norm": 1.7900540828704834, "learning_rate": 0.0001, "loss": 0.0178, "step": 29990 }, { "epoch": 197.3684210526316, "grad_norm": 1.0138733386993408, "learning_rate": 0.0001, "loss": 0.0155, "step": 30000 }, { "epoch": 197.43421052631578, "grad_norm": 1.617836594581604, "learning_rate": 0.0001, "loss": 0.0171, "step": 30010 }, { "epoch": 197.5, "grad_norm": 1.0943124294281006, "learning_rate": 0.0001, "loss": 0.019, "step": 30020 }, { "epoch": 197.56578947368422, "grad_norm": 1.1068415641784668, "learning_rate": 0.0001, "loss": 0.0189, "step": 30030 }, { "epoch": 197.6315789473684, "grad_norm": 1.248450756072998, "learning_rate": 0.0001, "loss": 0.0165, "step": 30040 }, { "epoch": 197.69736842105263, "grad_norm": 1.4986473321914673, "learning_rate": 0.0001, "loss": 0.0145, "step": 30050 }, { "epoch": 197.76315789473685, "grad_norm": 1.2006663084030151, "learning_rate": 0.0001, "loss": 0.0182, "step": 30060 }, { "epoch": 197.82894736842104, "grad_norm": 1.0944031476974487, "learning_rate": 0.0001, "loss": 0.0155, "step": 30070 }, { "epoch": 197.89473684210526, "grad_norm": 1.6953439712524414, "learning_rate": 0.0001, "loss": 0.0144, "step": 30080 }, { "epoch": 197.96052631578948, "grad_norm": 1.1064839363098145, "learning_rate": 0.0001, "loss": 0.0173, "step": 30090 }, { "epoch": 198.02631578947367, "grad_norm": 2.015153646469116, "learning_rate": 0.0001, "loss": 0.0172, "step": 30100 }, { "epoch": 198.0921052631579, "grad_norm": 1.8545198440551758, "learning_rate": 0.0001, "loss": 0.0182, "step": 30110 }, { "epoch": 198.1578947368421, "grad_norm": 1.5696916580200195, "learning_rate": 0.0001, "loss": 0.0157, "step": 30120 }, { "epoch": 198.22368421052633, "grad_norm": 1.4251947402954102, "learning_rate": 0.0001, "loss": 0.0179, "step": 30130 }, { "epoch": 198.28947368421052, "grad_norm": 1.1599140167236328, "learning_rate": 0.0001, "loss": 0.0157, "step": 30140 }, { "epoch": 198.35526315789474, "grad_norm": 1.4993690252304077, "learning_rate": 0.0001, "loss": 0.0176, "step": 30150 }, { "epoch": 198.42105263157896, "grad_norm": 1.4265055656433105, "learning_rate": 0.0001, "loss": 0.015, "step": 30160 }, { "epoch": 198.48684210526315, "grad_norm": 1.101048231124878, "learning_rate": 0.0001, "loss": 0.014, "step": 30170 }, { "epoch": 198.55263157894737, "grad_norm": 1.5096710920333862, "learning_rate": 0.0001, "loss": 0.0143, "step": 30180 }, { "epoch": 198.6184210526316, "grad_norm": 1.190572738647461, "learning_rate": 0.0001, "loss": 0.0163, "step": 30190 }, { "epoch": 198.68421052631578, "grad_norm": 1.591253399848938, "learning_rate": 0.0001, "loss": 0.0149, "step": 30200 }, { "epoch": 198.75, "grad_norm": 1.0739487409591675, "learning_rate": 0.0001, "loss": 0.0152, "step": 30210 }, { "epoch": 198.81578947368422, "grad_norm": 1.2973365783691406, "learning_rate": 0.0001, "loss": 0.0164, "step": 30220 }, { "epoch": 198.8815789473684, "grad_norm": 1.2122310400009155, "learning_rate": 0.0001, "loss": 0.0161, "step": 30230 }, { "epoch": 198.94736842105263, "grad_norm": 1.2195587158203125, "learning_rate": 0.0001, "loss": 0.0154, "step": 30240 }, { "epoch": 199.01315789473685, "grad_norm": 1.0152171850204468, "learning_rate": 0.0001, "loss": 0.0153, "step": 30250 }, { "epoch": 199.07894736842104, "grad_norm": 1.404311180114746, "learning_rate": 0.0001, "loss": 0.0174, "step": 30260 }, { "epoch": 199.14473684210526, "grad_norm": 1.4140353202819824, "learning_rate": 0.0001, "loss": 0.0164, "step": 30270 }, { "epoch": 199.21052631578948, "grad_norm": 1.4403727054595947, "learning_rate": 0.0001, "loss": 0.0138, "step": 30280 }, { "epoch": 199.27631578947367, "grad_norm": 1.9959533214569092, "learning_rate": 0.0001, "loss": 0.0169, "step": 30290 }, { "epoch": 199.3421052631579, "grad_norm": 1.6239718198776245, "learning_rate": 0.0001, "loss": 0.0135, "step": 30300 }, { "epoch": 199.4078947368421, "grad_norm": 1.3269397020339966, "learning_rate": 0.0001, "loss": 0.0144, "step": 30310 }, { "epoch": 199.47368421052633, "grad_norm": 1.6701898574829102, "learning_rate": 0.0001, "loss": 0.0159, "step": 30320 }, { "epoch": 199.53947368421052, "grad_norm": 1.5344353914260864, "learning_rate": 0.0001, "loss": 0.0157, "step": 30330 }, { "epoch": 199.60526315789474, "grad_norm": 1.4121617078781128, "learning_rate": 0.0001, "loss": 0.0143, "step": 30340 }, { "epoch": 199.67105263157896, "grad_norm": 0.9731596112251282, "learning_rate": 0.0001, "loss": 0.0137, "step": 30350 }, { "epoch": 199.73684210526315, "grad_norm": 1.1787141561508179, "learning_rate": 0.0001, "loss": 0.0163, "step": 30360 }, { "epoch": 199.80263157894737, "grad_norm": 1.0008739233016968, "learning_rate": 0.0001, "loss": 0.0147, "step": 30370 }, { "epoch": 199.8684210526316, "grad_norm": 1.2544138431549072, "learning_rate": 0.0001, "loss": 0.0162, "step": 30380 }, { "epoch": 199.93421052631578, "grad_norm": 1.1031585931777954, "learning_rate": 0.0001, "loss": 0.0177, "step": 30390 }, { "epoch": 200.0, "grad_norm": 1.5565379858016968, "learning_rate": 0.0001, "loss": 0.0174, "step": 30400 }, { "epoch": 200.06578947368422, "grad_norm": 1.0128217935562134, "learning_rate": 0.0001, "loss": 0.0153, "step": 30410 }, { "epoch": 200.1315789473684, "grad_norm": 0.9004884362220764, "learning_rate": 0.0001, "loss": 0.0171, "step": 30420 }, { "epoch": 200.19736842105263, "grad_norm": 0.7593161463737488, "learning_rate": 0.0001, "loss": 0.0157, "step": 30430 }, { "epoch": 200.26315789473685, "grad_norm": 1.4337648153305054, "learning_rate": 0.0001, "loss": 0.0164, "step": 30440 }, { "epoch": 200.32894736842104, "grad_norm": 1.2471925020217896, "learning_rate": 0.0001, "loss": 0.0192, "step": 30450 }, { "epoch": 200.39473684210526, "grad_norm": 1.1293731927871704, "learning_rate": 0.0001, "loss": 0.0163, "step": 30460 }, { "epoch": 200.46052631578948, "grad_norm": 0.8459559679031372, "learning_rate": 0.0001, "loss": 0.0163, "step": 30470 }, { "epoch": 200.52631578947367, "grad_norm": 1.5371299982070923, "learning_rate": 0.0001, "loss": 0.0145, "step": 30480 }, { "epoch": 200.5921052631579, "grad_norm": 1.3424179553985596, "learning_rate": 0.0001, "loss": 0.0156, "step": 30490 }, { "epoch": 200.6578947368421, "grad_norm": 1.5373995304107666, "learning_rate": 0.0001, "loss": 0.0179, "step": 30500 }, { "epoch": 200.72368421052633, "grad_norm": 0.9894046783447266, "learning_rate": 0.0001, "loss": 0.0152, "step": 30510 }, { "epoch": 200.78947368421052, "grad_norm": 1.7582042217254639, "learning_rate": 0.0001, "loss": 0.0141, "step": 30520 }, { "epoch": 200.85526315789474, "grad_norm": 1.3425977230072021, "learning_rate": 0.0001, "loss": 0.0169, "step": 30530 }, { "epoch": 200.92105263157896, "grad_norm": 1.3770979642868042, "learning_rate": 0.0001, "loss": 0.0158, "step": 30540 }, { "epoch": 200.98684210526315, "grad_norm": 1.40471351146698, "learning_rate": 0.0001, "loss": 0.0151, "step": 30550 }, { "epoch": 201.05263157894737, "grad_norm": 1.337066411972046, "learning_rate": 0.0001, "loss": 0.017, "step": 30560 }, { "epoch": 201.1184210526316, "grad_norm": 1.1256475448608398, "learning_rate": 0.0001, "loss": 0.0159, "step": 30570 }, { "epoch": 201.18421052631578, "grad_norm": 1.3861485719680786, "learning_rate": 0.0001, "loss": 0.0152, "step": 30580 }, { "epoch": 201.25, "grad_norm": 1.1584820747375488, "learning_rate": 0.0001, "loss": 0.0151, "step": 30590 }, { "epoch": 201.31578947368422, "grad_norm": 1.3375623226165771, "learning_rate": 0.0001, "loss": 0.0171, "step": 30600 }, { "epoch": 201.3815789473684, "grad_norm": 1.2848438024520874, "learning_rate": 0.0001, "loss": 0.0161, "step": 30610 }, { "epoch": 201.44736842105263, "grad_norm": 1.2044715881347656, "learning_rate": 0.0001, "loss": 0.0148, "step": 30620 }, { "epoch": 201.51315789473685, "grad_norm": 1.1717559099197388, "learning_rate": 0.0001, "loss": 0.0172, "step": 30630 }, { "epoch": 201.57894736842104, "grad_norm": 1.0431238412857056, "learning_rate": 0.0001, "loss": 0.0169, "step": 30640 }, { "epoch": 201.64473684210526, "grad_norm": 1.0847781896591187, "learning_rate": 0.0001, "loss": 0.014, "step": 30650 }, { "epoch": 201.71052631578948, "grad_norm": 1.1109511852264404, "learning_rate": 0.0001, "loss": 0.018, "step": 30660 }, { "epoch": 201.77631578947367, "grad_norm": 1.094912052154541, "learning_rate": 0.0001, "loss": 0.0151, "step": 30670 }, { "epoch": 201.8421052631579, "grad_norm": 1.0548181533813477, "learning_rate": 0.0001, "loss": 0.0164, "step": 30680 }, { "epoch": 201.9078947368421, "grad_norm": 1.3336670398712158, "learning_rate": 0.0001, "loss": 0.0179, "step": 30690 }, { "epoch": 201.97368421052633, "grad_norm": 1.0279159545898438, "learning_rate": 0.0001, "loss": 0.0161, "step": 30700 }, { "epoch": 202.03947368421052, "grad_norm": 0.7135113477706909, "learning_rate": 0.0001, "loss": 0.0166, "step": 30710 }, { "epoch": 202.10526315789474, "grad_norm": 1.0755541324615479, "learning_rate": 0.0001, "loss": 0.0176, "step": 30720 }, { "epoch": 202.17105263157896, "grad_norm": 1.3156121969223022, "learning_rate": 0.0001, "loss": 0.0163, "step": 30730 }, { "epoch": 202.23684210526315, "grad_norm": 1.4101308584213257, "learning_rate": 0.0001, "loss": 0.0143, "step": 30740 }, { "epoch": 202.30263157894737, "grad_norm": 1.3764532804489136, "learning_rate": 0.0001, "loss": 0.0151, "step": 30750 }, { "epoch": 202.3684210526316, "grad_norm": 0.882384181022644, "learning_rate": 0.0001, "loss": 0.0149, "step": 30760 }, { "epoch": 202.43421052631578, "grad_norm": 1.6236560344696045, "learning_rate": 0.0001, "loss": 0.0167, "step": 30770 }, { "epoch": 202.5, "grad_norm": 1.4815741777420044, "learning_rate": 0.0001, "loss": 0.0178, "step": 30780 }, { "epoch": 202.56578947368422, "grad_norm": 1.2555485963821411, "learning_rate": 0.0001, "loss": 0.0177, "step": 30790 }, { "epoch": 202.6315789473684, "grad_norm": 1.3122458457946777, "learning_rate": 0.0001, "loss": 0.0167, "step": 30800 }, { "epoch": 202.69736842105263, "grad_norm": 1.042381763458252, "learning_rate": 0.0001, "loss": 0.0149, "step": 30810 }, { "epoch": 202.76315789473685, "grad_norm": 0.9814920425415039, "learning_rate": 0.0001, "loss": 0.016, "step": 30820 }, { "epoch": 202.82894736842104, "grad_norm": 1.0357002019882202, "learning_rate": 0.0001, "loss": 0.0187, "step": 30830 }, { "epoch": 202.89473684210526, "grad_norm": 1.281749963760376, "learning_rate": 0.0001, "loss": 0.0165, "step": 30840 }, { "epoch": 202.96052631578948, "grad_norm": 1.321096658706665, "learning_rate": 0.0001, "loss": 0.0173, "step": 30850 }, { "epoch": 203.02631578947367, "grad_norm": 1.1207501888275146, "learning_rate": 0.0001, "loss": 0.0176, "step": 30860 }, { "epoch": 203.0921052631579, "grad_norm": 1.5637404918670654, "learning_rate": 0.0001, "loss": 0.0167, "step": 30870 }, { "epoch": 203.1578947368421, "grad_norm": 1.2370407581329346, "learning_rate": 0.0001, "loss": 0.0156, "step": 30880 }, { "epoch": 203.22368421052633, "grad_norm": 1.0421314239501953, "learning_rate": 0.0001, "loss": 0.0217, "step": 30890 }, { "epoch": 203.28947368421052, "grad_norm": 1.0301634073257446, "learning_rate": 0.0001, "loss": 0.0156, "step": 30900 }, { "epoch": 203.35526315789474, "grad_norm": 1.2774720191955566, "learning_rate": 0.0001, "loss": 0.0195, "step": 30910 }, { "epoch": 203.42105263157896, "grad_norm": 1.2703475952148438, "learning_rate": 0.0001, "loss": 0.0147, "step": 30920 }, { "epoch": 203.48684210526315, "grad_norm": 1.1837942600250244, "learning_rate": 0.0001, "loss": 0.0156, "step": 30930 }, { "epoch": 203.55263157894737, "grad_norm": 1.0560357570648193, "learning_rate": 0.0001, "loss": 0.0145, "step": 30940 }, { "epoch": 203.6184210526316, "grad_norm": 1.2957569360733032, "learning_rate": 0.0001, "loss": 0.0155, "step": 30950 }, { "epoch": 203.68421052631578, "grad_norm": 1.4275071620941162, "learning_rate": 0.0001, "loss": 0.0146, "step": 30960 }, { "epoch": 203.75, "grad_norm": 1.513159155845642, "learning_rate": 0.0001, "loss": 0.0165, "step": 30970 }, { "epoch": 203.81578947368422, "grad_norm": 1.6335196495056152, "learning_rate": 0.0001, "loss": 0.0178, "step": 30980 }, { "epoch": 203.8815789473684, "grad_norm": 1.5469692945480347, "learning_rate": 0.0001, "loss": 0.015, "step": 30990 }, { "epoch": 203.94736842105263, "grad_norm": 1.5226917266845703, "learning_rate": 0.0001, "loss": 0.016, "step": 31000 }, { "epoch": 204.01315789473685, "grad_norm": 1.3117252588272095, "learning_rate": 0.0001, "loss": 0.018, "step": 31010 }, { "epoch": 204.07894736842104, "grad_norm": 1.800711989402771, "learning_rate": 0.0001, "loss": 0.0171, "step": 31020 }, { "epoch": 204.14473684210526, "grad_norm": 1.6700514554977417, "learning_rate": 0.0001, "loss": 0.0175, "step": 31030 }, { "epoch": 204.21052631578948, "grad_norm": 1.702384352684021, "learning_rate": 0.0001, "loss": 0.016, "step": 31040 }, { "epoch": 204.27631578947367, "grad_norm": 1.678499698638916, "learning_rate": 0.0001, "loss": 0.0139, "step": 31050 }, { "epoch": 204.3421052631579, "grad_norm": 1.4093444347381592, "learning_rate": 0.0001, "loss": 0.0155, "step": 31060 }, { "epoch": 204.4078947368421, "grad_norm": 1.5333293676376343, "learning_rate": 0.0001, "loss": 0.0147, "step": 31070 }, { "epoch": 204.47368421052633, "grad_norm": 1.2159759998321533, "learning_rate": 0.0001, "loss": 0.0155, "step": 31080 }, { "epoch": 204.53947368421052, "grad_norm": 1.358657956123352, "learning_rate": 0.0001, "loss": 0.017, "step": 31090 }, { "epoch": 204.60526315789474, "grad_norm": 1.1089929342269897, "learning_rate": 0.0001, "loss": 0.0157, "step": 31100 }, { "epoch": 204.67105263157896, "grad_norm": 1.3180047273635864, "learning_rate": 0.0001, "loss": 0.0163, "step": 31110 }, { "epoch": 204.73684210526315, "grad_norm": 1.2154042720794678, "learning_rate": 0.0001, "loss": 0.0153, "step": 31120 }, { "epoch": 204.80263157894737, "grad_norm": 1.4112086296081543, "learning_rate": 0.0001, "loss": 0.0161, "step": 31130 }, { "epoch": 204.8684210526316, "grad_norm": 1.432087779045105, "learning_rate": 0.0001, "loss": 0.0179, "step": 31140 }, { "epoch": 204.93421052631578, "grad_norm": 1.3358913660049438, "learning_rate": 0.0001, "loss": 0.0158, "step": 31150 }, { "epoch": 205.0, "grad_norm": 1.435907006263733, "learning_rate": 0.0001, "loss": 0.0151, "step": 31160 }, { "epoch": 205.06578947368422, "grad_norm": 1.0711824893951416, "learning_rate": 0.0001, "loss": 0.0143, "step": 31170 }, { "epoch": 205.1315789473684, "grad_norm": 1.3709452152252197, "learning_rate": 0.0001, "loss": 0.0144, "step": 31180 }, { "epoch": 205.19736842105263, "grad_norm": 1.1924278736114502, "learning_rate": 0.0001, "loss": 0.0164, "step": 31190 }, { "epoch": 205.26315789473685, "grad_norm": 1.3899904489517212, "learning_rate": 0.0001, "loss": 0.0156, "step": 31200 }, { "epoch": 205.32894736842104, "grad_norm": 1.3233460187911987, "learning_rate": 0.0001, "loss": 0.0149, "step": 31210 }, { "epoch": 205.39473684210526, "grad_norm": 1.4931349754333496, "learning_rate": 0.0001, "loss": 0.0157, "step": 31220 }, { "epoch": 205.46052631578948, "grad_norm": 1.203282117843628, "learning_rate": 0.0001, "loss": 0.0165, "step": 31230 }, { "epoch": 205.52631578947367, "grad_norm": 1.3920016288757324, "learning_rate": 0.0001, "loss": 0.0158, "step": 31240 }, { "epoch": 205.5921052631579, "grad_norm": 1.077797770500183, "learning_rate": 0.0001, "loss": 0.0168, "step": 31250 }, { "epoch": 205.6578947368421, "grad_norm": 1.7095680236816406, "learning_rate": 0.0001, "loss": 0.0167, "step": 31260 }, { "epoch": 205.72368421052633, "grad_norm": 0.7705028057098389, "learning_rate": 0.0001, "loss": 0.0144, "step": 31270 }, { "epoch": 205.78947368421052, "grad_norm": 1.1185240745544434, "learning_rate": 0.0001, "loss": 0.0196, "step": 31280 }, { "epoch": 205.85526315789474, "grad_norm": 1.0305986404418945, "learning_rate": 0.0001, "loss": 0.0152, "step": 31290 }, { "epoch": 205.92105263157896, "grad_norm": 1.5906319618225098, "learning_rate": 0.0001, "loss": 0.0151, "step": 31300 }, { "epoch": 205.98684210526315, "grad_norm": 1.2245663404464722, "learning_rate": 0.0001, "loss": 0.0176, "step": 31310 }, { "epoch": 206.05263157894737, "grad_norm": 1.706349492073059, "learning_rate": 0.0001, "loss": 0.0161, "step": 31320 }, { "epoch": 206.1184210526316, "grad_norm": 1.1370997428894043, "learning_rate": 0.0001, "loss": 0.0162, "step": 31330 }, { "epoch": 206.18421052631578, "grad_norm": 0.6681815385818481, "learning_rate": 0.0001, "loss": 0.0176, "step": 31340 }, { "epoch": 206.25, "grad_norm": 1.397417664527893, "learning_rate": 0.0001, "loss": 0.0156, "step": 31350 }, { "epoch": 206.31578947368422, "grad_norm": 1.0024604797363281, "learning_rate": 0.0001, "loss": 0.0166, "step": 31360 }, { "epoch": 206.3815789473684, "grad_norm": 1.329154372215271, "learning_rate": 0.0001, "loss": 0.0174, "step": 31370 }, { "epoch": 206.44736842105263, "grad_norm": 1.4420346021652222, "learning_rate": 0.0001, "loss": 0.017, "step": 31380 }, { "epoch": 206.51315789473685, "grad_norm": 1.2937798500061035, "learning_rate": 0.0001, "loss": 0.0163, "step": 31390 }, { "epoch": 206.57894736842104, "grad_norm": 1.2045661211013794, "learning_rate": 0.0001, "loss": 0.0161, "step": 31400 }, { "epoch": 206.64473684210526, "grad_norm": 1.0258898735046387, "learning_rate": 0.0001, "loss": 0.0151, "step": 31410 }, { "epoch": 206.71052631578948, "grad_norm": 1.1112213134765625, "learning_rate": 0.0001, "loss": 0.0153, "step": 31420 }, { "epoch": 206.77631578947367, "grad_norm": 1.2808059453964233, "learning_rate": 0.0001, "loss": 0.0163, "step": 31430 }, { "epoch": 206.8421052631579, "grad_norm": 1.4980825185775757, "learning_rate": 0.0001, "loss": 0.0195, "step": 31440 }, { "epoch": 206.9078947368421, "grad_norm": 1.1103272438049316, "learning_rate": 0.0001, "loss": 0.0148, "step": 31450 }, { "epoch": 206.97368421052633, "grad_norm": 0.9099360704421997, "learning_rate": 0.0001, "loss": 0.0163, "step": 31460 }, { "epoch": 207.03947368421052, "grad_norm": 1.1088165044784546, "learning_rate": 0.0001, "loss": 0.0158, "step": 31470 }, { "epoch": 207.10526315789474, "grad_norm": 1.3863370418548584, "learning_rate": 0.0001, "loss": 0.018, "step": 31480 }, { "epoch": 207.17105263157896, "grad_norm": 1.522480845451355, "learning_rate": 0.0001, "loss": 0.0175, "step": 31490 }, { "epoch": 207.23684210526315, "grad_norm": 1.1153115034103394, "learning_rate": 0.0001, "loss": 0.0161, "step": 31500 }, { "epoch": 207.30263157894737, "grad_norm": 1.211844563484192, "learning_rate": 0.0001, "loss": 0.0166, "step": 31510 }, { "epoch": 207.3684210526316, "grad_norm": 1.3280556201934814, "learning_rate": 0.0001, "loss": 0.0163, "step": 31520 }, { "epoch": 207.43421052631578, "grad_norm": 1.232966423034668, "learning_rate": 0.0001, "loss": 0.0178, "step": 31530 }, { "epoch": 207.5, "grad_norm": 1.3021212816238403, "learning_rate": 0.0001, "loss": 0.0158, "step": 31540 }, { "epoch": 207.56578947368422, "grad_norm": 1.4837563037872314, "learning_rate": 0.0001, "loss": 0.0175, "step": 31550 }, { "epoch": 207.6315789473684, "grad_norm": 1.122427225112915, "learning_rate": 0.0001, "loss": 0.0161, "step": 31560 }, { "epoch": 207.69736842105263, "grad_norm": 1.358843207359314, "learning_rate": 0.0001, "loss": 0.018, "step": 31570 }, { "epoch": 207.76315789473685, "grad_norm": 1.1522997617721558, "learning_rate": 0.0001, "loss": 0.0161, "step": 31580 }, { "epoch": 207.82894736842104, "grad_norm": 1.1149085760116577, "learning_rate": 0.0001, "loss": 0.0167, "step": 31590 }, { "epoch": 207.89473684210526, "grad_norm": 1.3184139728546143, "learning_rate": 0.0001, "loss": 0.0153, "step": 31600 }, { "epoch": 207.96052631578948, "grad_norm": 1.1251814365386963, "learning_rate": 0.0001, "loss": 0.0157, "step": 31610 }, { "epoch": 208.02631578947367, "grad_norm": 1.4511667490005493, "learning_rate": 0.0001, "loss": 0.0133, "step": 31620 }, { "epoch": 208.0921052631579, "grad_norm": 1.4698028564453125, "learning_rate": 0.0001, "loss": 0.0164, "step": 31630 }, { "epoch": 208.1578947368421, "grad_norm": 1.1822978258132935, "learning_rate": 0.0001, "loss": 0.016, "step": 31640 }, { "epoch": 208.22368421052633, "grad_norm": 1.1715060472488403, "learning_rate": 0.0001, "loss": 0.0155, "step": 31650 }, { "epoch": 208.28947368421052, "grad_norm": 1.4276723861694336, "learning_rate": 0.0001, "loss": 0.0171, "step": 31660 }, { "epoch": 208.35526315789474, "grad_norm": 1.4519996643066406, "learning_rate": 0.0001, "loss": 0.0163, "step": 31670 }, { "epoch": 208.42105263157896, "grad_norm": 1.0336174964904785, "learning_rate": 0.0001, "loss": 0.0153, "step": 31680 }, { "epoch": 208.48684210526315, "grad_norm": 1.6115700006484985, "learning_rate": 0.0001, "loss": 0.0166, "step": 31690 }, { "epoch": 208.55263157894737, "grad_norm": 1.6710377931594849, "learning_rate": 0.0001, "loss": 0.0176, "step": 31700 }, { "epoch": 208.6184210526316, "grad_norm": 1.5397769212722778, "learning_rate": 0.0001, "loss": 0.0166, "step": 31710 }, { "epoch": 208.68421052631578, "grad_norm": 1.5007388591766357, "learning_rate": 0.0001, "loss": 0.016, "step": 31720 }, { "epoch": 208.75, "grad_norm": 1.3674216270446777, "learning_rate": 0.0001, "loss": 0.0144, "step": 31730 }, { "epoch": 208.81578947368422, "grad_norm": 2.207770347595215, "learning_rate": 0.0001, "loss": 0.0165, "step": 31740 }, { "epoch": 208.8815789473684, "grad_norm": 1.5573480129241943, "learning_rate": 0.0001, "loss": 0.0176, "step": 31750 }, { "epoch": 208.94736842105263, "grad_norm": 1.5399261713027954, "learning_rate": 0.0001, "loss": 0.0144, "step": 31760 }, { "epoch": 209.01315789473685, "grad_norm": 1.6872302293777466, "learning_rate": 0.0001, "loss": 0.0173, "step": 31770 }, { "epoch": 209.07894736842104, "grad_norm": 1.3373833894729614, "learning_rate": 0.0001, "loss": 0.0151, "step": 31780 }, { "epoch": 209.14473684210526, "grad_norm": 1.6845489740371704, "learning_rate": 0.0001, "loss": 0.0156, "step": 31790 }, { "epoch": 209.21052631578948, "grad_norm": 1.3818607330322266, "learning_rate": 0.0001, "loss": 0.0148, "step": 31800 }, { "epoch": 209.27631578947367, "grad_norm": 1.3296219110488892, "learning_rate": 0.0001, "loss": 0.0152, "step": 31810 }, { "epoch": 209.3421052631579, "grad_norm": 1.1584928035736084, "learning_rate": 0.0001, "loss": 0.0135, "step": 31820 }, { "epoch": 209.4078947368421, "grad_norm": 1.3986483812332153, "learning_rate": 0.0001, "loss": 0.0153, "step": 31830 }, { "epoch": 209.47368421052633, "grad_norm": 1.7007300853729248, "learning_rate": 0.0001, "loss": 0.0139, "step": 31840 }, { "epoch": 209.53947368421052, "grad_norm": 0.916429877281189, "learning_rate": 0.0001, "loss": 0.0155, "step": 31850 }, { "epoch": 209.60526315789474, "grad_norm": 1.1400072574615479, "learning_rate": 0.0001, "loss": 0.0163, "step": 31860 }, { "epoch": 209.67105263157896, "grad_norm": 1.3716983795166016, "learning_rate": 0.0001, "loss": 0.0142, "step": 31870 }, { "epoch": 209.73684210526315, "grad_norm": 0.9862516522407532, "learning_rate": 0.0001, "loss": 0.0164, "step": 31880 }, { "epoch": 209.80263157894737, "grad_norm": 1.1696069240570068, "learning_rate": 0.0001, "loss": 0.0142, "step": 31890 }, { "epoch": 209.8684210526316, "grad_norm": 1.4392577409744263, "learning_rate": 0.0001, "loss": 0.0147, "step": 31900 }, { "epoch": 209.93421052631578, "grad_norm": 2.1823062896728516, "learning_rate": 0.0001, "loss": 0.0183, "step": 31910 }, { "epoch": 210.0, "grad_norm": 1.6324290037155151, "learning_rate": 0.0001, "loss": 0.0171, "step": 31920 }, { "epoch": 210.06578947368422, "grad_norm": 1.2188938856124878, "learning_rate": 0.0001, "loss": 0.016, "step": 31930 }, { "epoch": 210.1315789473684, "grad_norm": 1.401517391204834, "learning_rate": 0.0001, "loss": 0.0157, "step": 31940 }, { "epoch": 210.19736842105263, "grad_norm": 1.3221532106399536, "learning_rate": 0.0001, "loss": 0.016, "step": 31950 }, { "epoch": 210.26315789473685, "grad_norm": 1.5217102766036987, "learning_rate": 0.0001, "loss": 0.0154, "step": 31960 }, { "epoch": 210.32894736842104, "grad_norm": 1.4652286767959595, "learning_rate": 0.0001, "loss": 0.0157, "step": 31970 }, { "epoch": 210.39473684210526, "grad_norm": 1.2757086753845215, "learning_rate": 0.0001, "loss": 0.0169, "step": 31980 }, { "epoch": 210.46052631578948, "grad_norm": 1.227782130241394, "learning_rate": 0.0001, "loss": 0.0137, "step": 31990 }, { "epoch": 210.52631578947367, "grad_norm": 1.3850603103637695, "learning_rate": 0.0001, "loss": 0.0141, "step": 32000 }, { "epoch": 210.5921052631579, "grad_norm": 1.2859461307525635, "learning_rate": 0.0001, "loss": 0.0158, "step": 32010 }, { "epoch": 210.6578947368421, "grad_norm": 1.3917819261550903, "learning_rate": 0.0001, "loss": 0.0149, "step": 32020 }, { "epoch": 210.72368421052633, "grad_norm": 1.4318441152572632, "learning_rate": 0.0001, "loss": 0.0158, "step": 32030 }, { "epoch": 210.78947368421052, "grad_norm": 1.5002471208572388, "learning_rate": 0.0001, "loss": 0.0152, "step": 32040 }, { "epoch": 210.85526315789474, "grad_norm": 1.2311240434646606, "learning_rate": 0.0001, "loss": 0.0188, "step": 32050 }, { "epoch": 210.92105263157896, "grad_norm": 1.293976902961731, "learning_rate": 0.0001, "loss": 0.0143, "step": 32060 }, { "epoch": 210.98684210526315, "grad_norm": 1.1462241411209106, "learning_rate": 0.0001, "loss": 0.0136, "step": 32070 }, { "epoch": 211.05263157894737, "grad_norm": 1.0642205476760864, "learning_rate": 0.0001, "loss": 0.0141, "step": 32080 }, { "epoch": 211.1184210526316, "grad_norm": 1.0781315565109253, "learning_rate": 0.0001, "loss": 0.0146, "step": 32090 }, { "epoch": 211.18421052631578, "grad_norm": 1.2331435680389404, "learning_rate": 0.0001, "loss": 0.0161, "step": 32100 }, { "epoch": 211.25, "grad_norm": 1.129051685333252, "learning_rate": 0.0001, "loss": 0.0157, "step": 32110 }, { "epoch": 211.31578947368422, "grad_norm": 1.301344871520996, "learning_rate": 0.0001, "loss": 0.0164, "step": 32120 }, { "epoch": 211.3815789473684, "grad_norm": 1.6704833507537842, "learning_rate": 0.0001, "loss": 0.0148, "step": 32130 }, { "epoch": 211.44736842105263, "grad_norm": 1.4150813817977905, "learning_rate": 0.0001, "loss": 0.0158, "step": 32140 }, { "epoch": 211.51315789473685, "grad_norm": 1.3065874576568604, "learning_rate": 0.0001, "loss": 0.0159, "step": 32150 }, { "epoch": 211.57894736842104, "grad_norm": 1.0689506530761719, "learning_rate": 0.0001, "loss": 0.0148, "step": 32160 }, { "epoch": 211.64473684210526, "grad_norm": 1.220293641090393, "learning_rate": 0.0001, "loss": 0.0146, "step": 32170 }, { "epoch": 211.71052631578948, "grad_norm": 1.4087119102478027, "learning_rate": 0.0001, "loss": 0.0166, "step": 32180 }, { "epoch": 211.77631578947367, "grad_norm": 1.3689287900924683, "learning_rate": 0.0001, "loss": 0.0157, "step": 32190 }, { "epoch": 211.8421052631579, "grad_norm": 1.4669215679168701, "learning_rate": 0.0001, "loss": 0.0169, "step": 32200 }, { "epoch": 211.9078947368421, "grad_norm": 1.2780801057815552, "learning_rate": 0.0001, "loss": 0.0149, "step": 32210 }, { "epoch": 211.97368421052633, "grad_norm": 1.0613449811935425, "learning_rate": 0.0001, "loss": 0.0176, "step": 32220 }, { "epoch": 212.03947368421052, "grad_norm": 1.206862211227417, "learning_rate": 0.0001, "loss": 0.0145, "step": 32230 }, { "epoch": 212.10526315789474, "grad_norm": 1.3805917501449585, "learning_rate": 0.0001, "loss": 0.0162, "step": 32240 }, { "epoch": 212.17105263157896, "grad_norm": 1.1278748512268066, "learning_rate": 0.0001, "loss": 0.0163, "step": 32250 }, { "epoch": 212.23684210526315, "grad_norm": 1.1782323122024536, "learning_rate": 0.0001, "loss": 0.015, "step": 32260 }, { "epoch": 212.30263157894737, "grad_norm": 1.0774714946746826, "learning_rate": 0.0001, "loss": 0.0163, "step": 32270 }, { "epoch": 212.3684210526316, "grad_norm": 1.1292855739593506, "learning_rate": 0.0001, "loss": 0.0169, "step": 32280 }, { "epoch": 212.43421052631578, "grad_norm": 1.4095853567123413, "learning_rate": 0.0001, "loss": 0.0186, "step": 32290 }, { "epoch": 212.5, "grad_norm": 1.196883201599121, "learning_rate": 0.0001, "loss": 0.0157, "step": 32300 }, { "epoch": 212.56578947368422, "grad_norm": 1.2091253995895386, "learning_rate": 0.0001, "loss": 0.0156, "step": 32310 }, { "epoch": 212.6315789473684, "grad_norm": 1.3275787830352783, "learning_rate": 0.0001, "loss": 0.0173, "step": 32320 }, { "epoch": 212.69736842105263, "grad_norm": 1.531229019165039, "learning_rate": 0.0001, "loss": 0.0177, "step": 32330 }, { "epoch": 212.76315789473685, "grad_norm": 1.4241570234298706, "learning_rate": 0.0001, "loss": 0.0173, "step": 32340 }, { "epoch": 212.82894736842104, "grad_norm": 0.965205192565918, "learning_rate": 0.0001, "loss": 0.0173, "step": 32350 }, { "epoch": 212.89473684210526, "grad_norm": 1.515239953994751, "learning_rate": 0.0001, "loss": 0.0162, "step": 32360 }, { "epoch": 212.96052631578948, "grad_norm": 1.5352081060409546, "learning_rate": 0.0001, "loss": 0.0158, "step": 32370 }, { "epoch": 213.02631578947367, "grad_norm": 1.2615565061569214, "learning_rate": 0.0001, "loss": 0.0172, "step": 32380 }, { "epoch": 213.0921052631579, "grad_norm": 1.6691360473632812, "learning_rate": 0.0001, "loss": 0.0153, "step": 32390 }, { "epoch": 213.1578947368421, "grad_norm": 1.4845948219299316, "learning_rate": 0.0001, "loss": 0.0156, "step": 32400 }, { "epoch": 213.22368421052633, "grad_norm": 1.1609926223754883, "learning_rate": 0.0001, "loss": 0.0168, "step": 32410 }, { "epoch": 213.28947368421052, "grad_norm": 1.4472464323043823, "learning_rate": 0.0001, "loss": 0.0154, "step": 32420 }, { "epoch": 213.35526315789474, "grad_norm": 1.1727287769317627, "learning_rate": 0.0001, "loss": 0.0144, "step": 32430 }, { "epoch": 213.42105263157896, "grad_norm": 1.0012319087982178, "learning_rate": 0.0001, "loss": 0.0165, "step": 32440 }, { "epoch": 213.48684210526315, "grad_norm": 1.4010659456253052, "learning_rate": 0.0001, "loss": 0.0158, "step": 32450 }, { "epoch": 213.55263157894737, "grad_norm": 1.6938822269439697, "learning_rate": 0.0001, "loss": 0.0188, "step": 32460 }, { "epoch": 213.6184210526316, "grad_norm": 1.7426364421844482, "learning_rate": 0.0001, "loss": 0.0183, "step": 32470 }, { "epoch": 213.68421052631578, "grad_norm": 1.147688627243042, "learning_rate": 0.0001, "loss": 0.0167, "step": 32480 }, { "epoch": 213.75, "grad_norm": 1.278310775756836, "learning_rate": 0.0001, "loss": 0.0159, "step": 32490 }, { "epoch": 213.81578947368422, "grad_norm": 1.183099627494812, "learning_rate": 0.0001, "loss": 0.015, "step": 32500 }, { "epoch": 213.8815789473684, "grad_norm": 1.738754391670227, "learning_rate": 0.0001, "loss": 0.0176, "step": 32510 }, { "epoch": 213.94736842105263, "grad_norm": 1.2291311025619507, "learning_rate": 0.0001, "loss": 0.0164, "step": 32520 }, { "epoch": 214.01315789473685, "grad_norm": 1.1750850677490234, "learning_rate": 0.0001, "loss": 0.0156, "step": 32530 }, { "epoch": 214.07894736842104, "grad_norm": 1.2301994562149048, "learning_rate": 0.0001, "loss": 0.0164, "step": 32540 }, { "epoch": 214.14473684210526, "grad_norm": 1.2301639318466187, "learning_rate": 0.0001, "loss": 0.0155, "step": 32550 }, { "epoch": 214.21052631578948, "grad_norm": 1.4040155410766602, "learning_rate": 0.0001, "loss": 0.0186, "step": 32560 }, { "epoch": 214.27631578947367, "grad_norm": 1.4484095573425293, "learning_rate": 0.0001, "loss": 0.0177, "step": 32570 }, { "epoch": 214.3421052631579, "grad_norm": 1.0612156391143799, "learning_rate": 0.0001, "loss": 0.0152, "step": 32580 }, { "epoch": 214.4078947368421, "grad_norm": 1.1924538612365723, "learning_rate": 0.0001, "loss": 0.0188, "step": 32590 }, { "epoch": 214.47368421052633, "grad_norm": 1.3676021099090576, "learning_rate": 0.0001, "loss": 0.0193, "step": 32600 }, { "epoch": 214.53947368421052, "grad_norm": 1.155792474746704, "learning_rate": 0.0001, "loss": 0.0156, "step": 32610 }, { "epoch": 214.60526315789474, "grad_norm": 1.2802067995071411, "learning_rate": 0.0001, "loss": 0.0172, "step": 32620 }, { "epoch": 214.67105263157896, "grad_norm": 1.1130114793777466, "learning_rate": 0.0001, "loss": 0.0166, "step": 32630 }, { "epoch": 214.73684210526315, "grad_norm": 1.3467772006988525, "learning_rate": 0.0001, "loss": 0.0173, "step": 32640 }, { "epoch": 214.80263157894737, "grad_norm": 1.1575422286987305, "learning_rate": 0.0001, "loss": 0.0185, "step": 32650 }, { "epoch": 214.8684210526316, "grad_norm": 1.0883058309555054, "learning_rate": 0.0001, "loss": 0.0169, "step": 32660 }, { "epoch": 214.93421052631578, "grad_norm": 1.3685483932495117, "learning_rate": 0.0001, "loss": 0.0173, "step": 32670 }, { "epoch": 215.0, "grad_norm": 1.452791690826416, "learning_rate": 0.0001, "loss": 0.016, "step": 32680 }, { "epoch": 215.06578947368422, "grad_norm": 1.2236331701278687, "learning_rate": 0.0001, "loss": 0.0175, "step": 32690 }, { "epoch": 215.1315789473684, "grad_norm": 1.1579644680023193, "learning_rate": 0.0001, "loss": 0.0162, "step": 32700 }, { "epoch": 215.19736842105263, "grad_norm": 1.2398250102996826, "learning_rate": 0.0001, "loss": 0.0181, "step": 32710 }, { "epoch": 215.26315789473685, "grad_norm": 0.9351480603218079, "learning_rate": 0.0001, "loss": 0.0178, "step": 32720 }, { "epoch": 215.32894736842104, "grad_norm": 0.8759018778800964, "learning_rate": 0.0001, "loss": 0.0165, "step": 32730 }, { "epoch": 215.39473684210526, "grad_norm": 1.077101230621338, "learning_rate": 0.0001, "loss": 0.0185, "step": 32740 }, { "epoch": 215.46052631578948, "grad_norm": 0.8699200749397278, "learning_rate": 0.0001, "loss": 0.0171, "step": 32750 }, { "epoch": 215.52631578947367, "grad_norm": 1.2081657648086548, "learning_rate": 0.0001, "loss": 0.018, "step": 32760 }, { "epoch": 215.5921052631579, "grad_norm": 1.218841791152954, "learning_rate": 0.0001, "loss": 0.018, "step": 32770 }, { "epoch": 215.6578947368421, "grad_norm": 1.0985344648361206, "learning_rate": 0.0001, "loss": 0.0155, "step": 32780 }, { "epoch": 215.72368421052633, "grad_norm": 1.242873191833496, "learning_rate": 0.0001, "loss": 0.0151, "step": 32790 }, { "epoch": 215.78947368421052, "grad_norm": 0.956916868686676, "learning_rate": 0.0001, "loss": 0.0156, "step": 32800 }, { "epoch": 215.85526315789474, "grad_norm": 0.9822515249252319, "learning_rate": 0.0001, "loss": 0.0191, "step": 32810 }, { "epoch": 215.92105263157896, "grad_norm": 0.8483900427818298, "learning_rate": 0.0001, "loss": 0.0171, "step": 32820 }, { "epoch": 215.98684210526315, "grad_norm": 1.0709314346313477, "learning_rate": 0.0001, "loss": 0.0167, "step": 32830 }, { "epoch": 216.05263157894737, "grad_norm": 1.0873550176620483, "learning_rate": 0.0001, "loss": 0.017, "step": 32840 }, { "epoch": 216.1184210526316, "grad_norm": 1.20504891872406, "learning_rate": 0.0001, "loss": 0.0165, "step": 32850 }, { "epoch": 216.18421052631578, "grad_norm": 1.1691912412643433, "learning_rate": 0.0001, "loss": 0.0178, "step": 32860 }, { "epoch": 216.25, "grad_norm": 1.352332592010498, "learning_rate": 0.0001, "loss": 0.0163, "step": 32870 }, { "epoch": 216.31578947368422, "grad_norm": 0.8410949110984802, "learning_rate": 0.0001, "loss": 0.0188, "step": 32880 }, { "epoch": 216.3815789473684, "grad_norm": 1.0081835985183716, "learning_rate": 0.0001, "loss": 0.0158, "step": 32890 }, { "epoch": 216.44736842105263, "grad_norm": 0.9765644073486328, "learning_rate": 0.0001, "loss": 0.0169, "step": 32900 }, { "epoch": 216.51315789473685, "grad_norm": 1.4321998357772827, "learning_rate": 0.0001, "loss": 0.0179, "step": 32910 }, { "epoch": 216.57894736842104, "grad_norm": 1.1078122854232788, "learning_rate": 0.0001, "loss": 0.0199, "step": 32920 }, { "epoch": 216.64473684210526, "grad_norm": 1.004533290863037, "learning_rate": 0.0001, "loss": 0.0163, "step": 32930 }, { "epoch": 216.71052631578948, "grad_norm": 1.0383466482162476, "learning_rate": 0.0001, "loss": 0.0152, "step": 32940 }, { "epoch": 216.77631578947367, "grad_norm": 1.178542137145996, "learning_rate": 0.0001, "loss": 0.0173, "step": 32950 }, { "epoch": 216.8421052631579, "grad_norm": 1.2944022417068481, "learning_rate": 0.0001, "loss": 0.0172, "step": 32960 }, { "epoch": 216.9078947368421, "grad_norm": 1.0292662382125854, "learning_rate": 0.0001, "loss": 0.0174, "step": 32970 }, { "epoch": 216.97368421052633, "grad_norm": 1.4745246171951294, "learning_rate": 0.0001, "loss": 0.017, "step": 32980 }, { "epoch": 217.03947368421052, "grad_norm": 1.0737231969833374, "learning_rate": 0.0001, "loss": 0.0173, "step": 32990 }, { "epoch": 217.10526315789474, "grad_norm": 1.2109375, "learning_rate": 0.0001, "loss": 0.0164, "step": 33000 }, { "epoch": 217.17105263157896, "grad_norm": 1.142307162284851, "learning_rate": 0.0001, "loss": 0.0162, "step": 33010 }, { "epoch": 217.23684210526315, "grad_norm": 0.9450036883354187, "learning_rate": 0.0001, "loss": 0.0146, "step": 33020 }, { "epoch": 217.30263157894737, "grad_norm": 0.9609930515289307, "learning_rate": 0.0001, "loss": 0.0185, "step": 33030 }, { "epoch": 217.3684210526316, "grad_norm": 0.949567437171936, "learning_rate": 0.0001, "loss": 0.0161, "step": 33040 }, { "epoch": 217.43421052631578, "grad_norm": 1.1803269386291504, "learning_rate": 0.0001, "loss": 0.0179, "step": 33050 }, { "epoch": 217.5, "grad_norm": 1.2131317853927612, "learning_rate": 0.0001, "loss": 0.017, "step": 33060 }, { "epoch": 217.56578947368422, "grad_norm": 1.3640658855438232, "learning_rate": 0.0001, "loss": 0.0172, "step": 33070 }, { "epoch": 217.6315789473684, "grad_norm": 1.1110533475875854, "learning_rate": 0.0001, "loss": 0.0159, "step": 33080 }, { "epoch": 217.69736842105263, "grad_norm": 1.4728652238845825, "learning_rate": 0.0001, "loss": 0.0171, "step": 33090 }, { "epoch": 217.76315789473685, "grad_norm": 1.1803345680236816, "learning_rate": 0.0001, "loss": 0.0187, "step": 33100 }, { "epoch": 217.82894736842104, "grad_norm": 1.0455313920974731, "learning_rate": 0.0001, "loss": 0.0163, "step": 33110 }, { "epoch": 217.89473684210526, "grad_norm": 1.290387749671936, "learning_rate": 0.0001, "loss": 0.0169, "step": 33120 }, { "epoch": 217.96052631578948, "grad_norm": 1.0073187351226807, "learning_rate": 0.0001, "loss": 0.0161, "step": 33130 }, { "epoch": 218.02631578947367, "grad_norm": 1.0683584213256836, "learning_rate": 0.0001, "loss": 0.0167, "step": 33140 }, { "epoch": 218.0921052631579, "grad_norm": 1.0977116823196411, "learning_rate": 0.0001, "loss": 0.0172, "step": 33150 }, { "epoch": 218.1578947368421, "grad_norm": 1.2289358377456665, "learning_rate": 0.0001, "loss": 0.0174, "step": 33160 }, { "epoch": 218.22368421052633, "grad_norm": 1.4504176378250122, "learning_rate": 0.0001, "loss": 0.0174, "step": 33170 }, { "epoch": 218.28947368421052, "grad_norm": 1.3620048761367798, "learning_rate": 0.0001, "loss": 0.017, "step": 33180 }, { "epoch": 218.35526315789474, "grad_norm": 1.0863347053527832, "learning_rate": 0.0001, "loss": 0.0166, "step": 33190 }, { "epoch": 218.42105263157896, "grad_norm": 1.5031284093856812, "learning_rate": 0.0001, "loss": 0.0158, "step": 33200 }, { "epoch": 218.48684210526315, "grad_norm": 1.6940463781356812, "learning_rate": 0.0001, "loss": 0.0174, "step": 33210 }, { "epoch": 218.55263157894737, "grad_norm": 1.5782625675201416, "learning_rate": 0.0001, "loss": 0.0157, "step": 33220 }, { "epoch": 218.6184210526316, "grad_norm": 1.5026166439056396, "learning_rate": 0.0001, "loss": 0.0166, "step": 33230 }, { "epoch": 218.68421052631578, "grad_norm": 0.9451717734336853, "learning_rate": 0.0001, "loss": 0.0153, "step": 33240 }, { "epoch": 218.75, "grad_norm": 1.007091999053955, "learning_rate": 0.0001, "loss": 0.0164, "step": 33250 }, { "epoch": 218.81578947368422, "grad_norm": 1.0861047506332397, "learning_rate": 0.0001, "loss": 0.0172, "step": 33260 }, { "epoch": 218.8815789473684, "grad_norm": 1.3030095100402832, "learning_rate": 0.0001, "loss": 0.0158, "step": 33270 }, { "epoch": 218.94736842105263, "grad_norm": 1.0647660493850708, "learning_rate": 0.0001, "loss": 0.0159, "step": 33280 }, { "epoch": 219.01315789473685, "grad_norm": 1.160729169845581, "learning_rate": 0.0001, "loss": 0.0151, "step": 33290 }, { "epoch": 219.07894736842104, "grad_norm": 0.992527425289154, "learning_rate": 0.0001, "loss": 0.0151, "step": 33300 }, { "epoch": 219.14473684210526, "grad_norm": 1.6091036796569824, "learning_rate": 0.0001, "loss": 0.0168, "step": 33310 }, { "epoch": 219.21052631578948, "grad_norm": 1.16289222240448, "learning_rate": 0.0001, "loss": 0.016, "step": 33320 }, { "epoch": 219.27631578947367, "grad_norm": 0.95487380027771, "learning_rate": 0.0001, "loss": 0.0152, "step": 33330 }, { "epoch": 219.3421052631579, "grad_norm": 0.9959771633148193, "learning_rate": 0.0001, "loss": 0.0157, "step": 33340 }, { "epoch": 219.4078947368421, "grad_norm": 1.1076931953430176, "learning_rate": 0.0001, "loss": 0.0158, "step": 33350 }, { "epoch": 219.47368421052633, "grad_norm": 1.2252377271652222, "learning_rate": 0.0001, "loss": 0.0164, "step": 33360 }, { "epoch": 219.53947368421052, "grad_norm": 1.2380799055099487, "learning_rate": 0.0001, "loss": 0.0156, "step": 33370 }, { "epoch": 219.60526315789474, "grad_norm": 1.33864164352417, "learning_rate": 0.0001, "loss": 0.0146, "step": 33380 }, { "epoch": 219.67105263157896, "grad_norm": 1.1309316158294678, "learning_rate": 0.0001, "loss": 0.0148, "step": 33390 }, { "epoch": 219.73684210526315, "grad_norm": 1.1230326890945435, "learning_rate": 0.0001, "loss": 0.0159, "step": 33400 }, { "epoch": 219.80263157894737, "grad_norm": 1.306256890296936, "learning_rate": 0.0001, "loss": 0.0182, "step": 33410 }, { "epoch": 219.8684210526316, "grad_norm": 1.3912127017974854, "learning_rate": 0.0001, "loss": 0.0166, "step": 33420 }, { "epoch": 219.93421052631578, "grad_norm": 1.529556155204773, "learning_rate": 0.0001, "loss": 0.0161, "step": 33430 }, { "epoch": 220.0, "grad_norm": 1.9190083742141724, "learning_rate": 0.0001, "loss": 0.0176, "step": 33440 }, { "epoch": 220.06578947368422, "grad_norm": 1.1662191152572632, "learning_rate": 0.0001, "loss": 0.0147, "step": 33450 }, { "epoch": 220.1315789473684, "grad_norm": 1.6552345752716064, "learning_rate": 0.0001, "loss": 0.0151, "step": 33460 }, { "epoch": 220.19736842105263, "grad_norm": 1.3389320373535156, "learning_rate": 0.0001, "loss": 0.0159, "step": 33470 }, { "epoch": 220.26315789473685, "grad_norm": 1.5310899019241333, "learning_rate": 0.0001, "loss": 0.016, "step": 33480 }, { "epoch": 220.32894736842104, "grad_norm": 1.621925950050354, "learning_rate": 0.0001, "loss": 0.0158, "step": 33490 }, { "epoch": 220.39473684210526, "grad_norm": 1.3250995874404907, "learning_rate": 0.0001, "loss": 0.0158, "step": 33500 }, { "epoch": 220.46052631578948, "grad_norm": 1.3257274627685547, "learning_rate": 0.0001, "loss": 0.0155, "step": 33510 }, { "epoch": 220.52631578947367, "grad_norm": 1.333977460861206, "learning_rate": 0.0001, "loss": 0.0152, "step": 33520 }, { "epoch": 220.5921052631579, "grad_norm": 1.4061793088912964, "learning_rate": 0.0001, "loss": 0.0138, "step": 33530 }, { "epoch": 220.6578947368421, "grad_norm": 1.223670244216919, "learning_rate": 0.0001, "loss": 0.0135, "step": 33540 }, { "epoch": 220.72368421052633, "grad_norm": 1.1718958616256714, "learning_rate": 0.0001, "loss": 0.0183, "step": 33550 }, { "epoch": 220.78947368421052, "grad_norm": 1.171622395515442, "learning_rate": 0.0001, "loss": 0.0145, "step": 33560 }, { "epoch": 220.85526315789474, "grad_norm": 1.121756911277771, "learning_rate": 0.0001, "loss": 0.0148, "step": 33570 }, { "epoch": 220.92105263157896, "grad_norm": 1.4940133094787598, "learning_rate": 0.0001, "loss": 0.0149, "step": 33580 }, { "epoch": 220.98684210526315, "grad_norm": 0.976639449596405, "learning_rate": 0.0001, "loss": 0.0164, "step": 33590 }, { "epoch": 221.05263157894737, "grad_norm": 1.2112866640090942, "learning_rate": 0.0001, "loss": 0.0158, "step": 33600 }, { "epoch": 221.1184210526316, "grad_norm": 1.3244744539260864, "learning_rate": 0.0001, "loss": 0.0153, "step": 33610 }, { "epoch": 221.18421052631578, "grad_norm": 1.2948193550109863, "learning_rate": 0.0001, "loss": 0.0155, "step": 33620 }, { "epoch": 221.25, "grad_norm": 1.39680016040802, "learning_rate": 0.0001, "loss": 0.0171, "step": 33630 }, { "epoch": 221.31578947368422, "grad_norm": 1.0296803712844849, "learning_rate": 0.0001, "loss": 0.0143, "step": 33640 }, { "epoch": 221.3815789473684, "grad_norm": 1.1246095895767212, "learning_rate": 0.0001, "loss": 0.0158, "step": 33650 }, { "epoch": 221.44736842105263, "grad_norm": 1.0090177059173584, "learning_rate": 0.0001, "loss": 0.0153, "step": 33660 }, { "epoch": 221.51315789473685, "grad_norm": 1.1224722862243652, "learning_rate": 0.0001, "loss": 0.0169, "step": 33670 }, { "epoch": 221.57894736842104, "grad_norm": 1.6427732706069946, "learning_rate": 0.0001, "loss": 0.0169, "step": 33680 }, { "epoch": 221.64473684210526, "grad_norm": 1.3435227870941162, "learning_rate": 0.0001, "loss": 0.0181, "step": 33690 }, { "epoch": 221.71052631578948, "grad_norm": 1.7769924402236938, "learning_rate": 0.0001, "loss": 0.0159, "step": 33700 }, { "epoch": 221.77631578947367, "grad_norm": 1.2814626693725586, "learning_rate": 0.0001, "loss": 0.0166, "step": 33710 }, { "epoch": 221.8421052631579, "grad_norm": 1.3737503290176392, "learning_rate": 0.0001, "loss": 0.0152, "step": 33720 }, { "epoch": 221.9078947368421, "grad_norm": 1.2347252368927002, "learning_rate": 0.0001, "loss": 0.0163, "step": 33730 }, { "epoch": 221.97368421052633, "grad_norm": 1.3277086019515991, "learning_rate": 0.0001, "loss": 0.0175, "step": 33740 }, { "epoch": 222.03947368421052, "grad_norm": 1.7966270446777344, "learning_rate": 0.0001, "loss": 0.0149, "step": 33750 }, { "epoch": 222.10526315789474, "grad_norm": 1.3284343481063843, "learning_rate": 0.0001, "loss": 0.0163, "step": 33760 }, { "epoch": 222.17105263157896, "grad_norm": 1.6279863119125366, "learning_rate": 0.0001, "loss": 0.0153, "step": 33770 }, { "epoch": 222.23684210526315, "grad_norm": 1.5999658107757568, "learning_rate": 0.0001, "loss": 0.0166, "step": 33780 }, { "epoch": 222.30263157894737, "grad_norm": 1.5054606199264526, "learning_rate": 0.0001, "loss": 0.0157, "step": 33790 }, { "epoch": 222.3684210526316, "grad_norm": 1.4686840772628784, "learning_rate": 0.0001, "loss": 0.0166, "step": 33800 }, { "epoch": 222.43421052631578, "grad_norm": 1.1565569639205933, "learning_rate": 0.0001, "loss": 0.0167, "step": 33810 }, { "epoch": 222.5, "grad_norm": 1.396141529083252, "learning_rate": 0.0001, "loss": 0.0158, "step": 33820 }, { "epoch": 222.56578947368422, "grad_norm": 1.1285852193832397, "learning_rate": 0.0001, "loss": 0.0154, "step": 33830 }, { "epoch": 222.6315789473684, "grad_norm": 1.093670129776001, "learning_rate": 0.0001, "loss": 0.0163, "step": 33840 }, { "epoch": 222.69736842105263, "grad_norm": 1.443837285041809, "learning_rate": 0.0001, "loss": 0.015, "step": 33850 }, { "epoch": 222.76315789473685, "grad_norm": 1.3673397302627563, "learning_rate": 0.0001, "loss": 0.0183, "step": 33860 }, { "epoch": 222.82894736842104, "grad_norm": 1.2065106630325317, "learning_rate": 0.0001, "loss": 0.0154, "step": 33870 }, { "epoch": 222.89473684210526, "grad_norm": 1.3784736394882202, "learning_rate": 0.0001, "loss": 0.0144, "step": 33880 }, { "epoch": 222.96052631578948, "grad_norm": 1.6203807592391968, "learning_rate": 0.0001, "loss": 0.016, "step": 33890 }, { "epoch": 223.02631578947367, "grad_norm": 1.314103126525879, "learning_rate": 0.0001, "loss": 0.016, "step": 33900 }, { "epoch": 223.0921052631579, "grad_norm": 1.6348367929458618, "learning_rate": 0.0001, "loss": 0.0159, "step": 33910 }, { "epoch": 223.1578947368421, "grad_norm": 1.20957350730896, "learning_rate": 0.0001, "loss": 0.0164, "step": 33920 }, { "epoch": 223.22368421052633, "grad_norm": 1.1228580474853516, "learning_rate": 0.0001, "loss": 0.0167, "step": 33930 }, { "epoch": 223.28947368421052, "grad_norm": 1.4203588962554932, "learning_rate": 0.0001, "loss": 0.0167, "step": 33940 }, { "epoch": 223.35526315789474, "grad_norm": 1.2360961437225342, "learning_rate": 0.0001, "loss": 0.0148, "step": 33950 }, { "epoch": 223.42105263157896, "grad_norm": 0.9158451557159424, "learning_rate": 0.0001, "loss": 0.0151, "step": 33960 }, { "epoch": 223.48684210526315, "grad_norm": 1.6221871376037598, "learning_rate": 0.0001, "loss": 0.0157, "step": 33970 }, { "epoch": 223.55263157894737, "grad_norm": 1.559161901473999, "learning_rate": 0.0001, "loss": 0.0157, "step": 33980 }, { "epoch": 223.6184210526316, "grad_norm": 1.876190423965454, "learning_rate": 0.0001, "loss": 0.0168, "step": 33990 }, { "epoch": 223.68421052631578, "grad_norm": 1.5894197225570679, "learning_rate": 0.0001, "loss": 0.0146, "step": 34000 }, { "epoch": 223.75, "grad_norm": 1.2699931859970093, "learning_rate": 0.0001, "loss": 0.0164, "step": 34010 }, { "epoch": 223.81578947368422, "grad_norm": 1.1223138570785522, "learning_rate": 0.0001, "loss": 0.017, "step": 34020 }, { "epoch": 223.8815789473684, "grad_norm": 1.2991082668304443, "learning_rate": 0.0001, "loss": 0.0141, "step": 34030 }, { "epoch": 223.94736842105263, "grad_norm": 1.1831358671188354, "learning_rate": 0.0001, "loss": 0.0147, "step": 34040 }, { "epoch": 224.01315789473685, "grad_norm": 0.8384124636650085, "learning_rate": 0.0001, "loss": 0.0167, "step": 34050 }, { "epoch": 224.07894736842104, "grad_norm": 1.0878987312316895, "learning_rate": 0.0001, "loss": 0.0147, "step": 34060 }, { "epoch": 224.14473684210526, "grad_norm": 1.440589189529419, "learning_rate": 0.0001, "loss": 0.0166, "step": 34070 }, { "epoch": 224.21052631578948, "grad_norm": 1.6876184940338135, "learning_rate": 0.0001, "loss": 0.0176, "step": 34080 }, { "epoch": 224.27631578947367, "grad_norm": 1.424138069152832, "learning_rate": 0.0001, "loss": 0.0162, "step": 34090 }, { "epoch": 224.3421052631579, "grad_norm": 1.2443312406539917, "learning_rate": 0.0001, "loss": 0.0163, "step": 34100 }, { "epoch": 224.4078947368421, "grad_norm": 1.3643524646759033, "learning_rate": 0.0001, "loss": 0.0154, "step": 34110 }, { "epoch": 224.47368421052633, "grad_norm": 1.1311956644058228, "learning_rate": 0.0001, "loss": 0.017, "step": 34120 }, { "epoch": 224.53947368421052, "grad_norm": 1.0207782983779907, "learning_rate": 0.0001, "loss": 0.017, "step": 34130 }, { "epoch": 224.60526315789474, "grad_norm": 1.3237141370773315, "learning_rate": 0.0001, "loss": 0.0159, "step": 34140 }, { "epoch": 224.67105263157896, "grad_norm": 1.5647715330123901, "learning_rate": 0.0001, "loss": 0.0173, "step": 34150 }, { "epoch": 224.73684210526315, "grad_norm": 1.4168190956115723, "learning_rate": 0.0001, "loss": 0.0157, "step": 34160 }, { "epoch": 224.80263157894737, "grad_norm": 1.3588078022003174, "learning_rate": 0.0001, "loss": 0.0152, "step": 34170 }, { "epoch": 224.8684210526316, "grad_norm": 1.0193902254104614, "learning_rate": 0.0001, "loss": 0.0157, "step": 34180 }, { "epoch": 224.93421052631578, "grad_norm": 1.3121767044067383, "learning_rate": 0.0001, "loss": 0.0141, "step": 34190 }, { "epoch": 225.0, "grad_norm": 1.4055311679840088, "learning_rate": 0.0001, "loss": 0.0155, "step": 34200 }, { "epoch": 225.06578947368422, "grad_norm": 1.3310362100601196, "learning_rate": 0.0001, "loss": 0.0169, "step": 34210 }, { "epoch": 225.1315789473684, "grad_norm": 1.2854124307632446, "learning_rate": 0.0001, "loss": 0.0178, "step": 34220 }, { "epoch": 225.19736842105263, "grad_norm": 1.1616575717926025, "learning_rate": 0.0001, "loss": 0.0146, "step": 34230 }, { "epoch": 225.26315789473685, "grad_norm": 1.6716482639312744, "learning_rate": 0.0001, "loss": 0.0161, "step": 34240 }, { "epoch": 225.32894736842104, "grad_norm": 1.0361127853393555, "learning_rate": 0.0001, "loss": 0.0162, "step": 34250 }, { "epoch": 225.39473684210526, "grad_norm": 1.1146401166915894, "learning_rate": 0.0001, "loss": 0.0145, "step": 34260 }, { "epoch": 225.46052631578948, "grad_norm": 1.2672135829925537, "learning_rate": 0.0001, "loss": 0.0161, "step": 34270 }, { "epoch": 225.52631578947367, "grad_norm": 1.557548999786377, "learning_rate": 0.0001, "loss": 0.0156, "step": 34280 }, { "epoch": 225.5921052631579, "grad_norm": 1.0384529829025269, "learning_rate": 0.0001, "loss": 0.0163, "step": 34290 }, { "epoch": 225.6578947368421, "grad_norm": 1.3350151777267456, "learning_rate": 0.0001, "loss": 0.0158, "step": 34300 }, { "epoch": 225.72368421052633, "grad_norm": 1.3474798202514648, "learning_rate": 0.0001, "loss": 0.0156, "step": 34310 }, { "epoch": 225.78947368421052, "grad_norm": 0.9546133875846863, "learning_rate": 0.0001, "loss": 0.0155, "step": 34320 }, { "epoch": 225.85526315789474, "grad_norm": 1.325697660446167, "learning_rate": 0.0001, "loss": 0.0153, "step": 34330 }, { "epoch": 225.92105263157896, "grad_norm": 1.2987803220748901, "learning_rate": 0.0001, "loss": 0.0165, "step": 34340 }, { "epoch": 225.98684210526315, "grad_norm": 1.4725877046585083, "learning_rate": 0.0001, "loss": 0.017, "step": 34350 }, { "epoch": 226.05263157894737, "grad_norm": 1.2885562181472778, "learning_rate": 0.0001, "loss": 0.0168, "step": 34360 }, { "epoch": 226.1184210526316, "grad_norm": 1.1747262477874756, "learning_rate": 0.0001, "loss": 0.015, "step": 34370 }, { "epoch": 226.18421052631578, "grad_norm": 1.6619282960891724, "learning_rate": 0.0001, "loss": 0.0161, "step": 34380 }, { "epoch": 226.25, "grad_norm": 1.142620325088501, "learning_rate": 0.0001, "loss": 0.0152, "step": 34390 }, { "epoch": 226.31578947368422, "grad_norm": 1.11537766456604, "learning_rate": 0.0001, "loss": 0.0145, "step": 34400 }, { "epoch": 226.3815789473684, "grad_norm": 0.9072920083999634, "learning_rate": 0.0001, "loss": 0.0167, "step": 34410 }, { "epoch": 226.44736842105263, "grad_norm": 0.9555834531784058, "learning_rate": 0.0001, "loss": 0.017, "step": 34420 }, { "epoch": 226.51315789473685, "grad_norm": 1.3261579275131226, "learning_rate": 0.0001, "loss": 0.0164, "step": 34430 }, { "epoch": 226.57894736842104, "grad_norm": 1.6103168725967407, "learning_rate": 0.0001, "loss": 0.0158, "step": 34440 }, { "epoch": 226.64473684210526, "grad_norm": 1.1931551694869995, "learning_rate": 0.0001, "loss": 0.0168, "step": 34450 }, { "epoch": 226.71052631578948, "grad_norm": 1.633232593536377, "learning_rate": 0.0001, "loss": 0.0184, "step": 34460 }, { "epoch": 226.77631578947367, "grad_norm": 1.4821163415908813, "learning_rate": 0.0001, "loss": 0.0151, "step": 34470 }, { "epoch": 226.8421052631579, "grad_norm": 0.9790869951248169, "learning_rate": 0.0001, "loss": 0.0141, "step": 34480 }, { "epoch": 226.9078947368421, "grad_norm": 0.8639764785766602, "learning_rate": 0.0001, "loss": 0.0155, "step": 34490 }, { "epoch": 226.97368421052633, "grad_norm": 1.0887365341186523, "learning_rate": 0.0001, "loss": 0.0157, "step": 34500 }, { "epoch": 227.03947368421052, "grad_norm": 0.8437522649765015, "learning_rate": 0.0001, "loss": 0.0143, "step": 34510 }, { "epoch": 227.10526315789474, "grad_norm": 1.2919409275054932, "learning_rate": 0.0001, "loss": 0.014, "step": 34520 }, { "epoch": 227.17105263157896, "grad_norm": 1.2690752744674683, "learning_rate": 0.0001, "loss": 0.0157, "step": 34530 }, { "epoch": 227.23684210526315, "grad_norm": 1.1717513799667358, "learning_rate": 0.0001, "loss": 0.0152, "step": 34540 }, { "epoch": 227.30263157894737, "grad_norm": 1.1742141246795654, "learning_rate": 0.0001, "loss": 0.0153, "step": 34550 }, { "epoch": 227.3684210526316, "grad_norm": 1.2285854816436768, "learning_rate": 0.0001, "loss": 0.0162, "step": 34560 }, { "epoch": 227.43421052631578, "grad_norm": 1.6287882328033447, "learning_rate": 0.0001, "loss": 0.0156, "step": 34570 }, { "epoch": 227.5, "grad_norm": 1.4671905040740967, "learning_rate": 0.0001, "loss": 0.0182, "step": 34580 }, { "epoch": 227.56578947368422, "grad_norm": 1.2717926502227783, "learning_rate": 0.0001, "loss": 0.0173, "step": 34590 }, { "epoch": 227.6315789473684, "grad_norm": 1.2138713598251343, "learning_rate": 0.0001, "loss": 0.0151, "step": 34600 }, { "epoch": 227.69736842105263, "grad_norm": 1.219657301902771, "learning_rate": 0.0001, "loss": 0.0156, "step": 34610 }, { "epoch": 227.76315789473685, "grad_norm": 1.2700283527374268, "learning_rate": 0.0001, "loss": 0.016, "step": 34620 }, { "epoch": 227.82894736842104, "grad_norm": 1.2568398714065552, "learning_rate": 0.0001, "loss": 0.0158, "step": 34630 }, { "epoch": 227.89473684210526, "grad_norm": 1.308393120765686, "learning_rate": 0.0001, "loss": 0.016, "step": 34640 }, { "epoch": 227.96052631578948, "grad_norm": 1.6060470342636108, "learning_rate": 0.0001, "loss": 0.0144, "step": 34650 }, { "epoch": 228.02631578947367, "grad_norm": 1.4891085624694824, "learning_rate": 0.0001, "loss": 0.0156, "step": 34660 }, { "epoch": 228.0921052631579, "grad_norm": 1.2525702714920044, "learning_rate": 0.0001, "loss": 0.0146, "step": 34670 }, { "epoch": 228.1578947368421, "grad_norm": 1.2235273122787476, "learning_rate": 0.0001, "loss": 0.0156, "step": 34680 }, { "epoch": 228.22368421052633, "grad_norm": 1.2164841890335083, "learning_rate": 0.0001, "loss": 0.0151, "step": 34690 }, { "epoch": 228.28947368421052, "grad_norm": 1.487396001815796, "learning_rate": 0.0001, "loss": 0.0168, "step": 34700 }, { "epoch": 228.35526315789474, "grad_norm": 1.3770133256912231, "learning_rate": 0.0001, "loss": 0.0148, "step": 34710 }, { "epoch": 228.42105263157896, "grad_norm": 1.5073363780975342, "learning_rate": 0.0001, "loss": 0.0168, "step": 34720 }, { "epoch": 228.48684210526315, "grad_norm": 1.1958956718444824, "learning_rate": 0.0001, "loss": 0.0144, "step": 34730 }, { "epoch": 228.55263157894737, "grad_norm": 1.1365365982055664, "learning_rate": 0.0001, "loss": 0.0151, "step": 34740 }, { "epoch": 228.6184210526316, "grad_norm": 0.9599331021308899, "learning_rate": 0.0001, "loss": 0.0159, "step": 34750 }, { "epoch": 228.68421052631578, "grad_norm": 0.7286871671676636, "learning_rate": 0.0001, "loss": 0.0166, "step": 34760 }, { "epoch": 228.75, "grad_norm": 0.8963000774383545, "learning_rate": 0.0001, "loss": 0.0171, "step": 34770 }, { "epoch": 228.81578947368422, "grad_norm": 1.069594144821167, "learning_rate": 0.0001, "loss": 0.0164, "step": 34780 }, { "epoch": 228.8815789473684, "grad_norm": 1.107754111289978, "learning_rate": 0.0001, "loss": 0.0164, "step": 34790 }, { "epoch": 228.94736842105263, "grad_norm": 0.8711170554161072, "learning_rate": 0.0001, "loss": 0.0155, "step": 34800 }, { "epoch": 229.01315789473685, "grad_norm": 1.2165173292160034, "learning_rate": 0.0001, "loss": 0.018, "step": 34810 }, { "epoch": 229.07894736842104, "grad_norm": 1.3709280490875244, "learning_rate": 0.0001, "loss": 0.0145, "step": 34820 }, { "epoch": 229.14473684210526, "grad_norm": 0.9676075577735901, "learning_rate": 0.0001, "loss": 0.0162, "step": 34830 }, { "epoch": 229.21052631578948, "grad_norm": 1.192239761352539, "learning_rate": 0.0001, "loss": 0.0174, "step": 34840 }, { "epoch": 229.27631578947367, "grad_norm": 1.1368480920791626, "learning_rate": 0.0001, "loss": 0.018, "step": 34850 }, { "epoch": 229.3421052631579, "grad_norm": 1.0366135835647583, "learning_rate": 0.0001, "loss": 0.0148, "step": 34860 }, { "epoch": 229.4078947368421, "grad_norm": 1.52436101436615, "learning_rate": 0.0001, "loss": 0.0152, "step": 34870 }, { "epoch": 229.47368421052633, "grad_norm": 1.2649922370910645, "learning_rate": 0.0001, "loss": 0.0169, "step": 34880 }, { "epoch": 229.53947368421052, "grad_norm": 1.4526771306991577, "learning_rate": 0.0001, "loss": 0.0151, "step": 34890 }, { "epoch": 229.60526315789474, "grad_norm": 1.317568063735962, "learning_rate": 0.0001, "loss": 0.0142, "step": 34900 }, { "epoch": 229.67105263157896, "grad_norm": 1.5620092153549194, "learning_rate": 0.0001, "loss": 0.0153, "step": 34910 }, { "epoch": 229.73684210526315, "grad_norm": 1.2714239358901978, "learning_rate": 0.0001, "loss": 0.0154, "step": 34920 }, { "epoch": 229.80263157894737, "grad_norm": 1.5597683191299438, "learning_rate": 0.0001, "loss": 0.0161, "step": 34930 }, { "epoch": 229.8684210526316, "grad_norm": 1.3051207065582275, "learning_rate": 0.0001, "loss": 0.0149, "step": 34940 }, { "epoch": 229.93421052631578, "grad_norm": 1.688822865486145, "learning_rate": 0.0001, "loss": 0.0169, "step": 34950 }, { "epoch": 230.0, "grad_norm": 1.4721719026565552, "learning_rate": 0.0001, "loss": 0.0141, "step": 34960 }, { "epoch": 230.06578947368422, "grad_norm": 1.6833611726760864, "learning_rate": 0.0001, "loss": 0.0158, "step": 34970 }, { "epoch": 230.1315789473684, "grad_norm": 1.749559998512268, "learning_rate": 0.0001, "loss": 0.0162, "step": 34980 }, { "epoch": 230.19736842105263, "grad_norm": 1.0270739793777466, "learning_rate": 0.0001, "loss": 0.0131, "step": 34990 }, { "epoch": 230.26315789473685, "grad_norm": 1.5380287170410156, "learning_rate": 0.0001, "loss": 0.0156, "step": 35000 }, { "epoch": 230.32894736842104, "grad_norm": 1.2859152555465698, "learning_rate": 0.0001, "loss": 0.0166, "step": 35010 }, { "epoch": 230.39473684210526, "grad_norm": 1.2448034286499023, "learning_rate": 0.0001, "loss": 0.014, "step": 35020 }, { "epoch": 230.46052631578948, "grad_norm": 1.1096408367156982, "learning_rate": 0.0001, "loss": 0.0144, "step": 35030 }, { "epoch": 230.52631578947367, "grad_norm": 1.2876746654510498, "learning_rate": 0.0001, "loss": 0.0138, "step": 35040 }, { "epoch": 230.5921052631579, "grad_norm": 1.4161434173583984, "learning_rate": 0.0001, "loss": 0.0145, "step": 35050 }, { "epoch": 230.6578947368421, "grad_norm": 1.1465400457382202, "learning_rate": 0.0001, "loss": 0.0146, "step": 35060 }, { "epoch": 230.72368421052633, "grad_norm": 1.4166468381881714, "learning_rate": 0.0001, "loss": 0.0164, "step": 35070 }, { "epoch": 230.78947368421052, "grad_norm": 1.3646936416625977, "learning_rate": 0.0001, "loss": 0.0159, "step": 35080 }, { "epoch": 230.85526315789474, "grad_norm": 1.3088558912277222, "learning_rate": 0.0001, "loss": 0.0147, "step": 35090 }, { "epoch": 230.92105263157896, "grad_norm": 1.1793044805526733, "learning_rate": 0.0001, "loss": 0.0158, "step": 35100 }, { "epoch": 230.98684210526315, "grad_norm": 1.1064832210540771, "learning_rate": 0.0001, "loss": 0.0138, "step": 35110 }, { "epoch": 231.05263157894737, "grad_norm": 1.2353724241256714, "learning_rate": 0.0001, "loss": 0.0146, "step": 35120 }, { "epoch": 231.1184210526316, "grad_norm": 1.4452154636383057, "learning_rate": 0.0001, "loss": 0.0151, "step": 35130 }, { "epoch": 231.18421052631578, "grad_norm": 1.1226993799209595, "learning_rate": 0.0001, "loss": 0.015, "step": 35140 }, { "epoch": 231.25, "grad_norm": 1.0637484788894653, "learning_rate": 0.0001, "loss": 0.0161, "step": 35150 }, { "epoch": 231.31578947368422, "grad_norm": 0.9938138723373413, "learning_rate": 0.0001, "loss": 0.0165, "step": 35160 }, { "epoch": 231.3815789473684, "grad_norm": 1.5435380935668945, "learning_rate": 0.0001, "loss": 0.0176, "step": 35170 }, { "epoch": 231.44736842105263, "grad_norm": 1.042462944984436, "learning_rate": 0.0001, "loss": 0.0148, "step": 35180 }, { "epoch": 231.51315789473685, "grad_norm": 1.0510119199752808, "learning_rate": 0.0001, "loss": 0.0161, "step": 35190 }, { "epoch": 231.57894736842104, "grad_norm": 1.3831288814544678, "learning_rate": 0.0001, "loss": 0.0155, "step": 35200 }, { "epoch": 231.64473684210526, "grad_norm": 1.462022066116333, "learning_rate": 0.0001, "loss": 0.0156, "step": 35210 }, { "epoch": 231.71052631578948, "grad_norm": 1.0927327871322632, "learning_rate": 0.0001, "loss": 0.0178, "step": 35220 }, { "epoch": 231.77631578947367, "grad_norm": 1.1333094835281372, "learning_rate": 0.0001, "loss": 0.0143, "step": 35230 }, { "epoch": 231.8421052631579, "grad_norm": 1.2732223272323608, "learning_rate": 0.0001, "loss": 0.0149, "step": 35240 }, { "epoch": 231.9078947368421, "grad_norm": 1.2944512367248535, "learning_rate": 0.0001, "loss": 0.0175, "step": 35250 }, { "epoch": 231.97368421052633, "grad_norm": 1.2028173208236694, "learning_rate": 0.0001, "loss": 0.0165, "step": 35260 }, { "epoch": 232.03947368421052, "grad_norm": 1.0721412897109985, "learning_rate": 0.0001, "loss": 0.0159, "step": 35270 }, { "epoch": 232.10526315789474, "grad_norm": 1.3471068143844604, "learning_rate": 0.0001, "loss": 0.0152, "step": 35280 }, { "epoch": 232.17105263157896, "grad_norm": 1.085255742073059, "learning_rate": 0.0001, "loss": 0.0182, "step": 35290 }, { "epoch": 232.23684210526315, "grad_norm": 1.335943579673767, "learning_rate": 0.0001, "loss": 0.0161, "step": 35300 }, { "epoch": 232.30263157894737, "grad_norm": 1.0885100364685059, "learning_rate": 0.0001, "loss": 0.0157, "step": 35310 }, { "epoch": 232.3684210526316, "grad_norm": 1.2813364267349243, "learning_rate": 0.0001, "loss": 0.0186, "step": 35320 }, { "epoch": 232.43421052631578, "grad_norm": 1.1324126720428467, "learning_rate": 0.0001, "loss": 0.0169, "step": 35330 }, { "epoch": 232.5, "grad_norm": 1.5118268728256226, "learning_rate": 0.0001, "loss": 0.0158, "step": 35340 }, { "epoch": 232.56578947368422, "grad_norm": 1.3967148065567017, "learning_rate": 0.0001, "loss": 0.0151, "step": 35350 }, { "epoch": 232.6315789473684, "grad_norm": 1.3626654148101807, "learning_rate": 0.0001, "loss": 0.0159, "step": 35360 }, { "epoch": 232.69736842105263, "grad_norm": 0.9588110446929932, "learning_rate": 0.0001, "loss": 0.0164, "step": 35370 }, { "epoch": 232.76315789473685, "grad_norm": 1.7593961954116821, "learning_rate": 0.0001, "loss": 0.0161, "step": 35380 }, { "epoch": 232.82894736842104, "grad_norm": 1.4406646490097046, "learning_rate": 0.0001, "loss": 0.0153, "step": 35390 }, { "epoch": 232.89473684210526, "grad_norm": 1.0357986688613892, "learning_rate": 0.0001, "loss": 0.0151, "step": 35400 }, { "epoch": 232.96052631578948, "grad_norm": 1.051256775856018, "learning_rate": 0.0001, "loss": 0.0153, "step": 35410 }, { "epoch": 233.02631578947367, "grad_norm": 1.338333249092102, "learning_rate": 0.0001, "loss": 0.0164, "step": 35420 }, { "epoch": 233.0921052631579, "grad_norm": 1.0235182046890259, "learning_rate": 0.0001, "loss": 0.0146, "step": 35430 }, { "epoch": 233.1578947368421, "grad_norm": 1.265726923942566, "learning_rate": 0.0001, "loss": 0.014, "step": 35440 }, { "epoch": 233.22368421052633, "grad_norm": 1.3721272945404053, "learning_rate": 0.0001, "loss": 0.0156, "step": 35450 }, { "epoch": 233.28947368421052, "grad_norm": 1.0401647090911865, "learning_rate": 0.0001, "loss": 0.0161, "step": 35460 }, { "epoch": 233.35526315789474, "grad_norm": 1.3844105005264282, "learning_rate": 0.0001, "loss": 0.0156, "step": 35470 }, { "epoch": 233.42105263157896, "grad_norm": 1.977495551109314, "learning_rate": 0.0001, "loss": 0.0187, "step": 35480 }, { "epoch": 233.48684210526315, "grad_norm": 1.13108229637146, "learning_rate": 0.0001, "loss": 0.0191, "step": 35490 }, { "epoch": 233.55263157894737, "grad_norm": 1.4336754083633423, "learning_rate": 0.0001, "loss": 0.0174, "step": 35500 }, { "epoch": 233.6184210526316, "grad_norm": 1.2272390127182007, "learning_rate": 0.0001, "loss": 0.0151, "step": 35510 }, { "epoch": 233.68421052631578, "grad_norm": 0.978657066822052, "learning_rate": 0.0001, "loss": 0.0171, "step": 35520 }, { "epoch": 233.75, "grad_norm": 1.2305833101272583, "learning_rate": 0.0001, "loss": 0.016, "step": 35530 }, { "epoch": 233.81578947368422, "grad_norm": 1.1648582220077515, "learning_rate": 0.0001, "loss": 0.0183, "step": 35540 }, { "epoch": 233.8815789473684, "grad_norm": 1.3043948411941528, "learning_rate": 0.0001, "loss": 0.0164, "step": 35550 }, { "epoch": 233.94736842105263, "grad_norm": 1.4503754377365112, "learning_rate": 0.0001, "loss": 0.0154, "step": 35560 }, { "epoch": 234.01315789473685, "grad_norm": 1.3145157098770142, "learning_rate": 0.0001, "loss": 0.015, "step": 35570 }, { "epoch": 234.07894736842104, "grad_norm": 0.9834286570549011, "learning_rate": 0.0001, "loss": 0.0148, "step": 35580 }, { "epoch": 234.14473684210526, "grad_norm": 1.5456626415252686, "learning_rate": 0.0001, "loss": 0.018, "step": 35590 }, { "epoch": 234.21052631578948, "grad_norm": 1.1707838773727417, "learning_rate": 0.0001, "loss": 0.0167, "step": 35600 }, { "epoch": 234.27631578947367, "grad_norm": 1.4479657411575317, "learning_rate": 0.0001, "loss": 0.0164, "step": 35610 }, { "epoch": 234.3421052631579, "grad_norm": 1.352433204650879, "learning_rate": 0.0001, "loss": 0.0161, "step": 35620 }, { "epoch": 234.4078947368421, "grad_norm": 1.1182633638381958, "learning_rate": 0.0001, "loss": 0.0143, "step": 35630 }, { "epoch": 234.47368421052633, "grad_norm": 0.9117094278335571, "learning_rate": 0.0001, "loss": 0.0177, "step": 35640 }, { "epoch": 234.53947368421052, "grad_norm": 1.345800757408142, "learning_rate": 0.0001, "loss": 0.0157, "step": 35650 }, { "epoch": 234.60526315789474, "grad_norm": 1.145963430404663, "learning_rate": 0.0001, "loss": 0.0154, "step": 35660 }, { "epoch": 234.67105263157896, "grad_norm": 0.9094061851501465, "learning_rate": 0.0001, "loss": 0.0163, "step": 35670 }, { "epoch": 234.73684210526315, "grad_norm": 1.2878849506378174, "learning_rate": 0.0001, "loss": 0.0142, "step": 35680 }, { "epoch": 234.80263157894737, "grad_norm": 1.4541486501693726, "learning_rate": 0.0001, "loss": 0.0148, "step": 35690 }, { "epoch": 234.8684210526316, "grad_norm": 0.971204400062561, "learning_rate": 0.0001, "loss": 0.0135, "step": 35700 }, { "epoch": 234.93421052631578, "grad_norm": 1.1860767602920532, "learning_rate": 0.0001, "loss": 0.0184, "step": 35710 }, { "epoch": 235.0, "grad_norm": 1.2985316514968872, "learning_rate": 0.0001, "loss": 0.0162, "step": 35720 }, { "epoch": 235.06578947368422, "grad_norm": 1.0510177612304688, "learning_rate": 0.0001, "loss": 0.0157, "step": 35730 }, { "epoch": 235.1315789473684, "grad_norm": 0.9717345833778381, "learning_rate": 0.0001, "loss": 0.0158, "step": 35740 }, { "epoch": 235.19736842105263, "grad_norm": 1.131279468536377, "learning_rate": 0.0001, "loss": 0.0158, "step": 35750 }, { "epoch": 235.26315789473685, "grad_norm": 1.148485541343689, "learning_rate": 0.0001, "loss": 0.0159, "step": 35760 }, { "epoch": 235.32894736842104, "grad_norm": 0.8427010774612427, "learning_rate": 0.0001, "loss": 0.0166, "step": 35770 }, { "epoch": 235.39473684210526, "grad_norm": 0.973970890045166, "learning_rate": 0.0001, "loss": 0.0151, "step": 35780 }, { "epoch": 235.46052631578948, "grad_norm": 0.9726606607437134, "learning_rate": 0.0001, "loss": 0.0161, "step": 35790 }, { "epoch": 235.52631578947367, "grad_norm": 0.849587082862854, "learning_rate": 0.0001, "loss": 0.0149, "step": 35800 }, { "epoch": 235.5921052631579, "grad_norm": 1.053238868713379, "learning_rate": 0.0001, "loss": 0.0162, "step": 35810 }, { "epoch": 235.6578947368421, "grad_norm": 1.2742220163345337, "learning_rate": 0.0001, "loss": 0.0173, "step": 35820 }, { "epoch": 235.72368421052633, "grad_norm": 1.3794277906417847, "learning_rate": 0.0001, "loss": 0.0165, "step": 35830 }, { "epoch": 235.78947368421052, "grad_norm": 1.2692276239395142, "learning_rate": 0.0001, "loss": 0.0165, "step": 35840 }, { "epoch": 235.85526315789474, "grad_norm": 1.1622906923294067, "learning_rate": 0.0001, "loss": 0.0159, "step": 35850 }, { "epoch": 235.92105263157896, "grad_norm": 1.1229708194732666, "learning_rate": 0.0001, "loss": 0.0159, "step": 35860 }, { "epoch": 235.98684210526315, "grad_norm": 1.1721322536468506, "learning_rate": 0.0001, "loss": 0.0147, "step": 35870 }, { "epoch": 236.05263157894737, "grad_norm": 1.2149195671081543, "learning_rate": 0.0001, "loss": 0.0158, "step": 35880 }, { "epoch": 236.1184210526316, "grad_norm": 1.552794337272644, "learning_rate": 0.0001, "loss": 0.0153, "step": 35890 }, { "epoch": 236.18421052631578, "grad_norm": 0.9946601986885071, "learning_rate": 0.0001, "loss": 0.0144, "step": 35900 }, { "epoch": 236.25, "grad_norm": 1.1157256364822388, "learning_rate": 0.0001, "loss": 0.0152, "step": 35910 }, { "epoch": 236.31578947368422, "grad_norm": 1.2444145679473877, "learning_rate": 0.0001, "loss": 0.0162, "step": 35920 }, { "epoch": 236.3815789473684, "grad_norm": 1.2110340595245361, "learning_rate": 0.0001, "loss": 0.015, "step": 35930 }, { "epoch": 236.44736842105263, "grad_norm": 1.4335511922836304, "learning_rate": 0.0001, "loss": 0.015, "step": 35940 }, { "epoch": 236.51315789473685, "grad_norm": 1.3083791732788086, "learning_rate": 0.0001, "loss": 0.0177, "step": 35950 }, { "epoch": 236.57894736842104, "grad_norm": 0.8935396075248718, "learning_rate": 0.0001, "loss": 0.0136, "step": 35960 }, { "epoch": 236.64473684210526, "grad_norm": 0.9433857798576355, "learning_rate": 0.0001, "loss": 0.0153, "step": 35970 }, { "epoch": 236.71052631578948, "grad_norm": 1.1531612873077393, "learning_rate": 0.0001, "loss": 0.0169, "step": 35980 }, { "epoch": 236.77631578947367, "grad_norm": 1.431472659111023, "learning_rate": 0.0001, "loss": 0.0146, "step": 35990 }, { "epoch": 236.8421052631579, "grad_norm": 1.6425496339797974, "learning_rate": 0.0001, "loss": 0.0174, "step": 36000 }, { "epoch": 236.9078947368421, "grad_norm": 1.0566476583480835, "learning_rate": 0.0001, "loss": 0.0161, "step": 36010 }, { "epoch": 236.97368421052633, "grad_norm": 0.8310641050338745, "learning_rate": 0.0001, "loss": 0.0204, "step": 36020 }, { "epoch": 237.03947368421052, "grad_norm": 1.0086852312088013, "learning_rate": 0.0001, "loss": 0.0171, "step": 36030 }, { "epoch": 237.10526315789474, "grad_norm": 1.44770085811615, "learning_rate": 0.0001, "loss": 0.0196, "step": 36040 }, { "epoch": 237.17105263157896, "grad_norm": 1.002258539199829, "learning_rate": 0.0001, "loss": 0.0182, "step": 36050 }, { "epoch": 237.23684210526315, "grad_norm": 1.0949816703796387, "learning_rate": 0.0001, "loss": 0.0158, "step": 36060 }, { "epoch": 237.30263157894737, "grad_norm": 0.9487596750259399, "learning_rate": 0.0001, "loss": 0.0176, "step": 36070 }, { "epoch": 237.3684210526316, "grad_norm": 0.8451818227767944, "learning_rate": 0.0001, "loss": 0.0162, "step": 36080 }, { "epoch": 237.43421052631578, "grad_norm": 1.455590844154358, "learning_rate": 0.0001, "loss": 0.0185, "step": 36090 }, { "epoch": 237.5, "grad_norm": 1.2920894622802734, "learning_rate": 0.0001, "loss": 0.0173, "step": 36100 }, { "epoch": 237.56578947368422, "grad_norm": 1.3782984018325806, "learning_rate": 0.0001, "loss": 0.0172, "step": 36110 }, { "epoch": 237.6315789473684, "grad_norm": 1.0426079034805298, "learning_rate": 0.0001, "loss": 0.0173, "step": 36120 }, { "epoch": 237.69736842105263, "grad_norm": 1.128311276435852, "learning_rate": 0.0001, "loss": 0.015, "step": 36130 }, { "epoch": 237.76315789473685, "grad_norm": 1.0771710872650146, "learning_rate": 0.0001, "loss": 0.0183, "step": 36140 }, { "epoch": 237.82894736842104, "grad_norm": 0.8696498274803162, "learning_rate": 0.0001, "loss": 0.0176, "step": 36150 }, { "epoch": 237.89473684210526, "grad_norm": 1.1533684730529785, "learning_rate": 0.0001, "loss": 0.0168, "step": 36160 }, { "epoch": 237.96052631578948, "grad_norm": 1.2097066640853882, "learning_rate": 0.0001, "loss": 0.0173, "step": 36170 }, { "epoch": 238.02631578947367, "grad_norm": 1.1316962242126465, "learning_rate": 0.0001, "loss": 0.0199, "step": 36180 }, { "epoch": 238.0921052631579, "grad_norm": 1.1402578353881836, "learning_rate": 0.0001, "loss": 0.0182, "step": 36190 }, { "epoch": 238.1578947368421, "grad_norm": 1.0534335374832153, "learning_rate": 0.0001, "loss": 0.0192, "step": 36200 }, { "epoch": 238.22368421052633, "grad_norm": 1.2206014394760132, "learning_rate": 0.0001, "loss": 0.0168, "step": 36210 }, { "epoch": 238.28947368421052, "grad_norm": 0.9274782538414001, "learning_rate": 0.0001, "loss": 0.0156, "step": 36220 }, { "epoch": 238.35526315789474, "grad_norm": 1.1802715063095093, "learning_rate": 0.0001, "loss": 0.0165, "step": 36230 }, { "epoch": 238.42105263157896, "grad_norm": 1.3021156787872314, "learning_rate": 0.0001, "loss": 0.0157, "step": 36240 }, { "epoch": 238.48684210526315, "grad_norm": 0.9791660904884338, "learning_rate": 0.0001, "loss": 0.0188, "step": 36250 }, { "epoch": 238.55263157894737, "grad_norm": 1.458195686340332, "learning_rate": 0.0001, "loss": 0.0157, "step": 36260 }, { "epoch": 238.6184210526316, "grad_norm": 1.2013226747512817, "learning_rate": 0.0001, "loss": 0.0178, "step": 36270 }, { "epoch": 238.68421052631578, "grad_norm": 0.897367000579834, "learning_rate": 0.0001, "loss": 0.0151, "step": 36280 }, { "epoch": 238.75, "grad_norm": 1.430230736732483, "learning_rate": 0.0001, "loss": 0.0168, "step": 36290 }, { "epoch": 238.81578947368422, "grad_norm": 1.2003045082092285, "learning_rate": 0.0001, "loss": 0.0151, "step": 36300 }, { "epoch": 238.8815789473684, "grad_norm": 0.8546904921531677, "learning_rate": 0.0001, "loss": 0.0155, "step": 36310 }, { "epoch": 238.94736842105263, "grad_norm": 0.9247255325317383, "learning_rate": 0.0001, "loss": 0.0152, "step": 36320 }, { "epoch": 239.01315789473685, "grad_norm": 1.1530381441116333, "learning_rate": 0.0001, "loss": 0.0179, "step": 36330 }, { "epoch": 239.07894736842104, "grad_norm": 1.2787343263626099, "learning_rate": 0.0001, "loss": 0.015, "step": 36340 }, { "epoch": 239.14473684210526, "grad_norm": 1.2188760042190552, "learning_rate": 0.0001, "loss": 0.016, "step": 36350 }, { "epoch": 239.21052631578948, "grad_norm": 1.1100471019744873, "learning_rate": 0.0001, "loss": 0.017, "step": 36360 }, { "epoch": 239.27631578947367, "grad_norm": 0.8758446574211121, "learning_rate": 0.0001, "loss": 0.0161, "step": 36370 }, { "epoch": 239.3421052631579, "grad_norm": 0.9716992974281311, "learning_rate": 0.0001, "loss": 0.017, "step": 36380 }, { "epoch": 239.4078947368421, "grad_norm": 1.394768476486206, "learning_rate": 0.0001, "loss": 0.0155, "step": 36390 }, { "epoch": 239.47368421052633, "grad_norm": 1.2768748998641968, "learning_rate": 0.0001, "loss": 0.0169, "step": 36400 }, { "epoch": 239.53947368421052, "grad_norm": 1.1910803318023682, "learning_rate": 0.0001, "loss": 0.0136, "step": 36410 }, { "epoch": 239.60526315789474, "grad_norm": 1.2653837203979492, "learning_rate": 0.0001, "loss": 0.0141, "step": 36420 }, { "epoch": 239.67105263157896, "grad_norm": 1.348326325416565, "learning_rate": 0.0001, "loss": 0.0157, "step": 36430 }, { "epoch": 239.73684210526315, "grad_norm": 1.2382081747055054, "learning_rate": 0.0001, "loss": 0.0146, "step": 36440 }, { "epoch": 239.80263157894737, "grad_norm": 1.3600709438323975, "learning_rate": 0.0001, "loss": 0.0154, "step": 36450 }, { "epoch": 239.8684210526316, "grad_norm": 0.9798856377601624, "learning_rate": 0.0001, "loss": 0.0149, "step": 36460 }, { "epoch": 239.93421052631578, "grad_norm": 0.7852506637573242, "learning_rate": 0.0001, "loss": 0.0149, "step": 36470 }, { "epoch": 240.0, "grad_norm": 1.0045945644378662, "learning_rate": 0.0001, "loss": 0.0158, "step": 36480 }, { "epoch": 240.06578947368422, "grad_norm": 0.8587341904640198, "learning_rate": 0.0001, "loss": 0.0138, "step": 36490 }, { "epoch": 240.1315789473684, "grad_norm": 1.4854038953781128, "learning_rate": 0.0001, "loss": 0.0166, "step": 36500 }, { "epoch": 240.19736842105263, "grad_norm": 1.3243857622146606, "learning_rate": 0.0001, "loss": 0.0167, "step": 36510 }, { "epoch": 240.26315789473685, "grad_norm": 1.048046588897705, "learning_rate": 0.0001, "loss": 0.0147, "step": 36520 }, { "epoch": 240.32894736842104, "grad_norm": 1.2763868570327759, "learning_rate": 0.0001, "loss": 0.0164, "step": 36530 }, { "epoch": 240.39473684210526, "grad_norm": 0.910820722579956, "learning_rate": 0.0001, "loss": 0.0139, "step": 36540 }, { "epoch": 240.46052631578948, "grad_norm": 1.2709863185882568, "learning_rate": 0.0001, "loss": 0.0142, "step": 36550 }, { "epoch": 240.52631578947367, "grad_norm": 1.4138668775558472, "learning_rate": 0.0001, "loss": 0.0155, "step": 36560 }, { "epoch": 240.5921052631579, "grad_norm": 1.7599332332611084, "learning_rate": 0.0001, "loss": 0.0128, "step": 36570 }, { "epoch": 240.6578947368421, "grad_norm": 1.1151326894760132, "learning_rate": 0.0001, "loss": 0.0154, "step": 36580 }, { "epoch": 240.72368421052633, "grad_norm": 0.9767569899559021, "learning_rate": 0.0001, "loss": 0.0167, "step": 36590 }, { "epoch": 240.78947368421052, "grad_norm": 0.9586136341094971, "learning_rate": 0.0001, "loss": 0.0154, "step": 36600 }, { "epoch": 240.85526315789474, "grad_norm": 0.9484444856643677, "learning_rate": 0.0001, "loss": 0.0159, "step": 36610 }, { "epoch": 240.92105263157896, "grad_norm": 1.5435513257980347, "learning_rate": 0.0001, "loss": 0.0148, "step": 36620 }, { "epoch": 240.98684210526315, "grad_norm": 1.2928390502929688, "learning_rate": 0.0001, "loss": 0.0146, "step": 36630 }, { "epoch": 241.05263157894737, "grad_norm": 1.3825304508209229, "learning_rate": 0.0001, "loss": 0.0154, "step": 36640 }, { "epoch": 241.1184210526316, "grad_norm": 1.3325434923171997, "learning_rate": 0.0001, "loss": 0.0189, "step": 36650 }, { "epoch": 241.18421052631578, "grad_norm": 1.4383955001831055, "learning_rate": 0.0001, "loss": 0.0139, "step": 36660 }, { "epoch": 241.25, "grad_norm": 1.0420336723327637, "learning_rate": 0.0001, "loss": 0.015, "step": 36670 }, { "epoch": 241.31578947368422, "grad_norm": 1.0319572687149048, "learning_rate": 0.0001, "loss": 0.0137, "step": 36680 }, { "epoch": 241.3815789473684, "grad_norm": 1.3533285856246948, "learning_rate": 0.0001, "loss": 0.0172, "step": 36690 }, { "epoch": 241.44736842105263, "grad_norm": 1.112142562866211, "learning_rate": 0.0001, "loss": 0.0143, "step": 36700 }, { "epoch": 241.51315789473685, "grad_norm": 1.3508294820785522, "learning_rate": 0.0001, "loss": 0.0133, "step": 36710 }, { "epoch": 241.57894736842104, "grad_norm": 0.8377172946929932, "learning_rate": 0.0001, "loss": 0.0147, "step": 36720 }, { "epoch": 241.64473684210526, "grad_norm": 1.2829350233078003, "learning_rate": 0.0001, "loss": 0.0157, "step": 36730 }, { "epoch": 241.71052631578948, "grad_norm": 1.1019694805145264, "learning_rate": 0.0001, "loss": 0.0144, "step": 36740 }, { "epoch": 241.77631578947367, "grad_norm": 1.6284654140472412, "learning_rate": 0.0001, "loss": 0.0139, "step": 36750 }, { "epoch": 241.8421052631579, "grad_norm": 1.409332036972046, "learning_rate": 0.0001, "loss": 0.0149, "step": 36760 }, { "epoch": 241.9078947368421, "grad_norm": 1.5580686330795288, "learning_rate": 0.0001, "loss": 0.0152, "step": 36770 }, { "epoch": 241.97368421052633, "grad_norm": 1.2454417943954468, "learning_rate": 0.0001, "loss": 0.0155, "step": 36780 }, { "epoch": 242.03947368421052, "grad_norm": 1.194043755531311, "learning_rate": 0.0001, "loss": 0.0146, "step": 36790 }, { "epoch": 242.10526315789474, "grad_norm": 1.0807819366455078, "learning_rate": 0.0001, "loss": 0.0153, "step": 36800 }, { "epoch": 242.17105263157896, "grad_norm": 1.1862661838531494, "learning_rate": 0.0001, "loss": 0.0153, "step": 36810 }, { "epoch": 242.23684210526315, "grad_norm": 1.3441433906555176, "learning_rate": 0.0001, "loss": 0.014, "step": 36820 }, { "epoch": 242.30263157894737, "grad_norm": 1.0832732915878296, "learning_rate": 0.0001, "loss": 0.0156, "step": 36830 }, { "epoch": 242.3684210526316, "grad_norm": 0.8166968822479248, "learning_rate": 0.0001, "loss": 0.0175, "step": 36840 }, { "epoch": 242.43421052631578, "grad_norm": 1.1842600107192993, "learning_rate": 0.0001, "loss": 0.0151, "step": 36850 }, { "epoch": 242.5, "grad_norm": 1.0731406211853027, "learning_rate": 0.0001, "loss": 0.0151, "step": 36860 }, { "epoch": 242.56578947368422, "grad_norm": 1.1145586967468262, "learning_rate": 0.0001, "loss": 0.015, "step": 36870 }, { "epoch": 242.6315789473684, "grad_norm": 1.2087843418121338, "learning_rate": 0.0001, "loss": 0.0148, "step": 36880 }, { "epoch": 242.69736842105263, "grad_norm": 0.9884072542190552, "learning_rate": 0.0001, "loss": 0.0146, "step": 36890 }, { "epoch": 242.76315789473685, "grad_norm": 1.011466383934021, "learning_rate": 0.0001, "loss": 0.0161, "step": 36900 }, { "epoch": 242.82894736842104, "grad_norm": 0.9709348678588867, "learning_rate": 0.0001, "loss": 0.0157, "step": 36910 }, { "epoch": 242.89473684210526, "grad_norm": 1.2150779962539673, "learning_rate": 0.0001, "loss": 0.0174, "step": 36920 }, { "epoch": 242.96052631578948, "grad_norm": 1.3461240530014038, "learning_rate": 0.0001, "loss": 0.0165, "step": 36930 }, { "epoch": 243.02631578947367, "grad_norm": 1.4247745275497437, "learning_rate": 0.0001, "loss": 0.0153, "step": 36940 }, { "epoch": 243.0921052631579, "grad_norm": 1.213616132736206, "learning_rate": 0.0001, "loss": 0.0172, "step": 36950 }, { "epoch": 243.1578947368421, "grad_norm": 1.3313192129135132, "learning_rate": 0.0001, "loss": 0.0152, "step": 36960 }, { "epoch": 243.22368421052633, "grad_norm": 1.5116862058639526, "learning_rate": 0.0001, "loss": 0.0168, "step": 36970 }, { "epoch": 243.28947368421052, "grad_norm": 1.200487494468689, "learning_rate": 0.0001, "loss": 0.015, "step": 36980 }, { "epoch": 243.35526315789474, "grad_norm": 1.5582853555679321, "learning_rate": 0.0001, "loss": 0.0159, "step": 36990 }, { "epoch": 243.42105263157896, "grad_norm": 1.1975919008255005, "learning_rate": 0.0001, "loss": 0.0174, "step": 37000 }, { "epoch": 243.48684210526315, "grad_norm": 1.3097623586654663, "learning_rate": 0.0001, "loss": 0.0162, "step": 37010 }, { "epoch": 243.55263157894737, "grad_norm": 1.28339421749115, "learning_rate": 0.0001, "loss": 0.0147, "step": 37020 }, { "epoch": 243.6184210526316, "grad_norm": 1.0238935947418213, "learning_rate": 0.0001, "loss": 0.0144, "step": 37030 }, { "epoch": 243.68421052631578, "grad_norm": 1.203718900680542, "learning_rate": 0.0001, "loss": 0.014, "step": 37040 }, { "epoch": 243.75, "grad_norm": 1.2539606094360352, "learning_rate": 0.0001, "loss": 0.016, "step": 37050 }, { "epoch": 243.81578947368422, "grad_norm": 1.581852674484253, "learning_rate": 0.0001, "loss": 0.0157, "step": 37060 }, { "epoch": 243.8815789473684, "grad_norm": 0.8200912475585938, "learning_rate": 0.0001, "loss": 0.0145, "step": 37070 }, { "epoch": 243.94736842105263, "grad_norm": 1.1216977834701538, "learning_rate": 0.0001, "loss": 0.0133, "step": 37080 }, { "epoch": 244.01315789473685, "grad_norm": 1.295255422592163, "learning_rate": 0.0001, "loss": 0.0149, "step": 37090 }, { "epoch": 244.07894736842104, "grad_norm": 0.9966356158256531, "learning_rate": 0.0001, "loss": 0.0161, "step": 37100 }, { "epoch": 244.14473684210526, "grad_norm": 1.278610110282898, "learning_rate": 0.0001, "loss": 0.0153, "step": 37110 }, { "epoch": 244.21052631578948, "grad_norm": 1.1381927728652954, "learning_rate": 0.0001, "loss": 0.0154, "step": 37120 }, { "epoch": 244.27631578947367, "grad_norm": 1.3754022121429443, "learning_rate": 0.0001, "loss": 0.014, "step": 37130 }, { "epoch": 244.3421052631579, "grad_norm": 1.0089901685714722, "learning_rate": 0.0001, "loss": 0.0141, "step": 37140 }, { "epoch": 244.4078947368421, "grad_norm": 1.4417517185211182, "learning_rate": 0.0001, "loss": 0.0169, "step": 37150 }, { "epoch": 244.47368421052633, "grad_norm": 1.4486968517303467, "learning_rate": 0.0001, "loss": 0.0128, "step": 37160 }, { "epoch": 244.53947368421052, "grad_norm": 1.428544044494629, "learning_rate": 0.0001, "loss": 0.0152, "step": 37170 }, { "epoch": 244.60526315789474, "grad_norm": 1.239670753479004, "learning_rate": 0.0001, "loss": 0.0154, "step": 37180 }, { "epoch": 244.67105263157896, "grad_norm": 0.8541073203086853, "learning_rate": 0.0001, "loss": 0.0149, "step": 37190 }, { "epoch": 244.73684210526315, "grad_norm": 1.2242367267608643, "learning_rate": 0.0001, "loss": 0.0155, "step": 37200 }, { "epoch": 244.80263157894737, "grad_norm": 1.1044442653656006, "learning_rate": 0.0001, "loss": 0.014, "step": 37210 }, { "epoch": 244.8684210526316, "grad_norm": 1.4338637590408325, "learning_rate": 0.0001, "loss": 0.0137, "step": 37220 }, { "epoch": 244.93421052631578, "grad_norm": 1.1796600818634033, "learning_rate": 0.0001, "loss": 0.0142, "step": 37230 }, { "epoch": 245.0, "grad_norm": 1.4116781949996948, "learning_rate": 0.0001, "loss": 0.0153, "step": 37240 }, { "epoch": 245.06578947368422, "grad_norm": 1.2402573823928833, "learning_rate": 0.0001, "loss": 0.016, "step": 37250 }, { "epoch": 245.1315789473684, "grad_norm": 1.3560236692428589, "learning_rate": 0.0001, "loss": 0.0133, "step": 37260 }, { "epoch": 245.19736842105263, "grad_norm": 1.0826292037963867, "learning_rate": 0.0001, "loss": 0.0139, "step": 37270 }, { "epoch": 245.26315789473685, "grad_norm": 1.4319058656692505, "learning_rate": 0.0001, "loss": 0.0149, "step": 37280 }, { "epoch": 245.32894736842104, "grad_norm": 1.4913933277130127, "learning_rate": 0.0001, "loss": 0.0156, "step": 37290 }, { "epoch": 245.39473684210526, "grad_norm": 1.6130610704421997, "learning_rate": 0.0001, "loss": 0.0168, "step": 37300 }, { "epoch": 245.46052631578948, "grad_norm": 1.419812560081482, "learning_rate": 0.0001, "loss": 0.0149, "step": 37310 }, { "epoch": 245.52631578947367, "grad_norm": 1.1114962100982666, "learning_rate": 0.0001, "loss": 0.0147, "step": 37320 }, { "epoch": 245.5921052631579, "grad_norm": 0.8215615749359131, "learning_rate": 0.0001, "loss": 0.0129, "step": 37330 }, { "epoch": 245.6578947368421, "grad_norm": 0.9287834763526917, "learning_rate": 0.0001, "loss": 0.0152, "step": 37340 }, { "epoch": 245.72368421052633, "grad_norm": 1.5063482522964478, "learning_rate": 0.0001, "loss": 0.0157, "step": 37350 }, { "epoch": 245.78947368421052, "grad_norm": 1.3184797763824463, "learning_rate": 0.0001, "loss": 0.0151, "step": 37360 }, { "epoch": 245.85526315789474, "grad_norm": 1.147472858428955, "learning_rate": 0.0001, "loss": 0.0157, "step": 37370 }, { "epoch": 245.92105263157896, "grad_norm": 1.3581057786941528, "learning_rate": 0.0001, "loss": 0.0144, "step": 37380 }, { "epoch": 245.98684210526315, "grad_norm": 0.9130412340164185, "learning_rate": 0.0001, "loss": 0.0129, "step": 37390 }, { "epoch": 246.05263157894737, "grad_norm": 1.0670521259307861, "learning_rate": 0.0001, "loss": 0.0134, "step": 37400 }, { "epoch": 246.1184210526316, "grad_norm": 1.1035175323486328, "learning_rate": 0.0001, "loss": 0.016, "step": 37410 }, { "epoch": 246.18421052631578, "grad_norm": 1.24159574508667, "learning_rate": 0.0001, "loss": 0.0153, "step": 37420 }, { "epoch": 246.25, "grad_norm": 1.571832537651062, "learning_rate": 0.0001, "loss": 0.0134, "step": 37430 }, { "epoch": 246.31578947368422, "grad_norm": 1.1885900497436523, "learning_rate": 0.0001, "loss": 0.0139, "step": 37440 }, { "epoch": 246.3815789473684, "grad_norm": 1.5651880502700806, "learning_rate": 0.0001, "loss": 0.0176, "step": 37450 }, { "epoch": 246.44736842105263, "grad_norm": 1.3197070360183716, "learning_rate": 0.0001, "loss": 0.0147, "step": 37460 }, { "epoch": 246.51315789473685, "grad_norm": 1.3817650079727173, "learning_rate": 0.0001, "loss": 0.0139, "step": 37470 }, { "epoch": 246.57894736842104, "grad_norm": 1.5466119050979614, "learning_rate": 0.0001, "loss": 0.0165, "step": 37480 }, { "epoch": 246.64473684210526, "grad_norm": 1.2215481996536255, "learning_rate": 0.0001, "loss": 0.0135, "step": 37490 }, { "epoch": 246.71052631578948, "grad_norm": 1.4967329502105713, "learning_rate": 0.0001, "loss": 0.0156, "step": 37500 }, { "epoch": 246.77631578947367, "grad_norm": 1.4192287921905518, "learning_rate": 0.0001, "loss": 0.0148, "step": 37510 }, { "epoch": 246.8421052631579, "grad_norm": 1.3284549713134766, "learning_rate": 0.0001, "loss": 0.0143, "step": 37520 }, { "epoch": 246.9078947368421, "grad_norm": 1.3947001695632935, "learning_rate": 0.0001, "loss": 0.0144, "step": 37530 }, { "epoch": 246.97368421052633, "grad_norm": 1.2742048501968384, "learning_rate": 0.0001, "loss": 0.0152, "step": 37540 }, { "epoch": 247.03947368421052, "grad_norm": 1.200553059577942, "learning_rate": 0.0001, "loss": 0.0145, "step": 37550 }, { "epoch": 247.10526315789474, "grad_norm": 1.2947498559951782, "learning_rate": 0.0001, "loss": 0.0152, "step": 37560 }, { "epoch": 247.17105263157896, "grad_norm": 1.950449824333191, "learning_rate": 0.0001, "loss": 0.0155, "step": 37570 }, { "epoch": 247.23684210526315, "grad_norm": 1.7037091255187988, "learning_rate": 0.0001, "loss": 0.0142, "step": 37580 }, { "epoch": 247.30263157894737, "grad_norm": 1.2691845893859863, "learning_rate": 0.0001, "loss": 0.014, "step": 37590 }, { "epoch": 247.3684210526316, "grad_norm": 0.9007494449615479, "learning_rate": 0.0001, "loss": 0.0155, "step": 37600 }, { "epoch": 247.43421052631578, "grad_norm": 1.374837040901184, "learning_rate": 0.0001, "loss": 0.0147, "step": 37610 }, { "epoch": 247.5, "grad_norm": 1.1083370447158813, "learning_rate": 0.0001, "loss": 0.015, "step": 37620 }, { "epoch": 247.56578947368422, "grad_norm": 1.2770500183105469, "learning_rate": 0.0001, "loss": 0.0155, "step": 37630 }, { "epoch": 247.6315789473684, "grad_norm": 1.4671261310577393, "learning_rate": 0.0001, "loss": 0.0155, "step": 37640 }, { "epoch": 247.69736842105263, "grad_norm": 1.6228381395339966, "learning_rate": 0.0001, "loss": 0.0135, "step": 37650 }, { "epoch": 247.76315789473685, "grad_norm": 1.6250126361846924, "learning_rate": 0.0001, "loss": 0.0156, "step": 37660 }, { "epoch": 247.82894736842104, "grad_norm": 1.4353737831115723, "learning_rate": 0.0001, "loss": 0.013, "step": 37670 }, { "epoch": 247.89473684210526, "grad_norm": 1.6463098526000977, "learning_rate": 0.0001, "loss": 0.0154, "step": 37680 }, { "epoch": 247.96052631578948, "grad_norm": 1.2910631895065308, "learning_rate": 0.0001, "loss": 0.0159, "step": 37690 }, { "epoch": 248.02631578947367, "grad_norm": 1.0633682012557983, "learning_rate": 0.0001, "loss": 0.0139, "step": 37700 }, { "epoch": 248.0921052631579, "grad_norm": 1.6399755477905273, "learning_rate": 0.0001, "loss": 0.0142, "step": 37710 }, { "epoch": 248.1578947368421, "grad_norm": 1.4523167610168457, "learning_rate": 0.0001, "loss": 0.0153, "step": 37720 }, { "epoch": 248.22368421052633, "grad_norm": 1.307390570640564, "learning_rate": 0.0001, "loss": 0.014, "step": 37730 }, { "epoch": 248.28947368421052, "grad_norm": 1.414101243019104, "learning_rate": 0.0001, "loss": 0.0164, "step": 37740 }, { "epoch": 248.35526315789474, "grad_norm": 1.1803747415542603, "learning_rate": 0.0001, "loss": 0.0146, "step": 37750 }, { "epoch": 248.42105263157896, "grad_norm": 1.416965365409851, "learning_rate": 0.0001, "loss": 0.0134, "step": 37760 }, { "epoch": 248.48684210526315, "grad_norm": 1.0817677974700928, "learning_rate": 0.0001, "loss": 0.0142, "step": 37770 }, { "epoch": 248.55263157894737, "grad_norm": 1.3911010026931763, "learning_rate": 0.0001, "loss": 0.0162, "step": 37780 }, { "epoch": 248.6184210526316, "grad_norm": 1.0438708066940308, "learning_rate": 0.0001, "loss": 0.0139, "step": 37790 }, { "epoch": 248.68421052631578, "grad_norm": 1.1672842502593994, "learning_rate": 0.0001, "loss": 0.0144, "step": 37800 }, { "epoch": 248.75, "grad_norm": 1.5514910221099854, "learning_rate": 0.0001, "loss": 0.0138, "step": 37810 }, { "epoch": 248.81578947368422, "grad_norm": 1.686729073524475, "learning_rate": 0.0001, "loss": 0.0152, "step": 37820 }, { "epoch": 248.8815789473684, "grad_norm": 1.720362901687622, "learning_rate": 0.0001, "loss": 0.0144, "step": 37830 }, { "epoch": 248.94736842105263, "grad_norm": 1.4286376237869263, "learning_rate": 0.0001, "loss": 0.0151, "step": 37840 }, { "epoch": 249.01315789473685, "grad_norm": 1.447860836982727, "learning_rate": 0.0001, "loss": 0.0135, "step": 37850 }, { "epoch": 249.07894736842104, "grad_norm": 1.2636222839355469, "learning_rate": 0.0001, "loss": 0.0137, "step": 37860 }, { "epoch": 249.14473684210526, "grad_norm": 1.2671912908554077, "learning_rate": 0.0001, "loss": 0.0162, "step": 37870 }, { "epoch": 249.21052631578948, "grad_norm": 0.9959325194358826, "learning_rate": 0.0001, "loss": 0.0132, "step": 37880 }, { "epoch": 249.27631578947367, "grad_norm": 1.2429472208023071, "learning_rate": 0.0001, "loss": 0.0149, "step": 37890 }, { "epoch": 249.3421052631579, "grad_norm": 1.1575770378112793, "learning_rate": 0.0001, "loss": 0.0144, "step": 37900 }, { "epoch": 249.4078947368421, "grad_norm": 1.075370192527771, "learning_rate": 0.0001, "loss": 0.0155, "step": 37910 }, { "epoch": 249.47368421052633, "grad_norm": 1.1879488229751587, "learning_rate": 0.0001, "loss": 0.0129, "step": 37920 }, { "epoch": 249.53947368421052, "grad_norm": 1.0791507959365845, "learning_rate": 0.0001, "loss": 0.0162, "step": 37930 }, { "epoch": 249.60526315789474, "grad_norm": 0.9675527811050415, "learning_rate": 0.0001, "loss": 0.0151, "step": 37940 }, { "epoch": 249.67105263157896, "grad_norm": 1.2315634489059448, "learning_rate": 0.0001, "loss": 0.0156, "step": 37950 }, { "epoch": 249.73684210526315, "grad_norm": 0.8122895956039429, "learning_rate": 0.0001, "loss": 0.0166, "step": 37960 }, { "epoch": 249.80263157894737, "grad_norm": 1.209394931793213, "learning_rate": 0.0001, "loss": 0.0142, "step": 37970 }, { "epoch": 249.8684210526316, "grad_norm": 1.001671314239502, "learning_rate": 0.0001, "loss": 0.0166, "step": 37980 }, { "epoch": 249.93421052631578, "grad_norm": 1.5136141777038574, "learning_rate": 0.0001, "loss": 0.0153, "step": 37990 }, { "epoch": 250.0, "grad_norm": 1.4685696363449097, "learning_rate": 0.0001, "loss": 0.0147, "step": 38000 }, { "epoch": 250.06578947368422, "grad_norm": 1.1837599277496338, "learning_rate": 0.0001, "loss": 0.0142, "step": 38010 }, { "epoch": 250.1315789473684, "grad_norm": 1.018049955368042, "learning_rate": 0.0001, "loss": 0.0146, "step": 38020 }, { "epoch": 250.19736842105263, "grad_norm": 1.0847623348236084, "learning_rate": 0.0001, "loss": 0.0154, "step": 38030 }, { "epoch": 250.26315789473685, "grad_norm": 1.4358677864074707, "learning_rate": 0.0001, "loss": 0.0158, "step": 38040 }, { "epoch": 250.32894736842104, "grad_norm": 1.3127168416976929, "learning_rate": 0.0001, "loss": 0.0145, "step": 38050 }, { "epoch": 250.39473684210526, "grad_norm": 1.6243581771850586, "learning_rate": 0.0001, "loss": 0.0154, "step": 38060 }, { "epoch": 250.46052631578948, "grad_norm": 1.3248577117919922, "learning_rate": 0.0001, "loss": 0.0158, "step": 38070 }, { "epoch": 250.52631578947367, "grad_norm": 1.2250198125839233, "learning_rate": 0.0001, "loss": 0.0153, "step": 38080 }, { "epoch": 250.5921052631579, "grad_norm": 1.238546371459961, "learning_rate": 0.0001, "loss": 0.0167, "step": 38090 }, { "epoch": 250.6578947368421, "grad_norm": 1.3515560626983643, "learning_rate": 0.0001, "loss": 0.0161, "step": 38100 }, { "epoch": 250.72368421052633, "grad_norm": 1.272128701210022, "learning_rate": 0.0001, "loss": 0.0157, "step": 38110 }, { "epoch": 250.78947368421052, "grad_norm": 1.1292731761932373, "learning_rate": 0.0001, "loss": 0.0147, "step": 38120 }, { "epoch": 250.85526315789474, "grad_norm": 1.018415927886963, "learning_rate": 0.0001, "loss": 0.0147, "step": 38130 }, { "epoch": 250.92105263157896, "grad_norm": 1.367689609527588, "learning_rate": 0.0001, "loss": 0.0133, "step": 38140 }, { "epoch": 250.98684210526315, "grad_norm": 1.3254410028457642, "learning_rate": 0.0001, "loss": 0.015, "step": 38150 }, { "epoch": 251.05263157894737, "grad_norm": 1.6456955671310425, "learning_rate": 0.0001, "loss": 0.0148, "step": 38160 }, { "epoch": 251.1184210526316, "grad_norm": 1.1170157194137573, "learning_rate": 0.0001, "loss": 0.015, "step": 38170 }, { "epoch": 251.18421052631578, "grad_norm": 0.998716413974762, "learning_rate": 0.0001, "loss": 0.0138, "step": 38180 }, { "epoch": 251.25, "grad_norm": 0.9729195833206177, "learning_rate": 0.0001, "loss": 0.0153, "step": 38190 }, { "epoch": 251.31578947368422, "grad_norm": 1.274336576461792, "learning_rate": 0.0001, "loss": 0.0157, "step": 38200 }, { "epoch": 251.3815789473684, "grad_norm": 1.0042622089385986, "learning_rate": 0.0001, "loss": 0.0146, "step": 38210 }, { "epoch": 251.44736842105263, "grad_norm": 1.3611550331115723, "learning_rate": 0.0001, "loss": 0.0143, "step": 38220 }, { "epoch": 251.51315789473685, "grad_norm": 1.4068595170974731, "learning_rate": 0.0001, "loss": 0.0153, "step": 38230 }, { "epoch": 251.57894736842104, "grad_norm": 1.0429598093032837, "learning_rate": 0.0001, "loss": 0.0145, "step": 38240 }, { "epoch": 251.64473684210526, "grad_norm": 1.2321966886520386, "learning_rate": 0.0001, "loss": 0.0152, "step": 38250 }, { "epoch": 251.71052631578948, "grad_norm": 1.5043584108352661, "learning_rate": 0.0001, "loss": 0.0137, "step": 38260 }, { "epoch": 251.77631578947367, "grad_norm": 1.3396488428115845, "learning_rate": 0.0001, "loss": 0.0143, "step": 38270 }, { "epoch": 251.8421052631579, "grad_norm": 1.1610597372055054, "learning_rate": 0.0001, "loss": 0.0158, "step": 38280 }, { "epoch": 251.9078947368421, "grad_norm": 1.2713470458984375, "learning_rate": 0.0001, "loss": 0.0128, "step": 38290 }, { "epoch": 251.97368421052633, "grad_norm": 1.6690809726715088, "learning_rate": 0.0001, "loss": 0.0155, "step": 38300 }, { "epoch": 252.03947368421052, "grad_norm": 1.3469371795654297, "learning_rate": 0.0001, "loss": 0.0167, "step": 38310 }, { "epoch": 252.10526315789474, "grad_norm": 1.1202424764633179, "learning_rate": 0.0001, "loss": 0.0148, "step": 38320 }, { "epoch": 252.17105263157896, "grad_norm": 1.3286770582199097, "learning_rate": 0.0001, "loss": 0.0146, "step": 38330 }, { "epoch": 252.23684210526315, "grad_norm": 1.2388663291931152, "learning_rate": 0.0001, "loss": 0.0168, "step": 38340 }, { "epoch": 252.30263157894737, "grad_norm": 1.1364609003067017, "learning_rate": 0.0001, "loss": 0.017, "step": 38350 }, { "epoch": 252.3684210526316, "grad_norm": 1.43731689453125, "learning_rate": 0.0001, "loss": 0.0152, "step": 38360 }, { "epoch": 252.43421052631578, "grad_norm": 1.337084412574768, "learning_rate": 0.0001, "loss": 0.0141, "step": 38370 }, { "epoch": 252.5, "grad_norm": 1.568783164024353, "learning_rate": 0.0001, "loss": 0.0137, "step": 38380 }, { "epoch": 252.56578947368422, "grad_norm": 1.5150007009506226, "learning_rate": 0.0001, "loss": 0.015, "step": 38390 }, { "epoch": 252.6315789473684, "grad_norm": 1.7503875494003296, "learning_rate": 0.0001, "loss": 0.0143, "step": 38400 }, { "epoch": 252.69736842105263, "grad_norm": 1.123337745666504, "learning_rate": 0.0001, "loss": 0.015, "step": 38410 }, { "epoch": 252.76315789473685, "grad_norm": 1.359995722770691, "learning_rate": 0.0001, "loss": 0.0152, "step": 38420 }, { "epoch": 252.82894736842104, "grad_norm": 1.1792511940002441, "learning_rate": 0.0001, "loss": 0.0134, "step": 38430 }, { "epoch": 252.89473684210526, "grad_norm": 1.2069591283798218, "learning_rate": 0.0001, "loss": 0.0162, "step": 38440 }, { "epoch": 252.96052631578948, "grad_norm": 1.2496885061264038, "learning_rate": 0.0001, "loss": 0.0156, "step": 38450 }, { "epoch": 253.02631578947367, "grad_norm": 1.0140652656555176, "learning_rate": 0.0001, "loss": 0.0141, "step": 38460 }, { "epoch": 253.0921052631579, "grad_norm": 1.251288652420044, "learning_rate": 0.0001, "loss": 0.0141, "step": 38470 }, { "epoch": 253.1578947368421, "grad_norm": 1.283728837966919, "learning_rate": 0.0001, "loss": 0.0143, "step": 38480 }, { "epoch": 253.22368421052633, "grad_norm": 1.3904523849487305, "learning_rate": 0.0001, "loss": 0.0149, "step": 38490 }, { "epoch": 253.28947368421052, "grad_norm": 1.363517165184021, "learning_rate": 0.0001, "loss": 0.0179, "step": 38500 }, { "epoch": 253.35526315789474, "grad_norm": 1.120867133140564, "learning_rate": 0.0001, "loss": 0.0147, "step": 38510 }, { "epoch": 253.42105263157896, "grad_norm": 1.360049843788147, "learning_rate": 0.0001, "loss": 0.015, "step": 38520 }, { "epoch": 253.48684210526315, "grad_norm": 1.39319908618927, "learning_rate": 0.0001, "loss": 0.0146, "step": 38530 }, { "epoch": 253.55263157894737, "grad_norm": 1.0926111936569214, "learning_rate": 0.0001, "loss": 0.0156, "step": 38540 }, { "epoch": 253.6184210526316, "grad_norm": 1.4008203744888306, "learning_rate": 0.0001, "loss": 0.0158, "step": 38550 }, { "epoch": 253.68421052631578, "grad_norm": 0.974804699420929, "learning_rate": 0.0001, "loss": 0.0145, "step": 38560 }, { "epoch": 253.75, "grad_norm": 1.2252269983291626, "learning_rate": 0.0001, "loss": 0.0155, "step": 38570 }, { "epoch": 253.81578947368422, "grad_norm": 1.2093294858932495, "learning_rate": 0.0001, "loss": 0.0151, "step": 38580 }, { "epoch": 253.8815789473684, "grad_norm": 1.3294168710708618, "learning_rate": 0.0001, "loss": 0.0165, "step": 38590 }, { "epoch": 253.94736842105263, "grad_norm": 1.3876254558563232, "learning_rate": 0.0001, "loss": 0.019, "step": 38600 }, { "epoch": 254.01315789473685, "grad_norm": 1.2935510873794556, "learning_rate": 0.0001, "loss": 0.0151, "step": 38610 }, { "epoch": 254.07894736842104, "grad_norm": 1.1402019262313843, "learning_rate": 0.0001, "loss": 0.0175, "step": 38620 }, { "epoch": 254.14473684210526, "grad_norm": 1.167262315750122, "learning_rate": 0.0001, "loss": 0.0155, "step": 38630 }, { "epoch": 254.21052631578948, "grad_norm": 1.2734203338623047, "learning_rate": 0.0001, "loss": 0.0176, "step": 38640 }, { "epoch": 254.27631578947367, "grad_norm": 1.4504673480987549, "learning_rate": 0.0001, "loss": 0.0164, "step": 38650 }, { "epoch": 254.3421052631579, "grad_norm": 1.499895691871643, "learning_rate": 0.0001, "loss": 0.0148, "step": 38660 }, { "epoch": 254.4078947368421, "grad_norm": 0.8160152435302734, "learning_rate": 0.0001, "loss": 0.0145, "step": 38670 }, { "epoch": 254.47368421052633, "grad_norm": 1.31825852394104, "learning_rate": 0.0001, "loss": 0.0157, "step": 38680 }, { "epoch": 254.53947368421052, "grad_norm": 1.0252249240875244, "learning_rate": 0.0001, "loss": 0.0149, "step": 38690 }, { "epoch": 254.60526315789474, "grad_norm": 1.2557110786437988, "learning_rate": 0.0001, "loss": 0.0144, "step": 38700 }, { "epoch": 254.67105263157896, "grad_norm": 1.0297420024871826, "learning_rate": 0.0001, "loss": 0.0157, "step": 38710 }, { "epoch": 254.73684210526315, "grad_norm": 1.1971931457519531, "learning_rate": 0.0001, "loss": 0.0148, "step": 38720 }, { "epoch": 254.80263157894737, "grad_norm": 0.9345195889472961, "learning_rate": 0.0001, "loss": 0.0151, "step": 38730 }, { "epoch": 254.8684210526316, "grad_norm": 1.0931565761566162, "learning_rate": 0.0001, "loss": 0.0143, "step": 38740 }, { "epoch": 254.93421052631578, "grad_norm": 1.1553313732147217, "learning_rate": 0.0001, "loss": 0.0149, "step": 38750 }, { "epoch": 255.0, "grad_norm": 0.8764203786849976, "learning_rate": 0.0001, "loss": 0.0154, "step": 38760 }, { "epoch": 255.06578947368422, "grad_norm": 1.2318146228790283, "learning_rate": 0.0001, "loss": 0.017, "step": 38770 }, { "epoch": 255.1315789473684, "grad_norm": 1.1166127920150757, "learning_rate": 0.0001, "loss": 0.0153, "step": 38780 }, { "epoch": 255.19736842105263, "grad_norm": 1.0156936645507812, "learning_rate": 0.0001, "loss": 0.0143, "step": 38790 }, { "epoch": 255.26315789473685, "grad_norm": 1.1941726207733154, "learning_rate": 0.0001, "loss": 0.0177, "step": 38800 }, { "epoch": 255.32894736842104, "grad_norm": 1.557681679725647, "learning_rate": 0.0001, "loss": 0.0149, "step": 38810 }, { "epoch": 255.39473684210526, "grad_norm": 1.3124456405639648, "learning_rate": 0.0001, "loss": 0.0163, "step": 38820 }, { "epoch": 255.46052631578948, "grad_norm": 1.4480106830596924, "learning_rate": 0.0001, "loss": 0.0145, "step": 38830 }, { "epoch": 255.52631578947367, "grad_norm": 1.780182957649231, "learning_rate": 0.0001, "loss": 0.0173, "step": 38840 }, { "epoch": 255.5921052631579, "grad_norm": 1.6105772256851196, "learning_rate": 0.0001, "loss": 0.0151, "step": 38850 }, { "epoch": 255.6578947368421, "grad_norm": 1.3613028526306152, "learning_rate": 0.0001, "loss": 0.0129, "step": 38860 }, { "epoch": 255.72368421052633, "grad_norm": 1.5420035123825073, "learning_rate": 0.0001, "loss": 0.0131, "step": 38870 }, { "epoch": 255.78947368421052, "grad_norm": 2.7019877433776855, "learning_rate": 0.0001, "loss": 0.0199, "step": 38880 }, { "epoch": 255.85526315789474, "grad_norm": 2.009016752243042, "learning_rate": 0.0001, "loss": 0.0171, "step": 38890 }, { "epoch": 255.92105263157896, "grad_norm": 1.6684277057647705, "learning_rate": 0.0001, "loss": 0.0144, "step": 38900 }, { "epoch": 255.98684210526315, "grad_norm": 1.6645160913467407, "learning_rate": 0.0001, "loss": 0.0129, "step": 38910 }, { "epoch": 256.05263157894734, "grad_norm": 1.6908752918243408, "learning_rate": 0.0001, "loss": 0.0142, "step": 38920 }, { "epoch": 256.11842105263156, "grad_norm": 1.4804624319076538, "learning_rate": 0.0001, "loss": 0.0128, "step": 38930 }, { "epoch": 256.1842105263158, "grad_norm": 1.5433467626571655, "learning_rate": 0.0001, "loss": 0.013, "step": 38940 }, { "epoch": 256.25, "grad_norm": 1.4467060565948486, "learning_rate": 0.0001, "loss": 0.0154, "step": 38950 }, { "epoch": 256.3157894736842, "grad_norm": 1.5117400884628296, "learning_rate": 0.0001, "loss": 0.0142, "step": 38960 }, { "epoch": 256.38157894736844, "grad_norm": 1.5240405797958374, "learning_rate": 0.0001, "loss": 0.0138, "step": 38970 }, { "epoch": 256.44736842105266, "grad_norm": 1.3311848640441895, "learning_rate": 0.0001, "loss": 0.0134, "step": 38980 }, { "epoch": 256.5131578947368, "grad_norm": 1.3353846073150635, "learning_rate": 0.0001, "loss": 0.015, "step": 38990 }, { "epoch": 256.57894736842104, "grad_norm": 2.797309398651123, "learning_rate": 0.0001, "loss": 0.0371, "step": 39000 }, { "epoch": 256.64473684210526, "grad_norm": 2.06365966796875, "learning_rate": 0.0001, "loss": 0.0166, "step": 39010 }, { "epoch": 256.7105263157895, "grad_norm": 2.192547082901001, "learning_rate": 0.0001, "loss": 0.0157, "step": 39020 }, { "epoch": 256.7763157894737, "grad_norm": 1.957229733467102, "learning_rate": 0.0001, "loss": 0.0142, "step": 39030 }, { "epoch": 256.8421052631579, "grad_norm": 1.7908846139907837, "learning_rate": 0.0001, "loss": 0.0157, "step": 39040 }, { "epoch": 256.9078947368421, "grad_norm": 1.4980082511901855, "learning_rate": 0.0001, "loss": 0.0145, "step": 39050 }, { "epoch": 256.9736842105263, "grad_norm": 1.5401355028152466, "learning_rate": 0.0001, "loss": 0.014, "step": 39060 }, { "epoch": 257.0394736842105, "grad_norm": 1.2455133199691772, "learning_rate": 0.0001, "loss": 0.0119, "step": 39070 }, { "epoch": 257.10526315789474, "grad_norm": 1.4379152059555054, "learning_rate": 0.0001, "loss": 0.0145, "step": 39080 }, { "epoch": 257.17105263157896, "grad_norm": 1.4791769981384277, "learning_rate": 0.0001, "loss": 0.015, "step": 39090 }, { "epoch": 257.2368421052632, "grad_norm": 1.1306533813476562, "learning_rate": 0.0001, "loss": 0.0153, "step": 39100 }, { "epoch": 257.30263157894734, "grad_norm": 1.0871785879135132, "learning_rate": 0.0001, "loss": 0.0141, "step": 39110 }, { "epoch": 257.36842105263156, "grad_norm": 1.423475980758667, "learning_rate": 0.0001, "loss": 0.0151, "step": 39120 }, { "epoch": 257.4342105263158, "grad_norm": 1.7465128898620605, "learning_rate": 0.0001, "loss": 0.0136, "step": 39130 }, { "epoch": 257.5, "grad_norm": 1.4371193647384644, "learning_rate": 0.0001, "loss": 0.012, "step": 39140 }, { "epoch": 257.5657894736842, "grad_norm": 1.4257088899612427, "learning_rate": 0.0001, "loss": 0.0148, "step": 39150 }, { "epoch": 257.63157894736844, "grad_norm": 1.1062278747558594, "learning_rate": 0.0001, "loss": 0.0137, "step": 39160 }, { "epoch": 257.69736842105266, "grad_norm": 0.7491597533226013, "learning_rate": 0.0001, "loss": 0.0164, "step": 39170 }, { "epoch": 257.7631578947368, "grad_norm": 1.2030974626541138, "learning_rate": 0.0001, "loss": 0.0156, "step": 39180 }, { "epoch": 257.82894736842104, "grad_norm": 0.9825753569602966, "learning_rate": 0.0001, "loss": 0.0141, "step": 39190 }, { "epoch": 257.89473684210526, "grad_norm": 1.3896033763885498, "learning_rate": 0.0001, "loss": 0.0145, "step": 39200 }, { "epoch": 257.9605263157895, "grad_norm": 1.0874238014221191, "learning_rate": 0.0001, "loss": 0.0127, "step": 39210 }, { "epoch": 258.0263157894737, "grad_norm": 1.1241742372512817, "learning_rate": 0.0001, "loss": 0.0162, "step": 39220 }, { "epoch": 258.0921052631579, "grad_norm": 1.2342971563339233, "learning_rate": 0.0001, "loss": 0.0157, "step": 39230 }, { "epoch": 258.1578947368421, "grad_norm": 1.249884843826294, "learning_rate": 0.0001, "loss": 0.0149, "step": 39240 }, { "epoch": 258.2236842105263, "grad_norm": 1.0342882871627808, "learning_rate": 0.0001, "loss": 0.0152, "step": 39250 }, { "epoch": 258.2894736842105, "grad_norm": 1.2886340618133545, "learning_rate": 0.0001, "loss": 0.0135, "step": 39260 }, { "epoch": 258.35526315789474, "grad_norm": 1.3061116933822632, "learning_rate": 0.0001, "loss": 0.0158, "step": 39270 }, { "epoch": 258.42105263157896, "grad_norm": 1.4507635831832886, "learning_rate": 0.0001, "loss": 0.0151, "step": 39280 }, { "epoch": 258.4868421052632, "grad_norm": 1.4641185998916626, "learning_rate": 0.0001, "loss": 0.0145, "step": 39290 }, { "epoch": 258.55263157894734, "grad_norm": 1.3404735326766968, "learning_rate": 0.0001, "loss": 0.0154, "step": 39300 }, { "epoch": 258.61842105263156, "grad_norm": 0.9776154160499573, "learning_rate": 0.0001, "loss": 0.0149, "step": 39310 }, { "epoch": 258.6842105263158, "grad_norm": 1.0288668870925903, "learning_rate": 0.0001, "loss": 0.0132, "step": 39320 }, { "epoch": 258.75, "grad_norm": 1.1790403127670288, "learning_rate": 0.0001, "loss": 0.0155, "step": 39330 }, { "epoch": 258.8157894736842, "grad_norm": 0.8931066989898682, "learning_rate": 0.0001, "loss": 0.0171, "step": 39340 }, { "epoch": 258.88157894736844, "grad_norm": 1.307145118713379, "learning_rate": 0.0001, "loss": 0.0158, "step": 39350 }, { "epoch": 258.94736842105266, "grad_norm": 1.0468474626541138, "learning_rate": 0.0001, "loss": 0.0185, "step": 39360 }, { "epoch": 259.0131578947368, "grad_norm": 0.9798098206520081, "learning_rate": 0.0001, "loss": 0.0141, "step": 39370 }, { "epoch": 259.07894736842104, "grad_norm": 0.8931979537010193, "learning_rate": 0.0001, "loss": 0.0158, "step": 39380 }, { "epoch": 259.14473684210526, "grad_norm": 1.3130055665969849, "learning_rate": 0.0001, "loss": 0.0149, "step": 39390 }, { "epoch": 259.2105263157895, "grad_norm": 1.2557275295257568, "learning_rate": 0.0001, "loss": 0.0178, "step": 39400 }, { "epoch": 259.2763157894737, "grad_norm": 1.2544786930084229, "learning_rate": 0.0001, "loss": 0.0185, "step": 39410 }, { "epoch": 259.3421052631579, "grad_norm": 1.079939365386963, "learning_rate": 0.0001, "loss": 0.0151, "step": 39420 }, { "epoch": 259.4078947368421, "grad_norm": 1.3079148530960083, "learning_rate": 0.0001, "loss": 0.0148, "step": 39430 }, { "epoch": 259.4736842105263, "grad_norm": 1.2881779670715332, "learning_rate": 0.0001, "loss": 0.0144, "step": 39440 }, { "epoch": 259.5394736842105, "grad_norm": 1.4759607315063477, "learning_rate": 0.0001, "loss": 0.0168, "step": 39450 }, { "epoch": 259.60526315789474, "grad_norm": 1.1630066633224487, "learning_rate": 0.0001, "loss": 0.0152, "step": 39460 }, { "epoch": 259.67105263157896, "grad_norm": 1.894482970237732, "learning_rate": 0.0001, "loss": 0.0161, "step": 39470 }, { "epoch": 259.7368421052632, "grad_norm": 1.7164617776870728, "learning_rate": 0.0001, "loss": 0.0166, "step": 39480 }, { "epoch": 259.80263157894734, "grad_norm": 1.365029215812683, "learning_rate": 0.0001, "loss": 0.0165, "step": 39490 }, { "epoch": 259.86842105263156, "grad_norm": 1.2268500328063965, "learning_rate": 0.0001, "loss": 0.0127, "step": 39500 }, { "epoch": 259.9342105263158, "grad_norm": 1.182262897491455, "learning_rate": 0.0001, "loss": 0.0152, "step": 39510 }, { "epoch": 260.0, "grad_norm": 1.3532487154006958, "learning_rate": 0.0001, "loss": 0.0162, "step": 39520 }, { "epoch": 260.0657894736842, "grad_norm": 1.1759036779403687, "learning_rate": 0.0001, "loss": 0.0135, "step": 39530 }, { "epoch": 260.13157894736844, "grad_norm": 1.0273208618164062, "learning_rate": 0.0001, "loss": 0.0162, "step": 39540 }, { "epoch": 260.19736842105266, "grad_norm": 1.1280847787857056, "learning_rate": 0.0001, "loss": 0.0175, "step": 39550 }, { "epoch": 260.2631578947368, "grad_norm": 1.3776977062225342, "learning_rate": 0.0001, "loss": 0.0153, "step": 39560 }, { "epoch": 260.32894736842104, "grad_norm": 1.1340792179107666, "learning_rate": 0.0001, "loss": 0.0164, "step": 39570 }, { "epoch": 260.39473684210526, "grad_norm": 1.010206699371338, "learning_rate": 0.0001, "loss": 0.0165, "step": 39580 }, { "epoch": 260.4605263157895, "grad_norm": 1.4387060403823853, "learning_rate": 0.0001, "loss": 0.017, "step": 39590 }, { "epoch": 260.5263157894737, "grad_norm": 1.1363633871078491, "learning_rate": 0.0001, "loss": 0.0157, "step": 39600 }, { "epoch": 260.5921052631579, "grad_norm": 1.204533338546753, "learning_rate": 0.0001, "loss": 0.0166, "step": 39610 }, { "epoch": 260.6578947368421, "grad_norm": 1.3235585689544678, "learning_rate": 0.0001, "loss": 0.0146, "step": 39620 }, { "epoch": 260.7236842105263, "grad_norm": 1.2263247966766357, "learning_rate": 0.0001, "loss": 0.0149, "step": 39630 }, { "epoch": 260.7894736842105, "grad_norm": 1.1299937963485718, "learning_rate": 0.0001, "loss": 0.0146, "step": 39640 }, { "epoch": 260.85526315789474, "grad_norm": 1.2160582542419434, "learning_rate": 0.0001, "loss": 0.016, "step": 39650 }, { "epoch": 260.92105263157896, "grad_norm": 1.1547380685806274, "learning_rate": 0.0001, "loss": 0.0145, "step": 39660 }, { "epoch": 260.9868421052632, "grad_norm": 1.1193982362747192, "learning_rate": 0.0001, "loss": 0.0151, "step": 39670 }, { "epoch": 261.05263157894734, "grad_norm": 1.039002776145935, "learning_rate": 0.0001, "loss": 0.0163, "step": 39680 }, { "epoch": 261.11842105263156, "grad_norm": 1.2692370414733887, "learning_rate": 0.0001, "loss": 0.0178, "step": 39690 }, { "epoch": 261.1842105263158, "grad_norm": 1.4166964292526245, "learning_rate": 0.0001, "loss": 0.0146, "step": 39700 }, { "epoch": 261.25, "grad_norm": 1.2513854503631592, "learning_rate": 0.0001, "loss": 0.0153, "step": 39710 }, { "epoch": 261.3157894736842, "grad_norm": 1.340706467628479, "learning_rate": 0.0001, "loss": 0.0142, "step": 39720 }, { "epoch": 261.38157894736844, "grad_norm": 1.2143534421920776, "learning_rate": 0.0001, "loss": 0.015, "step": 39730 }, { "epoch": 261.44736842105266, "grad_norm": 1.6067289113998413, "learning_rate": 0.0001, "loss": 0.0161, "step": 39740 }, { "epoch": 261.5131578947368, "grad_norm": 1.3668274879455566, "learning_rate": 0.0001, "loss": 0.016, "step": 39750 }, { "epoch": 261.57894736842104, "grad_norm": 0.8968744874000549, "learning_rate": 0.0001, "loss": 0.0151, "step": 39760 }, { "epoch": 261.64473684210526, "grad_norm": 1.5481125116348267, "learning_rate": 0.0001, "loss": 0.0171, "step": 39770 }, { "epoch": 261.7105263157895, "grad_norm": 1.5846647024154663, "learning_rate": 0.0001, "loss": 0.0167, "step": 39780 }, { "epoch": 261.7763157894737, "grad_norm": 0.9274570345878601, "learning_rate": 0.0001, "loss": 0.0158, "step": 39790 }, { "epoch": 261.8421052631579, "grad_norm": 1.1757594347000122, "learning_rate": 0.0001, "loss": 0.0181, "step": 39800 }, { "epoch": 261.9078947368421, "grad_norm": 1.5246598720550537, "learning_rate": 0.0001, "loss": 0.0163, "step": 39810 }, { "epoch": 261.9736842105263, "grad_norm": 1.5944026708602905, "learning_rate": 0.0001, "loss": 0.0187, "step": 39820 }, { "epoch": 262.0394736842105, "grad_norm": 1.3532087802886963, "learning_rate": 0.0001, "loss": 0.0177, "step": 39830 }, { "epoch": 262.10526315789474, "grad_norm": 1.1372880935668945, "learning_rate": 0.0001, "loss": 0.0159, "step": 39840 }, { "epoch": 262.17105263157896, "grad_norm": 1.1915911436080933, "learning_rate": 0.0001, "loss": 0.0186, "step": 39850 }, { "epoch": 262.2368421052632, "grad_norm": 1.1271971464157104, "learning_rate": 0.0001, "loss": 0.0178, "step": 39860 }, { "epoch": 262.30263157894734, "grad_norm": 1.3422695398330688, "learning_rate": 0.0001, "loss": 0.0163, "step": 39870 }, { "epoch": 262.36842105263156, "grad_norm": 1.7832303047180176, "learning_rate": 0.0001, "loss": 0.0159, "step": 39880 }, { "epoch": 262.4342105263158, "grad_norm": 1.4639548063278198, "learning_rate": 0.0001, "loss": 0.0169, "step": 39890 }, { "epoch": 262.5, "grad_norm": 1.2754369974136353, "learning_rate": 0.0001, "loss": 0.0172, "step": 39900 }, { "epoch": 262.5657894736842, "grad_norm": 1.5328514575958252, "learning_rate": 0.0001, "loss": 0.0168, "step": 39910 }, { "epoch": 262.63157894736844, "grad_norm": 1.320682168006897, "learning_rate": 0.0001, "loss": 0.0176, "step": 39920 }, { "epoch": 262.69736842105266, "grad_norm": 0.9552030563354492, "learning_rate": 0.0001, "loss": 0.017, "step": 39930 }, { "epoch": 262.7631578947368, "grad_norm": 1.2544821500778198, "learning_rate": 0.0001, "loss": 0.0211, "step": 39940 }, { "epoch": 262.82894736842104, "grad_norm": 1.4101918935775757, "learning_rate": 0.0001, "loss": 0.0171, "step": 39950 }, { "epoch": 262.89473684210526, "grad_norm": 1.5442532300949097, "learning_rate": 0.0001, "loss": 0.0165, "step": 39960 }, { "epoch": 262.9605263157895, "grad_norm": 1.2423205375671387, "learning_rate": 0.0001, "loss": 0.0173, "step": 39970 }, { "epoch": 263.0263157894737, "grad_norm": 1.0011227130889893, "learning_rate": 0.0001, "loss": 0.017, "step": 39980 }, { "epoch": 263.0921052631579, "grad_norm": 1.0351630449295044, "learning_rate": 0.0001, "loss": 0.0155, "step": 39990 }, { "epoch": 263.1578947368421, "grad_norm": 1.29893159866333, "learning_rate": 0.0001, "loss": 0.0157, "step": 40000 }, { "epoch": 263.2236842105263, "grad_norm": 1.5265520811080933, "learning_rate": 0.0001, "loss": 0.0148, "step": 40010 }, { "epoch": 263.2894736842105, "grad_norm": 1.2051048278808594, "learning_rate": 0.0001, "loss": 0.0167, "step": 40020 }, { "epoch": 263.35526315789474, "grad_norm": 1.576757788658142, "learning_rate": 0.0001, "loss": 0.0155, "step": 40030 }, { "epoch": 263.42105263157896, "grad_norm": 1.4314970970153809, "learning_rate": 0.0001, "loss": 0.0135, "step": 40040 }, { "epoch": 263.4868421052632, "grad_norm": 1.0734888315200806, "learning_rate": 0.0001, "loss": 0.0179, "step": 40050 }, { "epoch": 263.55263157894734, "grad_norm": 1.0169308185577393, "learning_rate": 0.0001, "loss": 0.0157, "step": 40060 }, { "epoch": 263.61842105263156, "grad_norm": 1.0590006113052368, "learning_rate": 0.0001, "loss": 0.0162, "step": 40070 }, { "epoch": 263.6842105263158, "grad_norm": 1.1199071407318115, "learning_rate": 0.0001, "loss": 0.0161, "step": 40080 }, { "epoch": 263.75, "grad_norm": 0.9885268211364746, "learning_rate": 0.0001, "loss": 0.0151, "step": 40090 }, { "epoch": 263.8157894736842, "grad_norm": 0.7480038404464722, "learning_rate": 0.0001, "loss": 0.0148, "step": 40100 }, { "epoch": 263.88157894736844, "grad_norm": 0.7771443128585815, "learning_rate": 0.0001, "loss": 0.0149, "step": 40110 }, { "epoch": 263.94736842105266, "grad_norm": 1.1133038997650146, "learning_rate": 0.0001, "loss": 0.0159, "step": 40120 }, { "epoch": 264.0131578947368, "grad_norm": 0.8933395147323608, "learning_rate": 0.0001, "loss": 0.015, "step": 40130 }, { "epoch": 264.07894736842104, "grad_norm": 1.2622970342636108, "learning_rate": 0.0001, "loss": 0.0167, "step": 40140 }, { "epoch": 264.14473684210526, "grad_norm": 1.2373851537704468, "learning_rate": 0.0001, "loss": 0.0171, "step": 40150 }, { "epoch": 264.2105263157895, "grad_norm": 1.3317362070083618, "learning_rate": 0.0001, "loss": 0.0127, "step": 40160 }, { "epoch": 264.2763157894737, "grad_norm": 1.274640440940857, "learning_rate": 0.0001, "loss": 0.0151, "step": 40170 }, { "epoch": 264.3421052631579, "grad_norm": 0.8638913035392761, "learning_rate": 0.0001, "loss": 0.015, "step": 40180 }, { "epoch": 264.4078947368421, "grad_norm": 1.1487349271774292, "learning_rate": 0.0001, "loss": 0.0167, "step": 40190 }, { "epoch": 264.4736842105263, "grad_norm": 1.4805721044540405, "learning_rate": 0.0001, "loss": 0.0135, "step": 40200 }, { "epoch": 264.5394736842105, "grad_norm": 1.180604338645935, "learning_rate": 0.0001, "loss": 0.0141, "step": 40210 }, { "epoch": 264.60526315789474, "grad_norm": 1.2854849100112915, "learning_rate": 0.0001, "loss": 0.015, "step": 40220 }, { "epoch": 264.67105263157896, "grad_norm": 1.2706739902496338, "learning_rate": 0.0001, "loss": 0.0157, "step": 40230 }, { "epoch": 264.7368421052632, "grad_norm": 1.0163190364837646, "learning_rate": 0.0001, "loss": 0.0147, "step": 40240 }, { "epoch": 264.80263157894734, "grad_norm": 1.3690663576126099, "learning_rate": 0.0001, "loss": 0.017, "step": 40250 }, { "epoch": 264.86842105263156, "grad_norm": 0.8744670748710632, "learning_rate": 0.0001, "loss": 0.0143, "step": 40260 }, { "epoch": 264.9342105263158, "grad_norm": 1.1227748394012451, "learning_rate": 0.0001, "loss": 0.0146, "step": 40270 }, { "epoch": 265.0, "grad_norm": 1.1143121719360352, "learning_rate": 0.0001, "loss": 0.0156, "step": 40280 }, { "epoch": 265.0657894736842, "grad_norm": 1.5000947713851929, "learning_rate": 0.0001, "loss": 0.0156, "step": 40290 }, { "epoch": 265.13157894736844, "grad_norm": 1.3426282405853271, "learning_rate": 0.0001, "loss": 0.0151, "step": 40300 }, { "epoch": 265.19736842105266, "grad_norm": 0.9148724675178528, "learning_rate": 0.0001, "loss": 0.0151, "step": 40310 }, { "epoch": 265.2631578947368, "grad_norm": 0.9080812335014343, "learning_rate": 0.0001, "loss": 0.0142, "step": 40320 }, { "epoch": 265.32894736842104, "grad_norm": 0.8644049167633057, "learning_rate": 0.0001, "loss": 0.0135, "step": 40330 }, { "epoch": 265.39473684210526, "grad_norm": 0.9863361120223999, "learning_rate": 0.0001, "loss": 0.0147, "step": 40340 }, { "epoch": 265.4605263157895, "grad_norm": 1.1581717729568481, "learning_rate": 0.0001, "loss": 0.0145, "step": 40350 }, { "epoch": 265.5263157894737, "grad_norm": 0.8015313148498535, "learning_rate": 0.0001, "loss": 0.0151, "step": 40360 }, { "epoch": 265.5921052631579, "grad_norm": 0.9356069564819336, "learning_rate": 0.0001, "loss": 0.0147, "step": 40370 }, { "epoch": 265.6578947368421, "grad_norm": 1.1475307941436768, "learning_rate": 0.0001, "loss": 0.0135, "step": 40380 }, { "epoch": 265.7236842105263, "grad_norm": 1.0415571928024292, "learning_rate": 0.0001, "loss": 0.0149, "step": 40390 }, { "epoch": 265.7894736842105, "grad_norm": 1.4499433040618896, "learning_rate": 0.0001, "loss": 0.0149, "step": 40400 }, { "epoch": 265.85526315789474, "grad_norm": 1.0016824007034302, "learning_rate": 0.0001, "loss": 0.0143, "step": 40410 }, { "epoch": 265.92105263157896, "grad_norm": 1.426163911819458, "learning_rate": 0.0001, "loss": 0.0169, "step": 40420 }, { "epoch": 265.9868421052632, "grad_norm": 1.2102915048599243, "learning_rate": 0.0001, "loss": 0.0167, "step": 40430 }, { "epoch": 266.05263157894734, "grad_norm": 1.2748351097106934, "learning_rate": 0.0001, "loss": 0.0162, "step": 40440 }, { "epoch": 266.11842105263156, "grad_norm": 1.0148237943649292, "learning_rate": 0.0001, "loss": 0.0155, "step": 40450 }, { "epoch": 266.1842105263158, "grad_norm": 1.6188982725143433, "learning_rate": 0.0001, "loss": 0.0145, "step": 40460 }, { "epoch": 266.25, "grad_norm": 1.2381527423858643, "learning_rate": 0.0001, "loss": 0.0144, "step": 40470 }, { "epoch": 266.3157894736842, "grad_norm": 1.1073635816574097, "learning_rate": 0.0001, "loss": 0.0135, "step": 40480 }, { "epoch": 266.38157894736844, "grad_norm": 1.1717376708984375, "learning_rate": 0.0001, "loss": 0.0143, "step": 40490 }, { "epoch": 266.44736842105266, "grad_norm": 1.3967905044555664, "learning_rate": 0.0001, "loss": 0.0139, "step": 40500 }, { "epoch": 266.5131578947368, "grad_norm": 0.78745037317276, "learning_rate": 0.0001, "loss": 0.0133, "step": 40510 }, { "epoch": 266.57894736842104, "grad_norm": 1.2975760698318481, "learning_rate": 0.0001, "loss": 0.0146, "step": 40520 }, { "epoch": 266.64473684210526, "grad_norm": 1.0333595275878906, "learning_rate": 0.0001, "loss": 0.0154, "step": 40530 }, { "epoch": 266.7105263157895, "grad_norm": 1.132156252861023, "learning_rate": 0.0001, "loss": 0.014, "step": 40540 }, { "epoch": 266.7763157894737, "grad_norm": 1.1081570386886597, "learning_rate": 0.0001, "loss": 0.0143, "step": 40550 }, { "epoch": 266.8421052631579, "grad_norm": 1.6667684316635132, "learning_rate": 0.0001, "loss": 0.015, "step": 40560 }, { "epoch": 266.9078947368421, "grad_norm": 1.6296488046646118, "learning_rate": 0.0001, "loss": 0.015, "step": 40570 }, { "epoch": 266.9736842105263, "grad_norm": 1.4970145225524902, "learning_rate": 0.0001, "loss": 0.0139, "step": 40580 }, { "epoch": 267.0394736842105, "grad_norm": 1.2086564302444458, "learning_rate": 0.0001, "loss": 0.0149, "step": 40590 }, { "epoch": 267.10526315789474, "grad_norm": 1.0567036867141724, "learning_rate": 0.0001, "loss": 0.0132, "step": 40600 }, { "epoch": 267.17105263157896, "grad_norm": 1.2320477962493896, "learning_rate": 0.0001, "loss": 0.0133, "step": 40610 }, { "epoch": 267.2368421052632, "grad_norm": 1.1581894159317017, "learning_rate": 0.0001, "loss": 0.0143, "step": 40620 }, { "epoch": 267.30263157894734, "grad_norm": 1.2620066404342651, "learning_rate": 0.0001, "loss": 0.0143, "step": 40630 }, { "epoch": 267.36842105263156, "grad_norm": 1.0989741086959839, "learning_rate": 0.0001, "loss": 0.0146, "step": 40640 }, { "epoch": 267.4342105263158, "grad_norm": 1.0343804359436035, "learning_rate": 0.0001, "loss": 0.0135, "step": 40650 }, { "epoch": 267.5, "grad_norm": 1.1863844394683838, "learning_rate": 0.0001, "loss": 0.0153, "step": 40660 }, { "epoch": 267.5657894736842, "grad_norm": 1.5389338731765747, "learning_rate": 0.0001, "loss": 0.0135, "step": 40670 }, { "epoch": 267.63157894736844, "grad_norm": 0.8726987242698669, "learning_rate": 0.0001, "loss": 0.0143, "step": 40680 }, { "epoch": 267.69736842105266, "grad_norm": 0.9518386721611023, "learning_rate": 0.0001, "loss": 0.0139, "step": 40690 }, { "epoch": 267.7631578947368, "grad_norm": 0.9239757657051086, "learning_rate": 0.0001, "loss": 0.0131, "step": 40700 }, { "epoch": 267.82894736842104, "grad_norm": 0.800014078617096, "learning_rate": 0.0001, "loss": 0.0151, "step": 40710 }, { "epoch": 267.89473684210526, "grad_norm": 1.1381514072418213, "learning_rate": 0.0001, "loss": 0.0137, "step": 40720 }, { "epoch": 267.9605263157895, "grad_norm": 1.2737356424331665, "learning_rate": 0.0001, "loss": 0.0185, "step": 40730 }, { "epoch": 268.0263157894737, "grad_norm": 1.2936004400253296, "learning_rate": 0.0001, "loss": 0.0149, "step": 40740 }, { "epoch": 268.0921052631579, "grad_norm": 1.2660059928894043, "learning_rate": 0.0001, "loss": 0.015, "step": 40750 }, { "epoch": 268.1578947368421, "grad_norm": 1.2411798238754272, "learning_rate": 0.0001, "loss": 0.0145, "step": 40760 }, { "epoch": 268.2236842105263, "grad_norm": 0.9876865744590759, "learning_rate": 0.0001, "loss": 0.0147, "step": 40770 }, { "epoch": 268.2894736842105, "grad_norm": 1.146823525428772, "learning_rate": 0.0001, "loss": 0.015, "step": 40780 }, { "epoch": 268.35526315789474, "grad_norm": 1.0858898162841797, "learning_rate": 0.0001, "loss": 0.0152, "step": 40790 }, { "epoch": 268.42105263157896, "grad_norm": 1.208295464515686, "learning_rate": 0.0001, "loss": 0.0144, "step": 40800 }, { "epoch": 268.4868421052632, "grad_norm": 1.1481982469558716, "learning_rate": 0.0001, "loss": 0.016, "step": 40810 }, { "epoch": 268.55263157894734, "grad_norm": 1.2117033004760742, "learning_rate": 0.0001, "loss": 0.0136, "step": 40820 }, { "epoch": 268.61842105263156, "grad_norm": 1.550671100616455, "learning_rate": 0.0001, "loss": 0.013, "step": 40830 }, { "epoch": 268.6842105263158, "grad_norm": 1.131887435913086, "learning_rate": 0.0001, "loss": 0.0154, "step": 40840 }, { "epoch": 268.75, "grad_norm": 1.248721957206726, "learning_rate": 0.0001, "loss": 0.0153, "step": 40850 }, { "epoch": 268.8157894736842, "grad_norm": 1.0527727603912354, "learning_rate": 0.0001, "loss": 0.0149, "step": 40860 }, { "epoch": 268.88157894736844, "grad_norm": 1.243199348449707, "learning_rate": 0.0001, "loss": 0.0138, "step": 40870 }, { "epoch": 268.94736842105266, "grad_norm": 1.0433778762817383, "learning_rate": 0.0001, "loss": 0.0128, "step": 40880 }, { "epoch": 269.0131578947368, "grad_norm": 1.5813816785812378, "learning_rate": 0.0001, "loss": 0.0144, "step": 40890 }, { "epoch": 269.07894736842104, "grad_norm": 1.3357555866241455, "learning_rate": 0.0001, "loss": 0.0162, "step": 40900 }, { "epoch": 269.14473684210526, "grad_norm": 1.4037662744522095, "learning_rate": 0.0001, "loss": 0.0139, "step": 40910 }, { "epoch": 269.2105263157895, "grad_norm": 1.459270715713501, "learning_rate": 0.0001, "loss": 0.014, "step": 40920 }, { "epoch": 269.2763157894737, "grad_norm": 1.2540459632873535, "learning_rate": 0.0001, "loss": 0.0153, "step": 40930 }, { "epoch": 269.3421052631579, "grad_norm": 0.7612592577934265, "learning_rate": 0.0001, "loss": 0.0129, "step": 40940 }, { "epoch": 269.4078947368421, "grad_norm": 1.0705971717834473, "learning_rate": 0.0001, "loss": 0.0142, "step": 40950 }, { "epoch": 269.4736842105263, "grad_norm": 1.2538421154022217, "learning_rate": 0.0001, "loss": 0.0138, "step": 40960 }, { "epoch": 269.5394736842105, "grad_norm": 1.143507719039917, "learning_rate": 0.0001, "loss": 0.0149, "step": 40970 }, { "epoch": 269.60526315789474, "grad_norm": 1.3670626878738403, "learning_rate": 0.0001, "loss": 0.0144, "step": 40980 }, { "epoch": 269.67105263157896, "grad_norm": 1.126387596130371, "learning_rate": 0.0001, "loss": 0.0143, "step": 40990 }, { "epoch": 269.7368421052632, "grad_norm": 1.3656985759735107, "learning_rate": 0.0001, "loss": 0.0153, "step": 41000 }, { "epoch": 269.80263157894734, "grad_norm": 1.4171274900436401, "learning_rate": 0.0001, "loss": 0.0133, "step": 41010 }, { "epoch": 269.86842105263156, "grad_norm": 1.4547640085220337, "learning_rate": 0.0001, "loss": 0.0143, "step": 41020 }, { "epoch": 269.9342105263158, "grad_norm": 1.1633445024490356, "learning_rate": 0.0001, "loss": 0.0142, "step": 41030 }, { "epoch": 270.0, "grad_norm": 1.2674797773361206, "learning_rate": 0.0001, "loss": 0.0137, "step": 41040 }, { "epoch": 270.0657894736842, "grad_norm": 1.2385079860687256, "learning_rate": 0.0001, "loss": 0.0152, "step": 41050 }, { "epoch": 270.13157894736844, "grad_norm": 1.0688042640686035, "learning_rate": 0.0001, "loss": 0.0148, "step": 41060 }, { "epoch": 270.19736842105266, "grad_norm": 1.8457244634628296, "learning_rate": 0.0001, "loss": 0.0163, "step": 41070 }, { "epoch": 270.2631578947368, "grad_norm": 1.4692785739898682, "learning_rate": 0.0001, "loss": 0.0133, "step": 41080 }, { "epoch": 270.32894736842104, "grad_norm": 1.0676112174987793, "learning_rate": 0.0001, "loss": 0.0134, "step": 41090 }, { "epoch": 270.39473684210526, "grad_norm": 1.3465666770935059, "learning_rate": 0.0001, "loss": 0.0144, "step": 41100 }, { "epoch": 270.4605263157895, "grad_norm": 1.012242317199707, "learning_rate": 0.0001, "loss": 0.0151, "step": 41110 }, { "epoch": 270.5263157894737, "grad_norm": 1.1431010961532593, "learning_rate": 0.0001, "loss": 0.0128, "step": 41120 }, { "epoch": 270.5921052631579, "grad_norm": 1.2994465827941895, "learning_rate": 0.0001, "loss": 0.0144, "step": 41130 }, { "epoch": 270.6578947368421, "grad_norm": 1.2632924318313599, "learning_rate": 0.0001, "loss": 0.0132, "step": 41140 }, { "epoch": 270.7236842105263, "grad_norm": 1.0502218008041382, "learning_rate": 0.0001, "loss": 0.0132, "step": 41150 }, { "epoch": 270.7894736842105, "grad_norm": 1.1967225074768066, "learning_rate": 0.0001, "loss": 0.0132, "step": 41160 }, { "epoch": 270.85526315789474, "grad_norm": 1.095524787902832, "learning_rate": 0.0001, "loss": 0.0133, "step": 41170 }, { "epoch": 270.92105263157896, "grad_norm": 1.2695966958999634, "learning_rate": 0.0001, "loss": 0.0144, "step": 41180 }, { "epoch": 270.9868421052632, "grad_norm": 1.425173282623291, "learning_rate": 0.0001, "loss": 0.0127, "step": 41190 }, { "epoch": 271.05263157894734, "grad_norm": 0.9129456877708435, "learning_rate": 0.0001, "loss": 0.0131, "step": 41200 }, { "epoch": 271.11842105263156, "grad_norm": 0.9223254919052124, "learning_rate": 0.0001, "loss": 0.0157, "step": 41210 }, { "epoch": 271.1842105263158, "grad_norm": 1.0759285688400269, "learning_rate": 0.0001, "loss": 0.0143, "step": 41220 }, { "epoch": 271.25, "grad_norm": 1.4797776937484741, "learning_rate": 0.0001, "loss": 0.0154, "step": 41230 }, { "epoch": 271.3157894736842, "grad_norm": 1.368739128112793, "learning_rate": 0.0001, "loss": 0.015, "step": 41240 }, { "epoch": 271.38157894736844, "grad_norm": 1.1188558340072632, "learning_rate": 0.0001, "loss": 0.015, "step": 41250 }, { "epoch": 271.44736842105266, "grad_norm": 1.3473429679870605, "learning_rate": 0.0001, "loss": 0.0157, "step": 41260 }, { "epoch": 271.5131578947368, "grad_norm": 1.3430105447769165, "learning_rate": 0.0001, "loss": 0.0136, "step": 41270 }, { "epoch": 271.57894736842104, "grad_norm": 1.654737949371338, "learning_rate": 0.0001, "loss": 0.0149, "step": 41280 }, { "epoch": 271.64473684210526, "grad_norm": 1.832235336303711, "learning_rate": 0.0001, "loss": 0.0129, "step": 41290 }, { "epoch": 271.7105263157895, "grad_norm": 1.3615492582321167, "learning_rate": 0.0001, "loss": 0.0133, "step": 41300 }, { "epoch": 271.7763157894737, "grad_norm": 1.5935784578323364, "learning_rate": 0.0001, "loss": 0.0171, "step": 41310 }, { "epoch": 271.8421052631579, "grad_norm": 1.6195662021636963, "learning_rate": 0.0001, "loss": 0.0145, "step": 41320 }, { "epoch": 271.9078947368421, "grad_norm": 1.3322649002075195, "learning_rate": 0.0001, "loss": 0.0127, "step": 41330 }, { "epoch": 271.9736842105263, "grad_norm": 1.2479629516601562, "learning_rate": 0.0001, "loss": 0.0132, "step": 41340 }, { "epoch": 272.0394736842105, "grad_norm": 1.4976459741592407, "learning_rate": 0.0001, "loss": 0.0138, "step": 41350 }, { "epoch": 272.10526315789474, "grad_norm": 1.2839045524597168, "learning_rate": 0.0001, "loss": 0.0149, "step": 41360 }, { "epoch": 272.17105263157896, "grad_norm": 1.2758049964904785, "learning_rate": 0.0001, "loss": 0.0157, "step": 41370 }, { "epoch": 272.2368421052632, "grad_norm": 1.352516531944275, "learning_rate": 0.0001, "loss": 0.0146, "step": 41380 }, { "epoch": 272.30263157894734, "grad_norm": 1.0206512212753296, "learning_rate": 0.0001, "loss": 0.0124, "step": 41390 }, { "epoch": 272.36842105263156, "grad_norm": 1.2636960744857788, "learning_rate": 0.0001, "loss": 0.0165, "step": 41400 }, { "epoch": 272.4342105263158, "grad_norm": 1.058984398841858, "learning_rate": 0.0001, "loss": 0.014, "step": 41410 }, { "epoch": 272.5, "grad_norm": 1.2583855390548706, "learning_rate": 0.0001, "loss": 0.0141, "step": 41420 }, { "epoch": 272.5657894736842, "grad_norm": 1.1971688270568848, "learning_rate": 0.0001, "loss": 0.0138, "step": 41430 }, { "epoch": 272.63157894736844, "grad_norm": 1.403083324432373, "learning_rate": 0.0001, "loss": 0.0143, "step": 41440 }, { "epoch": 272.69736842105266, "grad_norm": 1.0412906408309937, "learning_rate": 0.0001, "loss": 0.0125, "step": 41450 }, { "epoch": 272.7631578947368, "grad_norm": 1.2968218326568604, "learning_rate": 0.0001, "loss": 0.0142, "step": 41460 }, { "epoch": 272.82894736842104, "grad_norm": 1.4638139009475708, "learning_rate": 0.0001, "loss": 0.0147, "step": 41470 }, { "epoch": 272.89473684210526, "grad_norm": 1.3034776449203491, "learning_rate": 0.0001, "loss": 0.0162, "step": 41480 }, { "epoch": 272.9605263157895, "grad_norm": 1.0589145421981812, "learning_rate": 0.0001, "loss": 0.0142, "step": 41490 }, { "epoch": 273.0263157894737, "grad_norm": 0.905729353427887, "learning_rate": 0.0001, "loss": 0.0168, "step": 41500 }, { "epoch": 273.0921052631579, "grad_norm": 1.093269944190979, "learning_rate": 0.0001, "loss": 0.0158, "step": 41510 }, { "epoch": 273.1578947368421, "grad_norm": 1.5292916297912598, "learning_rate": 0.0001, "loss": 0.0138, "step": 41520 }, { "epoch": 273.2236842105263, "grad_norm": 1.5569329261779785, "learning_rate": 0.0001, "loss": 0.0146, "step": 41530 }, { "epoch": 273.2894736842105, "grad_norm": 1.620352029800415, "learning_rate": 0.0001, "loss": 0.0134, "step": 41540 }, { "epoch": 273.35526315789474, "grad_norm": 1.3029897212982178, "learning_rate": 0.0001, "loss": 0.0148, "step": 41550 }, { "epoch": 273.42105263157896, "grad_norm": 1.1392931938171387, "learning_rate": 0.0001, "loss": 0.0132, "step": 41560 }, { "epoch": 273.4868421052632, "grad_norm": 1.3479526042938232, "learning_rate": 0.0001, "loss": 0.0129, "step": 41570 }, { "epoch": 273.55263157894734, "grad_norm": 1.3470922708511353, "learning_rate": 0.0001, "loss": 0.0135, "step": 41580 }, { "epoch": 273.61842105263156, "grad_norm": 1.2211447954177856, "learning_rate": 0.0001, "loss": 0.0147, "step": 41590 }, { "epoch": 273.6842105263158, "grad_norm": 1.1938384771347046, "learning_rate": 0.0001, "loss": 0.0139, "step": 41600 }, { "epoch": 273.75, "grad_norm": 1.1164087057113647, "learning_rate": 0.0001, "loss": 0.0135, "step": 41610 }, { "epoch": 273.8157894736842, "grad_norm": 1.1596921682357788, "learning_rate": 0.0001, "loss": 0.0138, "step": 41620 }, { "epoch": 273.88157894736844, "grad_norm": 0.9100044369697571, "learning_rate": 0.0001, "loss": 0.0136, "step": 41630 }, { "epoch": 273.94736842105266, "grad_norm": 0.9539213180541992, "learning_rate": 0.0001, "loss": 0.014, "step": 41640 }, { "epoch": 274.0131578947368, "grad_norm": 0.9775730967521667, "learning_rate": 0.0001, "loss": 0.0152, "step": 41650 }, { "epoch": 274.07894736842104, "grad_norm": 1.1786597967147827, "learning_rate": 0.0001, "loss": 0.0134, "step": 41660 }, { "epoch": 274.14473684210526, "grad_norm": 0.8824886679649353, "learning_rate": 0.0001, "loss": 0.0146, "step": 41670 }, { "epoch": 274.2105263157895, "grad_norm": 1.173654556274414, "learning_rate": 0.0001, "loss": 0.0145, "step": 41680 }, { "epoch": 274.2763157894737, "grad_norm": 1.509282112121582, "learning_rate": 0.0001, "loss": 0.0152, "step": 41690 }, { "epoch": 274.3421052631579, "grad_norm": 1.3084309101104736, "learning_rate": 0.0001, "loss": 0.0137, "step": 41700 }, { "epoch": 274.4078947368421, "grad_norm": 1.2768287658691406, "learning_rate": 0.0001, "loss": 0.0135, "step": 41710 }, { "epoch": 274.4736842105263, "grad_norm": 1.142025351524353, "learning_rate": 0.0001, "loss": 0.0133, "step": 41720 }, { "epoch": 274.5394736842105, "grad_norm": 1.13783597946167, "learning_rate": 0.0001, "loss": 0.0151, "step": 41730 }, { "epoch": 274.60526315789474, "grad_norm": 0.8221064805984497, "learning_rate": 0.0001, "loss": 0.0152, "step": 41740 }, { "epoch": 274.67105263157896, "grad_norm": 1.6410062313079834, "learning_rate": 0.0001, "loss": 0.0148, "step": 41750 }, { "epoch": 274.7368421052632, "grad_norm": 1.4255626201629639, "learning_rate": 0.0001, "loss": 0.0131, "step": 41760 }, { "epoch": 274.80263157894734, "grad_norm": 1.1363221406936646, "learning_rate": 0.0001, "loss": 0.0135, "step": 41770 }, { "epoch": 274.86842105263156, "grad_norm": 1.3313068151474, "learning_rate": 0.0001, "loss": 0.0131, "step": 41780 }, { "epoch": 274.9342105263158, "grad_norm": 1.4202282428741455, "learning_rate": 0.0001, "loss": 0.0164, "step": 41790 }, { "epoch": 275.0, "grad_norm": 1.3749666213989258, "learning_rate": 0.0001, "loss": 0.0133, "step": 41800 }, { "epoch": 275.0657894736842, "grad_norm": 1.2333338260650635, "learning_rate": 0.0001, "loss": 0.0134, "step": 41810 }, { "epoch": 275.13157894736844, "grad_norm": 1.221083164215088, "learning_rate": 0.0001, "loss": 0.0163, "step": 41820 }, { "epoch": 275.19736842105266, "grad_norm": 0.8739851713180542, "learning_rate": 0.0001, "loss": 0.0132, "step": 41830 }, { "epoch": 275.2631578947368, "grad_norm": 1.3110480308532715, "learning_rate": 0.0001, "loss": 0.0136, "step": 41840 }, { "epoch": 275.32894736842104, "grad_norm": 0.8947159647941589, "learning_rate": 0.0001, "loss": 0.014, "step": 41850 }, { "epoch": 275.39473684210526, "grad_norm": 1.2646197080612183, "learning_rate": 0.0001, "loss": 0.0147, "step": 41860 }, { "epoch": 275.4605263157895, "grad_norm": 1.0885008573532104, "learning_rate": 0.0001, "loss": 0.0135, "step": 41870 }, { "epoch": 275.5263157894737, "grad_norm": 1.4110907316207886, "learning_rate": 0.0001, "loss": 0.015, "step": 41880 }, { "epoch": 275.5921052631579, "grad_norm": 1.312502145767212, "learning_rate": 0.0001, "loss": 0.0139, "step": 41890 }, { "epoch": 275.6578947368421, "grad_norm": 1.3466676473617554, "learning_rate": 0.0001, "loss": 0.0136, "step": 41900 }, { "epoch": 275.7236842105263, "grad_norm": 1.6430773735046387, "learning_rate": 0.0001, "loss": 0.0147, "step": 41910 }, { "epoch": 275.7894736842105, "grad_norm": 1.3257335424423218, "learning_rate": 0.0001, "loss": 0.0145, "step": 41920 }, { "epoch": 275.85526315789474, "grad_norm": 1.3992869853973389, "learning_rate": 0.0001, "loss": 0.0149, "step": 41930 }, { "epoch": 275.92105263157896, "grad_norm": 1.3640083074569702, "learning_rate": 0.0001, "loss": 0.0155, "step": 41940 }, { "epoch": 275.9868421052632, "grad_norm": 1.2238960266113281, "learning_rate": 0.0001, "loss": 0.0138, "step": 41950 }, { "epoch": 276.05263157894734, "grad_norm": 0.9982905983924866, "learning_rate": 0.0001, "loss": 0.0127, "step": 41960 }, { "epoch": 276.11842105263156, "grad_norm": 1.0933977365493774, "learning_rate": 0.0001, "loss": 0.017, "step": 41970 }, { "epoch": 276.1842105263158, "grad_norm": 1.1937322616577148, "learning_rate": 0.0001, "loss": 0.0135, "step": 41980 }, { "epoch": 276.25, "grad_norm": 1.1956714391708374, "learning_rate": 0.0001, "loss": 0.0141, "step": 41990 }, { "epoch": 276.3157894736842, "grad_norm": 1.256412386894226, "learning_rate": 0.0001, "loss": 0.0143, "step": 42000 }, { "epoch": 276.38157894736844, "grad_norm": 1.035623550415039, "learning_rate": 0.0001, "loss": 0.014, "step": 42010 }, { "epoch": 276.44736842105266, "grad_norm": 1.2642019987106323, "learning_rate": 0.0001, "loss": 0.0159, "step": 42020 }, { "epoch": 276.5131578947368, "grad_norm": 1.7125762701034546, "learning_rate": 0.0001, "loss": 0.0139, "step": 42030 }, { "epoch": 276.57894736842104, "grad_norm": 1.3358557224273682, "learning_rate": 0.0001, "loss": 0.0164, "step": 42040 }, { "epoch": 276.64473684210526, "grad_norm": 1.3947197198867798, "learning_rate": 0.0001, "loss": 0.0144, "step": 42050 }, { "epoch": 276.7105263157895, "grad_norm": 1.3491345643997192, "learning_rate": 0.0001, "loss": 0.0137, "step": 42060 }, { "epoch": 276.7763157894737, "grad_norm": 1.3073524236679077, "learning_rate": 0.0001, "loss": 0.015, "step": 42070 }, { "epoch": 276.8421052631579, "grad_norm": 1.403847098350525, "learning_rate": 0.0001, "loss": 0.014, "step": 42080 }, { "epoch": 276.9078947368421, "grad_norm": 1.1999281644821167, "learning_rate": 0.0001, "loss": 0.014, "step": 42090 }, { "epoch": 276.9736842105263, "grad_norm": 1.2854633331298828, "learning_rate": 0.0001, "loss": 0.0128, "step": 42100 }, { "epoch": 277.0394736842105, "grad_norm": 1.1524097919464111, "learning_rate": 0.0001, "loss": 0.013, "step": 42110 }, { "epoch": 277.10526315789474, "grad_norm": 1.1198937892913818, "learning_rate": 0.0001, "loss": 0.0163, "step": 42120 }, { "epoch": 277.17105263157896, "grad_norm": 0.9820287823677063, "learning_rate": 0.0001, "loss": 0.0142, "step": 42130 }, { "epoch": 277.2368421052632, "grad_norm": 0.7458430528640747, "learning_rate": 0.0001, "loss": 0.0139, "step": 42140 }, { "epoch": 277.30263157894734, "grad_norm": 1.2689908742904663, "learning_rate": 0.0001, "loss": 0.0137, "step": 42150 }, { "epoch": 277.36842105263156, "grad_norm": 1.062583088874817, "learning_rate": 0.0001, "loss": 0.015, "step": 42160 }, { "epoch": 277.4342105263158, "grad_norm": 0.82270747423172, "learning_rate": 0.0001, "loss": 0.0154, "step": 42170 }, { "epoch": 277.5, "grad_norm": 0.8221926093101501, "learning_rate": 0.0001, "loss": 0.0165, "step": 42180 }, { "epoch": 277.5657894736842, "grad_norm": 0.8966209888458252, "learning_rate": 0.0001, "loss": 0.0149, "step": 42190 }, { "epoch": 277.63157894736844, "grad_norm": 1.6077913045883179, "learning_rate": 0.0001, "loss": 0.0159, "step": 42200 }, { "epoch": 277.69736842105266, "grad_norm": 0.9948518872261047, "learning_rate": 0.0001, "loss": 0.0148, "step": 42210 }, { "epoch": 277.7631578947368, "grad_norm": 1.1815922260284424, "learning_rate": 0.0001, "loss": 0.0136, "step": 42220 }, { "epoch": 277.82894736842104, "grad_norm": 1.0743731260299683, "learning_rate": 0.0001, "loss": 0.013, "step": 42230 }, { "epoch": 277.89473684210526, "grad_norm": 0.9307351112365723, "learning_rate": 0.0001, "loss": 0.0149, "step": 42240 }, { "epoch": 277.9605263157895, "grad_norm": 1.2313666343688965, "learning_rate": 0.0001, "loss": 0.0151, "step": 42250 }, { "epoch": 278.0263157894737, "grad_norm": 1.1681599617004395, "learning_rate": 0.0001, "loss": 0.0139, "step": 42260 }, { "epoch": 278.0921052631579, "grad_norm": 1.4541995525360107, "learning_rate": 0.0001, "loss": 0.013, "step": 42270 }, { "epoch": 278.1578947368421, "grad_norm": 1.107507586479187, "learning_rate": 0.0001, "loss": 0.0131, "step": 42280 }, { "epoch": 278.2236842105263, "grad_norm": 0.9326557517051697, "learning_rate": 0.0001, "loss": 0.0136, "step": 42290 }, { "epoch": 278.2894736842105, "grad_norm": 1.512211799621582, "learning_rate": 0.0001, "loss": 0.0137, "step": 42300 }, { "epoch": 278.35526315789474, "grad_norm": 1.3133347034454346, "learning_rate": 0.0001, "loss": 0.0156, "step": 42310 }, { "epoch": 278.42105263157896, "grad_norm": 1.0264031887054443, "learning_rate": 0.0001, "loss": 0.0154, "step": 42320 }, { "epoch": 278.4868421052632, "grad_norm": 1.1678309440612793, "learning_rate": 0.0001, "loss": 0.0161, "step": 42330 }, { "epoch": 278.55263157894734, "grad_norm": 0.7482702732086182, "learning_rate": 0.0001, "loss": 0.0139, "step": 42340 }, { "epoch": 278.61842105263156, "grad_norm": 1.0552853345870972, "learning_rate": 0.0001, "loss": 0.0151, "step": 42350 }, { "epoch": 278.6842105263158, "grad_norm": 1.301750659942627, "learning_rate": 0.0001, "loss": 0.0144, "step": 42360 }, { "epoch": 278.75, "grad_norm": 0.8871774673461914, "learning_rate": 0.0001, "loss": 0.0139, "step": 42370 }, { "epoch": 278.8157894736842, "grad_norm": 0.9362806677818298, "learning_rate": 0.0001, "loss": 0.0133, "step": 42380 }, { "epoch": 278.88157894736844, "grad_norm": 0.6715094447135925, "learning_rate": 0.0001, "loss": 0.0152, "step": 42390 }, { "epoch": 278.94736842105266, "grad_norm": 1.3032416105270386, "learning_rate": 0.0001, "loss": 0.0145, "step": 42400 }, { "epoch": 279.0131578947368, "grad_norm": 1.2517324686050415, "learning_rate": 0.0001, "loss": 0.0164, "step": 42410 }, { "epoch": 279.07894736842104, "grad_norm": 1.0311671495437622, "learning_rate": 0.0001, "loss": 0.0148, "step": 42420 }, { "epoch": 279.14473684210526, "grad_norm": 0.9779248237609863, "learning_rate": 0.0001, "loss": 0.0151, "step": 42430 }, { "epoch": 279.2105263157895, "grad_norm": 1.322513461112976, "learning_rate": 0.0001, "loss": 0.0132, "step": 42440 }, { "epoch": 279.2763157894737, "grad_norm": 1.117714285850525, "learning_rate": 0.0001, "loss": 0.0164, "step": 42450 }, { "epoch": 279.3421052631579, "grad_norm": 1.0339843034744263, "learning_rate": 0.0001, "loss": 0.0133, "step": 42460 }, { "epoch": 279.4078947368421, "grad_norm": 1.3660122156143188, "learning_rate": 0.0001, "loss": 0.0157, "step": 42470 }, { "epoch": 279.4736842105263, "grad_norm": 1.191132664680481, "learning_rate": 0.0001, "loss": 0.0151, "step": 42480 }, { "epoch": 279.5394736842105, "grad_norm": 1.476515293121338, "learning_rate": 0.0001, "loss": 0.015, "step": 42490 }, { "epoch": 279.60526315789474, "grad_norm": 1.0748845338821411, "learning_rate": 0.0001, "loss": 0.0132, "step": 42500 }, { "epoch": 279.67105263157896, "grad_norm": 1.480494499206543, "learning_rate": 0.0001, "loss": 0.0147, "step": 42510 }, { "epoch": 279.7368421052632, "grad_norm": 1.1140382289886475, "learning_rate": 0.0001, "loss": 0.015, "step": 42520 }, { "epoch": 279.80263157894734, "grad_norm": 1.4050570726394653, "learning_rate": 0.0001, "loss": 0.0143, "step": 42530 }, { "epoch": 279.86842105263156, "grad_norm": 1.3111275434494019, "learning_rate": 0.0001, "loss": 0.0148, "step": 42540 }, { "epoch": 279.9342105263158, "grad_norm": 1.2099816799163818, "learning_rate": 0.0001, "loss": 0.0144, "step": 42550 }, { "epoch": 280.0, "grad_norm": 1.2732938528060913, "learning_rate": 0.0001, "loss": 0.0141, "step": 42560 }, { "epoch": 280.0657894736842, "grad_norm": 1.4480763673782349, "learning_rate": 0.0001, "loss": 0.0141, "step": 42570 }, { "epoch": 280.13157894736844, "grad_norm": 1.1580476760864258, "learning_rate": 0.0001, "loss": 0.0147, "step": 42580 }, { "epoch": 280.19736842105266, "grad_norm": 1.4821127653121948, "learning_rate": 0.0001, "loss": 0.0123, "step": 42590 }, { "epoch": 280.2631578947368, "grad_norm": 1.0352067947387695, "learning_rate": 0.0001, "loss": 0.0128, "step": 42600 }, { "epoch": 280.32894736842104, "grad_norm": 1.536171793937683, "learning_rate": 0.0001, "loss": 0.0149, "step": 42610 }, { "epoch": 280.39473684210526, "grad_norm": 1.3197144269943237, "learning_rate": 0.0001, "loss": 0.0152, "step": 42620 }, { "epoch": 280.4605263157895, "grad_norm": 1.0145988464355469, "learning_rate": 0.0001, "loss": 0.0147, "step": 42630 }, { "epoch": 280.5263157894737, "grad_norm": 1.5612324476242065, "learning_rate": 0.0001, "loss": 0.0137, "step": 42640 }, { "epoch": 280.5921052631579, "grad_norm": 1.013548493385315, "learning_rate": 0.0001, "loss": 0.0132, "step": 42650 }, { "epoch": 280.6578947368421, "grad_norm": 1.0804232358932495, "learning_rate": 0.0001, "loss": 0.0142, "step": 42660 }, { "epoch": 280.7236842105263, "grad_norm": 1.183570146560669, "learning_rate": 0.0001, "loss": 0.0145, "step": 42670 }, { "epoch": 280.7894736842105, "grad_norm": 1.551816463470459, "learning_rate": 0.0001, "loss": 0.0153, "step": 42680 }, { "epoch": 280.85526315789474, "grad_norm": 1.5829601287841797, "learning_rate": 0.0001, "loss": 0.0132, "step": 42690 }, { "epoch": 280.92105263157896, "grad_norm": 1.6313118934631348, "learning_rate": 0.0001, "loss": 0.0128, "step": 42700 }, { "epoch": 280.9868421052632, "grad_norm": 0.820428192615509, "learning_rate": 0.0001, "loss": 0.0153, "step": 42710 }, { "epoch": 281.05263157894734, "grad_norm": 1.293191909790039, "learning_rate": 0.0001, "loss": 0.0122, "step": 42720 }, { "epoch": 281.11842105263156, "grad_norm": 1.3593038320541382, "learning_rate": 0.0001, "loss": 0.0125, "step": 42730 }, { "epoch": 281.1842105263158, "grad_norm": 1.3805264234542847, "learning_rate": 0.0001, "loss": 0.013, "step": 42740 }, { "epoch": 281.25, "grad_norm": 1.3258028030395508, "learning_rate": 0.0001, "loss": 0.0145, "step": 42750 }, { "epoch": 281.3157894736842, "grad_norm": 1.2791032791137695, "learning_rate": 0.0001, "loss": 0.0143, "step": 42760 }, { "epoch": 281.38157894736844, "grad_norm": 1.3307384252548218, "learning_rate": 0.0001, "loss": 0.0131, "step": 42770 }, { "epoch": 281.44736842105266, "grad_norm": 1.1572662591934204, "learning_rate": 0.0001, "loss": 0.014, "step": 42780 }, { "epoch": 281.5131578947368, "grad_norm": 1.5701806545257568, "learning_rate": 0.0001, "loss": 0.0156, "step": 42790 }, { "epoch": 281.57894736842104, "grad_norm": 1.2336055040359497, "learning_rate": 0.0001, "loss": 0.0132, "step": 42800 }, { "epoch": 281.64473684210526, "grad_norm": 1.0266544818878174, "learning_rate": 0.0001, "loss": 0.0154, "step": 42810 }, { "epoch": 281.7105263157895, "grad_norm": 0.840146541595459, "learning_rate": 0.0001, "loss": 0.0152, "step": 42820 }, { "epoch": 281.7763157894737, "grad_norm": 1.1825062036514282, "learning_rate": 0.0001, "loss": 0.013, "step": 42830 }, { "epoch": 281.8421052631579, "grad_norm": 1.0842541456222534, "learning_rate": 0.0001, "loss": 0.0144, "step": 42840 }, { "epoch": 281.9078947368421, "grad_norm": 1.5062053203582764, "learning_rate": 0.0001, "loss": 0.0141, "step": 42850 }, { "epoch": 281.9736842105263, "grad_norm": 0.8112286925315857, "learning_rate": 0.0001, "loss": 0.015, "step": 42860 }, { "epoch": 282.0394736842105, "grad_norm": 1.1789262294769287, "learning_rate": 0.0001, "loss": 0.0171, "step": 42870 }, { "epoch": 282.10526315789474, "grad_norm": 1.375529170036316, "learning_rate": 0.0001, "loss": 0.0163, "step": 42880 }, { "epoch": 282.17105263157896, "grad_norm": 1.330922245979309, "learning_rate": 0.0001, "loss": 0.0138, "step": 42890 }, { "epoch": 282.2368421052632, "grad_norm": 1.3827651739120483, "learning_rate": 0.0001, "loss": 0.0137, "step": 42900 }, { "epoch": 282.30263157894734, "grad_norm": 1.2131527662277222, "learning_rate": 0.0001, "loss": 0.014, "step": 42910 }, { "epoch": 282.36842105263156, "grad_norm": 1.3317664861679077, "learning_rate": 0.0001, "loss": 0.0139, "step": 42920 }, { "epoch": 282.4342105263158, "grad_norm": 1.1144113540649414, "learning_rate": 0.0001, "loss": 0.0141, "step": 42930 }, { "epoch": 282.5, "grad_norm": 1.6057426929473877, "learning_rate": 0.0001, "loss": 0.0138, "step": 42940 }, { "epoch": 282.5657894736842, "grad_norm": 1.3409185409545898, "learning_rate": 0.0001, "loss": 0.0151, "step": 42950 }, { "epoch": 282.63157894736844, "grad_norm": 1.5420650243759155, "learning_rate": 0.0001, "loss": 0.0146, "step": 42960 }, { "epoch": 282.69736842105266, "grad_norm": 1.1373865604400635, "learning_rate": 0.0001, "loss": 0.0138, "step": 42970 }, { "epoch": 282.7631578947368, "grad_norm": 1.2429486513137817, "learning_rate": 0.0001, "loss": 0.0132, "step": 42980 }, { "epoch": 282.82894736842104, "grad_norm": 0.9992790222167969, "learning_rate": 0.0001, "loss": 0.0138, "step": 42990 }, { "epoch": 282.89473684210526, "grad_norm": 1.142056941986084, "learning_rate": 0.0001, "loss": 0.0134, "step": 43000 }, { "epoch": 282.9605263157895, "grad_norm": 1.0192174911499023, "learning_rate": 0.0001, "loss": 0.0137, "step": 43010 }, { "epoch": 283.0263157894737, "grad_norm": 0.9924356937408447, "learning_rate": 0.0001, "loss": 0.0146, "step": 43020 }, { "epoch": 283.0921052631579, "grad_norm": 0.8189639449119568, "learning_rate": 0.0001, "loss": 0.0151, "step": 43030 }, { "epoch": 283.1578947368421, "grad_norm": 0.9445114731788635, "learning_rate": 0.0001, "loss": 0.0136, "step": 43040 }, { "epoch": 283.2236842105263, "grad_norm": 1.3871409893035889, "learning_rate": 0.0001, "loss": 0.0146, "step": 43050 }, { "epoch": 283.2894736842105, "grad_norm": 1.086130142211914, "learning_rate": 0.0001, "loss": 0.0171, "step": 43060 }, { "epoch": 283.35526315789474, "grad_norm": 0.9964144825935364, "learning_rate": 0.0001, "loss": 0.0138, "step": 43070 }, { "epoch": 283.42105263157896, "grad_norm": 1.0563546419143677, "learning_rate": 0.0001, "loss": 0.0141, "step": 43080 }, { "epoch": 283.4868421052632, "grad_norm": 1.270878553390503, "learning_rate": 0.0001, "loss": 0.0152, "step": 43090 }, { "epoch": 283.55263157894734, "grad_norm": 1.3381757736206055, "learning_rate": 0.0001, "loss": 0.0137, "step": 43100 }, { "epoch": 283.61842105263156, "grad_norm": 1.1857870817184448, "learning_rate": 0.0001, "loss": 0.0157, "step": 43110 }, { "epoch": 283.6842105263158, "grad_norm": 1.2086889743804932, "learning_rate": 0.0001, "loss": 0.0144, "step": 43120 }, { "epoch": 283.75, "grad_norm": 1.3116366863250732, "learning_rate": 0.0001, "loss": 0.0145, "step": 43130 }, { "epoch": 283.8157894736842, "grad_norm": 1.1488587856292725, "learning_rate": 0.0001, "loss": 0.014, "step": 43140 }, { "epoch": 283.88157894736844, "grad_norm": 0.9865978360176086, "learning_rate": 0.0001, "loss": 0.0175, "step": 43150 }, { "epoch": 283.94736842105266, "grad_norm": 0.9599931240081787, "learning_rate": 0.0001, "loss": 0.0147, "step": 43160 }, { "epoch": 284.0131578947368, "grad_norm": 1.4896612167358398, "learning_rate": 0.0001, "loss": 0.0153, "step": 43170 }, { "epoch": 284.07894736842104, "grad_norm": 1.335207223892212, "learning_rate": 0.0001, "loss": 0.0145, "step": 43180 }, { "epoch": 284.14473684210526, "grad_norm": 0.7765811085700989, "learning_rate": 0.0001, "loss": 0.0145, "step": 43190 }, { "epoch": 284.2105263157895, "grad_norm": 1.1684138774871826, "learning_rate": 0.0001, "loss": 0.0153, "step": 43200 }, { "epoch": 284.2763157894737, "grad_norm": 1.3540507555007935, "learning_rate": 0.0001, "loss": 0.0148, "step": 43210 }, { "epoch": 284.3421052631579, "grad_norm": 1.5758388042449951, "learning_rate": 0.0001, "loss": 0.0142, "step": 43220 }, { "epoch": 284.4078947368421, "grad_norm": 1.411137580871582, "learning_rate": 0.0001, "loss": 0.014, "step": 43230 }, { "epoch": 284.4736842105263, "grad_norm": 1.1263060569763184, "learning_rate": 0.0001, "loss": 0.0136, "step": 43240 }, { "epoch": 284.5394736842105, "grad_norm": 1.4823040962219238, "learning_rate": 0.0001, "loss": 0.0161, "step": 43250 }, { "epoch": 284.60526315789474, "grad_norm": 1.3825323581695557, "learning_rate": 0.0001, "loss": 0.0146, "step": 43260 }, { "epoch": 284.67105263157896, "grad_norm": 1.1096550226211548, "learning_rate": 0.0001, "loss": 0.0129, "step": 43270 }, { "epoch": 284.7368421052632, "grad_norm": 1.0073033571243286, "learning_rate": 0.0001, "loss": 0.0149, "step": 43280 }, { "epoch": 284.80263157894734, "grad_norm": 1.08073091506958, "learning_rate": 0.0001, "loss": 0.0138, "step": 43290 }, { "epoch": 284.86842105263156, "grad_norm": 0.6875160336494446, "learning_rate": 0.0001, "loss": 0.0144, "step": 43300 }, { "epoch": 284.9342105263158, "grad_norm": 1.0610051155090332, "learning_rate": 0.0001, "loss": 0.0139, "step": 43310 }, { "epoch": 285.0, "grad_norm": 0.9582310318946838, "learning_rate": 0.0001, "loss": 0.0139, "step": 43320 }, { "epoch": 285.0657894736842, "grad_norm": 0.8970026969909668, "learning_rate": 0.0001, "loss": 0.0151, "step": 43330 }, { "epoch": 285.13157894736844, "grad_norm": 0.8817126750946045, "learning_rate": 0.0001, "loss": 0.0146, "step": 43340 }, { "epoch": 285.19736842105266, "grad_norm": 1.2000654935836792, "learning_rate": 0.0001, "loss": 0.0154, "step": 43350 }, { "epoch": 285.2631578947368, "grad_norm": 1.3868666887283325, "learning_rate": 0.0001, "loss": 0.0148, "step": 43360 }, { "epoch": 285.32894736842104, "grad_norm": 0.9346180558204651, "learning_rate": 0.0001, "loss": 0.0125, "step": 43370 }, { "epoch": 285.39473684210526, "grad_norm": 0.9020205736160278, "learning_rate": 0.0001, "loss": 0.0159, "step": 43380 }, { "epoch": 285.4605263157895, "grad_norm": 1.231022834777832, "learning_rate": 0.0001, "loss": 0.0157, "step": 43390 }, { "epoch": 285.5263157894737, "grad_norm": 1.107666015625, "learning_rate": 0.0001, "loss": 0.0159, "step": 43400 }, { "epoch": 285.5921052631579, "grad_norm": 1.5066663026809692, "learning_rate": 0.0001, "loss": 0.0129, "step": 43410 }, { "epoch": 285.6578947368421, "grad_norm": 1.7655541896820068, "learning_rate": 0.0001, "loss": 0.0139, "step": 43420 }, { "epoch": 285.7236842105263, "grad_norm": 1.2582757472991943, "learning_rate": 0.0001, "loss": 0.0151, "step": 43430 }, { "epoch": 285.7894736842105, "grad_norm": 1.1627318859100342, "learning_rate": 0.0001, "loss": 0.0152, "step": 43440 }, { "epoch": 285.85526315789474, "grad_norm": 1.3102083206176758, "learning_rate": 0.0001, "loss": 0.0147, "step": 43450 }, { "epoch": 285.92105263157896, "grad_norm": 1.2415940761566162, "learning_rate": 0.0001, "loss": 0.0149, "step": 43460 }, { "epoch": 285.9868421052632, "grad_norm": 1.494537353515625, "learning_rate": 0.0001, "loss": 0.0152, "step": 43470 }, { "epoch": 286.05263157894734, "grad_norm": 1.4216185808181763, "learning_rate": 0.0001, "loss": 0.0142, "step": 43480 }, { "epoch": 286.11842105263156, "grad_norm": 1.2979694604873657, "learning_rate": 0.0001, "loss": 0.0155, "step": 43490 }, { "epoch": 286.1842105263158, "grad_norm": 0.9436559677124023, "learning_rate": 0.0001, "loss": 0.0128, "step": 43500 }, { "epoch": 286.25, "grad_norm": 1.34527587890625, "learning_rate": 0.0001, "loss": 0.0157, "step": 43510 }, { "epoch": 286.3157894736842, "grad_norm": 1.5451651811599731, "learning_rate": 0.0001, "loss": 0.0164, "step": 43520 }, { "epoch": 286.38157894736844, "grad_norm": 1.5682357549667358, "learning_rate": 0.0001, "loss": 0.0145, "step": 43530 }, { "epoch": 286.44736842105266, "grad_norm": 1.3602068424224854, "learning_rate": 0.0001, "loss": 0.0153, "step": 43540 }, { "epoch": 286.5131578947368, "grad_norm": 1.146419644355774, "learning_rate": 0.0001, "loss": 0.0128, "step": 43550 }, { "epoch": 286.57894736842104, "grad_norm": 0.9245599508285522, "learning_rate": 0.0001, "loss": 0.0134, "step": 43560 }, { "epoch": 286.64473684210526, "grad_norm": 0.9519632458686829, "learning_rate": 0.0001, "loss": 0.0141, "step": 43570 }, { "epoch": 286.7105263157895, "grad_norm": 1.0367894172668457, "learning_rate": 0.0001, "loss": 0.0123, "step": 43580 }, { "epoch": 286.7763157894737, "grad_norm": 1.3567640781402588, "learning_rate": 0.0001, "loss": 0.0148, "step": 43590 }, { "epoch": 286.8421052631579, "grad_norm": 1.3016269207000732, "learning_rate": 0.0001, "loss": 0.0152, "step": 43600 }, { "epoch": 286.9078947368421, "grad_norm": 1.4324815273284912, "learning_rate": 0.0001, "loss": 0.0135, "step": 43610 }, { "epoch": 286.9736842105263, "grad_norm": 1.0870546102523804, "learning_rate": 0.0001, "loss": 0.0154, "step": 43620 }, { "epoch": 287.0394736842105, "grad_norm": 1.5067437887191772, "learning_rate": 0.0001, "loss": 0.0149, "step": 43630 }, { "epoch": 287.10526315789474, "grad_norm": 1.3528972864151, "learning_rate": 0.0001, "loss": 0.0144, "step": 43640 }, { "epoch": 287.17105263157896, "grad_norm": 1.3971104621887207, "learning_rate": 0.0001, "loss": 0.0139, "step": 43650 }, { "epoch": 287.2368421052632, "grad_norm": 1.1645212173461914, "learning_rate": 0.0001, "loss": 0.0139, "step": 43660 }, { "epoch": 287.30263157894734, "grad_norm": 1.295596718788147, "learning_rate": 0.0001, "loss": 0.0137, "step": 43670 }, { "epoch": 287.36842105263156, "grad_norm": 0.900801420211792, "learning_rate": 0.0001, "loss": 0.0151, "step": 43680 }, { "epoch": 287.4342105263158, "grad_norm": 1.128415584564209, "learning_rate": 0.0001, "loss": 0.0144, "step": 43690 }, { "epoch": 287.5, "grad_norm": 1.3298012018203735, "learning_rate": 0.0001, "loss": 0.0154, "step": 43700 }, { "epoch": 287.5657894736842, "grad_norm": 1.2503217458724976, "learning_rate": 0.0001, "loss": 0.0133, "step": 43710 }, { "epoch": 287.63157894736844, "grad_norm": 1.2108162641525269, "learning_rate": 0.0001, "loss": 0.0159, "step": 43720 }, { "epoch": 287.69736842105266, "grad_norm": 1.3603051900863647, "learning_rate": 0.0001, "loss": 0.014, "step": 43730 }, { "epoch": 287.7631578947368, "grad_norm": 1.156879186630249, "learning_rate": 0.0001, "loss": 0.0148, "step": 43740 }, { "epoch": 287.82894736842104, "grad_norm": 1.0096931457519531, "learning_rate": 0.0001, "loss": 0.0134, "step": 43750 }, { "epoch": 287.89473684210526, "grad_norm": 1.3088841438293457, "learning_rate": 0.0001, "loss": 0.0143, "step": 43760 }, { "epoch": 287.9605263157895, "grad_norm": 1.609463095664978, "learning_rate": 0.0001, "loss": 0.0158, "step": 43770 }, { "epoch": 288.0263157894737, "grad_norm": 1.2793583869934082, "learning_rate": 0.0001, "loss": 0.0152, "step": 43780 }, { "epoch": 288.0921052631579, "grad_norm": 1.3340468406677246, "learning_rate": 0.0001, "loss": 0.0141, "step": 43790 }, { "epoch": 288.1578947368421, "grad_norm": 1.108888030052185, "learning_rate": 0.0001, "loss": 0.0138, "step": 43800 }, { "epoch": 288.2236842105263, "grad_norm": 1.1814451217651367, "learning_rate": 0.0001, "loss": 0.0135, "step": 43810 }, { "epoch": 288.2894736842105, "grad_norm": 1.0461862087249756, "learning_rate": 0.0001, "loss": 0.0137, "step": 43820 }, { "epoch": 288.35526315789474, "grad_norm": 1.1520538330078125, "learning_rate": 0.0001, "loss": 0.0132, "step": 43830 }, { "epoch": 288.42105263157896, "grad_norm": 1.4166773557662964, "learning_rate": 0.0001, "loss": 0.0168, "step": 43840 }, { "epoch": 288.4868421052632, "grad_norm": 1.0651955604553223, "learning_rate": 0.0001, "loss": 0.0143, "step": 43850 }, { "epoch": 288.55263157894734, "grad_norm": 1.5009607076644897, "learning_rate": 0.0001, "loss": 0.0149, "step": 43860 }, { "epoch": 288.61842105263156, "grad_norm": 1.2824265956878662, "learning_rate": 0.0001, "loss": 0.0144, "step": 43870 }, { "epoch": 288.6842105263158, "grad_norm": 1.1175551414489746, "learning_rate": 0.0001, "loss": 0.0149, "step": 43880 }, { "epoch": 288.75, "grad_norm": 1.3193415403366089, "learning_rate": 0.0001, "loss": 0.0153, "step": 43890 }, { "epoch": 288.8157894736842, "grad_norm": 1.3196364641189575, "learning_rate": 0.0001, "loss": 0.0151, "step": 43900 }, { "epoch": 288.88157894736844, "grad_norm": 1.0883409976959229, "learning_rate": 0.0001, "loss": 0.0151, "step": 43910 }, { "epoch": 288.94736842105266, "grad_norm": 1.2597954273223877, "learning_rate": 0.0001, "loss": 0.0143, "step": 43920 }, { "epoch": 289.0131578947368, "grad_norm": 1.32917058467865, "learning_rate": 0.0001, "loss": 0.015, "step": 43930 }, { "epoch": 289.07894736842104, "grad_norm": 1.2906899452209473, "learning_rate": 0.0001, "loss": 0.015, "step": 43940 }, { "epoch": 289.14473684210526, "grad_norm": 0.9421364068984985, "learning_rate": 0.0001, "loss": 0.0164, "step": 43950 }, { "epoch": 289.2105263157895, "grad_norm": 0.7365257143974304, "learning_rate": 0.0001, "loss": 0.0142, "step": 43960 }, { "epoch": 289.2763157894737, "grad_norm": 1.0555346012115479, "learning_rate": 0.0001, "loss": 0.0151, "step": 43970 }, { "epoch": 289.3421052631579, "grad_norm": 1.3130031824111938, "learning_rate": 0.0001, "loss": 0.0156, "step": 43980 }, { "epoch": 289.4078947368421, "grad_norm": 1.474686622619629, "learning_rate": 0.0001, "loss": 0.0136, "step": 43990 }, { "epoch": 289.4736842105263, "grad_norm": 1.148776650428772, "learning_rate": 0.0001, "loss": 0.0146, "step": 44000 }, { "epoch": 289.5394736842105, "grad_norm": 1.4202920198440552, "learning_rate": 0.0001, "loss": 0.014, "step": 44010 }, { "epoch": 289.60526315789474, "grad_norm": 1.267279863357544, "learning_rate": 0.0001, "loss": 0.0127, "step": 44020 }, { "epoch": 289.67105263157896, "grad_norm": 1.1646653413772583, "learning_rate": 0.0001, "loss": 0.0131, "step": 44030 }, { "epoch": 289.7368421052632, "grad_norm": 1.241349458694458, "learning_rate": 0.0001, "loss": 0.0151, "step": 44040 }, { "epoch": 289.80263157894734, "grad_norm": 1.201326847076416, "learning_rate": 0.0001, "loss": 0.0155, "step": 44050 }, { "epoch": 289.86842105263156, "grad_norm": 1.1308850049972534, "learning_rate": 0.0001, "loss": 0.0161, "step": 44060 }, { "epoch": 289.9342105263158, "grad_norm": 1.6807210445404053, "learning_rate": 0.0001, "loss": 0.0144, "step": 44070 }, { "epoch": 290.0, "grad_norm": 1.081008791923523, "learning_rate": 0.0001, "loss": 0.0132, "step": 44080 }, { "epoch": 290.0657894736842, "grad_norm": 1.1021829843521118, "learning_rate": 0.0001, "loss": 0.0151, "step": 44090 }, { "epoch": 290.13157894736844, "grad_norm": 1.4114763736724854, "learning_rate": 0.0001, "loss": 0.0127, "step": 44100 }, { "epoch": 290.19736842105266, "grad_norm": 1.2354848384857178, "learning_rate": 0.0001, "loss": 0.014, "step": 44110 }, { "epoch": 290.2631578947368, "grad_norm": 1.4642871618270874, "learning_rate": 0.0001, "loss": 0.0156, "step": 44120 }, { "epoch": 290.32894736842104, "grad_norm": 0.9256936311721802, "learning_rate": 0.0001, "loss": 0.0138, "step": 44130 }, { "epoch": 290.39473684210526, "grad_norm": 1.1769204139709473, "learning_rate": 0.0001, "loss": 0.0165, "step": 44140 }, { "epoch": 290.4605263157895, "grad_norm": 1.3560692071914673, "learning_rate": 0.0001, "loss": 0.0147, "step": 44150 }, { "epoch": 290.5263157894737, "grad_norm": 1.3157851696014404, "learning_rate": 0.0001, "loss": 0.0126, "step": 44160 }, { "epoch": 290.5921052631579, "grad_norm": 1.5221748352050781, "learning_rate": 0.0001, "loss": 0.0133, "step": 44170 }, { "epoch": 290.6578947368421, "grad_norm": 1.3193734884262085, "learning_rate": 0.0001, "loss": 0.0124, "step": 44180 }, { "epoch": 290.7236842105263, "grad_norm": 1.5841879844665527, "learning_rate": 0.0001, "loss": 0.0151, "step": 44190 }, { "epoch": 290.7894736842105, "grad_norm": 1.5025423765182495, "learning_rate": 0.0001, "loss": 0.0132, "step": 44200 }, { "epoch": 290.85526315789474, "grad_norm": 1.6240133047103882, "learning_rate": 0.0001, "loss": 0.0143, "step": 44210 }, { "epoch": 290.92105263157896, "grad_norm": 1.4948194026947021, "learning_rate": 0.0001, "loss": 0.0148, "step": 44220 }, { "epoch": 290.9868421052632, "grad_norm": 1.317069411277771, "learning_rate": 0.0001, "loss": 0.0143, "step": 44230 }, { "epoch": 291.05263157894734, "grad_norm": 1.513387680053711, "learning_rate": 0.0001, "loss": 0.0142, "step": 44240 }, { "epoch": 291.11842105263156, "grad_norm": 1.4927419424057007, "learning_rate": 0.0001, "loss": 0.0125, "step": 44250 }, { "epoch": 291.1842105263158, "grad_norm": 1.4577031135559082, "learning_rate": 0.0001, "loss": 0.0136, "step": 44260 }, { "epoch": 291.25, "grad_norm": 1.673728585243225, "learning_rate": 0.0001, "loss": 0.0139, "step": 44270 }, { "epoch": 291.3157894736842, "grad_norm": 0.9547315835952759, "learning_rate": 0.0001, "loss": 0.014, "step": 44280 }, { "epoch": 291.38157894736844, "grad_norm": 0.90570068359375, "learning_rate": 0.0001, "loss": 0.0141, "step": 44290 }, { "epoch": 291.44736842105266, "grad_norm": 1.1997649669647217, "learning_rate": 0.0001, "loss": 0.0149, "step": 44300 }, { "epoch": 291.5131578947368, "grad_norm": 1.1620087623596191, "learning_rate": 0.0001, "loss": 0.0152, "step": 44310 }, { "epoch": 291.57894736842104, "grad_norm": 1.410526990890503, "learning_rate": 0.0001, "loss": 0.0144, "step": 44320 }, { "epoch": 291.64473684210526, "grad_norm": 1.3183605670928955, "learning_rate": 0.0001, "loss": 0.0136, "step": 44330 }, { "epoch": 291.7105263157895, "grad_norm": 1.7681739330291748, "learning_rate": 0.0001, "loss": 0.015, "step": 44340 }, { "epoch": 291.7763157894737, "grad_norm": 1.3306128978729248, "learning_rate": 0.0001, "loss": 0.0134, "step": 44350 }, { "epoch": 291.8421052631579, "grad_norm": 1.658753514289856, "learning_rate": 0.0001, "loss": 0.014, "step": 44360 }, { "epoch": 291.9078947368421, "grad_norm": 1.2005608081817627, "learning_rate": 0.0001, "loss": 0.013, "step": 44370 }, { "epoch": 291.9736842105263, "grad_norm": 1.5707415342330933, "learning_rate": 0.0001, "loss": 0.0157, "step": 44380 }, { "epoch": 292.0394736842105, "grad_norm": 1.211821436882019, "learning_rate": 0.0001, "loss": 0.0125, "step": 44390 }, { "epoch": 292.10526315789474, "grad_norm": 1.5584156513214111, "learning_rate": 0.0001, "loss": 0.0138, "step": 44400 }, { "epoch": 292.17105263157896, "grad_norm": 1.2665433883666992, "learning_rate": 0.0001, "loss": 0.0136, "step": 44410 }, { "epoch": 292.2368421052632, "grad_norm": 0.9688684344291687, "learning_rate": 0.0001, "loss": 0.0134, "step": 44420 }, { "epoch": 292.30263157894734, "grad_norm": 1.0552864074707031, "learning_rate": 0.0001, "loss": 0.0156, "step": 44430 }, { "epoch": 292.36842105263156, "grad_norm": 1.3883039951324463, "learning_rate": 0.0001, "loss": 0.0146, "step": 44440 }, { "epoch": 292.4342105263158, "grad_norm": 1.1752902269363403, "learning_rate": 0.0001, "loss": 0.0144, "step": 44450 }, { "epoch": 292.5, "grad_norm": 1.1989336013793945, "learning_rate": 0.0001, "loss": 0.014, "step": 44460 }, { "epoch": 292.5657894736842, "grad_norm": 1.2750636339187622, "learning_rate": 0.0001, "loss": 0.0161, "step": 44470 }, { "epoch": 292.63157894736844, "grad_norm": 1.1853424310684204, "learning_rate": 0.0001, "loss": 0.0133, "step": 44480 }, { "epoch": 292.69736842105266, "grad_norm": 0.9358903765678406, "learning_rate": 0.0001, "loss": 0.0135, "step": 44490 }, { "epoch": 292.7631578947368, "grad_norm": 1.2150110006332397, "learning_rate": 0.0001, "loss": 0.0141, "step": 44500 }, { "epoch": 292.82894736842104, "grad_norm": 1.1464403867721558, "learning_rate": 0.0001, "loss": 0.0146, "step": 44510 }, { "epoch": 292.89473684210526, "grad_norm": 1.5787771940231323, "learning_rate": 0.0001, "loss": 0.0145, "step": 44520 }, { "epoch": 292.9605263157895, "grad_norm": 1.234574794769287, "learning_rate": 0.0001, "loss": 0.0134, "step": 44530 }, { "epoch": 293.0263157894737, "grad_norm": 1.4504281282424927, "learning_rate": 0.0001, "loss": 0.0147, "step": 44540 }, { "epoch": 293.0921052631579, "grad_norm": 1.2790708541870117, "learning_rate": 0.0001, "loss": 0.0133, "step": 44550 }, { "epoch": 293.1578947368421, "grad_norm": 1.0297752618789673, "learning_rate": 0.0001, "loss": 0.0135, "step": 44560 }, { "epoch": 293.2236842105263, "grad_norm": 1.4982763528823853, "learning_rate": 0.0001, "loss": 0.0167, "step": 44570 }, { "epoch": 293.2894736842105, "grad_norm": 1.4405145645141602, "learning_rate": 0.0001, "loss": 0.0152, "step": 44580 }, { "epoch": 293.35526315789474, "grad_norm": 1.618911862373352, "learning_rate": 0.0001, "loss": 0.0141, "step": 44590 }, { "epoch": 293.42105263157896, "grad_norm": 1.1812571287155151, "learning_rate": 0.0001, "loss": 0.0158, "step": 44600 }, { "epoch": 293.4868421052632, "grad_norm": 1.3874218463897705, "learning_rate": 0.0001, "loss": 0.0142, "step": 44610 }, { "epoch": 293.55263157894734, "grad_norm": 1.1860712766647339, "learning_rate": 0.0001, "loss": 0.0125, "step": 44620 }, { "epoch": 293.61842105263156, "grad_norm": 1.5251539945602417, "learning_rate": 0.0001, "loss": 0.0149, "step": 44630 }, { "epoch": 293.6842105263158, "grad_norm": 1.2755794525146484, "learning_rate": 0.0001, "loss": 0.0141, "step": 44640 }, { "epoch": 293.75, "grad_norm": 1.8280200958251953, "learning_rate": 0.0001, "loss": 0.0165, "step": 44650 }, { "epoch": 293.8157894736842, "grad_norm": 1.7811310291290283, "learning_rate": 0.0001, "loss": 0.0137, "step": 44660 }, { "epoch": 293.88157894736844, "grad_norm": 1.8284488916397095, "learning_rate": 0.0001, "loss": 0.0137, "step": 44670 }, { "epoch": 293.94736842105266, "grad_norm": 1.3257440328598022, "learning_rate": 0.0001, "loss": 0.0117, "step": 44680 }, { "epoch": 294.0131578947368, "grad_norm": 1.3897215127944946, "learning_rate": 0.0001, "loss": 0.0128, "step": 44690 }, { "epoch": 294.07894736842104, "grad_norm": 1.2009508609771729, "learning_rate": 0.0001, "loss": 0.0135, "step": 44700 }, { "epoch": 294.14473684210526, "grad_norm": 1.5008352994918823, "learning_rate": 0.0001, "loss": 0.0137, "step": 44710 }, { "epoch": 294.2105263157895, "grad_norm": 1.0818425416946411, "learning_rate": 0.0001, "loss": 0.0125, "step": 44720 }, { "epoch": 294.2763157894737, "grad_norm": 1.3108577728271484, "learning_rate": 0.0001, "loss": 0.013, "step": 44730 }, { "epoch": 294.3421052631579, "grad_norm": 1.39351224899292, "learning_rate": 0.0001, "loss": 0.0142, "step": 44740 }, { "epoch": 294.4078947368421, "grad_norm": 1.2173326015472412, "learning_rate": 0.0001, "loss": 0.0131, "step": 44750 }, { "epoch": 294.4736842105263, "grad_norm": 1.1741549968719482, "learning_rate": 0.0001, "loss": 0.0121, "step": 44760 }, { "epoch": 294.5394736842105, "grad_norm": 1.123326301574707, "learning_rate": 0.0001, "loss": 0.0149, "step": 44770 }, { "epoch": 294.60526315789474, "grad_norm": 1.1277564764022827, "learning_rate": 0.0001, "loss": 0.0137, "step": 44780 }, { "epoch": 294.67105263157896, "grad_norm": 0.9613653421401978, "learning_rate": 0.0001, "loss": 0.0162, "step": 44790 }, { "epoch": 294.7368421052632, "grad_norm": 0.9356263279914856, "learning_rate": 0.0001, "loss": 0.0136, "step": 44800 }, { "epoch": 294.80263157894734, "grad_norm": 0.9894914031028748, "learning_rate": 0.0001, "loss": 0.0144, "step": 44810 }, { "epoch": 294.86842105263156, "grad_norm": 1.1962043046951294, "learning_rate": 0.0001, "loss": 0.0147, "step": 44820 }, { "epoch": 294.9342105263158, "grad_norm": 1.2527235746383667, "learning_rate": 0.0001, "loss": 0.0138, "step": 44830 }, { "epoch": 295.0, "grad_norm": 1.2737168073654175, "learning_rate": 0.0001, "loss": 0.0148, "step": 44840 }, { "epoch": 295.0657894736842, "grad_norm": 1.6003625392913818, "learning_rate": 0.0001, "loss": 0.0147, "step": 44850 }, { "epoch": 295.13157894736844, "grad_norm": 1.2156645059585571, "learning_rate": 0.0001, "loss": 0.0139, "step": 44860 }, { "epoch": 295.19736842105266, "grad_norm": 1.3037317991256714, "learning_rate": 0.0001, "loss": 0.0138, "step": 44870 }, { "epoch": 295.2631578947368, "grad_norm": 1.2797876596450806, "learning_rate": 0.0001, "loss": 0.014, "step": 44880 }, { "epoch": 295.32894736842104, "grad_norm": 0.9522796273231506, "learning_rate": 0.0001, "loss": 0.0161, "step": 44890 }, { "epoch": 295.39473684210526, "grad_norm": 0.9512258172035217, "learning_rate": 0.0001, "loss": 0.013, "step": 44900 }, { "epoch": 295.4605263157895, "grad_norm": 0.9569043517112732, "learning_rate": 0.0001, "loss": 0.0127, "step": 44910 }, { "epoch": 295.5263157894737, "grad_norm": 0.8486437797546387, "learning_rate": 0.0001, "loss": 0.0133, "step": 44920 }, { "epoch": 295.5921052631579, "grad_norm": 0.7993052005767822, "learning_rate": 0.0001, "loss": 0.0134, "step": 44930 }, { "epoch": 295.6578947368421, "grad_norm": 0.8292239308357239, "learning_rate": 0.0001, "loss": 0.0142, "step": 44940 }, { "epoch": 295.7236842105263, "grad_norm": 0.9200776219367981, "learning_rate": 0.0001, "loss": 0.0133, "step": 44950 }, { "epoch": 295.7894736842105, "grad_norm": 1.134240984916687, "learning_rate": 0.0001, "loss": 0.0137, "step": 44960 }, { "epoch": 295.85526315789474, "grad_norm": 1.2464627027511597, "learning_rate": 0.0001, "loss": 0.0153, "step": 44970 }, { "epoch": 295.92105263157896, "grad_norm": 1.2314313650131226, "learning_rate": 0.0001, "loss": 0.0157, "step": 44980 }, { "epoch": 295.9868421052632, "grad_norm": 1.2230592966079712, "learning_rate": 0.0001, "loss": 0.0163, "step": 44990 }, { "epoch": 296.05263157894734, "grad_norm": 1.0356382131576538, "learning_rate": 0.0001, "loss": 0.0141, "step": 45000 }, { "epoch": 296.11842105263156, "grad_norm": 1.4570183753967285, "learning_rate": 0.0001, "loss": 0.0165, "step": 45010 }, { "epoch": 296.1842105263158, "grad_norm": 0.9928843975067139, "learning_rate": 0.0001, "loss": 0.0157, "step": 45020 }, { "epoch": 296.25, "grad_norm": 1.1260870695114136, "learning_rate": 0.0001, "loss": 0.0138, "step": 45030 }, { "epoch": 296.3157894736842, "grad_norm": 1.1443604230880737, "learning_rate": 0.0001, "loss": 0.0142, "step": 45040 }, { "epoch": 296.38157894736844, "grad_norm": 1.091615915298462, "learning_rate": 0.0001, "loss": 0.0169, "step": 45050 }, { "epoch": 296.44736842105266, "grad_norm": 0.9866099953651428, "learning_rate": 0.0001, "loss": 0.0134, "step": 45060 }, { "epoch": 296.5131578947368, "grad_norm": 1.350821852684021, "learning_rate": 0.0001, "loss": 0.0149, "step": 45070 }, { "epoch": 296.57894736842104, "grad_norm": 0.8730394244194031, "learning_rate": 0.0001, "loss": 0.0142, "step": 45080 }, { "epoch": 296.64473684210526, "grad_norm": 1.3271095752716064, "learning_rate": 0.0001, "loss": 0.0154, "step": 45090 }, { "epoch": 296.7105263157895, "grad_norm": 1.1160370111465454, "learning_rate": 0.0001, "loss": 0.0155, "step": 45100 }, { "epoch": 296.7763157894737, "grad_norm": 1.0997018814086914, "learning_rate": 0.0001, "loss": 0.0133, "step": 45110 }, { "epoch": 296.8421052631579, "grad_norm": 1.1898871660232544, "learning_rate": 0.0001, "loss": 0.0179, "step": 45120 }, { "epoch": 296.9078947368421, "grad_norm": 0.9266688227653503, "learning_rate": 0.0001, "loss": 0.0149, "step": 45130 }, { "epoch": 296.9736842105263, "grad_norm": 1.0709036588668823, "learning_rate": 0.0001, "loss": 0.0137, "step": 45140 }, { "epoch": 297.0394736842105, "grad_norm": 1.0864067077636719, "learning_rate": 0.0001, "loss": 0.0132, "step": 45150 }, { "epoch": 297.10526315789474, "grad_norm": 0.7684114575386047, "learning_rate": 0.0001, "loss": 0.0161, "step": 45160 }, { "epoch": 297.17105263157896, "grad_norm": 1.0278398990631104, "learning_rate": 0.0001, "loss": 0.0154, "step": 45170 }, { "epoch": 297.2368421052632, "grad_norm": 1.1107127666473389, "learning_rate": 0.0001, "loss": 0.0141, "step": 45180 }, { "epoch": 297.30263157894734, "grad_norm": 0.9575175046920776, "learning_rate": 0.0001, "loss": 0.0149, "step": 45190 }, { "epoch": 297.36842105263156, "grad_norm": 1.2704232931137085, "learning_rate": 0.0001, "loss": 0.016, "step": 45200 }, { "epoch": 297.4342105263158, "grad_norm": 1.383664608001709, "learning_rate": 0.0001, "loss": 0.0143, "step": 45210 }, { "epoch": 297.5, "grad_norm": 0.8386159539222717, "learning_rate": 0.0001, "loss": 0.0138, "step": 45220 }, { "epoch": 297.5657894736842, "grad_norm": 1.1953688859939575, "learning_rate": 0.0001, "loss": 0.0136, "step": 45230 }, { "epoch": 297.63157894736844, "grad_norm": 1.0901776552200317, "learning_rate": 0.0001, "loss": 0.0142, "step": 45240 }, { "epoch": 297.69736842105266, "grad_norm": 1.1900759935379028, "learning_rate": 0.0001, "loss": 0.0143, "step": 45250 }, { "epoch": 297.7631578947368, "grad_norm": 0.9617878794670105, "learning_rate": 0.0001, "loss": 0.0142, "step": 45260 }, { "epoch": 297.82894736842104, "grad_norm": 1.275898814201355, "learning_rate": 0.0001, "loss": 0.0157, "step": 45270 }, { "epoch": 297.89473684210526, "grad_norm": 1.2279614210128784, "learning_rate": 0.0001, "loss": 0.0148, "step": 45280 }, { "epoch": 297.9605263157895, "grad_norm": 1.4091862440109253, "learning_rate": 0.0001, "loss": 0.0153, "step": 45290 }, { "epoch": 298.0263157894737, "grad_norm": 1.1640609502792358, "learning_rate": 0.0001, "loss": 0.0128, "step": 45300 }, { "epoch": 298.0921052631579, "grad_norm": 1.000942587852478, "learning_rate": 0.0001, "loss": 0.0134, "step": 45310 }, { "epoch": 298.1578947368421, "grad_norm": 0.9416703581809998, "learning_rate": 0.0001, "loss": 0.0133, "step": 45320 }, { "epoch": 298.2236842105263, "grad_norm": 1.3636903762817383, "learning_rate": 0.0001, "loss": 0.0124, "step": 45330 }, { "epoch": 298.2894736842105, "grad_norm": 0.9758198261260986, "learning_rate": 0.0001, "loss": 0.0136, "step": 45340 }, { "epoch": 298.35526315789474, "grad_norm": 1.266970157623291, "learning_rate": 0.0001, "loss": 0.0147, "step": 45350 }, { "epoch": 298.42105263157896, "grad_norm": 1.5186119079589844, "learning_rate": 0.0001, "loss": 0.0158, "step": 45360 }, { "epoch": 298.4868421052632, "grad_norm": 1.2090457677841187, "learning_rate": 0.0001, "loss": 0.0138, "step": 45370 }, { "epoch": 298.55263157894734, "grad_norm": 1.673222541809082, "learning_rate": 0.0001, "loss": 0.0154, "step": 45380 }, { "epoch": 298.61842105263156, "grad_norm": 1.3236820697784424, "learning_rate": 0.0001, "loss": 0.0133, "step": 45390 }, { "epoch": 298.6842105263158, "grad_norm": 1.2713639736175537, "learning_rate": 0.0001, "loss": 0.0124, "step": 45400 }, { "epoch": 298.75, "grad_norm": 1.2878004312515259, "learning_rate": 0.0001, "loss": 0.0141, "step": 45410 }, { "epoch": 298.8157894736842, "grad_norm": 1.013006567955017, "learning_rate": 0.0001, "loss": 0.0135, "step": 45420 }, { "epoch": 298.88157894736844, "grad_norm": 1.3307374715805054, "learning_rate": 0.0001, "loss": 0.0146, "step": 45430 }, { "epoch": 298.94736842105266, "grad_norm": 1.2865517139434814, "learning_rate": 0.0001, "loss": 0.0134, "step": 45440 }, { "epoch": 299.0131578947368, "grad_norm": 1.2010281085968018, "learning_rate": 0.0001, "loss": 0.0165, "step": 45450 }, { "epoch": 299.07894736842104, "grad_norm": 1.3164724111557007, "learning_rate": 0.0001, "loss": 0.014, "step": 45460 }, { "epoch": 299.14473684210526, "grad_norm": 1.2685626745224, "learning_rate": 0.0001, "loss": 0.014, "step": 45470 }, { "epoch": 299.2105263157895, "grad_norm": 1.0890898704528809, "learning_rate": 0.0001, "loss": 0.0135, "step": 45480 }, { "epoch": 299.2763157894737, "grad_norm": 1.3427873849868774, "learning_rate": 0.0001, "loss": 0.0129, "step": 45490 }, { "epoch": 299.3421052631579, "grad_norm": 1.1876996755599976, "learning_rate": 0.0001, "loss": 0.0141, "step": 45500 }, { "epoch": 299.4078947368421, "grad_norm": 1.2783021926879883, "learning_rate": 0.0001, "loss": 0.014, "step": 45510 }, { "epoch": 299.4736842105263, "grad_norm": 1.2588651180267334, "learning_rate": 0.0001, "loss": 0.0134, "step": 45520 }, { "epoch": 299.5394736842105, "grad_norm": 1.005475640296936, "learning_rate": 0.0001, "loss": 0.0166, "step": 45530 }, { "epoch": 299.60526315789474, "grad_norm": 1.3194289207458496, "learning_rate": 0.0001, "loss": 0.0139, "step": 45540 }, { "epoch": 299.67105263157896, "grad_norm": 1.593381643295288, "learning_rate": 0.0001, "loss": 0.0138, "step": 45550 }, { "epoch": 299.7368421052632, "grad_norm": 1.1744670867919922, "learning_rate": 0.0001, "loss": 0.0139, "step": 45560 }, { "epoch": 299.80263157894734, "grad_norm": 1.151694893836975, "learning_rate": 0.0001, "loss": 0.015, "step": 45570 }, { "epoch": 299.86842105263156, "grad_norm": 0.9926699995994568, "learning_rate": 0.0001, "loss": 0.0116, "step": 45580 }, { "epoch": 299.9342105263158, "grad_norm": 1.3312798738479614, "learning_rate": 0.0001, "loss": 0.015, "step": 45590 }, { "epoch": 300.0, "grad_norm": 1.242369294166565, "learning_rate": 0.0001, "loss": 0.0139, "step": 45600 }, { "epoch": 300.0657894736842, "grad_norm": 1.139743447303772, "learning_rate": 0.0001, "loss": 0.0141, "step": 45610 }, { "epoch": 300.13157894736844, "grad_norm": 1.255859375, "learning_rate": 0.0001, "loss": 0.0135, "step": 45620 }, { "epoch": 300.19736842105266, "grad_norm": 1.1239902973175049, "learning_rate": 0.0001, "loss": 0.0129, "step": 45630 }, { "epoch": 300.2631578947368, "grad_norm": 1.4652752876281738, "learning_rate": 0.0001, "loss": 0.0144, "step": 45640 }, { "epoch": 300.32894736842104, "grad_norm": 1.098900556564331, "learning_rate": 0.0001, "loss": 0.0145, "step": 45650 }, { "epoch": 300.39473684210526, "grad_norm": 0.6829715371131897, "learning_rate": 0.0001, "loss": 0.0134, "step": 45660 }, { "epoch": 300.4605263157895, "grad_norm": 1.10642409324646, "learning_rate": 0.0001, "loss": 0.0134, "step": 45670 }, { "epoch": 300.5263157894737, "grad_norm": 0.9976691007614136, "learning_rate": 0.0001, "loss": 0.0134, "step": 45680 }, { "epoch": 300.5921052631579, "grad_norm": 1.3558682203292847, "learning_rate": 0.0001, "loss": 0.0131, "step": 45690 }, { "epoch": 300.6578947368421, "grad_norm": 1.131054162979126, "learning_rate": 0.0001, "loss": 0.0151, "step": 45700 }, { "epoch": 300.7236842105263, "grad_norm": 1.3741534948349, "learning_rate": 0.0001, "loss": 0.0139, "step": 45710 }, { "epoch": 300.7894736842105, "grad_norm": 1.3326232433319092, "learning_rate": 0.0001, "loss": 0.0125, "step": 45720 }, { "epoch": 300.85526315789474, "grad_norm": 1.5174059867858887, "learning_rate": 0.0001, "loss": 0.0144, "step": 45730 }, { "epoch": 300.92105263157896, "grad_norm": 1.343550443649292, "learning_rate": 0.0001, "loss": 0.014, "step": 45740 }, { "epoch": 300.9868421052632, "grad_norm": 1.3771088123321533, "learning_rate": 0.0001, "loss": 0.0132, "step": 45750 }, { "epoch": 301.05263157894734, "grad_norm": 1.3173880577087402, "learning_rate": 0.0001, "loss": 0.0139, "step": 45760 }, { "epoch": 301.11842105263156, "grad_norm": 1.4637528657913208, "learning_rate": 0.0001, "loss": 0.0141, "step": 45770 }, { "epoch": 301.1842105263158, "grad_norm": 1.0429658889770508, "learning_rate": 0.0001, "loss": 0.0135, "step": 45780 }, { "epoch": 301.25, "grad_norm": 1.3570419549942017, "learning_rate": 0.0001, "loss": 0.0146, "step": 45790 }, { "epoch": 301.3157894736842, "grad_norm": 1.0001505613327026, "learning_rate": 0.0001, "loss": 0.0146, "step": 45800 }, { "epoch": 301.38157894736844, "grad_norm": 1.192205786705017, "learning_rate": 0.0001, "loss": 0.0134, "step": 45810 }, { "epoch": 301.44736842105266, "grad_norm": 1.4170939922332764, "learning_rate": 0.0001, "loss": 0.0141, "step": 45820 }, { "epoch": 301.5131578947368, "grad_norm": 1.2274196147918701, "learning_rate": 0.0001, "loss": 0.0134, "step": 45830 }, { "epoch": 301.57894736842104, "grad_norm": 1.2040157318115234, "learning_rate": 0.0001, "loss": 0.0128, "step": 45840 }, { "epoch": 301.64473684210526, "grad_norm": 1.2414194345474243, "learning_rate": 0.0001, "loss": 0.014, "step": 45850 }, { "epoch": 301.7105263157895, "grad_norm": 1.185573935508728, "learning_rate": 0.0001, "loss": 0.0143, "step": 45860 }, { "epoch": 301.7763157894737, "grad_norm": 1.1964563131332397, "learning_rate": 0.0001, "loss": 0.014, "step": 45870 }, { "epoch": 301.8421052631579, "grad_norm": 1.363934874534607, "learning_rate": 0.0001, "loss": 0.0154, "step": 45880 }, { "epoch": 301.9078947368421, "grad_norm": 1.0584746599197388, "learning_rate": 0.0001, "loss": 0.0155, "step": 45890 }, { "epoch": 301.9736842105263, "grad_norm": 1.0964704751968384, "learning_rate": 0.0001, "loss": 0.0142, "step": 45900 }, { "epoch": 302.0394736842105, "grad_norm": 0.9291341304779053, "learning_rate": 0.0001, "loss": 0.0134, "step": 45910 }, { "epoch": 302.10526315789474, "grad_norm": 1.3490403890609741, "learning_rate": 0.0001, "loss": 0.0142, "step": 45920 }, { "epoch": 302.17105263157896, "grad_norm": 1.4308061599731445, "learning_rate": 0.0001, "loss": 0.0149, "step": 45930 }, { "epoch": 302.2368421052632, "grad_norm": 1.307144045829773, "learning_rate": 0.0001, "loss": 0.0124, "step": 45940 }, { "epoch": 302.30263157894734, "grad_norm": 1.7374738454818726, "learning_rate": 0.0001, "loss": 0.0149, "step": 45950 }, { "epoch": 302.36842105263156, "grad_norm": 1.5400711297988892, "learning_rate": 0.0001, "loss": 0.0152, "step": 45960 }, { "epoch": 302.4342105263158, "grad_norm": 1.6926573514938354, "learning_rate": 0.0001, "loss": 0.0145, "step": 45970 }, { "epoch": 302.5, "grad_norm": 1.3912023305892944, "learning_rate": 0.0001, "loss": 0.013, "step": 45980 }, { "epoch": 302.5657894736842, "grad_norm": 1.3487814664840698, "learning_rate": 0.0001, "loss": 0.0134, "step": 45990 }, { "epoch": 302.63157894736844, "grad_norm": 1.3603285551071167, "learning_rate": 0.0001, "loss": 0.0145, "step": 46000 }, { "epoch": 302.69736842105266, "grad_norm": 1.107670545578003, "learning_rate": 0.0001, "loss": 0.0144, "step": 46010 }, { "epoch": 302.7631578947368, "grad_norm": 1.2520265579223633, "learning_rate": 0.0001, "loss": 0.013, "step": 46020 }, { "epoch": 302.82894736842104, "grad_norm": 1.2443839311599731, "learning_rate": 0.0001, "loss": 0.013, "step": 46030 }, { "epoch": 302.89473684210526, "grad_norm": 1.4623485803604126, "learning_rate": 0.0001, "loss": 0.0129, "step": 46040 }, { "epoch": 302.9605263157895, "grad_norm": 1.1879698038101196, "learning_rate": 0.0001, "loss": 0.0133, "step": 46050 }, { "epoch": 303.0263157894737, "grad_norm": 1.3571995496749878, "learning_rate": 0.0001, "loss": 0.0146, "step": 46060 }, { "epoch": 303.0921052631579, "grad_norm": 1.199619174003601, "learning_rate": 0.0001, "loss": 0.0135, "step": 46070 }, { "epoch": 303.1578947368421, "grad_norm": 1.4226540327072144, "learning_rate": 0.0001, "loss": 0.0125, "step": 46080 }, { "epoch": 303.2236842105263, "grad_norm": 1.490116834640503, "learning_rate": 0.0001, "loss": 0.0121, "step": 46090 }, { "epoch": 303.2894736842105, "grad_norm": 1.325772762298584, "learning_rate": 0.0001, "loss": 0.0142, "step": 46100 }, { "epoch": 303.35526315789474, "grad_norm": 1.2829171419143677, "learning_rate": 0.0001, "loss": 0.0126, "step": 46110 }, { "epoch": 303.42105263157896, "grad_norm": 1.164736032485962, "learning_rate": 0.0001, "loss": 0.0146, "step": 46120 }, { "epoch": 303.4868421052632, "grad_norm": 1.147537350654602, "learning_rate": 0.0001, "loss": 0.015, "step": 46130 }, { "epoch": 303.55263157894734, "grad_norm": 0.7918049693107605, "learning_rate": 0.0001, "loss": 0.0163, "step": 46140 }, { "epoch": 303.61842105263156, "grad_norm": 1.1674093008041382, "learning_rate": 0.0001, "loss": 0.0125, "step": 46150 }, { "epoch": 303.6842105263158, "grad_norm": 1.2786973714828491, "learning_rate": 0.0001, "loss": 0.0123, "step": 46160 }, { "epoch": 303.75, "grad_norm": 1.233464241027832, "learning_rate": 0.0001, "loss": 0.0117, "step": 46170 }, { "epoch": 303.8157894736842, "grad_norm": 1.2651668787002563, "learning_rate": 0.0001, "loss": 0.0158, "step": 46180 }, { "epoch": 303.88157894736844, "grad_norm": 1.4438832998275757, "learning_rate": 0.0001, "loss": 0.0138, "step": 46190 }, { "epoch": 303.94736842105266, "grad_norm": 1.30561363697052, "learning_rate": 0.0001, "loss": 0.0137, "step": 46200 }, { "epoch": 304.0131578947368, "grad_norm": 1.2133570909500122, "learning_rate": 0.0001, "loss": 0.0131, "step": 46210 }, { "epoch": 304.07894736842104, "grad_norm": 1.1664997339248657, "learning_rate": 0.0001, "loss": 0.0161, "step": 46220 }, { "epoch": 304.14473684210526, "grad_norm": 0.8901975750923157, "learning_rate": 0.0001, "loss": 0.0137, "step": 46230 }, { "epoch": 304.2105263157895, "grad_norm": 0.9484691619873047, "learning_rate": 0.0001, "loss": 0.0132, "step": 46240 }, { "epoch": 304.2763157894737, "grad_norm": 1.0902068614959717, "learning_rate": 0.0001, "loss": 0.0136, "step": 46250 }, { "epoch": 304.3421052631579, "grad_norm": 0.908757209777832, "learning_rate": 0.0001, "loss": 0.0149, "step": 46260 }, { "epoch": 304.4078947368421, "grad_norm": 1.406864047050476, "learning_rate": 0.0001, "loss": 0.0149, "step": 46270 }, { "epoch": 304.4736842105263, "grad_norm": 1.625902533531189, "learning_rate": 0.0001, "loss": 0.0132, "step": 46280 }, { "epoch": 304.5394736842105, "grad_norm": 1.1790637969970703, "learning_rate": 0.0001, "loss": 0.0132, "step": 46290 }, { "epoch": 304.60526315789474, "grad_norm": 1.3175780773162842, "learning_rate": 0.0001, "loss": 0.0135, "step": 46300 }, { "epoch": 304.67105263157896, "grad_norm": 1.1840614080429077, "learning_rate": 0.0001, "loss": 0.0148, "step": 46310 }, { "epoch": 304.7368421052632, "grad_norm": 1.3935436010360718, "learning_rate": 0.0001, "loss": 0.0151, "step": 46320 }, { "epoch": 304.80263157894734, "grad_norm": 1.0257351398468018, "learning_rate": 0.0001, "loss": 0.0135, "step": 46330 }, { "epoch": 304.86842105263156, "grad_norm": 0.9773437976837158, "learning_rate": 0.0001, "loss": 0.0139, "step": 46340 }, { "epoch": 304.9342105263158, "grad_norm": 1.1705121994018555, "learning_rate": 0.0001, "loss": 0.0136, "step": 46350 }, { "epoch": 305.0, "grad_norm": 1.569278359413147, "learning_rate": 0.0001, "loss": 0.012, "step": 46360 }, { "epoch": 305.0657894736842, "grad_norm": 1.1362944841384888, "learning_rate": 0.0001, "loss": 0.0147, "step": 46370 }, { "epoch": 305.13157894736844, "grad_norm": 0.9014955759048462, "learning_rate": 0.0001, "loss": 0.0149, "step": 46380 }, { "epoch": 305.19736842105266, "grad_norm": 1.377353549003601, "learning_rate": 0.0001, "loss": 0.013, "step": 46390 }, { "epoch": 305.2631578947368, "grad_norm": 1.3454692363739014, "learning_rate": 0.0001, "loss": 0.0136, "step": 46400 }, { "epoch": 305.32894736842104, "grad_norm": 1.1798051595687866, "learning_rate": 0.0001, "loss": 0.0128, "step": 46410 }, { "epoch": 305.39473684210526, "grad_norm": 1.0870208740234375, "learning_rate": 0.0001, "loss": 0.0138, "step": 46420 }, { "epoch": 305.4605263157895, "grad_norm": 0.8006429672241211, "learning_rate": 0.0001, "loss": 0.0134, "step": 46430 }, { "epoch": 305.5263157894737, "grad_norm": 0.890498161315918, "learning_rate": 0.0001, "loss": 0.0135, "step": 46440 }, { "epoch": 305.5921052631579, "grad_norm": 1.2902061939239502, "learning_rate": 0.0001, "loss": 0.0126, "step": 46450 }, { "epoch": 305.6578947368421, "grad_norm": 1.146141529083252, "learning_rate": 0.0001, "loss": 0.0138, "step": 46460 }, { "epoch": 305.7236842105263, "grad_norm": 0.9566381573677063, "learning_rate": 0.0001, "loss": 0.0136, "step": 46470 }, { "epoch": 305.7894736842105, "grad_norm": 1.5867373943328857, "learning_rate": 0.0001, "loss": 0.0165, "step": 46480 }, { "epoch": 305.85526315789474, "grad_norm": 1.2137548923492432, "learning_rate": 0.0001, "loss": 0.0138, "step": 46490 }, { "epoch": 305.92105263157896, "grad_norm": 1.305253505706787, "learning_rate": 0.0001, "loss": 0.015, "step": 46500 }, { "epoch": 305.9868421052632, "grad_norm": 0.8301891088485718, "learning_rate": 0.0001, "loss": 0.0134, "step": 46510 }, { "epoch": 306.05263157894734, "grad_norm": 0.9847671389579773, "learning_rate": 0.0001, "loss": 0.0135, "step": 46520 }, { "epoch": 306.11842105263156, "grad_norm": 1.592354655265808, "learning_rate": 0.0001, "loss": 0.0133, "step": 46530 }, { "epoch": 306.1842105263158, "grad_norm": 1.2078450918197632, "learning_rate": 0.0001, "loss": 0.0159, "step": 46540 }, { "epoch": 306.25, "grad_norm": 1.000321865081787, "learning_rate": 0.0001, "loss": 0.0133, "step": 46550 }, { "epoch": 306.3157894736842, "grad_norm": 1.207289695739746, "learning_rate": 0.0001, "loss": 0.0132, "step": 46560 }, { "epoch": 306.38157894736844, "grad_norm": 1.2162929773330688, "learning_rate": 0.0001, "loss": 0.0148, "step": 46570 }, { "epoch": 306.44736842105266, "grad_norm": 1.147230863571167, "learning_rate": 0.0001, "loss": 0.0153, "step": 46580 }, { "epoch": 306.5131578947368, "grad_norm": 1.2170518636703491, "learning_rate": 0.0001, "loss": 0.0148, "step": 46590 }, { "epoch": 306.57894736842104, "grad_norm": 1.1582554578781128, "learning_rate": 0.0001, "loss": 0.0142, "step": 46600 }, { "epoch": 306.64473684210526, "grad_norm": 1.2563729286193848, "learning_rate": 0.0001, "loss": 0.0135, "step": 46610 }, { "epoch": 306.7105263157895, "grad_norm": 0.9959396123886108, "learning_rate": 0.0001, "loss": 0.0151, "step": 46620 }, { "epoch": 306.7763157894737, "grad_norm": 1.2448713779449463, "learning_rate": 0.0001, "loss": 0.0133, "step": 46630 }, { "epoch": 306.8421052631579, "grad_norm": 1.0271637439727783, "learning_rate": 0.0001, "loss": 0.0143, "step": 46640 }, { "epoch": 306.9078947368421, "grad_norm": 1.0369125604629517, "learning_rate": 0.0001, "loss": 0.0152, "step": 46650 }, { "epoch": 306.9736842105263, "grad_norm": 1.0116463899612427, "learning_rate": 0.0001, "loss": 0.0141, "step": 46660 }, { "epoch": 307.0394736842105, "grad_norm": 1.3489338159561157, "learning_rate": 0.0001, "loss": 0.0157, "step": 46670 }, { "epoch": 307.10526315789474, "grad_norm": 1.214111328125, "learning_rate": 0.0001, "loss": 0.0147, "step": 46680 }, { "epoch": 307.17105263157896, "grad_norm": 1.4091652631759644, "learning_rate": 0.0001, "loss": 0.014, "step": 46690 }, { "epoch": 307.2368421052632, "grad_norm": 1.3401622772216797, "learning_rate": 0.0001, "loss": 0.0134, "step": 46700 }, { "epoch": 307.30263157894734, "grad_norm": 1.0717833042144775, "learning_rate": 0.0001, "loss": 0.0144, "step": 46710 }, { "epoch": 307.36842105263156, "grad_norm": 1.215564489364624, "learning_rate": 0.0001, "loss": 0.0156, "step": 46720 }, { "epoch": 307.4342105263158, "grad_norm": 1.126434326171875, "learning_rate": 0.0001, "loss": 0.0146, "step": 46730 }, { "epoch": 307.5, "grad_norm": 1.1707587242126465, "learning_rate": 0.0001, "loss": 0.0147, "step": 46740 }, { "epoch": 307.5657894736842, "grad_norm": 1.0138065814971924, "learning_rate": 0.0001, "loss": 0.0157, "step": 46750 }, { "epoch": 307.63157894736844, "grad_norm": 0.8410609364509583, "learning_rate": 0.0001, "loss": 0.0139, "step": 46760 }, { "epoch": 307.69736842105266, "grad_norm": 1.0997475385665894, "learning_rate": 0.0001, "loss": 0.0138, "step": 46770 }, { "epoch": 307.7631578947368, "grad_norm": 1.0100196599960327, "learning_rate": 0.0001, "loss": 0.0149, "step": 46780 }, { "epoch": 307.82894736842104, "grad_norm": 1.0084043741226196, "learning_rate": 0.0001, "loss": 0.0136, "step": 46790 }, { "epoch": 307.89473684210526, "grad_norm": 1.1131081581115723, "learning_rate": 0.0001, "loss": 0.0161, "step": 46800 }, { "epoch": 307.9605263157895, "grad_norm": 0.966968297958374, "learning_rate": 0.0001, "loss": 0.0136, "step": 46810 }, { "epoch": 308.0263157894737, "grad_norm": 0.7994092106819153, "learning_rate": 0.0001, "loss": 0.0147, "step": 46820 }, { "epoch": 308.0921052631579, "grad_norm": 1.2726207971572876, "learning_rate": 0.0001, "loss": 0.0168, "step": 46830 }, { "epoch": 308.1578947368421, "grad_norm": 1.2597211599349976, "learning_rate": 0.0001, "loss": 0.0149, "step": 46840 }, { "epoch": 308.2236842105263, "grad_norm": 1.6768962144851685, "learning_rate": 0.0001, "loss": 0.0162, "step": 46850 }, { "epoch": 308.2894736842105, "grad_norm": 1.068867564201355, "learning_rate": 0.0001, "loss": 0.0129, "step": 46860 }, { "epoch": 308.35526315789474, "grad_norm": 1.2445541620254517, "learning_rate": 0.0001, "loss": 0.0128, "step": 46870 }, { "epoch": 308.42105263157896, "grad_norm": 1.1465497016906738, "learning_rate": 0.0001, "loss": 0.0131, "step": 46880 }, { "epoch": 308.4868421052632, "grad_norm": 1.4472042322158813, "learning_rate": 0.0001, "loss": 0.0142, "step": 46890 }, { "epoch": 308.55263157894734, "grad_norm": 1.262213945388794, "learning_rate": 0.0001, "loss": 0.0136, "step": 46900 }, { "epoch": 308.61842105263156, "grad_norm": 1.1717816591262817, "learning_rate": 0.0001, "loss": 0.0138, "step": 46910 }, { "epoch": 308.6842105263158, "grad_norm": 0.995729923248291, "learning_rate": 0.0001, "loss": 0.0152, "step": 46920 }, { "epoch": 308.75, "grad_norm": 1.394285798072815, "learning_rate": 0.0001, "loss": 0.0154, "step": 46930 }, { "epoch": 308.8157894736842, "grad_norm": 1.8521760702133179, "learning_rate": 0.0001, "loss": 0.0138, "step": 46940 }, { "epoch": 308.88157894736844, "grad_norm": 1.5286895036697388, "learning_rate": 0.0001, "loss": 0.0136, "step": 46950 }, { "epoch": 308.94736842105266, "grad_norm": 0.9966525435447693, "learning_rate": 0.0001, "loss": 0.0127, "step": 46960 }, { "epoch": 309.0131578947368, "grad_norm": 1.0583655834197998, "learning_rate": 0.0001, "loss": 0.0146, "step": 46970 }, { "epoch": 309.07894736842104, "grad_norm": 1.0200626850128174, "learning_rate": 0.0001, "loss": 0.0149, "step": 46980 }, { "epoch": 309.14473684210526, "grad_norm": 1.2817143201828003, "learning_rate": 0.0001, "loss": 0.0148, "step": 46990 }, { "epoch": 309.2105263157895, "grad_norm": 1.4597275257110596, "learning_rate": 0.0001, "loss": 0.0141, "step": 47000 }, { "epoch": 309.2763157894737, "grad_norm": 1.026604413986206, "learning_rate": 0.0001, "loss": 0.0137, "step": 47010 }, { "epoch": 309.3421052631579, "grad_norm": 1.1650899648666382, "learning_rate": 0.0001, "loss": 0.0167, "step": 47020 }, { "epoch": 309.4078947368421, "grad_norm": 1.0153883695602417, "learning_rate": 0.0001, "loss": 0.0121, "step": 47030 }, { "epoch": 309.4736842105263, "grad_norm": 1.2171595096588135, "learning_rate": 0.0001, "loss": 0.0125, "step": 47040 }, { "epoch": 309.5394736842105, "grad_norm": 1.2337496280670166, "learning_rate": 0.0001, "loss": 0.012, "step": 47050 }, { "epoch": 309.60526315789474, "grad_norm": 1.2742234468460083, "learning_rate": 0.0001, "loss": 0.0151, "step": 47060 }, { "epoch": 309.67105263157896, "grad_norm": 1.307140588760376, "learning_rate": 0.0001, "loss": 0.0124, "step": 47070 }, { "epoch": 309.7368421052632, "grad_norm": 0.6933286190032959, "learning_rate": 0.0001, "loss": 0.0156, "step": 47080 }, { "epoch": 309.80263157894734, "grad_norm": 1.448660135269165, "learning_rate": 0.0001, "loss": 0.0125, "step": 47090 }, { "epoch": 309.86842105263156, "grad_norm": 1.2095212936401367, "learning_rate": 0.0001, "loss": 0.0142, "step": 47100 }, { "epoch": 309.9342105263158, "grad_norm": 1.179511308670044, "learning_rate": 0.0001, "loss": 0.0118, "step": 47110 }, { "epoch": 310.0, "grad_norm": 1.0663738250732422, "learning_rate": 0.0001, "loss": 0.0137, "step": 47120 }, { "epoch": 310.0657894736842, "grad_norm": 1.395041823387146, "learning_rate": 0.0001, "loss": 0.0163, "step": 47130 }, { "epoch": 310.13157894736844, "grad_norm": 1.5173497200012207, "learning_rate": 0.0001, "loss": 0.0128, "step": 47140 }, { "epoch": 310.19736842105266, "grad_norm": 1.4677331447601318, "learning_rate": 0.0001, "loss": 0.0133, "step": 47150 }, { "epoch": 310.2631578947368, "grad_norm": 1.263916015625, "learning_rate": 0.0001, "loss": 0.0141, "step": 47160 }, { "epoch": 310.32894736842104, "grad_norm": 1.796497106552124, "learning_rate": 0.0001, "loss": 0.0127, "step": 47170 }, { "epoch": 310.39473684210526, "grad_norm": 1.4208660125732422, "learning_rate": 0.0001, "loss": 0.0124, "step": 47180 }, { "epoch": 310.4605263157895, "grad_norm": 1.3126176595687866, "learning_rate": 0.0001, "loss": 0.0128, "step": 47190 }, { "epoch": 310.5263157894737, "grad_norm": 1.1969044208526611, "learning_rate": 0.0001, "loss": 0.0139, "step": 47200 }, { "epoch": 310.5921052631579, "grad_norm": 1.3965052366256714, "learning_rate": 0.0001, "loss": 0.0129, "step": 47210 }, { "epoch": 310.6578947368421, "grad_norm": 1.1293995380401611, "learning_rate": 0.0001, "loss": 0.0127, "step": 47220 }, { "epoch": 310.7236842105263, "grad_norm": 1.213976263999939, "learning_rate": 0.0001, "loss": 0.013, "step": 47230 }, { "epoch": 310.7894736842105, "grad_norm": 1.2718894481658936, "learning_rate": 0.0001, "loss": 0.0146, "step": 47240 }, { "epoch": 310.85526315789474, "grad_norm": 1.3019602298736572, "learning_rate": 0.0001, "loss": 0.013, "step": 47250 }, { "epoch": 310.92105263157896, "grad_norm": 1.1459777355194092, "learning_rate": 0.0001, "loss": 0.0129, "step": 47260 }, { "epoch": 310.9868421052632, "grad_norm": 0.9818292856216431, "learning_rate": 0.0001, "loss": 0.0147, "step": 47270 }, { "epoch": 311.05263157894734, "grad_norm": 1.271238088607788, "learning_rate": 0.0001, "loss": 0.0108, "step": 47280 }, { "epoch": 311.11842105263156, "grad_norm": 1.5601221323013306, "learning_rate": 0.0001, "loss": 0.0142, "step": 47290 }, { "epoch": 311.1842105263158, "grad_norm": 1.2876839637756348, "learning_rate": 0.0001, "loss": 0.0148, "step": 47300 }, { "epoch": 311.25, "grad_norm": 1.2727855443954468, "learning_rate": 0.0001, "loss": 0.0139, "step": 47310 }, { "epoch": 311.3157894736842, "grad_norm": 1.0672112703323364, "learning_rate": 0.0001, "loss": 0.0121, "step": 47320 }, { "epoch": 311.38157894736844, "grad_norm": 1.3671296834945679, "learning_rate": 0.0001, "loss": 0.0133, "step": 47330 }, { "epoch": 311.44736842105266, "grad_norm": 0.953478991985321, "learning_rate": 0.0001, "loss": 0.0127, "step": 47340 }, { "epoch": 311.5131578947368, "grad_norm": 1.3494404554367065, "learning_rate": 0.0001, "loss": 0.0127, "step": 47350 }, { "epoch": 311.57894736842104, "grad_norm": 1.256812572479248, "learning_rate": 0.0001, "loss": 0.0153, "step": 47360 }, { "epoch": 311.64473684210526, "grad_norm": 1.1076347827911377, "learning_rate": 0.0001, "loss": 0.0131, "step": 47370 }, { "epoch": 311.7105263157895, "grad_norm": 1.1264309883117676, "learning_rate": 0.0001, "loss": 0.0137, "step": 47380 }, { "epoch": 311.7763157894737, "grad_norm": 1.082269310951233, "learning_rate": 0.0001, "loss": 0.0145, "step": 47390 }, { "epoch": 311.8421052631579, "grad_norm": 1.2496287822723389, "learning_rate": 0.0001, "loss": 0.0148, "step": 47400 }, { "epoch": 311.9078947368421, "grad_norm": 1.2073965072631836, "learning_rate": 0.0001, "loss": 0.0131, "step": 47410 }, { "epoch": 311.9736842105263, "grad_norm": 1.4305161237716675, "learning_rate": 0.0001, "loss": 0.0136, "step": 47420 }, { "epoch": 312.0394736842105, "grad_norm": 1.231873631477356, "learning_rate": 0.0001, "loss": 0.0123, "step": 47430 }, { "epoch": 312.10526315789474, "grad_norm": 1.3474899530410767, "learning_rate": 0.0001, "loss": 0.0139, "step": 47440 }, { "epoch": 312.17105263157896, "grad_norm": 1.301870346069336, "learning_rate": 0.0001, "loss": 0.0138, "step": 47450 }, { "epoch": 312.2368421052632, "grad_norm": 1.3354082107543945, "learning_rate": 0.0001, "loss": 0.0126, "step": 47460 }, { "epoch": 312.30263157894734, "grad_norm": 1.112229347229004, "learning_rate": 0.0001, "loss": 0.0134, "step": 47470 }, { "epoch": 312.36842105263156, "grad_norm": 0.764408528804779, "learning_rate": 0.0001, "loss": 0.0136, "step": 47480 }, { "epoch": 312.4342105263158, "grad_norm": 0.8579666018486023, "learning_rate": 0.0001, "loss": 0.0127, "step": 47490 }, { "epoch": 312.5, "grad_norm": 0.7732484340667725, "learning_rate": 0.0001, "loss": 0.0153, "step": 47500 }, { "epoch": 312.5657894736842, "grad_norm": 1.0402549505233765, "learning_rate": 0.0001, "loss": 0.0135, "step": 47510 }, { "epoch": 312.63157894736844, "grad_norm": 0.876788318157196, "learning_rate": 0.0001, "loss": 0.0169, "step": 47520 }, { "epoch": 312.69736842105266, "grad_norm": 0.8869333267211914, "learning_rate": 0.0001, "loss": 0.0146, "step": 47530 }, { "epoch": 312.7631578947368, "grad_norm": 1.4594961404800415, "learning_rate": 0.0001, "loss": 0.0157, "step": 47540 }, { "epoch": 312.82894736842104, "grad_norm": 1.2416234016418457, "learning_rate": 0.0001, "loss": 0.0135, "step": 47550 }, { "epoch": 312.89473684210526, "grad_norm": 1.632511854171753, "learning_rate": 0.0001, "loss": 0.0131, "step": 47560 }, { "epoch": 312.9605263157895, "grad_norm": 1.8223305940628052, "learning_rate": 0.0001, "loss": 0.0147, "step": 47570 }, { "epoch": 313.0263157894737, "grad_norm": 1.4376325607299805, "learning_rate": 0.0001, "loss": 0.0127, "step": 47580 }, { "epoch": 313.0921052631579, "grad_norm": 1.3378567695617676, "learning_rate": 0.0001, "loss": 0.0132, "step": 47590 }, { "epoch": 313.1578947368421, "grad_norm": 1.1414756774902344, "learning_rate": 0.0001, "loss": 0.0144, "step": 47600 }, { "epoch": 313.2236842105263, "grad_norm": 1.2612454891204834, "learning_rate": 0.0001, "loss": 0.0126, "step": 47610 }, { "epoch": 313.2894736842105, "grad_norm": 1.2293567657470703, "learning_rate": 0.0001, "loss": 0.0138, "step": 47620 }, { "epoch": 313.35526315789474, "grad_norm": 1.0517131090164185, "learning_rate": 0.0001, "loss": 0.0125, "step": 47630 }, { "epoch": 313.42105263157896, "grad_norm": 1.222216248512268, "learning_rate": 0.0001, "loss": 0.0162, "step": 47640 }, { "epoch": 313.4868421052632, "grad_norm": 0.9757271409034729, "learning_rate": 0.0001, "loss": 0.0148, "step": 47650 }, { "epoch": 313.55263157894734, "grad_norm": 0.8784149289131165, "learning_rate": 0.0001, "loss": 0.0129, "step": 47660 }, { "epoch": 313.61842105263156, "grad_norm": 1.1013492345809937, "learning_rate": 0.0001, "loss": 0.0135, "step": 47670 }, { "epoch": 313.6842105263158, "grad_norm": 1.3613831996917725, "learning_rate": 0.0001, "loss": 0.0148, "step": 47680 }, { "epoch": 313.75, "grad_norm": 1.004358172416687, "learning_rate": 0.0001, "loss": 0.0142, "step": 47690 }, { "epoch": 313.8157894736842, "grad_norm": 1.2566474676132202, "learning_rate": 0.0001, "loss": 0.0141, "step": 47700 }, { "epoch": 313.88157894736844, "grad_norm": 1.362387776374817, "learning_rate": 0.0001, "loss": 0.0134, "step": 47710 }, { "epoch": 313.94736842105266, "grad_norm": 0.9462046027183533, "learning_rate": 0.0001, "loss": 0.0163, "step": 47720 }, { "epoch": 314.0131578947368, "grad_norm": 1.5367804765701294, "learning_rate": 0.0001, "loss": 0.0136, "step": 47730 }, { "epoch": 314.07894736842104, "grad_norm": 1.370335340499878, "learning_rate": 0.0001, "loss": 0.0143, "step": 47740 }, { "epoch": 314.14473684210526, "grad_norm": 1.0723284482955933, "learning_rate": 0.0001, "loss": 0.015, "step": 47750 }, { "epoch": 314.2105263157895, "grad_norm": 1.293366551399231, "learning_rate": 0.0001, "loss": 0.0152, "step": 47760 }, { "epoch": 314.2763157894737, "grad_norm": 1.2416729927062988, "learning_rate": 0.0001, "loss": 0.0127, "step": 47770 }, { "epoch": 314.3421052631579, "grad_norm": 0.9720730781555176, "learning_rate": 0.0001, "loss": 0.0137, "step": 47780 }, { "epoch": 314.4078947368421, "grad_norm": 1.1090688705444336, "learning_rate": 0.0001, "loss": 0.015, "step": 47790 }, { "epoch": 314.4736842105263, "grad_norm": 1.4181932210922241, "learning_rate": 0.0001, "loss": 0.0144, "step": 47800 }, { "epoch": 314.5394736842105, "grad_norm": 1.1214467287063599, "learning_rate": 0.0001, "loss": 0.0136, "step": 47810 }, { "epoch": 314.60526315789474, "grad_norm": 1.126190185546875, "learning_rate": 0.0001, "loss": 0.015, "step": 47820 }, { "epoch": 314.67105263157896, "grad_norm": 1.3774467706680298, "learning_rate": 0.0001, "loss": 0.0128, "step": 47830 }, { "epoch": 314.7368421052632, "grad_norm": 1.2156517505645752, "learning_rate": 0.0001, "loss": 0.0123, "step": 47840 }, { "epoch": 314.80263157894734, "grad_norm": 0.9228918552398682, "learning_rate": 0.0001, "loss": 0.0113, "step": 47850 }, { "epoch": 314.86842105263156, "grad_norm": 1.2994784116744995, "learning_rate": 0.0001, "loss": 0.0131, "step": 47860 }, { "epoch": 314.9342105263158, "grad_norm": 1.2928920984268188, "learning_rate": 0.0001, "loss": 0.0137, "step": 47870 }, { "epoch": 315.0, "grad_norm": 0.9638012647628784, "learning_rate": 0.0001, "loss": 0.0147, "step": 47880 }, { "epoch": 315.0657894736842, "grad_norm": 0.971041738986969, "learning_rate": 0.0001, "loss": 0.015, "step": 47890 }, { "epoch": 315.13157894736844, "grad_norm": 0.9184520244598389, "learning_rate": 0.0001, "loss": 0.0129, "step": 47900 }, { "epoch": 315.19736842105266, "grad_norm": 0.8455377221107483, "learning_rate": 0.0001, "loss": 0.0144, "step": 47910 }, { "epoch": 315.2631578947368, "grad_norm": 0.7738654017448425, "learning_rate": 0.0001, "loss": 0.0134, "step": 47920 }, { "epoch": 315.32894736842104, "grad_norm": 1.2264034748077393, "learning_rate": 0.0001, "loss": 0.0148, "step": 47930 }, { "epoch": 315.39473684210526, "grad_norm": 0.7875370979309082, "learning_rate": 0.0001, "loss": 0.0145, "step": 47940 }, { "epoch": 315.4605263157895, "grad_norm": 0.7856130599975586, "learning_rate": 0.0001, "loss": 0.0131, "step": 47950 }, { "epoch": 315.5263157894737, "grad_norm": 1.2851800918579102, "learning_rate": 0.0001, "loss": 0.0128, "step": 47960 }, { "epoch": 315.5921052631579, "grad_norm": 1.3538479804992676, "learning_rate": 0.0001, "loss": 0.0148, "step": 47970 }, { "epoch": 315.6578947368421, "grad_norm": 1.427662968635559, "learning_rate": 0.0001, "loss": 0.0149, "step": 47980 }, { "epoch": 315.7236842105263, "grad_norm": 1.2748855352401733, "learning_rate": 0.0001, "loss": 0.0149, "step": 47990 }, { "epoch": 315.7894736842105, "grad_norm": 1.1658332347869873, "learning_rate": 0.0001, "loss": 0.0149, "step": 48000 }, { "epoch": 315.85526315789474, "grad_norm": 1.1847903728485107, "learning_rate": 0.0001, "loss": 0.016, "step": 48010 }, { "epoch": 315.92105263157896, "grad_norm": 1.2371987104415894, "learning_rate": 0.0001, "loss": 0.013, "step": 48020 }, { "epoch": 315.9868421052632, "grad_norm": 1.256600260734558, "learning_rate": 0.0001, "loss": 0.0161, "step": 48030 }, { "epoch": 316.05263157894734, "grad_norm": 1.1105140447616577, "learning_rate": 0.0001, "loss": 0.0128, "step": 48040 }, { "epoch": 316.11842105263156, "grad_norm": 1.4505999088287354, "learning_rate": 0.0001, "loss": 0.0147, "step": 48050 }, { "epoch": 316.1842105263158, "grad_norm": 1.5613057613372803, "learning_rate": 0.0001, "loss": 0.0147, "step": 48060 }, { "epoch": 316.25, "grad_norm": 1.2023935317993164, "learning_rate": 0.0001, "loss": 0.0151, "step": 48070 }, { "epoch": 316.3157894736842, "grad_norm": 1.123512864112854, "learning_rate": 0.0001, "loss": 0.0125, "step": 48080 }, { "epoch": 316.38157894736844, "grad_norm": 1.049616813659668, "learning_rate": 0.0001, "loss": 0.0158, "step": 48090 }, { "epoch": 316.44736842105266, "grad_norm": 0.9086410403251648, "learning_rate": 0.0001, "loss": 0.0156, "step": 48100 }, { "epoch": 316.5131578947368, "grad_norm": 0.9956292510032654, "learning_rate": 0.0001, "loss": 0.0192, "step": 48110 }, { "epoch": 316.57894736842104, "grad_norm": 1.0076509714126587, "learning_rate": 0.0001, "loss": 0.0167, "step": 48120 }, { "epoch": 316.64473684210526, "grad_norm": 0.9078097343444824, "learning_rate": 0.0001, "loss": 0.0161, "step": 48130 }, { "epoch": 316.7105263157895, "grad_norm": 1.2351521253585815, "learning_rate": 0.0001, "loss": 0.018, "step": 48140 }, { "epoch": 316.7763157894737, "grad_norm": 0.9075007438659668, "learning_rate": 0.0001, "loss": 0.017, "step": 48150 }, { "epoch": 316.8421052631579, "grad_norm": 1.0333648920059204, "learning_rate": 0.0001, "loss": 0.0166, "step": 48160 }, { "epoch": 316.9078947368421, "grad_norm": 1.42146635055542, "learning_rate": 0.0001, "loss": 0.0204, "step": 48170 }, { "epoch": 316.9736842105263, "grad_norm": 1.4142286777496338, "learning_rate": 0.0001, "loss": 0.0182, "step": 48180 }, { "epoch": 317.0394736842105, "grad_norm": 1.693477749824524, "learning_rate": 0.0001, "loss": 0.0171, "step": 48190 }, { "epoch": 317.10526315789474, "grad_norm": 1.183012843132019, "learning_rate": 0.0001, "loss": 0.0203, "step": 48200 }, { "epoch": 317.17105263157896, "grad_norm": 1.2209391593933105, "learning_rate": 0.0001, "loss": 0.0172, "step": 48210 }, { "epoch": 317.2368421052632, "grad_norm": 1.294472098350525, "learning_rate": 0.0001, "loss": 0.0142, "step": 48220 }, { "epoch": 317.30263157894734, "grad_norm": 1.6887290477752686, "learning_rate": 0.0001, "loss": 0.0178, "step": 48230 }, { "epoch": 317.36842105263156, "grad_norm": 1.9167300462722778, "learning_rate": 0.0001, "loss": 0.0149, "step": 48240 }, { "epoch": 317.4342105263158, "grad_norm": 1.2060270309448242, "learning_rate": 0.0001, "loss": 0.0141, "step": 48250 }, { "epoch": 317.5, "grad_norm": 0.9500623941421509, "learning_rate": 0.0001, "loss": 0.0135, "step": 48260 }, { "epoch": 317.5657894736842, "grad_norm": 1.399133563041687, "learning_rate": 0.0001, "loss": 0.0165, "step": 48270 }, { "epoch": 317.63157894736844, "grad_norm": 1.2392226457595825, "learning_rate": 0.0001, "loss": 0.0149, "step": 48280 }, { "epoch": 317.69736842105266, "grad_norm": 1.2007087469100952, "learning_rate": 0.0001, "loss": 0.0133, "step": 48290 }, { "epoch": 317.7631578947368, "grad_norm": 1.4812746047973633, "learning_rate": 0.0001, "loss": 0.0147, "step": 48300 }, { "epoch": 317.82894736842104, "grad_norm": 1.3363618850708008, "learning_rate": 0.0001, "loss": 0.0149, "step": 48310 }, { "epoch": 317.89473684210526, "grad_norm": 1.3857090473175049, "learning_rate": 0.0001, "loss": 0.0133, "step": 48320 }, { "epoch": 317.9605263157895, "grad_norm": 1.5482040643692017, "learning_rate": 0.0001, "loss": 0.0136, "step": 48330 }, { "epoch": 318.0263157894737, "grad_norm": 1.3046433925628662, "learning_rate": 0.0001, "loss": 0.0122, "step": 48340 }, { "epoch": 318.0921052631579, "grad_norm": 1.4874904155731201, "learning_rate": 0.0001, "loss": 0.0133, "step": 48350 }, { "epoch": 318.1578947368421, "grad_norm": 1.252055048942566, "learning_rate": 0.0001, "loss": 0.0141, "step": 48360 }, { "epoch": 318.2236842105263, "grad_norm": 1.297682762145996, "learning_rate": 0.0001, "loss": 0.0143, "step": 48370 }, { "epoch": 318.2894736842105, "grad_norm": 1.7662724256515503, "learning_rate": 0.0001, "loss": 0.0142, "step": 48380 }, { "epoch": 318.35526315789474, "grad_norm": 1.5304863452911377, "learning_rate": 0.0001, "loss": 0.0121, "step": 48390 }, { "epoch": 318.42105263157896, "grad_norm": 1.3960685729980469, "learning_rate": 0.0001, "loss": 0.0135, "step": 48400 }, { "epoch": 318.4868421052632, "grad_norm": 1.506137490272522, "learning_rate": 0.0001, "loss": 0.0139, "step": 48410 }, { "epoch": 318.55263157894734, "grad_norm": 1.1885483264923096, "learning_rate": 0.0001, "loss": 0.0151, "step": 48420 }, { "epoch": 318.61842105263156, "grad_norm": 1.2131962776184082, "learning_rate": 0.0001, "loss": 0.0139, "step": 48430 }, { "epoch": 318.6842105263158, "grad_norm": 1.2580586671829224, "learning_rate": 0.0001, "loss": 0.0123, "step": 48440 }, { "epoch": 318.75, "grad_norm": 1.2965505123138428, "learning_rate": 0.0001, "loss": 0.0159, "step": 48450 }, { "epoch": 318.8157894736842, "grad_norm": 1.243948221206665, "learning_rate": 0.0001, "loss": 0.0136, "step": 48460 }, { "epoch": 318.88157894736844, "grad_norm": 0.9847695231437683, "learning_rate": 0.0001, "loss": 0.0139, "step": 48470 }, { "epoch": 318.94736842105266, "grad_norm": 1.0571852922439575, "learning_rate": 0.0001, "loss": 0.013, "step": 48480 }, { "epoch": 319.0131578947368, "grad_norm": 0.8944369554519653, "learning_rate": 0.0001, "loss": 0.0149, "step": 48490 }, { "epoch": 319.07894736842104, "grad_norm": 1.3923020362854004, "learning_rate": 0.0001, "loss": 0.0132, "step": 48500 }, { "epoch": 319.14473684210526, "grad_norm": 1.0978920459747314, "learning_rate": 0.0001, "loss": 0.0146, "step": 48510 }, { "epoch": 319.2105263157895, "grad_norm": 1.073713779449463, "learning_rate": 0.0001, "loss": 0.0131, "step": 48520 }, { "epoch": 319.2763157894737, "grad_norm": 0.8668067455291748, "learning_rate": 0.0001, "loss": 0.0151, "step": 48530 }, { "epoch": 319.3421052631579, "grad_norm": 0.8067622184753418, "learning_rate": 0.0001, "loss": 0.0131, "step": 48540 }, { "epoch": 319.4078947368421, "grad_norm": 0.9731525182723999, "learning_rate": 0.0001, "loss": 0.013, "step": 48550 }, { "epoch": 319.4736842105263, "grad_norm": 0.9085482358932495, "learning_rate": 0.0001, "loss": 0.0145, "step": 48560 }, { "epoch": 319.5394736842105, "grad_norm": 1.2935781478881836, "learning_rate": 0.0001, "loss": 0.014, "step": 48570 }, { "epoch": 319.60526315789474, "grad_norm": 2.343533515930176, "learning_rate": 0.0001, "loss": 0.0149, "step": 48580 }, { "epoch": 319.67105263157896, "grad_norm": 1.5465039014816284, "learning_rate": 0.0001, "loss": 0.0146, "step": 48590 }, { "epoch": 319.7368421052632, "grad_norm": 1.2827335596084595, "learning_rate": 0.0001, "loss": 0.0119, "step": 48600 }, { "epoch": 319.80263157894734, "grad_norm": 1.5653749704360962, "learning_rate": 0.0001, "loss": 0.0131, "step": 48610 }, { "epoch": 319.86842105263156, "grad_norm": 1.6465096473693848, "learning_rate": 0.0001, "loss": 0.0132, "step": 48620 }, { "epoch": 319.9342105263158, "grad_norm": 1.4881956577301025, "learning_rate": 0.0001, "loss": 0.013, "step": 48630 }, { "epoch": 320.0, "grad_norm": 1.5080645084381104, "learning_rate": 0.0001, "loss": 0.0139, "step": 48640 }, { "epoch": 320.0657894736842, "grad_norm": 1.516532063484192, "learning_rate": 0.0001, "loss": 0.0126, "step": 48650 }, { "epoch": 320.13157894736844, "grad_norm": 1.2939443588256836, "learning_rate": 0.0001, "loss": 0.0153, "step": 48660 }, { "epoch": 320.19736842105266, "grad_norm": 1.2543706893920898, "learning_rate": 0.0001, "loss": 0.0135, "step": 48670 }, { "epoch": 320.2631578947368, "grad_norm": 1.0210703611373901, "learning_rate": 0.0001, "loss": 0.0133, "step": 48680 }, { "epoch": 320.32894736842104, "grad_norm": 1.2187316417694092, "learning_rate": 0.0001, "loss": 0.0132, "step": 48690 }, { "epoch": 320.39473684210526, "grad_norm": 0.8297109007835388, "learning_rate": 0.0001, "loss": 0.0162, "step": 48700 }, { "epoch": 320.4605263157895, "grad_norm": 1.0933237075805664, "learning_rate": 0.0001, "loss": 0.0133, "step": 48710 }, { "epoch": 320.5263157894737, "grad_norm": 0.9736988544464111, "learning_rate": 0.0001, "loss": 0.0125, "step": 48720 }, { "epoch": 320.5921052631579, "grad_norm": 1.174633502960205, "learning_rate": 0.0001, "loss": 0.0144, "step": 48730 }, { "epoch": 320.6578947368421, "grad_norm": 1.419814109802246, "learning_rate": 0.0001, "loss": 0.0133, "step": 48740 }, { "epoch": 320.7236842105263, "grad_norm": 0.9784946441650391, "learning_rate": 0.0001, "loss": 0.0134, "step": 48750 }, { "epoch": 320.7894736842105, "grad_norm": 0.8846175670623779, "learning_rate": 0.0001, "loss": 0.0127, "step": 48760 }, { "epoch": 320.85526315789474, "grad_norm": 1.217024326324463, "learning_rate": 0.0001, "loss": 0.0131, "step": 48770 }, { "epoch": 320.92105263157896, "grad_norm": 1.0269222259521484, "learning_rate": 0.0001, "loss": 0.0127, "step": 48780 }, { "epoch": 320.9868421052632, "grad_norm": 1.176324486732483, "learning_rate": 0.0001, "loss": 0.0139, "step": 48790 }, { "epoch": 321.05263157894734, "grad_norm": 1.1904447078704834, "learning_rate": 0.0001, "loss": 0.0141, "step": 48800 }, { "epoch": 321.11842105263156, "grad_norm": 1.0044549703598022, "learning_rate": 0.0001, "loss": 0.0143, "step": 48810 }, { "epoch": 321.1842105263158, "grad_norm": 1.2409372329711914, "learning_rate": 0.0001, "loss": 0.0133, "step": 48820 }, { "epoch": 321.25, "grad_norm": 1.3968594074249268, "learning_rate": 0.0001, "loss": 0.0129, "step": 48830 }, { "epoch": 321.3157894736842, "grad_norm": 1.2159334421157837, "learning_rate": 0.0001, "loss": 0.0146, "step": 48840 }, { "epoch": 321.38157894736844, "grad_norm": 1.4838895797729492, "learning_rate": 0.0001, "loss": 0.014, "step": 48850 }, { "epoch": 321.44736842105266, "grad_norm": 1.58181893825531, "learning_rate": 0.0001, "loss": 0.0129, "step": 48860 }, { "epoch": 321.5131578947368, "grad_norm": 1.1558905839920044, "learning_rate": 0.0001, "loss": 0.0149, "step": 48870 }, { "epoch": 321.57894736842104, "grad_norm": 1.1674673557281494, "learning_rate": 0.0001, "loss": 0.0142, "step": 48880 }, { "epoch": 321.64473684210526, "grad_norm": 1.2923530340194702, "learning_rate": 0.0001, "loss": 0.0133, "step": 48890 }, { "epoch": 321.7105263157895, "grad_norm": 1.4258642196655273, "learning_rate": 0.0001, "loss": 0.0126, "step": 48900 }, { "epoch": 321.7763157894737, "grad_norm": 1.1080553531646729, "learning_rate": 0.0001, "loss": 0.0114, "step": 48910 }, { "epoch": 321.8421052631579, "grad_norm": 1.5684802532196045, "learning_rate": 0.0001, "loss": 0.0147, "step": 48920 }, { "epoch": 321.9078947368421, "grad_norm": 1.5006873607635498, "learning_rate": 0.0001, "loss": 0.0129, "step": 48930 }, { "epoch": 321.9736842105263, "grad_norm": 1.164925217628479, "learning_rate": 0.0001, "loss": 0.0141, "step": 48940 }, { "epoch": 322.0394736842105, "grad_norm": 1.4386217594146729, "learning_rate": 0.0001, "loss": 0.013, "step": 48950 }, { "epoch": 322.10526315789474, "grad_norm": 1.5981786251068115, "learning_rate": 0.0001, "loss": 0.015, "step": 48960 }, { "epoch": 322.17105263157896, "grad_norm": 1.1611731052398682, "learning_rate": 0.0001, "loss": 0.0132, "step": 48970 }, { "epoch": 322.2368421052632, "grad_norm": 1.0494667291641235, "learning_rate": 0.0001, "loss": 0.0146, "step": 48980 }, { "epoch": 322.30263157894734, "grad_norm": 1.397523045539856, "learning_rate": 0.0001, "loss": 0.0127, "step": 48990 }, { "epoch": 322.36842105263156, "grad_norm": 1.5347926616668701, "learning_rate": 0.0001, "loss": 0.0122, "step": 49000 }, { "epoch": 322.4342105263158, "grad_norm": 1.1124234199523926, "learning_rate": 0.0001, "loss": 0.0133, "step": 49010 }, { "epoch": 322.5, "grad_norm": 1.2991044521331787, "learning_rate": 0.0001, "loss": 0.0124, "step": 49020 }, { "epoch": 322.5657894736842, "grad_norm": 1.0345944166183472, "learning_rate": 0.0001, "loss": 0.011, "step": 49030 }, { "epoch": 322.63157894736844, "grad_norm": 1.2403799295425415, "learning_rate": 0.0001, "loss": 0.0137, "step": 49040 }, { "epoch": 322.69736842105266, "grad_norm": 1.202883005142212, "learning_rate": 0.0001, "loss": 0.0145, "step": 49050 }, { "epoch": 322.7631578947368, "grad_norm": 1.2251741886138916, "learning_rate": 0.0001, "loss": 0.0128, "step": 49060 }, { "epoch": 322.82894736842104, "grad_norm": 1.258322834968567, "learning_rate": 0.0001, "loss": 0.0141, "step": 49070 }, { "epoch": 322.89473684210526, "grad_norm": 1.1949845552444458, "learning_rate": 0.0001, "loss": 0.013, "step": 49080 }, { "epoch": 322.9605263157895, "grad_norm": 1.3184500932693481, "learning_rate": 0.0001, "loss": 0.0131, "step": 49090 }, { "epoch": 323.0263157894737, "grad_norm": 1.1563595533370972, "learning_rate": 0.0001, "loss": 0.0129, "step": 49100 }, { "epoch": 323.0921052631579, "grad_norm": 1.0812554359436035, "learning_rate": 0.0001, "loss": 0.0128, "step": 49110 }, { "epoch": 323.1578947368421, "grad_norm": 1.1537151336669922, "learning_rate": 0.0001, "loss": 0.0129, "step": 49120 }, { "epoch": 323.2236842105263, "grad_norm": 1.4239085912704468, "learning_rate": 0.0001, "loss": 0.0129, "step": 49130 }, { "epoch": 323.2894736842105, "grad_norm": 1.167175531387329, "learning_rate": 0.0001, "loss": 0.0124, "step": 49140 }, { "epoch": 323.35526315789474, "grad_norm": 1.0651171207427979, "learning_rate": 0.0001, "loss": 0.0136, "step": 49150 }, { "epoch": 323.42105263157896, "grad_norm": 1.1401432752609253, "learning_rate": 0.0001, "loss": 0.0129, "step": 49160 }, { "epoch": 323.4868421052632, "grad_norm": 1.0716900825500488, "learning_rate": 0.0001, "loss": 0.0155, "step": 49170 }, { "epoch": 323.55263157894734, "grad_norm": 0.9888905882835388, "learning_rate": 0.0001, "loss": 0.0131, "step": 49180 }, { "epoch": 323.61842105263156, "grad_norm": 1.1560784578323364, "learning_rate": 0.0001, "loss": 0.0132, "step": 49190 }, { "epoch": 323.6842105263158, "grad_norm": 1.1108367443084717, "learning_rate": 0.0001, "loss": 0.0126, "step": 49200 }, { "epoch": 323.75, "grad_norm": 0.7860233783721924, "learning_rate": 0.0001, "loss": 0.014, "step": 49210 }, { "epoch": 323.8157894736842, "grad_norm": 1.1012498140335083, "learning_rate": 0.0001, "loss": 0.0155, "step": 49220 }, { "epoch": 323.88157894736844, "grad_norm": 0.9723058938980103, "learning_rate": 0.0001, "loss": 0.0127, "step": 49230 }, { "epoch": 323.94736842105266, "grad_norm": 1.2430423498153687, "learning_rate": 0.0001, "loss": 0.0136, "step": 49240 }, { "epoch": 324.0131578947368, "grad_norm": 1.3332782983779907, "learning_rate": 0.0001, "loss": 0.0129, "step": 49250 }, { "epoch": 324.07894736842104, "grad_norm": 1.341118335723877, "learning_rate": 0.0001, "loss": 0.0132, "step": 49260 }, { "epoch": 324.14473684210526, "grad_norm": 1.2712905406951904, "learning_rate": 0.0001, "loss": 0.013, "step": 49270 }, { "epoch": 324.2105263157895, "grad_norm": 0.7532151937484741, "learning_rate": 0.0001, "loss": 0.0124, "step": 49280 }, { "epoch": 324.2763157894737, "grad_norm": 1.4031445980072021, "learning_rate": 0.0001, "loss": 0.0142, "step": 49290 }, { "epoch": 324.3421052631579, "grad_norm": 0.9208416938781738, "learning_rate": 0.0001, "loss": 0.0146, "step": 49300 }, { "epoch": 324.4078947368421, "grad_norm": 1.2247257232666016, "learning_rate": 0.0001, "loss": 0.0142, "step": 49310 }, { "epoch": 324.4736842105263, "grad_norm": 1.0223255157470703, "learning_rate": 0.0001, "loss": 0.0129, "step": 49320 }, { "epoch": 324.5394736842105, "grad_norm": 1.1954952478408813, "learning_rate": 0.0001, "loss": 0.0135, "step": 49330 }, { "epoch": 324.60526315789474, "grad_norm": 1.2614434957504272, "learning_rate": 0.0001, "loss": 0.0146, "step": 49340 }, { "epoch": 324.67105263157896, "grad_norm": 1.0313371419906616, "learning_rate": 0.0001, "loss": 0.0126, "step": 49350 }, { "epoch": 324.7368421052632, "grad_norm": 1.0749729871749878, "learning_rate": 0.0001, "loss": 0.012, "step": 49360 }, { "epoch": 324.80263157894734, "grad_norm": 1.2034530639648438, "learning_rate": 0.0001, "loss": 0.0138, "step": 49370 }, { "epoch": 324.86842105263156, "grad_norm": 1.1916738748550415, "learning_rate": 0.0001, "loss": 0.0144, "step": 49380 }, { "epoch": 324.9342105263158, "grad_norm": 0.9107323288917542, "learning_rate": 0.0001, "loss": 0.012, "step": 49390 }, { "epoch": 325.0, "grad_norm": 1.1662636995315552, "learning_rate": 0.0001, "loss": 0.0139, "step": 49400 }, { "epoch": 325.0657894736842, "grad_norm": 1.0498578548431396, "learning_rate": 0.0001, "loss": 0.0141, "step": 49410 }, { "epoch": 325.13157894736844, "grad_norm": 1.193319320678711, "learning_rate": 0.0001, "loss": 0.0144, "step": 49420 }, { "epoch": 325.19736842105266, "grad_norm": 1.2099950313568115, "learning_rate": 0.0001, "loss": 0.0133, "step": 49430 }, { "epoch": 325.2631578947368, "grad_norm": 1.0245522260665894, "learning_rate": 0.0001, "loss": 0.0141, "step": 49440 }, { "epoch": 325.32894736842104, "grad_norm": 1.201566219329834, "learning_rate": 0.0001, "loss": 0.0138, "step": 49450 }, { "epoch": 325.39473684210526, "grad_norm": 0.9498182535171509, "learning_rate": 0.0001, "loss": 0.0139, "step": 49460 }, { "epoch": 325.4605263157895, "grad_norm": 1.081298828125, "learning_rate": 0.0001, "loss": 0.0142, "step": 49470 }, { "epoch": 325.5263157894737, "grad_norm": 1.2874219417572021, "learning_rate": 0.0001, "loss": 0.0139, "step": 49480 }, { "epoch": 325.5921052631579, "grad_norm": 1.1696557998657227, "learning_rate": 0.0001, "loss": 0.0131, "step": 49490 }, { "epoch": 325.6578947368421, "grad_norm": 1.0769000053405762, "learning_rate": 0.0001, "loss": 0.0118, "step": 49500 }, { "epoch": 325.7236842105263, "grad_norm": 1.134009838104248, "learning_rate": 0.0001, "loss": 0.0141, "step": 49510 }, { "epoch": 325.7894736842105, "grad_norm": 0.9424777626991272, "learning_rate": 0.0001, "loss": 0.0126, "step": 49520 }, { "epoch": 325.85526315789474, "grad_norm": 1.0407861471176147, "learning_rate": 0.0001, "loss": 0.0129, "step": 49530 }, { "epoch": 325.92105263157896, "grad_norm": 0.9889567494392395, "learning_rate": 0.0001, "loss": 0.0136, "step": 49540 }, { "epoch": 325.9868421052632, "grad_norm": 0.6618396043777466, "learning_rate": 0.0001, "loss": 0.0137, "step": 49550 }, { "epoch": 326.05263157894734, "grad_norm": 1.2362364530563354, "learning_rate": 0.0001, "loss": 0.0127, "step": 49560 }, { "epoch": 326.11842105263156, "grad_norm": 1.2794060707092285, "learning_rate": 0.0001, "loss": 0.014, "step": 49570 }, { "epoch": 326.1842105263158, "grad_norm": 1.1880918741226196, "learning_rate": 0.0001, "loss": 0.015, "step": 49580 }, { "epoch": 326.25, "grad_norm": 1.0328410863876343, "learning_rate": 0.0001, "loss": 0.0134, "step": 49590 }, { "epoch": 326.3157894736842, "grad_norm": 1.3989026546478271, "learning_rate": 0.0001, "loss": 0.0129, "step": 49600 }, { "epoch": 326.38157894736844, "grad_norm": 0.8209923505783081, "learning_rate": 0.0001, "loss": 0.014, "step": 49610 }, { "epoch": 326.44736842105266, "grad_norm": 1.04630446434021, "learning_rate": 0.0001, "loss": 0.0143, "step": 49620 }, { "epoch": 326.5131578947368, "grad_norm": 1.4371641874313354, "learning_rate": 0.0001, "loss": 0.0144, "step": 49630 }, { "epoch": 326.57894736842104, "grad_norm": 1.1267870664596558, "learning_rate": 0.0001, "loss": 0.0158, "step": 49640 }, { "epoch": 326.64473684210526, "grad_norm": 1.2246888875961304, "learning_rate": 0.0001, "loss": 0.014, "step": 49650 }, { "epoch": 326.7105263157895, "grad_norm": 1.1389400959014893, "learning_rate": 0.0001, "loss": 0.0138, "step": 49660 }, { "epoch": 326.7763157894737, "grad_norm": 0.806943953037262, "learning_rate": 0.0001, "loss": 0.0129, "step": 49670 }, { "epoch": 326.8421052631579, "grad_norm": 1.1246274709701538, "learning_rate": 0.0001, "loss": 0.0117, "step": 49680 }, { "epoch": 326.9078947368421, "grad_norm": 1.3414881229400635, "learning_rate": 0.0001, "loss": 0.012, "step": 49690 }, { "epoch": 326.9736842105263, "grad_norm": 0.9751403331756592, "learning_rate": 0.0001, "loss": 0.0135, "step": 49700 }, { "epoch": 327.0394736842105, "grad_norm": 0.9873465299606323, "learning_rate": 0.0001, "loss": 0.015, "step": 49710 }, { "epoch": 327.10526315789474, "grad_norm": 1.0995386838912964, "learning_rate": 0.0001, "loss": 0.014, "step": 49720 }, { "epoch": 327.17105263157896, "grad_norm": 1.18962824344635, "learning_rate": 0.0001, "loss": 0.0137, "step": 49730 }, { "epoch": 327.2368421052632, "grad_norm": 1.1961159706115723, "learning_rate": 0.0001, "loss": 0.0128, "step": 49740 }, { "epoch": 327.30263157894734, "grad_norm": 1.5299499034881592, "learning_rate": 0.0001, "loss": 0.0156, "step": 49750 }, { "epoch": 327.36842105263156, "grad_norm": 1.2997653484344482, "learning_rate": 0.0001, "loss": 0.0137, "step": 49760 }, { "epoch": 327.4342105263158, "grad_norm": 1.19481360912323, "learning_rate": 0.0001, "loss": 0.0135, "step": 49770 }, { "epoch": 327.5, "grad_norm": 1.1865720748901367, "learning_rate": 0.0001, "loss": 0.0142, "step": 49780 }, { "epoch": 327.5657894736842, "grad_norm": 1.3562146425247192, "learning_rate": 0.0001, "loss": 0.0112, "step": 49790 }, { "epoch": 327.63157894736844, "grad_norm": 1.2696094512939453, "learning_rate": 0.0001, "loss": 0.0135, "step": 49800 }, { "epoch": 327.69736842105266, "grad_norm": 1.2589432001113892, "learning_rate": 0.0001, "loss": 0.0138, "step": 49810 }, { "epoch": 327.7631578947368, "grad_norm": 0.9365496039390564, "learning_rate": 0.0001, "loss": 0.0133, "step": 49820 }, { "epoch": 327.82894736842104, "grad_norm": 1.6220346689224243, "learning_rate": 0.0001, "loss": 0.0132, "step": 49830 }, { "epoch": 327.89473684210526, "grad_norm": 1.4582602977752686, "learning_rate": 0.0001, "loss": 0.0153, "step": 49840 }, { "epoch": 327.9605263157895, "grad_norm": 1.0291483402252197, "learning_rate": 0.0001, "loss": 0.0135, "step": 49850 }, { "epoch": 328.0263157894737, "grad_norm": 1.1116678714752197, "learning_rate": 0.0001, "loss": 0.0114, "step": 49860 }, { "epoch": 328.0921052631579, "grad_norm": 1.533398985862732, "learning_rate": 0.0001, "loss": 0.0126, "step": 49870 }, { "epoch": 328.1578947368421, "grad_norm": 1.562609076499939, "learning_rate": 0.0001, "loss": 0.0143, "step": 49880 }, { "epoch": 328.2236842105263, "grad_norm": 1.3016607761383057, "learning_rate": 0.0001, "loss": 0.0151, "step": 49890 }, { "epoch": 328.2894736842105, "grad_norm": 1.1702256202697754, "learning_rate": 0.0001, "loss": 0.0153, "step": 49900 }, { "epoch": 328.35526315789474, "grad_norm": 1.1809632778167725, "learning_rate": 0.0001, "loss": 0.0129, "step": 49910 }, { "epoch": 328.42105263157896, "grad_norm": 1.109625220298767, "learning_rate": 0.0001, "loss": 0.0138, "step": 49920 }, { "epoch": 328.4868421052632, "grad_norm": 1.376266360282898, "learning_rate": 0.0001, "loss": 0.0146, "step": 49930 }, { "epoch": 328.55263157894734, "grad_norm": 1.2670068740844727, "learning_rate": 0.0001, "loss": 0.012, "step": 49940 }, { "epoch": 328.61842105263156, "grad_norm": 1.1520689725875854, "learning_rate": 0.0001, "loss": 0.0124, "step": 49950 }, { "epoch": 328.6842105263158, "grad_norm": 1.171880841255188, "learning_rate": 0.0001, "loss": 0.0141, "step": 49960 }, { "epoch": 328.75, "grad_norm": 1.2454252243041992, "learning_rate": 0.0001, "loss": 0.0124, "step": 49970 }, { "epoch": 328.8157894736842, "grad_norm": 0.9792435169219971, "learning_rate": 0.0001, "loss": 0.012, "step": 49980 }, { "epoch": 328.88157894736844, "grad_norm": 1.4465330839157104, "learning_rate": 0.0001, "loss": 0.0128, "step": 49990 }, { "epoch": 328.94736842105266, "grad_norm": 1.2347135543823242, "learning_rate": 0.0001, "loss": 0.0125, "step": 50000 }, { "epoch": 329.0131578947368, "grad_norm": 1.15986967086792, "learning_rate": 0.0001, "loss": 0.0144, "step": 50010 }, { "epoch": 329.07894736842104, "grad_norm": 1.2300969362258911, "learning_rate": 0.0001, "loss": 0.0143, "step": 50020 }, { "epoch": 329.14473684210526, "grad_norm": 0.9923980832099915, "learning_rate": 0.0001, "loss": 0.0147, "step": 50030 }, { "epoch": 329.2105263157895, "grad_norm": 1.0011688470840454, "learning_rate": 0.0001, "loss": 0.0136, "step": 50040 }, { "epoch": 329.2763157894737, "grad_norm": 0.9253886938095093, "learning_rate": 0.0001, "loss": 0.0122, "step": 50050 }, { "epoch": 329.3421052631579, "grad_norm": 0.8614893555641174, "learning_rate": 0.0001, "loss": 0.014, "step": 50060 }, { "epoch": 329.4078947368421, "grad_norm": 1.411759614944458, "learning_rate": 0.0001, "loss": 0.0127, "step": 50070 }, { "epoch": 329.4736842105263, "grad_norm": 0.7994163632392883, "learning_rate": 0.0001, "loss": 0.0141, "step": 50080 }, { "epoch": 329.5394736842105, "grad_norm": 0.9142425060272217, "learning_rate": 0.0001, "loss": 0.013, "step": 50090 }, { "epoch": 329.60526315789474, "grad_norm": 0.7740334272384644, "learning_rate": 0.0001, "loss": 0.0148, "step": 50100 }, { "epoch": 329.67105263157896, "grad_norm": 0.847666323184967, "learning_rate": 0.0001, "loss": 0.0133, "step": 50110 }, { "epoch": 329.7368421052632, "grad_norm": 1.1491421461105347, "learning_rate": 0.0001, "loss": 0.0135, "step": 50120 }, { "epoch": 329.80263157894734, "grad_norm": 0.9505961537361145, "learning_rate": 0.0001, "loss": 0.0137, "step": 50130 }, { "epoch": 329.86842105263156, "grad_norm": 1.0270600318908691, "learning_rate": 0.0001, "loss": 0.0154, "step": 50140 }, { "epoch": 329.9342105263158, "grad_norm": 1.1158539056777954, "learning_rate": 0.0001, "loss": 0.0162, "step": 50150 }, { "epoch": 330.0, "grad_norm": 1.6066099405288696, "learning_rate": 0.0001, "loss": 0.0123, "step": 50160 }, { "epoch": 330.0657894736842, "grad_norm": 1.3317687511444092, "learning_rate": 0.0001, "loss": 0.0132, "step": 50170 }, { "epoch": 330.13157894736844, "grad_norm": 1.1555328369140625, "learning_rate": 0.0001, "loss": 0.0153, "step": 50180 }, { "epoch": 330.19736842105266, "grad_norm": 1.1379915475845337, "learning_rate": 0.0001, "loss": 0.0136, "step": 50190 }, { "epoch": 330.2631578947368, "grad_norm": 1.0150611400604248, "learning_rate": 0.0001, "loss": 0.0142, "step": 50200 }, { "epoch": 330.32894736842104, "grad_norm": 1.422431468963623, "learning_rate": 0.0001, "loss": 0.015, "step": 50210 }, { "epoch": 330.39473684210526, "grad_norm": 1.1463091373443604, "learning_rate": 0.0001, "loss": 0.0115, "step": 50220 }, { "epoch": 330.4605263157895, "grad_norm": 1.2713172435760498, "learning_rate": 0.0001, "loss": 0.0126, "step": 50230 }, { "epoch": 330.5263157894737, "grad_norm": 1.2657397985458374, "learning_rate": 0.0001, "loss": 0.0147, "step": 50240 }, { "epoch": 330.5921052631579, "grad_norm": 1.109223484992981, "learning_rate": 0.0001, "loss": 0.0141, "step": 50250 }, { "epoch": 330.6578947368421, "grad_norm": 1.1030226945877075, "learning_rate": 0.0001, "loss": 0.0147, "step": 50260 }, { "epoch": 330.7236842105263, "grad_norm": 1.2392481565475464, "learning_rate": 0.0001, "loss": 0.0126, "step": 50270 }, { "epoch": 330.7894736842105, "grad_norm": 1.1051968336105347, "learning_rate": 0.0001, "loss": 0.0153, "step": 50280 }, { "epoch": 330.85526315789474, "grad_norm": 1.110648274421692, "learning_rate": 0.0001, "loss": 0.0136, "step": 50290 }, { "epoch": 330.92105263157896, "grad_norm": 1.3679182529449463, "learning_rate": 0.0001, "loss": 0.0123, "step": 50300 }, { "epoch": 330.9868421052632, "grad_norm": 1.1558316946029663, "learning_rate": 0.0001, "loss": 0.0127, "step": 50310 }, { "epoch": 331.05263157894734, "grad_norm": 0.9238816499710083, "learning_rate": 0.0001, "loss": 0.0149, "step": 50320 }, { "epoch": 331.11842105263156, "grad_norm": 1.363661766052246, "learning_rate": 0.0001, "loss": 0.0152, "step": 50330 }, { "epoch": 331.1842105263158, "grad_norm": 1.5912078619003296, "learning_rate": 0.0001, "loss": 0.0143, "step": 50340 }, { "epoch": 331.25, "grad_norm": 1.1807664632797241, "learning_rate": 0.0001, "loss": 0.0144, "step": 50350 }, { "epoch": 331.3157894736842, "grad_norm": 1.0620604753494263, "learning_rate": 0.0001, "loss": 0.0114, "step": 50360 }, { "epoch": 331.38157894736844, "grad_norm": 1.0566785335540771, "learning_rate": 0.0001, "loss": 0.0151, "step": 50370 }, { "epoch": 331.44736842105266, "grad_norm": 0.9269898533821106, "learning_rate": 0.0001, "loss": 0.013, "step": 50380 }, { "epoch": 331.5131578947368, "grad_norm": 1.0795518159866333, "learning_rate": 0.0001, "loss": 0.014, "step": 50390 }, { "epoch": 331.57894736842104, "grad_norm": 1.1941287517547607, "learning_rate": 0.0001, "loss": 0.0131, "step": 50400 }, { "epoch": 331.64473684210526, "grad_norm": 1.3070482015609741, "learning_rate": 0.0001, "loss": 0.0138, "step": 50410 }, { "epoch": 331.7105263157895, "grad_norm": 1.1180983781814575, "learning_rate": 0.0001, "loss": 0.0135, "step": 50420 }, { "epoch": 331.7763157894737, "grad_norm": 1.0409196615219116, "learning_rate": 0.0001, "loss": 0.0125, "step": 50430 }, { "epoch": 331.8421052631579, "grad_norm": 1.5609703063964844, "learning_rate": 0.0001, "loss": 0.0125, "step": 50440 }, { "epoch": 331.9078947368421, "grad_norm": 1.2838364839553833, "learning_rate": 0.0001, "loss": 0.0126, "step": 50450 }, { "epoch": 331.9736842105263, "grad_norm": 1.4142061471939087, "learning_rate": 0.0001, "loss": 0.0119, "step": 50460 }, { "epoch": 332.0394736842105, "grad_norm": 1.0563913583755493, "learning_rate": 0.0001, "loss": 0.0126, "step": 50470 }, { "epoch": 332.10526315789474, "grad_norm": 1.106583833694458, "learning_rate": 0.0001, "loss": 0.0149, "step": 50480 }, { "epoch": 332.17105263157896, "grad_norm": 1.0735349655151367, "learning_rate": 0.0001, "loss": 0.0132, "step": 50490 }, { "epoch": 332.2368421052632, "grad_norm": 1.5495939254760742, "learning_rate": 0.0001, "loss": 0.0164, "step": 50500 }, { "epoch": 332.30263157894734, "grad_norm": 1.1184154748916626, "learning_rate": 0.0001, "loss": 0.0146, "step": 50510 }, { "epoch": 332.36842105263156, "grad_norm": 1.4668242931365967, "learning_rate": 0.0001, "loss": 0.0136, "step": 50520 }, { "epoch": 332.4342105263158, "grad_norm": 1.1980876922607422, "learning_rate": 0.0001, "loss": 0.0125, "step": 50530 }, { "epoch": 332.5, "grad_norm": 1.3636940717697144, "learning_rate": 0.0001, "loss": 0.0134, "step": 50540 }, { "epoch": 332.5657894736842, "grad_norm": 1.2938543558120728, "learning_rate": 0.0001, "loss": 0.0138, "step": 50550 }, { "epoch": 332.63157894736844, "grad_norm": 1.1288648843765259, "learning_rate": 0.0001, "loss": 0.0132, "step": 50560 }, { "epoch": 332.69736842105266, "grad_norm": 1.0192151069641113, "learning_rate": 0.0001, "loss": 0.0136, "step": 50570 }, { "epoch": 332.7631578947368, "grad_norm": 1.2933300733566284, "learning_rate": 0.0001, "loss": 0.0127, "step": 50580 }, { "epoch": 332.82894736842104, "grad_norm": 1.2441681623458862, "learning_rate": 0.0001, "loss": 0.0133, "step": 50590 }, { "epoch": 332.89473684210526, "grad_norm": 1.2795456647872925, "learning_rate": 0.0001, "loss": 0.0122, "step": 50600 }, { "epoch": 332.9605263157895, "grad_norm": 1.3279870748519897, "learning_rate": 0.0001, "loss": 0.0122, "step": 50610 }, { "epoch": 333.0263157894737, "grad_norm": 1.3241667747497559, "learning_rate": 0.0001, "loss": 0.0116, "step": 50620 }, { "epoch": 333.0921052631579, "grad_norm": 0.7530394196510315, "learning_rate": 0.0001, "loss": 0.0135, "step": 50630 }, { "epoch": 333.1578947368421, "grad_norm": 1.23948073387146, "learning_rate": 0.0001, "loss": 0.0124, "step": 50640 }, { "epoch": 333.2236842105263, "grad_norm": 1.1512643098831177, "learning_rate": 0.0001, "loss": 0.0125, "step": 50650 }, { "epoch": 333.2894736842105, "grad_norm": 1.1450495719909668, "learning_rate": 0.0001, "loss": 0.012, "step": 50660 }, { "epoch": 333.35526315789474, "grad_norm": 1.0570194721221924, "learning_rate": 0.0001, "loss": 0.0136, "step": 50670 }, { "epoch": 333.42105263157896, "grad_norm": 1.017793893814087, "learning_rate": 0.0001, "loss": 0.014, "step": 50680 }, { "epoch": 333.4868421052632, "grad_norm": 1.2568644285202026, "learning_rate": 0.0001, "loss": 0.0133, "step": 50690 }, { "epoch": 333.55263157894734, "grad_norm": 1.0966501235961914, "learning_rate": 0.0001, "loss": 0.0136, "step": 50700 }, { "epoch": 333.61842105263156, "grad_norm": 1.404511570930481, "learning_rate": 0.0001, "loss": 0.0149, "step": 50710 }, { "epoch": 333.6842105263158, "grad_norm": 0.9019613265991211, "learning_rate": 0.0001, "loss": 0.0168, "step": 50720 }, { "epoch": 333.75, "grad_norm": 1.206984519958496, "learning_rate": 0.0001, "loss": 0.0145, "step": 50730 }, { "epoch": 333.8157894736842, "grad_norm": 0.9531241059303284, "learning_rate": 0.0001, "loss": 0.0125, "step": 50740 }, { "epoch": 333.88157894736844, "grad_norm": 1.3552753925323486, "learning_rate": 0.0001, "loss": 0.0136, "step": 50750 }, { "epoch": 333.94736842105266, "grad_norm": 1.5873048305511475, "learning_rate": 0.0001, "loss": 0.0126, "step": 50760 }, { "epoch": 334.0131578947368, "grad_norm": 1.065086007118225, "learning_rate": 0.0001, "loss": 0.0124, "step": 50770 }, { "epoch": 334.07894736842104, "grad_norm": 1.4782254695892334, "learning_rate": 0.0001, "loss": 0.0143, "step": 50780 }, { "epoch": 334.14473684210526, "grad_norm": 1.0018125772476196, "learning_rate": 0.0001, "loss": 0.0128, "step": 50790 }, { "epoch": 334.2105263157895, "grad_norm": 1.2389479875564575, "learning_rate": 0.0001, "loss": 0.0137, "step": 50800 }, { "epoch": 334.2763157894737, "grad_norm": 1.1885042190551758, "learning_rate": 0.0001, "loss": 0.0137, "step": 50810 }, { "epoch": 334.3421052631579, "grad_norm": 0.9912749528884888, "learning_rate": 0.0001, "loss": 0.0121, "step": 50820 }, { "epoch": 334.4078947368421, "grad_norm": 1.206180214881897, "learning_rate": 0.0001, "loss": 0.014, "step": 50830 }, { "epoch": 334.4736842105263, "grad_norm": 1.087846279144287, "learning_rate": 0.0001, "loss": 0.0125, "step": 50840 }, { "epoch": 334.5394736842105, "grad_norm": 1.4583332538604736, "learning_rate": 0.0001, "loss": 0.0133, "step": 50850 }, { "epoch": 334.60526315789474, "grad_norm": 1.2611576318740845, "learning_rate": 0.0001, "loss": 0.0136, "step": 50860 }, { "epoch": 334.67105263157896, "grad_norm": 1.2593576908111572, "learning_rate": 0.0001, "loss": 0.0132, "step": 50870 }, { "epoch": 334.7368421052632, "grad_norm": 0.8674020171165466, "learning_rate": 0.0001, "loss": 0.0127, "step": 50880 }, { "epoch": 334.80263157894734, "grad_norm": 0.9973218441009521, "learning_rate": 0.0001, "loss": 0.0125, "step": 50890 }, { "epoch": 334.86842105263156, "grad_norm": 1.1739544868469238, "learning_rate": 0.0001, "loss": 0.0136, "step": 50900 }, { "epoch": 334.9342105263158, "grad_norm": 1.2401018142700195, "learning_rate": 0.0001, "loss": 0.0138, "step": 50910 }, { "epoch": 335.0, "grad_norm": 1.03706955909729, "learning_rate": 0.0001, "loss": 0.014, "step": 50920 }, { "epoch": 335.0657894736842, "grad_norm": 1.213478684425354, "learning_rate": 0.0001, "loss": 0.012, "step": 50930 }, { "epoch": 335.13157894736844, "grad_norm": 1.3878175020217896, "learning_rate": 0.0001, "loss": 0.0128, "step": 50940 }, { "epoch": 335.19736842105266, "grad_norm": 1.3380154371261597, "learning_rate": 0.0001, "loss": 0.013, "step": 50950 }, { "epoch": 335.2631578947368, "grad_norm": 0.9307578802108765, "learning_rate": 0.0001, "loss": 0.0144, "step": 50960 }, { "epoch": 335.32894736842104, "grad_norm": 1.1519443988800049, "learning_rate": 0.0001, "loss": 0.0139, "step": 50970 }, { "epoch": 335.39473684210526, "grad_norm": 1.3810474872589111, "learning_rate": 0.0001, "loss": 0.012, "step": 50980 }, { "epoch": 335.4605263157895, "grad_norm": 1.1275713443756104, "learning_rate": 0.0001, "loss": 0.0131, "step": 50990 }, { "epoch": 335.5263157894737, "grad_norm": 1.1235930919647217, "learning_rate": 0.0001, "loss": 0.0128, "step": 51000 }, { "epoch": 335.5921052631579, "grad_norm": 1.6423934698104858, "learning_rate": 0.0001, "loss": 0.015, "step": 51010 }, { "epoch": 335.6578947368421, "grad_norm": 1.104622721672058, "learning_rate": 0.0001, "loss": 0.0126, "step": 51020 }, { "epoch": 335.7236842105263, "grad_norm": 1.2324247360229492, "learning_rate": 0.0001, "loss": 0.0148, "step": 51030 }, { "epoch": 335.7894736842105, "grad_norm": 1.1801762580871582, "learning_rate": 0.0001, "loss": 0.0124, "step": 51040 }, { "epoch": 335.85526315789474, "grad_norm": 1.164038896560669, "learning_rate": 0.0001, "loss": 0.0156, "step": 51050 }, { "epoch": 335.92105263157896, "grad_norm": 0.7976166009902954, "learning_rate": 0.0001, "loss": 0.0137, "step": 51060 }, { "epoch": 335.9868421052632, "grad_norm": 1.0072071552276611, "learning_rate": 0.0001, "loss": 0.0143, "step": 51070 }, { "epoch": 336.05263157894734, "grad_norm": 0.8578206896781921, "learning_rate": 0.0001, "loss": 0.0122, "step": 51080 }, { "epoch": 336.11842105263156, "grad_norm": 1.0572422742843628, "learning_rate": 0.0001, "loss": 0.0137, "step": 51090 }, { "epoch": 336.1842105263158, "grad_norm": 0.9827244281768799, "learning_rate": 0.0001, "loss": 0.0149, "step": 51100 }, { "epoch": 336.25, "grad_norm": 1.35405695438385, "learning_rate": 0.0001, "loss": 0.0158, "step": 51110 }, { "epoch": 336.3157894736842, "grad_norm": 0.9394972920417786, "learning_rate": 0.0001, "loss": 0.0126, "step": 51120 }, { "epoch": 336.38157894736844, "grad_norm": 1.3125892877578735, "learning_rate": 0.0001, "loss": 0.0119, "step": 51130 }, { "epoch": 336.44736842105266, "grad_norm": 1.182879090309143, "learning_rate": 0.0001, "loss": 0.0148, "step": 51140 }, { "epoch": 336.5131578947368, "grad_norm": 0.9829082489013672, "learning_rate": 0.0001, "loss": 0.0135, "step": 51150 }, { "epoch": 336.57894736842104, "grad_norm": 1.1734791994094849, "learning_rate": 0.0001, "loss": 0.0137, "step": 51160 }, { "epoch": 336.64473684210526, "grad_norm": 1.2387040853500366, "learning_rate": 0.0001, "loss": 0.0152, "step": 51170 }, { "epoch": 336.7105263157895, "grad_norm": 1.2995988130569458, "learning_rate": 0.0001, "loss": 0.0139, "step": 51180 }, { "epoch": 336.7763157894737, "grad_norm": 1.099161982536316, "learning_rate": 0.0001, "loss": 0.0139, "step": 51190 }, { "epoch": 336.8421052631579, "grad_norm": 1.1866588592529297, "learning_rate": 0.0001, "loss": 0.0118, "step": 51200 }, { "epoch": 336.9078947368421, "grad_norm": 1.2611371278762817, "learning_rate": 0.0001, "loss": 0.0144, "step": 51210 }, { "epoch": 336.9736842105263, "grad_norm": 1.6526949405670166, "learning_rate": 0.0001, "loss": 0.0134, "step": 51220 }, { "epoch": 337.0394736842105, "grad_norm": 1.461340069770813, "learning_rate": 0.0001, "loss": 0.0124, "step": 51230 }, { "epoch": 337.10526315789474, "grad_norm": 1.127563238143921, "learning_rate": 0.0001, "loss": 0.0131, "step": 51240 }, { "epoch": 337.17105263157896, "grad_norm": 1.2652572393417358, "learning_rate": 0.0001, "loss": 0.0137, "step": 51250 }, { "epoch": 337.2368421052632, "grad_norm": 1.25266695022583, "learning_rate": 0.0001, "loss": 0.0126, "step": 51260 }, { "epoch": 337.30263157894734, "grad_norm": 1.2490793466567993, "learning_rate": 0.0001, "loss": 0.0145, "step": 51270 }, { "epoch": 337.36842105263156, "grad_norm": 1.2546192407608032, "learning_rate": 0.0001, "loss": 0.0125, "step": 51280 }, { "epoch": 337.4342105263158, "grad_norm": 1.2390152215957642, "learning_rate": 0.0001, "loss": 0.0164, "step": 51290 }, { "epoch": 337.5, "grad_norm": 1.2711284160614014, "learning_rate": 0.0001, "loss": 0.0128, "step": 51300 }, { "epoch": 337.5657894736842, "grad_norm": 1.480649471282959, "learning_rate": 0.0001, "loss": 0.013, "step": 51310 }, { "epoch": 337.63157894736844, "grad_norm": 1.043129563331604, "learning_rate": 0.0001, "loss": 0.0142, "step": 51320 }, { "epoch": 337.69736842105266, "grad_norm": 0.9065507054328918, "learning_rate": 0.0001, "loss": 0.0108, "step": 51330 }, { "epoch": 337.7631578947368, "grad_norm": 1.0650672912597656, "learning_rate": 0.0001, "loss": 0.0122, "step": 51340 }, { "epoch": 337.82894736842104, "grad_norm": 1.24941086769104, "learning_rate": 0.0001, "loss": 0.0148, "step": 51350 }, { "epoch": 337.89473684210526, "grad_norm": 1.2355936765670776, "learning_rate": 0.0001, "loss": 0.0141, "step": 51360 }, { "epoch": 337.9605263157895, "grad_norm": 1.2188667058944702, "learning_rate": 0.0001, "loss": 0.0131, "step": 51370 }, { "epoch": 338.0263157894737, "grad_norm": 1.136441707611084, "learning_rate": 0.0001, "loss": 0.0129, "step": 51380 }, { "epoch": 338.0921052631579, "grad_norm": 1.5337886810302734, "learning_rate": 0.0001, "loss": 0.0133, "step": 51390 }, { "epoch": 338.1578947368421, "grad_norm": 1.4767624139785767, "learning_rate": 0.0001, "loss": 0.0153, "step": 51400 }, { "epoch": 338.2236842105263, "grad_norm": 1.1894986629486084, "learning_rate": 0.0001, "loss": 0.0139, "step": 51410 }, { "epoch": 338.2894736842105, "grad_norm": 1.316213846206665, "learning_rate": 0.0001, "loss": 0.0138, "step": 51420 }, { "epoch": 338.35526315789474, "grad_norm": 1.614834189414978, "learning_rate": 0.0001, "loss": 0.0134, "step": 51430 }, { "epoch": 338.42105263157896, "grad_norm": 1.146628737449646, "learning_rate": 0.0001, "loss": 0.0123, "step": 51440 }, { "epoch": 338.4868421052632, "grad_norm": 1.2770928144454956, "learning_rate": 0.0001, "loss": 0.0138, "step": 51450 }, { "epoch": 338.55263157894734, "grad_norm": 1.2232598066329956, "learning_rate": 0.0001, "loss": 0.0137, "step": 51460 }, { "epoch": 338.61842105263156, "grad_norm": 1.278428554534912, "learning_rate": 0.0001, "loss": 0.0126, "step": 51470 }, { "epoch": 338.6842105263158, "grad_norm": 1.1535056829452515, "learning_rate": 0.0001, "loss": 0.0138, "step": 51480 }, { "epoch": 338.75, "grad_norm": 0.9330645203590393, "learning_rate": 0.0001, "loss": 0.0131, "step": 51490 }, { "epoch": 338.8157894736842, "grad_norm": 1.2083619832992554, "learning_rate": 0.0001, "loss": 0.0136, "step": 51500 }, { "epoch": 338.88157894736844, "grad_norm": 1.0455271005630493, "learning_rate": 0.0001, "loss": 0.0117, "step": 51510 }, { "epoch": 338.94736842105266, "grad_norm": 1.051611065864563, "learning_rate": 0.0001, "loss": 0.0145, "step": 51520 }, { "epoch": 339.0131578947368, "grad_norm": 0.9898776412010193, "learning_rate": 0.0001, "loss": 0.0169, "step": 51530 }, { "epoch": 339.07894736842104, "grad_norm": 1.3422266244888306, "learning_rate": 0.0001, "loss": 0.0153, "step": 51540 }, { "epoch": 339.14473684210526, "grad_norm": 0.925194501876831, "learning_rate": 0.0001, "loss": 0.0133, "step": 51550 }, { "epoch": 339.2105263157895, "grad_norm": 1.2911876440048218, "learning_rate": 0.0001, "loss": 0.0139, "step": 51560 }, { "epoch": 339.2763157894737, "grad_norm": 1.5111730098724365, "learning_rate": 0.0001, "loss": 0.0151, "step": 51570 }, { "epoch": 339.3421052631579, "grad_norm": 1.3134963512420654, "learning_rate": 0.0001, "loss": 0.0134, "step": 51580 }, { "epoch": 339.4078947368421, "grad_norm": 1.3850778341293335, "learning_rate": 0.0001, "loss": 0.0123, "step": 51590 }, { "epoch": 339.4736842105263, "grad_norm": 0.9667593836784363, "learning_rate": 0.0001, "loss": 0.0133, "step": 51600 }, { "epoch": 339.5394736842105, "grad_norm": 1.3687058687210083, "learning_rate": 0.0001, "loss": 0.0139, "step": 51610 }, { "epoch": 339.60526315789474, "grad_norm": 1.0629100799560547, "learning_rate": 0.0001, "loss": 0.0155, "step": 51620 }, { "epoch": 339.67105263157896, "grad_norm": 0.9805474877357483, "learning_rate": 0.0001, "loss": 0.0141, "step": 51630 }, { "epoch": 339.7368421052632, "grad_norm": 1.029873251914978, "learning_rate": 0.0001, "loss": 0.0125, "step": 51640 }, { "epoch": 339.80263157894734, "grad_norm": 1.1182074546813965, "learning_rate": 0.0001, "loss": 0.0137, "step": 51650 }, { "epoch": 339.86842105263156, "grad_norm": 0.8829177618026733, "learning_rate": 0.0001, "loss": 0.0127, "step": 51660 }, { "epoch": 339.9342105263158, "grad_norm": 1.0989736318588257, "learning_rate": 0.0001, "loss": 0.0131, "step": 51670 }, { "epoch": 340.0, "grad_norm": 1.5837504863739014, "learning_rate": 0.0001, "loss": 0.0125, "step": 51680 }, { "epoch": 340.0657894736842, "grad_norm": 1.524735689163208, "learning_rate": 0.0001, "loss": 0.0138, "step": 51690 }, { "epoch": 340.13157894736844, "grad_norm": 1.6917763948440552, "learning_rate": 0.0001, "loss": 0.0135, "step": 51700 }, { "epoch": 340.19736842105266, "grad_norm": 1.0942375659942627, "learning_rate": 0.0001, "loss": 0.0134, "step": 51710 }, { "epoch": 340.2631578947368, "grad_norm": 1.0912152528762817, "learning_rate": 0.0001, "loss": 0.0147, "step": 51720 }, { "epoch": 340.32894736842104, "grad_norm": 0.9950433373451233, "learning_rate": 0.0001, "loss": 0.0156, "step": 51730 }, { "epoch": 340.39473684210526, "grad_norm": 1.1365388631820679, "learning_rate": 0.0001, "loss": 0.0129, "step": 51740 }, { "epoch": 340.4605263157895, "grad_norm": 1.5053037405014038, "learning_rate": 0.0001, "loss": 0.015, "step": 51750 }, { "epoch": 340.5263157894737, "grad_norm": 1.1808347702026367, "learning_rate": 0.0001, "loss": 0.0123, "step": 51760 }, { "epoch": 340.5921052631579, "grad_norm": 1.6994234323501587, "learning_rate": 0.0001, "loss": 0.0145, "step": 51770 }, { "epoch": 340.6578947368421, "grad_norm": 1.4310306310653687, "learning_rate": 0.0001, "loss": 0.0133, "step": 51780 }, { "epoch": 340.7236842105263, "grad_norm": 1.2134596109390259, "learning_rate": 0.0001, "loss": 0.0145, "step": 51790 }, { "epoch": 340.7894736842105, "grad_norm": 1.7126917839050293, "learning_rate": 0.0001, "loss": 0.0132, "step": 51800 }, { "epoch": 340.85526315789474, "grad_norm": 1.0184993743896484, "learning_rate": 0.0001, "loss": 0.0136, "step": 51810 }, { "epoch": 340.92105263157896, "grad_norm": 1.3017053604125977, "learning_rate": 0.0001, "loss": 0.0136, "step": 51820 }, { "epoch": 340.9868421052632, "grad_norm": 0.8897969126701355, "learning_rate": 0.0001, "loss": 0.0143, "step": 51830 }, { "epoch": 341.05263157894734, "grad_norm": 1.1662821769714355, "learning_rate": 0.0001, "loss": 0.0133, "step": 51840 }, { "epoch": 341.11842105263156, "grad_norm": 1.338837742805481, "learning_rate": 0.0001, "loss": 0.0163, "step": 51850 }, { "epoch": 341.1842105263158, "grad_norm": 1.1382125616073608, "learning_rate": 0.0001, "loss": 0.0131, "step": 51860 }, { "epoch": 341.25, "grad_norm": 0.9120957851409912, "learning_rate": 0.0001, "loss": 0.0127, "step": 51870 }, { "epoch": 341.3157894736842, "grad_norm": 1.134007215499878, "learning_rate": 0.0001, "loss": 0.0128, "step": 51880 }, { "epoch": 341.38157894736844, "grad_norm": 1.2043037414550781, "learning_rate": 0.0001, "loss": 0.013, "step": 51890 }, { "epoch": 341.44736842105266, "grad_norm": 1.6688247919082642, "learning_rate": 0.0001, "loss": 0.0139, "step": 51900 }, { "epoch": 341.5131578947368, "grad_norm": 1.2863478660583496, "learning_rate": 0.0001, "loss": 0.0139, "step": 51910 }, { "epoch": 341.57894736842104, "grad_norm": 1.4102164506912231, "learning_rate": 0.0001, "loss": 0.0132, "step": 51920 }, { "epoch": 341.64473684210526, "grad_norm": 1.6692852973937988, "learning_rate": 0.0001, "loss": 0.0132, "step": 51930 }, { "epoch": 341.7105263157895, "grad_norm": 1.386616826057434, "learning_rate": 0.0001, "loss": 0.0153, "step": 51940 }, { "epoch": 341.7763157894737, "grad_norm": 1.1404385566711426, "learning_rate": 0.0001, "loss": 0.0138, "step": 51950 }, { "epoch": 341.8421052631579, "grad_norm": 1.3173713684082031, "learning_rate": 0.0001, "loss": 0.0131, "step": 51960 }, { "epoch": 341.9078947368421, "grad_norm": 1.272148847579956, "learning_rate": 0.0001, "loss": 0.0132, "step": 51970 }, { "epoch": 341.9736842105263, "grad_norm": 1.2954585552215576, "learning_rate": 0.0001, "loss": 0.0144, "step": 51980 }, { "epoch": 342.0394736842105, "grad_norm": 1.546271800994873, "learning_rate": 0.0001, "loss": 0.0132, "step": 51990 }, { "epoch": 342.10526315789474, "grad_norm": 1.21526300907135, "learning_rate": 0.0001, "loss": 0.0138, "step": 52000 }, { "epoch": 342.17105263157896, "grad_norm": 1.2381492853164673, "learning_rate": 0.0001, "loss": 0.0138, "step": 52010 }, { "epoch": 342.2368421052632, "grad_norm": 1.5428966283798218, "learning_rate": 0.0001, "loss": 0.0135, "step": 52020 }, { "epoch": 342.30263157894734, "grad_norm": 1.5430893898010254, "learning_rate": 0.0001, "loss": 0.0151, "step": 52030 }, { "epoch": 342.36842105263156, "grad_norm": 0.9203903079032898, "learning_rate": 0.0001, "loss": 0.0132, "step": 52040 }, { "epoch": 342.4342105263158, "grad_norm": 0.9490885734558105, "learning_rate": 0.0001, "loss": 0.0129, "step": 52050 }, { "epoch": 342.5, "grad_norm": 0.8359211087226868, "learning_rate": 0.0001, "loss": 0.0133, "step": 52060 }, { "epoch": 342.5657894736842, "grad_norm": 1.2397630214691162, "learning_rate": 0.0001, "loss": 0.0137, "step": 52070 }, { "epoch": 342.63157894736844, "grad_norm": 1.1610674858093262, "learning_rate": 0.0001, "loss": 0.0134, "step": 52080 }, { "epoch": 342.69736842105266, "grad_norm": 1.3798439502716064, "learning_rate": 0.0001, "loss": 0.0137, "step": 52090 }, { "epoch": 342.7631578947368, "grad_norm": 1.1632894277572632, "learning_rate": 0.0001, "loss": 0.0126, "step": 52100 }, { "epoch": 342.82894736842104, "grad_norm": 1.3754557371139526, "learning_rate": 0.0001, "loss": 0.013, "step": 52110 }, { "epoch": 342.89473684210526, "grad_norm": 1.087170124053955, "learning_rate": 0.0001, "loss": 0.0142, "step": 52120 }, { "epoch": 342.9605263157895, "grad_norm": 0.9013633131980896, "learning_rate": 0.0001, "loss": 0.0142, "step": 52130 }, { "epoch": 343.0263157894737, "grad_norm": 1.5039817094802856, "learning_rate": 0.0001, "loss": 0.0145, "step": 52140 }, { "epoch": 343.0921052631579, "grad_norm": 1.4814598560333252, "learning_rate": 0.0001, "loss": 0.0134, "step": 52150 }, { "epoch": 343.1578947368421, "grad_norm": 1.120323896408081, "learning_rate": 0.0001, "loss": 0.0151, "step": 52160 }, { "epoch": 343.2236842105263, "grad_norm": 0.9336338639259338, "learning_rate": 0.0001, "loss": 0.0148, "step": 52170 }, { "epoch": 343.2894736842105, "grad_norm": 0.7774641513824463, "learning_rate": 0.0001, "loss": 0.0132, "step": 52180 }, { "epoch": 343.35526315789474, "grad_norm": 0.8035191297531128, "learning_rate": 0.0001, "loss": 0.0144, "step": 52190 }, { "epoch": 343.42105263157896, "grad_norm": 0.9542168974876404, "learning_rate": 0.0001, "loss": 0.0128, "step": 52200 }, { "epoch": 343.4868421052632, "grad_norm": 1.1261277198791504, "learning_rate": 0.0001, "loss": 0.012, "step": 52210 }, { "epoch": 343.55263157894734, "grad_norm": 1.2652689218521118, "learning_rate": 0.0001, "loss": 0.0144, "step": 52220 }, { "epoch": 343.61842105263156, "grad_norm": 1.4694147109985352, "learning_rate": 0.0001, "loss": 0.0143, "step": 52230 }, { "epoch": 343.6842105263158, "grad_norm": 0.8322300910949707, "learning_rate": 0.0001, "loss": 0.0139, "step": 52240 }, { "epoch": 343.75, "grad_norm": 1.4689432382583618, "learning_rate": 0.0001, "loss": 0.0137, "step": 52250 }, { "epoch": 343.8157894736842, "grad_norm": 1.446466088294983, "learning_rate": 0.0001, "loss": 0.0135, "step": 52260 }, { "epoch": 343.88157894736844, "grad_norm": 1.2933214902877808, "learning_rate": 0.0001, "loss": 0.0146, "step": 52270 }, { "epoch": 343.94736842105266, "grad_norm": 1.245603322982788, "learning_rate": 0.0001, "loss": 0.0142, "step": 52280 }, { "epoch": 344.0131578947368, "grad_norm": 1.0786410570144653, "learning_rate": 0.0001, "loss": 0.013, "step": 52290 }, { "epoch": 344.07894736842104, "grad_norm": 1.2536273002624512, "learning_rate": 0.0001, "loss": 0.0157, "step": 52300 }, { "epoch": 344.14473684210526, "grad_norm": 1.1757087707519531, "learning_rate": 0.0001, "loss": 0.0126, "step": 52310 }, { "epoch": 344.2105263157895, "grad_norm": 1.397163987159729, "learning_rate": 0.0001, "loss": 0.0139, "step": 52320 }, { "epoch": 344.2763157894737, "grad_norm": 1.2626746892929077, "learning_rate": 0.0001, "loss": 0.0138, "step": 52330 }, { "epoch": 344.3421052631579, "grad_norm": 1.5072098970413208, "learning_rate": 0.0001, "loss": 0.0129, "step": 52340 }, { "epoch": 344.4078947368421, "grad_norm": 1.0162240266799927, "learning_rate": 0.0001, "loss": 0.0132, "step": 52350 }, { "epoch": 344.4736842105263, "grad_norm": 1.1723800897598267, "learning_rate": 0.0001, "loss": 0.0129, "step": 52360 }, { "epoch": 344.5394736842105, "grad_norm": 1.722672939300537, "learning_rate": 0.0001, "loss": 0.0142, "step": 52370 }, { "epoch": 344.60526315789474, "grad_norm": 1.4377808570861816, "learning_rate": 0.0001, "loss": 0.0132, "step": 52380 }, { "epoch": 344.67105263157896, "grad_norm": 1.4362269639968872, "learning_rate": 0.0001, "loss": 0.0133, "step": 52390 }, { "epoch": 344.7368421052632, "grad_norm": 1.1657408475875854, "learning_rate": 0.0001, "loss": 0.0121, "step": 52400 }, { "epoch": 344.80263157894734, "grad_norm": 1.3093397617340088, "learning_rate": 0.0001, "loss": 0.0125, "step": 52410 }, { "epoch": 344.86842105263156, "grad_norm": 1.1783092021942139, "learning_rate": 0.0001, "loss": 0.0124, "step": 52420 }, { "epoch": 344.9342105263158, "grad_norm": 1.0397982597351074, "learning_rate": 0.0001, "loss": 0.0132, "step": 52430 }, { "epoch": 345.0, "grad_norm": 1.1493473052978516, "learning_rate": 0.0001, "loss": 0.0141, "step": 52440 }, { "epoch": 345.0657894736842, "grad_norm": 1.1874574422836304, "learning_rate": 0.0001, "loss": 0.0135, "step": 52450 }, { "epoch": 345.13157894736844, "grad_norm": 1.2612534761428833, "learning_rate": 0.0001, "loss": 0.0133, "step": 52460 }, { "epoch": 345.19736842105266, "grad_norm": 1.2384288311004639, "learning_rate": 0.0001, "loss": 0.0136, "step": 52470 }, { "epoch": 345.2631578947368, "grad_norm": 1.2733972072601318, "learning_rate": 0.0001, "loss": 0.0129, "step": 52480 }, { "epoch": 345.32894736842104, "grad_norm": 1.046722650527954, "learning_rate": 0.0001, "loss": 0.0142, "step": 52490 }, { "epoch": 345.39473684210526, "grad_norm": 1.7517563104629517, "learning_rate": 0.0001, "loss": 0.0118, "step": 52500 }, { "epoch": 345.4605263157895, "grad_norm": 1.5715398788452148, "learning_rate": 0.0001, "loss": 0.0134, "step": 52510 }, { "epoch": 345.5263157894737, "grad_norm": 1.4876675605773926, "learning_rate": 0.0001, "loss": 0.0134, "step": 52520 }, { "epoch": 345.5921052631579, "grad_norm": 1.2595746517181396, "learning_rate": 0.0001, "loss": 0.0129, "step": 52530 }, { "epoch": 345.6578947368421, "grad_norm": 1.4354819059371948, "learning_rate": 0.0001, "loss": 0.0126, "step": 52540 }, { "epoch": 345.7236842105263, "grad_norm": 1.2486364841461182, "learning_rate": 0.0001, "loss": 0.0144, "step": 52550 }, { "epoch": 345.7894736842105, "grad_norm": 1.4385180473327637, "learning_rate": 0.0001, "loss": 0.0117, "step": 52560 }, { "epoch": 345.85526315789474, "grad_norm": 1.4834891557693481, "learning_rate": 0.0001, "loss": 0.0123, "step": 52570 }, { "epoch": 345.92105263157896, "grad_norm": 1.1149605512619019, "learning_rate": 0.0001, "loss": 0.0123, "step": 52580 }, { "epoch": 345.9868421052632, "grad_norm": 1.0604519844055176, "learning_rate": 0.0001, "loss": 0.0119, "step": 52590 }, { "epoch": 346.05263157894734, "grad_norm": 1.482948899269104, "learning_rate": 0.0001, "loss": 0.0116, "step": 52600 }, { "epoch": 346.11842105263156, "grad_norm": 0.9433545470237732, "learning_rate": 0.0001, "loss": 0.0162, "step": 52610 }, { "epoch": 346.1842105263158, "grad_norm": 1.2080388069152832, "learning_rate": 0.0001, "loss": 0.0135, "step": 52620 }, { "epoch": 346.25, "grad_norm": 1.2017712593078613, "learning_rate": 0.0001, "loss": 0.0118, "step": 52630 }, { "epoch": 346.3157894736842, "grad_norm": 1.3862873315811157, "learning_rate": 0.0001, "loss": 0.0143, "step": 52640 }, { "epoch": 346.38157894736844, "grad_norm": 1.4891222715377808, "learning_rate": 0.0001, "loss": 0.0123, "step": 52650 }, { "epoch": 346.44736842105266, "grad_norm": 1.356292724609375, "learning_rate": 0.0001, "loss": 0.0129, "step": 52660 }, { "epoch": 346.5131578947368, "grad_norm": 1.215113639831543, "learning_rate": 0.0001, "loss": 0.0137, "step": 52670 }, { "epoch": 346.57894736842104, "grad_norm": 1.551318883895874, "learning_rate": 0.0001, "loss": 0.0122, "step": 52680 }, { "epoch": 346.64473684210526, "grad_norm": 1.6980267763137817, "learning_rate": 0.0001, "loss": 0.0134, "step": 52690 }, { "epoch": 346.7105263157895, "grad_norm": 1.528921365737915, "learning_rate": 0.0001, "loss": 0.0123, "step": 52700 }, { "epoch": 346.7763157894737, "grad_norm": 1.3663548231124878, "learning_rate": 0.0001, "loss": 0.0117, "step": 52710 }, { "epoch": 346.8421052631579, "grad_norm": 1.3794395923614502, "learning_rate": 0.0001, "loss": 0.0137, "step": 52720 }, { "epoch": 346.9078947368421, "grad_norm": 1.5531320571899414, "learning_rate": 0.0001, "loss": 0.0115, "step": 52730 }, { "epoch": 346.9736842105263, "grad_norm": 1.017933964729309, "learning_rate": 0.0001, "loss": 0.0136, "step": 52740 }, { "epoch": 347.0394736842105, "grad_norm": 1.3421839475631714, "learning_rate": 0.0001, "loss": 0.0121, "step": 52750 }, { "epoch": 347.10526315789474, "grad_norm": 1.4590574502944946, "learning_rate": 0.0001, "loss": 0.0118, "step": 52760 }, { "epoch": 347.17105263157896, "grad_norm": 1.309173345565796, "learning_rate": 0.0001, "loss": 0.0104, "step": 52770 }, { "epoch": 347.2368421052632, "grad_norm": 1.1551851034164429, "learning_rate": 0.0001, "loss": 0.0121, "step": 52780 }, { "epoch": 347.30263157894734, "grad_norm": 1.4142370223999023, "learning_rate": 0.0001, "loss": 0.0119, "step": 52790 }, { "epoch": 347.36842105263156, "grad_norm": 1.2437849044799805, "learning_rate": 0.0001, "loss": 0.0145, "step": 52800 }, { "epoch": 347.4342105263158, "grad_norm": 1.08604097366333, "learning_rate": 0.0001, "loss": 0.0117, "step": 52810 }, { "epoch": 347.5, "grad_norm": 1.0768139362335205, "learning_rate": 0.0001, "loss": 0.0129, "step": 52820 }, { "epoch": 347.5657894736842, "grad_norm": 1.507045865058899, "learning_rate": 0.0001, "loss": 0.0127, "step": 52830 }, { "epoch": 347.63157894736844, "grad_norm": 1.1751208305358887, "learning_rate": 0.0001, "loss": 0.0125, "step": 52840 }, { "epoch": 347.69736842105266, "grad_norm": 1.4803016185760498, "learning_rate": 0.0001, "loss": 0.0133, "step": 52850 }, { "epoch": 347.7631578947368, "grad_norm": 1.3335362672805786, "learning_rate": 0.0001, "loss": 0.0143, "step": 52860 }, { "epoch": 347.82894736842104, "grad_norm": 0.9887474179267883, "learning_rate": 0.0001, "loss": 0.013, "step": 52870 }, { "epoch": 347.89473684210526, "grad_norm": 1.2500858306884766, "learning_rate": 0.0001, "loss": 0.0132, "step": 52880 }, { "epoch": 347.9605263157895, "grad_norm": 1.0655533075332642, "learning_rate": 0.0001, "loss": 0.0129, "step": 52890 }, { "epoch": 348.0263157894737, "grad_norm": 1.797389268875122, "learning_rate": 0.0001, "loss": 0.0143, "step": 52900 }, { "epoch": 348.0921052631579, "grad_norm": 1.0135571956634521, "learning_rate": 0.0001, "loss": 0.0142, "step": 52910 }, { "epoch": 348.1578947368421, "grad_norm": 1.4118088483810425, "learning_rate": 0.0001, "loss": 0.0141, "step": 52920 }, { "epoch": 348.2236842105263, "grad_norm": 1.5907139778137207, "learning_rate": 0.0001, "loss": 0.0129, "step": 52930 }, { "epoch": 348.2894736842105, "grad_norm": 0.9017720818519592, "learning_rate": 0.0001, "loss": 0.0146, "step": 52940 }, { "epoch": 348.35526315789474, "grad_norm": 1.0268571376800537, "learning_rate": 0.0001, "loss": 0.0119, "step": 52950 }, { "epoch": 348.42105263157896, "grad_norm": 1.0724031925201416, "learning_rate": 0.0001, "loss": 0.0136, "step": 52960 }, { "epoch": 348.4868421052632, "grad_norm": 0.9979811906814575, "learning_rate": 0.0001, "loss": 0.0132, "step": 52970 }, { "epoch": 348.55263157894734, "grad_norm": 1.0473777055740356, "learning_rate": 0.0001, "loss": 0.0128, "step": 52980 }, { "epoch": 348.61842105263156, "grad_norm": 1.4823224544525146, "learning_rate": 0.0001, "loss": 0.0143, "step": 52990 }, { "epoch": 348.6842105263158, "grad_norm": 1.1118677854537964, "learning_rate": 0.0001, "loss": 0.0118, "step": 53000 }, { "epoch": 348.75, "grad_norm": 1.2485218048095703, "learning_rate": 0.0001, "loss": 0.0134, "step": 53010 }, { "epoch": 348.8157894736842, "grad_norm": 1.1463638544082642, "learning_rate": 0.0001, "loss": 0.0119, "step": 53020 }, { "epoch": 348.88157894736844, "grad_norm": 0.8539924025535583, "learning_rate": 0.0001, "loss": 0.0138, "step": 53030 }, { "epoch": 348.94736842105266, "grad_norm": 0.826951801776886, "learning_rate": 0.0001, "loss": 0.0129, "step": 53040 }, { "epoch": 349.0131578947368, "grad_norm": 1.3441721200942993, "learning_rate": 0.0001, "loss": 0.0134, "step": 53050 }, { "epoch": 349.07894736842104, "grad_norm": 1.0839155912399292, "learning_rate": 0.0001, "loss": 0.0114, "step": 53060 }, { "epoch": 349.14473684210526, "grad_norm": 1.1949419975280762, "learning_rate": 0.0001, "loss": 0.0149, "step": 53070 }, { "epoch": 349.2105263157895, "grad_norm": 0.8186034560203552, "learning_rate": 0.0001, "loss": 0.0129, "step": 53080 }, { "epoch": 349.2763157894737, "grad_norm": 0.8655272126197815, "learning_rate": 0.0001, "loss": 0.0129, "step": 53090 }, { "epoch": 349.3421052631579, "grad_norm": 0.7736495733261108, "learning_rate": 0.0001, "loss": 0.0123, "step": 53100 }, { "epoch": 349.4078947368421, "grad_norm": 0.711251437664032, "learning_rate": 0.0001, "loss": 0.0139, "step": 53110 }, { "epoch": 349.4736842105263, "grad_norm": 1.2453712224960327, "learning_rate": 0.0001, "loss": 0.0125, "step": 53120 }, { "epoch": 349.5394736842105, "grad_norm": 1.0265164375305176, "learning_rate": 0.0001, "loss": 0.0135, "step": 53130 }, { "epoch": 349.60526315789474, "grad_norm": 0.9887539744377136, "learning_rate": 0.0001, "loss": 0.0127, "step": 53140 }, { "epoch": 349.67105263157896, "grad_norm": 0.9116423726081848, "learning_rate": 0.0001, "loss": 0.0132, "step": 53150 }, { "epoch": 349.7368421052632, "grad_norm": 1.176967978477478, "learning_rate": 0.0001, "loss": 0.0165, "step": 53160 }, { "epoch": 349.80263157894734, "grad_norm": 1.2313488721847534, "learning_rate": 0.0001, "loss": 0.0137, "step": 53170 }, { "epoch": 349.86842105263156, "grad_norm": 0.7793358564376831, "learning_rate": 0.0001, "loss": 0.0142, "step": 53180 }, { "epoch": 349.9342105263158, "grad_norm": 0.9506605863571167, "learning_rate": 0.0001, "loss": 0.013, "step": 53190 }, { "epoch": 350.0, "grad_norm": 0.9445679783821106, "learning_rate": 0.0001, "loss": 0.0124, "step": 53200 }, { "epoch": 350.0657894736842, "grad_norm": 1.0125964879989624, "learning_rate": 0.0001, "loss": 0.0134, "step": 53210 }, { "epoch": 350.13157894736844, "grad_norm": 1.2516371011734009, "learning_rate": 0.0001, "loss": 0.0138, "step": 53220 }, { "epoch": 350.19736842105266, "grad_norm": 1.4000440835952759, "learning_rate": 0.0001, "loss": 0.0127, "step": 53230 }, { "epoch": 350.2631578947368, "grad_norm": 1.0942318439483643, "learning_rate": 0.0001, "loss": 0.0134, "step": 53240 }, { "epoch": 350.32894736842104, "grad_norm": 1.0864887237548828, "learning_rate": 0.0001, "loss": 0.0136, "step": 53250 }, { "epoch": 350.39473684210526, "grad_norm": 1.2966852188110352, "learning_rate": 0.0001, "loss": 0.0139, "step": 53260 }, { "epoch": 350.4605263157895, "grad_norm": 1.227264404296875, "learning_rate": 0.0001, "loss": 0.0133, "step": 53270 }, { "epoch": 350.5263157894737, "grad_norm": 1.1729018688201904, "learning_rate": 0.0001, "loss": 0.0159, "step": 53280 }, { "epoch": 350.5921052631579, "grad_norm": 0.7288147211074829, "learning_rate": 0.0001, "loss": 0.0129, "step": 53290 }, { "epoch": 350.6578947368421, "grad_norm": 0.893334150314331, "learning_rate": 0.0001, "loss": 0.0132, "step": 53300 }, { "epoch": 350.7236842105263, "grad_norm": 1.2939788103103638, "learning_rate": 0.0001, "loss": 0.0131, "step": 53310 }, { "epoch": 350.7894736842105, "grad_norm": 0.9002676010131836, "learning_rate": 0.0001, "loss": 0.0131, "step": 53320 }, { "epoch": 350.85526315789474, "grad_norm": 0.9034085869789124, "learning_rate": 0.0001, "loss": 0.0157, "step": 53330 }, { "epoch": 350.92105263157896, "grad_norm": 1.2274662256240845, "learning_rate": 0.0001, "loss": 0.0129, "step": 53340 }, { "epoch": 350.9868421052632, "grad_norm": 0.969467043876648, "learning_rate": 0.0001, "loss": 0.0156, "step": 53350 }, { "epoch": 351.05263157894734, "grad_norm": 1.1215009689331055, "learning_rate": 0.0001, "loss": 0.0142, "step": 53360 }, { "epoch": 351.11842105263156, "grad_norm": 1.4852532148361206, "learning_rate": 0.0001, "loss": 0.0137, "step": 53370 }, { "epoch": 351.1842105263158, "grad_norm": 1.1253795623779297, "learning_rate": 0.0001, "loss": 0.0147, "step": 53380 }, { "epoch": 351.25, "grad_norm": 0.8905407786369324, "learning_rate": 0.0001, "loss": 0.0153, "step": 53390 }, { "epoch": 351.3157894736842, "grad_norm": 1.2344647645950317, "learning_rate": 0.0001, "loss": 0.0124, "step": 53400 }, { "epoch": 351.38157894736844, "grad_norm": 0.9669228196144104, "learning_rate": 0.0001, "loss": 0.015, "step": 53410 }, { "epoch": 351.44736842105266, "grad_norm": 1.300685167312622, "learning_rate": 0.0001, "loss": 0.0144, "step": 53420 }, { "epoch": 351.5131578947368, "grad_norm": 1.1283105611801147, "learning_rate": 0.0001, "loss": 0.0118, "step": 53430 }, { "epoch": 351.57894736842104, "grad_norm": 1.390939474105835, "learning_rate": 0.0001, "loss": 0.0128, "step": 53440 }, { "epoch": 351.64473684210526, "grad_norm": 1.187654972076416, "learning_rate": 0.0001, "loss": 0.0142, "step": 53450 }, { "epoch": 351.7105263157895, "grad_norm": 0.9280898571014404, "learning_rate": 0.0001, "loss": 0.0137, "step": 53460 }, { "epoch": 351.7763157894737, "grad_norm": 1.1642065048217773, "learning_rate": 0.0001, "loss": 0.0128, "step": 53470 }, { "epoch": 351.8421052631579, "grad_norm": 1.3112080097198486, "learning_rate": 0.0001, "loss": 0.0133, "step": 53480 }, { "epoch": 351.9078947368421, "grad_norm": 1.1810340881347656, "learning_rate": 0.0001, "loss": 0.0128, "step": 53490 }, { "epoch": 351.9736842105263, "grad_norm": 1.1890909671783447, "learning_rate": 0.0001, "loss": 0.0165, "step": 53500 }, { "epoch": 352.0394736842105, "grad_norm": 0.9350360035896301, "learning_rate": 0.0001, "loss": 0.0135, "step": 53510 }, { "epoch": 352.10526315789474, "grad_norm": 1.0225274562835693, "learning_rate": 0.0001, "loss": 0.0134, "step": 53520 }, { "epoch": 352.17105263157896, "grad_norm": 1.1939771175384521, "learning_rate": 0.0001, "loss": 0.0116, "step": 53530 }, { "epoch": 352.2368421052632, "grad_norm": 0.9395789504051208, "learning_rate": 0.0001, "loss": 0.013, "step": 53540 }, { "epoch": 352.30263157894734, "grad_norm": 1.4980489015579224, "learning_rate": 0.0001, "loss": 0.0121, "step": 53550 }, { "epoch": 352.36842105263156, "grad_norm": 1.4125310182571411, "learning_rate": 0.0001, "loss": 0.0157, "step": 53560 }, { "epoch": 352.4342105263158, "grad_norm": 1.1465486288070679, "learning_rate": 0.0001, "loss": 0.0137, "step": 53570 }, { "epoch": 352.5, "grad_norm": 1.5160516500473022, "learning_rate": 0.0001, "loss": 0.0133, "step": 53580 }, { "epoch": 352.5657894736842, "grad_norm": 1.0091547966003418, "learning_rate": 0.0001, "loss": 0.0142, "step": 53590 }, { "epoch": 352.63157894736844, "grad_norm": 1.6452295780181885, "learning_rate": 0.0001, "loss": 0.014, "step": 53600 }, { "epoch": 352.69736842105266, "grad_norm": 0.8045058846473694, "learning_rate": 0.0001, "loss": 0.0148, "step": 53610 }, { "epoch": 352.7631578947368, "grad_norm": 0.9372885823249817, "learning_rate": 0.0001, "loss": 0.0151, "step": 53620 }, { "epoch": 352.82894736842104, "grad_norm": 1.2400946617126465, "learning_rate": 0.0001, "loss": 0.0153, "step": 53630 }, { "epoch": 352.89473684210526, "grad_norm": 1.2363208532333374, "learning_rate": 0.0001, "loss": 0.0174, "step": 53640 }, { "epoch": 352.9605263157895, "grad_norm": 1.4583326578140259, "learning_rate": 0.0001, "loss": 0.0161, "step": 53650 }, { "epoch": 353.0263157894737, "grad_norm": 1.4135735034942627, "learning_rate": 0.0001, "loss": 0.0178, "step": 53660 }, { "epoch": 353.0921052631579, "grad_norm": 0.8179410099983215, "learning_rate": 0.0001, "loss": 0.0159, "step": 53670 }, { "epoch": 353.1578947368421, "grad_norm": 1.1129777431488037, "learning_rate": 0.0001, "loss": 0.0137, "step": 53680 }, { "epoch": 353.2236842105263, "grad_norm": 0.9328417778015137, "learning_rate": 0.0001, "loss": 0.0153, "step": 53690 }, { "epoch": 353.2894736842105, "grad_norm": 1.0493443012237549, "learning_rate": 0.0001, "loss": 0.0138, "step": 53700 }, { "epoch": 353.35526315789474, "grad_norm": 1.4180327653884888, "learning_rate": 0.0001, "loss": 0.0146, "step": 53710 }, { "epoch": 353.42105263157896, "grad_norm": 1.10849130153656, "learning_rate": 0.0001, "loss": 0.0148, "step": 53720 }, { "epoch": 353.4868421052632, "grad_norm": 0.9594805836677551, "learning_rate": 0.0001, "loss": 0.0163, "step": 53730 }, { "epoch": 353.55263157894734, "grad_norm": 1.55135977268219, "learning_rate": 0.0001, "loss": 0.0168, "step": 53740 }, { "epoch": 353.61842105263156, "grad_norm": 1.0522472858428955, "learning_rate": 0.0001, "loss": 0.0155, "step": 53750 }, { "epoch": 353.6842105263158, "grad_norm": 1.2985424995422363, "learning_rate": 0.0001, "loss": 0.0141, "step": 53760 }, { "epoch": 353.75, "grad_norm": 1.2076297998428345, "learning_rate": 0.0001, "loss": 0.0155, "step": 53770 }, { "epoch": 353.8157894736842, "grad_norm": 1.4441065788269043, "learning_rate": 0.0001, "loss": 0.0136, "step": 53780 }, { "epoch": 353.88157894736844, "grad_norm": 1.4848010540008545, "learning_rate": 0.0001, "loss": 0.0148, "step": 53790 }, { "epoch": 353.94736842105266, "grad_norm": 1.2929956912994385, "learning_rate": 0.0001, "loss": 0.0158, "step": 53800 }, { "epoch": 354.0131578947368, "grad_norm": 1.4430453777313232, "learning_rate": 0.0001, "loss": 0.014, "step": 53810 }, { "epoch": 354.07894736842104, "grad_norm": 1.5679028034210205, "learning_rate": 0.0001, "loss": 0.0153, "step": 53820 }, { "epoch": 354.14473684210526, "grad_norm": 0.8929752111434937, "learning_rate": 0.0001, "loss": 0.0129, "step": 53830 }, { "epoch": 354.2105263157895, "grad_norm": 1.3934887647628784, "learning_rate": 0.0001, "loss": 0.0129, "step": 53840 }, { "epoch": 354.2763157894737, "grad_norm": 1.3150675296783447, "learning_rate": 0.0001, "loss": 0.0144, "step": 53850 }, { "epoch": 354.3421052631579, "grad_norm": 1.1540673971176147, "learning_rate": 0.0001, "loss": 0.0153, "step": 53860 }, { "epoch": 354.4078947368421, "grad_norm": 1.2295771837234497, "learning_rate": 0.0001, "loss": 0.0127, "step": 53870 }, { "epoch": 354.4736842105263, "grad_norm": 1.2992318868637085, "learning_rate": 0.0001, "loss": 0.0125, "step": 53880 }, { "epoch": 354.5394736842105, "grad_norm": 1.3661577701568604, "learning_rate": 0.0001, "loss": 0.0141, "step": 53890 }, { "epoch": 354.60526315789474, "grad_norm": 1.475778579711914, "learning_rate": 0.0001, "loss": 0.0138, "step": 53900 }, { "epoch": 354.67105263157896, "grad_norm": 1.3246492147445679, "learning_rate": 0.0001, "loss": 0.0154, "step": 53910 }, { "epoch": 354.7368421052632, "grad_norm": 1.3882511854171753, "learning_rate": 0.0001, "loss": 0.0126, "step": 53920 }, { "epoch": 354.80263157894734, "grad_norm": 0.9738257527351379, "learning_rate": 0.0001, "loss": 0.012, "step": 53930 }, { "epoch": 354.86842105263156, "grad_norm": 1.405836820602417, "learning_rate": 0.0001, "loss": 0.0132, "step": 53940 }, { "epoch": 354.9342105263158, "grad_norm": 1.3983672857284546, "learning_rate": 0.0001, "loss": 0.0153, "step": 53950 }, { "epoch": 355.0, "grad_norm": 1.1816673278808594, "learning_rate": 0.0001, "loss": 0.0158, "step": 53960 }, { "epoch": 355.0657894736842, "grad_norm": 0.901597797870636, "learning_rate": 0.0001, "loss": 0.014, "step": 53970 }, { "epoch": 355.13157894736844, "grad_norm": 0.8046585321426392, "learning_rate": 0.0001, "loss": 0.0137, "step": 53980 }, { "epoch": 355.19736842105266, "grad_norm": 0.8949022889137268, "learning_rate": 0.0001, "loss": 0.015, "step": 53990 }, { "epoch": 355.2631578947368, "grad_norm": 0.8433843851089478, "learning_rate": 0.0001, "loss": 0.0128, "step": 54000 }, { "epoch": 355.32894736842104, "grad_norm": 1.3329582214355469, "learning_rate": 0.0001, "loss": 0.0125, "step": 54010 }, { "epoch": 355.39473684210526, "grad_norm": 1.203683614730835, "learning_rate": 0.0001, "loss": 0.0131, "step": 54020 }, { "epoch": 355.4605263157895, "grad_norm": 1.1506283283233643, "learning_rate": 0.0001, "loss": 0.0134, "step": 54030 }, { "epoch": 355.5263157894737, "grad_norm": 1.4512953758239746, "learning_rate": 0.0001, "loss": 0.0126, "step": 54040 }, { "epoch": 355.5921052631579, "grad_norm": 1.065973162651062, "learning_rate": 0.0001, "loss": 0.0131, "step": 54050 }, { "epoch": 355.6578947368421, "grad_norm": 0.8735188245773315, "learning_rate": 0.0001, "loss": 0.0126, "step": 54060 }, { "epoch": 355.7236842105263, "grad_norm": 1.3658928871154785, "learning_rate": 0.0001, "loss": 0.015, "step": 54070 }, { "epoch": 355.7894736842105, "grad_norm": 1.037252426147461, "learning_rate": 0.0001, "loss": 0.0125, "step": 54080 }, { "epoch": 355.85526315789474, "grad_norm": 1.2813448905944824, "learning_rate": 0.0001, "loss": 0.0137, "step": 54090 }, { "epoch": 355.92105263157896, "grad_norm": 1.114138126373291, "learning_rate": 0.0001, "loss": 0.013, "step": 54100 }, { "epoch": 355.9868421052632, "grad_norm": 1.242543339729309, "learning_rate": 0.0001, "loss": 0.0125, "step": 54110 }, { "epoch": 356.05263157894734, "grad_norm": 1.8601607084274292, "learning_rate": 0.0001, "loss": 0.0118, "step": 54120 }, { "epoch": 356.11842105263156, "grad_norm": 1.521649718284607, "learning_rate": 0.0001, "loss": 0.0107, "step": 54130 }, { "epoch": 356.1842105263158, "grad_norm": 1.109571099281311, "learning_rate": 0.0001, "loss": 0.0153, "step": 54140 }, { "epoch": 356.25, "grad_norm": 1.3010332584381104, "learning_rate": 0.0001, "loss": 0.0129, "step": 54150 }, { "epoch": 356.3157894736842, "grad_norm": 1.2290462255477905, "learning_rate": 0.0001, "loss": 0.0122, "step": 54160 }, { "epoch": 356.38157894736844, "grad_norm": 1.0519379377365112, "learning_rate": 0.0001, "loss": 0.0122, "step": 54170 }, { "epoch": 356.44736842105266, "grad_norm": 1.036097764968872, "learning_rate": 0.0001, "loss": 0.012, "step": 54180 }, { "epoch": 356.5131578947368, "grad_norm": 1.120662808418274, "learning_rate": 0.0001, "loss": 0.0126, "step": 54190 }, { "epoch": 356.57894736842104, "grad_norm": 0.9310142993927002, "learning_rate": 0.0001, "loss": 0.0129, "step": 54200 }, { "epoch": 356.64473684210526, "grad_norm": 1.009954810142517, "learning_rate": 0.0001, "loss": 0.0128, "step": 54210 }, { "epoch": 356.7105263157895, "grad_norm": 0.9205268621444702, "learning_rate": 0.0001, "loss": 0.0115, "step": 54220 }, { "epoch": 356.7763157894737, "grad_norm": 1.1177328824996948, "learning_rate": 0.0001, "loss": 0.0171, "step": 54230 }, { "epoch": 356.8421052631579, "grad_norm": 1.0612447261810303, "learning_rate": 0.0001, "loss": 0.014, "step": 54240 }, { "epoch": 356.9078947368421, "grad_norm": 0.7625839710235596, "learning_rate": 0.0001, "loss": 0.015, "step": 54250 }, { "epoch": 356.9736842105263, "grad_norm": 0.8989898562431335, "learning_rate": 0.0001, "loss": 0.0146, "step": 54260 }, { "epoch": 357.0394736842105, "grad_norm": 1.3405532836914062, "learning_rate": 0.0001, "loss": 0.0127, "step": 54270 }, { "epoch": 357.10526315789474, "grad_norm": 1.3939285278320312, "learning_rate": 0.0001, "loss": 0.0138, "step": 54280 }, { "epoch": 357.17105263157896, "grad_norm": 1.1620155572891235, "learning_rate": 0.0001, "loss": 0.0153, "step": 54290 }, { "epoch": 357.2368421052632, "grad_norm": 1.2777410745620728, "learning_rate": 0.0001, "loss": 0.0159, "step": 54300 }, { "epoch": 357.30263157894734, "grad_norm": 1.271978735923767, "learning_rate": 0.0001, "loss": 0.014, "step": 54310 }, { "epoch": 357.36842105263156, "grad_norm": 1.060716152191162, "learning_rate": 0.0001, "loss": 0.0139, "step": 54320 }, { "epoch": 357.4342105263158, "grad_norm": 1.1990467309951782, "learning_rate": 0.0001, "loss": 0.0153, "step": 54330 }, { "epoch": 357.5, "grad_norm": 1.0680278539657593, "learning_rate": 0.0001, "loss": 0.0139, "step": 54340 }, { "epoch": 357.5657894736842, "grad_norm": 1.2417722940444946, "learning_rate": 0.0001, "loss": 0.0161, "step": 54350 }, { "epoch": 357.63157894736844, "grad_norm": 1.0456074476242065, "learning_rate": 0.0001, "loss": 0.0166, "step": 54360 }, { "epoch": 357.69736842105266, "grad_norm": 1.1133095026016235, "learning_rate": 0.0001, "loss": 0.0141, "step": 54370 }, { "epoch": 357.7631578947368, "grad_norm": 1.355271816253662, "learning_rate": 0.0001, "loss": 0.0159, "step": 54380 }, { "epoch": 357.82894736842104, "grad_norm": 1.0156669616699219, "learning_rate": 0.0001, "loss": 0.0165, "step": 54390 }, { "epoch": 357.89473684210526, "grad_norm": 1.571312427520752, "learning_rate": 0.0001, "loss": 0.015, "step": 54400 }, { "epoch": 357.9605263157895, "grad_norm": 1.2970470190048218, "learning_rate": 0.0001, "loss": 0.0133, "step": 54410 }, { "epoch": 358.0263157894737, "grad_norm": 1.343517780303955, "learning_rate": 0.0001, "loss": 0.0136, "step": 54420 }, { "epoch": 358.0921052631579, "grad_norm": 1.6380618810653687, "learning_rate": 0.0001, "loss": 0.0133, "step": 54430 }, { "epoch": 358.1578947368421, "grad_norm": 1.1087614297866821, "learning_rate": 0.0001, "loss": 0.014, "step": 54440 }, { "epoch": 358.2236842105263, "grad_norm": 1.523401141166687, "learning_rate": 0.0001, "loss": 0.0147, "step": 54450 }, { "epoch": 358.2894736842105, "grad_norm": 1.1688175201416016, "learning_rate": 0.0001, "loss": 0.0134, "step": 54460 }, { "epoch": 358.35526315789474, "grad_norm": 1.3475173711776733, "learning_rate": 0.0001, "loss": 0.0134, "step": 54470 }, { "epoch": 358.42105263157896, "grad_norm": 1.240882396697998, "learning_rate": 0.0001, "loss": 0.0132, "step": 54480 }, { "epoch": 358.4868421052632, "grad_norm": 1.5280466079711914, "learning_rate": 0.0001, "loss": 0.0129, "step": 54490 }, { "epoch": 358.55263157894734, "grad_norm": 1.313010334968567, "learning_rate": 0.0001, "loss": 0.0132, "step": 54500 }, { "epoch": 358.61842105263156, "grad_norm": 1.4535280466079712, "learning_rate": 0.0001, "loss": 0.0151, "step": 54510 }, { "epoch": 358.6842105263158, "grad_norm": 0.7499117255210876, "learning_rate": 0.0001, "loss": 0.0121, "step": 54520 }, { "epoch": 358.75, "grad_norm": 1.1150604486465454, "learning_rate": 0.0001, "loss": 0.0141, "step": 54530 }, { "epoch": 358.8157894736842, "grad_norm": 1.5090540647506714, "learning_rate": 0.0001, "loss": 0.013, "step": 54540 }, { "epoch": 358.88157894736844, "grad_norm": 1.2874553203582764, "learning_rate": 0.0001, "loss": 0.0148, "step": 54550 }, { "epoch": 358.94736842105266, "grad_norm": 1.4591070413589478, "learning_rate": 0.0001, "loss": 0.0116, "step": 54560 }, { "epoch": 359.0131578947368, "grad_norm": 1.5048354864120483, "learning_rate": 0.0001, "loss": 0.0138, "step": 54570 }, { "epoch": 359.07894736842104, "grad_norm": 1.1533069610595703, "learning_rate": 0.0001, "loss": 0.0134, "step": 54580 }, { "epoch": 359.14473684210526, "grad_norm": 1.287916660308838, "learning_rate": 0.0001, "loss": 0.0157, "step": 54590 }, { "epoch": 359.2105263157895, "grad_norm": 1.1660511493682861, "learning_rate": 0.0001, "loss": 0.0132, "step": 54600 }, { "epoch": 359.2763157894737, "grad_norm": 1.1380329132080078, "learning_rate": 0.0001, "loss": 0.0114, "step": 54610 }, { "epoch": 359.3421052631579, "grad_norm": 1.1352312564849854, "learning_rate": 0.0001, "loss": 0.0126, "step": 54620 }, { "epoch": 359.4078947368421, "grad_norm": 1.5034499168395996, "learning_rate": 0.0001, "loss": 0.0128, "step": 54630 }, { "epoch": 359.4736842105263, "grad_norm": 1.6546825170516968, "learning_rate": 0.0001, "loss": 0.0146, "step": 54640 }, { "epoch": 359.5394736842105, "grad_norm": 1.4479390382766724, "learning_rate": 0.0001, "loss": 0.0151, "step": 54650 }, { "epoch": 359.60526315789474, "grad_norm": 1.404312014579773, "learning_rate": 0.0001, "loss": 0.0111, "step": 54660 }, { "epoch": 359.67105263157896, "grad_norm": 1.507351040840149, "learning_rate": 0.0001, "loss": 0.012, "step": 54670 }, { "epoch": 359.7368421052632, "grad_norm": 1.2614494562149048, "learning_rate": 0.0001, "loss": 0.0109, "step": 54680 }, { "epoch": 359.80263157894734, "grad_norm": 0.779187798500061, "learning_rate": 0.0001, "loss": 0.0127, "step": 54690 }, { "epoch": 359.86842105263156, "grad_norm": 1.3693275451660156, "learning_rate": 0.0001, "loss": 0.0118, "step": 54700 }, { "epoch": 359.9342105263158, "grad_norm": 1.4360358715057373, "learning_rate": 0.0001, "loss": 0.0134, "step": 54710 }, { "epoch": 360.0, "grad_norm": 1.4990230798721313, "learning_rate": 0.0001, "loss": 0.0125, "step": 54720 }, { "epoch": 360.0657894736842, "grad_norm": 1.2829705476760864, "learning_rate": 0.0001, "loss": 0.0125, "step": 54730 }, { "epoch": 360.13157894736844, "grad_norm": 0.8983616232872009, "learning_rate": 0.0001, "loss": 0.0138, "step": 54740 }, { "epoch": 360.19736842105266, "grad_norm": 1.4845367670059204, "learning_rate": 0.0001, "loss": 0.0151, "step": 54750 }, { "epoch": 360.2631578947368, "grad_norm": 1.124158501625061, "learning_rate": 0.0001, "loss": 0.0134, "step": 54760 }, { "epoch": 360.32894736842104, "grad_norm": 1.3822499513626099, "learning_rate": 0.0001, "loss": 0.0131, "step": 54770 }, { "epoch": 360.39473684210526, "grad_norm": 1.2073116302490234, "learning_rate": 0.0001, "loss": 0.0124, "step": 54780 }, { "epoch": 360.4605263157895, "grad_norm": 1.0493237972259521, "learning_rate": 0.0001, "loss": 0.014, "step": 54790 }, { "epoch": 360.5263157894737, "grad_norm": 1.4061884880065918, "learning_rate": 0.0001, "loss": 0.012, "step": 54800 }, { "epoch": 360.5921052631579, "grad_norm": 2.0674805641174316, "learning_rate": 0.0001, "loss": 0.014, "step": 54810 }, { "epoch": 360.6578947368421, "grad_norm": 1.6307419538497925, "learning_rate": 0.0001, "loss": 0.011, "step": 54820 }, { "epoch": 360.7236842105263, "grad_norm": 1.5388418436050415, "learning_rate": 0.0001, "loss": 0.0107, "step": 54830 }, { "epoch": 360.7894736842105, "grad_norm": 1.1315034627914429, "learning_rate": 0.0001, "loss": 0.0123, "step": 54840 }, { "epoch": 360.85526315789474, "grad_norm": 1.5357550382614136, "learning_rate": 0.0001, "loss": 0.0109, "step": 54850 }, { "epoch": 360.92105263157896, "grad_norm": 1.019702672958374, "learning_rate": 0.0001, "loss": 0.0125, "step": 54860 }, { "epoch": 360.9868421052632, "grad_norm": 1.2998785972595215, "learning_rate": 0.0001, "loss": 0.0119, "step": 54870 }, { "epoch": 361.05263157894734, "grad_norm": 0.9051557183265686, "learning_rate": 0.0001, "loss": 0.0113, "step": 54880 }, { "epoch": 361.11842105263156, "grad_norm": 1.275030493736267, "learning_rate": 0.0001, "loss": 0.0108, "step": 54890 }, { "epoch": 361.1842105263158, "grad_norm": 0.9858371615409851, "learning_rate": 0.0001, "loss": 0.0119, "step": 54900 }, { "epoch": 361.25, "grad_norm": 1.1971203088760376, "learning_rate": 0.0001, "loss": 0.0144, "step": 54910 }, { "epoch": 361.3157894736842, "grad_norm": 1.2768568992614746, "learning_rate": 0.0001, "loss": 0.0139, "step": 54920 }, { "epoch": 361.38157894736844, "grad_norm": 1.199203372001648, "learning_rate": 0.0001, "loss": 0.0116, "step": 54930 }, { "epoch": 361.44736842105266, "grad_norm": 0.8393948078155518, "learning_rate": 0.0001, "loss": 0.0137, "step": 54940 }, { "epoch": 361.5131578947368, "grad_norm": 1.0057693719863892, "learning_rate": 0.0001, "loss": 0.0138, "step": 54950 }, { "epoch": 361.57894736842104, "grad_norm": 1.328454852104187, "learning_rate": 0.0001, "loss": 0.015, "step": 54960 }, { "epoch": 361.64473684210526, "grad_norm": 1.4820215702056885, "learning_rate": 0.0001, "loss": 0.0108, "step": 54970 }, { "epoch": 361.7105263157895, "grad_norm": 1.2273774147033691, "learning_rate": 0.0001, "loss": 0.0126, "step": 54980 }, { "epoch": 361.7763157894737, "grad_norm": 0.9895418882369995, "learning_rate": 0.0001, "loss": 0.0124, "step": 54990 }, { "epoch": 361.8421052631579, "grad_norm": 1.2177479267120361, "learning_rate": 0.0001, "loss": 0.013, "step": 55000 }, { "epoch": 361.9078947368421, "grad_norm": 1.5505794286727905, "learning_rate": 0.0001, "loss": 0.0127, "step": 55010 }, { "epoch": 361.9736842105263, "grad_norm": 1.2999932765960693, "learning_rate": 0.0001, "loss": 0.0136, "step": 55020 }, { "epoch": 362.0394736842105, "grad_norm": 1.0745863914489746, "learning_rate": 0.0001, "loss": 0.0144, "step": 55030 }, { "epoch": 362.10526315789474, "grad_norm": 1.1478854417800903, "learning_rate": 0.0001, "loss": 0.0123, "step": 55040 }, { "epoch": 362.17105263157896, "grad_norm": 1.1435706615447998, "learning_rate": 0.0001, "loss": 0.0131, "step": 55050 }, { "epoch": 362.2368421052632, "grad_norm": 1.1603302955627441, "learning_rate": 0.0001, "loss": 0.0129, "step": 55060 }, { "epoch": 362.30263157894734, "grad_norm": 1.1007798910140991, "learning_rate": 0.0001, "loss": 0.0125, "step": 55070 }, { "epoch": 362.36842105263156, "grad_norm": 0.8749071359634399, "learning_rate": 0.0001, "loss": 0.0145, "step": 55080 }, { "epoch": 362.4342105263158, "grad_norm": 1.287734031677246, "learning_rate": 0.0001, "loss": 0.0132, "step": 55090 }, { "epoch": 362.5, "grad_norm": 1.153018593788147, "learning_rate": 0.0001, "loss": 0.0146, "step": 55100 }, { "epoch": 362.5657894736842, "grad_norm": 1.2814031839370728, "learning_rate": 0.0001, "loss": 0.0136, "step": 55110 }, { "epoch": 362.63157894736844, "grad_norm": 1.211867094039917, "learning_rate": 0.0001, "loss": 0.0114, "step": 55120 }, { "epoch": 362.69736842105266, "grad_norm": 1.1564773321151733, "learning_rate": 0.0001, "loss": 0.0138, "step": 55130 }, { "epoch": 362.7631578947368, "grad_norm": 0.9832426309585571, "learning_rate": 0.0001, "loss": 0.0134, "step": 55140 }, { "epoch": 362.82894736842104, "grad_norm": 1.05033278465271, "learning_rate": 0.0001, "loss": 0.0123, "step": 55150 }, { "epoch": 362.89473684210526, "grad_norm": 1.2688597440719604, "learning_rate": 0.0001, "loss": 0.0123, "step": 55160 }, { "epoch": 362.9605263157895, "grad_norm": 1.314858078956604, "learning_rate": 0.0001, "loss": 0.0133, "step": 55170 }, { "epoch": 363.0263157894737, "grad_norm": 1.2743005752563477, "learning_rate": 0.0001, "loss": 0.0133, "step": 55180 }, { "epoch": 363.0921052631579, "grad_norm": 1.3456010818481445, "learning_rate": 0.0001, "loss": 0.0125, "step": 55190 }, { "epoch": 363.1578947368421, "grad_norm": 1.2974659204483032, "learning_rate": 0.0001, "loss": 0.0145, "step": 55200 }, { "epoch": 363.2236842105263, "grad_norm": 1.0319002866744995, "learning_rate": 0.0001, "loss": 0.014, "step": 55210 }, { "epoch": 363.2894736842105, "grad_norm": 1.074663519859314, "learning_rate": 0.0001, "loss": 0.0122, "step": 55220 }, { "epoch": 363.35526315789474, "grad_norm": 1.0586227178573608, "learning_rate": 0.0001, "loss": 0.0132, "step": 55230 }, { "epoch": 363.42105263157896, "grad_norm": 1.1722004413604736, "learning_rate": 0.0001, "loss": 0.0141, "step": 55240 }, { "epoch": 363.4868421052632, "grad_norm": 1.1225117444992065, "learning_rate": 0.0001, "loss": 0.0138, "step": 55250 }, { "epoch": 363.55263157894734, "grad_norm": 0.9017395973205566, "learning_rate": 0.0001, "loss": 0.0121, "step": 55260 }, { "epoch": 363.61842105263156, "grad_norm": 1.1716728210449219, "learning_rate": 0.0001, "loss": 0.0137, "step": 55270 }, { "epoch": 363.6842105263158, "grad_norm": 0.8839274644851685, "learning_rate": 0.0001, "loss": 0.0118, "step": 55280 }, { "epoch": 363.75, "grad_norm": 1.1430319547653198, "learning_rate": 0.0001, "loss": 0.0114, "step": 55290 }, { "epoch": 363.8157894736842, "grad_norm": 1.2584179639816284, "learning_rate": 0.0001, "loss": 0.0137, "step": 55300 }, { "epoch": 363.88157894736844, "grad_norm": 1.3413207530975342, "learning_rate": 0.0001, "loss": 0.0129, "step": 55310 }, { "epoch": 363.94736842105266, "grad_norm": 1.4143167734146118, "learning_rate": 0.0001, "loss": 0.0145, "step": 55320 }, { "epoch": 364.0131578947368, "grad_norm": 1.290185570716858, "learning_rate": 0.0001, "loss": 0.0126, "step": 55330 }, { "epoch": 364.07894736842104, "grad_norm": 1.241997241973877, "learning_rate": 0.0001, "loss": 0.0122, "step": 55340 }, { "epoch": 364.14473684210526, "grad_norm": 1.200785756111145, "learning_rate": 0.0001, "loss": 0.0128, "step": 55350 }, { "epoch": 364.2105263157895, "grad_norm": 1.1258078813552856, "learning_rate": 0.0001, "loss": 0.014, "step": 55360 }, { "epoch": 364.2763157894737, "grad_norm": 1.1537657976150513, "learning_rate": 0.0001, "loss": 0.0112, "step": 55370 }, { "epoch": 364.3421052631579, "grad_norm": 1.3146229982376099, "learning_rate": 0.0001, "loss": 0.0124, "step": 55380 }, { "epoch": 364.4078947368421, "grad_norm": 1.3080146312713623, "learning_rate": 0.0001, "loss": 0.0143, "step": 55390 }, { "epoch": 364.4736842105263, "grad_norm": 1.3526312112808228, "learning_rate": 0.0001, "loss": 0.0132, "step": 55400 }, { "epoch": 364.5394736842105, "grad_norm": 1.6095331907272339, "learning_rate": 0.0001, "loss": 0.0129, "step": 55410 }, { "epoch": 364.60526315789474, "grad_norm": 1.4046205282211304, "learning_rate": 0.0001, "loss": 0.0128, "step": 55420 }, { "epoch": 364.67105263157896, "grad_norm": 1.0746979713439941, "learning_rate": 0.0001, "loss": 0.0133, "step": 55430 }, { "epoch": 364.7368421052632, "grad_norm": 1.3545022010803223, "learning_rate": 0.0001, "loss": 0.013, "step": 55440 }, { "epoch": 364.80263157894734, "grad_norm": 1.36902916431427, "learning_rate": 0.0001, "loss": 0.0112, "step": 55450 }, { "epoch": 364.86842105263156, "grad_norm": 1.1085587739944458, "learning_rate": 0.0001, "loss": 0.0127, "step": 55460 }, { "epoch": 364.9342105263158, "grad_norm": 0.8094392418861389, "learning_rate": 0.0001, "loss": 0.0115, "step": 55470 }, { "epoch": 365.0, "grad_norm": 0.8859060406684875, "learning_rate": 0.0001, "loss": 0.0128, "step": 55480 }, { "epoch": 365.0657894736842, "grad_norm": 1.0482323169708252, "learning_rate": 0.0001, "loss": 0.013, "step": 55490 }, { "epoch": 365.13157894736844, "grad_norm": 1.1933088302612305, "learning_rate": 0.0001, "loss": 0.013, "step": 55500 }, { "epoch": 365.19736842105266, "grad_norm": 1.1254971027374268, "learning_rate": 0.0001, "loss": 0.0115, "step": 55510 }, { "epoch": 365.2631578947368, "grad_norm": 1.3154945373535156, "learning_rate": 0.0001, "loss": 0.0115, "step": 55520 }, { "epoch": 365.32894736842104, "grad_norm": 0.9752078056335449, "learning_rate": 0.0001, "loss": 0.0127, "step": 55530 }, { "epoch": 365.39473684210526, "grad_norm": 1.297957420349121, "learning_rate": 0.0001, "loss": 0.0126, "step": 55540 }, { "epoch": 365.4605263157895, "grad_norm": 0.6452957391738892, "learning_rate": 0.0001, "loss": 0.0129, "step": 55550 }, { "epoch": 365.5263157894737, "grad_norm": 1.1209876537322998, "learning_rate": 0.0001, "loss": 0.0128, "step": 55560 }, { "epoch": 365.5921052631579, "grad_norm": 1.1463710069656372, "learning_rate": 0.0001, "loss": 0.0116, "step": 55570 }, { "epoch": 365.6578947368421, "grad_norm": 0.9191970825195312, "learning_rate": 0.0001, "loss": 0.0134, "step": 55580 }, { "epoch": 365.7236842105263, "grad_norm": 1.2849448919296265, "learning_rate": 0.0001, "loss": 0.0164, "step": 55590 }, { "epoch": 365.7894736842105, "grad_norm": 1.0626157522201538, "learning_rate": 0.0001, "loss": 0.0141, "step": 55600 }, { "epoch": 365.85526315789474, "grad_norm": 1.1464340686798096, "learning_rate": 0.0001, "loss": 0.0128, "step": 55610 }, { "epoch": 365.92105263157896, "grad_norm": 0.7038142681121826, "learning_rate": 0.0001, "loss": 0.012, "step": 55620 }, { "epoch": 365.9868421052632, "grad_norm": 1.4112557172775269, "learning_rate": 0.0001, "loss": 0.0139, "step": 55630 }, { "epoch": 366.05263157894734, "grad_norm": 1.230036735534668, "learning_rate": 0.0001, "loss": 0.0147, "step": 55640 }, { "epoch": 366.11842105263156, "grad_norm": 0.9362917542457581, "learning_rate": 0.0001, "loss": 0.0108, "step": 55650 }, { "epoch": 366.1842105263158, "grad_norm": 1.2693713903427124, "learning_rate": 0.0001, "loss": 0.0141, "step": 55660 }, { "epoch": 366.25, "grad_norm": 1.3503483533859253, "learning_rate": 0.0001, "loss": 0.0152, "step": 55670 }, { "epoch": 366.3157894736842, "grad_norm": 1.1417988538742065, "learning_rate": 0.0001, "loss": 0.0144, "step": 55680 }, { "epoch": 366.38157894736844, "grad_norm": 1.155261516571045, "learning_rate": 0.0001, "loss": 0.0131, "step": 55690 }, { "epoch": 366.44736842105266, "grad_norm": 1.2800606489181519, "learning_rate": 0.0001, "loss": 0.0134, "step": 55700 }, { "epoch": 366.5131578947368, "grad_norm": 1.3735575675964355, "learning_rate": 0.0001, "loss": 0.0123, "step": 55710 }, { "epoch": 366.57894736842104, "grad_norm": 1.050938367843628, "learning_rate": 0.0001, "loss": 0.0121, "step": 55720 }, { "epoch": 366.64473684210526, "grad_norm": 0.9390007257461548, "learning_rate": 0.0001, "loss": 0.0119, "step": 55730 }, { "epoch": 366.7105263157895, "grad_norm": 1.1883047819137573, "learning_rate": 0.0001, "loss": 0.0121, "step": 55740 }, { "epoch": 366.7763157894737, "grad_norm": 1.5340079069137573, "learning_rate": 0.0001, "loss": 0.0109, "step": 55750 }, { "epoch": 366.8421052631579, "grad_norm": 1.2325025796890259, "learning_rate": 0.0001, "loss": 0.0131, "step": 55760 }, { "epoch": 366.9078947368421, "grad_norm": 1.1925488710403442, "learning_rate": 0.0001, "loss": 0.0143, "step": 55770 }, { "epoch": 366.9736842105263, "grad_norm": 1.3321561813354492, "learning_rate": 0.0001, "loss": 0.012, "step": 55780 }, { "epoch": 367.0394736842105, "grad_norm": 1.380933165550232, "learning_rate": 0.0001, "loss": 0.0124, "step": 55790 }, { "epoch": 367.10526315789474, "grad_norm": 1.200136423110962, "learning_rate": 0.0001, "loss": 0.0137, "step": 55800 }, { "epoch": 367.17105263157896, "grad_norm": 0.8746941685676575, "learning_rate": 0.0001, "loss": 0.0118, "step": 55810 }, { "epoch": 367.2368421052632, "grad_norm": 1.1686458587646484, "learning_rate": 0.0001, "loss": 0.0131, "step": 55820 }, { "epoch": 367.30263157894734, "grad_norm": 0.9047589302062988, "learning_rate": 0.0001, "loss": 0.0137, "step": 55830 }, { "epoch": 367.36842105263156, "grad_norm": 1.1924694776535034, "learning_rate": 0.0001, "loss": 0.0123, "step": 55840 }, { "epoch": 367.4342105263158, "grad_norm": 1.4154642820358276, "learning_rate": 0.0001, "loss": 0.0137, "step": 55850 }, { "epoch": 367.5, "grad_norm": 1.3152343034744263, "learning_rate": 0.0001, "loss": 0.0147, "step": 55860 }, { "epoch": 367.5657894736842, "grad_norm": 1.2152726650238037, "learning_rate": 0.0001, "loss": 0.0113, "step": 55870 }, { "epoch": 367.63157894736844, "grad_norm": 1.0928616523742676, "learning_rate": 0.0001, "loss": 0.0115, "step": 55880 }, { "epoch": 367.69736842105266, "grad_norm": 1.3509669303894043, "learning_rate": 0.0001, "loss": 0.0124, "step": 55890 }, { "epoch": 367.7631578947368, "grad_norm": 1.1209146976470947, "learning_rate": 0.0001, "loss": 0.0141, "step": 55900 }, { "epoch": 367.82894736842104, "grad_norm": 1.1768624782562256, "learning_rate": 0.0001, "loss": 0.0118, "step": 55910 }, { "epoch": 367.89473684210526, "grad_norm": 1.249316930770874, "learning_rate": 0.0001, "loss": 0.0128, "step": 55920 }, { "epoch": 367.9605263157895, "grad_norm": 1.1277846097946167, "learning_rate": 0.0001, "loss": 0.0117, "step": 55930 }, { "epoch": 368.0263157894737, "grad_norm": 1.326911449432373, "learning_rate": 0.0001, "loss": 0.0172, "step": 55940 }, { "epoch": 368.0921052631579, "grad_norm": 1.5124350786209106, "learning_rate": 0.0001, "loss": 0.0139, "step": 55950 }, { "epoch": 368.1578947368421, "grad_norm": 0.921715259552002, "learning_rate": 0.0001, "loss": 0.0128, "step": 55960 }, { "epoch": 368.2236842105263, "grad_norm": 1.4262442588806152, "learning_rate": 0.0001, "loss": 0.0128, "step": 55970 }, { "epoch": 368.2894736842105, "grad_norm": 1.1071768999099731, "learning_rate": 0.0001, "loss": 0.0115, "step": 55980 }, { "epoch": 368.35526315789474, "grad_norm": 1.1342467069625854, "learning_rate": 0.0001, "loss": 0.0131, "step": 55990 }, { "epoch": 368.42105263157896, "grad_norm": 1.218801736831665, "learning_rate": 0.0001, "loss": 0.0137, "step": 56000 }, { "epoch": 368.4868421052632, "grad_norm": 1.243741750717163, "learning_rate": 0.0001, "loss": 0.012, "step": 56010 }, { "epoch": 368.55263157894734, "grad_norm": 1.1212338209152222, "learning_rate": 0.0001, "loss": 0.0121, "step": 56020 }, { "epoch": 368.61842105263156, "grad_norm": 1.3740099668502808, "learning_rate": 0.0001, "loss": 0.013, "step": 56030 }, { "epoch": 368.6842105263158, "grad_norm": 1.2437816858291626, "learning_rate": 0.0001, "loss": 0.0123, "step": 56040 }, { "epoch": 368.75, "grad_norm": 1.3597102165222168, "learning_rate": 0.0001, "loss": 0.012, "step": 56050 }, { "epoch": 368.8157894736842, "grad_norm": 1.264675498008728, "learning_rate": 0.0001, "loss": 0.013, "step": 56060 }, { "epoch": 368.88157894736844, "grad_norm": 1.0519795417785645, "learning_rate": 0.0001, "loss": 0.0131, "step": 56070 }, { "epoch": 368.94736842105266, "grad_norm": 1.288150429725647, "learning_rate": 0.0001, "loss": 0.0121, "step": 56080 }, { "epoch": 369.0131578947368, "grad_norm": 1.3312660455703735, "learning_rate": 0.0001, "loss": 0.0128, "step": 56090 }, { "epoch": 369.07894736842104, "grad_norm": 1.0909931659698486, "learning_rate": 0.0001, "loss": 0.0145, "step": 56100 }, { "epoch": 369.14473684210526, "grad_norm": 1.1233609914779663, "learning_rate": 0.0001, "loss": 0.0142, "step": 56110 }, { "epoch": 369.2105263157895, "grad_norm": 1.1134326457977295, "learning_rate": 0.0001, "loss": 0.0135, "step": 56120 }, { "epoch": 369.2763157894737, "grad_norm": 1.4635008573532104, "learning_rate": 0.0001, "loss": 0.0116, "step": 56130 }, { "epoch": 369.3421052631579, "grad_norm": 1.5184545516967773, "learning_rate": 0.0001, "loss": 0.0123, "step": 56140 }, { "epoch": 369.4078947368421, "grad_norm": 1.1048532724380493, "learning_rate": 0.0001, "loss": 0.0124, "step": 56150 }, { "epoch": 369.4736842105263, "grad_norm": 1.1498737335205078, "learning_rate": 0.0001, "loss": 0.0116, "step": 56160 }, { "epoch": 369.5394736842105, "grad_norm": 1.2778600454330444, "learning_rate": 0.0001, "loss": 0.0116, "step": 56170 }, { "epoch": 369.60526315789474, "grad_norm": 1.0318385362625122, "learning_rate": 0.0001, "loss": 0.0114, "step": 56180 }, { "epoch": 369.67105263157896, "grad_norm": 0.9718679785728455, "learning_rate": 0.0001, "loss": 0.0121, "step": 56190 }, { "epoch": 369.7368421052632, "grad_norm": 1.2061275243759155, "learning_rate": 0.0001, "loss": 0.0147, "step": 56200 }, { "epoch": 369.80263157894734, "grad_norm": 1.2633416652679443, "learning_rate": 0.0001, "loss": 0.0131, "step": 56210 }, { "epoch": 369.86842105263156, "grad_norm": 1.3707966804504395, "learning_rate": 0.0001, "loss": 0.0119, "step": 56220 }, { "epoch": 369.9342105263158, "grad_norm": 1.2290523052215576, "learning_rate": 0.0001, "loss": 0.0147, "step": 56230 }, { "epoch": 370.0, "grad_norm": 0.9901204109191895, "learning_rate": 0.0001, "loss": 0.013, "step": 56240 }, { "epoch": 370.0657894736842, "grad_norm": 1.1951624155044556, "learning_rate": 0.0001, "loss": 0.0129, "step": 56250 }, { "epoch": 370.13157894736844, "grad_norm": 1.1654387712478638, "learning_rate": 0.0001, "loss": 0.0104, "step": 56260 }, { "epoch": 370.19736842105266, "grad_norm": 1.5832988023757935, "learning_rate": 0.0001, "loss": 0.0145, "step": 56270 }, { "epoch": 370.2631578947368, "grad_norm": 1.0225611925125122, "learning_rate": 0.0001, "loss": 0.0124, "step": 56280 }, { "epoch": 370.32894736842104, "grad_norm": 1.5608348846435547, "learning_rate": 0.0001, "loss": 0.0141, "step": 56290 }, { "epoch": 370.39473684210526, "grad_norm": 1.4639538526535034, "learning_rate": 0.0001, "loss": 0.014, "step": 56300 }, { "epoch": 370.4605263157895, "grad_norm": 1.4396363496780396, "learning_rate": 0.0001, "loss": 0.0118, "step": 56310 }, { "epoch": 370.5263157894737, "grad_norm": 0.938585638999939, "learning_rate": 0.0001, "loss": 0.0144, "step": 56320 }, { "epoch": 370.5921052631579, "grad_norm": 1.232274055480957, "learning_rate": 0.0001, "loss": 0.0105, "step": 56330 }, { "epoch": 370.6578947368421, "grad_norm": 1.1240586042404175, "learning_rate": 0.0001, "loss": 0.0115, "step": 56340 }, { "epoch": 370.7236842105263, "grad_norm": 1.164602279663086, "learning_rate": 0.0001, "loss": 0.0119, "step": 56350 }, { "epoch": 370.7894736842105, "grad_norm": 1.4600279331207275, "learning_rate": 0.0001, "loss": 0.0142, "step": 56360 }, { "epoch": 370.85526315789474, "grad_norm": 1.1497042179107666, "learning_rate": 0.0001, "loss": 0.014, "step": 56370 }, { "epoch": 370.92105263157896, "grad_norm": 1.1283025741577148, "learning_rate": 0.0001, "loss": 0.0121, "step": 56380 }, { "epoch": 370.9868421052632, "grad_norm": 1.196718454360962, "learning_rate": 0.0001, "loss": 0.0129, "step": 56390 }, { "epoch": 371.05263157894734, "grad_norm": 0.8738747239112854, "learning_rate": 0.0001, "loss": 0.012, "step": 56400 }, { "epoch": 371.11842105263156, "grad_norm": 1.2236226797103882, "learning_rate": 0.0001, "loss": 0.0122, "step": 56410 }, { "epoch": 371.1842105263158, "grad_norm": 1.3389105796813965, "learning_rate": 0.0001, "loss": 0.0121, "step": 56420 }, { "epoch": 371.25, "grad_norm": 1.1783345937728882, "learning_rate": 0.0001, "loss": 0.0121, "step": 56430 }, { "epoch": 371.3157894736842, "grad_norm": 1.1184672117233276, "learning_rate": 0.0001, "loss": 0.0131, "step": 56440 }, { "epoch": 371.38157894736844, "grad_norm": 1.3328872919082642, "learning_rate": 0.0001, "loss": 0.0122, "step": 56450 }, { "epoch": 371.44736842105266, "grad_norm": 1.3304328918457031, "learning_rate": 0.0001, "loss": 0.0119, "step": 56460 }, { "epoch": 371.5131578947368, "grad_norm": 1.4431946277618408, "learning_rate": 0.0001, "loss": 0.0116, "step": 56470 }, { "epoch": 371.57894736842104, "grad_norm": 1.3655774593353271, "learning_rate": 0.0001, "loss": 0.013, "step": 56480 }, { "epoch": 371.64473684210526, "grad_norm": 0.9467471837997437, "learning_rate": 0.0001, "loss": 0.012, "step": 56490 }, { "epoch": 371.7105263157895, "grad_norm": 1.276974081993103, "learning_rate": 0.0001, "loss": 0.0157, "step": 56500 }, { "epoch": 371.7763157894737, "grad_norm": 1.1539723873138428, "learning_rate": 0.0001, "loss": 0.0119, "step": 56510 }, { "epoch": 371.8421052631579, "grad_norm": 1.0478674173355103, "learning_rate": 0.0001, "loss": 0.0142, "step": 56520 }, { "epoch": 371.9078947368421, "grad_norm": 1.2943617105484009, "learning_rate": 0.0001, "loss": 0.014, "step": 56530 }, { "epoch": 371.9736842105263, "grad_norm": 1.0241856575012207, "learning_rate": 0.0001, "loss": 0.0112, "step": 56540 }, { "epoch": 372.0394736842105, "grad_norm": 1.068509817123413, "learning_rate": 0.0001, "loss": 0.0138, "step": 56550 }, { "epoch": 372.10526315789474, "grad_norm": 1.422802209854126, "learning_rate": 0.0001, "loss": 0.0145, "step": 56560 }, { "epoch": 372.17105263157896, "grad_norm": 1.3704060316085815, "learning_rate": 0.0001, "loss": 0.0114, "step": 56570 }, { "epoch": 372.2368421052632, "grad_norm": 1.3845621347427368, "learning_rate": 0.0001, "loss": 0.0114, "step": 56580 }, { "epoch": 372.30263157894734, "grad_norm": 1.221064805984497, "learning_rate": 0.0001, "loss": 0.0122, "step": 56590 }, { "epoch": 372.36842105263156, "grad_norm": 0.9670616388320923, "learning_rate": 0.0001, "loss": 0.0103, "step": 56600 }, { "epoch": 372.4342105263158, "grad_norm": 1.3151499032974243, "learning_rate": 0.0001, "loss": 0.0127, "step": 56610 }, { "epoch": 372.5, "grad_norm": 1.210457682609558, "learning_rate": 0.0001, "loss": 0.0117, "step": 56620 }, { "epoch": 372.5657894736842, "grad_norm": 1.1957846879959106, "learning_rate": 0.0001, "loss": 0.0116, "step": 56630 }, { "epoch": 372.63157894736844, "grad_norm": 0.8912261724472046, "learning_rate": 0.0001, "loss": 0.0133, "step": 56640 }, { "epoch": 372.69736842105266, "grad_norm": 1.1011767387390137, "learning_rate": 0.0001, "loss": 0.0131, "step": 56650 }, { "epoch": 372.7631578947368, "grad_norm": 1.0120614767074585, "learning_rate": 0.0001, "loss": 0.0138, "step": 56660 }, { "epoch": 372.82894736842104, "grad_norm": 1.5126779079437256, "learning_rate": 0.0001, "loss": 0.0132, "step": 56670 }, { "epoch": 372.89473684210526, "grad_norm": 1.7110366821289062, "learning_rate": 0.0001, "loss": 0.0144, "step": 56680 }, { "epoch": 372.9605263157895, "grad_norm": 1.3137553930282593, "learning_rate": 0.0001, "loss": 0.0126, "step": 56690 }, { "epoch": 373.0263157894737, "grad_norm": 1.3267024755477905, "learning_rate": 0.0001, "loss": 0.013, "step": 56700 }, { "epoch": 373.0921052631579, "grad_norm": 1.0713841915130615, "learning_rate": 0.0001, "loss": 0.0132, "step": 56710 }, { "epoch": 373.1578947368421, "grad_norm": 1.2255971431732178, "learning_rate": 0.0001, "loss": 0.0128, "step": 56720 }, { "epoch": 373.2236842105263, "grad_norm": 1.1243081092834473, "learning_rate": 0.0001, "loss": 0.0136, "step": 56730 }, { "epoch": 373.2894736842105, "grad_norm": 0.909152090549469, "learning_rate": 0.0001, "loss": 0.012, "step": 56740 }, { "epoch": 373.35526315789474, "grad_norm": 0.9241997003555298, "learning_rate": 0.0001, "loss": 0.0119, "step": 56750 }, { "epoch": 373.42105263157896, "grad_norm": 0.7907554507255554, "learning_rate": 0.0001, "loss": 0.0127, "step": 56760 }, { "epoch": 373.4868421052632, "grad_norm": 1.4424355030059814, "learning_rate": 0.0001, "loss": 0.0126, "step": 56770 }, { "epoch": 373.55263157894734, "grad_norm": 0.7469786405563354, "learning_rate": 0.0001, "loss": 0.0134, "step": 56780 }, { "epoch": 373.61842105263156, "grad_norm": 1.0877963304519653, "learning_rate": 0.0001, "loss": 0.0119, "step": 56790 }, { "epoch": 373.6842105263158, "grad_norm": 1.020845890045166, "learning_rate": 0.0001, "loss": 0.0131, "step": 56800 }, { "epoch": 373.75, "grad_norm": 0.8404970765113831, "learning_rate": 0.0001, "loss": 0.013, "step": 56810 }, { "epoch": 373.8157894736842, "grad_norm": 1.032677412033081, "learning_rate": 0.0001, "loss": 0.0143, "step": 56820 }, { "epoch": 373.88157894736844, "grad_norm": 1.0752617120742798, "learning_rate": 0.0001, "loss": 0.0133, "step": 56830 }, { "epoch": 373.94736842105266, "grad_norm": 0.9850388169288635, "learning_rate": 0.0001, "loss": 0.0138, "step": 56840 }, { "epoch": 374.0131578947368, "grad_norm": 0.8292937874794006, "learning_rate": 0.0001, "loss": 0.0121, "step": 56850 }, { "epoch": 374.07894736842104, "grad_norm": 1.0605348348617554, "learning_rate": 0.0001, "loss": 0.0138, "step": 56860 }, { "epoch": 374.14473684210526, "grad_norm": 1.2353730201721191, "learning_rate": 0.0001, "loss": 0.0136, "step": 56870 }, { "epoch": 374.2105263157895, "grad_norm": 0.9276547431945801, "learning_rate": 0.0001, "loss": 0.014, "step": 56880 }, { "epoch": 374.2763157894737, "grad_norm": 1.3930708169937134, "learning_rate": 0.0001, "loss": 0.0119, "step": 56890 }, { "epoch": 374.3421052631579, "grad_norm": 1.2086683511734009, "learning_rate": 0.0001, "loss": 0.0122, "step": 56900 }, { "epoch": 374.4078947368421, "grad_norm": 1.2379398345947266, "learning_rate": 0.0001, "loss": 0.0131, "step": 56910 }, { "epoch": 374.4736842105263, "grad_norm": 1.1630572080612183, "learning_rate": 0.0001, "loss": 0.0123, "step": 56920 }, { "epoch": 374.5394736842105, "grad_norm": 1.2085312604904175, "learning_rate": 0.0001, "loss": 0.0144, "step": 56930 }, { "epoch": 374.60526315789474, "grad_norm": 1.1504122018814087, "learning_rate": 0.0001, "loss": 0.0125, "step": 56940 }, { "epoch": 374.67105263157896, "grad_norm": 1.1568752527236938, "learning_rate": 0.0001, "loss": 0.0129, "step": 56950 }, { "epoch": 374.7368421052632, "grad_norm": 1.122700572013855, "learning_rate": 0.0001, "loss": 0.0148, "step": 56960 }, { "epoch": 374.80263157894734, "grad_norm": 0.869761049747467, "learning_rate": 0.0001, "loss": 0.0122, "step": 56970 }, { "epoch": 374.86842105263156, "grad_norm": 1.219969630241394, "learning_rate": 0.0001, "loss": 0.0132, "step": 56980 }, { "epoch": 374.9342105263158, "grad_norm": 1.0810168981552124, "learning_rate": 0.0001, "loss": 0.0128, "step": 56990 }, { "epoch": 375.0, "grad_norm": 0.8487389087677002, "learning_rate": 0.0001, "loss": 0.0135, "step": 57000 }, { "epoch": 375.0657894736842, "grad_norm": 1.3084897994995117, "learning_rate": 0.0001, "loss": 0.0129, "step": 57010 }, { "epoch": 375.13157894736844, "grad_norm": 1.0866003036499023, "learning_rate": 0.0001, "loss": 0.0138, "step": 57020 }, { "epoch": 375.19736842105266, "grad_norm": 1.215729832649231, "learning_rate": 0.0001, "loss": 0.0123, "step": 57030 }, { "epoch": 375.2631578947368, "grad_norm": 1.321714162826538, "learning_rate": 0.0001, "loss": 0.0123, "step": 57040 }, { "epoch": 375.32894736842104, "grad_norm": 1.21835458278656, "learning_rate": 0.0001, "loss": 0.0118, "step": 57050 }, { "epoch": 375.39473684210526, "grad_norm": 1.352271318435669, "learning_rate": 0.0001, "loss": 0.0119, "step": 57060 }, { "epoch": 375.4605263157895, "grad_norm": 1.1468392610549927, "learning_rate": 0.0001, "loss": 0.0141, "step": 57070 }, { "epoch": 375.5263157894737, "grad_norm": 0.9708956480026245, "learning_rate": 0.0001, "loss": 0.0124, "step": 57080 }, { "epoch": 375.5921052631579, "grad_norm": 1.2509098052978516, "learning_rate": 0.0001, "loss": 0.0132, "step": 57090 }, { "epoch": 375.6578947368421, "grad_norm": 0.8894394636154175, "learning_rate": 0.0001, "loss": 0.0132, "step": 57100 }, { "epoch": 375.7236842105263, "grad_norm": 1.4251313209533691, "learning_rate": 0.0001, "loss": 0.0133, "step": 57110 }, { "epoch": 375.7894736842105, "grad_norm": 1.3613892793655396, "learning_rate": 0.0001, "loss": 0.0121, "step": 57120 }, { "epoch": 375.85526315789474, "grad_norm": 1.2195764780044556, "learning_rate": 0.0001, "loss": 0.0144, "step": 57130 }, { "epoch": 375.92105263157896, "grad_norm": 1.2804150581359863, "learning_rate": 0.0001, "loss": 0.0134, "step": 57140 }, { "epoch": 375.9868421052632, "grad_norm": 1.2926859855651855, "learning_rate": 0.0001, "loss": 0.013, "step": 57150 }, { "epoch": 376.05263157894734, "grad_norm": 1.2950412034988403, "learning_rate": 0.0001, "loss": 0.0119, "step": 57160 }, { "epoch": 376.11842105263156, "grad_norm": 1.122531771659851, "learning_rate": 0.0001, "loss": 0.0114, "step": 57170 }, { "epoch": 376.1842105263158, "grad_norm": 1.0365455150604248, "learning_rate": 0.0001, "loss": 0.0159, "step": 57180 }, { "epoch": 376.25, "grad_norm": 0.9025917649269104, "learning_rate": 0.0001, "loss": 0.012, "step": 57190 }, { "epoch": 376.3157894736842, "grad_norm": 1.360355257987976, "learning_rate": 0.0001, "loss": 0.0139, "step": 57200 }, { "epoch": 376.38157894736844, "grad_norm": 1.0496774911880493, "learning_rate": 0.0001, "loss": 0.0132, "step": 57210 }, { "epoch": 376.44736842105266, "grad_norm": 0.9033643007278442, "learning_rate": 0.0001, "loss": 0.0144, "step": 57220 }, { "epoch": 376.5131578947368, "grad_norm": 1.0611474514007568, "learning_rate": 0.0001, "loss": 0.0129, "step": 57230 }, { "epoch": 376.57894736842104, "grad_norm": 1.022553563117981, "learning_rate": 0.0001, "loss": 0.0125, "step": 57240 }, { "epoch": 376.64473684210526, "grad_norm": 1.3348984718322754, "learning_rate": 0.0001, "loss": 0.0123, "step": 57250 }, { "epoch": 376.7105263157895, "grad_norm": 1.362504005432129, "learning_rate": 0.0001, "loss": 0.0134, "step": 57260 }, { "epoch": 376.7763157894737, "grad_norm": 1.2323849201202393, "learning_rate": 0.0001, "loss": 0.0145, "step": 57270 }, { "epoch": 376.8421052631579, "grad_norm": 1.3414735794067383, "learning_rate": 0.0001, "loss": 0.0109, "step": 57280 }, { "epoch": 376.9078947368421, "grad_norm": 1.2798577547073364, "learning_rate": 0.0001, "loss": 0.0138, "step": 57290 }, { "epoch": 376.9736842105263, "grad_norm": 0.9005674719810486, "learning_rate": 0.0001, "loss": 0.0126, "step": 57300 }, { "epoch": 377.0394736842105, "grad_norm": 1.0160363912582397, "learning_rate": 0.0001, "loss": 0.0119, "step": 57310 }, { "epoch": 377.10526315789474, "grad_norm": 1.1084673404693604, "learning_rate": 0.0001, "loss": 0.0116, "step": 57320 }, { "epoch": 377.17105263157896, "grad_norm": 1.4065401554107666, "learning_rate": 0.0001, "loss": 0.0132, "step": 57330 }, { "epoch": 377.2368421052632, "grad_norm": 1.1100188493728638, "learning_rate": 0.0001, "loss": 0.0126, "step": 57340 }, { "epoch": 377.30263157894734, "grad_norm": 0.9800202250480652, "learning_rate": 0.0001, "loss": 0.0117, "step": 57350 }, { "epoch": 377.36842105263156, "grad_norm": 1.887536644935608, "learning_rate": 0.0001, "loss": 0.0136, "step": 57360 }, { "epoch": 377.4342105263158, "grad_norm": 1.2453606128692627, "learning_rate": 0.0001, "loss": 0.0164, "step": 57370 }, { "epoch": 377.5, "grad_norm": 1.2225714921951294, "learning_rate": 0.0001, "loss": 0.0132, "step": 57380 }, { "epoch": 377.5657894736842, "grad_norm": 1.2892736196517944, "learning_rate": 0.0001, "loss": 0.0127, "step": 57390 }, { "epoch": 377.63157894736844, "grad_norm": 1.242989420890808, "learning_rate": 0.0001, "loss": 0.0119, "step": 57400 }, { "epoch": 377.69736842105266, "grad_norm": 1.3066517114639282, "learning_rate": 0.0001, "loss": 0.012, "step": 57410 }, { "epoch": 377.7631578947368, "grad_norm": 1.126461386680603, "learning_rate": 0.0001, "loss": 0.0121, "step": 57420 }, { "epoch": 377.82894736842104, "grad_norm": 1.0878877639770508, "learning_rate": 0.0001, "loss": 0.0124, "step": 57430 }, { "epoch": 377.89473684210526, "grad_norm": 1.2524346113204956, "learning_rate": 0.0001, "loss": 0.0128, "step": 57440 }, { "epoch": 377.9605263157895, "grad_norm": 1.3451015949249268, "learning_rate": 0.0001, "loss": 0.0126, "step": 57450 }, { "epoch": 378.0263157894737, "grad_norm": 1.1526377201080322, "learning_rate": 0.0001, "loss": 0.0125, "step": 57460 }, { "epoch": 378.0921052631579, "grad_norm": 0.9030545353889465, "learning_rate": 0.0001, "loss": 0.0138, "step": 57470 }, { "epoch": 378.1578947368421, "grad_norm": 0.7027451992034912, "learning_rate": 0.0001, "loss": 0.0113, "step": 57480 }, { "epoch": 378.2236842105263, "grad_norm": 0.7617095708847046, "learning_rate": 0.0001, "loss": 0.012, "step": 57490 }, { "epoch": 378.2894736842105, "grad_norm": 1.2447706460952759, "learning_rate": 0.0001, "loss": 0.0124, "step": 57500 }, { "epoch": 378.35526315789474, "grad_norm": 1.3532993793487549, "learning_rate": 0.0001, "loss": 0.0127, "step": 57510 }, { "epoch": 378.42105263157896, "grad_norm": 1.154651165008545, "learning_rate": 0.0001, "loss": 0.0128, "step": 57520 }, { "epoch": 378.4868421052632, "grad_norm": 1.0880413055419922, "learning_rate": 0.0001, "loss": 0.0124, "step": 57530 }, { "epoch": 378.55263157894734, "grad_norm": 0.9911805987358093, "learning_rate": 0.0001, "loss": 0.0151, "step": 57540 }, { "epoch": 378.61842105263156, "grad_norm": 1.3125237226486206, "learning_rate": 0.0001, "loss": 0.0131, "step": 57550 }, { "epoch": 378.6842105263158, "grad_norm": 1.2095872163772583, "learning_rate": 0.0001, "loss": 0.0117, "step": 57560 }, { "epoch": 378.75, "grad_norm": 1.300068736076355, "learning_rate": 0.0001, "loss": 0.0149, "step": 57570 }, { "epoch": 378.8157894736842, "grad_norm": 0.9355725049972534, "learning_rate": 0.0001, "loss": 0.011, "step": 57580 }, { "epoch": 378.88157894736844, "grad_norm": 1.0073492527008057, "learning_rate": 0.0001, "loss": 0.0121, "step": 57590 }, { "epoch": 378.94736842105266, "grad_norm": 1.3095647096633911, "learning_rate": 0.0001, "loss": 0.0122, "step": 57600 }, { "epoch": 379.0131578947368, "grad_norm": 1.3785039186477661, "learning_rate": 0.0001, "loss": 0.0133, "step": 57610 }, { "epoch": 379.07894736842104, "grad_norm": 1.2601360082626343, "learning_rate": 0.0001, "loss": 0.011, "step": 57620 }, { "epoch": 379.14473684210526, "grad_norm": 1.2998868227005005, "learning_rate": 0.0001, "loss": 0.0112, "step": 57630 }, { "epoch": 379.2105263157895, "grad_norm": 1.2391390800476074, "learning_rate": 0.0001, "loss": 0.0139, "step": 57640 }, { "epoch": 379.2763157894737, "grad_norm": 1.320417046546936, "learning_rate": 0.0001, "loss": 0.0135, "step": 57650 }, { "epoch": 379.3421052631579, "grad_norm": 1.2020084857940674, "learning_rate": 0.0001, "loss": 0.015, "step": 57660 }, { "epoch": 379.4078947368421, "grad_norm": 1.2088592052459717, "learning_rate": 0.0001, "loss": 0.0129, "step": 57670 }, { "epoch": 379.4736842105263, "grad_norm": 0.9938647747039795, "learning_rate": 0.0001, "loss": 0.0127, "step": 57680 }, { "epoch": 379.5394736842105, "grad_norm": 1.057723879814148, "learning_rate": 0.0001, "loss": 0.0124, "step": 57690 }, { "epoch": 379.60526315789474, "grad_norm": 0.9933873414993286, "learning_rate": 0.0001, "loss": 0.0119, "step": 57700 }, { "epoch": 379.67105263157896, "grad_norm": 0.7416632771492004, "learning_rate": 0.0001, "loss": 0.0132, "step": 57710 }, { "epoch": 379.7368421052632, "grad_norm": 1.292233943939209, "learning_rate": 0.0001, "loss": 0.0147, "step": 57720 }, { "epoch": 379.80263157894734, "grad_norm": 1.1518487930297852, "learning_rate": 0.0001, "loss": 0.0124, "step": 57730 }, { "epoch": 379.86842105263156, "grad_norm": 1.1070126295089722, "learning_rate": 0.0001, "loss": 0.014, "step": 57740 }, { "epoch": 379.9342105263158, "grad_norm": 0.9124812483787537, "learning_rate": 0.0001, "loss": 0.012, "step": 57750 }, { "epoch": 380.0, "grad_norm": 1.2951626777648926, "learning_rate": 0.0001, "loss": 0.0124, "step": 57760 }, { "epoch": 380.0657894736842, "grad_norm": 1.2988245487213135, "learning_rate": 0.0001, "loss": 0.0114, "step": 57770 }, { "epoch": 380.13157894736844, "grad_norm": 1.5885379314422607, "learning_rate": 0.0001, "loss": 0.0128, "step": 57780 }, { "epoch": 380.19736842105266, "grad_norm": 1.343135118484497, "learning_rate": 0.0001, "loss": 0.012, "step": 57790 }, { "epoch": 380.2631578947368, "grad_norm": 1.0885659456253052, "learning_rate": 0.0001, "loss": 0.014, "step": 57800 }, { "epoch": 380.32894736842104, "grad_norm": 1.7573096752166748, "learning_rate": 0.0001, "loss": 0.0119, "step": 57810 }, { "epoch": 380.39473684210526, "grad_norm": 1.4243195056915283, "learning_rate": 0.0001, "loss": 0.0119, "step": 57820 }, { "epoch": 380.4605263157895, "grad_norm": 1.1941052675247192, "learning_rate": 0.0001, "loss": 0.013, "step": 57830 }, { "epoch": 380.5263157894737, "grad_norm": 1.3269431591033936, "learning_rate": 0.0001, "loss": 0.0126, "step": 57840 }, { "epoch": 380.5921052631579, "grad_norm": 1.0225571393966675, "learning_rate": 0.0001, "loss": 0.0131, "step": 57850 }, { "epoch": 380.6578947368421, "grad_norm": 1.412798523902893, "learning_rate": 0.0001, "loss": 0.013, "step": 57860 }, { "epoch": 380.7236842105263, "grad_norm": 1.382020354270935, "learning_rate": 0.0001, "loss": 0.0124, "step": 57870 }, { "epoch": 380.7894736842105, "grad_norm": 1.5677051544189453, "learning_rate": 0.0001, "loss": 0.0134, "step": 57880 }, { "epoch": 380.85526315789474, "grad_norm": 1.2945635318756104, "learning_rate": 0.0001, "loss": 0.0135, "step": 57890 }, { "epoch": 380.92105263157896, "grad_norm": 1.3230671882629395, "learning_rate": 0.0001, "loss": 0.012, "step": 57900 }, { "epoch": 380.9868421052632, "grad_norm": 1.1088383197784424, "learning_rate": 0.0001, "loss": 0.0139, "step": 57910 }, { "epoch": 381.05263157894734, "grad_norm": 1.479811668395996, "learning_rate": 0.0001, "loss": 0.0129, "step": 57920 }, { "epoch": 381.11842105263156, "grad_norm": 1.2625958919525146, "learning_rate": 0.0001, "loss": 0.0122, "step": 57930 }, { "epoch": 381.1842105263158, "grad_norm": 1.2790255546569824, "learning_rate": 0.0001, "loss": 0.0128, "step": 57940 }, { "epoch": 381.25, "grad_norm": 1.589355707168579, "learning_rate": 0.0001, "loss": 0.0116, "step": 57950 }, { "epoch": 381.3157894736842, "grad_norm": 1.3272757530212402, "learning_rate": 0.0001, "loss": 0.0114, "step": 57960 }, { "epoch": 381.38157894736844, "grad_norm": 0.8817294239997864, "learning_rate": 0.0001, "loss": 0.0132, "step": 57970 }, { "epoch": 381.44736842105266, "grad_norm": 1.2385287284851074, "learning_rate": 0.0001, "loss": 0.0113, "step": 57980 }, { "epoch": 381.5131578947368, "grad_norm": 1.324324131011963, "learning_rate": 0.0001, "loss": 0.0146, "step": 57990 }, { "epoch": 381.57894736842104, "grad_norm": 1.4512399435043335, "learning_rate": 0.0001, "loss": 0.0149, "step": 58000 }, { "epoch": 381.64473684210526, "grad_norm": 1.277093529701233, "learning_rate": 0.0001, "loss": 0.0133, "step": 58010 }, { "epoch": 381.7105263157895, "grad_norm": 1.15549635887146, "learning_rate": 0.0001, "loss": 0.0121, "step": 58020 }, { "epoch": 381.7763157894737, "grad_norm": 0.7976158261299133, "learning_rate": 0.0001, "loss": 0.0132, "step": 58030 }, { "epoch": 381.8421052631579, "grad_norm": 1.189330816268921, "learning_rate": 0.0001, "loss": 0.0142, "step": 58040 }, { "epoch": 381.9078947368421, "grad_norm": 1.5687916278839111, "learning_rate": 0.0001, "loss": 0.0133, "step": 58050 }, { "epoch": 381.9736842105263, "grad_norm": 1.33320152759552, "learning_rate": 0.0001, "loss": 0.0108, "step": 58060 }, { "epoch": 382.0394736842105, "grad_norm": 1.0838332176208496, "learning_rate": 0.0001, "loss": 0.0123, "step": 58070 }, { "epoch": 382.10526315789474, "grad_norm": 1.0593252182006836, "learning_rate": 0.0001, "loss": 0.0142, "step": 58080 }, { "epoch": 382.17105263157896, "grad_norm": 1.1587085723876953, "learning_rate": 0.0001, "loss": 0.0132, "step": 58090 }, { "epoch": 382.2368421052632, "grad_norm": 1.2897980213165283, "learning_rate": 0.0001, "loss": 0.0124, "step": 58100 }, { "epoch": 382.30263157894734, "grad_norm": 1.4974095821380615, "learning_rate": 0.0001, "loss": 0.0126, "step": 58110 }, { "epoch": 382.36842105263156, "grad_norm": 1.2067298889160156, "learning_rate": 0.0001, "loss": 0.0134, "step": 58120 }, { "epoch": 382.4342105263158, "grad_norm": 1.151006817817688, "learning_rate": 0.0001, "loss": 0.0124, "step": 58130 }, { "epoch": 382.5, "grad_norm": 0.9617295861244202, "learning_rate": 0.0001, "loss": 0.0124, "step": 58140 }, { "epoch": 382.5657894736842, "grad_norm": 1.2148723602294922, "learning_rate": 0.0001, "loss": 0.0125, "step": 58150 }, { "epoch": 382.63157894736844, "grad_norm": 0.9488323926925659, "learning_rate": 0.0001, "loss": 0.0126, "step": 58160 }, { "epoch": 382.69736842105266, "grad_norm": 1.2400751113891602, "learning_rate": 0.0001, "loss": 0.0134, "step": 58170 }, { "epoch": 382.7631578947368, "grad_norm": 1.123399019241333, "learning_rate": 0.0001, "loss": 0.0126, "step": 58180 }, { "epoch": 382.82894736842104, "grad_norm": 1.2090132236480713, "learning_rate": 0.0001, "loss": 0.0153, "step": 58190 }, { "epoch": 382.89473684210526, "grad_norm": 1.1165274381637573, "learning_rate": 0.0001, "loss": 0.0132, "step": 58200 }, { "epoch": 382.9605263157895, "grad_norm": 1.304680585861206, "learning_rate": 0.0001, "loss": 0.0118, "step": 58210 }, { "epoch": 383.0263157894737, "grad_norm": 1.3738752603530884, "learning_rate": 0.0001, "loss": 0.0128, "step": 58220 }, { "epoch": 383.0921052631579, "grad_norm": 0.8342259526252747, "learning_rate": 0.0001, "loss": 0.0137, "step": 58230 }, { "epoch": 383.1578947368421, "grad_norm": 1.0694853067398071, "learning_rate": 0.0001, "loss": 0.0121, "step": 58240 }, { "epoch": 383.2236842105263, "grad_norm": 1.22238028049469, "learning_rate": 0.0001, "loss": 0.0112, "step": 58250 }, { "epoch": 383.2894736842105, "grad_norm": 0.9267398715019226, "learning_rate": 0.0001, "loss": 0.0128, "step": 58260 }, { "epoch": 383.35526315789474, "grad_norm": 0.9709849953651428, "learning_rate": 0.0001, "loss": 0.0127, "step": 58270 }, { "epoch": 383.42105263157896, "grad_norm": 1.3545488119125366, "learning_rate": 0.0001, "loss": 0.0122, "step": 58280 }, { "epoch": 383.4868421052632, "grad_norm": 1.2838683128356934, "learning_rate": 0.0001, "loss": 0.0139, "step": 58290 }, { "epoch": 383.55263157894734, "grad_norm": 0.764008641242981, "learning_rate": 0.0001, "loss": 0.014, "step": 58300 }, { "epoch": 383.61842105263156, "grad_norm": 0.821171224117279, "learning_rate": 0.0001, "loss": 0.0137, "step": 58310 }, { "epoch": 383.6842105263158, "grad_norm": 0.8788691759109497, "learning_rate": 0.0001, "loss": 0.0119, "step": 58320 }, { "epoch": 383.75, "grad_norm": 0.9442429542541504, "learning_rate": 0.0001, "loss": 0.015, "step": 58330 }, { "epoch": 383.8157894736842, "grad_norm": 1.0095479488372803, "learning_rate": 0.0001, "loss": 0.0131, "step": 58340 }, { "epoch": 383.88157894736844, "grad_norm": 1.0548217296600342, "learning_rate": 0.0001, "loss": 0.0137, "step": 58350 }, { "epoch": 383.94736842105266, "grad_norm": 0.9758161902427673, "learning_rate": 0.0001, "loss": 0.0128, "step": 58360 }, { "epoch": 384.0131578947368, "grad_norm": 1.2047322988510132, "learning_rate": 0.0001, "loss": 0.0155, "step": 58370 }, { "epoch": 384.07894736842104, "grad_norm": 0.9442251324653625, "learning_rate": 0.0001, "loss": 0.0137, "step": 58380 }, { "epoch": 384.14473684210526, "grad_norm": 1.4745181798934937, "learning_rate": 0.0001, "loss": 0.0142, "step": 58390 }, { "epoch": 384.2105263157895, "grad_norm": 1.0786917209625244, "learning_rate": 0.0001, "loss": 0.014, "step": 58400 }, { "epoch": 384.2763157894737, "grad_norm": 0.8698955774307251, "learning_rate": 0.0001, "loss": 0.0122, "step": 58410 }, { "epoch": 384.3421052631579, "grad_norm": 1.3999569416046143, "learning_rate": 0.0001, "loss": 0.0138, "step": 58420 }, { "epoch": 384.4078947368421, "grad_norm": 1.203365445137024, "learning_rate": 0.0001, "loss": 0.0116, "step": 58430 }, { "epoch": 384.4736842105263, "grad_norm": 1.1867121458053589, "learning_rate": 0.0001, "loss": 0.0118, "step": 58440 }, { "epoch": 384.5394736842105, "grad_norm": 1.7259279489517212, "learning_rate": 0.0001, "loss": 0.0122, "step": 58450 }, { "epoch": 384.60526315789474, "grad_norm": 1.2636997699737549, "learning_rate": 0.0001, "loss": 0.0147, "step": 58460 }, { "epoch": 384.67105263157896, "grad_norm": 1.0157842636108398, "learning_rate": 0.0001, "loss": 0.0117, "step": 58470 }, { "epoch": 384.7368421052632, "grad_norm": 1.3222954273223877, "learning_rate": 0.0001, "loss": 0.0124, "step": 58480 }, { "epoch": 384.80263157894734, "grad_norm": 1.118708610534668, "learning_rate": 0.0001, "loss": 0.0122, "step": 58490 }, { "epoch": 384.86842105263156, "grad_norm": 0.8435554504394531, "learning_rate": 0.0001, "loss": 0.0136, "step": 58500 }, { "epoch": 384.9342105263158, "grad_norm": 0.8175836205482483, "learning_rate": 0.0001, "loss": 0.0134, "step": 58510 }, { "epoch": 385.0, "grad_norm": 1.041205883026123, "learning_rate": 0.0001, "loss": 0.0126, "step": 58520 }, { "epoch": 385.0657894736842, "grad_norm": 0.7904459238052368, "learning_rate": 0.0001, "loss": 0.0124, "step": 58530 }, { "epoch": 385.13157894736844, "grad_norm": 1.1305633783340454, "learning_rate": 0.0001, "loss": 0.013, "step": 58540 }, { "epoch": 385.19736842105266, "grad_norm": 0.7400175333023071, "learning_rate": 0.0001, "loss": 0.0127, "step": 58550 }, { "epoch": 385.2631578947368, "grad_norm": 0.870387077331543, "learning_rate": 0.0001, "loss": 0.0153, "step": 58560 }, { "epoch": 385.32894736842104, "grad_norm": 0.8553933501243591, "learning_rate": 0.0001, "loss": 0.0143, "step": 58570 }, { "epoch": 385.39473684210526, "grad_norm": 0.9809901714324951, "learning_rate": 0.0001, "loss": 0.013, "step": 58580 }, { "epoch": 385.4605263157895, "grad_norm": 0.8871316313743591, "learning_rate": 0.0001, "loss": 0.0124, "step": 58590 }, { "epoch": 385.5263157894737, "grad_norm": 1.1074786186218262, "learning_rate": 0.0001, "loss": 0.015, "step": 58600 }, { "epoch": 385.5921052631579, "grad_norm": 0.7304058074951172, "learning_rate": 0.0001, "loss": 0.0127, "step": 58610 }, { "epoch": 385.6578947368421, "grad_norm": 1.168735384941101, "learning_rate": 0.0001, "loss": 0.0129, "step": 58620 }, { "epoch": 385.7236842105263, "grad_norm": 0.8042843341827393, "learning_rate": 0.0001, "loss": 0.0132, "step": 58630 }, { "epoch": 385.7894736842105, "grad_norm": 0.9500011801719666, "learning_rate": 0.0001, "loss": 0.0129, "step": 58640 }, { "epoch": 385.85526315789474, "grad_norm": 0.9175438284873962, "learning_rate": 0.0001, "loss": 0.014, "step": 58650 }, { "epoch": 385.92105263157896, "grad_norm": 1.302797555923462, "learning_rate": 0.0001, "loss": 0.012, "step": 58660 }, { "epoch": 385.9868421052632, "grad_norm": 1.2630707025527954, "learning_rate": 0.0001, "loss": 0.0118, "step": 58670 }, { "epoch": 386.05263157894734, "grad_norm": 0.7925125360488892, "learning_rate": 0.0001, "loss": 0.0156, "step": 58680 }, { "epoch": 386.11842105263156, "grad_norm": 0.8696709275245667, "learning_rate": 0.0001, "loss": 0.0142, "step": 58690 }, { "epoch": 386.1842105263158, "grad_norm": 1.196243166923523, "learning_rate": 0.0001, "loss": 0.0143, "step": 58700 }, { "epoch": 386.25, "grad_norm": 1.1323686838150024, "learning_rate": 0.0001, "loss": 0.0118, "step": 58710 }, { "epoch": 386.3157894736842, "grad_norm": 0.9869086742401123, "learning_rate": 0.0001, "loss": 0.0122, "step": 58720 }, { "epoch": 386.38157894736844, "grad_norm": 1.3381530046463013, "learning_rate": 0.0001, "loss": 0.0131, "step": 58730 }, { "epoch": 386.44736842105266, "grad_norm": 1.3545961380004883, "learning_rate": 0.0001, "loss": 0.0131, "step": 58740 }, { "epoch": 386.5131578947368, "grad_norm": 1.1433833837509155, "learning_rate": 0.0001, "loss": 0.013, "step": 58750 }, { "epoch": 386.57894736842104, "grad_norm": 1.3282660245895386, "learning_rate": 0.0001, "loss": 0.0122, "step": 58760 }, { "epoch": 386.64473684210526, "grad_norm": 1.518915057182312, "learning_rate": 0.0001, "loss": 0.0121, "step": 58770 }, { "epoch": 386.7105263157895, "grad_norm": 1.1334174871444702, "learning_rate": 0.0001, "loss": 0.0137, "step": 58780 }, { "epoch": 386.7763157894737, "grad_norm": 1.2663590908050537, "learning_rate": 0.0001, "loss": 0.0135, "step": 58790 }, { "epoch": 386.8421052631579, "grad_norm": 1.148506999015808, "learning_rate": 0.0001, "loss": 0.0112, "step": 58800 }, { "epoch": 386.9078947368421, "grad_norm": 1.2444261312484741, "learning_rate": 0.0001, "loss": 0.013, "step": 58810 }, { "epoch": 386.9736842105263, "grad_norm": 1.1038265228271484, "learning_rate": 0.0001, "loss": 0.0151, "step": 58820 }, { "epoch": 387.0394736842105, "grad_norm": 0.9502344131469727, "learning_rate": 0.0001, "loss": 0.0112, "step": 58830 }, { "epoch": 387.10526315789474, "grad_norm": 1.0720306634902954, "learning_rate": 0.0001, "loss": 0.0127, "step": 58840 }, { "epoch": 387.17105263157896, "grad_norm": 1.243300199508667, "learning_rate": 0.0001, "loss": 0.0127, "step": 58850 }, { "epoch": 387.2368421052632, "grad_norm": 0.916761577129364, "learning_rate": 0.0001, "loss": 0.0119, "step": 58860 }, { "epoch": 387.30263157894734, "grad_norm": 1.0160788297653198, "learning_rate": 0.0001, "loss": 0.0116, "step": 58870 }, { "epoch": 387.36842105263156, "grad_norm": 1.1823748350143433, "learning_rate": 0.0001, "loss": 0.0133, "step": 58880 }, { "epoch": 387.4342105263158, "grad_norm": 1.0291345119476318, "learning_rate": 0.0001, "loss": 0.0127, "step": 58890 }, { "epoch": 387.5, "grad_norm": 1.0827511548995972, "learning_rate": 0.0001, "loss": 0.014, "step": 58900 }, { "epoch": 387.5657894736842, "grad_norm": 1.3324905633926392, "learning_rate": 0.0001, "loss": 0.0139, "step": 58910 }, { "epoch": 387.63157894736844, "grad_norm": 1.159676432609558, "learning_rate": 0.0001, "loss": 0.0134, "step": 58920 }, { "epoch": 387.69736842105266, "grad_norm": 1.6756174564361572, "learning_rate": 0.0001, "loss": 0.0123, "step": 58930 }, { "epoch": 387.7631578947368, "grad_norm": 1.63514244556427, "learning_rate": 0.0001, "loss": 0.0136, "step": 58940 }, { "epoch": 387.82894736842104, "grad_norm": 1.3889161348342896, "learning_rate": 0.0001, "loss": 0.0111, "step": 58950 }, { "epoch": 387.89473684210526, "grad_norm": 1.27745521068573, "learning_rate": 0.0001, "loss": 0.0114, "step": 58960 }, { "epoch": 387.9605263157895, "grad_norm": 1.3350027799606323, "learning_rate": 0.0001, "loss": 0.017, "step": 58970 }, { "epoch": 388.0263157894737, "grad_norm": 1.5261503458023071, "learning_rate": 0.0001, "loss": 0.0111, "step": 58980 }, { "epoch": 388.0921052631579, "grad_norm": 1.6746344566345215, "learning_rate": 0.0001, "loss": 0.0122, "step": 58990 }, { "epoch": 388.1578947368421, "grad_norm": 1.0707603693008423, "learning_rate": 0.0001, "loss": 0.0116, "step": 59000 }, { "epoch": 388.2236842105263, "grad_norm": 1.0903775691986084, "learning_rate": 0.0001, "loss": 0.0128, "step": 59010 }, { "epoch": 388.2894736842105, "grad_norm": 1.2945852279663086, "learning_rate": 0.0001, "loss": 0.0118, "step": 59020 }, { "epoch": 388.35526315789474, "grad_norm": 0.8267077803611755, "learning_rate": 0.0001, "loss": 0.0148, "step": 59030 }, { "epoch": 388.42105263157896, "grad_norm": 1.4991341829299927, "learning_rate": 0.0001, "loss": 0.0141, "step": 59040 }, { "epoch": 388.4868421052632, "grad_norm": 1.3253583908081055, "learning_rate": 0.0001, "loss": 0.0137, "step": 59050 }, { "epoch": 388.55263157894734, "grad_norm": 0.9886115789413452, "learning_rate": 0.0001, "loss": 0.0126, "step": 59060 }, { "epoch": 388.61842105263156, "grad_norm": 1.1212751865386963, "learning_rate": 0.0001, "loss": 0.0136, "step": 59070 }, { "epoch": 388.6842105263158, "grad_norm": 0.9368122220039368, "learning_rate": 0.0001, "loss": 0.0117, "step": 59080 }, { "epoch": 388.75, "grad_norm": 1.0069820880889893, "learning_rate": 0.0001, "loss": 0.0126, "step": 59090 }, { "epoch": 388.8157894736842, "grad_norm": 1.3330034017562866, "learning_rate": 0.0001, "loss": 0.0116, "step": 59100 }, { "epoch": 388.88157894736844, "grad_norm": 1.5406800508499146, "learning_rate": 0.0001, "loss": 0.0133, "step": 59110 }, { "epoch": 388.94736842105266, "grad_norm": 1.2882165908813477, "learning_rate": 0.0001, "loss": 0.013, "step": 59120 }, { "epoch": 389.0131578947368, "grad_norm": 1.138337254524231, "learning_rate": 0.0001, "loss": 0.0142, "step": 59130 }, { "epoch": 389.07894736842104, "grad_norm": 1.0221810340881348, "learning_rate": 0.0001, "loss": 0.0119, "step": 59140 }, { "epoch": 389.14473684210526, "grad_norm": 1.281477928161621, "learning_rate": 0.0001, "loss": 0.0129, "step": 59150 }, { "epoch": 389.2105263157895, "grad_norm": 1.1402971744537354, "learning_rate": 0.0001, "loss": 0.0115, "step": 59160 }, { "epoch": 389.2763157894737, "grad_norm": 0.914799153804779, "learning_rate": 0.0001, "loss": 0.0126, "step": 59170 }, { "epoch": 389.3421052631579, "grad_norm": 1.0505526065826416, "learning_rate": 0.0001, "loss": 0.0132, "step": 59180 }, { "epoch": 389.4078947368421, "grad_norm": 1.2654054164886475, "learning_rate": 0.0001, "loss": 0.0112, "step": 59190 }, { "epoch": 389.4736842105263, "grad_norm": 1.0078588724136353, "learning_rate": 0.0001, "loss": 0.0111, "step": 59200 }, { "epoch": 389.5394736842105, "grad_norm": 0.7537126541137695, "learning_rate": 0.0001, "loss": 0.0141, "step": 59210 }, { "epoch": 389.60526315789474, "grad_norm": 1.1669580936431885, "learning_rate": 0.0001, "loss": 0.0116, "step": 59220 }, { "epoch": 389.67105263157896, "grad_norm": 1.2333866357803345, "learning_rate": 0.0001, "loss": 0.0135, "step": 59230 }, { "epoch": 389.7368421052632, "grad_norm": 1.1542506217956543, "learning_rate": 0.0001, "loss": 0.0145, "step": 59240 }, { "epoch": 389.80263157894734, "grad_norm": 1.2886197566986084, "learning_rate": 0.0001, "loss": 0.0154, "step": 59250 }, { "epoch": 389.86842105263156, "grad_norm": 1.0714430809020996, "learning_rate": 0.0001, "loss": 0.0126, "step": 59260 }, { "epoch": 389.9342105263158, "grad_norm": 0.9094032049179077, "learning_rate": 0.0001, "loss": 0.0119, "step": 59270 }, { "epoch": 390.0, "grad_norm": 1.1040136814117432, "learning_rate": 0.0001, "loss": 0.0116, "step": 59280 }, { "epoch": 390.0657894736842, "grad_norm": 0.876976490020752, "learning_rate": 0.0001, "loss": 0.011, "step": 59290 }, { "epoch": 390.13157894736844, "grad_norm": 0.86655193567276, "learning_rate": 0.0001, "loss": 0.0136, "step": 59300 }, { "epoch": 390.19736842105266, "grad_norm": 1.018251657485962, "learning_rate": 0.0001, "loss": 0.013, "step": 59310 }, { "epoch": 390.2631578947368, "grad_norm": 1.1418795585632324, "learning_rate": 0.0001, "loss": 0.0136, "step": 59320 }, { "epoch": 390.32894736842104, "grad_norm": 1.039706826210022, "learning_rate": 0.0001, "loss": 0.0139, "step": 59330 }, { "epoch": 390.39473684210526, "grad_norm": 1.0030986070632935, "learning_rate": 0.0001, "loss": 0.0144, "step": 59340 }, { "epoch": 390.4605263157895, "grad_norm": 1.1898283958435059, "learning_rate": 0.0001, "loss": 0.0132, "step": 59350 }, { "epoch": 390.5263157894737, "grad_norm": 0.8979069590568542, "learning_rate": 0.0001, "loss": 0.0128, "step": 59360 }, { "epoch": 390.5921052631579, "grad_norm": 1.1258528232574463, "learning_rate": 0.0001, "loss": 0.0126, "step": 59370 }, { "epoch": 390.6578947368421, "grad_norm": 1.3766101598739624, "learning_rate": 0.0001, "loss": 0.0118, "step": 59380 }, { "epoch": 390.7236842105263, "grad_norm": 1.3407342433929443, "learning_rate": 0.0001, "loss": 0.0124, "step": 59390 }, { "epoch": 390.7894736842105, "grad_norm": 0.9154465198516846, "learning_rate": 0.0001, "loss": 0.0122, "step": 59400 }, { "epoch": 390.85526315789474, "grad_norm": 1.2275203466415405, "learning_rate": 0.0001, "loss": 0.0142, "step": 59410 }, { "epoch": 390.92105263157896, "grad_norm": 1.105592131614685, "learning_rate": 0.0001, "loss": 0.0115, "step": 59420 }, { "epoch": 390.9868421052632, "grad_norm": 1.2495988607406616, "learning_rate": 0.0001, "loss": 0.0133, "step": 59430 }, { "epoch": 391.05263157894734, "grad_norm": 1.3405605554580688, "learning_rate": 0.0001, "loss": 0.013, "step": 59440 }, { "epoch": 391.11842105263156, "grad_norm": 1.2704845666885376, "learning_rate": 0.0001, "loss": 0.0123, "step": 59450 }, { "epoch": 391.1842105263158, "grad_norm": 1.029582142829895, "learning_rate": 0.0001, "loss": 0.0132, "step": 59460 }, { "epoch": 391.25, "grad_norm": 0.8142033219337463, "learning_rate": 0.0001, "loss": 0.0119, "step": 59470 }, { "epoch": 391.3157894736842, "grad_norm": 1.6257590055465698, "learning_rate": 0.0001, "loss": 0.0143, "step": 59480 }, { "epoch": 391.38157894736844, "grad_norm": 1.2035874128341675, "learning_rate": 0.0001, "loss": 0.0115, "step": 59490 }, { "epoch": 391.44736842105266, "grad_norm": 0.7711625099182129, "learning_rate": 0.0001, "loss": 0.0112, "step": 59500 }, { "epoch": 391.5131578947368, "grad_norm": 1.076262354850769, "learning_rate": 0.0001, "loss": 0.0122, "step": 59510 }, { "epoch": 391.57894736842104, "grad_norm": 1.2418280839920044, "learning_rate": 0.0001, "loss": 0.0117, "step": 59520 }, { "epoch": 391.64473684210526, "grad_norm": 1.1036405563354492, "learning_rate": 0.0001, "loss": 0.0132, "step": 59530 }, { "epoch": 391.7105263157895, "grad_norm": 1.4087510108947754, "learning_rate": 0.0001, "loss": 0.012, "step": 59540 }, { "epoch": 391.7763157894737, "grad_norm": 0.9523179531097412, "learning_rate": 0.0001, "loss": 0.0113, "step": 59550 }, { "epoch": 391.8421052631579, "grad_norm": 1.2418298721313477, "learning_rate": 0.0001, "loss": 0.0126, "step": 59560 }, { "epoch": 391.9078947368421, "grad_norm": 1.00105881690979, "learning_rate": 0.0001, "loss": 0.0138, "step": 59570 }, { "epoch": 391.9736842105263, "grad_norm": 1.2458897829055786, "learning_rate": 0.0001, "loss": 0.0133, "step": 59580 }, { "epoch": 392.0394736842105, "grad_norm": 1.1352124214172363, "learning_rate": 0.0001, "loss": 0.0128, "step": 59590 }, { "epoch": 392.10526315789474, "grad_norm": 1.072657823562622, "learning_rate": 0.0001, "loss": 0.0106, "step": 59600 }, { "epoch": 392.17105263157896, "grad_norm": 1.4077866077423096, "learning_rate": 0.0001, "loss": 0.0114, "step": 59610 }, { "epoch": 392.2368421052632, "grad_norm": 0.9759162664413452, "learning_rate": 0.0001, "loss": 0.0115, "step": 59620 }, { "epoch": 392.30263157894734, "grad_norm": 1.0127092599868774, "learning_rate": 0.0001, "loss": 0.0137, "step": 59630 }, { "epoch": 392.36842105263156, "grad_norm": 1.2567495107650757, "learning_rate": 0.0001, "loss": 0.0117, "step": 59640 }, { "epoch": 392.4342105263158, "grad_norm": 1.346420168876648, "learning_rate": 0.0001, "loss": 0.0116, "step": 59650 }, { "epoch": 392.5, "grad_norm": 1.125648856163025, "learning_rate": 0.0001, "loss": 0.0136, "step": 59660 }, { "epoch": 392.5657894736842, "grad_norm": 1.004570484161377, "learning_rate": 0.0001, "loss": 0.0138, "step": 59670 }, { "epoch": 392.63157894736844, "grad_norm": 1.5204274654388428, "learning_rate": 0.0001, "loss": 0.0133, "step": 59680 }, { "epoch": 392.69736842105266, "grad_norm": 1.4187020063400269, "learning_rate": 0.0001, "loss": 0.0145, "step": 59690 }, { "epoch": 392.7631578947368, "grad_norm": 1.4313414096832275, "learning_rate": 0.0001, "loss": 0.015, "step": 59700 }, { "epoch": 392.82894736842104, "grad_norm": 1.398133635520935, "learning_rate": 0.0001, "loss": 0.0106, "step": 59710 }, { "epoch": 392.89473684210526, "grad_norm": 1.279089093208313, "learning_rate": 0.0001, "loss": 0.0156, "step": 59720 }, { "epoch": 392.9605263157895, "grad_norm": 1.511715292930603, "learning_rate": 0.0001, "loss": 0.0111, "step": 59730 }, { "epoch": 393.0263157894737, "grad_norm": 1.0526278018951416, "learning_rate": 0.0001, "loss": 0.0128, "step": 59740 }, { "epoch": 393.0921052631579, "grad_norm": 1.0821075439453125, "learning_rate": 0.0001, "loss": 0.0119, "step": 59750 }, { "epoch": 393.1578947368421, "grad_norm": 1.2545526027679443, "learning_rate": 0.0001, "loss": 0.0125, "step": 59760 }, { "epoch": 393.2236842105263, "grad_norm": 0.8105134963989258, "learning_rate": 0.0001, "loss": 0.012, "step": 59770 }, { "epoch": 393.2894736842105, "grad_norm": 1.4059394598007202, "learning_rate": 0.0001, "loss": 0.0123, "step": 59780 }, { "epoch": 393.35526315789474, "grad_norm": 1.157401204109192, "learning_rate": 0.0001, "loss": 0.0119, "step": 59790 }, { "epoch": 393.42105263157896, "grad_norm": 0.9264048337936401, "learning_rate": 0.0001, "loss": 0.0139, "step": 59800 }, { "epoch": 393.4868421052632, "grad_norm": 0.9000381827354431, "learning_rate": 0.0001, "loss": 0.0128, "step": 59810 }, { "epoch": 393.55263157894734, "grad_norm": 1.0139583349227905, "learning_rate": 0.0001, "loss": 0.0131, "step": 59820 }, { "epoch": 393.61842105263156, "grad_norm": 0.9514049887657166, "learning_rate": 0.0001, "loss": 0.0119, "step": 59830 }, { "epoch": 393.6842105263158, "grad_norm": 1.2830848693847656, "learning_rate": 0.0001, "loss": 0.013, "step": 59840 }, { "epoch": 393.75, "grad_norm": 0.9854030013084412, "learning_rate": 0.0001, "loss": 0.0145, "step": 59850 }, { "epoch": 393.8157894736842, "grad_norm": 1.196966290473938, "learning_rate": 0.0001, "loss": 0.0124, "step": 59860 }, { "epoch": 393.88157894736844, "grad_norm": 1.024109125137329, "learning_rate": 0.0001, "loss": 0.0122, "step": 59870 }, { "epoch": 393.94736842105266, "grad_norm": 1.1376116275787354, "learning_rate": 0.0001, "loss": 0.0117, "step": 59880 }, { "epoch": 394.0131578947368, "grad_norm": 1.670544147491455, "learning_rate": 0.0001, "loss": 0.0137, "step": 59890 }, { "epoch": 394.07894736842104, "grad_norm": 1.2976588010787964, "learning_rate": 0.0001, "loss": 0.0142, "step": 59900 }, { "epoch": 394.14473684210526, "grad_norm": 1.2100739479064941, "learning_rate": 0.0001, "loss": 0.0132, "step": 59910 }, { "epoch": 394.2105263157895, "grad_norm": 1.5674223899841309, "learning_rate": 0.0001, "loss": 0.013, "step": 59920 }, { "epoch": 394.2763157894737, "grad_norm": 1.4684234857559204, "learning_rate": 0.0001, "loss": 0.0113, "step": 59930 }, { "epoch": 394.3421052631579, "grad_norm": 1.1361076831817627, "learning_rate": 0.0001, "loss": 0.0132, "step": 59940 }, { "epoch": 394.4078947368421, "grad_norm": 1.0538053512573242, "learning_rate": 0.0001, "loss": 0.0135, "step": 59950 }, { "epoch": 394.4736842105263, "grad_norm": 1.3147025108337402, "learning_rate": 0.0001, "loss": 0.0114, "step": 59960 }, { "epoch": 394.5394736842105, "grad_norm": 1.4038904905319214, "learning_rate": 0.0001, "loss": 0.0128, "step": 59970 }, { "epoch": 394.60526315789474, "grad_norm": 1.126792073249817, "learning_rate": 0.0001, "loss": 0.011, "step": 59980 }, { "epoch": 394.67105263157896, "grad_norm": 1.2345644235610962, "learning_rate": 0.0001, "loss": 0.0134, "step": 59990 }, { "epoch": 394.7368421052632, "grad_norm": 1.2304173707962036, "learning_rate": 0.0001, "loss": 0.0138, "step": 60000 }, { "epoch": 394.80263157894734, "grad_norm": 1.4393924474716187, "learning_rate": 0.0001, "loss": 0.0101, "step": 60010 }, { "epoch": 394.86842105263156, "grad_norm": 1.3260493278503418, "learning_rate": 0.0001, "loss": 0.0111, "step": 60020 }, { "epoch": 394.9342105263158, "grad_norm": 1.1898632049560547, "learning_rate": 0.0001, "loss": 0.0109, "step": 60030 }, { "epoch": 395.0, "grad_norm": 1.163697361946106, "learning_rate": 0.0001, "loss": 0.0121, "step": 60040 }, { "epoch": 395.0657894736842, "grad_norm": 1.0178470611572266, "learning_rate": 0.0001, "loss": 0.013, "step": 60050 }, { "epoch": 395.13157894736844, "grad_norm": 1.5706970691680908, "learning_rate": 0.0001, "loss": 0.0114, "step": 60060 }, { "epoch": 395.19736842105266, "grad_norm": 1.2328516244888306, "learning_rate": 0.0001, "loss": 0.0137, "step": 60070 }, { "epoch": 395.2631578947368, "grad_norm": 1.3404053449630737, "learning_rate": 0.0001, "loss": 0.0131, "step": 60080 }, { "epoch": 395.32894736842104, "grad_norm": 1.3610615730285645, "learning_rate": 0.0001, "loss": 0.0148, "step": 60090 }, { "epoch": 395.39473684210526, "grad_norm": 1.486354947090149, "learning_rate": 0.0001, "loss": 0.0147, "step": 60100 }, { "epoch": 395.4605263157895, "grad_norm": 1.392723560333252, "learning_rate": 0.0001, "loss": 0.0135, "step": 60110 }, { "epoch": 395.5263157894737, "grad_norm": 1.290682077407837, "learning_rate": 0.0001, "loss": 0.0136, "step": 60120 }, { "epoch": 395.5921052631579, "grad_norm": 0.9399741291999817, "learning_rate": 0.0001, "loss": 0.014, "step": 60130 }, { "epoch": 395.6578947368421, "grad_norm": 1.4426350593566895, "learning_rate": 0.0001, "loss": 0.0165, "step": 60140 }, { "epoch": 395.7236842105263, "grad_norm": 1.2685151100158691, "learning_rate": 0.0001, "loss": 0.0129, "step": 60150 }, { "epoch": 395.7894736842105, "grad_norm": 0.9836694002151489, "learning_rate": 0.0001, "loss": 0.013, "step": 60160 }, { "epoch": 395.85526315789474, "grad_norm": 1.0677541494369507, "learning_rate": 0.0001, "loss": 0.0138, "step": 60170 }, { "epoch": 395.92105263157896, "grad_norm": 1.1794040203094482, "learning_rate": 0.0001, "loss": 0.0137, "step": 60180 }, { "epoch": 395.9868421052632, "grad_norm": 1.7301592826843262, "learning_rate": 0.0001, "loss": 0.0154, "step": 60190 }, { "epoch": 396.05263157894734, "grad_norm": 1.1919747591018677, "learning_rate": 0.0001, "loss": 0.0125, "step": 60200 }, { "epoch": 396.11842105263156, "grad_norm": 1.3119267225265503, "learning_rate": 0.0001, "loss": 0.0127, "step": 60210 }, { "epoch": 396.1842105263158, "grad_norm": 1.2656959295272827, "learning_rate": 0.0001, "loss": 0.0144, "step": 60220 }, { "epoch": 396.25, "grad_norm": 1.2613000869750977, "learning_rate": 0.0001, "loss": 0.0145, "step": 60230 }, { "epoch": 396.3157894736842, "grad_norm": 1.25479257106781, "learning_rate": 0.0001, "loss": 0.0125, "step": 60240 }, { "epoch": 396.38157894736844, "grad_norm": 1.2844350337982178, "learning_rate": 0.0001, "loss": 0.013, "step": 60250 }, { "epoch": 396.44736842105266, "grad_norm": 1.0911136865615845, "learning_rate": 0.0001, "loss": 0.0114, "step": 60260 }, { "epoch": 396.5131578947368, "grad_norm": 1.0953329801559448, "learning_rate": 0.0001, "loss": 0.0156, "step": 60270 }, { "epoch": 396.57894736842104, "grad_norm": 1.4672765731811523, "learning_rate": 0.0001, "loss": 0.0129, "step": 60280 }, { "epoch": 396.64473684210526, "grad_norm": 1.0957013368606567, "learning_rate": 0.0001, "loss": 0.0128, "step": 60290 }, { "epoch": 396.7105263157895, "grad_norm": 1.128840684890747, "learning_rate": 0.0001, "loss": 0.0128, "step": 60300 }, { "epoch": 396.7763157894737, "grad_norm": 0.9832814931869507, "learning_rate": 0.0001, "loss": 0.0138, "step": 60310 }, { "epoch": 396.8421052631579, "grad_norm": 1.023591160774231, "learning_rate": 0.0001, "loss": 0.0147, "step": 60320 }, { "epoch": 396.9078947368421, "grad_norm": 0.9705013036727905, "learning_rate": 0.0001, "loss": 0.0141, "step": 60330 }, { "epoch": 396.9736842105263, "grad_norm": 1.1543126106262207, "learning_rate": 0.0001, "loss": 0.014, "step": 60340 }, { "epoch": 397.0394736842105, "grad_norm": 0.7225308418273926, "learning_rate": 0.0001, "loss": 0.0128, "step": 60350 }, { "epoch": 397.10526315789474, "grad_norm": 1.5401467084884644, "learning_rate": 0.0001, "loss": 0.0145, "step": 60360 }, { "epoch": 397.17105263157896, "grad_norm": 1.3008463382720947, "learning_rate": 0.0001, "loss": 0.0131, "step": 60370 }, { "epoch": 397.2368421052632, "grad_norm": 0.7832499742507935, "learning_rate": 0.0001, "loss": 0.015, "step": 60380 }, { "epoch": 397.30263157894734, "grad_norm": 0.9722589254379272, "learning_rate": 0.0001, "loss": 0.0138, "step": 60390 }, { "epoch": 397.36842105263156, "grad_norm": 0.9584972858428955, "learning_rate": 0.0001, "loss": 0.0139, "step": 60400 }, { "epoch": 397.4342105263158, "grad_norm": 1.1281046867370605, "learning_rate": 0.0001, "loss": 0.0134, "step": 60410 }, { "epoch": 397.5, "grad_norm": 0.9634712338447571, "learning_rate": 0.0001, "loss": 0.0137, "step": 60420 }, { "epoch": 397.5657894736842, "grad_norm": 0.8092069029808044, "learning_rate": 0.0001, "loss": 0.0129, "step": 60430 }, { "epoch": 397.63157894736844, "grad_norm": 0.941852331161499, "learning_rate": 0.0001, "loss": 0.0133, "step": 60440 }, { "epoch": 397.69736842105266, "grad_norm": 1.1537492275238037, "learning_rate": 0.0001, "loss": 0.0142, "step": 60450 }, { "epoch": 397.7631578947368, "grad_norm": 1.2303005456924438, "learning_rate": 0.0001, "loss": 0.0127, "step": 60460 }, { "epoch": 397.82894736842104, "grad_norm": 0.761372983455658, "learning_rate": 0.0001, "loss": 0.0138, "step": 60470 }, { "epoch": 397.89473684210526, "grad_norm": 0.8683788180351257, "learning_rate": 0.0001, "loss": 0.0132, "step": 60480 }, { "epoch": 397.9605263157895, "grad_norm": 0.7769721150398254, "learning_rate": 0.0001, "loss": 0.0171, "step": 60490 }, { "epoch": 398.0263157894737, "grad_norm": 0.8893076777458191, "learning_rate": 0.0001, "loss": 0.0171, "step": 60500 }, { "epoch": 398.0921052631579, "grad_norm": 1.5258387327194214, "learning_rate": 0.0001, "loss": 0.0133, "step": 60510 }, { "epoch": 398.1578947368421, "grad_norm": 1.3462433815002441, "learning_rate": 0.0001, "loss": 0.0162, "step": 60520 }, { "epoch": 398.2236842105263, "grad_norm": 0.9892566800117493, "learning_rate": 0.0001, "loss": 0.0145, "step": 60530 }, { "epoch": 398.2894736842105, "grad_norm": 1.024162769317627, "learning_rate": 0.0001, "loss": 0.0146, "step": 60540 }, { "epoch": 398.35526315789474, "grad_norm": 0.9343032240867615, "learning_rate": 0.0001, "loss": 0.013, "step": 60550 }, { "epoch": 398.42105263157896, "grad_norm": 1.1336026191711426, "learning_rate": 0.0001, "loss": 0.0133, "step": 60560 }, { "epoch": 398.4868421052632, "grad_norm": 1.1892248392105103, "learning_rate": 0.0001, "loss": 0.0137, "step": 60570 }, { "epoch": 398.55263157894734, "grad_norm": 1.184756875038147, "learning_rate": 0.0001, "loss": 0.0135, "step": 60580 }, { "epoch": 398.61842105263156, "grad_norm": 1.1197586059570312, "learning_rate": 0.0001, "loss": 0.013, "step": 60590 }, { "epoch": 398.6842105263158, "grad_norm": 1.4261908531188965, "learning_rate": 0.0001, "loss": 0.0137, "step": 60600 }, { "epoch": 398.75, "grad_norm": 1.3351398706436157, "learning_rate": 0.0001, "loss": 0.0172, "step": 60610 }, { "epoch": 398.8157894736842, "grad_norm": 1.4747203588485718, "learning_rate": 0.0001, "loss": 0.0146, "step": 60620 }, { "epoch": 398.88157894736844, "grad_norm": 1.32119882106781, "learning_rate": 0.0001, "loss": 0.015, "step": 60630 }, { "epoch": 398.94736842105266, "grad_norm": 1.3960144519805908, "learning_rate": 0.0001, "loss": 0.0132, "step": 60640 }, { "epoch": 399.0131578947368, "grad_norm": 1.1450289487838745, "learning_rate": 0.0001, "loss": 0.0132, "step": 60650 }, { "epoch": 399.07894736842104, "grad_norm": 1.1878561973571777, "learning_rate": 0.0001, "loss": 0.0132, "step": 60660 }, { "epoch": 399.14473684210526, "grad_norm": 1.0264465808868408, "learning_rate": 0.0001, "loss": 0.0137, "step": 60670 }, { "epoch": 399.2105263157895, "grad_norm": 1.1604735851287842, "learning_rate": 0.0001, "loss": 0.0147, "step": 60680 }, { "epoch": 399.2763157894737, "grad_norm": 1.373092532157898, "learning_rate": 0.0001, "loss": 0.014, "step": 60690 }, { "epoch": 399.3421052631579, "grad_norm": 1.260588526725769, "learning_rate": 0.0001, "loss": 0.015, "step": 60700 }, { "epoch": 399.4078947368421, "grad_norm": 1.2193946838378906, "learning_rate": 0.0001, "loss": 0.0141, "step": 60710 }, { "epoch": 399.4736842105263, "grad_norm": 1.2826273441314697, "learning_rate": 0.0001, "loss": 0.014, "step": 60720 }, { "epoch": 399.5394736842105, "grad_norm": 0.9738739132881165, "learning_rate": 0.0001, "loss": 0.0132, "step": 60730 }, { "epoch": 399.60526315789474, "grad_norm": 1.2081356048583984, "learning_rate": 0.0001, "loss": 0.0132, "step": 60740 }, { "epoch": 399.67105263157896, "grad_norm": 1.198371171951294, "learning_rate": 0.0001, "loss": 0.0128, "step": 60750 }, { "epoch": 399.7368421052632, "grad_norm": 1.064581274986267, "learning_rate": 0.0001, "loss": 0.0134, "step": 60760 }, { "epoch": 399.80263157894734, "grad_norm": 0.9279325604438782, "learning_rate": 0.0001, "loss": 0.0156, "step": 60770 }, { "epoch": 399.86842105263156, "grad_norm": 0.7750979661941528, "learning_rate": 0.0001, "loss": 0.015, "step": 60780 }, { "epoch": 399.9342105263158, "grad_norm": 0.9537990689277649, "learning_rate": 0.0001, "loss": 0.0125, "step": 60790 }, { "epoch": 400.0, "grad_norm": 1.227004885673523, "learning_rate": 0.0001, "loss": 0.0117, "step": 60800 }, { "epoch": 400.0657894736842, "grad_norm": 0.8979701399803162, "learning_rate": 0.0001, "loss": 0.0124, "step": 60810 }, { "epoch": 400.13157894736844, "grad_norm": 1.022169828414917, "learning_rate": 0.0001, "loss": 0.0134, "step": 60820 }, { "epoch": 400.19736842105266, "grad_norm": 1.129315733909607, "learning_rate": 0.0001, "loss": 0.0156, "step": 60830 }, { "epoch": 400.2631578947368, "grad_norm": 1.0634799003601074, "learning_rate": 0.0001, "loss": 0.0138, "step": 60840 }, { "epoch": 400.32894736842104, "grad_norm": 1.2709938287734985, "learning_rate": 0.0001, "loss": 0.0112, "step": 60850 }, { "epoch": 400.39473684210526, "grad_norm": 1.0252529382705688, "learning_rate": 0.0001, "loss": 0.0134, "step": 60860 }, { "epoch": 400.4605263157895, "grad_norm": 1.4631614685058594, "learning_rate": 0.0001, "loss": 0.014, "step": 60870 }, { "epoch": 400.5263157894737, "grad_norm": 1.0839378833770752, "learning_rate": 0.0001, "loss": 0.0135, "step": 60880 }, { "epoch": 400.5921052631579, "grad_norm": 0.9850987792015076, "learning_rate": 0.0001, "loss": 0.0128, "step": 60890 }, { "epoch": 400.6578947368421, "grad_norm": 1.6288450956344604, "learning_rate": 0.0001, "loss": 0.0146, "step": 60900 }, { "epoch": 400.7236842105263, "grad_norm": 1.1877349615097046, "learning_rate": 0.0001, "loss": 0.0125, "step": 60910 }, { "epoch": 400.7894736842105, "grad_norm": 1.2342051267623901, "learning_rate": 0.0001, "loss": 0.0144, "step": 60920 }, { "epoch": 400.85526315789474, "grad_norm": 1.1505542993545532, "learning_rate": 0.0001, "loss": 0.0126, "step": 60930 }, { "epoch": 400.92105263157896, "grad_norm": 0.6840497851371765, "learning_rate": 0.0001, "loss": 0.0141, "step": 60940 }, { "epoch": 400.9868421052632, "grad_norm": 1.1132681369781494, "learning_rate": 0.0001, "loss": 0.0148, "step": 60950 }, { "epoch": 401.05263157894734, "grad_norm": 0.9815819263458252, "learning_rate": 0.0001, "loss": 0.0143, "step": 60960 }, { "epoch": 401.11842105263156, "grad_norm": 1.063504695892334, "learning_rate": 0.0001, "loss": 0.015, "step": 60970 }, { "epoch": 401.1842105263158, "grad_norm": 0.8839133977890015, "learning_rate": 0.0001, "loss": 0.0128, "step": 60980 }, { "epoch": 401.25, "grad_norm": 1.3246194124221802, "learning_rate": 0.0001, "loss": 0.0144, "step": 60990 }, { "epoch": 401.3157894736842, "grad_norm": 1.150536060333252, "learning_rate": 0.0001, "loss": 0.0133, "step": 61000 }, { "epoch": 401.38157894736844, "grad_norm": 1.1703293323516846, "learning_rate": 0.0001, "loss": 0.0136, "step": 61010 }, { "epoch": 401.44736842105266, "grad_norm": 1.1966885328292847, "learning_rate": 0.0001, "loss": 0.0141, "step": 61020 }, { "epoch": 401.5131578947368, "grad_norm": 1.2253130674362183, "learning_rate": 0.0001, "loss": 0.015, "step": 61030 }, { "epoch": 401.57894736842104, "grad_norm": 0.6977288126945496, "learning_rate": 0.0001, "loss": 0.0131, "step": 61040 }, { "epoch": 401.64473684210526, "grad_norm": 1.388411045074463, "learning_rate": 0.0001, "loss": 0.0131, "step": 61050 }, { "epoch": 401.7105263157895, "grad_norm": 1.45159113407135, "learning_rate": 0.0001, "loss": 0.0112, "step": 61060 }, { "epoch": 401.7763157894737, "grad_norm": 1.221388578414917, "learning_rate": 0.0001, "loss": 0.0132, "step": 61070 }, { "epoch": 401.8421052631579, "grad_norm": 1.2289520502090454, "learning_rate": 0.0001, "loss": 0.0111, "step": 61080 }, { "epoch": 401.9078947368421, "grad_norm": 1.2441534996032715, "learning_rate": 0.0001, "loss": 0.0131, "step": 61090 }, { "epoch": 401.9736842105263, "grad_norm": 1.4623695611953735, "learning_rate": 0.0001, "loss": 0.013, "step": 61100 }, { "epoch": 402.0394736842105, "grad_norm": 1.220936894416809, "learning_rate": 0.0001, "loss": 0.014, "step": 61110 }, { "epoch": 402.10526315789474, "grad_norm": 1.2815364599227905, "learning_rate": 0.0001, "loss": 0.0119, "step": 61120 }, { "epoch": 402.17105263157896, "grad_norm": 1.2547277212142944, "learning_rate": 0.0001, "loss": 0.0125, "step": 61130 }, { "epoch": 402.2368421052632, "grad_norm": 0.9268215894699097, "learning_rate": 0.0001, "loss": 0.0123, "step": 61140 }, { "epoch": 402.30263157894734, "grad_norm": 1.4284296035766602, "learning_rate": 0.0001, "loss": 0.0106, "step": 61150 }, { "epoch": 402.36842105263156, "grad_norm": 1.183083415031433, "learning_rate": 0.0001, "loss": 0.0112, "step": 61160 }, { "epoch": 402.4342105263158, "grad_norm": 1.3317415714263916, "learning_rate": 0.0001, "loss": 0.0136, "step": 61170 }, { "epoch": 402.5, "grad_norm": 0.9961628317832947, "learning_rate": 0.0001, "loss": 0.0128, "step": 61180 }, { "epoch": 402.5657894736842, "grad_norm": 1.011443018913269, "learning_rate": 0.0001, "loss": 0.0134, "step": 61190 }, { "epoch": 402.63157894736844, "grad_norm": 1.214849829673767, "learning_rate": 0.0001, "loss": 0.0127, "step": 61200 }, { "epoch": 402.69736842105266, "grad_norm": 1.2691972255706787, "learning_rate": 0.0001, "loss": 0.0137, "step": 61210 }, { "epoch": 402.7631578947368, "grad_norm": 0.8713511228561401, "learning_rate": 0.0001, "loss": 0.012, "step": 61220 }, { "epoch": 402.82894736842104, "grad_norm": 1.2095887660980225, "learning_rate": 0.0001, "loss": 0.0125, "step": 61230 }, { "epoch": 402.89473684210526, "grad_norm": 1.3552407026290894, "learning_rate": 0.0001, "loss": 0.0129, "step": 61240 }, { "epoch": 402.9605263157895, "grad_norm": 1.3354055881500244, "learning_rate": 0.0001, "loss": 0.0119, "step": 61250 }, { "epoch": 403.0263157894737, "grad_norm": 1.0809153318405151, "learning_rate": 0.0001, "loss": 0.0137, "step": 61260 }, { "epoch": 403.0921052631579, "grad_norm": 0.9687100052833557, "learning_rate": 0.0001, "loss": 0.0136, "step": 61270 }, { "epoch": 403.1578947368421, "grad_norm": 1.0305453538894653, "learning_rate": 0.0001, "loss": 0.0114, "step": 61280 }, { "epoch": 403.2236842105263, "grad_norm": 1.0682860612869263, "learning_rate": 0.0001, "loss": 0.0122, "step": 61290 }, { "epoch": 403.2894736842105, "grad_norm": 1.3021142482757568, "learning_rate": 0.0001, "loss": 0.0126, "step": 61300 }, { "epoch": 403.35526315789474, "grad_norm": 1.2152711153030396, "learning_rate": 0.0001, "loss": 0.0105, "step": 61310 }, { "epoch": 403.42105263157896, "grad_norm": 1.1612188816070557, "learning_rate": 0.0001, "loss": 0.013, "step": 61320 }, { "epoch": 403.4868421052632, "grad_norm": 1.2532055377960205, "learning_rate": 0.0001, "loss": 0.0118, "step": 61330 }, { "epoch": 403.55263157894734, "grad_norm": 1.164185881614685, "learning_rate": 0.0001, "loss": 0.0149, "step": 61340 }, { "epoch": 403.61842105263156, "grad_norm": 1.5072556734085083, "learning_rate": 0.0001, "loss": 0.0125, "step": 61350 }, { "epoch": 403.6842105263158, "grad_norm": 1.4515098333358765, "learning_rate": 0.0001, "loss": 0.0119, "step": 61360 }, { "epoch": 403.75, "grad_norm": 1.3423659801483154, "learning_rate": 0.0001, "loss": 0.0119, "step": 61370 }, { "epoch": 403.8157894736842, "grad_norm": 1.5325146913528442, "learning_rate": 0.0001, "loss": 0.0126, "step": 61380 }, { "epoch": 403.88157894736844, "grad_norm": 1.323291301727295, "learning_rate": 0.0001, "loss": 0.0119, "step": 61390 }, { "epoch": 403.94736842105266, "grad_norm": 1.3427553176879883, "learning_rate": 0.0001, "loss": 0.0111, "step": 61400 }, { "epoch": 404.0131578947368, "grad_norm": 1.0898405313491821, "learning_rate": 0.0001, "loss": 0.0124, "step": 61410 }, { "epoch": 404.07894736842104, "grad_norm": 1.0834609270095825, "learning_rate": 0.0001, "loss": 0.011, "step": 61420 }, { "epoch": 404.14473684210526, "grad_norm": 1.5015757083892822, "learning_rate": 0.0001, "loss": 0.0123, "step": 61430 }, { "epoch": 404.2105263157895, "grad_norm": 1.4444079399108887, "learning_rate": 0.0001, "loss": 0.0119, "step": 61440 }, { "epoch": 404.2763157894737, "grad_norm": 1.0211026668548584, "learning_rate": 0.0001, "loss": 0.0119, "step": 61450 }, { "epoch": 404.3421052631579, "grad_norm": 1.4435980319976807, "learning_rate": 0.0001, "loss": 0.0126, "step": 61460 }, { "epoch": 404.4078947368421, "grad_norm": 0.9525414109230042, "learning_rate": 0.0001, "loss": 0.0126, "step": 61470 }, { "epoch": 404.4736842105263, "grad_norm": 1.289888620376587, "learning_rate": 0.0001, "loss": 0.0134, "step": 61480 }, { "epoch": 404.5394736842105, "grad_norm": 1.652472734451294, "learning_rate": 0.0001, "loss": 0.0132, "step": 61490 }, { "epoch": 404.60526315789474, "grad_norm": 1.2742666006088257, "learning_rate": 0.0001, "loss": 0.0115, "step": 61500 }, { "epoch": 404.67105263157896, "grad_norm": 0.8357386589050293, "learning_rate": 0.0001, "loss": 0.0135, "step": 61510 }, { "epoch": 404.7368421052632, "grad_norm": 0.9393722414970398, "learning_rate": 0.0001, "loss": 0.0127, "step": 61520 }, { "epoch": 404.80263157894734, "grad_norm": 1.255674123764038, "learning_rate": 0.0001, "loss": 0.0129, "step": 61530 }, { "epoch": 404.86842105263156, "grad_norm": 1.0511587858200073, "learning_rate": 0.0001, "loss": 0.0123, "step": 61540 }, { "epoch": 404.9342105263158, "grad_norm": 0.9492988586425781, "learning_rate": 0.0001, "loss": 0.0111, "step": 61550 }, { "epoch": 405.0, "grad_norm": 1.1605418920516968, "learning_rate": 0.0001, "loss": 0.0116, "step": 61560 }, { "epoch": 405.0657894736842, "grad_norm": 1.098800778388977, "learning_rate": 0.0001, "loss": 0.0134, "step": 61570 }, { "epoch": 405.13157894736844, "grad_norm": 1.138261079788208, "learning_rate": 0.0001, "loss": 0.0127, "step": 61580 }, { "epoch": 405.19736842105266, "grad_norm": 1.2927042245864868, "learning_rate": 0.0001, "loss": 0.0122, "step": 61590 }, { "epoch": 405.2631578947368, "grad_norm": 1.1841543912887573, "learning_rate": 0.0001, "loss": 0.0129, "step": 61600 }, { "epoch": 405.32894736842104, "grad_norm": 1.0832710266113281, "learning_rate": 0.0001, "loss": 0.0128, "step": 61610 }, { "epoch": 405.39473684210526, "grad_norm": 1.35568106174469, "learning_rate": 0.0001, "loss": 0.0126, "step": 61620 }, { "epoch": 405.4605263157895, "grad_norm": 1.0459115505218506, "learning_rate": 0.0001, "loss": 0.0105, "step": 61630 }, { "epoch": 405.5263157894737, "grad_norm": 1.2705081701278687, "learning_rate": 0.0001, "loss": 0.0135, "step": 61640 }, { "epoch": 405.5921052631579, "grad_norm": 0.9033043384552002, "learning_rate": 0.0001, "loss": 0.0117, "step": 61650 }, { "epoch": 405.6578947368421, "grad_norm": 1.2756457328796387, "learning_rate": 0.0001, "loss": 0.0114, "step": 61660 }, { "epoch": 405.7236842105263, "grad_norm": 1.5137383937835693, "learning_rate": 0.0001, "loss": 0.0113, "step": 61670 }, { "epoch": 405.7894736842105, "grad_norm": 0.9483539462089539, "learning_rate": 0.0001, "loss": 0.0129, "step": 61680 }, { "epoch": 405.85526315789474, "grad_norm": 1.1545567512512207, "learning_rate": 0.0001, "loss": 0.0128, "step": 61690 }, { "epoch": 405.92105263157896, "grad_norm": 0.8387587666511536, "learning_rate": 0.0001, "loss": 0.0118, "step": 61700 }, { "epoch": 405.9868421052632, "grad_norm": 0.9866654276847839, "learning_rate": 0.0001, "loss": 0.0119, "step": 61710 }, { "epoch": 406.05263157894734, "grad_norm": 0.9306283593177795, "learning_rate": 0.0001, "loss": 0.0121, "step": 61720 }, { "epoch": 406.11842105263156, "grad_norm": 1.089195966720581, "learning_rate": 0.0001, "loss": 0.0119, "step": 61730 }, { "epoch": 406.1842105263158, "grad_norm": 1.0379259586334229, "learning_rate": 0.0001, "loss": 0.0133, "step": 61740 }, { "epoch": 406.25, "grad_norm": 0.8618190288543701, "learning_rate": 0.0001, "loss": 0.0139, "step": 61750 }, { "epoch": 406.3157894736842, "grad_norm": 1.3050987720489502, "learning_rate": 0.0001, "loss": 0.0126, "step": 61760 }, { "epoch": 406.38157894736844, "grad_norm": 1.1287416219711304, "learning_rate": 0.0001, "loss": 0.0105, "step": 61770 }, { "epoch": 406.44736842105266, "grad_norm": 0.9702133536338806, "learning_rate": 0.0001, "loss": 0.0127, "step": 61780 }, { "epoch": 406.5131578947368, "grad_norm": 0.9845720529556274, "learning_rate": 0.0001, "loss": 0.0125, "step": 61790 }, { "epoch": 406.57894736842104, "grad_norm": 1.2057915925979614, "learning_rate": 0.0001, "loss": 0.0112, "step": 61800 }, { "epoch": 406.64473684210526, "grad_norm": 1.1550894975662231, "learning_rate": 0.0001, "loss": 0.0134, "step": 61810 }, { "epoch": 406.7105263157895, "grad_norm": 1.2562119960784912, "learning_rate": 0.0001, "loss": 0.0122, "step": 61820 }, { "epoch": 406.7763157894737, "grad_norm": 1.1351388692855835, "learning_rate": 0.0001, "loss": 0.0127, "step": 61830 }, { "epoch": 406.8421052631579, "grad_norm": 1.1127445697784424, "learning_rate": 0.0001, "loss": 0.012, "step": 61840 }, { "epoch": 406.9078947368421, "grad_norm": 1.1058247089385986, "learning_rate": 0.0001, "loss": 0.0134, "step": 61850 }, { "epoch": 406.9736842105263, "grad_norm": 0.9056158661842346, "learning_rate": 0.0001, "loss": 0.013, "step": 61860 }, { "epoch": 407.0394736842105, "grad_norm": 1.2106934785842896, "learning_rate": 0.0001, "loss": 0.0128, "step": 61870 }, { "epoch": 407.10526315789474, "grad_norm": 1.2813785076141357, "learning_rate": 0.0001, "loss": 0.0121, "step": 61880 }, { "epoch": 407.17105263157896, "grad_norm": 1.3114577531814575, "learning_rate": 0.0001, "loss": 0.0126, "step": 61890 }, { "epoch": 407.2368421052632, "grad_norm": 1.1716445684432983, "learning_rate": 0.0001, "loss": 0.0111, "step": 61900 }, { "epoch": 407.30263157894734, "grad_norm": 1.2091526985168457, "learning_rate": 0.0001, "loss": 0.0121, "step": 61910 }, { "epoch": 407.36842105263156, "grad_norm": 0.9812834858894348, "learning_rate": 0.0001, "loss": 0.0134, "step": 61920 }, { "epoch": 407.4342105263158, "grad_norm": 0.9171328544616699, "learning_rate": 0.0001, "loss": 0.0127, "step": 61930 }, { "epoch": 407.5, "grad_norm": 0.8399735689163208, "learning_rate": 0.0001, "loss": 0.0126, "step": 61940 }, { "epoch": 407.5657894736842, "grad_norm": 1.0794377326965332, "learning_rate": 0.0001, "loss": 0.0125, "step": 61950 }, { "epoch": 407.63157894736844, "grad_norm": 1.1794744729995728, "learning_rate": 0.0001, "loss": 0.0143, "step": 61960 }, { "epoch": 407.69736842105266, "grad_norm": 1.2339738607406616, "learning_rate": 0.0001, "loss": 0.0133, "step": 61970 }, { "epoch": 407.7631578947368, "grad_norm": 1.1554800271987915, "learning_rate": 0.0001, "loss": 0.0119, "step": 61980 }, { "epoch": 407.82894736842104, "grad_norm": 0.769442081451416, "learning_rate": 0.0001, "loss": 0.0121, "step": 61990 }, { "epoch": 407.89473684210526, "grad_norm": 0.9051490426063538, "learning_rate": 0.0001, "loss": 0.011, "step": 62000 }, { "epoch": 407.9605263157895, "grad_norm": 0.8999045491218567, "learning_rate": 0.0001, "loss": 0.0122, "step": 62010 }, { "epoch": 408.0263157894737, "grad_norm": 1.5016242265701294, "learning_rate": 0.0001, "loss": 0.0124, "step": 62020 }, { "epoch": 408.0921052631579, "grad_norm": 1.4286948442459106, "learning_rate": 0.0001, "loss": 0.0124, "step": 62030 }, { "epoch": 408.1578947368421, "grad_norm": 1.1384950876235962, "learning_rate": 0.0001, "loss": 0.0119, "step": 62040 }, { "epoch": 408.2236842105263, "grad_norm": 1.6049597263336182, "learning_rate": 0.0001, "loss": 0.0131, "step": 62050 }, { "epoch": 408.2894736842105, "grad_norm": 1.4698026180267334, "learning_rate": 0.0001, "loss": 0.0128, "step": 62060 }, { "epoch": 408.35526315789474, "grad_norm": 0.9157135486602783, "learning_rate": 0.0001, "loss": 0.0119, "step": 62070 }, { "epoch": 408.42105263157896, "grad_norm": 1.380117654800415, "learning_rate": 0.0001, "loss": 0.0129, "step": 62080 }, { "epoch": 408.4868421052632, "grad_norm": 1.374470829963684, "learning_rate": 0.0001, "loss": 0.0111, "step": 62090 }, { "epoch": 408.55263157894734, "grad_norm": 1.056437611579895, "learning_rate": 0.0001, "loss": 0.0111, "step": 62100 }, { "epoch": 408.61842105263156, "grad_norm": 1.3114112615585327, "learning_rate": 0.0001, "loss": 0.012, "step": 62110 }, { "epoch": 408.6842105263158, "grad_norm": 1.187699794769287, "learning_rate": 0.0001, "loss": 0.0126, "step": 62120 }, { "epoch": 408.75, "grad_norm": 1.2368489503860474, "learning_rate": 0.0001, "loss": 0.0133, "step": 62130 }, { "epoch": 408.8157894736842, "grad_norm": 1.251200556755066, "learning_rate": 0.0001, "loss": 0.012, "step": 62140 }, { "epoch": 408.88157894736844, "grad_norm": 1.1192817687988281, "learning_rate": 0.0001, "loss": 0.0137, "step": 62150 }, { "epoch": 408.94736842105266, "grad_norm": 1.289333701133728, "learning_rate": 0.0001, "loss": 0.0127, "step": 62160 }, { "epoch": 409.0131578947368, "grad_norm": 1.1822633743286133, "learning_rate": 0.0001, "loss": 0.0124, "step": 62170 }, { "epoch": 409.07894736842104, "grad_norm": 0.9145843982696533, "learning_rate": 0.0001, "loss": 0.012, "step": 62180 }, { "epoch": 409.14473684210526, "grad_norm": 1.0487996339797974, "learning_rate": 0.0001, "loss": 0.0127, "step": 62190 }, { "epoch": 409.2105263157895, "grad_norm": 1.1056833267211914, "learning_rate": 0.0001, "loss": 0.012, "step": 62200 }, { "epoch": 409.2763157894737, "grad_norm": 1.2669976949691772, "learning_rate": 0.0001, "loss": 0.0117, "step": 62210 }, { "epoch": 409.3421052631579, "grad_norm": 0.501373827457428, "learning_rate": 0.0001, "loss": 0.0139, "step": 62220 }, { "epoch": 409.4078947368421, "grad_norm": 1.0608346462249756, "learning_rate": 0.0001, "loss": 0.0124, "step": 62230 }, { "epoch": 409.4736842105263, "grad_norm": 1.1155251264572144, "learning_rate": 0.0001, "loss": 0.012, "step": 62240 }, { "epoch": 409.5394736842105, "grad_norm": 1.1546201705932617, "learning_rate": 0.0001, "loss": 0.0136, "step": 62250 }, { "epoch": 409.60526315789474, "grad_norm": 1.0794380903244019, "learning_rate": 0.0001, "loss": 0.0124, "step": 62260 }, { "epoch": 409.67105263157896, "grad_norm": 1.337540626525879, "learning_rate": 0.0001, "loss": 0.0116, "step": 62270 }, { "epoch": 409.7368421052632, "grad_norm": 1.1186962127685547, "learning_rate": 0.0001, "loss": 0.011, "step": 62280 }, { "epoch": 409.80263157894734, "grad_norm": 1.0747431516647339, "learning_rate": 0.0001, "loss": 0.0145, "step": 62290 }, { "epoch": 409.86842105263156, "grad_norm": 1.2456459999084473, "learning_rate": 0.0001, "loss": 0.0121, "step": 62300 }, { "epoch": 409.9342105263158, "grad_norm": 1.2872223854064941, "learning_rate": 0.0001, "loss": 0.011, "step": 62310 }, { "epoch": 410.0, "grad_norm": 1.3749200105667114, "learning_rate": 0.0001, "loss": 0.0119, "step": 62320 }, { "epoch": 410.0657894736842, "grad_norm": 1.159676432609558, "learning_rate": 0.0001, "loss": 0.0112, "step": 62330 }, { "epoch": 410.13157894736844, "grad_norm": 1.3028908967971802, "learning_rate": 0.0001, "loss": 0.0127, "step": 62340 }, { "epoch": 410.19736842105266, "grad_norm": 0.9679288864135742, "learning_rate": 0.0001, "loss": 0.0108, "step": 62350 }, { "epoch": 410.2631578947368, "grad_norm": 1.2073003053665161, "learning_rate": 0.0001, "loss": 0.014, "step": 62360 }, { "epoch": 410.32894736842104, "grad_norm": 1.3585573434829712, "learning_rate": 0.0001, "loss": 0.011, "step": 62370 }, { "epoch": 410.39473684210526, "grad_norm": 1.4543274641036987, "learning_rate": 0.0001, "loss": 0.012, "step": 62380 }, { "epoch": 410.4605263157895, "grad_norm": 1.1905204057693481, "learning_rate": 0.0001, "loss": 0.0116, "step": 62390 }, { "epoch": 410.5263157894737, "grad_norm": 1.5460284948349, "learning_rate": 0.0001, "loss": 0.0117, "step": 62400 }, { "epoch": 410.5921052631579, "grad_norm": 1.3457225561141968, "learning_rate": 0.0001, "loss": 0.0115, "step": 62410 }, { "epoch": 410.6578947368421, "grad_norm": 1.2957866191864014, "learning_rate": 0.0001, "loss": 0.0103, "step": 62420 }, { "epoch": 410.7236842105263, "grad_norm": 1.269675374031067, "learning_rate": 0.0001, "loss": 0.0117, "step": 62430 }, { "epoch": 410.7894736842105, "grad_norm": 1.0206223726272583, "learning_rate": 0.0001, "loss": 0.0136, "step": 62440 }, { "epoch": 410.85526315789474, "grad_norm": 1.2072843313217163, "learning_rate": 0.0001, "loss": 0.0129, "step": 62450 }, { "epoch": 410.92105263157896, "grad_norm": 0.9825667142868042, "learning_rate": 0.0001, "loss": 0.0125, "step": 62460 }, { "epoch": 410.9868421052632, "grad_norm": 1.3091356754302979, "learning_rate": 0.0001, "loss": 0.0121, "step": 62470 }, { "epoch": 411.05263157894734, "grad_norm": 1.3110969066619873, "learning_rate": 0.0001, "loss": 0.0105, "step": 62480 }, { "epoch": 411.11842105263156, "grad_norm": 1.2493022680282593, "learning_rate": 0.0001, "loss": 0.0131, "step": 62490 }, { "epoch": 411.1842105263158, "grad_norm": 1.1567295789718628, "learning_rate": 0.0001, "loss": 0.0131, "step": 62500 }, { "epoch": 411.25, "grad_norm": 1.2471519708633423, "learning_rate": 0.0001, "loss": 0.0122, "step": 62510 }, { "epoch": 411.3157894736842, "grad_norm": 0.6940414905548096, "learning_rate": 0.0001, "loss": 0.0122, "step": 62520 }, { "epoch": 411.38157894736844, "grad_norm": 0.9060186743736267, "learning_rate": 0.0001, "loss": 0.0116, "step": 62530 }, { "epoch": 411.44736842105266, "grad_norm": 1.0752925872802734, "learning_rate": 0.0001, "loss": 0.013, "step": 62540 }, { "epoch": 411.5131578947368, "grad_norm": 1.0197933912277222, "learning_rate": 0.0001, "loss": 0.0105, "step": 62550 }, { "epoch": 411.57894736842104, "grad_norm": 0.9554465413093567, "learning_rate": 0.0001, "loss": 0.0106, "step": 62560 }, { "epoch": 411.64473684210526, "grad_norm": 1.5054553747177124, "learning_rate": 0.0001, "loss": 0.0117, "step": 62570 }, { "epoch": 411.7105263157895, "grad_norm": 1.1572545766830444, "learning_rate": 0.0001, "loss": 0.0132, "step": 62580 }, { "epoch": 411.7763157894737, "grad_norm": 1.1669964790344238, "learning_rate": 0.0001, "loss": 0.0116, "step": 62590 }, { "epoch": 411.8421052631579, "grad_norm": 1.5255320072174072, "learning_rate": 0.0001, "loss": 0.0119, "step": 62600 }, { "epoch": 411.9078947368421, "grad_norm": 1.322806715965271, "learning_rate": 0.0001, "loss": 0.0123, "step": 62610 }, { "epoch": 411.9736842105263, "grad_norm": 1.0405142307281494, "learning_rate": 0.0001, "loss": 0.0131, "step": 62620 }, { "epoch": 412.0394736842105, "grad_norm": 1.3252968788146973, "learning_rate": 0.0001, "loss": 0.0117, "step": 62630 }, { "epoch": 412.10526315789474, "grad_norm": 0.9440876841545105, "learning_rate": 0.0001, "loss": 0.0128, "step": 62640 }, { "epoch": 412.17105263157896, "grad_norm": 1.6244385242462158, "learning_rate": 0.0001, "loss": 0.0119, "step": 62650 }, { "epoch": 412.2368421052632, "grad_norm": 0.9535412192344666, "learning_rate": 0.0001, "loss": 0.0132, "step": 62660 }, { "epoch": 412.30263157894734, "grad_norm": 1.2329378128051758, "learning_rate": 0.0001, "loss": 0.0108, "step": 62670 }, { "epoch": 412.36842105263156, "grad_norm": 0.9353330135345459, "learning_rate": 0.0001, "loss": 0.0139, "step": 62680 }, { "epoch": 412.4342105263158, "grad_norm": 1.4033151865005493, "learning_rate": 0.0001, "loss": 0.0106, "step": 62690 }, { "epoch": 412.5, "grad_norm": 1.1142581701278687, "learning_rate": 0.0001, "loss": 0.0127, "step": 62700 }, { "epoch": 412.5657894736842, "grad_norm": 1.250436544418335, "learning_rate": 0.0001, "loss": 0.0107, "step": 62710 }, { "epoch": 412.63157894736844, "grad_norm": 1.180382490158081, "learning_rate": 0.0001, "loss": 0.0118, "step": 62720 }, { "epoch": 412.69736842105266, "grad_norm": 1.2273600101470947, "learning_rate": 0.0001, "loss": 0.0111, "step": 62730 }, { "epoch": 412.7631578947368, "grad_norm": 0.9863313436508179, "learning_rate": 0.0001, "loss": 0.0127, "step": 62740 }, { "epoch": 412.82894736842104, "grad_norm": 1.3861109018325806, "learning_rate": 0.0001, "loss": 0.0125, "step": 62750 }, { "epoch": 412.89473684210526, "grad_norm": 1.238087773323059, "learning_rate": 0.0001, "loss": 0.0133, "step": 62760 }, { "epoch": 412.9605263157895, "grad_norm": 1.0432476997375488, "learning_rate": 0.0001, "loss": 0.0106, "step": 62770 }, { "epoch": 413.0263157894737, "grad_norm": 1.2043246030807495, "learning_rate": 0.0001, "loss": 0.0117, "step": 62780 }, { "epoch": 413.0921052631579, "grad_norm": 1.2075798511505127, "learning_rate": 0.0001, "loss": 0.0114, "step": 62790 }, { "epoch": 413.1578947368421, "grad_norm": 0.8619531393051147, "learning_rate": 0.0001, "loss": 0.0123, "step": 62800 }, { "epoch": 413.2236842105263, "grad_norm": 1.2081847190856934, "learning_rate": 0.0001, "loss": 0.0119, "step": 62810 }, { "epoch": 413.2894736842105, "grad_norm": 1.0683201551437378, "learning_rate": 0.0001, "loss": 0.0125, "step": 62820 }, { "epoch": 413.35526315789474, "grad_norm": 1.2931760549545288, "learning_rate": 0.0001, "loss": 0.0103, "step": 62830 }, { "epoch": 413.42105263157896, "grad_norm": 0.9082301259040833, "learning_rate": 0.0001, "loss": 0.0132, "step": 62840 }, { "epoch": 413.4868421052632, "grad_norm": 1.485244631767273, "learning_rate": 0.0001, "loss": 0.0145, "step": 62850 }, { "epoch": 413.55263157894734, "grad_norm": 1.0334351062774658, "learning_rate": 0.0001, "loss": 0.0101, "step": 62860 }, { "epoch": 413.61842105263156, "grad_norm": 1.2499173879623413, "learning_rate": 0.0001, "loss": 0.0113, "step": 62870 }, { "epoch": 413.6842105263158, "grad_norm": 1.141554832458496, "learning_rate": 0.0001, "loss": 0.0127, "step": 62880 }, { "epoch": 413.75, "grad_norm": 1.2541395425796509, "learning_rate": 0.0001, "loss": 0.0109, "step": 62890 }, { "epoch": 413.8157894736842, "grad_norm": 1.2289867401123047, "learning_rate": 0.0001, "loss": 0.0105, "step": 62900 }, { "epoch": 413.88157894736844, "grad_norm": 1.5980114936828613, "learning_rate": 0.0001, "loss": 0.0144, "step": 62910 }, { "epoch": 413.94736842105266, "grad_norm": 0.9283551573753357, "learning_rate": 0.0001, "loss": 0.0128, "step": 62920 }, { "epoch": 414.0131578947368, "grad_norm": 1.0979995727539062, "learning_rate": 0.0001, "loss": 0.0114, "step": 62930 }, { "epoch": 414.07894736842104, "grad_norm": 0.8771706819534302, "learning_rate": 0.0001, "loss": 0.0127, "step": 62940 }, { "epoch": 414.14473684210526, "grad_norm": 1.0368901491165161, "learning_rate": 0.0001, "loss": 0.0128, "step": 62950 }, { "epoch": 414.2105263157895, "grad_norm": 1.0799942016601562, "learning_rate": 0.0001, "loss": 0.0154, "step": 62960 }, { "epoch": 414.2763157894737, "grad_norm": 1.2244056463241577, "learning_rate": 0.0001, "loss": 0.0125, "step": 62970 }, { "epoch": 414.3421052631579, "grad_norm": 0.9773150086402893, "learning_rate": 0.0001, "loss": 0.0115, "step": 62980 }, { "epoch": 414.4078947368421, "grad_norm": 1.0069860219955444, "learning_rate": 0.0001, "loss": 0.0117, "step": 62990 }, { "epoch": 414.4736842105263, "grad_norm": 0.9400529265403748, "learning_rate": 0.0001, "loss": 0.0135, "step": 63000 }, { "epoch": 414.5394736842105, "grad_norm": 1.3097772598266602, "learning_rate": 0.0001, "loss": 0.0137, "step": 63010 }, { "epoch": 414.60526315789474, "grad_norm": 1.036408543586731, "learning_rate": 0.0001, "loss": 0.0133, "step": 63020 }, { "epoch": 414.67105263157896, "grad_norm": 0.8755251169204712, "learning_rate": 0.0001, "loss": 0.0136, "step": 63030 }, { "epoch": 414.7368421052632, "grad_norm": 0.8445789813995361, "learning_rate": 0.0001, "loss": 0.014, "step": 63040 }, { "epoch": 414.80263157894734, "grad_norm": 1.4002634286880493, "learning_rate": 0.0001, "loss": 0.0135, "step": 63050 }, { "epoch": 414.86842105263156, "grad_norm": 1.1597133874893188, "learning_rate": 0.0001, "loss": 0.0138, "step": 63060 }, { "epoch": 414.9342105263158, "grad_norm": 1.0446454286575317, "learning_rate": 0.0001, "loss": 0.0131, "step": 63070 }, { "epoch": 415.0, "grad_norm": 1.177336573600769, "learning_rate": 0.0001, "loss": 0.0132, "step": 63080 }, { "epoch": 415.0657894736842, "grad_norm": 1.3958925008773804, "learning_rate": 0.0001, "loss": 0.0137, "step": 63090 }, { "epoch": 415.13157894736844, "grad_norm": 1.2025551795959473, "learning_rate": 0.0001, "loss": 0.0117, "step": 63100 }, { "epoch": 415.19736842105266, "grad_norm": 1.3240025043487549, "learning_rate": 0.0001, "loss": 0.0141, "step": 63110 }, { "epoch": 415.2631578947368, "grad_norm": 1.2941898107528687, "learning_rate": 0.0001, "loss": 0.0129, "step": 63120 }, { "epoch": 415.32894736842104, "grad_norm": 1.2462248802185059, "learning_rate": 0.0001, "loss": 0.0153, "step": 63130 }, { "epoch": 415.39473684210526, "grad_norm": 1.523111343383789, "learning_rate": 0.0001, "loss": 0.0138, "step": 63140 }, { "epoch": 415.4605263157895, "grad_norm": 1.2437251806259155, "learning_rate": 0.0001, "loss": 0.0133, "step": 63150 }, { "epoch": 415.5263157894737, "grad_norm": 1.0009071826934814, "learning_rate": 0.0001, "loss": 0.0122, "step": 63160 }, { "epoch": 415.5921052631579, "grad_norm": 1.008365511894226, "learning_rate": 0.0001, "loss": 0.0125, "step": 63170 }, { "epoch": 415.6578947368421, "grad_norm": 1.136559009552002, "learning_rate": 0.0001, "loss": 0.0116, "step": 63180 }, { "epoch": 415.7236842105263, "grad_norm": 0.6877762675285339, "learning_rate": 0.0001, "loss": 0.0129, "step": 63190 }, { "epoch": 415.7894736842105, "grad_norm": 1.1004842519760132, "learning_rate": 0.0001, "loss": 0.0145, "step": 63200 }, { "epoch": 415.85526315789474, "grad_norm": 0.9160225987434387, "learning_rate": 0.0001, "loss": 0.0125, "step": 63210 }, { "epoch": 415.92105263157896, "grad_norm": 1.2179346084594727, "learning_rate": 0.0001, "loss": 0.0111, "step": 63220 }, { "epoch": 415.9868421052632, "grad_norm": 1.299672245979309, "learning_rate": 0.0001, "loss": 0.0139, "step": 63230 }, { "epoch": 416.05263157894734, "grad_norm": 1.1643931865692139, "learning_rate": 0.0001, "loss": 0.0139, "step": 63240 }, { "epoch": 416.11842105263156, "grad_norm": 1.36052405834198, "learning_rate": 0.0001, "loss": 0.0128, "step": 63250 }, { "epoch": 416.1842105263158, "grad_norm": 1.2271571159362793, "learning_rate": 0.0001, "loss": 0.0145, "step": 63260 }, { "epoch": 416.25, "grad_norm": 1.2492791414260864, "learning_rate": 0.0001, "loss": 0.0138, "step": 63270 }, { "epoch": 416.3157894736842, "grad_norm": 1.2201145887374878, "learning_rate": 0.0001, "loss": 0.0125, "step": 63280 }, { "epoch": 416.38157894736844, "grad_norm": 1.615103006362915, "learning_rate": 0.0001, "loss": 0.013, "step": 63290 }, { "epoch": 416.44736842105266, "grad_norm": 1.0031245946884155, "learning_rate": 0.0001, "loss": 0.012, "step": 63300 }, { "epoch": 416.5131578947368, "grad_norm": 1.1543900966644287, "learning_rate": 0.0001, "loss": 0.0109, "step": 63310 }, { "epoch": 416.57894736842104, "grad_norm": 1.0617386102676392, "learning_rate": 0.0001, "loss": 0.0116, "step": 63320 }, { "epoch": 416.64473684210526, "grad_norm": 1.2433397769927979, "learning_rate": 0.0001, "loss": 0.0124, "step": 63330 }, { "epoch": 416.7105263157895, "grad_norm": 1.307660698890686, "learning_rate": 0.0001, "loss": 0.0113, "step": 63340 }, { "epoch": 416.7763157894737, "grad_norm": 1.402388334274292, "learning_rate": 0.0001, "loss": 0.0144, "step": 63350 }, { "epoch": 416.8421052631579, "grad_norm": 1.2867043018341064, "learning_rate": 0.0001, "loss": 0.0138, "step": 63360 }, { "epoch": 416.9078947368421, "grad_norm": 1.1766114234924316, "learning_rate": 0.0001, "loss": 0.0131, "step": 63370 }, { "epoch": 416.9736842105263, "grad_norm": 1.2115635871887207, "learning_rate": 0.0001, "loss": 0.0106, "step": 63380 }, { "epoch": 417.0394736842105, "grad_norm": 1.2058789730072021, "learning_rate": 0.0001, "loss": 0.0146, "step": 63390 }, { "epoch": 417.10526315789474, "grad_norm": 1.194610834121704, "learning_rate": 0.0001, "loss": 0.0113, "step": 63400 }, { "epoch": 417.17105263157896, "grad_norm": 0.9211453199386597, "learning_rate": 0.0001, "loss": 0.0126, "step": 63410 }, { "epoch": 417.2368421052632, "grad_norm": 1.093869924545288, "learning_rate": 0.0001, "loss": 0.0118, "step": 63420 }, { "epoch": 417.30263157894734, "grad_norm": 0.9869666695594788, "learning_rate": 0.0001, "loss": 0.0116, "step": 63430 }, { "epoch": 417.36842105263156, "grad_norm": 1.1537610292434692, "learning_rate": 0.0001, "loss": 0.0143, "step": 63440 }, { "epoch": 417.4342105263158, "grad_norm": 0.9441319108009338, "learning_rate": 0.0001, "loss": 0.0111, "step": 63450 }, { "epoch": 417.5, "grad_norm": 1.3330210447311401, "learning_rate": 0.0001, "loss": 0.01, "step": 63460 }, { "epoch": 417.5657894736842, "grad_norm": 1.4678821563720703, "learning_rate": 0.0001, "loss": 0.0128, "step": 63470 }, { "epoch": 417.63157894736844, "grad_norm": 1.0894778966903687, "learning_rate": 0.0001, "loss": 0.0129, "step": 63480 }, { "epoch": 417.69736842105266, "grad_norm": 1.3861744403839111, "learning_rate": 0.0001, "loss": 0.0113, "step": 63490 }, { "epoch": 417.7631578947368, "grad_norm": 1.1523667573928833, "learning_rate": 0.0001, "loss": 0.0125, "step": 63500 }, { "epoch": 417.82894736842104, "grad_norm": 1.058275818824768, "learning_rate": 0.0001, "loss": 0.0138, "step": 63510 }, { "epoch": 417.89473684210526, "grad_norm": 1.2407093048095703, "learning_rate": 0.0001, "loss": 0.0128, "step": 63520 }, { "epoch": 417.9605263157895, "grad_norm": 0.861865222454071, "learning_rate": 0.0001, "loss": 0.0149, "step": 63530 }, { "epoch": 418.0263157894737, "grad_norm": 1.3533257246017456, "learning_rate": 0.0001, "loss": 0.0123, "step": 63540 }, { "epoch": 418.0921052631579, "grad_norm": 0.8494672179222107, "learning_rate": 0.0001, "loss": 0.0113, "step": 63550 }, { "epoch": 418.1578947368421, "grad_norm": 0.9071248769760132, "learning_rate": 0.0001, "loss": 0.0108, "step": 63560 }, { "epoch": 418.2236842105263, "grad_norm": 1.4930342435836792, "learning_rate": 0.0001, "loss": 0.011, "step": 63570 }, { "epoch": 418.2894736842105, "grad_norm": 1.4266362190246582, "learning_rate": 0.0001, "loss": 0.0139, "step": 63580 }, { "epoch": 418.35526315789474, "grad_norm": 1.1286511421203613, "learning_rate": 0.0001, "loss": 0.0113, "step": 63590 }, { "epoch": 418.42105263157896, "grad_norm": 1.0800775289535522, "learning_rate": 0.0001, "loss": 0.0112, "step": 63600 }, { "epoch": 418.4868421052632, "grad_norm": 1.0998226404190063, "learning_rate": 0.0001, "loss": 0.0126, "step": 63610 }, { "epoch": 418.55263157894734, "grad_norm": 1.3095847368240356, "learning_rate": 0.0001, "loss": 0.0116, "step": 63620 }, { "epoch": 418.61842105263156, "grad_norm": 1.0724563598632812, "learning_rate": 0.0001, "loss": 0.0119, "step": 63630 }, { "epoch": 418.6842105263158, "grad_norm": 0.9985296726226807, "learning_rate": 0.0001, "loss": 0.0112, "step": 63640 }, { "epoch": 418.75, "grad_norm": 1.2534633874893188, "learning_rate": 0.0001, "loss": 0.0136, "step": 63650 }, { "epoch": 418.8157894736842, "grad_norm": 1.4504799842834473, "learning_rate": 0.0001, "loss": 0.0135, "step": 63660 }, { "epoch": 418.88157894736844, "grad_norm": 1.180578589439392, "learning_rate": 0.0001, "loss": 0.0149, "step": 63670 }, { "epoch": 418.94736842105266, "grad_norm": 1.6236175298690796, "learning_rate": 0.0001, "loss": 0.0134, "step": 63680 }, { "epoch": 419.0131578947368, "grad_norm": 1.1032344102859497, "learning_rate": 0.0001, "loss": 0.0137, "step": 63690 }, { "epoch": 419.07894736842104, "grad_norm": 1.1350429058074951, "learning_rate": 0.0001, "loss": 0.0116, "step": 63700 }, { "epoch": 419.14473684210526, "grad_norm": 1.1316243410110474, "learning_rate": 0.0001, "loss": 0.0115, "step": 63710 }, { "epoch": 419.2105263157895, "grad_norm": 1.0160430669784546, "learning_rate": 0.0001, "loss": 0.011, "step": 63720 }, { "epoch": 419.2763157894737, "grad_norm": 0.9599151015281677, "learning_rate": 0.0001, "loss": 0.0128, "step": 63730 }, { "epoch": 419.3421052631579, "grad_norm": 1.0853590965270996, "learning_rate": 0.0001, "loss": 0.0122, "step": 63740 }, { "epoch": 419.4078947368421, "grad_norm": 1.0868717432022095, "learning_rate": 0.0001, "loss": 0.0138, "step": 63750 }, { "epoch": 419.4736842105263, "grad_norm": 0.9881595373153687, "learning_rate": 0.0001, "loss": 0.0142, "step": 63760 }, { "epoch": 419.5394736842105, "grad_norm": 1.1161201000213623, "learning_rate": 0.0001, "loss": 0.0137, "step": 63770 }, { "epoch": 419.60526315789474, "grad_norm": 0.8587058186531067, "learning_rate": 0.0001, "loss": 0.0115, "step": 63780 }, { "epoch": 419.67105263157896, "grad_norm": 0.959867537021637, "learning_rate": 0.0001, "loss": 0.0123, "step": 63790 }, { "epoch": 419.7368421052632, "grad_norm": 1.1373491287231445, "learning_rate": 0.0001, "loss": 0.0125, "step": 63800 }, { "epoch": 419.80263157894734, "grad_norm": 1.2124371528625488, "learning_rate": 0.0001, "loss": 0.0127, "step": 63810 }, { "epoch": 419.86842105263156, "grad_norm": 1.1370086669921875, "learning_rate": 0.0001, "loss": 0.0117, "step": 63820 }, { "epoch": 419.9342105263158, "grad_norm": 1.2051914930343628, "learning_rate": 0.0001, "loss": 0.0127, "step": 63830 }, { "epoch": 420.0, "grad_norm": 1.598705530166626, "learning_rate": 0.0001, "loss": 0.0105, "step": 63840 }, { "epoch": 420.0657894736842, "grad_norm": 1.2667107582092285, "learning_rate": 0.0001, "loss": 0.0123, "step": 63850 }, { "epoch": 420.13157894736844, "grad_norm": 1.4038127660751343, "learning_rate": 0.0001, "loss": 0.0119, "step": 63860 }, { "epoch": 420.19736842105266, "grad_norm": 1.233218789100647, "learning_rate": 0.0001, "loss": 0.0122, "step": 63870 }, { "epoch": 420.2631578947368, "grad_norm": 1.2096288204193115, "learning_rate": 0.0001, "loss": 0.0119, "step": 63880 }, { "epoch": 420.32894736842104, "grad_norm": 1.050657033920288, "learning_rate": 0.0001, "loss": 0.012, "step": 63890 }, { "epoch": 420.39473684210526, "grad_norm": 1.7145295143127441, "learning_rate": 0.0001, "loss": 0.0126, "step": 63900 }, { "epoch": 420.4605263157895, "grad_norm": 1.58820378780365, "learning_rate": 0.0001, "loss": 0.0154, "step": 63910 }, { "epoch": 420.5263157894737, "grad_norm": 0.9367581009864807, "learning_rate": 0.0001, "loss": 0.0122, "step": 63920 }, { "epoch": 420.5921052631579, "grad_norm": 1.4414033889770508, "learning_rate": 0.0001, "loss": 0.0113, "step": 63930 }, { "epoch": 420.6578947368421, "grad_norm": 1.310313105583191, "learning_rate": 0.0001, "loss": 0.0137, "step": 63940 }, { "epoch": 420.7236842105263, "grad_norm": 0.850493848323822, "learning_rate": 0.0001, "loss": 0.0114, "step": 63950 }, { "epoch": 420.7894736842105, "grad_norm": 1.3179919719696045, "learning_rate": 0.0001, "loss": 0.0112, "step": 63960 }, { "epoch": 420.85526315789474, "grad_norm": 1.3989521265029907, "learning_rate": 0.0001, "loss": 0.0103, "step": 63970 }, { "epoch": 420.92105263157896, "grad_norm": 1.3190523386001587, "learning_rate": 0.0001, "loss": 0.0126, "step": 63980 }, { "epoch": 420.9868421052632, "grad_norm": 0.8655247688293457, "learning_rate": 0.0001, "loss": 0.0111, "step": 63990 }, { "epoch": 421.05263157894734, "grad_norm": 1.5341448783874512, "learning_rate": 0.0001, "loss": 0.0133, "step": 64000 }, { "epoch": 421.11842105263156, "grad_norm": 1.0463502407073975, "learning_rate": 0.0001, "loss": 0.0111, "step": 64010 }, { "epoch": 421.1842105263158, "grad_norm": 0.6681509613990784, "learning_rate": 0.0001, "loss": 0.0126, "step": 64020 }, { "epoch": 421.25, "grad_norm": 1.323490858078003, "learning_rate": 0.0001, "loss": 0.0117, "step": 64030 }, { "epoch": 421.3157894736842, "grad_norm": 0.8754792213439941, "learning_rate": 0.0001, "loss": 0.0114, "step": 64040 }, { "epoch": 421.38157894736844, "grad_norm": 1.2560418844223022, "learning_rate": 0.0001, "loss": 0.0122, "step": 64050 }, { "epoch": 421.44736842105266, "grad_norm": 1.2175922393798828, "learning_rate": 0.0001, "loss": 0.0129, "step": 64060 }, { "epoch": 421.5131578947368, "grad_norm": 1.4118285179138184, "learning_rate": 0.0001, "loss": 0.0131, "step": 64070 }, { "epoch": 421.57894736842104, "grad_norm": 1.3266385793685913, "learning_rate": 0.0001, "loss": 0.0113, "step": 64080 }, { "epoch": 421.64473684210526, "grad_norm": 1.113729476928711, "learning_rate": 0.0001, "loss": 0.0117, "step": 64090 }, { "epoch": 421.7105263157895, "grad_norm": 1.134850025177002, "learning_rate": 0.0001, "loss": 0.0129, "step": 64100 }, { "epoch": 421.7763157894737, "grad_norm": 0.9437716603279114, "learning_rate": 0.0001, "loss": 0.0132, "step": 64110 }, { "epoch": 421.8421052631579, "grad_norm": 1.0176291465759277, "learning_rate": 0.0001, "loss": 0.0117, "step": 64120 }, { "epoch": 421.9078947368421, "grad_norm": 1.2647275924682617, "learning_rate": 0.0001, "loss": 0.0155, "step": 64130 }, { "epoch": 421.9736842105263, "grad_norm": 0.7509058713912964, "learning_rate": 0.0001, "loss": 0.0137, "step": 64140 }, { "epoch": 422.0394736842105, "grad_norm": 0.9064149260520935, "learning_rate": 0.0001, "loss": 0.0127, "step": 64150 }, { "epoch": 422.10526315789474, "grad_norm": 1.20452880859375, "learning_rate": 0.0001, "loss": 0.0159, "step": 64160 }, { "epoch": 422.17105263157896, "grad_norm": 1.04318368434906, "learning_rate": 0.0001, "loss": 0.0166, "step": 64170 }, { "epoch": 422.2368421052632, "grad_norm": 1.1167349815368652, "learning_rate": 0.0001, "loss": 0.0157, "step": 64180 }, { "epoch": 422.30263157894734, "grad_norm": 1.3725717067718506, "learning_rate": 0.0001, "loss": 0.0131, "step": 64190 }, { "epoch": 422.36842105263156, "grad_norm": 1.0042188167572021, "learning_rate": 0.0001, "loss": 0.0143, "step": 64200 }, { "epoch": 422.4342105263158, "grad_norm": 0.822090208530426, "learning_rate": 0.0001, "loss": 0.0137, "step": 64210 }, { "epoch": 422.5, "grad_norm": 1.2058206796646118, "learning_rate": 0.0001, "loss": 0.0144, "step": 64220 }, { "epoch": 422.5657894736842, "grad_norm": 0.9390090703964233, "learning_rate": 0.0001, "loss": 0.0139, "step": 64230 }, { "epoch": 422.63157894736844, "grad_norm": 1.168169379234314, "learning_rate": 0.0001, "loss": 0.0141, "step": 64240 }, { "epoch": 422.69736842105266, "grad_norm": 1.0627769231796265, "learning_rate": 0.0001, "loss": 0.0125, "step": 64250 }, { "epoch": 422.7631578947368, "grad_norm": 0.819271981716156, "learning_rate": 0.0001, "loss": 0.0121, "step": 64260 }, { "epoch": 422.82894736842104, "grad_norm": 1.1444475650787354, "learning_rate": 0.0001, "loss": 0.0152, "step": 64270 }, { "epoch": 422.89473684210526, "grad_norm": 1.187440276145935, "learning_rate": 0.0001, "loss": 0.0119, "step": 64280 }, { "epoch": 422.9605263157895, "grad_norm": 1.1766507625579834, "learning_rate": 0.0001, "loss": 0.0149, "step": 64290 }, { "epoch": 423.0263157894737, "grad_norm": 1.2091718912124634, "learning_rate": 0.0001, "loss": 0.0121, "step": 64300 }, { "epoch": 423.0921052631579, "grad_norm": 0.8333958983421326, "learning_rate": 0.0001, "loss": 0.0134, "step": 64310 }, { "epoch": 423.1578947368421, "grad_norm": 1.0338822603225708, "learning_rate": 0.0001, "loss": 0.0133, "step": 64320 }, { "epoch": 423.2236842105263, "grad_norm": 1.3641862869262695, "learning_rate": 0.0001, "loss": 0.0129, "step": 64330 }, { "epoch": 423.2894736842105, "grad_norm": 1.1286553144454956, "learning_rate": 0.0001, "loss": 0.0144, "step": 64340 }, { "epoch": 423.35526315789474, "grad_norm": 1.0030713081359863, "learning_rate": 0.0001, "loss": 0.012, "step": 64350 }, { "epoch": 423.42105263157896, "grad_norm": 0.8953898549079895, "learning_rate": 0.0001, "loss": 0.0114, "step": 64360 }, { "epoch": 423.4868421052632, "grad_norm": 0.9812647700309753, "learning_rate": 0.0001, "loss": 0.0142, "step": 64370 }, { "epoch": 423.55263157894734, "grad_norm": 1.1874929666519165, "learning_rate": 0.0001, "loss": 0.0115, "step": 64380 }, { "epoch": 423.61842105263156, "grad_norm": 1.0838314294815063, "learning_rate": 0.0001, "loss": 0.0116, "step": 64390 }, { "epoch": 423.6842105263158, "grad_norm": 1.1809245347976685, "learning_rate": 0.0001, "loss": 0.0121, "step": 64400 }, { "epoch": 423.75, "grad_norm": 1.1488882303237915, "learning_rate": 0.0001, "loss": 0.0132, "step": 64410 }, { "epoch": 423.8157894736842, "grad_norm": 1.5134193897247314, "learning_rate": 0.0001, "loss": 0.0108, "step": 64420 }, { "epoch": 423.88157894736844, "grad_norm": 1.3163988590240479, "learning_rate": 0.0001, "loss": 0.0117, "step": 64430 }, { "epoch": 423.94736842105266, "grad_norm": 1.3154017925262451, "learning_rate": 0.0001, "loss": 0.0117, "step": 64440 }, { "epoch": 424.0131578947368, "grad_norm": 0.980704665184021, "learning_rate": 0.0001, "loss": 0.0128, "step": 64450 }, { "epoch": 424.07894736842104, "grad_norm": 1.4469650983810425, "learning_rate": 0.0001, "loss": 0.0148, "step": 64460 }, { "epoch": 424.14473684210526, "grad_norm": 1.6586270332336426, "learning_rate": 0.0001, "loss": 0.0111, "step": 64470 }, { "epoch": 424.2105263157895, "grad_norm": 1.164552092552185, "learning_rate": 0.0001, "loss": 0.0124, "step": 64480 }, { "epoch": 424.2763157894737, "grad_norm": 1.2287418842315674, "learning_rate": 0.0001, "loss": 0.0128, "step": 64490 }, { "epoch": 424.3421052631579, "grad_norm": 1.2211633920669556, "learning_rate": 0.0001, "loss": 0.0117, "step": 64500 }, { "epoch": 424.4078947368421, "grad_norm": 0.9924276471138, "learning_rate": 0.0001, "loss": 0.0111, "step": 64510 }, { "epoch": 424.4736842105263, "grad_norm": 1.504193663597107, "learning_rate": 0.0001, "loss": 0.0122, "step": 64520 }, { "epoch": 424.5394736842105, "grad_norm": 1.217288613319397, "learning_rate": 0.0001, "loss": 0.0121, "step": 64530 }, { "epoch": 424.60526315789474, "grad_norm": 0.943494439125061, "learning_rate": 0.0001, "loss": 0.0139, "step": 64540 }, { "epoch": 424.67105263157896, "grad_norm": 1.0527056455612183, "learning_rate": 0.0001, "loss": 0.0133, "step": 64550 }, { "epoch": 424.7368421052632, "grad_norm": 0.7593855261802673, "learning_rate": 0.0001, "loss": 0.0115, "step": 64560 }, { "epoch": 424.80263157894734, "grad_norm": 0.9078150391578674, "learning_rate": 0.0001, "loss": 0.0111, "step": 64570 }, { "epoch": 424.86842105263156, "grad_norm": 1.2566509246826172, "learning_rate": 0.0001, "loss": 0.013, "step": 64580 }, { "epoch": 424.9342105263158, "grad_norm": 0.9278975129127502, "learning_rate": 0.0001, "loss": 0.0124, "step": 64590 }, { "epoch": 425.0, "grad_norm": 1.0791572332382202, "learning_rate": 0.0001, "loss": 0.0126, "step": 64600 }, { "epoch": 425.0657894736842, "grad_norm": 1.1237635612487793, "learning_rate": 0.0001, "loss": 0.0118, "step": 64610 }, { "epoch": 425.13157894736844, "grad_norm": 1.0109001398086548, "learning_rate": 0.0001, "loss": 0.011, "step": 64620 }, { "epoch": 425.19736842105266, "grad_norm": 1.1927449703216553, "learning_rate": 0.0001, "loss": 0.0146, "step": 64630 }, { "epoch": 425.2631578947368, "grad_norm": 0.9462395310401917, "learning_rate": 0.0001, "loss": 0.0125, "step": 64640 }, { "epoch": 425.32894736842104, "grad_norm": 1.2187925577163696, "learning_rate": 0.0001, "loss": 0.0127, "step": 64650 }, { "epoch": 425.39473684210526, "grad_norm": 0.9968974590301514, "learning_rate": 0.0001, "loss": 0.0141, "step": 64660 }, { "epoch": 425.4605263157895, "grad_norm": 1.0321686267852783, "learning_rate": 0.0001, "loss": 0.0115, "step": 64670 }, { "epoch": 425.5263157894737, "grad_norm": 0.9968619346618652, "learning_rate": 0.0001, "loss": 0.0139, "step": 64680 }, { "epoch": 425.5921052631579, "grad_norm": 1.1771488189697266, "learning_rate": 0.0001, "loss": 0.0125, "step": 64690 }, { "epoch": 425.6578947368421, "grad_norm": 1.3401336669921875, "learning_rate": 0.0001, "loss": 0.0122, "step": 64700 }, { "epoch": 425.7236842105263, "grad_norm": 1.4088808298110962, "learning_rate": 0.0001, "loss": 0.0131, "step": 64710 }, { "epoch": 425.7894736842105, "grad_norm": 0.976675808429718, "learning_rate": 0.0001, "loss": 0.0112, "step": 64720 }, { "epoch": 425.85526315789474, "grad_norm": 1.2826030254364014, "learning_rate": 0.0001, "loss": 0.011, "step": 64730 }, { "epoch": 425.92105263157896, "grad_norm": 0.7183810472488403, "learning_rate": 0.0001, "loss": 0.0139, "step": 64740 }, { "epoch": 425.9868421052632, "grad_norm": 1.3914217948913574, "learning_rate": 0.0001, "loss": 0.0113, "step": 64750 }, { "epoch": 426.05263157894734, "grad_norm": 1.1009012460708618, "learning_rate": 0.0001, "loss": 0.0131, "step": 64760 }, { "epoch": 426.11842105263156, "grad_norm": 1.2613898515701294, "learning_rate": 0.0001, "loss": 0.0116, "step": 64770 }, { "epoch": 426.1842105263158, "grad_norm": 1.666262149810791, "learning_rate": 0.0001, "loss": 0.0115, "step": 64780 }, { "epoch": 426.25, "grad_norm": 1.3029481172561646, "learning_rate": 0.0001, "loss": 0.0123, "step": 64790 }, { "epoch": 426.3157894736842, "grad_norm": 1.0970615148544312, "learning_rate": 0.0001, "loss": 0.0106, "step": 64800 }, { "epoch": 426.38157894736844, "grad_norm": 0.8303019404411316, "learning_rate": 0.0001, "loss": 0.0113, "step": 64810 }, { "epoch": 426.44736842105266, "grad_norm": 1.0774695873260498, "learning_rate": 0.0001, "loss": 0.0112, "step": 64820 }, { "epoch": 426.5131578947368, "grad_norm": 1.2249752283096313, "learning_rate": 0.0001, "loss": 0.0121, "step": 64830 }, { "epoch": 426.57894736842104, "grad_norm": 1.4472177028656006, "learning_rate": 0.0001, "loss": 0.0114, "step": 64840 }, { "epoch": 426.64473684210526, "grad_norm": 1.1271650791168213, "learning_rate": 0.0001, "loss": 0.0113, "step": 64850 }, { "epoch": 426.7105263157895, "grad_norm": 1.2733358144760132, "learning_rate": 0.0001, "loss": 0.0141, "step": 64860 }, { "epoch": 426.7763157894737, "grad_norm": 0.8488833904266357, "learning_rate": 0.0001, "loss": 0.0115, "step": 64870 }, { "epoch": 426.8421052631579, "grad_norm": 0.9887498021125793, "learning_rate": 0.0001, "loss": 0.0154, "step": 64880 }, { "epoch": 426.9078947368421, "grad_norm": 1.160360336303711, "learning_rate": 0.0001, "loss": 0.0129, "step": 64890 }, { "epoch": 426.9736842105263, "grad_norm": 1.204254388809204, "learning_rate": 0.0001, "loss": 0.0115, "step": 64900 }, { "epoch": 427.0394736842105, "grad_norm": 1.3775442838668823, "learning_rate": 0.0001, "loss": 0.0109, "step": 64910 }, { "epoch": 427.10526315789474, "grad_norm": 1.2718161344528198, "learning_rate": 0.0001, "loss": 0.0121, "step": 64920 }, { "epoch": 427.17105263157896, "grad_norm": 1.3851796388626099, "learning_rate": 0.0001, "loss": 0.0133, "step": 64930 }, { "epoch": 427.2368421052632, "grad_norm": 1.498613715171814, "learning_rate": 0.0001, "loss": 0.0142, "step": 64940 }, { "epoch": 427.30263157894734, "grad_norm": 0.9563484191894531, "learning_rate": 0.0001, "loss": 0.0106, "step": 64950 }, { "epoch": 427.36842105263156, "grad_norm": 1.0629527568817139, "learning_rate": 0.0001, "loss": 0.0107, "step": 64960 }, { "epoch": 427.4342105263158, "grad_norm": 1.2376042604446411, "learning_rate": 0.0001, "loss": 0.0127, "step": 64970 }, { "epoch": 427.5, "grad_norm": 1.0314258337020874, "learning_rate": 0.0001, "loss": 0.0112, "step": 64980 }, { "epoch": 427.5657894736842, "grad_norm": 1.2879270315170288, "learning_rate": 0.0001, "loss": 0.0129, "step": 64990 }, { "epoch": 427.63157894736844, "grad_norm": 0.971474289894104, "learning_rate": 0.0001, "loss": 0.0137, "step": 65000 }, { "epoch": 427.69736842105266, "grad_norm": 0.9196737408638, "learning_rate": 0.0001, "loss": 0.0109, "step": 65010 }, { "epoch": 427.7631578947368, "grad_norm": 0.862593412399292, "learning_rate": 0.0001, "loss": 0.0117, "step": 65020 }, { "epoch": 427.82894736842104, "grad_norm": 1.113861083984375, "learning_rate": 0.0001, "loss": 0.0121, "step": 65030 }, { "epoch": 427.89473684210526, "grad_norm": 1.189150333404541, "learning_rate": 0.0001, "loss": 0.0117, "step": 65040 }, { "epoch": 427.9605263157895, "grad_norm": 1.0264296531677246, "learning_rate": 0.0001, "loss": 0.0107, "step": 65050 }, { "epoch": 428.0263157894737, "grad_norm": 1.2653850317001343, "learning_rate": 0.0001, "loss": 0.0133, "step": 65060 }, { "epoch": 428.0921052631579, "grad_norm": 1.260859489440918, "learning_rate": 0.0001, "loss": 0.0119, "step": 65070 }, { "epoch": 428.1578947368421, "grad_norm": 1.6127079725265503, "learning_rate": 0.0001, "loss": 0.014, "step": 65080 }, { "epoch": 428.2236842105263, "grad_norm": 1.2209359407424927, "learning_rate": 0.0001, "loss": 0.0114, "step": 65090 }, { "epoch": 428.2894736842105, "grad_norm": 1.2434695959091187, "learning_rate": 0.0001, "loss": 0.0129, "step": 65100 }, { "epoch": 428.35526315789474, "grad_norm": 1.273813009262085, "learning_rate": 0.0001, "loss": 0.0116, "step": 65110 }, { "epoch": 428.42105263157896, "grad_norm": 1.580181360244751, "learning_rate": 0.0001, "loss": 0.0107, "step": 65120 }, { "epoch": 428.4868421052632, "grad_norm": 1.0633710622787476, "learning_rate": 0.0001, "loss": 0.0132, "step": 65130 }, { "epoch": 428.55263157894734, "grad_norm": 1.0825523138046265, "learning_rate": 0.0001, "loss": 0.0112, "step": 65140 }, { "epoch": 428.61842105263156, "grad_norm": 1.143089771270752, "learning_rate": 0.0001, "loss": 0.0122, "step": 65150 }, { "epoch": 428.6842105263158, "grad_norm": 0.7292158603668213, "learning_rate": 0.0001, "loss": 0.0125, "step": 65160 }, { "epoch": 428.75, "grad_norm": 1.3087058067321777, "learning_rate": 0.0001, "loss": 0.0104, "step": 65170 }, { "epoch": 428.8157894736842, "grad_norm": 1.2735743522644043, "learning_rate": 0.0001, "loss": 0.0114, "step": 65180 }, { "epoch": 428.88157894736844, "grad_norm": 1.169930100440979, "learning_rate": 0.0001, "loss": 0.0112, "step": 65190 }, { "epoch": 428.94736842105266, "grad_norm": 1.1776015758514404, "learning_rate": 0.0001, "loss": 0.0117, "step": 65200 }, { "epoch": 429.0131578947368, "grad_norm": 1.1052436828613281, "learning_rate": 0.0001, "loss": 0.0113, "step": 65210 }, { "epoch": 429.07894736842104, "grad_norm": 0.8525586724281311, "learning_rate": 0.0001, "loss": 0.0135, "step": 65220 }, { "epoch": 429.14473684210526, "grad_norm": 0.9319892525672913, "learning_rate": 0.0001, "loss": 0.0133, "step": 65230 }, { "epoch": 429.2105263157895, "grad_norm": 0.9886743426322937, "learning_rate": 0.0001, "loss": 0.0107, "step": 65240 }, { "epoch": 429.2763157894737, "grad_norm": 0.850257158279419, "learning_rate": 0.0001, "loss": 0.0128, "step": 65250 }, { "epoch": 429.3421052631579, "grad_norm": 1.0104154348373413, "learning_rate": 0.0001, "loss": 0.0119, "step": 65260 }, { "epoch": 429.4078947368421, "grad_norm": 0.8095145225524902, "learning_rate": 0.0001, "loss": 0.0122, "step": 65270 }, { "epoch": 429.4736842105263, "grad_norm": 1.1854900121688843, "learning_rate": 0.0001, "loss": 0.0117, "step": 65280 }, { "epoch": 429.5394736842105, "grad_norm": 1.049673080444336, "learning_rate": 0.0001, "loss": 0.011, "step": 65290 }, { "epoch": 429.60526315789474, "grad_norm": 1.3695430755615234, "learning_rate": 0.0001, "loss": 0.0119, "step": 65300 }, { "epoch": 429.67105263157896, "grad_norm": 1.3096168041229248, "learning_rate": 0.0001, "loss": 0.0143, "step": 65310 }, { "epoch": 429.7368421052632, "grad_norm": 1.11677086353302, "learning_rate": 0.0001, "loss": 0.0102, "step": 65320 }, { "epoch": 429.80263157894734, "grad_norm": 1.0054956674575806, "learning_rate": 0.0001, "loss": 0.0127, "step": 65330 }, { "epoch": 429.86842105263156, "grad_norm": 1.0261496305465698, "learning_rate": 0.0001, "loss": 0.012, "step": 65340 }, { "epoch": 429.9342105263158, "grad_norm": 0.8702152967453003, "learning_rate": 0.0001, "loss": 0.0128, "step": 65350 }, { "epoch": 430.0, "grad_norm": 1.1188616752624512, "learning_rate": 0.0001, "loss": 0.0114, "step": 65360 }, { "epoch": 430.0657894736842, "grad_norm": 0.9277343153953552, "learning_rate": 0.0001, "loss": 0.0128, "step": 65370 }, { "epoch": 430.13157894736844, "grad_norm": 0.8487063646316528, "learning_rate": 0.0001, "loss": 0.0121, "step": 65380 }, { "epoch": 430.19736842105266, "grad_norm": 1.0749578475952148, "learning_rate": 0.0001, "loss": 0.0125, "step": 65390 }, { "epoch": 430.2631578947368, "grad_norm": 1.4141275882720947, "learning_rate": 0.0001, "loss": 0.0123, "step": 65400 }, { "epoch": 430.32894736842104, "grad_norm": 1.4153060913085938, "learning_rate": 0.0001, "loss": 0.0112, "step": 65410 }, { "epoch": 430.39473684210526, "grad_norm": 1.0824086666107178, "learning_rate": 0.0001, "loss": 0.0141, "step": 65420 }, { "epoch": 430.4605263157895, "grad_norm": 1.1492947340011597, "learning_rate": 0.0001, "loss": 0.0116, "step": 65430 }, { "epoch": 430.5263157894737, "grad_norm": 0.9777079820632935, "learning_rate": 0.0001, "loss": 0.0131, "step": 65440 }, { "epoch": 430.5921052631579, "grad_norm": 1.278460144996643, "learning_rate": 0.0001, "loss": 0.0126, "step": 65450 }, { "epoch": 430.6578947368421, "grad_norm": 1.0033302307128906, "learning_rate": 0.0001, "loss": 0.0105, "step": 65460 }, { "epoch": 430.7236842105263, "grad_norm": 1.1041438579559326, "learning_rate": 0.0001, "loss": 0.0112, "step": 65470 }, { "epoch": 430.7894736842105, "grad_norm": 1.6266603469848633, "learning_rate": 0.0001, "loss": 0.0141, "step": 65480 }, { "epoch": 430.85526315789474, "grad_norm": 1.2963449954986572, "learning_rate": 0.0001, "loss": 0.0126, "step": 65490 }, { "epoch": 430.92105263157896, "grad_norm": 1.226628303527832, "learning_rate": 0.0001, "loss": 0.0133, "step": 65500 }, { "epoch": 430.9868421052632, "grad_norm": 1.3734506368637085, "learning_rate": 0.0001, "loss": 0.0143, "step": 65510 }, { "epoch": 431.05263157894734, "grad_norm": 1.5446348190307617, "learning_rate": 0.0001, "loss": 0.0125, "step": 65520 }, { "epoch": 431.11842105263156, "grad_norm": 1.2585902214050293, "learning_rate": 0.0001, "loss": 0.0142, "step": 65530 }, { "epoch": 431.1842105263158, "grad_norm": 1.1303677558898926, "learning_rate": 0.0001, "loss": 0.0125, "step": 65540 }, { "epoch": 431.25, "grad_norm": 1.005300521850586, "learning_rate": 0.0001, "loss": 0.0121, "step": 65550 }, { "epoch": 431.3157894736842, "grad_norm": 1.325517177581787, "learning_rate": 0.0001, "loss": 0.0111, "step": 65560 }, { "epoch": 431.38157894736844, "grad_norm": 0.9297947883605957, "learning_rate": 0.0001, "loss": 0.014, "step": 65570 }, { "epoch": 431.44736842105266, "grad_norm": 1.0581676959991455, "learning_rate": 0.0001, "loss": 0.0131, "step": 65580 }, { "epoch": 431.5131578947368, "grad_norm": 0.9958070516586304, "learning_rate": 0.0001, "loss": 0.0109, "step": 65590 }, { "epoch": 431.57894736842104, "grad_norm": 1.3091784715652466, "learning_rate": 0.0001, "loss": 0.0129, "step": 65600 }, { "epoch": 431.64473684210526, "grad_norm": 1.091407060623169, "learning_rate": 0.0001, "loss": 0.0119, "step": 65610 }, { "epoch": 431.7105263157895, "grad_norm": 1.4302153587341309, "learning_rate": 0.0001, "loss": 0.012, "step": 65620 }, { "epoch": 431.7763157894737, "grad_norm": 1.0654196739196777, "learning_rate": 0.0001, "loss": 0.0121, "step": 65630 }, { "epoch": 431.8421052631579, "grad_norm": 1.413366436958313, "learning_rate": 0.0001, "loss": 0.0128, "step": 65640 }, { "epoch": 431.9078947368421, "grad_norm": 1.09171462059021, "learning_rate": 0.0001, "loss": 0.0122, "step": 65650 }, { "epoch": 431.9736842105263, "grad_norm": 1.5487229824066162, "learning_rate": 0.0001, "loss": 0.0127, "step": 65660 }, { "epoch": 432.0394736842105, "grad_norm": 1.2298110723495483, "learning_rate": 0.0001, "loss": 0.0123, "step": 65670 }, { "epoch": 432.10526315789474, "grad_norm": 1.2410331964492798, "learning_rate": 0.0001, "loss": 0.0115, "step": 65680 }, { "epoch": 432.17105263157896, "grad_norm": 1.3613755702972412, "learning_rate": 0.0001, "loss": 0.0113, "step": 65690 }, { "epoch": 432.2368421052632, "grad_norm": 1.2268283367156982, "learning_rate": 0.0001, "loss": 0.0123, "step": 65700 }, { "epoch": 432.30263157894734, "grad_norm": 1.8424831628799438, "learning_rate": 0.0001, "loss": 0.0131, "step": 65710 }, { "epoch": 432.36842105263156, "grad_norm": 1.5524377822875977, "learning_rate": 0.0001, "loss": 0.0139, "step": 65720 }, { "epoch": 432.4342105263158, "grad_norm": 1.7905634641647339, "learning_rate": 0.0001, "loss": 0.014, "step": 65730 }, { "epoch": 432.5, "grad_norm": 1.3174190521240234, "learning_rate": 0.0001, "loss": 0.0118, "step": 65740 }, { "epoch": 432.5657894736842, "grad_norm": 1.387406587600708, "learning_rate": 0.0001, "loss": 0.0111, "step": 65750 }, { "epoch": 432.63157894736844, "grad_norm": 1.184760570526123, "learning_rate": 0.0001, "loss": 0.0111, "step": 65760 }, { "epoch": 432.69736842105266, "grad_norm": 1.0794061422348022, "learning_rate": 0.0001, "loss": 0.0104, "step": 65770 }, { "epoch": 432.7631578947368, "grad_norm": 1.1350884437561035, "learning_rate": 0.0001, "loss": 0.0117, "step": 65780 }, { "epoch": 432.82894736842104, "grad_norm": 1.20332670211792, "learning_rate": 0.0001, "loss": 0.0103, "step": 65790 }, { "epoch": 432.89473684210526, "grad_norm": 1.2901190519332886, "learning_rate": 0.0001, "loss": 0.0112, "step": 65800 }, { "epoch": 432.9605263157895, "grad_norm": 0.93093341588974, "learning_rate": 0.0001, "loss": 0.0135, "step": 65810 }, { "epoch": 433.0263157894737, "grad_norm": 1.0623509883880615, "learning_rate": 0.0001, "loss": 0.0108, "step": 65820 }, { "epoch": 433.0921052631579, "grad_norm": 1.2282602787017822, "learning_rate": 0.0001, "loss": 0.0112, "step": 65830 }, { "epoch": 433.1578947368421, "grad_norm": 1.1212533712387085, "learning_rate": 0.0001, "loss": 0.0102, "step": 65840 }, { "epoch": 433.2236842105263, "grad_norm": 1.0316697359085083, "learning_rate": 0.0001, "loss": 0.0125, "step": 65850 }, { "epoch": 433.2894736842105, "grad_norm": 0.9327832460403442, "learning_rate": 0.0001, "loss": 0.013, "step": 65860 }, { "epoch": 433.35526315789474, "grad_norm": 1.0273808240890503, "learning_rate": 0.0001, "loss": 0.0102, "step": 65870 }, { "epoch": 433.42105263157896, "grad_norm": 0.8748319745063782, "learning_rate": 0.0001, "loss": 0.0129, "step": 65880 }, { "epoch": 433.4868421052632, "grad_norm": 0.8934853076934814, "learning_rate": 0.0001, "loss": 0.0132, "step": 65890 }, { "epoch": 433.55263157894734, "grad_norm": 0.9318240880966187, "learning_rate": 0.0001, "loss": 0.0117, "step": 65900 }, { "epoch": 433.61842105263156, "grad_norm": 1.1659666299819946, "learning_rate": 0.0001, "loss": 0.0113, "step": 65910 }, { "epoch": 433.6842105263158, "grad_norm": 0.9181238412857056, "learning_rate": 0.0001, "loss": 0.0132, "step": 65920 }, { "epoch": 433.75, "grad_norm": 0.6898985505104065, "learning_rate": 0.0001, "loss": 0.0127, "step": 65930 }, { "epoch": 433.8157894736842, "grad_norm": 0.89139324426651, "learning_rate": 0.0001, "loss": 0.0131, "step": 65940 }, { "epoch": 433.88157894736844, "grad_norm": 1.3110442161560059, "learning_rate": 0.0001, "loss": 0.0116, "step": 65950 }, { "epoch": 433.94736842105266, "grad_norm": 1.3319469690322876, "learning_rate": 0.0001, "loss": 0.0128, "step": 65960 }, { "epoch": 434.0131578947368, "grad_norm": 0.9168400168418884, "learning_rate": 0.0001, "loss": 0.013, "step": 65970 }, { "epoch": 434.07894736842104, "grad_norm": 1.151136875152588, "learning_rate": 0.0001, "loss": 0.0126, "step": 65980 }, { "epoch": 434.14473684210526, "grad_norm": 1.2665919065475464, "learning_rate": 0.0001, "loss": 0.0127, "step": 65990 }, { "epoch": 434.2105263157895, "grad_norm": 1.0252939462661743, "learning_rate": 0.0001, "loss": 0.014, "step": 66000 }, { "epoch": 434.2763157894737, "grad_norm": 1.1638813018798828, "learning_rate": 0.0001, "loss": 0.0126, "step": 66010 }, { "epoch": 434.3421052631579, "grad_norm": 0.9939305186271667, "learning_rate": 0.0001, "loss": 0.0125, "step": 66020 }, { "epoch": 434.4078947368421, "grad_norm": 1.4579282999038696, "learning_rate": 0.0001, "loss": 0.0121, "step": 66030 }, { "epoch": 434.4736842105263, "grad_norm": 1.1966112852096558, "learning_rate": 0.0001, "loss": 0.0129, "step": 66040 }, { "epoch": 434.5394736842105, "grad_norm": 1.0983270406723022, "learning_rate": 0.0001, "loss": 0.0126, "step": 66050 }, { "epoch": 434.60526315789474, "grad_norm": 1.0120450258255005, "learning_rate": 0.0001, "loss": 0.0105, "step": 66060 }, { "epoch": 434.67105263157896, "grad_norm": 1.0957145690917969, "learning_rate": 0.0001, "loss": 0.0121, "step": 66070 }, { "epoch": 434.7368421052632, "grad_norm": 1.030774474143982, "learning_rate": 0.0001, "loss": 0.0114, "step": 66080 }, { "epoch": 434.80263157894734, "grad_norm": 1.2620954513549805, "learning_rate": 0.0001, "loss": 0.013, "step": 66090 }, { "epoch": 434.86842105263156, "grad_norm": 0.9031997919082642, "learning_rate": 0.0001, "loss": 0.0138, "step": 66100 }, { "epoch": 434.9342105263158, "grad_norm": 1.1857893466949463, "learning_rate": 0.0001, "loss": 0.0116, "step": 66110 }, { "epoch": 435.0, "grad_norm": 1.083285927772522, "learning_rate": 0.0001, "loss": 0.0125, "step": 66120 }, { "epoch": 435.0657894736842, "grad_norm": 1.357467770576477, "learning_rate": 0.0001, "loss": 0.0124, "step": 66130 }, { "epoch": 435.13157894736844, "grad_norm": 1.4382094144821167, "learning_rate": 0.0001, "loss": 0.0138, "step": 66140 }, { "epoch": 435.19736842105266, "grad_norm": 1.0308399200439453, "learning_rate": 0.0001, "loss": 0.0111, "step": 66150 }, { "epoch": 435.2631578947368, "grad_norm": 1.303141713142395, "learning_rate": 0.0001, "loss": 0.0122, "step": 66160 }, { "epoch": 435.32894736842104, "grad_norm": 1.2150816917419434, "learning_rate": 0.0001, "loss": 0.0114, "step": 66170 }, { "epoch": 435.39473684210526, "grad_norm": 0.9982725977897644, "learning_rate": 0.0001, "loss": 0.0125, "step": 66180 }, { "epoch": 435.4605263157895, "grad_norm": 0.9391024112701416, "learning_rate": 0.0001, "loss": 0.0119, "step": 66190 }, { "epoch": 435.5263157894737, "grad_norm": 0.7449613809585571, "learning_rate": 0.0001, "loss": 0.0119, "step": 66200 }, { "epoch": 435.5921052631579, "grad_norm": 1.597276210784912, "learning_rate": 0.0001, "loss": 0.0153, "step": 66210 }, { "epoch": 435.6578947368421, "grad_norm": 1.5920016765594482, "learning_rate": 0.0001, "loss": 0.0119, "step": 66220 }, { "epoch": 435.7236842105263, "grad_norm": 1.151368260383606, "learning_rate": 0.0001, "loss": 0.0126, "step": 66230 }, { "epoch": 435.7894736842105, "grad_norm": 1.2534699440002441, "learning_rate": 0.0001, "loss": 0.0115, "step": 66240 }, { "epoch": 435.85526315789474, "grad_norm": 1.220913052558899, "learning_rate": 0.0001, "loss": 0.0139, "step": 66250 }, { "epoch": 435.92105263157896, "grad_norm": 1.2110836505889893, "learning_rate": 0.0001, "loss": 0.0105, "step": 66260 }, { "epoch": 435.9868421052632, "grad_norm": 0.7796202898025513, "learning_rate": 0.0001, "loss": 0.0131, "step": 66270 }, { "epoch": 436.05263157894734, "grad_norm": 1.0769124031066895, "learning_rate": 0.0001, "loss": 0.0118, "step": 66280 }, { "epoch": 436.11842105263156, "grad_norm": 0.732404887676239, "learning_rate": 0.0001, "loss": 0.0112, "step": 66290 }, { "epoch": 436.1842105263158, "grad_norm": 0.9934023022651672, "learning_rate": 0.0001, "loss": 0.0125, "step": 66300 }, { "epoch": 436.25, "grad_norm": 1.068806529045105, "learning_rate": 0.0001, "loss": 0.0119, "step": 66310 }, { "epoch": 436.3157894736842, "grad_norm": 0.819196343421936, "learning_rate": 0.0001, "loss": 0.0123, "step": 66320 }, { "epoch": 436.38157894736844, "grad_norm": 1.0213623046875, "learning_rate": 0.0001, "loss": 0.0118, "step": 66330 }, { "epoch": 436.44736842105266, "grad_norm": 1.4873377084732056, "learning_rate": 0.0001, "loss": 0.0123, "step": 66340 }, { "epoch": 436.5131578947368, "grad_norm": 0.9596549272537231, "learning_rate": 0.0001, "loss": 0.0144, "step": 66350 }, { "epoch": 436.57894736842104, "grad_norm": 1.3335829973220825, "learning_rate": 0.0001, "loss": 0.0123, "step": 66360 }, { "epoch": 436.64473684210526, "grad_norm": 1.1521795988082886, "learning_rate": 0.0001, "loss": 0.0111, "step": 66370 }, { "epoch": 436.7105263157895, "grad_norm": 1.2757539749145508, "learning_rate": 0.0001, "loss": 0.0137, "step": 66380 }, { "epoch": 436.7763157894737, "grad_norm": 1.143549919128418, "learning_rate": 0.0001, "loss": 0.0116, "step": 66390 }, { "epoch": 436.8421052631579, "grad_norm": 1.0633903741836548, "learning_rate": 0.0001, "loss": 0.0127, "step": 66400 }, { "epoch": 436.9078947368421, "grad_norm": 0.8937993049621582, "learning_rate": 0.0001, "loss": 0.011, "step": 66410 }, { "epoch": 436.9736842105263, "grad_norm": 1.0790894031524658, "learning_rate": 0.0001, "loss": 0.0147, "step": 66420 }, { "epoch": 437.0394736842105, "grad_norm": 1.1991922855377197, "learning_rate": 0.0001, "loss": 0.0138, "step": 66430 }, { "epoch": 437.10526315789474, "grad_norm": 1.1077334880828857, "learning_rate": 0.0001, "loss": 0.0128, "step": 66440 }, { "epoch": 437.17105263157896, "grad_norm": 0.9122137427330017, "learning_rate": 0.0001, "loss": 0.014, "step": 66450 }, { "epoch": 437.2368421052632, "grad_norm": 1.2790870666503906, "learning_rate": 0.0001, "loss": 0.0122, "step": 66460 }, { "epoch": 437.30263157894734, "grad_norm": 1.557098388671875, "learning_rate": 0.0001, "loss": 0.0121, "step": 66470 }, { "epoch": 437.36842105263156, "grad_norm": 1.4872727394104004, "learning_rate": 0.0001, "loss": 0.0129, "step": 66480 }, { "epoch": 437.4342105263158, "grad_norm": 1.1132712364196777, "learning_rate": 0.0001, "loss": 0.0121, "step": 66490 }, { "epoch": 437.5, "grad_norm": 1.3181506395339966, "learning_rate": 0.0001, "loss": 0.0128, "step": 66500 }, { "epoch": 437.5657894736842, "grad_norm": 0.976253867149353, "learning_rate": 0.0001, "loss": 0.0124, "step": 66510 }, { "epoch": 437.63157894736844, "grad_norm": 0.8042446970939636, "learning_rate": 0.0001, "loss": 0.0129, "step": 66520 }, { "epoch": 437.69736842105266, "grad_norm": 0.6995433568954468, "learning_rate": 0.0001, "loss": 0.0129, "step": 66530 }, { "epoch": 437.7631578947368, "grad_norm": 1.2673261165618896, "learning_rate": 0.0001, "loss": 0.013, "step": 66540 }, { "epoch": 437.82894736842104, "grad_norm": 1.1991665363311768, "learning_rate": 0.0001, "loss": 0.0111, "step": 66550 }, { "epoch": 437.89473684210526, "grad_norm": 1.2135796546936035, "learning_rate": 0.0001, "loss": 0.0129, "step": 66560 }, { "epoch": 437.9605263157895, "grad_norm": 1.4307721853256226, "learning_rate": 0.0001, "loss": 0.0111, "step": 66570 }, { "epoch": 438.0263157894737, "grad_norm": 1.061484694480896, "learning_rate": 0.0001, "loss": 0.0127, "step": 66580 }, { "epoch": 438.0921052631579, "grad_norm": 1.2536671161651611, "learning_rate": 0.0001, "loss": 0.013, "step": 66590 }, { "epoch": 438.1578947368421, "grad_norm": 1.211685299873352, "learning_rate": 0.0001, "loss": 0.0123, "step": 66600 }, { "epoch": 438.2236842105263, "grad_norm": 0.9517151117324829, "learning_rate": 0.0001, "loss": 0.0136, "step": 66610 }, { "epoch": 438.2894736842105, "grad_norm": 1.3910490274429321, "learning_rate": 0.0001, "loss": 0.0124, "step": 66620 }, { "epoch": 438.35526315789474, "grad_norm": 1.2333219051361084, "learning_rate": 0.0001, "loss": 0.0124, "step": 66630 }, { "epoch": 438.42105263157896, "grad_norm": 1.4747133255004883, "learning_rate": 0.0001, "loss": 0.0128, "step": 66640 }, { "epoch": 438.4868421052632, "grad_norm": 1.4502803087234497, "learning_rate": 0.0001, "loss": 0.0151, "step": 66650 }, { "epoch": 438.55263157894734, "grad_norm": 1.0021837949752808, "learning_rate": 0.0001, "loss": 0.0121, "step": 66660 }, { "epoch": 438.61842105263156, "grad_norm": 1.2110358476638794, "learning_rate": 0.0001, "loss": 0.0111, "step": 66670 }, { "epoch": 438.6842105263158, "grad_norm": 1.2951200008392334, "learning_rate": 0.0001, "loss": 0.0116, "step": 66680 }, { "epoch": 438.75, "grad_norm": 1.4728100299835205, "learning_rate": 0.0001, "loss": 0.0098, "step": 66690 }, { "epoch": 438.8157894736842, "grad_norm": 1.3396544456481934, "learning_rate": 0.0001, "loss": 0.0109, "step": 66700 }, { "epoch": 438.88157894736844, "grad_norm": 0.9344916343688965, "learning_rate": 0.0001, "loss": 0.0115, "step": 66710 }, { "epoch": 438.94736842105266, "grad_norm": 0.8151391744613647, "learning_rate": 0.0001, "loss": 0.0127, "step": 66720 }, { "epoch": 439.0131578947368, "grad_norm": 0.8161630630493164, "learning_rate": 0.0001, "loss": 0.0114, "step": 66730 }, { "epoch": 439.07894736842104, "grad_norm": 1.26533043384552, "learning_rate": 0.0001, "loss": 0.0129, "step": 66740 }, { "epoch": 439.14473684210526, "grad_norm": 1.3989818096160889, "learning_rate": 0.0001, "loss": 0.0134, "step": 66750 }, { "epoch": 439.2105263157895, "grad_norm": 1.301026463508606, "learning_rate": 0.0001, "loss": 0.0109, "step": 66760 }, { "epoch": 439.2763157894737, "grad_norm": 0.9039576649665833, "learning_rate": 0.0001, "loss": 0.0113, "step": 66770 }, { "epoch": 439.3421052631579, "grad_norm": 1.453680396080017, "learning_rate": 0.0001, "loss": 0.015, "step": 66780 }, { "epoch": 439.4078947368421, "grad_norm": 1.273836612701416, "learning_rate": 0.0001, "loss": 0.0126, "step": 66790 }, { "epoch": 439.4736842105263, "grad_norm": 1.5056235790252686, "learning_rate": 0.0001, "loss": 0.0113, "step": 66800 }, { "epoch": 439.5394736842105, "grad_norm": 1.0324246883392334, "learning_rate": 0.0001, "loss": 0.0133, "step": 66810 }, { "epoch": 439.60526315789474, "grad_norm": 1.2244229316711426, "learning_rate": 0.0001, "loss": 0.0122, "step": 66820 }, { "epoch": 439.67105263157896, "grad_norm": 1.2605669498443604, "learning_rate": 0.0001, "loss": 0.0112, "step": 66830 }, { "epoch": 439.7368421052632, "grad_norm": 1.2557387351989746, "learning_rate": 0.0001, "loss": 0.0116, "step": 66840 }, { "epoch": 439.80263157894734, "grad_norm": 1.641819953918457, "learning_rate": 0.0001, "loss": 0.0124, "step": 66850 }, { "epoch": 439.86842105263156, "grad_norm": 1.0714577436447144, "learning_rate": 0.0001, "loss": 0.01, "step": 66860 }, { "epoch": 439.9342105263158, "grad_norm": 1.5716043710708618, "learning_rate": 0.0001, "loss": 0.0112, "step": 66870 }, { "epoch": 440.0, "grad_norm": 1.3139293193817139, "learning_rate": 0.0001, "loss": 0.0125, "step": 66880 }, { "epoch": 440.0657894736842, "grad_norm": 1.3056998252868652, "learning_rate": 0.0001, "loss": 0.0123, "step": 66890 }, { "epoch": 440.13157894736844, "grad_norm": 1.1983897686004639, "learning_rate": 0.0001, "loss": 0.0125, "step": 66900 }, { "epoch": 440.19736842105266, "grad_norm": 1.2643394470214844, "learning_rate": 0.0001, "loss": 0.0119, "step": 66910 }, { "epoch": 440.2631578947368, "grad_norm": 1.501151204109192, "learning_rate": 0.0001, "loss": 0.0111, "step": 66920 }, { "epoch": 440.32894736842104, "grad_norm": 1.387885570526123, "learning_rate": 0.0001, "loss": 0.0135, "step": 66930 }, { "epoch": 440.39473684210526, "grad_norm": 1.3784682750701904, "learning_rate": 0.0001, "loss": 0.0136, "step": 66940 }, { "epoch": 440.4605263157895, "grad_norm": 0.9655961394309998, "learning_rate": 0.0001, "loss": 0.0111, "step": 66950 }, { "epoch": 440.5263157894737, "grad_norm": 0.8454356789588928, "learning_rate": 0.0001, "loss": 0.0124, "step": 66960 }, { "epoch": 440.5921052631579, "grad_norm": 1.001245141029358, "learning_rate": 0.0001, "loss": 0.0114, "step": 66970 }, { "epoch": 440.6578947368421, "grad_norm": 1.1278152465820312, "learning_rate": 0.0001, "loss": 0.0119, "step": 66980 }, { "epoch": 440.7236842105263, "grad_norm": 0.9985861778259277, "learning_rate": 0.0001, "loss": 0.0132, "step": 66990 }, { "epoch": 440.7894736842105, "grad_norm": 1.476648211479187, "learning_rate": 0.0001, "loss": 0.0125, "step": 67000 }, { "epoch": 440.85526315789474, "grad_norm": 1.0903433561325073, "learning_rate": 0.0001, "loss": 0.0129, "step": 67010 }, { "epoch": 440.92105263157896, "grad_norm": 1.2222623825073242, "learning_rate": 0.0001, "loss": 0.0117, "step": 67020 }, { "epoch": 440.9868421052632, "grad_norm": 1.298284888267517, "learning_rate": 0.0001, "loss": 0.0107, "step": 67030 }, { "epoch": 441.05263157894734, "grad_norm": 1.4199997186660767, "learning_rate": 0.0001, "loss": 0.012, "step": 67040 }, { "epoch": 441.11842105263156, "grad_norm": 0.9580684900283813, "learning_rate": 0.0001, "loss": 0.0129, "step": 67050 }, { "epoch": 441.1842105263158, "grad_norm": 1.2852739095687866, "learning_rate": 0.0001, "loss": 0.013, "step": 67060 }, { "epoch": 441.25, "grad_norm": 0.9127504825592041, "learning_rate": 0.0001, "loss": 0.0123, "step": 67070 }, { "epoch": 441.3157894736842, "grad_norm": 1.0054773092269897, "learning_rate": 0.0001, "loss": 0.0118, "step": 67080 }, { "epoch": 441.38157894736844, "grad_norm": 1.0165013074874878, "learning_rate": 0.0001, "loss": 0.0113, "step": 67090 }, { "epoch": 441.44736842105266, "grad_norm": 1.144066572189331, "learning_rate": 0.0001, "loss": 0.0136, "step": 67100 }, { "epoch": 441.5131578947368, "grad_norm": 0.9844335913658142, "learning_rate": 0.0001, "loss": 0.0113, "step": 67110 }, { "epoch": 441.57894736842104, "grad_norm": 1.0714805126190186, "learning_rate": 0.0001, "loss": 0.0119, "step": 67120 }, { "epoch": 441.64473684210526, "grad_norm": 1.3069833517074585, "learning_rate": 0.0001, "loss": 0.0131, "step": 67130 }, { "epoch": 441.7105263157895, "grad_norm": 1.189154028892517, "learning_rate": 0.0001, "loss": 0.0112, "step": 67140 }, { "epoch": 441.7763157894737, "grad_norm": 1.2548660039901733, "learning_rate": 0.0001, "loss": 0.0123, "step": 67150 }, { "epoch": 441.8421052631579, "grad_norm": 1.6267309188842773, "learning_rate": 0.0001, "loss": 0.0097, "step": 67160 }, { "epoch": 441.9078947368421, "grad_norm": 1.1438740491867065, "learning_rate": 0.0001, "loss": 0.0125, "step": 67170 }, { "epoch": 441.9736842105263, "grad_norm": 1.1230552196502686, "learning_rate": 0.0001, "loss": 0.0126, "step": 67180 }, { "epoch": 442.0394736842105, "grad_norm": 1.0231391191482544, "learning_rate": 0.0001, "loss": 0.013, "step": 67190 }, { "epoch": 442.10526315789474, "grad_norm": 1.1591103076934814, "learning_rate": 0.0001, "loss": 0.0113, "step": 67200 }, { "epoch": 442.17105263157896, "grad_norm": 1.3125256299972534, "learning_rate": 0.0001, "loss": 0.0132, "step": 67210 }, { "epoch": 442.2368421052632, "grad_norm": 0.9967691898345947, "learning_rate": 0.0001, "loss": 0.0118, "step": 67220 }, { "epoch": 442.30263157894734, "grad_norm": 1.2353827953338623, "learning_rate": 0.0001, "loss": 0.0109, "step": 67230 }, { "epoch": 442.36842105263156, "grad_norm": 0.9676706194877625, "learning_rate": 0.0001, "loss": 0.0126, "step": 67240 }, { "epoch": 442.4342105263158, "grad_norm": 0.89171302318573, "learning_rate": 0.0001, "loss": 0.0132, "step": 67250 }, { "epoch": 442.5, "grad_norm": 0.8268073797225952, "learning_rate": 0.0001, "loss": 0.0102, "step": 67260 }, { "epoch": 442.5657894736842, "grad_norm": 0.8292796015739441, "learning_rate": 0.0001, "loss": 0.012, "step": 67270 }, { "epoch": 442.63157894736844, "grad_norm": 1.1891852617263794, "learning_rate": 0.0001, "loss": 0.0118, "step": 67280 }, { "epoch": 442.69736842105266, "grad_norm": 1.0386085510253906, "learning_rate": 0.0001, "loss": 0.0142, "step": 67290 }, { "epoch": 442.7631578947368, "grad_norm": 1.4309957027435303, "learning_rate": 0.0001, "loss": 0.0135, "step": 67300 }, { "epoch": 442.82894736842104, "grad_norm": 1.046149730682373, "learning_rate": 0.0001, "loss": 0.0104, "step": 67310 }, { "epoch": 442.89473684210526, "grad_norm": 1.2012232542037964, "learning_rate": 0.0001, "loss": 0.012, "step": 67320 }, { "epoch": 442.9605263157895, "grad_norm": 0.963539183139801, "learning_rate": 0.0001, "loss": 0.0112, "step": 67330 }, { "epoch": 443.0263157894737, "grad_norm": 0.8693314790725708, "learning_rate": 0.0001, "loss": 0.0116, "step": 67340 }, { "epoch": 443.0921052631579, "grad_norm": 1.503976821899414, "learning_rate": 0.0001, "loss": 0.0123, "step": 67350 }, { "epoch": 443.1578947368421, "grad_norm": 0.8034859895706177, "learning_rate": 0.0001, "loss": 0.0104, "step": 67360 }, { "epoch": 443.2236842105263, "grad_norm": 1.4391164779663086, "learning_rate": 0.0001, "loss": 0.0127, "step": 67370 }, { "epoch": 443.2894736842105, "grad_norm": 1.2415558099746704, "learning_rate": 0.0001, "loss": 0.0132, "step": 67380 }, { "epoch": 443.35526315789474, "grad_norm": 1.0258815288543701, "learning_rate": 0.0001, "loss": 0.0122, "step": 67390 }, { "epoch": 443.42105263157896, "grad_norm": 0.7889366149902344, "learning_rate": 0.0001, "loss": 0.0125, "step": 67400 }, { "epoch": 443.4868421052632, "grad_norm": 0.8310263752937317, "learning_rate": 0.0001, "loss": 0.0122, "step": 67410 }, { "epoch": 443.55263157894734, "grad_norm": 1.102107286453247, "learning_rate": 0.0001, "loss": 0.0115, "step": 67420 }, { "epoch": 443.61842105263156, "grad_norm": 0.891818106174469, "learning_rate": 0.0001, "loss": 0.0105, "step": 67430 }, { "epoch": 443.6842105263158, "grad_norm": 1.2549525499343872, "learning_rate": 0.0001, "loss": 0.0121, "step": 67440 }, { "epoch": 443.75, "grad_norm": 1.475273847579956, "learning_rate": 0.0001, "loss": 0.0121, "step": 67450 }, { "epoch": 443.8157894736842, "grad_norm": 1.5199368000030518, "learning_rate": 0.0001, "loss": 0.013, "step": 67460 }, { "epoch": 443.88157894736844, "grad_norm": 1.0391242504119873, "learning_rate": 0.0001, "loss": 0.0115, "step": 67470 }, { "epoch": 443.94736842105266, "grad_norm": 1.2882357835769653, "learning_rate": 0.0001, "loss": 0.0121, "step": 67480 }, { "epoch": 444.0131578947368, "grad_norm": 1.4327231645584106, "learning_rate": 0.0001, "loss": 0.0126, "step": 67490 }, { "epoch": 444.07894736842104, "grad_norm": 1.25886869430542, "learning_rate": 0.0001, "loss": 0.0117, "step": 67500 }, { "epoch": 444.14473684210526, "grad_norm": 0.9697027802467346, "learning_rate": 0.0001, "loss": 0.0115, "step": 67510 }, { "epoch": 444.2105263157895, "grad_norm": 1.3872424364089966, "learning_rate": 0.0001, "loss": 0.0109, "step": 67520 }, { "epoch": 444.2763157894737, "grad_norm": 1.1043860912322998, "learning_rate": 0.0001, "loss": 0.0119, "step": 67530 }, { "epoch": 444.3421052631579, "grad_norm": 1.202193021774292, "learning_rate": 0.0001, "loss": 0.012, "step": 67540 }, { "epoch": 444.4078947368421, "grad_norm": 1.0167324542999268, "learning_rate": 0.0001, "loss": 0.011, "step": 67550 }, { "epoch": 444.4736842105263, "grad_norm": 0.9748640656471252, "learning_rate": 0.0001, "loss": 0.0127, "step": 67560 }, { "epoch": 444.5394736842105, "grad_norm": 1.1496931314468384, "learning_rate": 0.0001, "loss": 0.0129, "step": 67570 }, { "epoch": 444.60526315789474, "grad_norm": 1.193697452545166, "learning_rate": 0.0001, "loss": 0.0133, "step": 67580 }, { "epoch": 444.67105263157896, "grad_norm": 1.2025853395462036, "learning_rate": 0.0001, "loss": 0.0127, "step": 67590 }, { "epoch": 444.7368421052632, "grad_norm": 0.9726876020431519, "learning_rate": 0.0001, "loss": 0.0112, "step": 67600 }, { "epoch": 444.80263157894734, "grad_norm": 0.8311697840690613, "learning_rate": 0.0001, "loss": 0.0136, "step": 67610 }, { "epoch": 444.86842105263156, "grad_norm": 0.8062876462936401, "learning_rate": 0.0001, "loss": 0.0121, "step": 67620 }, { "epoch": 444.9342105263158, "grad_norm": 0.8064841628074646, "learning_rate": 0.0001, "loss": 0.0103, "step": 67630 }, { "epoch": 445.0, "grad_norm": 0.9301232695579529, "learning_rate": 0.0001, "loss": 0.0134, "step": 67640 }, { "epoch": 445.0657894736842, "grad_norm": 1.1378428936004639, "learning_rate": 0.0001, "loss": 0.0117, "step": 67650 }, { "epoch": 445.13157894736844, "grad_norm": 1.1521905660629272, "learning_rate": 0.0001, "loss": 0.0133, "step": 67660 }, { "epoch": 445.19736842105266, "grad_norm": 1.2799955606460571, "learning_rate": 0.0001, "loss": 0.0115, "step": 67670 }, { "epoch": 445.2631578947368, "grad_norm": 1.008537769317627, "learning_rate": 0.0001, "loss": 0.0136, "step": 67680 }, { "epoch": 445.32894736842104, "grad_norm": 1.2457149028778076, "learning_rate": 0.0001, "loss": 0.0115, "step": 67690 }, { "epoch": 445.39473684210526, "grad_norm": 1.1175915002822876, "learning_rate": 0.0001, "loss": 0.0123, "step": 67700 }, { "epoch": 445.4605263157895, "grad_norm": 1.0198794603347778, "learning_rate": 0.0001, "loss": 0.0125, "step": 67710 }, { "epoch": 445.5263157894737, "grad_norm": 1.1624159812927246, "learning_rate": 0.0001, "loss": 0.0126, "step": 67720 }, { "epoch": 445.5921052631579, "grad_norm": 1.1959896087646484, "learning_rate": 0.0001, "loss": 0.0111, "step": 67730 }, { "epoch": 445.6578947368421, "grad_norm": 1.2008717060089111, "learning_rate": 0.0001, "loss": 0.0136, "step": 67740 }, { "epoch": 445.7236842105263, "grad_norm": 1.321126937866211, "learning_rate": 0.0001, "loss": 0.0137, "step": 67750 }, { "epoch": 445.7894736842105, "grad_norm": 1.1660131216049194, "learning_rate": 0.0001, "loss": 0.0125, "step": 67760 }, { "epoch": 445.85526315789474, "grad_norm": 1.2999013662338257, "learning_rate": 0.0001, "loss": 0.012, "step": 67770 }, { "epoch": 445.92105263157896, "grad_norm": 0.8440237045288086, "learning_rate": 0.0001, "loss": 0.0129, "step": 67780 }, { "epoch": 445.9868421052632, "grad_norm": 1.0197548866271973, "learning_rate": 0.0001, "loss": 0.0109, "step": 67790 }, { "epoch": 446.05263157894734, "grad_norm": 1.0732282400131226, "learning_rate": 0.0001, "loss": 0.0128, "step": 67800 }, { "epoch": 446.11842105263156, "grad_norm": 1.2190271615982056, "learning_rate": 0.0001, "loss": 0.0128, "step": 67810 }, { "epoch": 446.1842105263158, "grad_norm": 1.098029375076294, "learning_rate": 0.0001, "loss": 0.0103, "step": 67820 }, { "epoch": 446.25, "grad_norm": 1.064057469367981, "learning_rate": 0.0001, "loss": 0.0119, "step": 67830 }, { "epoch": 446.3157894736842, "grad_norm": 1.0440378189086914, "learning_rate": 0.0001, "loss": 0.0114, "step": 67840 }, { "epoch": 446.38157894736844, "grad_norm": 1.2264587879180908, "learning_rate": 0.0001, "loss": 0.0113, "step": 67850 }, { "epoch": 446.44736842105266, "grad_norm": 0.7877911925315857, "learning_rate": 0.0001, "loss": 0.0153, "step": 67860 }, { "epoch": 446.5131578947368, "grad_norm": 1.3286290168762207, "learning_rate": 0.0001, "loss": 0.0122, "step": 67870 }, { "epoch": 446.57894736842104, "grad_norm": 0.9419989585876465, "learning_rate": 0.0001, "loss": 0.0123, "step": 67880 }, { "epoch": 446.64473684210526, "grad_norm": 1.018985629081726, "learning_rate": 0.0001, "loss": 0.0111, "step": 67890 }, { "epoch": 446.7105263157895, "grad_norm": 0.9366230964660645, "learning_rate": 0.0001, "loss": 0.01, "step": 67900 }, { "epoch": 446.7763157894737, "grad_norm": 0.7390005588531494, "learning_rate": 0.0001, "loss": 0.0144, "step": 67910 }, { "epoch": 446.8421052631579, "grad_norm": 0.7992033362388611, "learning_rate": 0.0001, "loss": 0.011, "step": 67920 }, { "epoch": 446.9078947368421, "grad_norm": 1.0633325576782227, "learning_rate": 0.0001, "loss": 0.0145, "step": 67930 }, { "epoch": 446.9736842105263, "grad_norm": 1.1128759384155273, "learning_rate": 0.0001, "loss": 0.0128, "step": 67940 }, { "epoch": 447.0394736842105, "grad_norm": 1.405713677406311, "learning_rate": 0.0001, "loss": 0.0127, "step": 67950 }, { "epoch": 447.10526315789474, "grad_norm": 1.0440222024917603, "learning_rate": 0.0001, "loss": 0.0143, "step": 67960 }, { "epoch": 447.17105263157896, "grad_norm": 1.1741487979888916, "learning_rate": 0.0001, "loss": 0.0145, "step": 67970 }, { "epoch": 447.2368421052632, "grad_norm": 1.0820094347000122, "learning_rate": 0.0001, "loss": 0.0127, "step": 67980 }, { "epoch": 447.30263157894734, "grad_norm": 1.0338839292526245, "learning_rate": 0.0001, "loss": 0.0121, "step": 67990 }, { "epoch": 447.36842105263156, "grad_norm": 1.3230096101760864, "learning_rate": 0.0001, "loss": 0.0145, "step": 68000 }, { "epoch": 447.4342105263158, "grad_norm": 0.8464519381523132, "learning_rate": 0.0001, "loss": 0.0111, "step": 68010 }, { "epoch": 447.5, "grad_norm": 1.3465580940246582, "learning_rate": 0.0001, "loss": 0.013, "step": 68020 }, { "epoch": 447.5657894736842, "grad_norm": 1.2503917217254639, "learning_rate": 0.0001, "loss": 0.0128, "step": 68030 }, { "epoch": 447.63157894736844, "grad_norm": 1.2269294261932373, "learning_rate": 0.0001, "loss": 0.0131, "step": 68040 }, { "epoch": 447.69736842105266, "grad_norm": 0.9101136922836304, "learning_rate": 0.0001, "loss": 0.0111, "step": 68050 }, { "epoch": 447.7631578947368, "grad_norm": 1.1109895706176758, "learning_rate": 0.0001, "loss": 0.0136, "step": 68060 }, { "epoch": 447.82894736842104, "grad_norm": 1.2800877094268799, "learning_rate": 0.0001, "loss": 0.0146, "step": 68070 }, { "epoch": 447.89473684210526, "grad_norm": 1.3578050136566162, "learning_rate": 0.0001, "loss": 0.011, "step": 68080 }, { "epoch": 447.9605263157895, "grad_norm": 1.1380995512008667, "learning_rate": 0.0001, "loss": 0.0134, "step": 68090 }, { "epoch": 448.0263157894737, "grad_norm": 1.510892629623413, "learning_rate": 0.0001, "loss": 0.0125, "step": 68100 }, { "epoch": 448.0921052631579, "grad_norm": 1.1936112642288208, "learning_rate": 0.0001, "loss": 0.0113, "step": 68110 }, { "epoch": 448.1578947368421, "grad_norm": 1.3569926023483276, "learning_rate": 0.0001, "loss": 0.0121, "step": 68120 }, { "epoch": 448.2236842105263, "grad_norm": 1.0964100360870361, "learning_rate": 0.0001, "loss": 0.0121, "step": 68130 }, { "epoch": 448.2894736842105, "grad_norm": 0.7216523289680481, "learning_rate": 0.0001, "loss": 0.0119, "step": 68140 }, { "epoch": 448.35526315789474, "grad_norm": 1.2212159633636475, "learning_rate": 0.0001, "loss": 0.0162, "step": 68150 }, { "epoch": 448.42105263157896, "grad_norm": 1.0280985832214355, "learning_rate": 0.0001, "loss": 0.0142, "step": 68160 }, { "epoch": 448.4868421052632, "grad_norm": 1.3027311563491821, "learning_rate": 0.0001, "loss": 0.0135, "step": 68170 }, { "epoch": 448.55263157894734, "grad_norm": 1.462243914604187, "learning_rate": 0.0001, "loss": 0.0121, "step": 68180 }, { "epoch": 448.61842105263156, "grad_norm": 1.231683373451233, "learning_rate": 0.0001, "loss": 0.0118, "step": 68190 }, { "epoch": 448.6842105263158, "grad_norm": 1.2947146892547607, "learning_rate": 0.0001, "loss": 0.0122, "step": 68200 }, { "epoch": 448.75, "grad_norm": 1.0188627243041992, "learning_rate": 0.0001, "loss": 0.0125, "step": 68210 }, { "epoch": 448.8157894736842, "grad_norm": 1.1780815124511719, "learning_rate": 0.0001, "loss": 0.0114, "step": 68220 }, { "epoch": 448.88157894736844, "grad_norm": 1.1166077852249146, "learning_rate": 0.0001, "loss": 0.0134, "step": 68230 }, { "epoch": 448.94736842105266, "grad_norm": 0.9867680668830872, "learning_rate": 0.0001, "loss": 0.0118, "step": 68240 }, { "epoch": 449.0131578947368, "grad_norm": 1.014134168624878, "learning_rate": 0.0001, "loss": 0.0097, "step": 68250 }, { "epoch": 449.07894736842104, "grad_norm": 0.8704817891120911, "learning_rate": 0.0001, "loss": 0.011, "step": 68260 }, { "epoch": 449.14473684210526, "grad_norm": 1.1056900024414062, "learning_rate": 0.0001, "loss": 0.0114, "step": 68270 }, { "epoch": 449.2105263157895, "grad_norm": 1.131632924079895, "learning_rate": 0.0001, "loss": 0.0145, "step": 68280 }, { "epoch": 449.2763157894737, "grad_norm": 1.3262722492218018, "learning_rate": 0.0001, "loss": 0.0126, "step": 68290 }, { "epoch": 449.3421052631579, "grad_norm": 1.328003168106079, "learning_rate": 0.0001, "loss": 0.013, "step": 68300 }, { "epoch": 449.4078947368421, "grad_norm": 1.1644665002822876, "learning_rate": 0.0001, "loss": 0.0129, "step": 68310 }, { "epoch": 449.4736842105263, "grad_norm": 1.419346809387207, "learning_rate": 0.0001, "loss": 0.0138, "step": 68320 }, { "epoch": 449.5394736842105, "grad_norm": 0.9281734228134155, "learning_rate": 0.0001, "loss": 0.0112, "step": 68330 }, { "epoch": 449.60526315789474, "grad_norm": 1.394044280052185, "learning_rate": 0.0001, "loss": 0.0106, "step": 68340 }, { "epoch": 449.67105263157896, "grad_norm": 1.435802936553955, "learning_rate": 0.0001, "loss": 0.0123, "step": 68350 }, { "epoch": 449.7368421052632, "grad_norm": 1.2658971548080444, "learning_rate": 0.0001, "loss": 0.0117, "step": 68360 }, { "epoch": 449.80263157894734, "grad_norm": 1.4946037530899048, "learning_rate": 0.0001, "loss": 0.0131, "step": 68370 }, { "epoch": 449.86842105263156, "grad_norm": 0.7400012016296387, "learning_rate": 0.0001, "loss": 0.0122, "step": 68380 }, { "epoch": 449.9342105263158, "grad_norm": 1.1774287223815918, "learning_rate": 0.0001, "loss": 0.0094, "step": 68390 }, { "epoch": 450.0, "grad_norm": 1.2723482847213745, "learning_rate": 0.0001, "loss": 0.0114, "step": 68400 }, { "epoch": 450.0657894736842, "grad_norm": 1.0337828397750854, "learning_rate": 0.0001, "loss": 0.011, "step": 68410 }, { "epoch": 450.13157894736844, "grad_norm": 0.836054265499115, "learning_rate": 0.0001, "loss": 0.0123, "step": 68420 }, { "epoch": 450.19736842105266, "grad_norm": 1.1785285472869873, "learning_rate": 0.0001, "loss": 0.0123, "step": 68430 }, { "epoch": 450.2631578947368, "grad_norm": 1.1145614385604858, "learning_rate": 0.0001, "loss": 0.0134, "step": 68440 }, { "epoch": 450.32894736842104, "grad_norm": 1.3522475957870483, "learning_rate": 0.0001, "loss": 0.014, "step": 68450 }, { "epoch": 450.39473684210526, "grad_norm": 1.1406996250152588, "learning_rate": 0.0001, "loss": 0.012, "step": 68460 }, { "epoch": 450.4605263157895, "grad_norm": 1.2381994724273682, "learning_rate": 0.0001, "loss": 0.0117, "step": 68470 }, { "epoch": 450.5263157894737, "grad_norm": 1.4865307807922363, "learning_rate": 0.0001, "loss": 0.0121, "step": 68480 }, { "epoch": 450.5921052631579, "grad_norm": 1.2501206398010254, "learning_rate": 0.0001, "loss": 0.0114, "step": 68490 }, { "epoch": 450.6578947368421, "grad_norm": 1.3360950946807861, "learning_rate": 0.0001, "loss": 0.0121, "step": 68500 }, { "epoch": 450.7236842105263, "grad_norm": 1.5637435913085938, "learning_rate": 0.0001, "loss": 0.0124, "step": 68510 }, { "epoch": 450.7894736842105, "grad_norm": 1.383474349975586, "learning_rate": 0.0001, "loss": 0.0108, "step": 68520 }, { "epoch": 450.85526315789474, "grad_norm": 1.3459497690200806, "learning_rate": 0.0001, "loss": 0.0106, "step": 68530 }, { "epoch": 450.92105263157896, "grad_norm": 0.9514352679252625, "learning_rate": 0.0001, "loss": 0.0118, "step": 68540 }, { "epoch": 450.9868421052632, "grad_norm": 1.1830934286117554, "learning_rate": 0.0001, "loss": 0.0124, "step": 68550 }, { "epoch": 451.05263157894734, "grad_norm": 1.131381630897522, "learning_rate": 0.0001, "loss": 0.0113, "step": 68560 }, { "epoch": 451.11842105263156, "grad_norm": 0.9725364446640015, "learning_rate": 0.0001, "loss": 0.0117, "step": 68570 }, { "epoch": 451.1842105263158, "grad_norm": 1.4071309566497803, "learning_rate": 0.0001, "loss": 0.0132, "step": 68580 }, { "epoch": 451.25, "grad_norm": 1.218841552734375, "learning_rate": 0.0001, "loss": 0.012, "step": 68590 }, { "epoch": 451.3157894736842, "grad_norm": 1.2471004724502563, "learning_rate": 0.0001, "loss": 0.0108, "step": 68600 }, { "epoch": 451.38157894736844, "grad_norm": 1.2200270891189575, "learning_rate": 0.0001, "loss": 0.0128, "step": 68610 }, { "epoch": 451.44736842105266, "grad_norm": 0.7983037233352661, "learning_rate": 0.0001, "loss": 0.0127, "step": 68620 }, { "epoch": 451.5131578947368, "grad_norm": 1.0153566598892212, "learning_rate": 0.0001, "loss": 0.0137, "step": 68630 }, { "epoch": 451.57894736842104, "grad_norm": 0.8792093396186829, "learning_rate": 0.0001, "loss": 0.0133, "step": 68640 }, { "epoch": 451.64473684210526, "grad_norm": 1.1908783912658691, "learning_rate": 0.0001, "loss": 0.0122, "step": 68650 }, { "epoch": 451.7105263157895, "grad_norm": 1.2272428274154663, "learning_rate": 0.0001, "loss": 0.0115, "step": 68660 }, { "epoch": 451.7763157894737, "grad_norm": 0.9203829765319824, "learning_rate": 0.0001, "loss": 0.0113, "step": 68670 }, { "epoch": 451.8421052631579, "grad_norm": 1.4180694818496704, "learning_rate": 0.0001, "loss": 0.0098, "step": 68680 }, { "epoch": 451.9078947368421, "grad_norm": 0.8548322319984436, "learning_rate": 0.0001, "loss": 0.012, "step": 68690 }, { "epoch": 451.9736842105263, "grad_norm": 1.259071946144104, "learning_rate": 0.0001, "loss": 0.0113, "step": 68700 }, { "epoch": 452.0394736842105, "grad_norm": 1.4840049743652344, "learning_rate": 0.0001, "loss": 0.0119, "step": 68710 }, { "epoch": 452.10526315789474, "grad_norm": 0.9539170861244202, "learning_rate": 0.0001, "loss": 0.0119, "step": 68720 }, { "epoch": 452.17105263157896, "grad_norm": 1.287971019744873, "learning_rate": 0.0001, "loss": 0.0127, "step": 68730 }, { "epoch": 452.2368421052632, "grad_norm": 1.5346850156784058, "learning_rate": 0.0001, "loss": 0.0106, "step": 68740 }, { "epoch": 452.30263157894734, "grad_norm": 1.4423147439956665, "learning_rate": 0.0001, "loss": 0.0124, "step": 68750 }, { "epoch": 452.36842105263156, "grad_norm": 1.0236411094665527, "learning_rate": 0.0001, "loss": 0.0131, "step": 68760 }, { "epoch": 452.4342105263158, "grad_norm": 0.9541664719581604, "learning_rate": 0.0001, "loss": 0.0123, "step": 68770 }, { "epoch": 452.5, "grad_norm": 0.7958992719650269, "learning_rate": 0.0001, "loss": 0.012, "step": 68780 }, { "epoch": 452.5657894736842, "grad_norm": 1.2157082557678223, "learning_rate": 0.0001, "loss": 0.0134, "step": 68790 }, { "epoch": 452.63157894736844, "grad_norm": 0.8588937520980835, "learning_rate": 0.0001, "loss": 0.0136, "step": 68800 }, { "epoch": 452.69736842105266, "grad_norm": 1.442825436592102, "learning_rate": 0.0001, "loss": 0.0123, "step": 68810 }, { "epoch": 452.7631578947368, "grad_norm": 1.1870979070663452, "learning_rate": 0.0001, "loss": 0.0115, "step": 68820 }, { "epoch": 452.82894736842104, "grad_norm": 1.28781259059906, "learning_rate": 0.0001, "loss": 0.0112, "step": 68830 }, { "epoch": 452.89473684210526, "grad_norm": 0.9649443030357361, "learning_rate": 0.0001, "loss": 0.0119, "step": 68840 }, { "epoch": 452.9605263157895, "grad_norm": 0.9508198499679565, "learning_rate": 0.0001, "loss": 0.0132, "step": 68850 }, { "epoch": 453.0263157894737, "grad_norm": 1.5486016273498535, "learning_rate": 0.0001, "loss": 0.0119, "step": 68860 }, { "epoch": 453.0921052631579, "grad_norm": 1.217083215713501, "learning_rate": 0.0001, "loss": 0.0131, "step": 68870 }, { "epoch": 453.1578947368421, "grad_norm": 1.2917876243591309, "learning_rate": 0.0001, "loss": 0.0129, "step": 68880 }, { "epoch": 453.2236842105263, "grad_norm": 1.0706918239593506, "learning_rate": 0.0001, "loss": 0.0136, "step": 68890 }, { "epoch": 453.2894736842105, "grad_norm": 1.453224778175354, "learning_rate": 0.0001, "loss": 0.0133, "step": 68900 }, { "epoch": 453.35526315789474, "grad_norm": 1.209774374961853, "learning_rate": 0.0001, "loss": 0.0099, "step": 68910 }, { "epoch": 453.42105263157896, "grad_norm": 0.9678460955619812, "learning_rate": 0.0001, "loss": 0.0123, "step": 68920 }, { "epoch": 453.4868421052632, "grad_norm": 1.2222375869750977, "learning_rate": 0.0001, "loss": 0.011, "step": 68930 }, { "epoch": 453.55263157894734, "grad_norm": 0.9485657215118408, "learning_rate": 0.0001, "loss": 0.0107, "step": 68940 }, { "epoch": 453.61842105263156, "grad_norm": 1.2582299709320068, "learning_rate": 0.0001, "loss": 0.0119, "step": 68950 }, { "epoch": 453.6842105263158, "grad_norm": 1.2892476320266724, "learning_rate": 0.0001, "loss": 0.0108, "step": 68960 }, { "epoch": 453.75, "grad_norm": 1.3842599391937256, "learning_rate": 0.0001, "loss": 0.0128, "step": 68970 }, { "epoch": 453.8157894736842, "grad_norm": 1.095976710319519, "learning_rate": 0.0001, "loss": 0.0123, "step": 68980 }, { "epoch": 453.88157894736844, "grad_norm": 1.2450053691864014, "learning_rate": 0.0001, "loss": 0.0121, "step": 68990 }, { "epoch": 453.94736842105266, "grad_norm": 0.8790797591209412, "learning_rate": 0.0001, "loss": 0.011, "step": 69000 }, { "epoch": 454.0131578947368, "grad_norm": 1.2739403247833252, "learning_rate": 0.0001, "loss": 0.0127, "step": 69010 }, { "epoch": 454.07894736842104, "grad_norm": 1.2191137075424194, "learning_rate": 0.0001, "loss": 0.0118, "step": 69020 }, { "epoch": 454.14473684210526, "grad_norm": 1.4736047983169556, "learning_rate": 0.0001, "loss": 0.0142, "step": 69030 }, { "epoch": 454.2105263157895, "grad_norm": 0.9068169593811035, "learning_rate": 0.0001, "loss": 0.0125, "step": 69040 }, { "epoch": 454.2763157894737, "grad_norm": 1.5603197813034058, "learning_rate": 0.0001, "loss": 0.0111, "step": 69050 }, { "epoch": 454.3421052631579, "grad_norm": 1.4251413345336914, "learning_rate": 0.0001, "loss": 0.0127, "step": 69060 }, { "epoch": 454.4078947368421, "grad_norm": 1.295698642730713, "learning_rate": 0.0001, "loss": 0.0117, "step": 69070 }, { "epoch": 454.4736842105263, "grad_norm": 1.536226511001587, "learning_rate": 0.0001, "loss": 0.0109, "step": 69080 }, { "epoch": 454.5394736842105, "grad_norm": 1.6740570068359375, "learning_rate": 0.0001, "loss": 0.0105, "step": 69090 }, { "epoch": 454.60526315789474, "grad_norm": 1.091256856918335, "learning_rate": 0.0001, "loss": 0.0118, "step": 69100 }, { "epoch": 454.67105263157896, "grad_norm": 0.9562748670578003, "learning_rate": 0.0001, "loss": 0.0123, "step": 69110 }, { "epoch": 454.7368421052632, "grad_norm": 1.3585184812545776, "learning_rate": 0.0001, "loss": 0.0106, "step": 69120 }, { "epoch": 454.80263157894734, "grad_norm": 1.2994377613067627, "learning_rate": 0.0001, "loss": 0.013, "step": 69130 }, { "epoch": 454.86842105263156, "grad_norm": 1.152564525604248, "learning_rate": 0.0001, "loss": 0.0104, "step": 69140 }, { "epoch": 454.9342105263158, "grad_norm": 1.6018366813659668, "learning_rate": 0.0001, "loss": 0.0103, "step": 69150 }, { "epoch": 455.0, "grad_norm": 1.2731026411056519, "learning_rate": 0.0001, "loss": 0.012, "step": 69160 }, { "epoch": 455.0657894736842, "grad_norm": 1.0202285051345825, "learning_rate": 0.0001, "loss": 0.0107, "step": 69170 }, { "epoch": 455.13157894736844, "grad_norm": 1.0699840784072876, "learning_rate": 0.0001, "loss": 0.0104, "step": 69180 }, { "epoch": 455.19736842105266, "grad_norm": 1.36112380027771, "learning_rate": 0.0001, "loss": 0.013, "step": 69190 }, { "epoch": 455.2631578947368, "grad_norm": 1.1160879135131836, "learning_rate": 0.0001, "loss": 0.0111, "step": 69200 }, { "epoch": 455.32894736842104, "grad_norm": 1.1175243854522705, "learning_rate": 0.0001, "loss": 0.0125, "step": 69210 }, { "epoch": 455.39473684210526, "grad_norm": 1.1258723735809326, "learning_rate": 0.0001, "loss": 0.0124, "step": 69220 }, { "epoch": 455.4605263157895, "grad_norm": 1.0541523694992065, "learning_rate": 0.0001, "loss": 0.011, "step": 69230 }, { "epoch": 455.5263157894737, "grad_norm": 0.8870919942855835, "learning_rate": 0.0001, "loss": 0.0116, "step": 69240 }, { "epoch": 455.5921052631579, "grad_norm": 1.1106444597244263, "learning_rate": 0.0001, "loss": 0.0112, "step": 69250 }, { "epoch": 455.6578947368421, "grad_norm": 1.2560456991195679, "learning_rate": 0.0001, "loss": 0.0138, "step": 69260 }, { "epoch": 455.7236842105263, "grad_norm": 0.831874668598175, "learning_rate": 0.0001, "loss": 0.0115, "step": 69270 }, { "epoch": 455.7894736842105, "grad_norm": 1.3205691576004028, "learning_rate": 0.0001, "loss": 0.0143, "step": 69280 }, { "epoch": 455.85526315789474, "grad_norm": 1.1079440116882324, "learning_rate": 0.0001, "loss": 0.0117, "step": 69290 }, { "epoch": 455.92105263157896, "grad_norm": 1.0580300092697144, "learning_rate": 0.0001, "loss": 0.0118, "step": 69300 }, { "epoch": 455.9868421052632, "grad_norm": 1.5250871181488037, "learning_rate": 0.0001, "loss": 0.0095, "step": 69310 }, { "epoch": 456.05263157894734, "grad_norm": 1.4987032413482666, "learning_rate": 0.0001, "loss": 0.015, "step": 69320 }, { "epoch": 456.11842105263156, "grad_norm": 1.686471939086914, "learning_rate": 0.0001, "loss": 0.012, "step": 69330 }, { "epoch": 456.1842105263158, "grad_norm": 1.4890836477279663, "learning_rate": 0.0001, "loss": 0.0118, "step": 69340 }, { "epoch": 456.25, "grad_norm": 1.3004556894302368, "learning_rate": 0.0001, "loss": 0.0117, "step": 69350 }, { "epoch": 456.3157894736842, "grad_norm": 1.3867583274841309, "learning_rate": 0.0001, "loss": 0.0124, "step": 69360 }, { "epoch": 456.38157894736844, "grad_norm": 1.4305163621902466, "learning_rate": 0.0001, "loss": 0.0115, "step": 69370 }, { "epoch": 456.44736842105266, "grad_norm": 0.9518430233001709, "learning_rate": 0.0001, "loss": 0.0112, "step": 69380 }, { "epoch": 456.5131578947368, "grad_norm": 1.2672168016433716, "learning_rate": 0.0001, "loss": 0.0121, "step": 69390 }, { "epoch": 456.57894736842104, "grad_norm": 1.313014030456543, "learning_rate": 0.0001, "loss": 0.011, "step": 69400 }, { "epoch": 456.64473684210526, "grad_norm": 0.7348408699035645, "learning_rate": 0.0001, "loss": 0.0123, "step": 69410 }, { "epoch": 456.7105263157895, "grad_norm": 1.0799474716186523, "learning_rate": 0.0001, "loss": 0.0125, "step": 69420 }, { "epoch": 456.7763157894737, "grad_norm": 0.9113180637359619, "learning_rate": 0.0001, "loss": 0.0101, "step": 69430 }, { "epoch": 456.8421052631579, "grad_norm": 0.8408834934234619, "learning_rate": 0.0001, "loss": 0.0107, "step": 69440 }, { "epoch": 456.9078947368421, "grad_norm": 0.9288105964660645, "learning_rate": 0.0001, "loss": 0.0133, "step": 69450 }, { "epoch": 456.9736842105263, "grad_norm": 1.152346134185791, "learning_rate": 0.0001, "loss": 0.0105, "step": 69460 }, { "epoch": 457.0394736842105, "grad_norm": 1.110255241394043, "learning_rate": 0.0001, "loss": 0.0123, "step": 69470 }, { "epoch": 457.10526315789474, "grad_norm": 0.9234119057655334, "learning_rate": 0.0001, "loss": 0.0128, "step": 69480 }, { "epoch": 457.17105263157896, "grad_norm": 1.131162405014038, "learning_rate": 0.0001, "loss": 0.0137, "step": 69490 }, { "epoch": 457.2368421052632, "grad_norm": 1.2793774604797363, "learning_rate": 0.0001, "loss": 0.0117, "step": 69500 }, { "epoch": 457.30263157894734, "grad_norm": 0.9325578808784485, "learning_rate": 0.0001, "loss": 0.0119, "step": 69510 }, { "epoch": 457.36842105263156, "grad_norm": 0.8188279867172241, "learning_rate": 0.0001, "loss": 0.0134, "step": 69520 }, { "epoch": 457.4342105263158, "grad_norm": 1.163884162902832, "learning_rate": 0.0001, "loss": 0.0119, "step": 69530 }, { "epoch": 457.5, "grad_norm": 0.8260605335235596, "learning_rate": 0.0001, "loss": 0.0121, "step": 69540 }, { "epoch": 457.5657894736842, "grad_norm": 0.9177809357643127, "learning_rate": 0.0001, "loss": 0.0121, "step": 69550 }, { "epoch": 457.63157894736844, "grad_norm": 1.1885771751403809, "learning_rate": 0.0001, "loss": 0.0121, "step": 69560 }, { "epoch": 457.69736842105266, "grad_norm": 0.9311556816101074, "learning_rate": 0.0001, "loss": 0.0124, "step": 69570 }, { "epoch": 457.7631578947368, "grad_norm": 1.2804585695266724, "learning_rate": 0.0001, "loss": 0.0118, "step": 69580 }, { "epoch": 457.82894736842104, "grad_norm": 1.344652771949768, "learning_rate": 0.0001, "loss": 0.0131, "step": 69590 }, { "epoch": 457.89473684210526, "grad_norm": 1.4862884283065796, "learning_rate": 0.0001, "loss": 0.0116, "step": 69600 }, { "epoch": 457.9605263157895, "grad_norm": 1.1936073303222656, "learning_rate": 0.0001, "loss": 0.0133, "step": 69610 }, { "epoch": 458.0263157894737, "grad_norm": 1.193771243095398, "learning_rate": 0.0001, "loss": 0.0116, "step": 69620 }, { "epoch": 458.0921052631579, "grad_norm": 1.1889840364456177, "learning_rate": 0.0001, "loss": 0.0127, "step": 69630 }, { "epoch": 458.1578947368421, "grad_norm": 1.3708604574203491, "learning_rate": 0.0001, "loss": 0.0121, "step": 69640 }, { "epoch": 458.2236842105263, "grad_norm": 0.5873807072639465, "learning_rate": 0.0001, "loss": 0.0121, "step": 69650 }, { "epoch": 458.2894736842105, "grad_norm": 1.2545485496520996, "learning_rate": 0.0001, "loss": 0.0125, "step": 69660 }, { "epoch": 458.35526315789474, "grad_norm": 1.2837920188903809, "learning_rate": 0.0001, "loss": 0.0132, "step": 69670 }, { "epoch": 458.42105263157896, "grad_norm": 1.2512160539627075, "learning_rate": 0.0001, "loss": 0.0119, "step": 69680 }, { "epoch": 458.4868421052632, "grad_norm": 0.9939183592796326, "learning_rate": 0.0001, "loss": 0.0111, "step": 69690 }, { "epoch": 458.55263157894734, "grad_norm": 0.8284690380096436, "learning_rate": 0.0001, "loss": 0.0107, "step": 69700 }, { "epoch": 458.61842105263156, "grad_norm": 1.1884512901306152, "learning_rate": 0.0001, "loss": 0.0117, "step": 69710 }, { "epoch": 458.6842105263158, "grad_norm": 1.069785475730896, "learning_rate": 0.0001, "loss": 0.0132, "step": 69720 }, { "epoch": 458.75, "grad_norm": 1.294502854347229, "learning_rate": 0.0001, "loss": 0.0128, "step": 69730 }, { "epoch": 458.8157894736842, "grad_norm": 1.4734914302825928, "learning_rate": 0.0001, "loss": 0.0109, "step": 69740 }, { "epoch": 458.88157894736844, "grad_norm": 0.9889464974403381, "learning_rate": 0.0001, "loss": 0.016, "step": 69750 }, { "epoch": 458.94736842105266, "grad_norm": 1.268761396408081, "learning_rate": 0.0001, "loss": 0.0106, "step": 69760 }, { "epoch": 459.0131578947368, "grad_norm": 1.004045009613037, "learning_rate": 0.0001, "loss": 0.0116, "step": 69770 }, { "epoch": 459.07894736842104, "grad_norm": 0.9832544326782227, "learning_rate": 0.0001, "loss": 0.0125, "step": 69780 }, { "epoch": 459.14473684210526, "grad_norm": 1.2626286745071411, "learning_rate": 0.0001, "loss": 0.0122, "step": 69790 }, { "epoch": 459.2105263157895, "grad_norm": 1.5737202167510986, "learning_rate": 0.0001, "loss": 0.0166, "step": 69800 }, { "epoch": 459.2763157894737, "grad_norm": 1.1485472917556763, "learning_rate": 0.0001, "loss": 0.0114, "step": 69810 }, { "epoch": 459.3421052631579, "grad_norm": 0.9378661513328552, "learning_rate": 0.0001, "loss": 0.0121, "step": 69820 }, { "epoch": 459.4078947368421, "grad_norm": 0.9889057874679565, "learning_rate": 0.0001, "loss": 0.0126, "step": 69830 }, { "epoch": 459.4736842105263, "grad_norm": 0.7639946341514587, "learning_rate": 0.0001, "loss": 0.0128, "step": 69840 }, { "epoch": 459.5394736842105, "grad_norm": 1.084297776222229, "learning_rate": 0.0001, "loss": 0.0124, "step": 69850 }, { "epoch": 459.60526315789474, "grad_norm": 1.2830708026885986, "learning_rate": 0.0001, "loss": 0.012, "step": 69860 }, { "epoch": 459.67105263157896, "grad_norm": 0.9912347793579102, "learning_rate": 0.0001, "loss": 0.0105, "step": 69870 }, { "epoch": 459.7368421052632, "grad_norm": 1.7279759645462036, "learning_rate": 0.0001, "loss": 0.012, "step": 69880 }, { "epoch": 459.80263157894734, "grad_norm": 1.2045061588287354, "learning_rate": 0.0001, "loss": 0.0113, "step": 69890 }, { "epoch": 459.86842105263156, "grad_norm": 1.4160776138305664, "learning_rate": 0.0001, "loss": 0.0119, "step": 69900 }, { "epoch": 459.9342105263158, "grad_norm": 0.9676499366760254, "learning_rate": 0.0001, "loss": 0.0128, "step": 69910 }, { "epoch": 460.0, "grad_norm": 0.8560894131660461, "learning_rate": 0.0001, "loss": 0.0106, "step": 69920 }, { "epoch": 460.0657894736842, "grad_norm": 1.248684287071228, "learning_rate": 0.0001, "loss": 0.0115, "step": 69930 }, { "epoch": 460.13157894736844, "grad_norm": 0.8064592480659485, "learning_rate": 0.0001, "loss": 0.0124, "step": 69940 }, { "epoch": 460.19736842105266, "grad_norm": 1.0934714078903198, "learning_rate": 0.0001, "loss": 0.0117, "step": 69950 }, { "epoch": 460.2631578947368, "grad_norm": 1.0918971300125122, "learning_rate": 0.0001, "loss": 0.013, "step": 69960 }, { "epoch": 460.32894736842104, "grad_norm": 1.164820909500122, "learning_rate": 0.0001, "loss": 0.0118, "step": 69970 }, { "epoch": 460.39473684210526, "grad_norm": 0.8863160610198975, "learning_rate": 0.0001, "loss": 0.0133, "step": 69980 }, { "epoch": 460.4605263157895, "grad_norm": 1.3123438358306885, "learning_rate": 0.0001, "loss": 0.0128, "step": 69990 }, { "epoch": 460.5263157894737, "grad_norm": 1.3433982133865356, "learning_rate": 0.0001, "loss": 0.0126, "step": 70000 }, { "epoch": 460.5921052631579, "grad_norm": 1.089854121208191, "learning_rate": 0.0001, "loss": 0.0116, "step": 70010 }, { "epoch": 460.6578947368421, "grad_norm": 1.2574924230575562, "learning_rate": 0.0001, "loss": 0.0111, "step": 70020 }, { "epoch": 460.7236842105263, "grad_norm": 1.087710976600647, "learning_rate": 0.0001, "loss": 0.0123, "step": 70030 }, { "epoch": 460.7894736842105, "grad_norm": 1.1236116886138916, "learning_rate": 0.0001, "loss": 0.0104, "step": 70040 }, { "epoch": 460.85526315789474, "grad_norm": 1.117353081703186, "learning_rate": 0.0001, "loss": 0.014, "step": 70050 }, { "epoch": 460.92105263157896, "grad_norm": 1.1128649711608887, "learning_rate": 0.0001, "loss": 0.0129, "step": 70060 }, { "epoch": 460.9868421052632, "grad_norm": 1.0730403661727905, "learning_rate": 0.0001, "loss": 0.0126, "step": 70070 }, { "epoch": 461.05263157894734, "grad_norm": 1.179450273513794, "learning_rate": 0.0001, "loss": 0.0132, "step": 70080 }, { "epoch": 461.11842105263156, "grad_norm": 1.2371575832366943, "learning_rate": 0.0001, "loss": 0.0109, "step": 70090 }, { "epoch": 461.1842105263158, "grad_norm": 1.42441725730896, "learning_rate": 0.0001, "loss": 0.0126, "step": 70100 }, { "epoch": 461.25, "grad_norm": 1.179229497909546, "learning_rate": 0.0001, "loss": 0.0123, "step": 70110 }, { "epoch": 461.3157894736842, "grad_norm": 0.8924153447151184, "learning_rate": 0.0001, "loss": 0.0119, "step": 70120 }, { "epoch": 461.38157894736844, "grad_norm": 1.3798414468765259, "learning_rate": 0.0001, "loss": 0.0127, "step": 70130 }, { "epoch": 461.44736842105266, "grad_norm": 1.3410454988479614, "learning_rate": 0.0001, "loss": 0.0118, "step": 70140 }, { "epoch": 461.5131578947368, "grad_norm": 0.9191851019859314, "learning_rate": 0.0001, "loss": 0.0129, "step": 70150 }, { "epoch": 461.57894736842104, "grad_norm": 1.0787506103515625, "learning_rate": 0.0001, "loss": 0.013, "step": 70160 }, { "epoch": 461.64473684210526, "grad_norm": 1.022510290145874, "learning_rate": 0.0001, "loss": 0.0117, "step": 70170 }, { "epoch": 461.7105263157895, "grad_norm": 1.0433993339538574, "learning_rate": 0.0001, "loss": 0.0125, "step": 70180 }, { "epoch": 461.7763157894737, "grad_norm": 1.2744972705841064, "learning_rate": 0.0001, "loss": 0.0116, "step": 70190 }, { "epoch": 461.8421052631579, "grad_norm": 1.1916465759277344, "learning_rate": 0.0001, "loss": 0.012, "step": 70200 }, { "epoch": 461.9078947368421, "grad_norm": 1.2578779458999634, "learning_rate": 0.0001, "loss": 0.0115, "step": 70210 }, { "epoch": 461.9736842105263, "grad_norm": 1.0983797311782837, "learning_rate": 0.0001, "loss": 0.0119, "step": 70220 }, { "epoch": 462.0394736842105, "grad_norm": 1.3907415866851807, "learning_rate": 0.0001, "loss": 0.0122, "step": 70230 }, { "epoch": 462.10526315789474, "grad_norm": 1.0465130805969238, "learning_rate": 0.0001, "loss": 0.0143, "step": 70240 }, { "epoch": 462.17105263157896, "grad_norm": 1.1414015293121338, "learning_rate": 0.0001, "loss": 0.0118, "step": 70250 }, { "epoch": 462.2368421052632, "grad_norm": 1.4109493494033813, "learning_rate": 0.0001, "loss": 0.0117, "step": 70260 }, { "epoch": 462.30263157894734, "grad_norm": 1.1486443281173706, "learning_rate": 0.0001, "loss": 0.0118, "step": 70270 }, { "epoch": 462.36842105263156, "grad_norm": 1.240187168121338, "learning_rate": 0.0001, "loss": 0.0112, "step": 70280 }, { "epoch": 462.4342105263158, "grad_norm": 1.2985851764678955, "learning_rate": 0.0001, "loss": 0.0124, "step": 70290 }, { "epoch": 462.5, "grad_norm": 1.3922537565231323, "learning_rate": 0.0001, "loss": 0.0106, "step": 70300 }, { "epoch": 462.5657894736842, "grad_norm": 1.1055222749710083, "learning_rate": 0.0001, "loss": 0.0114, "step": 70310 }, { "epoch": 462.63157894736844, "grad_norm": 1.1833173036575317, "learning_rate": 0.0001, "loss": 0.0117, "step": 70320 }, { "epoch": 462.69736842105266, "grad_norm": 1.226995587348938, "learning_rate": 0.0001, "loss": 0.0126, "step": 70330 }, { "epoch": 462.7631578947368, "grad_norm": 1.0808686017990112, "learning_rate": 0.0001, "loss": 0.012, "step": 70340 }, { "epoch": 462.82894736842104, "grad_norm": 1.443953275680542, "learning_rate": 0.0001, "loss": 0.0141, "step": 70350 }, { "epoch": 462.89473684210526, "grad_norm": 1.205458402633667, "learning_rate": 0.0001, "loss": 0.0141, "step": 70360 }, { "epoch": 462.9605263157895, "grad_norm": 1.6020748615264893, "learning_rate": 0.0001, "loss": 0.0139, "step": 70370 }, { "epoch": 463.0263157894737, "grad_norm": 1.2139602899551392, "learning_rate": 0.0001, "loss": 0.0128, "step": 70380 }, { "epoch": 463.0921052631579, "grad_norm": 1.0997470617294312, "learning_rate": 0.0001, "loss": 0.0137, "step": 70390 }, { "epoch": 463.1578947368421, "grad_norm": 0.7857748866081238, "learning_rate": 0.0001, "loss": 0.013, "step": 70400 }, { "epoch": 463.2236842105263, "grad_norm": 1.3937057256698608, "learning_rate": 0.0001, "loss": 0.0151, "step": 70410 }, { "epoch": 463.2894736842105, "grad_norm": 1.2709908485412598, "learning_rate": 0.0001, "loss": 0.0123, "step": 70420 }, { "epoch": 463.35526315789474, "grad_norm": 1.2485954761505127, "learning_rate": 0.0001, "loss": 0.0126, "step": 70430 }, { "epoch": 463.42105263157896, "grad_norm": 1.241417646408081, "learning_rate": 0.0001, "loss": 0.0136, "step": 70440 }, { "epoch": 463.4868421052632, "grad_norm": 1.4835032224655151, "learning_rate": 0.0001, "loss": 0.0127, "step": 70450 }, { "epoch": 463.55263157894734, "grad_norm": 1.4047082662582397, "learning_rate": 0.0001, "loss": 0.0152, "step": 70460 }, { "epoch": 463.61842105263156, "grad_norm": 1.533784031867981, "learning_rate": 0.0001, "loss": 0.0138, "step": 70470 }, { "epoch": 463.6842105263158, "grad_norm": 1.3087002038955688, "learning_rate": 0.0001, "loss": 0.0147, "step": 70480 }, { "epoch": 463.75, "grad_norm": 1.1302224397659302, "learning_rate": 0.0001, "loss": 0.0147, "step": 70490 }, { "epoch": 463.8157894736842, "grad_norm": 0.9580501317977905, "learning_rate": 0.0001, "loss": 0.0114, "step": 70500 }, { "epoch": 463.88157894736844, "grad_norm": 0.8459773063659668, "learning_rate": 0.0001, "loss": 0.0132, "step": 70510 }, { "epoch": 463.94736842105266, "grad_norm": 1.1055728197097778, "learning_rate": 0.0001, "loss": 0.0115, "step": 70520 }, { "epoch": 464.0131578947368, "grad_norm": 1.2454581260681152, "learning_rate": 0.0001, "loss": 0.0118, "step": 70530 }, { "epoch": 464.07894736842104, "grad_norm": 1.3780239820480347, "learning_rate": 0.0001, "loss": 0.0138, "step": 70540 }, { "epoch": 464.14473684210526, "grad_norm": 1.2484023571014404, "learning_rate": 0.0001, "loss": 0.0131, "step": 70550 }, { "epoch": 464.2105263157895, "grad_norm": 1.0500701665878296, "learning_rate": 0.0001, "loss": 0.0161, "step": 70560 }, { "epoch": 464.2763157894737, "grad_norm": 0.9749228358268738, "learning_rate": 0.0001, "loss": 0.0163, "step": 70570 }, { "epoch": 464.3421052631579, "grad_norm": 1.0054138898849487, "learning_rate": 0.0001, "loss": 0.0155, "step": 70580 }, { "epoch": 464.4078947368421, "grad_norm": 1.2907658815383911, "learning_rate": 0.0001, "loss": 0.0144, "step": 70590 }, { "epoch": 464.4736842105263, "grad_norm": 1.1337512731552124, "learning_rate": 0.0001, "loss": 0.0143, "step": 70600 }, { "epoch": 464.5394736842105, "grad_norm": 1.3894444704055786, "learning_rate": 0.0001, "loss": 0.0158, "step": 70610 }, { "epoch": 464.60526315789474, "grad_norm": 1.5000969171524048, "learning_rate": 0.0001, "loss": 0.0146, "step": 70620 }, { "epoch": 464.67105263157896, "grad_norm": 1.1303669214248657, "learning_rate": 0.0001, "loss": 0.0134, "step": 70630 }, { "epoch": 464.7368421052632, "grad_norm": 0.8212418556213379, "learning_rate": 0.0001, "loss": 0.0133, "step": 70640 }, { "epoch": 464.80263157894734, "grad_norm": 1.0030567646026611, "learning_rate": 0.0001, "loss": 0.016, "step": 70650 }, { "epoch": 464.86842105263156, "grad_norm": 1.0976468324661255, "learning_rate": 0.0001, "loss": 0.0157, "step": 70660 }, { "epoch": 464.9342105263158, "grad_norm": 1.2463607788085938, "learning_rate": 0.0001, "loss": 0.0135, "step": 70670 }, { "epoch": 465.0, "grad_norm": 0.891297459602356, "learning_rate": 0.0001, "loss": 0.0137, "step": 70680 }, { "epoch": 465.0657894736842, "grad_norm": 1.2342113256454468, "learning_rate": 0.0001, "loss": 0.0152, "step": 70690 }, { "epoch": 465.13157894736844, "grad_norm": 0.8251991868019104, "learning_rate": 0.0001, "loss": 0.0139, "step": 70700 }, { "epoch": 465.19736842105266, "grad_norm": 0.9566839337348938, "learning_rate": 0.0001, "loss": 0.0123, "step": 70710 }, { "epoch": 465.2631578947368, "grad_norm": 0.8764354586601257, "learning_rate": 0.0001, "loss": 0.0131, "step": 70720 }, { "epoch": 465.32894736842104, "grad_norm": 1.0923231840133667, "learning_rate": 0.0001, "loss": 0.0142, "step": 70730 }, { "epoch": 465.39473684210526, "grad_norm": 1.1273107528686523, "learning_rate": 0.0001, "loss": 0.012, "step": 70740 }, { "epoch": 465.4605263157895, "grad_norm": 1.488062858581543, "learning_rate": 0.0001, "loss": 0.0119, "step": 70750 }, { "epoch": 465.5263157894737, "grad_norm": 1.199041724205017, "learning_rate": 0.0001, "loss": 0.0147, "step": 70760 }, { "epoch": 465.5921052631579, "grad_norm": 1.3205357789993286, "learning_rate": 0.0001, "loss": 0.0133, "step": 70770 }, { "epoch": 465.6578947368421, "grad_norm": 1.4035863876342773, "learning_rate": 0.0001, "loss": 0.0122, "step": 70780 }, { "epoch": 465.7236842105263, "grad_norm": 1.1187028884887695, "learning_rate": 0.0001, "loss": 0.0125, "step": 70790 }, { "epoch": 465.7894736842105, "grad_norm": 1.0568621158599854, "learning_rate": 0.0001, "loss": 0.0153, "step": 70800 }, { "epoch": 465.85526315789474, "grad_norm": 1.3467577695846558, "learning_rate": 0.0001, "loss": 0.0119, "step": 70810 }, { "epoch": 465.92105263157896, "grad_norm": 1.3389008045196533, "learning_rate": 0.0001, "loss": 0.0117, "step": 70820 }, { "epoch": 465.9868421052632, "grad_norm": 1.372363567352295, "learning_rate": 0.0001, "loss": 0.0133, "step": 70830 }, { "epoch": 466.05263157894734, "grad_norm": 1.0468113422393799, "learning_rate": 0.0001, "loss": 0.0113, "step": 70840 }, { "epoch": 466.11842105263156, "grad_norm": 0.9883679747581482, "learning_rate": 0.0001, "loss": 0.0124, "step": 70850 }, { "epoch": 466.1842105263158, "grad_norm": 0.710003674030304, "learning_rate": 0.0001, "loss": 0.0119, "step": 70860 }, { "epoch": 466.25, "grad_norm": 1.3106662034988403, "learning_rate": 0.0001, "loss": 0.0143, "step": 70870 }, { "epoch": 466.3157894736842, "grad_norm": 1.022836685180664, "learning_rate": 0.0001, "loss": 0.0116, "step": 70880 }, { "epoch": 466.38157894736844, "grad_norm": 1.1058964729309082, "learning_rate": 0.0001, "loss": 0.0129, "step": 70890 }, { "epoch": 466.44736842105266, "grad_norm": 0.9566864371299744, "learning_rate": 0.0001, "loss": 0.015, "step": 70900 }, { "epoch": 466.5131578947368, "grad_norm": 1.0572712421417236, "learning_rate": 0.0001, "loss": 0.0105, "step": 70910 }, { "epoch": 466.57894736842104, "grad_norm": 0.6852365136146545, "learning_rate": 0.0001, "loss": 0.0115, "step": 70920 }, { "epoch": 466.64473684210526, "grad_norm": 0.9085026383399963, "learning_rate": 0.0001, "loss": 0.0123, "step": 70930 }, { "epoch": 466.7105263157895, "grad_norm": 1.1825387477874756, "learning_rate": 0.0001, "loss": 0.0134, "step": 70940 }, { "epoch": 466.7763157894737, "grad_norm": 1.0037176609039307, "learning_rate": 0.0001, "loss": 0.0126, "step": 70950 }, { "epoch": 466.8421052631579, "grad_norm": 1.0735546350479126, "learning_rate": 0.0001, "loss": 0.0125, "step": 70960 }, { "epoch": 466.9078947368421, "grad_norm": 1.3803229331970215, "learning_rate": 0.0001, "loss": 0.0119, "step": 70970 }, { "epoch": 466.9736842105263, "grad_norm": 0.845694363117218, "learning_rate": 0.0001, "loss": 0.0122, "step": 70980 }, { "epoch": 467.0394736842105, "grad_norm": 1.186300277709961, "learning_rate": 0.0001, "loss": 0.0139, "step": 70990 }, { "epoch": 467.10526315789474, "grad_norm": 1.171091079711914, "learning_rate": 0.0001, "loss": 0.0142, "step": 71000 }, { "epoch": 467.17105263157896, "grad_norm": 1.3238587379455566, "learning_rate": 0.0001, "loss": 0.0132, "step": 71010 }, { "epoch": 467.2368421052632, "grad_norm": 1.0442191362380981, "learning_rate": 0.0001, "loss": 0.0118, "step": 71020 }, { "epoch": 467.30263157894734, "grad_norm": 0.9936031103134155, "learning_rate": 0.0001, "loss": 0.0114, "step": 71030 }, { "epoch": 467.36842105263156, "grad_norm": 0.8942242860794067, "learning_rate": 0.0001, "loss": 0.0114, "step": 71040 }, { "epoch": 467.4342105263158, "grad_norm": 1.1458888053894043, "learning_rate": 0.0001, "loss": 0.0132, "step": 71050 }, { "epoch": 467.5, "grad_norm": 1.038323163986206, "learning_rate": 0.0001, "loss": 0.0123, "step": 71060 }, { "epoch": 467.5657894736842, "grad_norm": 1.2702325582504272, "learning_rate": 0.0001, "loss": 0.0117, "step": 71070 }, { "epoch": 467.63157894736844, "grad_norm": 1.037888526916504, "learning_rate": 0.0001, "loss": 0.0111, "step": 71080 }, { "epoch": 467.69736842105266, "grad_norm": 1.5120564699172974, "learning_rate": 0.0001, "loss": 0.0106, "step": 71090 }, { "epoch": 467.7631578947368, "grad_norm": 1.0400851964950562, "learning_rate": 0.0001, "loss": 0.0133, "step": 71100 }, { "epoch": 467.82894736842104, "grad_norm": 1.0576149225234985, "learning_rate": 0.0001, "loss": 0.0133, "step": 71110 }, { "epoch": 467.89473684210526, "grad_norm": 1.1322977542877197, "learning_rate": 0.0001, "loss": 0.0112, "step": 71120 }, { "epoch": 467.9605263157895, "grad_norm": 1.2082958221435547, "learning_rate": 0.0001, "loss": 0.0104, "step": 71130 }, { "epoch": 468.0263157894737, "grad_norm": 0.967847466468811, "learning_rate": 0.0001, "loss": 0.0114, "step": 71140 }, { "epoch": 468.0921052631579, "grad_norm": 1.2820326089859009, "learning_rate": 0.0001, "loss": 0.0114, "step": 71150 }, { "epoch": 468.1578947368421, "grad_norm": 1.3719661235809326, "learning_rate": 0.0001, "loss": 0.0118, "step": 71160 }, { "epoch": 468.2236842105263, "grad_norm": 1.6810905933380127, "learning_rate": 0.0001, "loss": 0.0122, "step": 71170 }, { "epoch": 468.2894736842105, "grad_norm": 0.994966447353363, "learning_rate": 0.0001, "loss": 0.0112, "step": 71180 }, { "epoch": 468.35526315789474, "grad_norm": 1.2583346366882324, "learning_rate": 0.0001, "loss": 0.012, "step": 71190 }, { "epoch": 468.42105263157896, "grad_norm": 1.1598786115646362, "learning_rate": 0.0001, "loss": 0.0122, "step": 71200 }, { "epoch": 468.4868421052632, "grad_norm": 1.2424436807632446, "learning_rate": 0.0001, "loss": 0.0113, "step": 71210 }, { "epoch": 468.55263157894734, "grad_norm": 1.1447334289550781, "learning_rate": 0.0001, "loss": 0.0108, "step": 71220 }, { "epoch": 468.61842105263156, "grad_norm": 0.9471054077148438, "learning_rate": 0.0001, "loss": 0.0123, "step": 71230 }, { "epoch": 468.6842105263158, "grad_norm": 1.2661805152893066, "learning_rate": 0.0001, "loss": 0.0125, "step": 71240 }, { "epoch": 468.75, "grad_norm": 1.1477277278900146, "learning_rate": 0.0001, "loss": 0.0104, "step": 71250 }, { "epoch": 468.8157894736842, "grad_norm": 1.0798248052597046, "learning_rate": 0.0001, "loss": 0.011, "step": 71260 }, { "epoch": 468.88157894736844, "grad_norm": 1.0489845275878906, "learning_rate": 0.0001, "loss": 0.0133, "step": 71270 }, { "epoch": 468.94736842105266, "grad_norm": 1.2444311380386353, "learning_rate": 0.0001, "loss": 0.0099, "step": 71280 }, { "epoch": 469.0131578947368, "grad_norm": 1.1508184671401978, "learning_rate": 0.0001, "loss": 0.0125, "step": 71290 }, { "epoch": 469.07894736842104, "grad_norm": 1.0724323987960815, "learning_rate": 0.0001, "loss": 0.0126, "step": 71300 }, { "epoch": 469.14473684210526, "grad_norm": 1.3677020072937012, "learning_rate": 0.0001, "loss": 0.0117, "step": 71310 }, { "epoch": 469.2105263157895, "grad_norm": 0.8909698128700256, "learning_rate": 0.0001, "loss": 0.0102, "step": 71320 }, { "epoch": 469.2763157894737, "grad_norm": 1.2063177824020386, "learning_rate": 0.0001, "loss": 0.0114, "step": 71330 }, { "epoch": 469.3421052631579, "grad_norm": 1.4263769388198853, "learning_rate": 0.0001, "loss": 0.0131, "step": 71340 }, { "epoch": 469.4078947368421, "grad_norm": 1.343396782875061, "learning_rate": 0.0001, "loss": 0.0109, "step": 71350 }, { "epoch": 469.4736842105263, "grad_norm": 1.0640443563461304, "learning_rate": 0.0001, "loss": 0.0126, "step": 71360 }, { "epoch": 469.5394736842105, "grad_norm": 1.131130576133728, "learning_rate": 0.0001, "loss": 0.0115, "step": 71370 }, { "epoch": 469.60526315789474, "grad_norm": 0.8437464833259583, "learning_rate": 0.0001, "loss": 0.0116, "step": 71380 }, { "epoch": 469.67105263157896, "grad_norm": 0.8952385187149048, "learning_rate": 0.0001, "loss": 0.0124, "step": 71390 }, { "epoch": 469.7368421052632, "grad_norm": 0.8962030410766602, "learning_rate": 0.0001, "loss": 0.01, "step": 71400 }, { "epoch": 469.80263157894734, "grad_norm": 0.953825056552887, "learning_rate": 0.0001, "loss": 0.0112, "step": 71410 }, { "epoch": 469.86842105263156, "grad_norm": 0.8790578246116638, "learning_rate": 0.0001, "loss": 0.0113, "step": 71420 }, { "epoch": 469.9342105263158, "grad_norm": 1.3036227226257324, "learning_rate": 0.0001, "loss": 0.0125, "step": 71430 }, { "epoch": 470.0, "grad_norm": 0.9618934392929077, "learning_rate": 0.0001, "loss": 0.0117, "step": 71440 }, { "epoch": 470.0657894736842, "grad_norm": 0.908938467502594, "learning_rate": 0.0001, "loss": 0.0133, "step": 71450 }, { "epoch": 470.13157894736844, "grad_norm": 1.156169056892395, "learning_rate": 0.0001, "loss": 0.0105, "step": 71460 }, { "epoch": 470.19736842105266, "grad_norm": 0.8988484144210815, "learning_rate": 0.0001, "loss": 0.0119, "step": 71470 }, { "epoch": 470.2631578947368, "grad_norm": 1.0380802154541016, "learning_rate": 0.0001, "loss": 0.0113, "step": 71480 }, { "epoch": 470.32894736842104, "grad_norm": 1.1827173233032227, "learning_rate": 0.0001, "loss": 0.0097, "step": 71490 }, { "epoch": 470.39473684210526, "grad_norm": 0.9451979994773865, "learning_rate": 0.0001, "loss": 0.0106, "step": 71500 }, { "epoch": 470.4605263157895, "grad_norm": 0.8743581771850586, "learning_rate": 0.0001, "loss": 0.0123, "step": 71510 }, { "epoch": 470.5263157894737, "grad_norm": 0.9873955249786377, "learning_rate": 0.0001, "loss": 0.013, "step": 71520 }, { "epoch": 470.5921052631579, "grad_norm": 0.9436299204826355, "learning_rate": 0.0001, "loss": 0.0112, "step": 71530 }, { "epoch": 470.6578947368421, "grad_norm": 0.8567780256271362, "learning_rate": 0.0001, "loss": 0.0119, "step": 71540 }, { "epoch": 470.7236842105263, "grad_norm": 1.0895605087280273, "learning_rate": 0.0001, "loss": 0.0108, "step": 71550 }, { "epoch": 470.7894736842105, "grad_norm": 1.3269134759902954, "learning_rate": 0.0001, "loss": 0.0122, "step": 71560 }, { "epoch": 470.85526315789474, "grad_norm": 1.2100285291671753, "learning_rate": 0.0001, "loss": 0.0111, "step": 71570 }, { "epoch": 470.92105263157896, "grad_norm": 1.2074404954910278, "learning_rate": 0.0001, "loss": 0.011, "step": 71580 }, { "epoch": 470.9868421052632, "grad_norm": 1.3929208517074585, "learning_rate": 0.0001, "loss": 0.0121, "step": 71590 }, { "epoch": 471.05263157894734, "grad_norm": 1.3554884195327759, "learning_rate": 0.0001, "loss": 0.0117, "step": 71600 }, { "epoch": 471.11842105263156, "grad_norm": 1.3486567735671997, "learning_rate": 0.0001, "loss": 0.0124, "step": 71610 }, { "epoch": 471.1842105263158, "grad_norm": 0.9685068726539612, "learning_rate": 0.0001, "loss": 0.0118, "step": 71620 }, { "epoch": 471.25, "grad_norm": 1.542588233947754, "learning_rate": 0.0001, "loss": 0.011, "step": 71630 }, { "epoch": 471.3157894736842, "grad_norm": 1.5774450302124023, "learning_rate": 0.0001, "loss": 0.0111, "step": 71640 }, { "epoch": 471.38157894736844, "grad_norm": 1.1362987756729126, "learning_rate": 0.0001, "loss": 0.0125, "step": 71650 }, { "epoch": 471.44736842105266, "grad_norm": 1.0866764783859253, "learning_rate": 0.0001, "loss": 0.0121, "step": 71660 }, { "epoch": 471.5131578947368, "grad_norm": 1.3600131273269653, "learning_rate": 0.0001, "loss": 0.012, "step": 71670 }, { "epoch": 471.57894736842104, "grad_norm": 1.3982903957366943, "learning_rate": 0.0001, "loss": 0.01, "step": 71680 }, { "epoch": 471.64473684210526, "grad_norm": 1.3128663301467896, "learning_rate": 0.0001, "loss": 0.011, "step": 71690 }, { "epoch": 471.7105263157895, "grad_norm": 1.2559845447540283, "learning_rate": 0.0001, "loss": 0.0089, "step": 71700 }, { "epoch": 471.7763157894737, "grad_norm": 0.9143256545066833, "learning_rate": 0.0001, "loss": 0.0104, "step": 71710 }, { "epoch": 471.8421052631579, "grad_norm": 1.2619221210479736, "learning_rate": 0.0001, "loss": 0.0125, "step": 71720 }, { "epoch": 471.9078947368421, "grad_norm": 1.3303905725479126, "learning_rate": 0.0001, "loss": 0.0115, "step": 71730 }, { "epoch": 471.9736842105263, "grad_norm": 1.1719235181808472, "learning_rate": 0.0001, "loss": 0.011, "step": 71740 }, { "epoch": 472.0394736842105, "grad_norm": 1.1172962188720703, "learning_rate": 0.0001, "loss": 0.0113, "step": 71750 }, { "epoch": 472.10526315789474, "grad_norm": 1.4545469284057617, "learning_rate": 0.0001, "loss": 0.0104, "step": 71760 }, { "epoch": 472.17105263157896, "grad_norm": 0.806731641292572, "learning_rate": 0.0001, "loss": 0.0111, "step": 71770 }, { "epoch": 472.2368421052632, "grad_norm": 1.0789495706558228, "learning_rate": 0.0001, "loss": 0.0121, "step": 71780 }, { "epoch": 472.30263157894734, "grad_norm": 1.059844732284546, "learning_rate": 0.0001, "loss": 0.0126, "step": 71790 }, { "epoch": 472.36842105263156, "grad_norm": 1.3796194791793823, "learning_rate": 0.0001, "loss": 0.0112, "step": 71800 }, { "epoch": 472.4342105263158, "grad_norm": 1.1321066617965698, "learning_rate": 0.0001, "loss": 0.0123, "step": 71810 }, { "epoch": 472.5, "grad_norm": 0.8206098675727844, "learning_rate": 0.0001, "loss": 0.0111, "step": 71820 }, { "epoch": 472.5657894736842, "grad_norm": 0.9593137502670288, "learning_rate": 0.0001, "loss": 0.0125, "step": 71830 }, { "epoch": 472.63157894736844, "grad_norm": 1.0930147171020508, "learning_rate": 0.0001, "loss": 0.0125, "step": 71840 }, { "epoch": 472.69736842105266, "grad_norm": 1.3240526914596558, "learning_rate": 0.0001, "loss": 0.0106, "step": 71850 }, { "epoch": 472.7631578947368, "grad_norm": 1.3134987354278564, "learning_rate": 0.0001, "loss": 0.0109, "step": 71860 }, { "epoch": 472.82894736842104, "grad_norm": 1.409283995628357, "learning_rate": 0.0001, "loss": 0.0106, "step": 71870 }, { "epoch": 472.89473684210526, "grad_norm": 0.9180440306663513, "learning_rate": 0.0001, "loss": 0.0123, "step": 71880 }, { "epoch": 472.9605263157895, "grad_norm": 0.8743194937705994, "learning_rate": 0.0001, "loss": 0.0132, "step": 71890 }, { "epoch": 473.0263157894737, "grad_norm": 1.0829284191131592, "learning_rate": 0.0001, "loss": 0.012, "step": 71900 }, { "epoch": 473.0921052631579, "grad_norm": 1.0314230918884277, "learning_rate": 0.0001, "loss": 0.0112, "step": 71910 }, { "epoch": 473.1578947368421, "grad_norm": 1.0930405855178833, "learning_rate": 0.0001, "loss": 0.0106, "step": 71920 }, { "epoch": 473.2236842105263, "grad_norm": 1.387052059173584, "learning_rate": 0.0001, "loss": 0.0127, "step": 71930 }, { "epoch": 473.2894736842105, "grad_norm": 1.1095280647277832, "learning_rate": 0.0001, "loss": 0.0128, "step": 71940 }, { "epoch": 473.35526315789474, "grad_norm": 1.1823383569717407, "learning_rate": 0.0001, "loss": 0.0113, "step": 71950 }, { "epoch": 473.42105263157896, "grad_norm": 1.2319684028625488, "learning_rate": 0.0001, "loss": 0.0117, "step": 71960 }, { "epoch": 473.4868421052632, "grad_norm": 1.3296653032302856, "learning_rate": 0.0001, "loss": 0.0105, "step": 71970 }, { "epoch": 473.55263157894734, "grad_norm": 1.1372100114822388, "learning_rate": 0.0001, "loss": 0.0114, "step": 71980 }, { "epoch": 473.61842105263156, "grad_norm": 1.3765093088150024, "learning_rate": 0.0001, "loss": 0.0116, "step": 71990 }, { "epoch": 473.6842105263158, "grad_norm": 1.0475393533706665, "learning_rate": 0.0001, "loss": 0.012, "step": 72000 }, { "epoch": 473.75, "grad_norm": 0.8706821203231812, "learning_rate": 0.0001, "loss": 0.0118, "step": 72010 }, { "epoch": 473.8157894736842, "grad_norm": 1.1622743606567383, "learning_rate": 0.0001, "loss": 0.0123, "step": 72020 }, { "epoch": 473.88157894736844, "grad_norm": 1.101678729057312, "learning_rate": 0.0001, "loss": 0.0118, "step": 72030 }, { "epoch": 473.94736842105266, "grad_norm": 1.4966200590133667, "learning_rate": 0.0001, "loss": 0.0126, "step": 72040 }, { "epoch": 474.0131578947368, "grad_norm": 0.8615991473197937, "learning_rate": 0.0001, "loss": 0.0119, "step": 72050 }, { "epoch": 474.07894736842104, "grad_norm": 1.0953624248504639, "learning_rate": 0.0001, "loss": 0.0117, "step": 72060 }, { "epoch": 474.14473684210526, "grad_norm": 1.2191064357757568, "learning_rate": 0.0001, "loss": 0.0107, "step": 72070 }, { "epoch": 474.2105263157895, "grad_norm": 0.9888071417808533, "learning_rate": 0.0001, "loss": 0.0111, "step": 72080 }, { "epoch": 474.2763157894737, "grad_norm": 1.138601541519165, "learning_rate": 0.0001, "loss": 0.0111, "step": 72090 }, { "epoch": 474.3421052631579, "grad_norm": 0.9573279023170471, "learning_rate": 0.0001, "loss": 0.0119, "step": 72100 }, { "epoch": 474.4078947368421, "grad_norm": 1.5391374826431274, "learning_rate": 0.0001, "loss": 0.0119, "step": 72110 }, { "epoch": 474.4736842105263, "grad_norm": 1.4656072854995728, "learning_rate": 0.0001, "loss": 0.0128, "step": 72120 }, { "epoch": 474.5394736842105, "grad_norm": 1.4683994054794312, "learning_rate": 0.0001, "loss": 0.0115, "step": 72130 }, { "epoch": 474.60526315789474, "grad_norm": 1.0790272951126099, "learning_rate": 0.0001, "loss": 0.0117, "step": 72140 }, { "epoch": 474.67105263157896, "grad_norm": 1.186042070388794, "learning_rate": 0.0001, "loss": 0.0106, "step": 72150 }, { "epoch": 474.7368421052632, "grad_norm": 1.169363021850586, "learning_rate": 0.0001, "loss": 0.012, "step": 72160 }, { "epoch": 474.80263157894734, "grad_norm": 1.1990368366241455, "learning_rate": 0.0001, "loss": 0.013, "step": 72170 }, { "epoch": 474.86842105263156, "grad_norm": 1.200440764427185, "learning_rate": 0.0001, "loss": 0.0121, "step": 72180 }, { "epoch": 474.9342105263158, "grad_norm": 1.1117382049560547, "learning_rate": 0.0001, "loss": 0.0112, "step": 72190 }, { "epoch": 475.0, "grad_norm": 1.0739638805389404, "learning_rate": 0.0001, "loss": 0.011, "step": 72200 }, { "epoch": 475.0657894736842, "grad_norm": 1.1158939599990845, "learning_rate": 0.0001, "loss": 0.0106, "step": 72210 }, { "epoch": 475.13157894736844, "grad_norm": 1.6269727945327759, "learning_rate": 0.0001, "loss": 0.0125, "step": 72220 }, { "epoch": 475.19736842105266, "grad_norm": 1.75429368019104, "learning_rate": 0.0001, "loss": 0.0143, "step": 72230 }, { "epoch": 475.2631578947368, "grad_norm": 1.5069760084152222, "learning_rate": 0.0001, "loss": 0.0136, "step": 72240 }, { "epoch": 475.32894736842104, "grad_norm": 1.1883662939071655, "learning_rate": 0.0001, "loss": 0.0132, "step": 72250 }, { "epoch": 475.39473684210526, "grad_norm": 1.3766677379608154, "learning_rate": 0.0001, "loss": 0.0105, "step": 72260 }, { "epoch": 475.4605263157895, "grad_norm": 1.2644368410110474, "learning_rate": 0.0001, "loss": 0.0107, "step": 72270 }, { "epoch": 475.5263157894737, "grad_norm": 1.295596718788147, "learning_rate": 0.0001, "loss": 0.0118, "step": 72280 }, { "epoch": 475.5921052631579, "grad_norm": 1.2578319311141968, "learning_rate": 0.0001, "loss": 0.0094, "step": 72290 }, { "epoch": 475.6578947368421, "grad_norm": 1.1036814451217651, "learning_rate": 0.0001, "loss": 0.0099, "step": 72300 }, { "epoch": 475.7236842105263, "grad_norm": 1.1769661903381348, "learning_rate": 0.0001, "loss": 0.0153, "step": 72310 }, { "epoch": 475.7894736842105, "grad_norm": 0.9555389285087585, "learning_rate": 0.0001, "loss": 0.0118, "step": 72320 }, { "epoch": 475.85526315789474, "grad_norm": 1.3688865900039673, "learning_rate": 0.0001, "loss": 0.0095, "step": 72330 }, { "epoch": 475.92105263157896, "grad_norm": 1.4835799932479858, "learning_rate": 0.0001, "loss": 0.011, "step": 72340 }, { "epoch": 475.9868421052632, "grad_norm": 1.1539369821548462, "learning_rate": 0.0001, "loss": 0.0097, "step": 72350 }, { "epoch": 476.05263157894734, "grad_norm": 1.3297797441482544, "learning_rate": 0.0001, "loss": 0.0112, "step": 72360 }, { "epoch": 476.11842105263156, "grad_norm": 1.5591670274734497, "learning_rate": 0.0001, "loss": 0.011, "step": 72370 }, { "epoch": 476.1842105263158, "grad_norm": 1.2825270891189575, "learning_rate": 0.0001, "loss": 0.011, "step": 72380 }, { "epoch": 476.25, "grad_norm": 0.7293868064880371, "learning_rate": 0.0001, "loss": 0.011, "step": 72390 }, { "epoch": 476.3157894736842, "grad_norm": 1.2907273769378662, "learning_rate": 0.0001, "loss": 0.0104, "step": 72400 }, { "epoch": 476.38157894736844, "grad_norm": 0.7899346351623535, "learning_rate": 0.0001, "loss": 0.0132, "step": 72410 }, { "epoch": 476.44736842105266, "grad_norm": 1.0589332580566406, "learning_rate": 0.0001, "loss": 0.011, "step": 72420 }, { "epoch": 476.5131578947368, "grad_norm": 1.1962878704071045, "learning_rate": 0.0001, "loss": 0.0114, "step": 72430 }, { "epoch": 476.57894736842104, "grad_norm": 1.0695892572402954, "learning_rate": 0.0001, "loss": 0.0124, "step": 72440 }, { "epoch": 476.64473684210526, "grad_norm": 0.8697053790092468, "learning_rate": 0.0001, "loss": 0.0133, "step": 72450 }, { "epoch": 476.7105263157895, "grad_norm": 0.8389332890510559, "learning_rate": 0.0001, "loss": 0.0123, "step": 72460 }, { "epoch": 476.7763157894737, "grad_norm": 1.0303555727005005, "learning_rate": 0.0001, "loss": 0.01, "step": 72470 }, { "epoch": 476.8421052631579, "grad_norm": 0.8726432919502258, "learning_rate": 0.0001, "loss": 0.0119, "step": 72480 }, { "epoch": 476.9078947368421, "grad_norm": 1.1818912029266357, "learning_rate": 0.0001, "loss": 0.0135, "step": 72490 }, { "epoch": 476.9736842105263, "grad_norm": 1.0324101448059082, "learning_rate": 0.0001, "loss": 0.0108, "step": 72500 }, { "epoch": 477.0394736842105, "grad_norm": 1.1190623044967651, "learning_rate": 0.0001, "loss": 0.0105, "step": 72510 }, { "epoch": 477.10526315789474, "grad_norm": 1.3751294612884521, "learning_rate": 0.0001, "loss": 0.011, "step": 72520 }, { "epoch": 477.17105263157896, "grad_norm": 1.0744208097457886, "learning_rate": 0.0001, "loss": 0.0107, "step": 72530 }, { "epoch": 477.2368421052632, "grad_norm": 1.5082820653915405, "learning_rate": 0.0001, "loss": 0.0124, "step": 72540 }, { "epoch": 477.30263157894734, "grad_norm": 0.6048269867897034, "learning_rate": 0.0001, "loss": 0.0116, "step": 72550 }, { "epoch": 477.36842105263156, "grad_norm": 1.0494825839996338, "learning_rate": 0.0001, "loss": 0.0112, "step": 72560 }, { "epoch": 477.4342105263158, "grad_norm": 1.2498975992202759, "learning_rate": 0.0001, "loss": 0.012, "step": 72570 }, { "epoch": 477.5, "grad_norm": 0.980216383934021, "learning_rate": 0.0001, "loss": 0.0114, "step": 72580 }, { "epoch": 477.5657894736842, "grad_norm": 1.551719069480896, "learning_rate": 0.0001, "loss": 0.012, "step": 72590 }, { "epoch": 477.63157894736844, "grad_norm": 0.9513759016990662, "learning_rate": 0.0001, "loss": 0.0114, "step": 72600 }, { "epoch": 477.69736842105266, "grad_norm": 1.2828733921051025, "learning_rate": 0.0001, "loss": 0.0134, "step": 72610 }, { "epoch": 477.7631578947368, "grad_norm": 0.9803738594055176, "learning_rate": 0.0001, "loss": 0.0118, "step": 72620 }, { "epoch": 477.82894736842104, "grad_norm": 1.0510716438293457, "learning_rate": 0.0001, "loss": 0.0112, "step": 72630 }, { "epoch": 477.89473684210526, "grad_norm": 0.7186201214790344, "learning_rate": 0.0001, "loss": 0.0118, "step": 72640 }, { "epoch": 477.9605263157895, "grad_norm": 1.2259303331375122, "learning_rate": 0.0001, "loss": 0.0119, "step": 72650 }, { "epoch": 478.0263157894737, "grad_norm": 0.948920726776123, "learning_rate": 0.0001, "loss": 0.012, "step": 72660 }, { "epoch": 478.0921052631579, "grad_norm": 1.4578410387039185, "learning_rate": 0.0001, "loss": 0.0112, "step": 72670 }, { "epoch": 478.1578947368421, "grad_norm": 1.2089077234268188, "learning_rate": 0.0001, "loss": 0.0108, "step": 72680 }, { "epoch": 478.2236842105263, "grad_norm": 1.2857468128204346, "learning_rate": 0.0001, "loss": 0.0134, "step": 72690 }, { "epoch": 478.2894736842105, "grad_norm": 0.8931973576545715, "learning_rate": 0.0001, "loss": 0.013, "step": 72700 }, { "epoch": 478.35526315789474, "grad_norm": 0.7080501317977905, "learning_rate": 0.0001, "loss": 0.0126, "step": 72710 }, { "epoch": 478.42105263157896, "grad_norm": 1.3679475784301758, "learning_rate": 0.0001, "loss": 0.0136, "step": 72720 }, { "epoch": 478.4868421052632, "grad_norm": 1.0242080688476562, "learning_rate": 0.0001, "loss": 0.0119, "step": 72730 }, { "epoch": 478.55263157894734, "grad_norm": 1.1003470420837402, "learning_rate": 0.0001, "loss": 0.0107, "step": 72740 }, { "epoch": 478.61842105263156, "grad_norm": 1.2705953121185303, "learning_rate": 0.0001, "loss": 0.0104, "step": 72750 }, { "epoch": 478.6842105263158, "grad_norm": 0.9441986680030823, "learning_rate": 0.0001, "loss": 0.0108, "step": 72760 }, { "epoch": 478.75, "grad_norm": 1.438714861869812, "learning_rate": 0.0001, "loss": 0.0116, "step": 72770 }, { "epoch": 478.8157894736842, "grad_norm": 1.4461417198181152, "learning_rate": 0.0001, "loss": 0.0122, "step": 72780 }, { "epoch": 478.88157894736844, "grad_norm": 1.4734560251235962, "learning_rate": 0.0001, "loss": 0.0112, "step": 72790 }, { "epoch": 478.94736842105266, "grad_norm": 1.064142107963562, "learning_rate": 0.0001, "loss": 0.0093, "step": 72800 }, { "epoch": 479.0131578947368, "grad_norm": 0.7491738796234131, "learning_rate": 0.0001, "loss": 0.0116, "step": 72810 }, { "epoch": 479.07894736842104, "grad_norm": 1.0148205757141113, "learning_rate": 0.0001, "loss": 0.0125, "step": 72820 }, { "epoch": 479.14473684210526, "grad_norm": 0.9256365299224854, "learning_rate": 0.0001, "loss": 0.011, "step": 72830 }, { "epoch": 479.2105263157895, "grad_norm": 1.0574623346328735, "learning_rate": 0.0001, "loss": 0.0111, "step": 72840 }, { "epoch": 479.2763157894737, "grad_norm": 1.2569656372070312, "learning_rate": 0.0001, "loss": 0.0139, "step": 72850 }, { "epoch": 479.3421052631579, "grad_norm": 0.737408459186554, "learning_rate": 0.0001, "loss": 0.0113, "step": 72860 }, { "epoch": 479.4078947368421, "grad_norm": 1.0380948781967163, "learning_rate": 0.0001, "loss": 0.0133, "step": 72870 }, { "epoch": 479.4736842105263, "grad_norm": 0.9933061599731445, "learning_rate": 0.0001, "loss": 0.0113, "step": 72880 }, { "epoch": 479.5394736842105, "grad_norm": 1.3550677299499512, "learning_rate": 0.0001, "loss": 0.0112, "step": 72890 }, { "epoch": 479.60526315789474, "grad_norm": 1.2398386001586914, "learning_rate": 0.0001, "loss": 0.0111, "step": 72900 }, { "epoch": 479.67105263157896, "grad_norm": 1.213581919670105, "learning_rate": 0.0001, "loss": 0.0114, "step": 72910 }, { "epoch": 479.7368421052632, "grad_norm": 1.4044569730758667, "learning_rate": 0.0001, "loss": 0.0111, "step": 72920 }, { "epoch": 479.80263157894734, "grad_norm": 0.956494152545929, "learning_rate": 0.0001, "loss": 0.0149, "step": 72930 }, { "epoch": 479.86842105263156, "grad_norm": 0.9288977384567261, "learning_rate": 0.0001, "loss": 0.0121, "step": 72940 }, { "epoch": 479.9342105263158, "grad_norm": 1.0925542116165161, "learning_rate": 0.0001, "loss": 0.0097, "step": 72950 }, { "epoch": 480.0, "grad_norm": 1.3831939697265625, "learning_rate": 0.0001, "loss": 0.0107, "step": 72960 }, { "epoch": 480.0657894736842, "grad_norm": 1.177885890007019, "learning_rate": 0.0001, "loss": 0.011, "step": 72970 }, { "epoch": 480.13157894736844, "grad_norm": 1.154605507850647, "learning_rate": 0.0001, "loss": 0.0103, "step": 72980 }, { "epoch": 480.19736842105266, "grad_norm": 1.4074510335922241, "learning_rate": 0.0001, "loss": 0.012, "step": 72990 }, { "epoch": 480.2631578947368, "grad_norm": 1.3587064743041992, "learning_rate": 0.0001, "loss": 0.0122, "step": 73000 }, { "epoch": 480.32894736842104, "grad_norm": 1.0905214548110962, "learning_rate": 0.0001, "loss": 0.0125, "step": 73010 }, { "epoch": 480.39473684210526, "grad_norm": 1.2721132040023804, "learning_rate": 0.0001, "loss": 0.012, "step": 73020 }, { "epoch": 480.4605263157895, "grad_norm": 1.2455694675445557, "learning_rate": 0.0001, "loss": 0.0117, "step": 73030 }, { "epoch": 480.5263157894737, "grad_norm": 1.1374874114990234, "learning_rate": 0.0001, "loss": 0.0127, "step": 73040 }, { "epoch": 480.5921052631579, "grad_norm": 0.9991102814674377, "learning_rate": 0.0001, "loss": 0.0129, "step": 73050 }, { "epoch": 480.6578947368421, "grad_norm": 0.9660846590995789, "learning_rate": 0.0001, "loss": 0.0132, "step": 73060 }, { "epoch": 480.7236842105263, "grad_norm": 1.3656002283096313, "learning_rate": 0.0001, "loss": 0.0112, "step": 73070 }, { "epoch": 480.7894736842105, "grad_norm": 1.039703607559204, "learning_rate": 0.0001, "loss": 0.0103, "step": 73080 }, { "epoch": 480.85526315789474, "grad_norm": 1.1544702053070068, "learning_rate": 0.0001, "loss": 0.0107, "step": 73090 }, { "epoch": 480.92105263157896, "grad_norm": 1.4217747449874878, "learning_rate": 0.0001, "loss": 0.0132, "step": 73100 }, { "epoch": 480.9868421052632, "grad_norm": 1.363659143447876, "learning_rate": 0.0001, "loss": 0.0116, "step": 73110 }, { "epoch": 481.05263157894734, "grad_norm": 1.2060271501541138, "learning_rate": 0.0001, "loss": 0.0106, "step": 73120 }, { "epoch": 481.11842105263156, "grad_norm": 0.9276877045631409, "learning_rate": 0.0001, "loss": 0.0107, "step": 73130 }, { "epoch": 481.1842105263158, "grad_norm": 1.148481845855713, "learning_rate": 0.0001, "loss": 0.0133, "step": 73140 }, { "epoch": 481.25, "grad_norm": 1.3816297054290771, "learning_rate": 0.0001, "loss": 0.011, "step": 73150 }, { "epoch": 481.3157894736842, "grad_norm": 1.1494829654693604, "learning_rate": 0.0001, "loss": 0.0124, "step": 73160 }, { "epoch": 481.38157894736844, "grad_norm": 0.8974570035934448, "learning_rate": 0.0001, "loss": 0.0117, "step": 73170 }, { "epoch": 481.44736842105266, "grad_norm": 1.2027701139450073, "learning_rate": 0.0001, "loss": 0.0113, "step": 73180 }, { "epoch": 481.5131578947368, "grad_norm": 1.0080323219299316, "learning_rate": 0.0001, "loss": 0.0135, "step": 73190 }, { "epoch": 481.57894736842104, "grad_norm": 1.2009938955307007, "learning_rate": 0.0001, "loss": 0.0116, "step": 73200 }, { "epoch": 481.64473684210526, "grad_norm": 1.2606686353683472, "learning_rate": 0.0001, "loss": 0.0113, "step": 73210 }, { "epoch": 481.7105263157895, "grad_norm": 0.9282734990119934, "learning_rate": 0.0001, "loss": 0.0125, "step": 73220 }, { "epoch": 481.7763157894737, "grad_norm": 0.6285842657089233, "learning_rate": 0.0001, "loss": 0.0123, "step": 73230 }, { "epoch": 481.8421052631579, "grad_norm": 0.895392894744873, "learning_rate": 0.0001, "loss": 0.0118, "step": 73240 }, { "epoch": 481.9078947368421, "grad_norm": 1.0975970029830933, "learning_rate": 0.0001, "loss": 0.0137, "step": 73250 }, { "epoch": 481.9736842105263, "grad_norm": 1.0048805475234985, "learning_rate": 0.0001, "loss": 0.0125, "step": 73260 }, { "epoch": 482.0394736842105, "grad_norm": 1.4439254999160767, "learning_rate": 0.0001, "loss": 0.0115, "step": 73270 }, { "epoch": 482.10526315789474, "grad_norm": 1.0936566591262817, "learning_rate": 0.0001, "loss": 0.0123, "step": 73280 }, { "epoch": 482.17105263157896, "grad_norm": 1.1018260717391968, "learning_rate": 0.0001, "loss": 0.0128, "step": 73290 }, { "epoch": 482.2368421052632, "grad_norm": 1.48947012424469, "learning_rate": 0.0001, "loss": 0.0117, "step": 73300 }, { "epoch": 482.30263157894734, "grad_norm": 0.8502678871154785, "learning_rate": 0.0001, "loss": 0.0116, "step": 73310 }, { "epoch": 482.36842105263156, "grad_norm": 1.0486079454421997, "learning_rate": 0.0001, "loss": 0.0123, "step": 73320 }, { "epoch": 482.4342105263158, "grad_norm": 1.3065072298049927, "learning_rate": 0.0001, "loss": 0.0113, "step": 73330 }, { "epoch": 482.5, "grad_norm": 1.218176007270813, "learning_rate": 0.0001, "loss": 0.0117, "step": 73340 }, { "epoch": 482.5657894736842, "grad_norm": 1.2767457962036133, "learning_rate": 0.0001, "loss": 0.0117, "step": 73350 }, { "epoch": 482.63157894736844, "grad_norm": 1.322757363319397, "learning_rate": 0.0001, "loss": 0.0142, "step": 73360 }, { "epoch": 482.69736842105266, "grad_norm": 1.248364806175232, "learning_rate": 0.0001, "loss": 0.0112, "step": 73370 }, { "epoch": 482.7631578947368, "grad_norm": 0.9732344746589661, "learning_rate": 0.0001, "loss": 0.0126, "step": 73380 }, { "epoch": 482.82894736842104, "grad_norm": 1.0928685665130615, "learning_rate": 0.0001, "loss": 0.0119, "step": 73390 }, { "epoch": 482.89473684210526, "grad_norm": 1.0037014484405518, "learning_rate": 0.0001, "loss": 0.0103, "step": 73400 }, { "epoch": 482.9605263157895, "grad_norm": 1.0865426063537598, "learning_rate": 0.0001, "loss": 0.0115, "step": 73410 }, { "epoch": 483.0263157894737, "grad_norm": 1.5085911750793457, "learning_rate": 0.0001, "loss": 0.0114, "step": 73420 }, { "epoch": 483.0921052631579, "grad_norm": 1.8017462491989136, "learning_rate": 0.0001, "loss": 0.0103, "step": 73430 }, { "epoch": 483.1578947368421, "grad_norm": 1.266747236251831, "learning_rate": 0.0001, "loss": 0.0125, "step": 73440 }, { "epoch": 483.2236842105263, "grad_norm": 1.4259337186813354, "learning_rate": 0.0001, "loss": 0.0127, "step": 73450 }, { "epoch": 483.2894736842105, "grad_norm": 1.2923461198806763, "learning_rate": 0.0001, "loss": 0.0098, "step": 73460 }, { "epoch": 483.35526315789474, "grad_norm": 1.453090786933899, "learning_rate": 0.0001, "loss": 0.0105, "step": 73470 }, { "epoch": 483.42105263157896, "grad_norm": 1.3885729312896729, "learning_rate": 0.0001, "loss": 0.0117, "step": 73480 }, { "epoch": 483.4868421052632, "grad_norm": 0.8967188000679016, "learning_rate": 0.0001, "loss": 0.0125, "step": 73490 }, { "epoch": 483.55263157894734, "grad_norm": 1.2121909856796265, "learning_rate": 0.0001, "loss": 0.0112, "step": 73500 }, { "epoch": 483.61842105263156, "grad_norm": 1.01747465133667, "learning_rate": 0.0001, "loss": 0.0123, "step": 73510 }, { "epoch": 483.6842105263158, "grad_norm": 1.1471728086471558, "learning_rate": 0.0001, "loss": 0.0116, "step": 73520 }, { "epoch": 483.75, "grad_norm": 1.196329116821289, "learning_rate": 0.0001, "loss": 0.0127, "step": 73530 }, { "epoch": 483.8157894736842, "grad_norm": 1.0889172554016113, "learning_rate": 0.0001, "loss": 0.0117, "step": 73540 }, { "epoch": 483.88157894736844, "grad_norm": 1.1677290201187134, "learning_rate": 0.0001, "loss": 0.0112, "step": 73550 }, { "epoch": 483.94736842105266, "grad_norm": 1.2402957677841187, "learning_rate": 0.0001, "loss": 0.0101, "step": 73560 }, { "epoch": 484.0131578947368, "grad_norm": 1.241837739944458, "learning_rate": 0.0001, "loss": 0.0122, "step": 73570 }, { "epoch": 484.07894736842104, "grad_norm": 1.1311156749725342, "learning_rate": 0.0001, "loss": 0.0119, "step": 73580 }, { "epoch": 484.14473684210526, "grad_norm": 1.0363242626190186, "learning_rate": 0.0001, "loss": 0.011, "step": 73590 }, { "epoch": 484.2105263157895, "grad_norm": 1.2085379362106323, "learning_rate": 0.0001, "loss": 0.0109, "step": 73600 }, { "epoch": 484.2763157894737, "grad_norm": 0.8469356298446655, "learning_rate": 0.0001, "loss": 0.0132, "step": 73610 }, { "epoch": 484.3421052631579, "grad_norm": 1.0864289999008179, "learning_rate": 0.0001, "loss": 0.011, "step": 73620 }, { "epoch": 484.4078947368421, "grad_norm": 0.8979195952415466, "learning_rate": 0.0001, "loss": 0.0116, "step": 73630 }, { "epoch": 484.4736842105263, "grad_norm": 1.0335921049118042, "learning_rate": 0.0001, "loss": 0.012, "step": 73640 }, { "epoch": 484.5394736842105, "grad_norm": 0.8344136476516724, "learning_rate": 0.0001, "loss": 0.0122, "step": 73650 }, { "epoch": 484.60526315789474, "grad_norm": 1.1805672645568848, "learning_rate": 0.0001, "loss": 0.0111, "step": 73660 }, { "epoch": 484.67105263157896, "grad_norm": 0.9891030192375183, "learning_rate": 0.0001, "loss": 0.0121, "step": 73670 }, { "epoch": 484.7368421052632, "grad_norm": 1.2835693359375, "learning_rate": 0.0001, "loss": 0.0117, "step": 73680 }, { "epoch": 484.80263157894734, "grad_norm": 1.1186614036560059, "learning_rate": 0.0001, "loss": 0.0119, "step": 73690 }, { "epoch": 484.86842105263156, "grad_norm": 0.9351660013198853, "learning_rate": 0.0001, "loss": 0.0103, "step": 73700 }, { "epoch": 484.9342105263158, "grad_norm": 1.1274996995925903, "learning_rate": 0.0001, "loss": 0.0116, "step": 73710 }, { "epoch": 485.0, "grad_norm": 1.3561069965362549, "learning_rate": 0.0001, "loss": 0.0136, "step": 73720 }, { "epoch": 485.0657894736842, "grad_norm": 1.0583314895629883, "learning_rate": 0.0001, "loss": 0.0125, "step": 73730 }, { "epoch": 485.13157894736844, "grad_norm": 1.318225383758545, "learning_rate": 0.0001, "loss": 0.0098, "step": 73740 }, { "epoch": 485.19736842105266, "grad_norm": 1.3357988595962524, "learning_rate": 0.0001, "loss": 0.012, "step": 73750 }, { "epoch": 485.2631578947368, "grad_norm": 0.7873854637145996, "learning_rate": 0.0001, "loss": 0.014, "step": 73760 }, { "epoch": 485.32894736842104, "grad_norm": 0.9330838322639465, "learning_rate": 0.0001, "loss": 0.0141, "step": 73770 }, { "epoch": 485.39473684210526, "grad_norm": 0.956305205821991, "learning_rate": 0.0001, "loss": 0.0116, "step": 73780 }, { "epoch": 485.4605263157895, "grad_norm": 1.252623200416565, "learning_rate": 0.0001, "loss": 0.0109, "step": 73790 }, { "epoch": 485.5263157894737, "grad_norm": 1.2766034603118896, "learning_rate": 0.0001, "loss": 0.0124, "step": 73800 }, { "epoch": 485.5921052631579, "grad_norm": 1.2142257690429688, "learning_rate": 0.0001, "loss": 0.0118, "step": 73810 }, { "epoch": 485.6578947368421, "grad_norm": 0.972432017326355, "learning_rate": 0.0001, "loss": 0.0104, "step": 73820 }, { "epoch": 485.7236842105263, "grad_norm": 1.1014715433120728, "learning_rate": 0.0001, "loss": 0.0136, "step": 73830 }, { "epoch": 485.7894736842105, "grad_norm": 1.1053544282913208, "learning_rate": 0.0001, "loss": 0.0109, "step": 73840 }, { "epoch": 485.85526315789474, "grad_norm": 1.4604548215866089, "learning_rate": 0.0001, "loss": 0.0107, "step": 73850 }, { "epoch": 485.92105263157896, "grad_norm": 0.9957262277603149, "learning_rate": 0.0001, "loss": 0.0116, "step": 73860 }, { "epoch": 485.9868421052632, "grad_norm": 0.8840813636779785, "learning_rate": 0.0001, "loss": 0.0102, "step": 73870 }, { "epoch": 486.05263157894734, "grad_norm": 1.2641267776489258, "learning_rate": 0.0001, "loss": 0.0118, "step": 73880 }, { "epoch": 486.11842105263156, "grad_norm": 0.9049034714698792, "learning_rate": 0.0001, "loss": 0.0118, "step": 73890 }, { "epoch": 486.1842105263158, "grad_norm": 0.798561155796051, "learning_rate": 0.0001, "loss": 0.0126, "step": 73900 }, { "epoch": 486.25, "grad_norm": 1.3658287525177002, "learning_rate": 0.0001, "loss": 0.0103, "step": 73910 }, { "epoch": 486.3157894736842, "grad_norm": 1.3198788166046143, "learning_rate": 0.0001, "loss": 0.0124, "step": 73920 }, { "epoch": 486.38157894736844, "grad_norm": 1.0078285932540894, "learning_rate": 0.0001, "loss": 0.0114, "step": 73930 }, { "epoch": 486.44736842105266, "grad_norm": 1.053542971611023, "learning_rate": 0.0001, "loss": 0.0118, "step": 73940 }, { "epoch": 486.5131578947368, "grad_norm": 0.8640962243080139, "learning_rate": 0.0001, "loss": 0.0118, "step": 73950 }, { "epoch": 486.57894736842104, "grad_norm": 1.2860034704208374, "learning_rate": 0.0001, "loss": 0.0115, "step": 73960 }, { "epoch": 486.64473684210526, "grad_norm": 1.0493406057357788, "learning_rate": 0.0001, "loss": 0.0113, "step": 73970 }, { "epoch": 486.7105263157895, "grad_norm": 0.9338852763175964, "learning_rate": 0.0001, "loss": 0.0136, "step": 73980 }, { "epoch": 486.7763157894737, "grad_norm": 1.1906099319458008, "learning_rate": 0.0001, "loss": 0.0125, "step": 73990 }, { "epoch": 486.8421052631579, "grad_norm": 1.1527206897735596, "learning_rate": 0.0001, "loss": 0.0117, "step": 74000 }, { "epoch": 486.9078947368421, "grad_norm": 1.1856426000595093, "learning_rate": 0.0001, "loss": 0.0112, "step": 74010 }, { "epoch": 486.9736842105263, "grad_norm": 1.0806999206542969, "learning_rate": 0.0001, "loss": 0.0114, "step": 74020 }, { "epoch": 487.0394736842105, "grad_norm": 0.960417628288269, "learning_rate": 0.0001, "loss": 0.0127, "step": 74030 }, { "epoch": 487.10526315789474, "grad_norm": 0.9834040403366089, "learning_rate": 0.0001, "loss": 0.0137, "step": 74040 }, { "epoch": 487.17105263157896, "grad_norm": 0.9878292679786682, "learning_rate": 0.0001, "loss": 0.0102, "step": 74050 }, { "epoch": 487.2368421052632, "grad_norm": 1.11509370803833, "learning_rate": 0.0001, "loss": 0.0113, "step": 74060 }, { "epoch": 487.30263157894734, "grad_norm": 1.1951452493667603, "learning_rate": 0.0001, "loss": 0.0118, "step": 74070 }, { "epoch": 487.36842105263156, "grad_norm": 0.9179181456565857, "learning_rate": 0.0001, "loss": 0.0146, "step": 74080 }, { "epoch": 487.4342105263158, "grad_norm": 1.1499123573303223, "learning_rate": 0.0001, "loss": 0.0109, "step": 74090 }, { "epoch": 487.5, "grad_norm": 1.035010576248169, "learning_rate": 0.0001, "loss": 0.0119, "step": 74100 }, { "epoch": 487.5657894736842, "grad_norm": 1.0398813486099243, "learning_rate": 0.0001, "loss": 0.0094, "step": 74110 }, { "epoch": 487.63157894736844, "grad_norm": 1.191170573234558, "learning_rate": 0.0001, "loss": 0.0132, "step": 74120 }, { "epoch": 487.69736842105266, "grad_norm": 1.0563814640045166, "learning_rate": 0.0001, "loss": 0.0121, "step": 74130 }, { "epoch": 487.7631578947368, "grad_norm": 1.0885233879089355, "learning_rate": 0.0001, "loss": 0.0113, "step": 74140 }, { "epoch": 487.82894736842104, "grad_norm": 1.0386284589767456, "learning_rate": 0.0001, "loss": 0.0107, "step": 74150 }, { "epoch": 487.89473684210526, "grad_norm": 1.3478586673736572, "learning_rate": 0.0001, "loss": 0.0128, "step": 74160 }, { "epoch": 487.9605263157895, "grad_norm": 0.8383530378341675, "learning_rate": 0.0001, "loss": 0.011, "step": 74170 }, { "epoch": 488.0263157894737, "grad_norm": 1.2732715606689453, "learning_rate": 0.0001, "loss": 0.0105, "step": 74180 }, { "epoch": 488.0921052631579, "grad_norm": 1.2103279829025269, "learning_rate": 0.0001, "loss": 0.0104, "step": 74190 }, { "epoch": 488.1578947368421, "grad_norm": 1.6459661722183228, "learning_rate": 0.0001, "loss": 0.0129, "step": 74200 }, { "epoch": 488.2236842105263, "grad_norm": 1.0316122770309448, "learning_rate": 0.0001, "loss": 0.0116, "step": 74210 }, { "epoch": 488.2894736842105, "grad_norm": 1.0485596656799316, "learning_rate": 0.0001, "loss": 0.012, "step": 74220 }, { "epoch": 488.35526315789474, "grad_norm": 1.27117121219635, "learning_rate": 0.0001, "loss": 0.012, "step": 74230 }, { "epoch": 488.42105263157896, "grad_norm": 1.240209698677063, "learning_rate": 0.0001, "loss": 0.0106, "step": 74240 }, { "epoch": 488.4868421052632, "grad_norm": 0.7314859628677368, "learning_rate": 0.0001, "loss": 0.013, "step": 74250 }, { "epoch": 488.55263157894734, "grad_norm": 1.4494084119796753, "learning_rate": 0.0001, "loss": 0.0111, "step": 74260 }, { "epoch": 488.61842105263156, "grad_norm": 1.7090212106704712, "learning_rate": 0.0001, "loss": 0.0136, "step": 74270 }, { "epoch": 488.6842105263158, "grad_norm": 1.248016119003296, "learning_rate": 0.0001, "loss": 0.0119, "step": 74280 }, { "epoch": 488.75, "grad_norm": 1.0164270401000977, "learning_rate": 0.0001, "loss": 0.0129, "step": 74290 }, { "epoch": 488.8157894736842, "grad_norm": 0.8657320141792297, "learning_rate": 0.0001, "loss": 0.0103, "step": 74300 }, { "epoch": 488.88157894736844, "grad_norm": 1.136674165725708, "learning_rate": 0.0001, "loss": 0.0121, "step": 74310 }, { "epoch": 488.94736842105266, "grad_norm": 1.108292579650879, "learning_rate": 0.0001, "loss": 0.0128, "step": 74320 }, { "epoch": 489.0131578947368, "grad_norm": 0.756219744682312, "learning_rate": 0.0001, "loss": 0.0112, "step": 74330 }, { "epoch": 489.07894736842104, "grad_norm": 1.0228742361068726, "learning_rate": 0.0001, "loss": 0.0119, "step": 74340 }, { "epoch": 489.14473684210526, "grad_norm": 0.9281026721000671, "learning_rate": 0.0001, "loss": 0.0136, "step": 74350 }, { "epoch": 489.2105263157895, "grad_norm": 0.9329313039779663, "learning_rate": 0.0001, "loss": 0.0126, "step": 74360 }, { "epoch": 489.2763157894737, "grad_norm": 0.8658884167671204, "learning_rate": 0.0001, "loss": 0.0137, "step": 74370 }, { "epoch": 489.3421052631579, "grad_norm": 0.9127900004386902, "learning_rate": 0.0001, "loss": 0.0113, "step": 74380 }, { "epoch": 489.4078947368421, "grad_norm": 1.145900011062622, "learning_rate": 0.0001, "loss": 0.0112, "step": 74390 }, { "epoch": 489.4736842105263, "grad_norm": 1.131519079208374, "learning_rate": 0.0001, "loss": 0.0115, "step": 74400 }, { "epoch": 489.5394736842105, "grad_norm": 1.071823000907898, "learning_rate": 0.0001, "loss": 0.0154, "step": 74410 }, { "epoch": 489.60526315789474, "grad_norm": 0.8739954233169556, "learning_rate": 0.0001, "loss": 0.0127, "step": 74420 }, { "epoch": 489.67105263157896, "grad_norm": 1.0112184286117554, "learning_rate": 0.0001, "loss": 0.0106, "step": 74430 }, { "epoch": 489.7368421052632, "grad_norm": 0.9393345713615417, "learning_rate": 0.0001, "loss": 0.0105, "step": 74440 }, { "epoch": 489.80263157894734, "grad_norm": 1.0487806797027588, "learning_rate": 0.0001, "loss": 0.011, "step": 74450 }, { "epoch": 489.86842105263156, "grad_norm": 1.1946709156036377, "learning_rate": 0.0001, "loss": 0.011, "step": 74460 }, { "epoch": 489.9342105263158, "grad_norm": 1.154524803161621, "learning_rate": 0.0001, "loss": 0.0121, "step": 74470 }, { "epoch": 490.0, "grad_norm": 1.5723423957824707, "learning_rate": 0.0001, "loss": 0.0116, "step": 74480 }, { "epoch": 490.0657894736842, "grad_norm": 1.4050586223602295, "learning_rate": 0.0001, "loss": 0.0121, "step": 74490 }, { "epoch": 490.13157894736844, "grad_norm": 1.064405918121338, "learning_rate": 0.0001, "loss": 0.0122, "step": 74500 }, { "epoch": 490.19736842105266, "grad_norm": 1.1484532356262207, "learning_rate": 0.0001, "loss": 0.0105, "step": 74510 }, { "epoch": 490.2631578947368, "grad_norm": 0.8688686490058899, "learning_rate": 0.0001, "loss": 0.011, "step": 74520 }, { "epoch": 490.32894736842104, "grad_norm": 1.151997447013855, "learning_rate": 0.0001, "loss": 0.0141, "step": 74530 }, { "epoch": 490.39473684210526, "grad_norm": 1.0896145105361938, "learning_rate": 0.0001, "loss": 0.0142, "step": 74540 }, { "epoch": 490.4605263157895, "grad_norm": 1.170395016670227, "learning_rate": 0.0001, "loss": 0.0118, "step": 74550 }, { "epoch": 490.5263157894737, "grad_norm": 1.2518529891967773, "learning_rate": 0.0001, "loss": 0.0129, "step": 74560 }, { "epoch": 490.5921052631579, "grad_norm": 1.385285496711731, "learning_rate": 0.0001, "loss": 0.0104, "step": 74570 }, { "epoch": 490.6578947368421, "grad_norm": 1.2214215993881226, "learning_rate": 0.0001, "loss": 0.011, "step": 74580 }, { "epoch": 490.7236842105263, "grad_norm": 1.1095465421676636, "learning_rate": 0.0001, "loss": 0.0114, "step": 74590 }, { "epoch": 490.7894736842105, "grad_norm": 1.1326744556427002, "learning_rate": 0.0001, "loss": 0.0124, "step": 74600 }, { "epoch": 490.85526315789474, "grad_norm": 1.0107364654541016, "learning_rate": 0.0001, "loss": 0.0098, "step": 74610 }, { "epoch": 490.92105263157896, "grad_norm": 1.571817398071289, "learning_rate": 0.0001, "loss": 0.0127, "step": 74620 }, { "epoch": 490.9868421052632, "grad_norm": 1.312083125114441, "learning_rate": 0.0001, "loss": 0.0118, "step": 74630 }, { "epoch": 491.05263157894734, "grad_norm": 1.4065839052200317, "learning_rate": 0.0001, "loss": 0.0111, "step": 74640 }, { "epoch": 491.11842105263156, "grad_norm": 1.1010332107543945, "learning_rate": 0.0001, "loss": 0.0105, "step": 74650 }, { "epoch": 491.1842105263158, "grad_norm": 1.1380709409713745, "learning_rate": 0.0001, "loss": 0.0119, "step": 74660 }, { "epoch": 491.25, "grad_norm": 1.1120402812957764, "learning_rate": 0.0001, "loss": 0.0112, "step": 74670 }, { "epoch": 491.3157894736842, "grad_norm": 1.5647122859954834, "learning_rate": 0.0001, "loss": 0.0122, "step": 74680 }, { "epoch": 491.38157894736844, "grad_norm": 1.1048648357391357, "learning_rate": 0.0001, "loss": 0.0108, "step": 74690 }, { "epoch": 491.44736842105266, "grad_norm": 1.0886578559875488, "learning_rate": 0.0001, "loss": 0.0107, "step": 74700 }, { "epoch": 491.5131578947368, "grad_norm": 1.111810564994812, "learning_rate": 0.0001, "loss": 0.01, "step": 74710 }, { "epoch": 491.57894736842104, "grad_norm": 1.0857470035552979, "learning_rate": 0.0001, "loss": 0.0129, "step": 74720 }, { "epoch": 491.64473684210526, "grad_norm": 1.4645519256591797, "learning_rate": 0.0001, "loss": 0.0141, "step": 74730 }, { "epoch": 491.7105263157895, "grad_norm": 1.5546987056732178, "learning_rate": 0.0001, "loss": 0.0125, "step": 74740 }, { "epoch": 491.7763157894737, "grad_norm": 1.0544028282165527, "learning_rate": 0.0001, "loss": 0.0115, "step": 74750 }, { "epoch": 491.8421052631579, "grad_norm": 1.1025646924972534, "learning_rate": 0.0001, "loss": 0.0115, "step": 74760 }, { "epoch": 491.9078947368421, "grad_norm": 1.238974928855896, "learning_rate": 0.0001, "loss": 0.0116, "step": 74770 }, { "epoch": 491.9736842105263, "grad_norm": 1.3790466785430908, "learning_rate": 0.0001, "loss": 0.012, "step": 74780 }, { "epoch": 492.0394736842105, "grad_norm": 1.2064578533172607, "learning_rate": 0.0001, "loss": 0.0106, "step": 74790 }, { "epoch": 492.10526315789474, "grad_norm": 0.9122753143310547, "learning_rate": 0.0001, "loss": 0.0104, "step": 74800 }, { "epoch": 492.17105263157896, "grad_norm": 1.4887069463729858, "learning_rate": 0.0001, "loss": 0.011, "step": 74810 }, { "epoch": 492.2368421052632, "grad_norm": 1.2702773809432983, "learning_rate": 0.0001, "loss": 0.013, "step": 74820 }, { "epoch": 492.30263157894734, "grad_norm": 1.2110621929168701, "learning_rate": 0.0001, "loss": 0.0114, "step": 74830 }, { "epoch": 492.36842105263156, "grad_norm": 1.53854238986969, "learning_rate": 0.0001, "loss": 0.0121, "step": 74840 }, { "epoch": 492.4342105263158, "grad_norm": 1.8626569509506226, "learning_rate": 0.0001, "loss": 0.0115, "step": 74850 }, { "epoch": 492.5, "grad_norm": 1.666455864906311, "learning_rate": 0.0001, "loss": 0.0124, "step": 74860 }, { "epoch": 492.5657894736842, "grad_norm": 1.684195876121521, "learning_rate": 0.0001, "loss": 0.0103, "step": 74870 }, { "epoch": 492.63157894736844, "grad_norm": 1.1221078634262085, "learning_rate": 0.0001, "loss": 0.0131, "step": 74880 }, { "epoch": 492.69736842105266, "grad_norm": 1.2585291862487793, "learning_rate": 0.0001, "loss": 0.0103, "step": 74890 }, { "epoch": 492.7631578947368, "grad_norm": 1.376713514328003, "learning_rate": 0.0001, "loss": 0.0121, "step": 74900 }, { "epoch": 492.82894736842104, "grad_norm": 0.9732930064201355, "learning_rate": 0.0001, "loss": 0.0113, "step": 74910 }, { "epoch": 492.89473684210526, "grad_norm": 1.3538066148757935, "learning_rate": 0.0001, "loss": 0.0113, "step": 74920 }, { "epoch": 492.9605263157895, "grad_norm": 1.5460445880889893, "learning_rate": 0.0001, "loss": 0.0123, "step": 74930 }, { "epoch": 493.0263157894737, "grad_norm": 1.332470417022705, "learning_rate": 0.0001, "loss": 0.0109, "step": 74940 }, { "epoch": 493.0921052631579, "grad_norm": 1.0886743068695068, "learning_rate": 0.0001, "loss": 0.0126, "step": 74950 }, { "epoch": 493.1578947368421, "grad_norm": 0.8579538464546204, "learning_rate": 0.0001, "loss": 0.0115, "step": 74960 }, { "epoch": 493.2236842105263, "grad_norm": 1.033247470855713, "learning_rate": 0.0001, "loss": 0.0117, "step": 74970 }, { "epoch": 493.2894736842105, "grad_norm": 1.0347689390182495, "learning_rate": 0.0001, "loss": 0.0131, "step": 74980 }, { "epoch": 493.35526315789474, "grad_norm": 1.149769902229309, "learning_rate": 0.0001, "loss": 0.012, "step": 74990 }, { "epoch": 493.42105263157896, "grad_norm": 0.8017956614494324, "learning_rate": 0.0001, "loss": 0.0111, "step": 75000 }, { "epoch": 493.4868421052632, "grad_norm": 0.933280348777771, "learning_rate": 0.0001, "loss": 0.0114, "step": 75010 }, { "epoch": 493.55263157894734, "grad_norm": 1.4169752597808838, "learning_rate": 0.0001, "loss": 0.0118, "step": 75020 }, { "epoch": 493.61842105263156, "grad_norm": 1.0053260326385498, "learning_rate": 0.0001, "loss": 0.012, "step": 75030 }, { "epoch": 493.6842105263158, "grad_norm": 1.0985119342803955, "learning_rate": 0.0001, "loss": 0.0128, "step": 75040 }, { "epoch": 493.75, "grad_norm": 0.89968341588974, "learning_rate": 0.0001, "loss": 0.0119, "step": 75050 }, { "epoch": 493.8157894736842, "grad_norm": 1.0092484951019287, "learning_rate": 0.0001, "loss": 0.0149, "step": 75060 }, { "epoch": 493.88157894736844, "grad_norm": 0.8683974742889404, "learning_rate": 0.0001, "loss": 0.0142, "step": 75070 }, { "epoch": 493.94736842105266, "grad_norm": 1.1474254131317139, "learning_rate": 0.0001, "loss": 0.0125, "step": 75080 }, { "epoch": 494.0131578947368, "grad_norm": 0.9479317665100098, "learning_rate": 0.0001, "loss": 0.0145, "step": 75090 }, { "epoch": 494.07894736842104, "grad_norm": 1.194884181022644, "learning_rate": 0.0001, "loss": 0.0158, "step": 75100 }, { "epoch": 494.14473684210526, "grad_norm": 1.3148432970046997, "learning_rate": 0.0001, "loss": 0.0162, "step": 75110 }, { "epoch": 494.2105263157895, "grad_norm": 1.1125714778900146, "learning_rate": 0.0001, "loss": 0.0125, "step": 75120 }, { "epoch": 494.2763157894737, "grad_norm": 1.0506175756454468, "learning_rate": 0.0001, "loss": 0.0127, "step": 75130 }, { "epoch": 494.3421052631579, "grad_norm": 1.3244073390960693, "learning_rate": 0.0001, "loss": 0.0135, "step": 75140 }, { "epoch": 494.4078947368421, "grad_norm": 1.082236647605896, "learning_rate": 0.0001, "loss": 0.0139, "step": 75150 }, { "epoch": 494.4736842105263, "grad_norm": 1.702755093574524, "learning_rate": 0.0001, "loss": 0.0141, "step": 75160 }, { "epoch": 494.5394736842105, "grad_norm": 1.0720016956329346, "learning_rate": 0.0001, "loss": 0.0126, "step": 75170 }, { "epoch": 494.60526315789474, "grad_norm": 0.8905903100967407, "learning_rate": 0.0001, "loss": 0.0146, "step": 75180 }, { "epoch": 494.67105263157896, "grad_norm": 0.9259252548217773, "learning_rate": 0.0001, "loss": 0.012, "step": 75190 }, { "epoch": 494.7368421052632, "grad_norm": 1.1381369829177856, "learning_rate": 0.0001, "loss": 0.0111, "step": 75200 }, { "epoch": 494.80263157894734, "grad_norm": 1.1923221349716187, "learning_rate": 0.0001, "loss": 0.0135, "step": 75210 }, { "epoch": 494.86842105263156, "grad_norm": 1.3231667280197144, "learning_rate": 0.0001, "loss": 0.0132, "step": 75220 }, { "epoch": 494.9342105263158, "grad_norm": 1.1546229124069214, "learning_rate": 0.0001, "loss": 0.0133, "step": 75230 }, { "epoch": 495.0, "grad_norm": 0.942509114742279, "learning_rate": 0.0001, "loss": 0.0116, "step": 75240 }, { "epoch": 495.0657894736842, "grad_norm": 1.1736472845077515, "learning_rate": 0.0001, "loss": 0.0121, "step": 75250 }, { "epoch": 495.13157894736844, "grad_norm": 1.43409264087677, "learning_rate": 0.0001, "loss": 0.012, "step": 75260 }, { "epoch": 495.19736842105266, "grad_norm": 1.1117990016937256, "learning_rate": 0.0001, "loss": 0.0116, "step": 75270 }, { "epoch": 495.2631578947368, "grad_norm": 1.196996808052063, "learning_rate": 0.0001, "loss": 0.0109, "step": 75280 }, { "epoch": 495.32894736842104, "grad_norm": 1.3209561109542847, "learning_rate": 0.0001, "loss": 0.0114, "step": 75290 }, { "epoch": 495.39473684210526, "grad_norm": 1.633660078048706, "learning_rate": 0.0001, "loss": 0.0122, "step": 75300 }, { "epoch": 495.4605263157895, "grad_norm": 1.3215038776397705, "learning_rate": 0.0001, "loss": 0.0104, "step": 75310 }, { "epoch": 495.5263157894737, "grad_norm": 1.1978789567947388, "learning_rate": 0.0001, "loss": 0.0137, "step": 75320 }, { "epoch": 495.5921052631579, "grad_norm": 1.508163332939148, "learning_rate": 0.0001, "loss": 0.0121, "step": 75330 }, { "epoch": 495.6578947368421, "grad_norm": 1.5541365146636963, "learning_rate": 0.0001, "loss": 0.0132, "step": 75340 }, { "epoch": 495.7236842105263, "grad_norm": 1.6904091835021973, "learning_rate": 0.0001, "loss": 0.0116, "step": 75350 }, { "epoch": 495.7894736842105, "grad_norm": 1.0997204780578613, "learning_rate": 0.0001, "loss": 0.0119, "step": 75360 }, { "epoch": 495.85526315789474, "grad_norm": 1.1885972023010254, "learning_rate": 0.0001, "loss": 0.0113, "step": 75370 }, { "epoch": 495.92105263157896, "grad_norm": 1.2370365858078003, "learning_rate": 0.0001, "loss": 0.0131, "step": 75380 }, { "epoch": 495.9868421052632, "grad_norm": 1.3986064195632935, "learning_rate": 0.0001, "loss": 0.0131, "step": 75390 }, { "epoch": 496.05263157894734, "grad_norm": 1.3072096109390259, "learning_rate": 0.0001, "loss": 0.0147, "step": 75400 }, { "epoch": 496.11842105263156, "grad_norm": 1.2789565324783325, "learning_rate": 0.0001, "loss": 0.0127, "step": 75410 }, { "epoch": 496.1842105263158, "grad_norm": 1.0061566829681396, "learning_rate": 0.0001, "loss": 0.0121, "step": 75420 }, { "epoch": 496.25, "grad_norm": 0.9549350738525391, "learning_rate": 0.0001, "loss": 0.0113, "step": 75430 }, { "epoch": 496.3157894736842, "grad_norm": 0.9655198454856873, "learning_rate": 0.0001, "loss": 0.0104, "step": 75440 }, { "epoch": 496.38157894736844, "grad_norm": 0.9944902062416077, "learning_rate": 0.0001, "loss": 0.0128, "step": 75450 }, { "epoch": 496.44736842105266, "grad_norm": 1.0632195472717285, "learning_rate": 0.0001, "loss": 0.0111, "step": 75460 }, { "epoch": 496.5131578947368, "grad_norm": 1.0134518146514893, "learning_rate": 0.0001, "loss": 0.0115, "step": 75470 }, { "epoch": 496.57894736842104, "grad_norm": 1.3689377307891846, "learning_rate": 0.0001, "loss": 0.0106, "step": 75480 }, { "epoch": 496.64473684210526, "grad_norm": 1.2085026502609253, "learning_rate": 0.0001, "loss": 0.0118, "step": 75490 }, { "epoch": 496.7105263157895, "grad_norm": 0.7359601259231567, "learning_rate": 0.0001, "loss": 0.0127, "step": 75500 }, { "epoch": 496.7763157894737, "grad_norm": 1.2168084383010864, "learning_rate": 0.0001, "loss": 0.0135, "step": 75510 }, { "epoch": 496.8421052631579, "grad_norm": 0.99058598279953, "learning_rate": 0.0001, "loss": 0.0111, "step": 75520 }, { "epoch": 496.9078947368421, "grad_norm": 0.9866731762886047, "learning_rate": 0.0001, "loss": 0.0126, "step": 75530 }, { "epoch": 496.9736842105263, "grad_norm": 0.7462350726127625, "learning_rate": 0.0001, "loss": 0.012, "step": 75540 }, { "epoch": 497.0394736842105, "grad_norm": 0.8804934620857239, "learning_rate": 0.0001, "loss": 0.0149, "step": 75550 }, { "epoch": 497.10526315789474, "grad_norm": 0.9820599555969238, "learning_rate": 0.0001, "loss": 0.0136, "step": 75560 }, { "epoch": 497.17105263157896, "grad_norm": 0.8656883239746094, "learning_rate": 0.0001, "loss": 0.0119, "step": 75570 }, { "epoch": 497.2368421052632, "grad_norm": 0.9848073124885559, "learning_rate": 0.0001, "loss": 0.0134, "step": 75580 }, { "epoch": 497.30263157894734, "grad_norm": 0.7762583494186401, "learning_rate": 0.0001, "loss": 0.0124, "step": 75590 }, { "epoch": 497.36842105263156, "grad_norm": 0.9925325512886047, "learning_rate": 0.0001, "loss": 0.0148, "step": 75600 }, { "epoch": 497.4342105263158, "grad_norm": 0.905873715877533, "learning_rate": 0.0001, "loss": 0.0123, "step": 75610 }, { "epoch": 497.5, "grad_norm": 0.8699862957000732, "learning_rate": 0.0001, "loss": 0.012, "step": 75620 }, { "epoch": 497.5657894736842, "grad_norm": 1.1519784927368164, "learning_rate": 0.0001, "loss": 0.0147, "step": 75630 }, { "epoch": 497.63157894736844, "grad_norm": 1.285287857055664, "learning_rate": 0.0001, "loss": 0.0131, "step": 75640 }, { "epoch": 497.69736842105266, "grad_norm": 0.9361952543258667, "learning_rate": 0.0001, "loss": 0.0132, "step": 75650 }, { "epoch": 497.7631578947368, "grad_norm": 1.113816499710083, "learning_rate": 0.0001, "loss": 0.0129, "step": 75660 }, { "epoch": 497.82894736842104, "grad_norm": 1.1863288879394531, "learning_rate": 0.0001, "loss": 0.014, "step": 75670 }, { "epoch": 497.89473684210526, "grad_norm": 0.6203667521476746, "learning_rate": 0.0001, "loss": 0.013, "step": 75680 }, { "epoch": 497.9605263157895, "grad_norm": 1.0878044366836548, "learning_rate": 0.0001, "loss": 0.013, "step": 75690 }, { "epoch": 498.0263157894737, "grad_norm": 0.5921301245689392, "learning_rate": 0.0001, "loss": 0.0126, "step": 75700 }, { "epoch": 498.0921052631579, "grad_norm": 0.9742683172225952, "learning_rate": 0.0001, "loss": 0.0126, "step": 75710 }, { "epoch": 498.1578947368421, "grad_norm": 1.1154931783676147, "learning_rate": 0.0001, "loss": 0.0136, "step": 75720 }, { "epoch": 498.2236842105263, "grad_norm": 1.0638843774795532, "learning_rate": 0.0001, "loss": 0.0131, "step": 75730 }, { "epoch": 498.2894736842105, "grad_norm": 1.2512115240097046, "learning_rate": 0.0001, "loss": 0.0137, "step": 75740 }, { "epoch": 498.35526315789474, "grad_norm": 1.373955488204956, "learning_rate": 0.0001, "loss": 0.0135, "step": 75750 }, { "epoch": 498.42105263157896, "grad_norm": 1.1924251317977905, "learning_rate": 0.0001, "loss": 0.0127, "step": 75760 }, { "epoch": 498.4868421052632, "grad_norm": 1.0624274015426636, "learning_rate": 0.0001, "loss": 0.0126, "step": 75770 }, { "epoch": 498.55263157894734, "grad_norm": 0.7194404602050781, "learning_rate": 0.0001, "loss": 0.0125, "step": 75780 }, { "epoch": 498.61842105263156, "grad_norm": 1.0660557746887207, "learning_rate": 0.0001, "loss": 0.0145, "step": 75790 }, { "epoch": 498.6842105263158, "grad_norm": 1.237527847290039, "learning_rate": 0.0001, "loss": 0.0136, "step": 75800 }, { "epoch": 498.75, "grad_norm": 1.1295133829116821, "learning_rate": 0.0001, "loss": 0.0139, "step": 75810 }, { "epoch": 498.8157894736842, "grad_norm": 0.8984070420265198, "learning_rate": 0.0001, "loss": 0.0121, "step": 75820 }, { "epoch": 498.88157894736844, "grad_norm": 1.035714864730835, "learning_rate": 0.0001, "loss": 0.0125, "step": 75830 }, { "epoch": 498.94736842105266, "grad_norm": 1.1130855083465576, "learning_rate": 0.0001, "loss": 0.0135, "step": 75840 }, { "epoch": 499.0131578947368, "grad_norm": 1.1051881313323975, "learning_rate": 0.0001, "loss": 0.0135, "step": 75850 }, { "epoch": 499.07894736842104, "grad_norm": 0.8507428765296936, "learning_rate": 0.0001, "loss": 0.0136, "step": 75860 }, { "epoch": 499.14473684210526, "grad_norm": 0.9365002512931824, "learning_rate": 0.0001, "loss": 0.0122, "step": 75870 }, { "epoch": 499.2105263157895, "grad_norm": 1.0836772918701172, "learning_rate": 0.0001, "loss": 0.0155, "step": 75880 }, { "epoch": 499.2763157894737, "grad_norm": 1.0799518823623657, "learning_rate": 0.0001, "loss": 0.012, "step": 75890 }, { "epoch": 499.3421052631579, "grad_norm": 1.1762809753417969, "learning_rate": 0.0001, "loss": 0.0115, "step": 75900 }, { "epoch": 499.4078947368421, "grad_norm": 0.9116426110267639, "learning_rate": 0.0001, "loss": 0.011, "step": 75910 }, { "epoch": 499.4736842105263, "grad_norm": 1.1221206188201904, "learning_rate": 0.0001, "loss": 0.0141, "step": 75920 }, { "epoch": 499.5394736842105, "grad_norm": 0.9344303011894226, "learning_rate": 0.0001, "loss": 0.014, "step": 75930 }, { "epoch": 499.60526315789474, "grad_norm": 1.2901678085327148, "learning_rate": 0.0001, "loss": 0.0132, "step": 75940 }, { "epoch": 499.67105263157896, "grad_norm": 1.0376948118209839, "learning_rate": 0.0001, "loss": 0.0142, "step": 75950 }, { "epoch": 499.7368421052632, "grad_norm": 0.9793575406074524, "learning_rate": 0.0001, "loss": 0.0129, "step": 75960 }, { "epoch": 499.80263157894734, "grad_norm": 1.1645046472549438, "learning_rate": 0.0001, "loss": 0.0138, "step": 75970 }, { "epoch": 499.86842105263156, "grad_norm": 0.8754516243934631, "learning_rate": 0.0001, "loss": 0.0121, "step": 75980 }, { "epoch": 499.9342105263158, "grad_norm": 1.0461459159851074, "learning_rate": 0.0001, "loss": 0.0137, "step": 75990 }, { "epoch": 500.0, "grad_norm": 0.9663628339767456, "learning_rate": 0.0001, "loss": 0.0114, "step": 76000 }, { "epoch": 500.0657894736842, "grad_norm": 1.32105553150177, "learning_rate": 0.0001, "loss": 0.0117, "step": 76010 }, { "epoch": 500.13157894736844, "grad_norm": 1.3787779808044434, "learning_rate": 0.0001, "loss": 0.0119, "step": 76020 }, { "epoch": 500.19736842105266, "grad_norm": 1.2713334560394287, "learning_rate": 0.0001, "loss": 0.0129, "step": 76030 }, { "epoch": 500.2631578947368, "grad_norm": 1.2451242208480835, "learning_rate": 0.0001, "loss": 0.0132, "step": 76040 }, { "epoch": 500.32894736842104, "grad_norm": 1.1290003061294556, "learning_rate": 0.0001, "loss": 0.0137, "step": 76050 }, { "epoch": 500.39473684210526, "grad_norm": 1.2039153575897217, "learning_rate": 0.0001, "loss": 0.0125, "step": 76060 }, { "epoch": 500.4605263157895, "grad_norm": 1.2840030193328857, "learning_rate": 0.0001, "loss": 0.0125, "step": 76070 }, { "epoch": 500.5263157894737, "grad_norm": 1.0132211446762085, "learning_rate": 0.0001, "loss": 0.0103, "step": 76080 }, { "epoch": 500.5921052631579, "grad_norm": 0.8630693554878235, "learning_rate": 0.0001, "loss": 0.013, "step": 76090 }, { "epoch": 500.6578947368421, "grad_norm": 1.200491189956665, "learning_rate": 0.0001, "loss": 0.0114, "step": 76100 }, { "epoch": 500.7236842105263, "grad_norm": 1.0840117931365967, "learning_rate": 0.0001, "loss": 0.0134, "step": 76110 }, { "epoch": 500.7894736842105, "grad_norm": 1.058173418045044, "learning_rate": 0.0001, "loss": 0.0158, "step": 76120 }, { "epoch": 500.85526315789474, "grad_norm": 0.7691683769226074, "learning_rate": 0.0001, "loss": 0.0143, "step": 76130 }, { "epoch": 500.92105263157896, "grad_norm": 1.3571420907974243, "learning_rate": 0.0001, "loss": 0.0143, "step": 76140 }, { "epoch": 500.9868421052632, "grad_norm": 1.397667646408081, "learning_rate": 0.0001, "loss": 0.0129, "step": 76150 }, { "epoch": 501.05263157894734, "grad_norm": 1.2376160621643066, "learning_rate": 0.0001, "loss": 0.0131, "step": 76160 }, { "epoch": 501.11842105263156, "grad_norm": 1.193163514137268, "learning_rate": 0.0001, "loss": 0.0142, "step": 76170 }, { "epoch": 501.1842105263158, "grad_norm": 1.4034628868103027, "learning_rate": 0.0001, "loss": 0.0147, "step": 76180 }, { "epoch": 501.25, "grad_norm": 1.207105278968811, "learning_rate": 0.0001, "loss": 0.0142, "step": 76190 }, { "epoch": 501.3157894736842, "grad_norm": 0.7556562423706055, "learning_rate": 0.0001, "loss": 0.0129, "step": 76200 }, { "epoch": 501.38157894736844, "grad_norm": 1.2998090982437134, "learning_rate": 0.0001, "loss": 0.0119, "step": 76210 }, { "epoch": 501.44736842105266, "grad_norm": 0.9922952055931091, "learning_rate": 0.0001, "loss": 0.0111, "step": 76220 }, { "epoch": 501.5131578947368, "grad_norm": 1.225348949432373, "learning_rate": 0.0001, "loss": 0.0154, "step": 76230 }, { "epoch": 501.57894736842104, "grad_norm": 1.3138529062271118, "learning_rate": 0.0001, "loss": 0.0154, "step": 76240 }, { "epoch": 501.64473684210526, "grad_norm": 0.9411857724189758, "learning_rate": 0.0001, "loss": 0.0124, "step": 76250 }, { "epoch": 501.7105263157895, "grad_norm": 1.271523356437683, "learning_rate": 0.0001, "loss": 0.0137, "step": 76260 }, { "epoch": 501.7763157894737, "grad_norm": 1.2197598218917847, "learning_rate": 0.0001, "loss": 0.0125, "step": 76270 }, { "epoch": 501.8421052631579, "grad_norm": 1.3674578666687012, "learning_rate": 0.0001, "loss": 0.0112, "step": 76280 }, { "epoch": 501.9078947368421, "grad_norm": 1.3369501829147339, "learning_rate": 0.0001, "loss": 0.0163, "step": 76290 }, { "epoch": 501.9736842105263, "grad_norm": 1.6227846145629883, "learning_rate": 0.0001, "loss": 0.0125, "step": 76300 }, { "epoch": 502.0394736842105, "grad_norm": 1.3651342391967773, "learning_rate": 0.0001, "loss": 0.0118, "step": 76310 }, { "epoch": 502.10526315789474, "grad_norm": 1.1167755126953125, "learning_rate": 0.0001, "loss": 0.0133, "step": 76320 }, { "epoch": 502.17105263157896, "grad_norm": 1.0122556686401367, "learning_rate": 0.0001, "loss": 0.0118, "step": 76330 }, { "epoch": 502.2368421052632, "grad_norm": 1.339971899986267, "learning_rate": 0.0001, "loss": 0.0118, "step": 76340 }, { "epoch": 502.30263157894734, "grad_norm": 1.1776334047317505, "learning_rate": 0.0001, "loss": 0.0128, "step": 76350 }, { "epoch": 502.36842105263156, "grad_norm": 1.1204994916915894, "learning_rate": 0.0001, "loss": 0.0133, "step": 76360 }, { "epoch": 502.4342105263158, "grad_norm": 1.1037061214447021, "learning_rate": 0.0001, "loss": 0.0127, "step": 76370 }, { "epoch": 502.5, "grad_norm": 1.4613289833068848, "learning_rate": 0.0001, "loss": 0.0115, "step": 76380 }, { "epoch": 502.5657894736842, "grad_norm": 1.3934388160705566, "learning_rate": 0.0001, "loss": 0.011, "step": 76390 }, { "epoch": 502.63157894736844, "grad_norm": 1.470294713973999, "learning_rate": 0.0001, "loss": 0.0116, "step": 76400 }, { "epoch": 502.69736842105266, "grad_norm": 1.2108807563781738, "learning_rate": 0.0001, "loss": 0.0118, "step": 76410 }, { "epoch": 502.7631578947368, "grad_norm": 1.1446878910064697, "learning_rate": 0.0001, "loss": 0.0111, "step": 76420 }, { "epoch": 502.82894736842104, "grad_norm": 1.1863682270050049, "learning_rate": 0.0001, "loss": 0.0128, "step": 76430 }, { "epoch": 502.89473684210526, "grad_norm": 1.2745025157928467, "learning_rate": 0.0001, "loss": 0.0131, "step": 76440 }, { "epoch": 502.9605263157895, "grad_norm": 1.2708007097244263, "learning_rate": 0.0001, "loss": 0.0116, "step": 76450 }, { "epoch": 503.0263157894737, "grad_norm": 1.109881043434143, "learning_rate": 0.0001, "loss": 0.011, "step": 76460 }, { "epoch": 503.0921052631579, "grad_norm": 0.837130606174469, "learning_rate": 0.0001, "loss": 0.0112, "step": 76470 }, { "epoch": 503.1578947368421, "grad_norm": 1.1610591411590576, "learning_rate": 0.0001, "loss": 0.0132, "step": 76480 }, { "epoch": 503.2236842105263, "grad_norm": 0.9062598943710327, "learning_rate": 0.0001, "loss": 0.0127, "step": 76490 }, { "epoch": 503.2894736842105, "grad_norm": 0.880432665348053, "learning_rate": 0.0001, "loss": 0.0114, "step": 76500 }, { "epoch": 503.35526315789474, "grad_norm": 1.1003761291503906, "learning_rate": 0.0001, "loss": 0.0131, "step": 76510 }, { "epoch": 503.42105263157896, "grad_norm": 1.0578047037124634, "learning_rate": 0.0001, "loss": 0.0101, "step": 76520 }, { "epoch": 503.4868421052632, "grad_norm": 1.265828251838684, "learning_rate": 0.0001, "loss": 0.0124, "step": 76530 }, { "epoch": 503.55263157894734, "grad_norm": 0.7461774945259094, "learning_rate": 0.0001, "loss": 0.0109, "step": 76540 }, { "epoch": 503.61842105263156, "grad_norm": 1.1446738243103027, "learning_rate": 0.0001, "loss": 0.0123, "step": 76550 }, { "epoch": 503.6842105263158, "grad_norm": 1.0880557298660278, "learning_rate": 0.0001, "loss": 0.0112, "step": 76560 }, { "epoch": 503.75, "grad_norm": 1.399858832359314, "learning_rate": 0.0001, "loss": 0.0098, "step": 76570 }, { "epoch": 503.8157894736842, "grad_norm": 1.1367653608322144, "learning_rate": 0.0001, "loss": 0.0121, "step": 76580 }, { "epoch": 503.88157894736844, "grad_norm": 1.018934965133667, "learning_rate": 0.0001, "loss": 0.0119, "step": 76590 }, { "epoch": 503.94736842105266, "grad_norm": 1.066989779472351, "learning_rate": 0.0001, "loss": 0.0122, "step": 76600 }, { "epoch": 504.0131578947368, "grad_norm": 0.9867700338363647, "learning_rate": 0.0001, "loss": 0.0115, "step": 76610 }, { "epoch": 504.07894736842104, "grad_norm": 1.0449360609054565, "learning_rate": 0.0001, "loss": 0.0112, "step": 76620 }, { "epoch": 504.14473684210526, "grad_norm": 0.7353350520133972, "learning_rate": 0.0001, "loss": 0.0096, "step": 76630 }, { "epoch": 504.2105263157895, "grad_norm": 0.8877101540565491, "learning_rate": 0.0001, "loss": 0.0106, "step": 76640 }, { "epoch": 504.2763157894737, "grad_norm": 1.084471583366394, "learning_rate": 0.0001, "loss": 0.0146, "step": 76650 }, { "epoch": 504.3421052631579, "grad_norm": 1.1777323484420776, "learning_rate": 0.0001, "loss": 0.0115, "step": 76660 }, { "epoch": 504.4078947368421, "grad_norm": 1.193827748298645, "learning_rate": 0.0001, "loss": 0.0115, "step": 76670 }, { "epoch": 504.4736842105263, "grad_norm": 1.2224369049072266, "learning_rate": 0.0001, "loss": 0.0119, "step": 76680 }, { "epoch": 504.5394736842105, "grad_norm": 1.1256766319274902, "learning_rate": 0.0001, "loss": 0.0102, "step": 76690 }, { "epoch": 504.60526315789474, "grad_norm": 1.0881433486938477, "learning_rate": 0.0001, "loss": 0.0119, "step": 76700 }, { "epoch": 504.67105263157896, "grad_norm": 0.9593660831451416, "learning_rate": 0.0001, "loss": 0.0129, "step": 76710 }, { "epoch": 504.7368421052632, "grad_norm": 1.1365207433700562, "learning_rate": 0.0001, "loss": 0.0128, "step": 76720 }, { "epoch": 504.80263157894734, "grad_norm": 1.1816807985305786, "learning_rate": 0.0001, "loss": 0.0095, "step": 76730 }, { "epoch": 504.86842105263156, "grad_norm": 1.0027751922607422, "learning_rate": 0.0001, "loss": 0.011, "step": 76740 }, { "epoch": 504.9342105263158, "grad_norm": 0.7428399920463562, "learning_rate": 0.0001, "loss": 0.0129, "step": 76750 }, { "epoch": 505.0, "grad_norm": 1.1550109386444092, "learning_rate": 0.0001, "loss": 0.0102, "step": 76760 }, { "epoch": 505.0657894736842, "grad_norm": 0.9820820093154907, "learning_rate": 0.0001, "loss": 0.0118, "step": 76770 }, { "epoch": 505.13157894736844, "grad_norm": 1.3091676235198975, "learning_rate": 0.0001, "loss": 0.0112, "step": 76780 }, { "epoch": 505.19736842105266, "grad_norm": 1.0327069759368896, "learning_rate": 0.0001, "loss": 0.0104, "step": 76790 }, { "epoch": 505.2631578947368, "grad_norm": 0.9336044788360596, "learning_rate": 0.0001, "loss": 0.0109, "step": 76800 }, { "epoch": 505.32894736842104, "grad_norm": 1.4007383584976196, "learning_rate": 0.0001, "loss": 0.0116, "step": 76810 }, { "epoch": 505.39473684210526, "grad_norm": 1.2266377210617065, "learning_rate": 0.0001, "loss": 0.0127, "step": 76820 }, { "epoch": 505.4605263157895, "grad_norm": 1.1764065027236938, "learning_rate": 0.0001, "loss": 0.0111, "step": 76830 }, { "epoch": 505.5263157894737, "grad_norm": 1.091961145401001, "learning_rate": 0.0001, "loss": 0.0118, "step": 76840 }, { "epoch": 505.5921052631579, "grad_norm": 0.9492750763893127, "learning_rate": 0.0001, "loss": 0.0108, "step": 76850 }, { "epoch": 505.6578947368421, "grad_norm": 1.088575839996338, "learning_rate": 0.0001, "loss": 0.0128, "step": 76860 }, { "epoch": 505.7236842105263, "grad_norm": 0.8977634906768799, "learning_rate": 0.0001, "loss": 0.0114, "step": 76870 }, { "epoch": 505.7894736842105, "grad_norm": 0.9890562891960144, "learning_rate": 0.0001, "loss": 0.0128, "step": 76880 }, { "epoch": 505.85526315789474, "grad_norm": 0.8245396018028259, "learning_rate": 0.0001, "loss": 0.0101, "step": 76890 }, { "epoch": 505.92105263157896, "grad_norm": 0.810986340045929, "learning_rate": 0.0001, "loss": 0.0112, "step": 76900 }, { "epoch": 505.9868421052632, "grad_norm": 1.3287197351455688, "learning_rate": 0.0001, "loss": 0.0125, "step": 76910 }, { "epoch": 506.05263157894734, "grad_norm": 0.7860926985740662, "learning_rate": 0.0001, "loss": 0.011, "step": 76920 }, { "epoch": 506.11842105263156, "grad_norm": 1.179496169090271, "learning_rate": 0.0001, "loss": 0.0122, "step": 76930 }, { "epoch": 506.1842105263158, "grad_norm": 1.606441617012024, "learning_rate": 0.0001, "loss": 0.0117, "step": 76940 }, { "epoch": 506.25, "grad_norm": 1.499569058418274, "learning_rate": 0.0001, "loss": 0.0115, "step": 76950 }, { "epoch": 506.3157894736842, "grad_norm": 1.24739670753479, "learning_rate": 0.0001, "loss": 0.0105, "step": 76960 }, { "epoch": 506.38157894736844, "grad_norm": 1.2441381216049194, "learning_rate": 0.0001, "loss": 0.0106, "step": 76970 }, { "epoch": 506.44736842105266, "grad_norm": 0.8672195672988892, "learning_rate": 0.0001, "loss": 0.0115, "step": 76980 }, { "epoch": 506.5131578947368, "grad_norm": 0.9006538391113281, "learning_rate": 0.0001, "loss": 0.013, "step": 76990 }, { "epoch": 506.57894736842104, "grad_norm": 0.9952192902565002, "learning_rate": 0.0001, "loss": 0.0116, "step": 77000 }, { "epoch": 506.64473684210526, "grad_norm": 0.8276636004447937, "learning_rate": 0.0001, "loss": 0.0122, "step": 77010 }, { "epoch": 506.7105263157895, "grad_norm": 1.2697261571884155, "learning_rate": 0.0001, "loss": 0.009, "step": 77020 }, { "epoch": 506.7763157894737, "grad_norm": 1.3981074094772339, "learning_rate": 0.0001, "loss": 0.0122, "step": 77030 }, { "epoch": 506.8421052631579, "grad_norm": 1.0681496858596802, "learning_rate": 0.0001, "loss": 0.0117, "step": 77040 }, { "epoch": 506.9078947368421, "grad_norm": 0.8381850719451904, "learning_rate": 0.0001, "loss": 0.01, "step": 77050 }, { "epoch": 506.9736842105263, "grad_norm": 1.236061692237854, "learning_rate": 0.0001, "loss": 0.0123, "step": 77060 }, { "epoch": 507.0394736842105, "grad_norm": 0.8843864798545837, "learning_rate": 0.0001, "loss": 0.0111, "step": 77070 }, { "epoch": 507.10526315789474, "grad_norm": 1.0024884939193726, "learning_rate": 0.0001, "loss": 0.0105, "step": 77080 }, { "epoch": 507.17105263157896, "grad_norm": 1.0010193586349487, "learning_rate": 0.0001, "loss": 0.0105, "step": 77090 }, { "epoch": 507.2368421052632, "grad_norm": 1.3840446472167969, "learning_rate": 0.0001, "loss": 0.0112, "step": 77100 }, { "epoch": 507.30263157894734, "grad_norm": 0.9459224343299866, "learning_rate": 0.0001, "loss": 0.0127, "step": 77110 }, { "epoch": 507.36842105263156, "grad_norm": 1.2796701192855835, "learning_rate": 0.0001, "loss": 0.0102, "step": 77120 }, { "epoch": 507.4342105263158, "grad_norm": 0.9489369988441467, "learning_rate": 0.0001, "loss": 0.0118, "step": 77130 }, { "epoch": 507.5, "grad_norm": 0.9329638481140137, "learning_rate": 0.0001, "loss": 0.0111, "step": 77140 }, { "epoch": 507.5657894736842, "grad_norm": 1.1059828996658325, "learning_rate": 0.0001, "loss": 0.0119, "step": 77150 }, { "epoch": 507.63157894736844, "grad_norm": 0.9566002488136292, "learning_rate": 0.0001, "loss": 0.0123, "step": 77160 }, { "epoch": 507.69736842105266, "grad_norm": 1.44321608543396, "learning_rate": 0.0001, "loss": 0.0118, "step": 77170 }, { "epoch": 507.7631578947368, "grad_norm": 0.9149665832519531, "learning_rate": 0.0001, "loss": 0.0125, "step": 77180 }, { "epoch": 507.82894736842104, "grad_norm": 1.2634365558624268, "learning_rate": 0.0001, "loss": 0.0112, "step": 77190 }, { "epoch": 507.89473684210526, "grad_norm": 1.0077171325683594, "learning_rate": 0.0001, "loss": 0.0108, "step": 77200 }, { "epoch": 507.9605263157895, "grad_norm": 1.3527032136917114, "learning_rate": 0.0001, "loss": 0.0105, "step": 77210 }, { "epoch": 508.0263157894737, "grad_norm": 1.3523765802383423, "learning_rate": 0.0001, "loss": 0.0114, "step": 77220 }, { "epoch": 508.0921052631579, "grad_norm": 1.2622536420822144, "learning_rate": 0.0001, "loss": 0.011, "step": 77230 }, { "epoch": 508.1578947368421, "grad_norm": 1.2173060178756714, "learning_rate": 0.0001, "loss": 0.0107, "step": 77240 }, { "epoch": 508.2236842105263, "grad_norm": 1.077372431755066, "learning_rate": 0.0001, "loss": 0.0126, "step": 77250 }, { "epoch": 508.2894736842105, "grad_norm": 1.0975558757781982, "learning_rate": 0.0001, "loss": 0.0118, "step": 77260 }, { "epoch": 508.35526315789474, "grad_norm": 1.3480600118637085, "learning_rate": 0.0001, "loss": 0.0088, "step": 77270 }, { "epoch": 508.42105263157896, "grad_norm": 1.1693116426467896, "learning_rate": 0.0001, "loss": 0.0124, "step": 77280 }, { "epoch": 508.4868421052632, "grad_norm": 1.1305088996887207, "learning_rate": 0.0001, "loss": 0.0124, "step": 77290 }, { "epoch": 508.55263157894734, "grad_norm": 1.2774187326431274, "learning_rate": 0.0001, "loss": 0.0119, "step": 77300 }, { "epoch": 508.61842105263156, "grad_norm": 1.1564953327178955, "learning_rate": 0.0001, "loss": 0.0097, "step": 77310 }, { "epoch": 508.6842105263158, "grad_norm": 0.8752122521400452, "learning_rate": 0.0001, "loss": 0.0095, "step": 77320 }, { "epoch": 508.75, "grad_norm": 1.2619119882583618, "learning_rate": 0.0001, "loss": 0.0115, "step": 77330 }, { "epoch": 508.8157894736842, "grad_norm": 0.9893479943275452, "learning_rate": 0.0001, "loss": 0.0112, "step": 77340 }, { "epoch": 508.88157894736844, "grad_norm": 1.0533159971237183, "learning_rate": 0.0001, "loss": 0.0112, "step": 77350 }, { "epoch": 508.94736842105266, "grad_norm": 1.032997488975525, "learning_rate": 0.0001, "loss": 0.0107, "step": 77360 }, { "epoch": 509.0131578947368, "grad_norm": 1.0692557096481323, "learning_rate": 0.0001, "loss": 0.0106, "step": 77370 }, { "epoch": 509.07894736842104, "grad_norm": 0.8767366409301758, "learning_rate": 0.0001, "loss": 0.0112, "step": 77380 }, { "epoch": 509.14473684210526, "grad_norm": 1.1660325527191162, "learning_rate": 0.0001, "loss": 0.0125, "step": 77390 }, { "epoch": 509.2105263157895, "grad_norm": 0.8173626661300659, "learning_rate": 0.0001, "loss": 0.0115, "step": 77400 }, { "epoch": 509.2763157894737, "grad_norm": 0.7611241936683655, "learning_rate": 0.0001, "loss": 0.0101, "step": 77410 }, { "epoch": 509.3421052631579, "grad_norm": 1.0339752435684204, "learning_rate": 0.0001, "loss": 0.0109, "step": 77420 }, { "epoch": 509.4078947368421, "grad_norm": 1.1537483930587769, "learning_rate": 0.0001, "loss": 0.014, "step": 77430 }, { "epoch": 509.4736842105263, "grad_norm": 1.1419742107391357, "learning_rate": 0.0001, "loss": 0.0094, "step": 77440 }, { "epoch": 509.5394736842105, "grad_norm": 1.0991747379302979, "learning_rate": 0.0001, "loss": 0.0109, "step": 77450 }, { "epoch": 509.60526315789474, "grad_norm": 1.048770785331726, "learning_rate": 0.0001, "loss": 0.0115, "step": 77460 }, { "epoch": 509.67105263157896, "grad_norm": 1.1474910974502563, "learning_rate": 0.0001, "loss": 0.0142, "step": 77470 }, { "epoch": 509.7368421052632, "grad_norm": 1.519187331199646, "learning_rate": 0.0001, "loss": 0.0108, "step": 77480 }, { "epoch": 509.80263157894734, "grad_norm": 1.7529782056808472, "learning_rate": 0.0001, "loss": 0.0102, "step": 77490 }, { "epoch": 509.86842105263156, "grad_norm": 1.0271803140640259, "learning_rate": 0.0001, "loss": 0.0117, "step": 77500 }, { "epoch": 509.9342105263158, "grad_norm": 1.1275503635406494, "learning_rate": 0.0001, "loss": 0.0117, "step": 77510 }, { "epoch": 510.0, "grad_norm": 1.0205459594726562, "learning_rate": 0.0001, "loss": 0.0097, "step": 77520 }, { "epoch": 510.0657894736842, "grad_norm": 1.088582158088684, "learning_rate": 0.0001, "loss": 0.0097, "step": 77530 }, { "epoch": 510.13157894736844, "grad_norm": 0.9400649070739746, "learning_rate": 0.0001, "loss": 0.0113, "step": 77540 }, { "epoch": 510.19736842105266, "grad_norm": 1.1834319829940796, "learning_rate": 0.0001, "loss": 0.0111, "step": 77550 }, { "epoch": 510.2631578947368, "grad_norm": 1.0135151147842407, "learning_rate": 0.0001, "loss": 0.0132, "step": 77560 }, { "epoch": 510.32894736842104, "grad_norm": 1.5176630020141602, "learning_rate": 0.0001, "loss": 0.0095, "step": 77570 }, { "epoch": 510.39473684210526, "grad_norm": 1.283932089805603, "learning_rate": 0.0001, "loss": 0.0098, "step": 77580 }, { "epoch": 510.4605263157895, "grad_norm": 1.1694949865341187, "learning_rate": 0.0001, "loss": 0.0107, "step": 77590 }, { "epoch": 510.5263157894737, "grad_norm": 0.772484540939331, "learning_rate": 0.0001, "loss": 0.0132, "step": 77600 }, { "epoch": 510.5921052631579, "grad_norm": 1.1480525732040405, "learning_rate": 0.0001, "loss": 0.0104, "step": 77610 }, { "epoch": 510.6578947368421, "grad_norm": 1.1137710809707642, "learning_rate": 0.0001, "loss": 0.0104, "step": 77620 }, { "epoch": 510.7236842105263, "grad_norm": 0.9193239212036133, "learning_rate": 0.0001, "loss": 0.0114, "step": 77630 }, { "epoch": 510.7894736842105, "grad_norm": 0.9958206415176392, "learning_rate": 0.0001, "loss": 0.0109, "step": 77640 }, { "epoch": 510.85526315789474, "grad_norm": 0.8010199069976807, "learning_rate": 0.0001, "loss": 0.0118, "step": 77650 }, { "epoch": 510.92105263157896, "grad_norm": 1.4143106937408447, "learning_rate": 0.0001, "loss": 0.0124, "step": 77660 }, { "epoch": 510.9868421052632, "grad_norm": 0.9478830695152283, "learning_rate": 0.0001, "loss": 0.013, "step": 77670 }, { "epoch": 511.05263157894734, "grad_norm": 1.2224031686782837, "learning_rate": 0.0001, "loss": 0.012, "step": 77680 }, { "epoch": 511.11842105263156, "grad_norm": 1.0343413352966309, "learning_rate": 0.0001, "loss": 0.0101, "step": 77690 }, { "epoch": 511.1842105263158, "grad_norm": 1.211520791053772, "learning_rate": 0.0001, "loss": 0.0114, "step": 77700 }, { "epoch": 511.25, "grad_norm": 1.017286777496338, "learning_rate": 0.0001, "loss": 0.0113, "step": 77710 }, { "epoch": 511.3157894736842, "grad_norm": 0.9983707666397095, "learning_rate": 0.0001, "loss": 0.0102, "step": 77720 }, { "epoch": 511.38157894736844, "grad_norm": 1.5086129903793335, "learning_rate": 0.0001, "loss": 0.0113, "step": 77730 }, { "epoch": 511.44736842105266, "grad_norm": 1.1469098329544067, "learning_rate": 0.0001, "loss": 0.0131, "step": 77740 }, { "epoch": 511.5131578947368, "grad_norm": 1.2588064670562744, "learning_rate": 0.0001, "loss": 0.0112, "step": 77750 }, { "epoch": 511.57894736842104, "grad_norm": 0.9466657638549805, "learning_rate": 0.0001, "loss": 0.0113, "step": 77760 }, { "epoch": 511.64473684210526, "grad_norm": 1.3773024082183838, "learning_rate": 0.0001, "loss": 0.0127, "step": 77770 }, { "epoch": 511.7105263157895, "grad_norm": 1.0853861570358276, "learning_rate": 0.0001, "loss": 0.0096, "step": 77780 }, { "epoch": 511.7763157894737, "grad_norm": 1.2546428442001343, "learning_rate": 0.0001, "loss": 0.0117, "step": 77790 }, { "epoch": 511.8421052631579, "grad_norm": 1.0669403076171875, "learning_rate": 0.0001, "loss": 0.011, "step": 77800 }, { "epoch": 511.9078947368421, "grad_norm": 1.2506325244903564, "learning_rate": 0.0001, "loss": 0.0108, "step": 77810 }, { "epoch": 511.9736842105263, "grad_norm": 1.007456660270691, "learning_rate": 0.0001, "loss": 0.0118, "step": 77820 }, { "epoch": 512.0394736842105, "grad_norm": 1.0313053131103516, "learning_rate": 0.0001, "loss": 0.0104, "step": 77830 }, { "epoch": 512.1052631578947, "grad_norm": 1.1146388053894043, "learning_rate": 0.0001, "loss": 0.014, "step": 77840 }, { "epoch": 512.171052631579, "grad_norm": 1.1103726625442505, "learning_rate": 0.0001, "loss": 0.0096, "step": 77850 }, { "epoch": 512.2368421052631, "grad_norm": 1.240702748298645, "learning_rate": 0.0001, "loss": 0.0111, "step": 77860 }, { "epoch": 512.3026315789474, "grad_norm": 1.3598905801773071, "learning_rate": 0.0001, "loss": 0.0108, "step": 77870 }, { "epoch": 512.3684210526316, "grad_norm": 1.3365449905395508, "learning_rate": 0.0001, "loss": 0.0117, "step": 77880 }, { "epoch": 512.4342105263158, "grad_norm": 0.8507790565490723, "learning_rate": 0.0001, "loss": 0.01, "step": 77890 }, { "epoch": 512.5, "grad_norm": 0.9919387698173523, "learning_rate": 0.0001, "loss": 0.0122, "step": 77900 }, { "epoch": 512.5657894736842, "grad_norm": 0.742108941078186, "learning_rate": 0.0001, "loss": 0.0117, "step": 77910 }, { "epoch": 512.6315789473684, "grad_norm": 1.1056491136550903, "learning_rate": 0.0001, "loss": 0.0113, "step": 77920 }, { "epoch": 512.6973684210526, "grad_norm": 1.045460820198059, "learning_rate": 0.0001, "loss": 0.0115, "step": 77930 }, { "epoch": 512.7631578947369, "grad_norm": 1.2098149061203003, "learning_rate": 0.0001, "loss": 0.0108, "step": 77940 }, { "epoch": 512.828947368421, "grad_norm": 1.1252753734588623, "learning_rate": 0.0001, "loss": 0.01, "step": 77950 }, { "epoch": 512.8947368421053, "grad_norm": 1.4744460582733154, "learning_rate": 0.0001, "loss": 0.0111, "step": 77960 }, { "epoch": 512.9605263157895, "grad_norm": 0.7933367490768433, "learning_rate": 0.0001, "loss": 0.0117, "step": 77970 }, { "epoch": 513.0263157894736, "grad_norm": 1.1913303136825562, "learning_rate": 0.0001, "loss": 0.0125, "step": 77980 }, { "epoch": 513.0921052631579, "grad_norm": 1.1555919647216797, "learning_rate": 0.0001, "loss": 0.0109, "step": 77990 }, { "epoch": 513.1578947368421, "grad_norm": 1.4469823837280273, "learning_rate": 0.0001, "loss": 0.0115, "step": 78000 }, { "epoch": 513.2236842105264, "grad_norm": 1.104987382888794, "learning_rate": 0.0001, "loss": 0.0105, "step": 78010 }, { "epoch": 513.2894736842105, "grad_norm": 1.0023167133331299, "learning_rate": 0.0001, "loss": 0.0108, "step": 78020 }, { "epoch": 513.3552631578947, "grad_norm": 1.0223348140716553, "learning_rate": 0.0001, "loss": 0.012, "step": 78030 }, { "epoch": 513.421052631579, "grad_norm": 1.0552802085876465, "learning_rate": 0.0001, "loss": 0.012, "step": 78040 }, { "epoch": 513.4868421052631, "grad_norm": 1.0780552625656128, "learning_rate": 0.0001, "loss": 0.0142, "step": 78050 }, { "epoch": 513.5526315789474, "grad_norm": 1.1177747249603271, "learning_rate": 0.0001, "loss": 0.0109, "step": 78060 }, { "epoch": 513.6184210526316, "grad_norm": 1.3587802648544312, "learning_rate": 0.0001, "loss": 0.01, "step": 78070 }, { "epoch": 513.6842105263158, "grad_norm": 1.1935032606124878, "learning_rate": 0.0001, "loss": 0.0101, "step": 78080 }, { "epoch": 513.75, "grad_norm": 1.1695393323898315, "learning_rate": 0.0001, "loss": 0.011, "step": 78090 }, { "epoch": 513.8157894736842, "grad_norm": 0.7090479135513306, "learning_rate": 0.0001, "loss": 0.0102, "step": 78100 }, { "epoch": 513.8815789473684, "grad_norm": 1.5053452253341675, "learning_rate": 0.0001, "loss": 0.0134, "step": 78110 }, { "epoch": 513.9473684210526, "grad_norm": 1.4671512842178345, "learning_rate": 0.0001, "loss": 0.0116, "step": 78120 }, { "epoch": 514.0131578947369, "grad_norm": 1.2938599586486816, "learning_rate": 0.0001, "loss": 0.0097, "step": 78130 }, { "epoch": 514.078947368421, "grad_norm": 1.09611177444458, "learning_rate": 0.0001, "loss": 0.0125, "step": 78140 }, { "epoch": 514.1447368421053, "grad_norm": 1.0053191184997559, "learning_rate": 0.0001, "loss": 0.0101, "step": 78150 }, { "epoch": 514.2105263157895, "grad_norm": 0.8893487453460693, "learning_rate": 0.0001, "loss": 0.0116, "step": 78160 }, { "epoch": 514.2763157894736, "grad_norm": 1.13539457321167, "learning_rate": 0.0001, "loss": 0.01, "step": 78170 }, { "epoch": 514.3421052631579, "grad_norm": 1.3186606168746948, "learning_rate": 0.0001, "loss": 0.0115, "step": 78180 }, { "epoch": 514.4078947368421, "grad_norm": 1.296586036682129, "learning_rate": 0.0001, "loss": 0.0119, "step": 78190 }, { "epoch": 514.4736842105264, "grad_norm": 1.1459144353866577, "learning_rate": 0.0001, "loss": 0.0126, "step": 78200 }, { "epoch": 514.5394736842105, "grad_norm": 0.9058517813682556, "learning_rate": 0.0001, "loss": 0.0115, "step": 78210 }, { "epoch": 514.6052631578947, "grad_norm": 1.4812514781951904, "learning_rate": 0.0001, "loss": 0.0137, "step": 78220 }, { "epoch": 514.671052631579, "grad_norm": 1.5883773565292358, "learning_rate": 0.0001, "loss": 0.0114, "step": 78230 }, { "epoch": 514.7368421052631, "grad_norm": 1.2800959348678589, "learning_rate": 0.0001, "loss": 0.0142, "step": 78240 }, { "epoch": 514.8026315789474, "grad_norm": 0.9361305236816406, "learning_rate": 0.0001, "loss": 0.0102, "step": 78250 }, { "epoch": 514.8684210526316, "grad_norm": 1.3019955158233643, "learning_rate": 0.0001, "loss": 0.0108, "step": 78260 }, { "epoch": 514.9342105263158, "grad_norm": 1.1924444437026978, "learning_rate": 0.0001, "loss": 0.0104, "step": 78270 }, { "epoch": 515.0, "grad_norm": 1.103654384613037, "learning_rate": 0.0001, "loss": 0.011, "step": 78280 }, { "epoch": 515.0657894736842, "grad_norm": 1.3044533729553223, "learning_rate": 0.0001, "loss": 0.0106, "step": 78290 }, { "epoch": 515.1315789473684, "grad_norm": 0.844568133354187, "learning_rate": 0.0001, "loss": 0.0102, "step": 78300 }, { "epoch": 515.1973684210526, "grad_norm": 1.071467399597168, "learning_rate": 0.0001, "loss": 0.0137, "step": 78310 }, { "epoch": 515.2631578947369, "grad_norm": 0.9115777015686035, "learning_rate": 0.0001, "loss": 0.0116, "step": 78320 }, { "epoch": 515.328947368421, "grad_norm": 0.8897896409034729, "learning_rate": 0.0001, "loss": 0.0112, "step": 78330 }, { "epoch": 515.3947368421053, "grad_norm": 1.273638367652893, "learning_rate": 0.0001, "loss": 0.0112, "step": 78340 }, { "epoch": 515.4605263157895, "grad_norm": 0.7495373487472534, "learning_rate": 0.0001, "loss": 0.0117, "step": 78350 }, { "epoch": 515.5263157894736, "grad_norm": 1.1438860893249512, "learning_rate": 0.0001, "loss": 0.0122, "step": 78360 }, { "epoch": 515.5921052631579, "grad_norm": 0.8679733276367188, "learning_rate": 0.0001, "loss": 0.0106, "step": 78370 }, { "epoch": 515.6578947368421, "grad_norm": 1.0994764566421509, "learning_rate": 0.0001, "loss": 0.0135, "step": 78380 }, { "epoch": 515.7236842105264, "grad_norm": 1.1603001356124878, "learning_rate": 0.0001, "loss": 0.0129, "step": 78390 }, { "epoch": 515.7894736842105, "grad_norm": 1.049238920211792, "learning_rate": 0.0001, "loss": 0.0106, "step": 78400 }, { "epoch": 515.8552631578947, "grad_norm": 1.0663156509399414, "learning_rate": 0.0001, "loss": 0.0112, "step": 78410 }, { "epoch": 515.921052631579, "grad_norm": 0.7960723638534546, "learning_rate": 0.0001, "loss": 0.0112, "step": 78420 }, { "epoch": 515.9868421052631, "grad_norm": 0.9979690909385681, "learning_rate": 0.0001, "loss": 0.0101, "step": 78430 }, { "epoch": 516.0526315789474, "grad_norm": 0.8607260584831238, "learning_rate": 0.0001, "loss": 0.0108, "step": 78440 }, { "epoch": 516.1184210526316, "grad_norm": 1.2933063507080078, "learning_rate": 0.0001, "loss": 0.0108, "step": 78450 }, { "epoch": 516.1842105263158, "grad_norm": 0.9800442457199097, "learning_rate": 0.0001, "loss": 0.0132, "step": 78460 }, { "epoch": 516.25, "grad_norm": 1.1290419101715088, "learning_rate": 0.0001, "loss": 0.0118, "step": 78470 }, { "epoch": 516.3157894736842, "grad_norm": 1.2492495775222778, "learning_rate": 0.0001, "loss": 0.011, "step": 78480 }, { "epoch": 516.3815789473684, "grad_norm": 1.2778388261795044, "learning_rate": 0.0001, "loss": 0.0107, "step": 78490 }, { "epoch": 516.4473684210526, "grad_norm": 1.31378173828125, "learning_rate": 0.0001, "loss": 0.0112, "step": 78500 }, { "epoch": 516.5131578947369, "grad_norm": 0.9775269627571106, "learning_rate": 0.0001, "loss": 0.0124, "step": 78510 }, { "epoch": 516.578947368421, "grad_norm": 0.8219498991966248, "learning_rate": 0.0001, "loss": 0.012, "step": 78520 }, { "epoch": 516.6447368421053, "grad_norm": 0.7330734729766846, "learning_rate": 0.0001, "loss": 0.013, "step": 78530 }, { "epoch": 516.7105263157895, "grad_norm": 0.9409795999526978, "learning_rate": 0.0001, "loss": 0.0112, "step": 78540 }, { "epoch": 516.7763157894736, "grad_norm": 1.4014899730682373, "learning_rate": 0.0001, "loss": 0.0115, "step": 78550 }, { "epoch": 516.8421052631579, "grad_norm": 1.3369898796081543, "learning_rate": 0.0001, "loss": 0.0125, "step": 78560 }, { "epoch": 516.9078947368421, "grad_norm": 0.7518098950386047, "learning_rate": 0.0001, "loss": 0.0114, "step": 78570 }, { "epoch": 516.9736842105264, "grad_norm": 0.977200448513031, "learning_rate": 0.0001, "loss": 0.0141, "step": 78580 }, { "epoch": 517.0394736842105, "grad_norm": 0.9853875041007996, "learning_rate": 0.0001, "loss": 0.0127, "step": 78590 }, { "epoch": 517.1052631578947, "grad_norm": 0.9010801911354065, "learning_rate": 0.0001, "loss": 0.0122, "step": 78600 }, { "epoch": 517.171052631579, "grad_norm": 0.7902107238769531, "learning_rate": 0.0001, "loss": 0.0119, "step": 78610 }, { "epoch": 517.2368421052631, "grad_norm": 0.7945990562438965, "learning_rate": 0.0001, "loss": 0.0115, "step": 78620 }, { "epoch": 517.3026315789474, "grad_norm": 0.8374760746955872, "learning_rate": 0.0001, "loss": 0.011, "step": 78630 }, { "epoch": 517.3684210526316, "grad_norm": 0.8473724722862244, "learning_rate": 0.0001, "loss": 0.0118, "step": 78640 }, { "epoch": 517.4342105263158, "grad_norm": 1.1389491558074951, "learning_rate": 0.0001, "loss": 0.0125, "step": 78650 }, { "epoch": 517.5, "grad_norm": 1.1223610639572144, "learning_rate": 0.0001, "loss": 0.0129, "step": 78660 }, { "epoch": 517.5657894736842, "grad_norm": 0.9881941080093384, "learning_rate": 0.0001, "loss": 0.013, "step": 78670 }, { "epoch": 517.6315789473684, "grad_norm": 1.021945834159851, "learning_rate": 0.0001, "loss": 0.0099, "step": 78680 }, { "epoch": 517.6973684210526, "grad_norm": 0.9731574058532715, "learning_rate": 0.0001, "loss": 0.0096, "step": 78690 }, { "epoch": 517.7631578947369, "grad_norm": 1.2203140258789062, "learning_rate": 0.0001, "loss": 0.0123, "step": 78700 }, { "epoch": 517.828947368421, "grad_norm": 1.5560880899429321, "learning_rate": 0.0001, "loss": 0.014, "step": 78710 }, { "epoch": 517.8947368421053, "grad_norm": 1.2174243927001953, "learning_rate": 0.0001, "loss": 0.0114, "step": 78720 }, { "epoch": 517.9605263157895, "grad_norm": 1.308231234550476, "learning_rate": 0.0001, "loss": 0.012, "step": 78730 }, { "epoch": 518.0263157894736, "grad_norm": 1.0401585102081299, "learning_rate": 0.0001, "loss": 0.0118, "step": 78740 }, { "epoch": 518.0921052631579, "grad_norm": 1.1766376495361328, "learning_rate": 0.0001, "loss": 0.0117, "step": 78750 }, { "epoch": 518.1578947368421, "grad_norm": 1.2370878458023071, "learning_rate": 0.0001, "loss": 0.0131, "step": 78760 }, { "epoch": 518.2236842105264, "grad_norm": 1.2282683849334717, "learning_rate": 0.0001, "loss": 0.0132, "step": 78770 }, { "epoch": 518.2894736842105, "grad_norm": 1.6565943956375122, "learning_rate": 0.0001, "loss": 0.0129, "step": 78780 }, { "epoch": 518.3552631578947, "grad_norm": 1.6417547464370728, "learning_rate": 0.0001, "loss": 0.0113, "step": 78790 }, { "epoch": 518.421052631579, "grad_norm": 1.1247665882110596, "learning_rate": 0.0001, "loss": 0.0118, "step": 78800 }, { "epoch": 518.4868421052631, "grad_norm": 1.1730952262878418, "learning_rate": 0.0001, "loss": 0.0109, "step": 78810 }, { "epoch": 518.5526315789474, "grad_norm": 1.2181925773620605, "learning_rate": 0.0001, "loss": 0.013, "step": 78820 }, { "epoch": 518.6184210526316, "grad_norm": 1.0916987657546997, "learning_rate": 0.0001, "loss": 0.0117, "step": 78830 }, { "epoch": 518.6842105263158, "grad_norm": 0.9300577640533447, "learning_rate": 0.0001, "loss": 0.0118, "step": 78840 }, { "epoch": 518.75, "grad_norm": 1.0200120210647583, "learning_rate": 0.0001, "loss": 0.013, "step": 78850 }, { "epoch": 518.8157894736842, "grad_norm": 1.1790283918380737, "learning_rate": 0.0001, "loss": 0.0117, "step": 78860 }, { "epoch": 518.8815789473684, "grad_norm": 1.4343522787094116, "learning_rate": 0.0001, "loss": 0.0118, "step": 78870 }, { "epoch": 518.9473684210526, "grad_norm": 1.2748308181762695, "learning_rate": 0.0001, "loss": 0.0105, "step": 78880 }, { "epoch": 519.0131578947369, "grad_norm": 1.3263063430786133, "learning_rate": 0.0001, "loss": 0.0123, "step": 78890 }, { "epoch": 519.078947368421, "grad_norm": 0.8143279552459717, "learning_rate": 0.0001, "loss": 0.0111, "step": 78900 }, { "epoch": 519.1447368421053, "grad_norm": 0.8361022472381592, "learning_rate": 0.0001, "loss": 0.0124, "step": 78910 }, { "epoch": 519.2105263157895, "grad_norm": 1.0913989543914795, "learning_rate": 0.0001, "loss": 0.0111, "step": 78920 }, { "epoch": 519.2763157894736, "grad_norm": 1.3421107530593872, "learning_rate": 0.0001, "loss": 0.0129, "step": 78930 }, { "epoch": 519.3421052631579, "grad_norm": 1.0078271627426147, "learning_rate": 0.0001, "loss": 0.0112, "step": 78940 }, { "epoch": 519.4078947368421, "grad_norm": 1.1363328695297241, "learning_rate": 0.0001, "loss": 0.0115, "step": 78950 }, { "epoch": 519.4736842105264, "grad_norm": 1.1798573732376099, "learning_rate": 0.0001, "loss": 0.0134, "step": 78960 }, { "epoch": 519.5394736842105, "grad_norm": 1.3588061332702637, "learning_rate": 0.0001, "loss": 0.0127, "step": 78970 }, { "epoch": 519.6052631578947, "grad_norm": 0.9301666021347046, "learning_rate": 0.0001, "loss": 0.0125, "step": 78980 }, { "epoch": 519.671052631579, "grad_norm": 1.2737902402877808, "learning_rate": 0.0001, "loss": 0.0107, "step": 78990 }, { "epoch": 519.7368421052631, "grad_norm": 1.1327887773513794, "learning_rate": 0.0001, "loss": 0.0108, "step": 79000 }, { "epoch": 519.8026315789474, "grad_norm": 1.0071492195129395, "learning_rate": 0.0001, "loss": 0.0119, "step": 79010 }, { "epoch": 519.8684210526316, "grad_norm": 1.227921724319458, "learning_rate": 0.0001, "loss": 0.0108, "step": 79020 }, { "epoch": 519.9342105263158, "grad_norm": 1.2975742816925049, "learning_rate": 0.0001, "loss": 0.0119, "step": 79030 }, { "epoch": 520.0, "grad_norm": 1.392256259918213, "learning_rate": 0.0001, "loss": 0.0128, "step": 79040 }, { "epoch": 520.0657894736842, "grad_norm": 1.469244360923767, "learning_rate": 0.0001, "loss": 0.0108, "step": 79050 }, { "epoch": 520.1315789473684, "grad_norm": 0.9325305223464966, "learning_rate": 0.0001, "loss": 0.0112, "step": 79060 }, { "epoch": 520.1973684210526, "grad_norm": 1.2577964067459106, "learning_rate": 0.0001, "loss": 0.0105, "step": 79070 }, { "epoch": 520.2631578947369, "grad_norm": 1.347522497177124, "learning_rate": 0.0001, "loss": 0.0112, "step": 79080 }, { "epoch": 520.328947368421, "grad_norm": 1.0849223136901855, "learning_rate": 0.0001, "loss": 0.0124, "step": 79090 }, { "epoch": 520.3947368421053, "grad_norm": 1.3118022680282593, "learning_rate": 0.0001, "loss": 0.0112, "step": 79100 }, { "epoch": 520.4605263157895, "grad_norm": 1.0926787853240967, "learning_rate": 0.0001, "loss": 0.0113, "step": 79110 }, { "epoch": 520.5263157894736, "grad_norm": 1.3140438795089722, "learning_rate": 0.0001, "loss": 0.0131, "step": 79120 }, { "epoch": 520.5921052631579, "grad_norm": 1.4527376890182495, "learning_rate": 0.0001, "loss": 0.0119, "step": 79130 }, { "epoch": 520.6578947368421, "grad_norm": 0.9648956656455994, "learning_rate": 0.0001, "loss": 0.0134, "step": 79140 }, { "epoch": 520.7236842105264, "grad_norm": 0.9755305051803589, "learning_rate": 0.0001, "loss": 0.0093, "step": 79150 }, { "epoch": 520.7894736842105, "grad_norm": 1.085988163948059, "learning_rate": 0.0001, "loss": 0.0109, "step": 79160 }, { "epoch": 520.8552631578947, "grad_norm": 0.982824444770813, "learning_rate": 0.0001, "loss": 0.0107, "step": 79170 }, { "epoch": 520.921052631579, "grad_norm": 1.0134260654449463, "learning_rate": 0.0001, "loss": 0.0115, "step": 79180 }, { "epoch": 520.9868421052631, "grad_norm": 1.0143835544586182, "learning_rate": 0.0001, "loss": 0.0119, "step": 79190 }, { "epoch": 521.0526315789474, "grad_norm": 1.0507410764694214, "learning_rate": 0.0001, "loss": 0.0113, "step": 79200 }, { "epoch": 521.1184210526316, "grad_norm": 0.9346352815628052, "learning_rate": 0.0001, "loss": 0.012, "step": 79210 }, { "epoch": 521.1842105263158, "grad_norm": 0.6871551871299744, "learning_rate": 0.0001, "loss": 0.0125, "step": 79220 }, { "epoch": 521.25, "grad_norm": 1.1676675081253052, "learning_rate": 0.0001, "loss": 0.0124, "step": 79230 }, { "epoch": 521.3157894736842, "grad_norm": 0.6791279911994934, "learning_rate": 0.0001, "loss": 0.013, "step": 79240 }, { "epoch": 521.3815789473684, "grad_norm": 0.735756516456604, "learning_rate": 0.0001, "loss": 0.0104, "step": 79250 }, { "epoch": 521.4473684210526, "grad_norm": 1.068332314491272, "learning_rate": 0.0001, "loss": 0.0103, "step": 79260 }, { "epoch": 521.5131578947369, "grad_norm": 1.0991226434707642, "learning_rate": 0.0001, "loss": 0.0106, "step": 79270 }, { "epoch": 521.578947368421, "grad_norm": 0.9471065402030945, "learning_rate": 0.0001, "loss": 0.0109, "step": 79280 }, { "epoch": 521.6447368421053, "grad_norm": 1.1460009813308716, "learning_rate": 0.0001, "loss": 0.013, "step": 79290 }, { "epoch": 521.7105263157895, "grad_norm": 1.2541179656982422, "learning_rate": 0.0001, "loss": 0.0131, "step": 79300 }, { "epoch": 521.7763157894736, "grad_norm": 1.3320528268814087, "learning_rate": 0.0001, "loss": 0.0103, "step": 79310 }, { "epoch": 521.8421052631579, "grad_norm": 1.2252389192581177, "learning_rate": 0.0001, "loss": 0.0123, "step": 79320 }, { "epoch": 521.9078947368421, "grad_norm": 1.0492901802062988, "learning_rate": 0.0001, "loss": 0.0109, "step": 79330 }, { "epoch": 521.9736842105264, "grad_norm": 0.755221962928772, "learning_rate": 0.0001, "loss": 0.011, "step": 79340 }, { "epoch": 522.0394736842105, "grad_norm": 0.9267183542251587, "learning_rate": 0.0001, "loss": 0.0131, "step": 79350 }, { "epoch": 522.1052631578947, "grad_norm": 1.2305091619491577, "learning_rate": 0.0001, "loss": 0.0127, "step": 79360 }, { "epoch": 522.171052631579, "grad_norm": 1.3195940256118774, "learning_rate": 0.0001, "loss": 0.0145, "step": 79370 }, { "epoch": 522.2368421052631, "grad_norm": 1.2802159786224365, "learning_rate": 0.0001, "loss": 0.0114, "step": 79380 }, { "epoch": 522.3026315789474, "grad_norm": 1.2666503190994263, "learning_rate": 0.0001, "loss": 0.0101, "step": 79390 }, { "epoch": 522.3684210526316, "grad_norm": 1.2319374084472656, "learning_rate": 0.0001, "loss": 0.0113, "step": 79400 }, { "epoch": 522.4342105263158, "grad_norm": 1.361430287361145, "learning_rate": 0.0001, "loss": 0.0104, "step": 79410 }, { "epoch": 522.5, "grad_norm": 1.0288817882537842, "learning_rate": 0.0001, "loss": 0.0104, "step": 79420 }, { "epoch": 522.5657894736842, "grad_norm": 1.1962461471557617, "learning_rate": 0.0001, "loss": 0.0102, "step": 79430 }, { "epoch": 522.6315789473684, "grad_norm": 1.5438764095306396, "learning_rate": 0.0001, "loss": 0.0112, "step": 79440 }, { "epoch": 522.6973684210526, "grad_norm": 1.5173245668411255, "learning_rate": 0.0001, "loss": 0.0133, "step": 79450 }, { "epoch": 522.7631578947369, "grad_norm": 1.4040913581848145, "learning_rate": 0.0001, "loss": 0.0117, "step": 79460 }, { "epoch": 522.828947368421, "grad_norm": 1.4810621738433838, "learning_rate": 0.0001, "loss": 0.0114, "step": 79470 }, { "epoch": 522.8947368421053, "grad_norm": 1.279215931892395, "learning_rate": 0.0001, "loss": 0.0091, "step": 79480 }, { "epoch": 522.9605263157895, "grad_norm": 1.1824666261672974, "learning_rate": 0.0001, "loss": 0.0114, "step": 79490 }, { "epoch": 523.0263157894736, "grad_norm": 1.2264102697372437, "learning_rate": 0.0001, "loss": 0.0118, "step": 79500 }, { "epoch": 523.0921052631579, "grad_norm": 1.2089831829071045, "learning_rate": 0.0001, "loss": 0.0112, "step": 79510 }, { "epoch": 523.1578947368421, "grad_norm": 1.6189210414886475, "learning_rate": 0.0001, "loss": 0.0118, "step": 79520 }, { "epoch": 523.2236842105264, "grad_norm": 1.5252612829208374, "learning_rate": 0.0001, "loss": 0.0113, "step": 79530 }, { "epoch": 523.2894736842105, "grad_norm": 1.3406212329864502, "learning_rate": 0.0001, "loss": 0.0126, "step": 79540 }, { "epoch": 523.3552631578947, "grad_norm": 1.1579642295837402, "learning_rate": 0.0001, "loss": 0.0115, "step": 79550 }, { "epoch": 523.421052631579, "grad_norm": 1.012661337852478, "learning_rate": 0.0001, "loss": 0.0093, "step": 79560 }, { "epoch": 523.4868421052631, "grad_norm": 0.8289533257484436, "learning_rate": 0.0001, "loss": 0.0117, "step": 79570 }, { "epoch": 523.5526315789474, "grad_norm": 1.1507660150527954, "learning_rate": 0.0001, "loss": 0.0118, "step": 79580 }, { "epoch": 523.6184210526316, "grad_norm": 1.1765552759170532, "learning_rate": 0.0001, "loss": 0.0117, "step": 79590 }, { "epoch": 523.6842105263158, "grad_norm": 1.2260336875915527, "learning_rate": 0.0001, "loss": 0.0118, "step": 79600 }, { "epoch": 523.75, "grad_norm": 1.341565728187561, "learning_rate": 0.0001, "loss": 0.0103, "step": 79610 }, { "epoch": 523.8157894736842, "grad_norm": 0.9649227261543274, "learning_rate": 0.0001, "loss": 0.0118, "step": 79620 }, { "epoch": 523.8815789473684, "grad_norm": 1.075209617614746, "learning_rate": 0.0001, "loss": 0.0109, "step": 79630 }, { "epoch": 523.9473684210526, "grad_norm": 1.4141005277633667, "learning_rate": 0.0001, "loss": 0.0099, "step": 79640 }, { "epoch": 524.0131578947369, "grad_norm": 1.0523087978363037, "learning_rate": 0.0001, "loss": 0.0124, "step": 79650 }, { "epoch": 524.078947368421, "grad_norm": 1.1604161262512207, "learning_rate": 0.0001, "loss": 0.0137, "step": 79660 }, { "epoch": 524.1447368421053, "grad_norm": 0.8030272722244263, "learning_rate": 0.0001, "loss": 0.0109, "step": 79670 }, { "epoch": 524.2105263157895, "grad_norm": 0.9838864207267761, "learning_rate": 0.0001, "loss": 0.0112, "step": 79680 }, { "epoch": 524.2763157894736, "grad_norm": 1.2279644012451172, "learning_rate": 0.0001, "loss": 0.0119, "step": 79690 }, { "epoch": 524.3421052631579, "grad_norm": 1.4876803159713745, "learning_rate": 0.0001, "loss": 0.0124, "step": 79700 }, { "epoch": 524.4078947368421, "grad_norm": 1.1998071670532227, "learning_rate": 0.0001, "loss": 0.0093, "step": 79710 }, { "epoch": 524.4736842105264, "grad_norm": 1.1491254568099976, "learning_rate": 0.0001, "loss": 0.0118, "step": 79720 }, { "epoch": 524.5394736842105, "grad_norm": 1.4121824502944946, "learning_rate": 0.0001, "loss": 0.0121, "step": 79730 }, { "epoch": 524.6052631578947, "grad_norm": 0.9082379341125488, "learning_rate": 0.0001, "loss": 0.0098, "step": 79740 }, { "epoch": 524.671052631579, "grad_norm": 1.1881701946258545, "learning_rate": 0.0001, "loss": 0.0104, "step": 79750 }, { "epoch": 524.7368421052631, "grad_norm": 1.3639180660247803, "learning_rate": 0.0001, "loss": 0.0112, "step": 79760 }, { "epoch": 524.8026315789474, "grad_norm": 1.2215701341629028, "learning_rate": 0.0001, "loss": 0.0107, "step": 79770 }, { "epoch": 524.8684210526316, "grad_norm": 0.7113543748855591, "learning_rate": 0.0001, "loss": 0.0116, "step": 79780 }, { "epoch": 524.9342105263158, "grad_norm": 1.2361555099487305, "learning_rate": 0.0001, "loss": 0.0103, "step": 79790 }, { "epoch": 525.0, "grad_norm": 1.0537495613098145, "learning_rate": 0.0001, "loss": 0.0117, "step": 79800 }, { "epoch": 525.0657894736842, "grad_norm": 1.0916693210601807, "learning_rate": 0.0001, "loss": 0.0123, "step": 79810 }, { "epoch": 525.1315789473684, "grad_norm": 1.286116600036621, "learning_rate": 0.0001, "loss": 0.0119, "step": 79820 }, { "epoch": 525.1973684210526, "grad_norm": 1.1319963932037354, "learning_rate": 0.0001, "loss": 0.0114, "step": 79830 }, { "epoch": 525.2631578947369, "grad_norm": 1.149547815322876, "learning_rate": 0.0001, "loss": 0.0121, "step": 79840 }, { "epoch": 525.328947368421, "grad_norm": 1.2941960096359253, "learning_rate": 0.0001, "loss": 0.0114, "step": 79850 }, { "epoch": 525.3947368421053, "grad_norm": 0.9617957472801208, "learning_rate": 0.0001, "loss": 0.0122, "step": 79860 }, { "epoch": 525.4605263157895, "grad_norm": 1.2239959239959717, "learning_rate": 0.0001, "loss": 0.012, "step": 79870 }, { "epoch": 525.5263157894736, "grad_norm": 1.0392863750457764, "learning_rate": 0.0001, "loss": 0.0135, "step": 79880 }, { "epoch": 525.5921052631579, "grad_norm": 1.2805525064468384, "learning_rate": 0.0001, "loss": 0.0112, "step": 79890 }, { "epoch": 525.6578947368421, "grad_norm": 1.201557993888855, "learning_rate": 0.0001, "loss": 0.0097, "step": 79900 }, { "epoch": 525.7236842105264, "grad_norm": 1.165624737739563, "learning_rate": 0.0001, "loss": 0.0117, "step": 79910 }, { "epoch": 525.7894736842105, "grad_norm": 1.2472856044769287, "learning_rate": 0.0001, "loss": 0.01, "step": 79920 }, { "epoch": 525.8552631578947, "grad_norm": 0.8875550627708435, "learning_rate": 0.0001, "loss": 0.01, "step": 79930 }, { "epoch": 525.921052631579, "grad_norm": 1.0350874662399292, "learning_rate": 0.0001, "loss": 0.0117, "step": 79940 }, { "epoch": 525.9868421052631, "grad_norm": 1.2986034154891968, "learning_rate": 0.0001, "loss": 0.0104, "step": 79950 }, { "epoch": 526.0526315789474, "grad_norm": 1.1310527324676514, "learning_rate": 0.0001, "loss": 0.0109, "step": 79960 }, { "epoch": 526.1184210526316, "grad_norm": 1.1110562086105347, "learning_rate": 0.0001, "loss": 0.0112, "step": 79970 }, { "epoch": 526.1842105263158, "grad_norm": 1.1927745342254639, "learning_rate": 0.0001, "loss": 0.0094, "step": 79980 }, { "epoch": 526.25, "grad_norm": 1.0965501070022583, "learning_rate": 0.0001, "loss": 0.0121, "step": 79990 }, { "epoch": 526.3157894736842, "grad_norm": 1.4071152210235596, "learning_rate": 0.0001, "loss": 0.0121, "step": 80000 }, { "epoch": 526.3815789473684, "grad_norm": 1.0651636123657227, "learning_rate": 0.0001, "loss": 0.0113, "step": 80010 }, { "epoch": 526.4473684210526, "grad_norm": 1.3853951692581177, "learning_rate": 0.0001, "loss": 0.0108, "step": 80020 }, { "epoch": 526.5131578947369, "grad_norm": 1.1486934423446655, "learning_rate": 0.0001, "loss": 0.0119, "step": 80030 }, { "epoch": 526.578947368421, "grad_norm": 1.088996171951294, "learning_rate": 0.0001, "loss": 0.0115, "step": 80040 }, { "epoch": 526.6447368421053, "grad_norm": 1.4724292755126953, "learning_rate": 0.0001, "loss": 0.0122, "step": 80050 }, { "epoch": 526.7105263157895, "grad_norm": 1.1345316171646118, "learning_rate": 0.0001, "loss": 0.0098, "step": 80060 }, { "epoch": 526.7763157894736, "grad_norm": 1.065085768699646, "learning_rate": 0.0001, "loss": 0.0123, "step": 80070 }, { "epoch": 526.8421052631579, "grad_norm": 1.199918270111084, "learning_rate": 0.0001, "loss": 0.013, "step": 80080 }, { "epoch": 526.9078947368421, "grad_norm": 1.019289255142212, "learning_rate": 0.0001, "loss": 0.0115, "step": 80090 }, { "epoch": 526.9736842105264, "grad_norm": 1.2872799634933472, "learning_rate": 0.0001, "loss": 0.0108, "step": 80100 }, { "epoch": 527.0394736842105, "grad_norm": 1.3231974840164185, "learning_rate": 0.0001, "loss": 0.0119, "step": 80110 }, { "epoch": 527.1052631578947, "grad_norm": 1.0737744569778442, "learning_rate": 0.0001, "loss": 0.0098, "step": 80120 }, { "epoch": 527.171052631579, "grad_norm": 0.7930030822753906, "learning_rate": 0.0001, "loss": 0.0129, "step": 80130 }, { "epoch": 527.2368421052631, "grad_norm": 1.4024803638458252, "learning_rate": 0.0001, "loss": 0.0117, "step": 80140 }, { "epoch": 527.3026315789474, "grad_norm": 0.9458417892456055, "learning_rate": 0.0001, "loss": 0.0128, "step": 80150 }, { "epoch": 527.3684210526316, "grad_norm": 1.3698056936264038, "learning_rate": 0.0001, "loss": 0.0119, "step": 80160 }, { "epoch": 527.4342105263158, "grad_norm": 0.9708862900733948, "learning_rate": 0.0001, "loss": 0.0095, "step": 80170 }, { "epoch": 527.5, "grad_norm": 0.9804370403289795, "learning_rate": 0.0001, "loss": 0.0115, "step": 80180 }, { "epoch": 527.5657894736842, "grad_norm": 1.2980481386184692, "learning_rate": 0.0001, "loss": 0.0133, "step": 80190 }, { "epoch": 527.6315789473684, "grad_norm": 1.3205853700637817, "learning_rate": 0.0001, "loss": 0.0105, "step": 80200 }, { "epoch": 527.6973684210526, "grad_norm": 0.9288889765739441, "learning_rate": 0.0001, "loss": 0.011, "step": 80210 }, { "epoch": 527.7631578947369, "grad_norm": 1.5022870302200317, "learning_rate": 0.0001, "loss": 0.0122, "step": 80220 }, { "epoch": 527.828947368421, "grad_norm": 1.310860276222229, "learning_rate": 0.0001, "loss": 0.0116, "step": 80230 }, { "epoch": 527.8947368421053, "grad_norm": 1.0890276432037354, "learning_rate": 0.0001, "loss": 0.0124, "step": 80240 }, { "epoch": 527.9605263157895, "grad_norm": 1.25932776927948, "learning_rate": 0.0001, "loss": 0.0122, "step": 80250 }, { "epoch": 528.0263157894736, "grad_norm": 1.037662386894226, "learning_rate": 0.0001, "loss": 0.0101, "step": 80260 }, { "epoch": 528.0921052631579, "grad_norm": 1.1433483362197876, "learning_rate": 0.0001, "loss": 0.0106, "step": 80270 }, { "epoch": 528.1578947368421, "grad_norm": 1.212409257888794, "learning_rate": 0.0001, "loss": 0.0109, "step": 80280 }, { "epoch": 528.2236842105264, "grad_norm": 1.226341962814331, "learning_rate": 0.0001, "loss": 0.0111, "step": 80290 }, { "epoch": 528.2894736842105, "grad_norm": 1.4430090188980103, "learning_rate": 0.0001, "loss": 0.013, "step": 80300 }, { "epoch": 528.3552631578947, "grad_norm": 1.1568644046783447, "learning_rate": 0.0001, "loss": 0.0119, "step": 80310 }, { "epoch": 528.421052631579, "grad_norm": 1.183970332145691, "learning_rate": 0.0001, "loss": 0.0112, "step": 80320 }, { "epoch": 528.4868421052631, "grad_norm": 1.3099122047424316, "learning_rate": 0.0001, "loss": 0.0131, "step": 80330 }, { "epoch": 528.5526315789474, "grad_norm": 1.2902318239212036, "learning_rate": 0.0001, "loss": 0.0118, "step": 80340 }, { "epoch": 528.6184210526316, "grad_norm": 1.0034809112548828, "learning_rate": 0.0001, "loss": 0.0124, "step": 80350 }, { "epoch": 528.6842105263158, "grad_norm": 0.9680561423301697, "learning_rate": 0.0001, "loss": 0.0102, "step": 80360 }, { "epoch": 528.75, "grad_norm": 1.1589789390563965, "learning_rate": 0.0001, "loss": 0.0097, "step": 80370 }, { "epoch": 528.8157894736842, "grad_norm": 1.2295933961868286, "learning_rate": 0.0001, "loss": 0.0128, "step": 80380 }, { "epoch": 528.8815789473684, "grad_norm": 1.5712645053863525, "learning_rate": 0.0001, "loss": 0.0121, "step": 80390 }, { "epoch": 528.9473684210526, "grad_norm": 1.4577996730804443, "learning_rate": 0.0001, "loss": 0.0124, "step": 80400 }, { "epoch": 529.0131578947369, "grad_norm": 1.1000990867614746, "learning_rate": 0.0001, "loss": 0.0131, "step": 80410 }, { "epoch": 529.078947368421, "grad_norm": 1.2241917848587036, "learning_rate": 0.0001, "loss": 0.0115, "step": 80420 }, { "epoch": 529.1447368421053, "grad_norm": 1.0974723100662231, "learning_rate": 0.0001, "loss": 0.012, "step": 80430 }, { "epoch": 529.2105263157895, "grad_norm": 0.9757032990455627, "learning_rate": 0.0001, "loss": 0.0115, "step": 80440 }, { "epoch": 529.2763157894736, "grad_norm": 1.317093849182129, "learning_rate": 0.0001, "loss": 0.0126, "step": 80450 }, { "epoch": 529.3421052631579, "grad_norm": 1.1558804512023926, "learning_rate": 0.0001, "loss": 0.0105, "step": 80460 }, { "epoch": 529.4078947368421, "grad_norm": 1.0101091861724854, "learning_rate": 0.0001, "loss": 0.0119, "step": 80470 }, { "epoch": 529.4736842105264, "grad_norm": 1.2322200536727905, "learning_rate": 0.0001, "loss": 0.0102, "step": 80480 }, { "epoch": 529.5394736842105, "grad_norm": 1.0501878261566162, "learning_rate": 0.0001, "loss": 0.0103, "step": 80490 }, { "epoch": 529.6052631578947, "grad_norm": 1.1091357469558716, "learning_rate": 0.0001, "loss": 0.0106, "step": 80500 }, { "epoch": 529.671052631579, "grad_norm": 1.2675368785858154, "learning_rate": 0.0001, "loss": 0.011, "step": 80510 }, { "epoch": 529.7368421052631, "grad_norm": 1.0387545824050903, "learning_rate": 0.0001, "loss": 0.0101, "step": 80520 }, { "epoch": 529.8026315789474, "grad_norm": 1.0804754495620728, "learning_rate": 0.0001, "loss": 0.0106, "step": 80530 }, { "epoch": 529.8684210526316, "grad_norm": 1.0754399299621582, "learning_rate": 0.0001, "loss": 0.0121, "step": 80540 }, { "epoch": 529.9342105263158, "grad_norm": 1.0641555786132812, "learning_rate": 0.0001, "loss": 0.0146, "step": 80550 }, { "epoch": 530.0, "grad_norm": 1.3832042217254639, "learning_rate": 0.0001, "loss": 0.0118, "step": 80560 }, { "epoch": 530.0657894736842, "grad_norm": 1.0079296827316284, "learning_rate": 0.0001, "loss": 0.0125, "step": 80570 }, { "epoch": 530.1315789473684, "grad_norm": 0.7935236692428589, "learning_rate": 0.0001, "loss": 0.0126, "step": 80580 }, { "epoch": 530.1973684210526, "grad_norm": 1.2828476428985596, "learning_rate": 0.0001, "loss": 0.0115, "step": 80590 }, { "epoch": 530.2631578947369, "grad_norm": 1.0356460809707642, "learning_rate": 0.0001, "loss": 0.013, "step": 80600 }, { "epoch": 530.328947368421, "grad_norm": 1.1222877502441406, "learning_rate": 0.0001, "loss": 0.0108, "step": 80610 }, { "epoch": 530.3947368421053, "grad_norm": 0.8804997801780701, "learning_rate": 0.0001, "loss": 0.0121, "step": 80620 }, { "epoch": 530.4605263157895, "grad_norm": 1.159664273262024, "learning_rate": 0.0001, "loss": 0.0111, "step": 80630 }, { "epoch": 530.5263157894736, "grad_norm": 1.4239946603775024, "learning_rate": 0.0001, "loss": 0.0132, "step": 80640 }, { "epoch": 530.5921052631579, "grad_norm": 1.7034857273101807, "learning_rate": 0.0001, "loss": 0.0114, "step": 80650 }, { "epoch": 530.6578947368421, "grad_norm": 1.6954020261764526, "learning_rate": 0.0001, "loss": 0.0115, "step": 80660 }, { "epoch": 530.7236842105264, "grad_norm": 1.291788935661316, "learning_rate": 0.0001, "loss": 0.0113, "step": 80670 }, { "epoch": 530.7894736842105, "grad_norm": 2.2857964038848877, "learning_rate": 0.0001, "loss": 0.012, "step": 80680 }, { "epoch": 530.8552631578947, "grad_norm": 1.9390625953674316, "learning_rate": 0.0001, "loss": 0.0116, "step": 80690 }, { "epoch": 530.921052631579, "grad_norm": 1.31684410572052, "learning_rate": 0.0001, "loss": 0.0103, "step": 80700 }, { "epoch": 530.9868421052631, "grad_norm": 1.6871960163116455, "learning_rate": 0.0001, "loss": 0.0111, "step": 80710 }, { "epoch": 531.0526315789474, "grad_norm": 1.471971869468689, "learning_rate": 0.0001, "loss": 0.0105, "step": 80720 }, { "epoch": 531.1184210526316, "grad_norm": 1.5256435871124268, "learning_rate": 0.0001, "loss": 0.012, "step": 80730 }, { "epoch": 531.1842105263158, "grad_norm": 1.4224085807800293, "learning_rate": 0.0001, "loss": 0.01, "step": 80740 }, { "epoch": 531.25, "grad_norm": 1.179168701171875, "learning_rate": 0.0001, "loss": 0.0099, "step": 80750 }, { "epoch": 531.3157894736842, "grad_norm": 1.2524338960647583, "learning_rate": 0.0001, "loss": 0.0121, "step": 80760 }, { "epoch": 531.3815789473684, "grad_norm": 1.2265777587890625, "learning_rate": 0.0001, "loss": 0.0123, "step": 80770 }, { "epoch": 531.4473684210526, "grad_norm": 1.4335110187530518, "learning_rate": 0.0001, "loss": 0.0131, "step": 80780 }, { "epoch": 531.5131578947369, "grad_norm": 1.5284316539764404, "learning_rate": 0.0001, "loss": 0.011, "step": 80790 }, { "epoch": 531.578947368421, "grad_norm": 1.42241370677948, "learning_rate": 0.0001, "loss": 0.0102, "step": 80800 }, { "epoch": 531.6447368421053, "grad_norm": 1.530569314956665, "learning_rate": 0.0001, "loss": 0.0123, "step": 80810 }, { "epoch": 531.7105263157895, "grad_norm": 0.7496344447135925, "learning_rate": 0.0001, "loss": 0.0114, "step": 80820 }, { "epoch": 531.7763157894736, "grad_norm": 1.7025457620620728, "learning_rate": 0.0001, "loss": 0.0118, "step": 80830 }, { "epoch": 531.8421052631579, "grad_norm": 1.1982378959655762, "learning_rate": 0.0001, "loss": 0.0128, "step": 80840 }, { "epoch": 531.9078947368421, "grad_norm": 1.0408469438552856, "learning_rate": 0.0001, "loss": 0.0129, "step": 80850 }, { "epoch": 531.9736842105264, "grad_norm": 1.2916014194488525, "learning_rate": 0.0001, "loss": 0.0113, "step": 80860 }, { "epoch": 532.0394736842105, "grad_norm": 1.271699070930481, "learning_rate": 0.0001, "loss": 0.0113, "step": 80870 }, { "epoch": 532.1052631578947, "grad_norm": 1.313772439956665, "learning_rate": 0.0001, "loss": 0.0117, "step": 80880 }, { "epoch": 532.171052631579, "grad_norm": 1.413476824760437, "learning_rate": 0.0001, "loss": 0.0117, "step": 80890 }, { "epoch": 532.2368421052631, "grad_norm": 1.160772442817688, "learning_rate": 0.0001, "loss": 0.0117, "step": 80900 }, { "epoch": 532.3026315789474, "grad_norm": 1.0700751543045044, "learning_rate": 0.0001, "loss": 0.0113, "step": 80910 }, { "epoch": 532.3684210526316, "grad_norm": 1.1918394565582275, "learning_rate": 0.0001, "loss": 0.0111, "step": 80920 }, { "epoch": 532.4342105263158, "grad_norm": 0.9265491366386414, "learning_rate": 0.0001, "loss": 0.0133, "step": 80930 }, { "epoch": 532.5, "grad_norm": 0.8995620012283325, "learning_rate": 0.0001, "loss": 0.0124, "step": 80940 }, { "epoch": 532.5657894736842, "grad_norm": 1.0701876878738403, "learning_rate": 0.0001, "loss": 0.012, "step": 80950 }, { "epoch": 532.6315789473684, "grad_norm": 1.1642286777496338, "learning_rate": 0.0001, "loss": 0.012, "step": 80960 }, { "epoch": 532.6973684210526, "grad_norm": 1.027518630027771, "learning_rate": 0.0001, "loss": 0.0138, "step": 80970 }, { "epoch": 532.7631578947369, "grad_norm": 1.2189431190490723, "learning_rate": 0.0001, "loss": 0.0127, "step": 80980 }, { "epoch": 532.828947368421, "grad_norm": 1.1508433818817139, "learning_rate": 0.0001, "loss": 0.0129, "step": 80990 }, { "epoch": 532.8947368421053, "grad_norm": 1.2702674865722656, "learning_rate": 0.0001, "loss": 0.0109, "step": 81000 }, { "epoch": 532.9605263157895, "grad_norm": 1.1186507940292358, "learning_rate": 0.0001, "loss": 0.0122, "step": 81010 }, { "epoch": 533.0263157894736, "grad_norm": 1.3020246028900146, "learning_rate": 0.0001, "loss": 0.0126, "step": 81020 }, { "epoch": 533.0921052631579, "grad_norm": 1.0293231010437012, "learning_rate": 0.0001, "loss": 0.0114, "step": 81030 }, { "epoch": 533.1578947368421, "grad_norm": 1.2916498184204102, "learning_rate": 0.0001, "loss": 0.0124, "step": 81040 }, { "epoch": 533.2236842105264, "grad_norm": 1.2180614471435547, "learning_rate": 0.0001, "loss": 0.0117, "step": 81050 }, { "epoch": 533.2894736842105, "grad_norm": 0.9033607840538025, "learning_rate": 0.0001, "loss": 0.0135, "step": 81060 }, { "epoch": 533.3552631578947, "grad_norm": 1.1329870223999023, "learning_rate": 0.0001, "loss": 0.0111, "step": 81070 }, { "epoch": 533.421052631579, "grad_norm": 0.9711558818817139, "learning_rate": 0.0001, "loss": 0.0127, "step": 81080 }, { "epoch": 533.4868421052631, "grad_norm": 1.2064048051834106, "learning_rate": 0.0001, "loss": 0.0124, "step": 81090 }, { "epoch": 533.5526315789474, "grad_norm": 1.200500726699829, "learning_rate": 0.0001, "loss": 0.012, "step": 81100 }, { "epoch": 533.6184210526316, "grad_norm": 1.3031874895095825, "learning_rate": 0.0001, "loss": 0.0144, "step": 81110 }, { "epoch": 533.6842105263158, "grad_norm": 1.249746322631836, "learning_rate": 0.0001, "loss": 0.0127, "step": 81120 }, { "epoch": 533.75, "grad_norm": 1.4198533296585083, "learning_rate": 0.0001, "loss": 0.012, "step": 81130 }, { "epoch": 533.8157894736842, "grad_norm": 1.0344791412353516, "learning_rate": 0.0001, "loss": 0.0123, "step": 81140 }, { "epoch": 533.8815789473684, "grad_norm": 1.2430800199508667, "learning_rate": 0.0001, "loss": 0.011, "step": 81150 }, { "epoch": 533.9473684210526, "grad_norm": 1.0078041553497314, "learning_rate": 0.0001, "loss": 0.011, "step": 81160 }, { "epoch": 534.0131578947369, "grad_norm": 1.1461764574050903, "learning_rate": 0.0001, "loss": 0.0102, "step": 81170 }, { "epoch": 534.078947368421, "grad_norm": 1.1230483055114746, "learning_rate": 0.0001, "loss": 0.0104, "step": 81180 }, { "epoch": 534.1447368421053, "grad_norm": 1.142557144165039, "learning_rate": 0.0001, "loss": 0.0118, "step": 81190 }, { "epoch": 534.2105263157895, "grad_norm": 1.1560312509536743, "learning_rate": 0.0001, "loss": 0.0106, "step": 81200 }, { "epoch": 534.2763157894736, "grad_norm": 1.1191767454147339, "learning_rate": 0.0001, "loss": 0.012, "step": 81210 }, { "epoch": 534.3421052631579, "grad_norm": 0.7681576013565063, "learning_rate": 0.0001, "loss": 0.012, "step": 81220 }, { "epoch": 534.4078947368421, "grad_norm": 0.7272207736968994, "learning_rate": 0.0001, "loss": 0.0107, "step": 81230 }, { "epoch": 534.4736842105264, "grad_norm": 0.9689534306526184, "learning_rate": 0.0001, "loss": 0.0115, "step": 81240 }, { "epoch": 534.5394736842105, "grad_norm": 1.1911404132843018, "learning_rate": 0.0001, "loss": 0.0102, "step": 81250 }, { "epoch": 534.6052631578947, "grad_norm": 1.2596523761749268, "learning_rate": 0.0001, "loss": 0.0117, "step": 81260 }, { "epoch": 534.671052631579, "grad_norm": 1.0386167764663696, "learning_rate": 0.0001, "loss": 0.0117, "step": 81270 }, { "epoch": 534.7368421052631, "grad_norm": 0.7270429730415344, "learning_rate": 0.0001, "loss": 0.0122, "step": 81280 }, { "epoch": 534.8026315789474, "grad_norm": 0.9695709347724915, "learning_rate": 0.0001, "loss": 0.0112, "step": 81290 }, { "epoch": 534.8684210526316, "grad_norm": 1.0142713785171509, "learning_rate": 0.0001, "loss": 0.0125, "step": 81300 }, { "epoch": 534.9342105263158, "grad_norm": 1.2047271728515625, "learning_rate": 0.0001, "loss": 0.0122, "step": 81310 }, { "epoch": 535.0, "grad_norm": 1.0015687942504883, "learning_rate": 0.0001, "loss": 0.0143, "step": 81320 }, { "epoch": 535.0657894736842, "grad_norm": 1.3443199396133423, "learning_rate": 0.0001, "loss": 0.0128, "step": 81330 }, { "epoch": 535.1315789473684, "grad_norm": 1.1419992446899414, "learning_rate": 0.0001, "loss": 0.0113, "step": 81340 }, { "epoch": 535.1973684210526, "grad_norm": 1.3758596181869507, "learning_rate": 0.0001, "loss": 0.0113, "step": 81350 }, { "epoch": 535.2631578947369, "grad_norm": 1.384379267692566, "learning_rate": 0.0001, "loss": 0.0123, "step": 81360 }, { "epoch": 535.328947368421, "grad_norm": 0.993719220161438, "learning_rate": 0.0001, "loss": 0.0106, "step": 81370 }, { "epoch": 535.3947368421053, "grad_norm": 0.9903963804244995, "learning_rate": 0.0001, "loss": 0.0125, "step": 81380 }, { "epoch": 535.4605263157895, "grad_norm": 1.840928077697754, "learning_rate": 0.0001, "loss": 0.0135, "step": 81390 }, { "epoch": 535.5263157894736, "grad_norm": 1.8315232992172241, "learning_rate": 0.0001, "loss": 0.0114, "step": 81400 }, { "epoch": 535.5921052631579, "grad_norm": 1.0046427249908447, "learning_rate": 0.0001, "loss": 0.0099, "step": 81410 }, { "epoch": 535.6578947368421, "grad_norm": 1.2056866884231567, "learning_rate": 0.0001, "loss": 0.0128, "step": 81420 }, { "epoch": 535.7236842105264, "grad_norm": 1.0682623386383057, "learning_rate": 0.0001, "loss": 0.0094, "step": 81430 }, { "epoch": 535.7894736842105, "grad_norm": 1.022611141204834, "learning_rate": 0.0001, "loss": 0.0122, "step": 81440 }, { "epoch": 535.8552631578947, "grad_norm": 0.8441836833953857, "learning_rate": 0.0001, "loss": 0.0107, "step": 81450 }, { "epoch": 535.921052631579, "grad_norm": 0.9319405555725098, "learning_rate": 0.0001, "loss": 0.0101, "step": 81460 }, { "epoch": 535.9868421052631, "grad_norm": 1.3196568489074707, "learning_rate": 0.0001, "loss": 0.0124, "step": 81470 }, { "epoch": 536.0526315789474, "grad_norm": 0.9573938846588135, "learning_rate": 0.0001, "loss": 0.0112, "step": 81480 }, { "epoch": 536.1184210526316, "grad_norm": 1.1913952827453613, "learning_rate": 0.0001, "loss": 0.0116, "step": 81490 }, { "epoch": 536.1842105263158, "grad_norm": 1.4894800186157227, "learning_rate": 0.0001, "loss": 0.0106, "step": 81500 }, { "epoch": 536.25, "grad_norm": 1.2386541366577148, "learning_rate": 0.0001, "loss": 0.0111, "step": 81510 }, { "epoch": 536.3157894736842, "grad_norm": 1.4592559337615967, "learning_rate": 0.0001, "loss": 0.0108, "step": 81520 }, { "epoch": 536.3815789473684, "grad_norm": 1.2014906406402588, "learning_rate": 0.0001, "loss": 0.0123, "step": 81530 }, { "epoch": 536.4473684210526, "grad_norm": 0.9843008518218994, "learning_rate": 0.0001, "loss": 0.0111, "step": 81540 }, { "epoch": 536.5131578947369, "grad_norm": 1.1141507625579834, "learning_rate": 0.0001, "loss": 0.0112, "step": 81550 }, { "epoch": 536.578947368421, "grad_norm": 1.0911942720413208, "learning_rate": 0.0001, "loss": 0.0114, "step": 81560 }, { "epoch": 536.6447368421053, "grad_norm": 0.9761918783187866, "learning_rate": 0.0001, "loss": 0.012, "step": 81570 }, { "epoch": 536.7105263157895, "grad_norm": 1.1536732912063599, "learning_rate": 0.0001, "loss": 0.0115, "step": 81580 }, { "epoch": 536.7763157894736, "grad_norm": 0.9874551296234131, "learning_rate": 0.0001, "loss": 0.0112, "step": 81590 }, { "epoch": 536.8421052631579, "grad_norm": 1.2264351844787598, "learning_rate": 0.0001, "loss": 0.0117, "step": 81600 }, { "epoch": 536.9078947368421, "grad_norm": 1.0342381000518799, "learning_rate": 0.0001, "loss": 0.0131, "step": 81610 }, { "epoch": 536.9736842105264, "grad_norm": 0.8672528266906738, "learning_rate": 0.0001, "loss": 0.0106, "step": 81620 }, { "epoch": 537.0394736842105, "grad_norm": 1.4549974203109741, "learning_rate": 0.0001, "loss": 0.0125, "step": 81630 }, { "epoch": 537.1052631578947, "grad_norm": 1.4086586236953735, "learning_rate": 0.0001, "loss": 0.0117, "step": 81640 }, { "epoch": 537.171052631579, "grad_norm": 0.8391483426094055, "learning_rate": 0.0001, "loss": 0.0097, "step": 81650 }, { "epoch": 537.2368421052631, "grad_norm": 1.2846829891204834, "learning_rate": 0.0001, "loss": 0.0092, "step": 81660 }, { "epoch": 537.3026315789474, "grad_norm": 0.9655391573905945, "learning_rate": 0.0001, "loss": 0.0115, "step": 81670 }, { "epoch": 537.3684210526316, "grad_norm": 1.599820613861084, "learning_rate": 0.0001, "loss": 0.011, "step": 81680 }, { "epoch": 537.4342105263158, "grad_norm": 0.971474826335907, "learning_rate": 0.0001, "loss": 0.012, "step": 81690 }, { "epoch": 537.5, "grad_norm": 0.8270283937454224, "learning_rate": 0.0001, "loss": 0.0121, "step": 81700 }, { "epoch": 537.5657894736842, "grad_norm": 1.2286924123764038, "learning_rate": 0.0001, "loss": 0.0111, "step": 81710 }, { "epoch": 537.6315789473684, "grad_norm": 0.9310139417648315, "learning_rate": 0.0001, "loss": 0.0103, "step": 81720 }, { "epoch": 537.6973684210526, "grad_norm": 0.9931386113166809, "learning_rate": 0.0001, "loss": 0.013, "step": 81730 }, { "epoch": 537.7631578947369, "grad_norm": 1.1506545543670654, "learning_rate": 0.0001, "loss": 0.0097, "step": 81740 }, { "epoch": 537.828947368421, "grad_norm": 0.7511075735092163, "learning_rate": 0.0001, "loss": 0.0124, "step": 81750 }, { "epoch": 537.8947368421053, "grad_norm": 1.1868315935134888, "learning_rate": 0.0001, "loss": 0.0117, "step": 81760 }, { "epoch": 537.9605263157895, "grad_norm": 1.2605093717575073, "learning_rate": 0.0001, "loss": 0.012, "step": 81770 }, { "epoch": 538.0263157894736, "grad_norm": 0.8703752756118774, "learning_rate": 0.0001, "loss": 0.0109, "step": 81780 }, { "epoch": 538.0921052631579, "grad_norm": 0.8889430165290833, "learning_rate": 0.0001, "loss": 0.0147, "step": 81790 }, { "epoch": 538.1578947368421, "grad_norm": 1.4555449485778809, "learning_rate": 0.0001, "loss": 0.011, "step": 81800 }, { "epoch": 538.2236842105264, "grad_norm": 0.8129600882530212, "learning_rate": 0.0001, "loss": 0.0093, "step": 81810 }, { "epoch": 538.2894736842105, "grad_norm": 0.6337363123893738, "learning_rate": 0.0001, "loss": 0.0118, "step": 81820 }, { "epoch": 538.3552631578947, "grad_norm": 1.1285789012908936, "learning_rate": 0.0001, "loss": 0.0103, "step": 81830 }, { "epoch": 538.421052631579, "grad_norm": 1.1603407859802246, "learning_rate": 0.0001, "loss": 0.0109, "step": 81840 }, { "epoch": 538.4868421052631, "grad_norm": 0.8500170707702637, "learning_rate": 0.0001, "loss": 0.012, "step": 81850 }, { "epoch": 538.5526315789474, "grad_norm": 0.9496303200721741, "learning_rate": 0.0001, "loss": 0.0109, "step": 81860 }, { "epoch": 538.6184210526316, "grad_norm": 1.0320367813110352, "learning_rate": 0.0001, "loss": 0.0115, "step": 81870 }, { "epoch": 538.6842105263158, "grad_norm": 0.8967204689979553, "learning_rate": 0.0001, "loss": 0.0105, "step": 81880 }, { "epoch": 538.75, "grad_norm": 1.439362645149231, "learning_rate": 0.0001, "loss": 0.0138, "step": 81890 }, { "epoch": 538.8157894736842, "grad_norm": 1.2981364727020264, "learning_rate": 0.0001, "loss": 0.0118, "step": 81900 }, { "epoch": 538.8815789473684, "grad_norm": 0.7729794979095459, "learning_rate": 0.0001, "loss": 0.0125, "step": 81910 }, { "epoch": 538.9473684210526, "grad_norm": 0.9322214722633362, "learning_rate": 0.0001, "loss": 0.0097, "step": 81920 }, { "epoch": 539.0131578947369, "grad_norm": 1.2167713642120361, "learning_rate": 0.0001, "loss": 0.0126, "step": 81930 }, { "epoch": 539.078947368421, "grad_norm": 0.9902080297470093, "learning_rate": 0.0001, "loss": 0.0122, "step": 81940 }, { "epoch": 539.1447368421053, "grad_norm": 1.3800474405288696, "learning_rate": 0.0001, "loss": 0.0119, "step": 81950 }, { "epoch": 539.2105263157895, "grad_norm": 1.2342554330825806, "learning_rate": 0.0001, "loss": 0.0117, "step": 81960 }, { "epoch": 539.2763157894736, "grad_norm": 1.1256905794143677, "learning_rate": 0.0001, "loss": 0.0112, "step": 81970 }, { "epoch": 539.3421052631579, "grad_norm": 0.8514827489852905, "learning_rate": 0.0001, "loss": 0.0124, "step": 81980 }, { "epoch": 539.4078947368421, "grad_norm": 1.0054851770401, "learning_rate": 0.0001, "loss": 0.0129, "step": 81990 }, { "epoch": 539.4736842105264, "grad_norm": 1.3685269355773926, "learning_rate": 0.0001, "loss": 0.0119, "step": 82000 }, { "epoch": 539.5394736842105, "grad_norm": 0.8945045471191406, "learning_rate": 0.0001, "loss": 0.0113, "step": 82010 }, { "epoch": 539.6052631578947, "grad_norm": 0.7146797776222229, "learning_rate": 0.0001, "loss": 0.0129, "step": 82020 }, { "epoch": 539.671052631579, "grad_norm": 1.4394199848175049, "learning_rate": 0.0001, "loss": 0.0109, "step": 82030 }, { "epoch": 539.7368421052631, "grad_norm": 1.3503402471542358, "learning_rate": 0.0001, "loss": 0.0099, "step": 82040 }, { "epoch": 539.8026315789474, "grad_norm": 0.9955316185951233, "learning_rate": 0.0001, "loss": 0.0129, "step": 82050 }, { "epoch": 539.8684210526316, "grad_norm": 1.2270481586456299, "learning_rate": 0.0001, "loss": 0.0115, "step": 82060 }, { "epoch": 539.9342105263158, "grad_norm": 0.8091347217559814, "learning_rate": 0.0001, "loss": 0.0098, "step": 82070 }, { "epoch": 540.0, "grad_norm": 0.9882093071937561, "learning_rate": 0.0001, "loss": 0.0118, "step": 82080 }, { "epoch": 540.0657894736842, "grad_norm": 1.3003027439117432, "learning_rate": 0.0001, "loss": 0.0121, "step": 82090 }, { "epoch": 540.1315789473684, "grad_norm": 1.113666296005249, "learning_rate": 0.0001, "loss": 0.0114, "step": 82100 }, { "epoch": 540.1973684210526, "grad_norm": 1.5681219100952148, "learning_rate": 0.0001, "loss": 0.0106, "step": 82110 }, { "epoch": 540.2631578947369, "grad_norm": 0.7766351103782654, "learning_rate": 0.0001, "loss": 0.0108, "step": 82120 }, { "epoch": 540.328947368421, "grad_norm": 1.0728567838668823, "learning_rate": 0.0001, "loss": 0.0106, "step": 82130 }, { "epoch": 540.3947368421053, "grad_norm": 0.9811382293701172, "learning_rate": 0.0001, "loss": 0.013, "step": 82140 }, { "epoch": 540.4605263157895, "grad_norm": 0.8526586890220642, "learning_rate": 0.0001, "loss": 0.0103, "step": 82150 }, { "epoch": 540.5263157894736, "grad_norm": 0.9959100484848022, "learning_rate": 0.0001, "loss": 0.0114, "step": 82160 }, { "epoch": 540.5921052631579, "grad_norm": 1.1363943815231323, "learning_rate": 0.0001, "loss": 0.0111, "step": 82170 }, { "epoch": 540.6578947368421, "grad_norm": 1.0341233015060425, "learning_rate": 0.0001, "loss": 0.0115, "step": 82180 }, { "epoch": 540.7236842105264, "grad_norm": 1.2639089822769165, "learning_rate": 0.0001, "loss": 0.0116, "step": 82190 }, { "epoch": 540.7894736842105, "grad_norm": 1.197579026222229, "learning_rate": 0.0001, "loss": 0.0089, "step": 82200 }, { "epoch": 540.8552631578947, "grad_norm": 1.078098177909851, "learning_rate": 0.0001, "loss": 0.012, "step": 82210 }, { "epoch": 540.921052631579, "grad_norm": 1.026748538017273, "learning_rate": 0.0001, "loss": 0.0111, "step": 82220 }, { "epoch": 540.9868421052631, "grad_norm": 1.1622309684753418, "learning_rate": 0.0001, "loss": 0.0113, "step": 82230 }, { "epoch": 541.0526315789474, "grad_norm": 0.9618615508079529, "learning_rate": 0.0001, "loss": 0.0119, "step": 82240 }, { "epoch": 541.1184210526316, "grad_norm": 1.2612966299057007, "learning_rate": 0.0001, "loss": 0.0118, "step": 82250 }, { "epoch": 541.1842105263158, "grad_norm": 1.0327303409576416, "learning_rate": 0.0001, "loss": 0.0123, "step": 82260 }, { "epoch": 541.25, "grad_norm": 1.160564661026001, "learning_rate": 0.0001, "loss": 0.0098, "step": 82270 }, { "epoch": 541.3157894736842, "grad_norm": 1.1718395948410034, "learning_rate": 0.0001, "loss": 0.0104, "step": 82280 }, { "epoch": 541.3815789473684, "grad_norm": 0.9807857275009155, "learning_rate": 0.0001, "loss": 0.0132, "step": 82290 }, { "epoch": 541.4473684210526, "grad_norm": 1.2698783874511719, "learning_rate": 0.0001, "loss": 0.0104, "step": 82300 }, { "epoch": 541.5131578947369, "grad_norm": 1.2093521356582642, "learning_rate": 0.0001, "loss": 0.0108, "step": 82310 }, { "epoch": 541.578947368421, "grad_norm": 1.0178953409194946, "learning_rate": 0.0001, "loss": 0.0107, "step": 82320 }, { "epoch": 541.6447368421053, "grad_norm": 0.9447916150093079, "learning_rate": 0.0001, "loss": 0.0106, "step": 82330 }, { "epoch": 541.7105263157895, "grad_norm": 1.0798600912094116, "learning_rate": 0.0001, "loss": 0.0099, "step": 82340 }, { "epoch": 541.7763157894736, "grad_norm": 1.1212877035140991, "learning_rate": 0.0001, "loss": 0.0126, "step": 82350 }, { "epoch": 541.8421052631579, "grad_norm": 1.1646664142608643, "learning_rate": 0.0001, "loss": 0.0118, "step": 82360 }, { "epoch": 541.9078947368421, "grad_norm": 1.073519229888916, "learning_rate": 0.0001, "loss": 0.0121, "step": 82370 }, { "epoch": 541.9736842105264, "grad_norm": 0.8553018569946289, "learning_rate": 0.0001, "loss": 0.0126, "step": 82380 }, { "epoch": 542.0394736842105, "grad_norm": 1.2172927856445312, "learning_rate": 0.0001, "loss": 0.0121, "step": 82390 }, { "epoch": 542.1052631578947, "grad_norm": 1.0330595970153809, "learning_rate": 0.0001, "loss": 0.0124, "step": 82400 }, { "epoch": 542.171052631579, "grad_norm": 0.9683526158332825, "learning_rate": 0.0001, "loss": 0.012, "step": 82410 }, { "epoch": 542.2368421052631, "grad_norm": 1.1833120584487915, "learning_rate": 0.0001, "loss": 0.0121, "step": 82420 }, { "epoch": 542.3026315789474, "grad_norm": 1.3812700510025024, "learning_rate": 0.0001, "loss": 0.0123, "step": 82430 }, { "epoch": 542.3684210526316, "grad_norm": 1.6291792392730713, "learning_rate": 0.0001, "loss": 0.0114, "step": 82440 }, { "epoch": 542.4342105263158, "grad_norm": 1.2887747287750244, "learning_rate": 0.0001, "loss": 0.0115, "step": 82450 }, { "epoch": 542.5, "grad_norm": 1.5787415504455566, "learning_rate": 0.0001, "loss": 0.0117, "step": 82460 }, { "epoch": 542.5657894736842, "grad_norm": 1.3304678201675415, "learning_rate": 0.0001, "loss": 0.0105, "step": 82470 }, { "epoch": 542.6315789473684, "grad_norm": 1.3938226699829102, "learning_rate": 0.0001, "loss": 0.0102, "step": 82480 }, { "epoch": 542.6973684210526, "grad_norm": 1.4595518112182617, "learning_rate": 0.0001, "loss": 0.0096, "step": 82490 }, { "epoch": 542.7631578947369, "grad_norm": 1.0828487873077393, "learning_rate": 0.0001, "loss": 0.01, "step": 82500 }, { "epoch": 542.828947368421, "grad_norm": 1.4781620502471924, "learning_rate": 0.0001, "loss": 0.011, "step": 82510 }, { "epoch": 542.8947368421053, "grad_norm": 1.2808284759521484, "learning_rate": 0.0001, "loss": 0.012, "step": 82520 }, { "epoch": 542.9605263157895, "grad_norm": 1.0565862655639648, "learning_rate": 0.0001, "loss": 0.011, "step": 82530 }, { "epoch": 543.0263157894736, "grad_norm": 1.2909079790115356, "learning_rate": 0.0001, "loss": 0.0103, "step": 82540 }, { "epoch": 543.0921052631579, "grad_norm": 1.1060192584991455, "learning_rate": 0.0001, "loss": 0.0107, "step": 82550 }, { "epoch": 543.1578947368421, "grad_norm": 0.9638274908065796, "learning_rate": 0.0001, "loss": 0.0124, "step": 82560 }, { "epoch": 543.2236842105264, "grad_norm": 1.082889437675476, "learning_rate": 0.0001, "loss": 0.0116, "step": 82570 }, { "epoch": 543.2894736842105, "grad_norm": 0.6525837182998657, "learning_rate": 0.0001, "loss": 0.0093, "step": 82580 }, { "epoch": 543.3552631578947, "grad_norm": 1.2893084287643433, "learning_rate": 0.0001, "loss": 0.0124, "step": 82590 }, { "epoch": 543.421052631579, "grad_norm": 1.0150879621505737, "learning_rate": 0.0001, "loss": 0.0103, "step": 82600 }, { "epoch": 543.4868421052631, "grad_norm": 0.5977818965911865, "learning_rate": 0.0001, "loss": 0.0115, "step": 82610 }, { "epoch": 543.5526315789474, "grad_norm": 1.1314287185668945, "learning_rate": 0.0001, "loss": 0.0122, "step": 82620 }, { "epoch": 543.6184210526316, "grad_norm": 1.184816837310791, "learning_rate": 0.0001, "loss": 0.0116, "step": 82630 }, { "epoch": 543.6842105263158, "grad_norm": 0.9463902711868286, "learning_rate": 0.0001, "loss": 0.0109, "step": 82640 }, { "epoch": 543.75, "grad_norm": 1.0592985153198242, "learning_rate": 0.0001, "loss": 0.0121, "step": 82650 }, { "epoch": 543.8157894736842, "grad_norm": 1.3953194618225098, "learning_rate": 0.0001, "loss": 0.0109, "step": 82660 }, { "epoch": 543.8815789473684, "grad_norm": 1.4499620199203491, "learning_rate": 0.0001, "loss": 0.0107, "step": 82670 }, { "epoch": 543.9473684210526, "grad_norm": 1.2184102535247803, "learning_rate": 0.0001, "loss": 0.0111, "step": 82680 }, { "epoch": 544.0131578947369, "grad_norm": 0.9526715874671936, "learning_rate": 0.0001, "loss": 0.0116, "step": 82690 }, { "epoch": 544.078947368421, "grad_norm": 1.111823320388794, "learning_rate": 0.0001, "loss": 0.0127, "step": 82700 }, { "epoch": 544.1447368421053, "grad_norm": 1.0925346612930298, "learning_rate": 0.0001, "loss": 0.0101, "step": 82710 }, { "epoch": 544.2105263157895, "grad_norm": 1.0318348407745361, "learning_rate": 0.0001, "loss": 0.0111, "step": 82720 }, { "epoch": 544.2763157894736, "grad_norm": 1.3430664539337158, "learning_rate": 0.0001, "loss": 0.0118, "step": 82730 }, { "epoch": 544.3421052631579, "grad_norm": 0.8877522945404053, "learning_rate": 0.0001, "loss": 0.0112, "step": 82740 }, { "epoch": 544.4078947368421, "grad_norm": 1.2313371896743774, "learning_rate": 0.0001, "loss": 0.0126, "step": 82750 }, { "epoch": 544.4736842105264, "grad_norm": 1.1085177659988403, "learning_rate": 0.0001, "loss": 0.0106, "step": 82760 }, { "epoch": 544.5394736842105, "grad_norm": 1.0655664205551147, "learning_rate": 0.0001, "loss": 0.0117, "step": 82770 }, { "epoch": 544.6052631578947, "grad_norm": 1.3107541799545288, "learning_rate": 0.0001, "loss": 0.01, "step": 82780 }, { "epoch": 544.671052631579, "grad_norm": 1.2717570066452026, "learning_rate": 0.0001, "loss": 0.0104, "step": 82790 }, { "epoch": 544.7368421052631, "grad_norm": 1.365206003189087, "learning_rate": 0.0001, "loss": 0.0119, "step": 82800 }, { "epoch": 544.8026315789474, "grad_norm": 1.2015581130981445, "learning_rate": 0.0001, "loss": 0.0121, "step": 82810 }, { "epoch": 544.8684210526316, "grad_norm": 0.8265142440795898, "learning_rate": 0.0001, "loss": 0.0125, "step": 82820 }, { "epoch": 544.9342105263158, "grad_norm": 1.278316617012024, "learning_rate": 0.0001, "loss": 0.0111, "step": 82830 }, { "epoch": 545.0, "grad_norm": 0.8285160064697266, "learning_rate": 0.0001, "loss": 0.0115, "step": 82840 }, { "epoch": 545.0657894736842, "grad_norm": 1.131121277809143, "learning_rate": 0.0001, "loss": 0.0118, "step": 82850 }, { "epoch": 545.1315789473684, "grad_norm": 1.1002274751663208, "learning_rate": 0.0001, "loss": 0.0105, "step": 82860 }, { "epoch": 545.1973684210526, "grad_norm": 1.3011295795440674, "learning_rate": 0.0001, "loss": 0.0108, "step": 82870 }, { "epoch": 545.2631578947369, "grad_norm": 0.9043525457382202, "learning_rate": 0.0001, "loss": 0.011, "step": 82880 }, { "epoch": 545.328947368421, "grad_norm": 1.1586050987243652, "learning_rate": 0.0001, "loss": 0.0127, "step": 82890 }, { "epoch": 545.3947368421053, "grad_norm": 1.1788783073425293, "learning_rate": 0.0001, "loss": 0.0123, "step": 82900 }, { "epoch": 545.4605263157895, "grad_norm": 1.1490991115570068, "learning_rate": 0.0001, "loss": 0.0122, "step": 82910 }, { "epoch": 545.5263157894736, "grad_norm": 0.9973250031471252, "learning_rate": 0.0001, "loss": 0.0117, "step": 82920 }, { "epoch": 545.5921052631579, "grad_norm": 0.9318479299545288, "learning_rate": 0.0001, "loss": 0.0101, "step": 82930 }, { "epoch": 545.6578947368421, "grad_norm": 0.8547037839889526, "learning_rate": 0.0001, "loss": 0.0117, "step": 82940 }, { "epoch": 545.7236842105264, "grad_norm": 1.1414095163345337, "learning_rate": 0.0001, "loss": 0.0103, "step": 82950 }, { "epoch": 545.7894736842105, "grad_norm": 1.1906640529632568, "learning_rate": 0.0001, "loss": 0.0112, "step": 82960 }, { "epoch": 545.8552631578947, "grad_norm": 0.7530783414840698, "learning_rate": 0.0001, "loss": 0.0112, "step": 82970 }, { "epoch": 545.921052631579, "grad_norm": 1.2797421216964722, "learning_rate": 0.0001, "loss": 0.0116, "step": 82980 }, { "epoch": 545.9868421052631, "grad_norm": 0.995582640171051, "learning_rate": 0.0001, "loss": 0.0117, "step": 82990 }, { "epoch": 546.0526315789474, "grad_norm": 1.1054130792617798, "learning_rate": 0.0001, "loss": 0.012, "step": 83000 }, { "epoch": 546.1184210526316, "grad_norm": 1.2854208946228027, "learning_rate": 0.0001, "loss": 0.0127, "step": 83010 }, { "epoch": 546.1842105263158, "grad_norm": 1.1343199014663696, "learning_rate": 0.0001, "loss": 0.0107, "step": 83020 }, { "epoch": 546.25, "grad_norm": 0.953602135181427, "learning_rate": 0.0001, "loss": 0.0112, "step": 83030 }, { "epoch": 546.3157894736842, "grad_norm": 1.223362922668457, "learning_rate": 0.0001, "loss": 0.0105, "step": 83040 }, { "epoch": 546.3815789473684, "grad_norm": 1.4338486194610596, "learning_rate": 0.0001, "loss": 0.0118, "step": 83050 }, { "epoch": 546.4473684210526, "grad_norm": 1.1713676452636719, "learning_rate": 0.0001, "loss": 0.0111, "step": 83060 }, { "epoch": 546.5131578947369, "grad_norm": 1.1168043613433838, "learning_rate": 0.0001, "loss": 0.0135, "step": 83070 }, { "epoch": 546.578947368421, "grad_norm": 1.1231666803359985, "learning_rate": 0.0001, "loss": 0.0117, "step": 83080 }, { "epoch": 546.6447368421053, "grad_norm": 1.1550521850585938, "learning_rate": 0.0001, "loss": 0.0118, "step": 83090 }, { "epoch": 546.7105263157895, "grad_norm": 1.1135015487670898, "learning_rate": 0.0001, "loss": 0.0115, "step": 83100 }, { "epoch": 546.7763157894736, "grad_norm": 1.2623625993728638, "learning_rate": 0.0001, "loss": 0.0098, "step": 83110 }, { "epoch": 546.8421052631579, "grad_norm": 0.7757622003555298, "learning_rate": 0.0001, "loss": 0.0113, "step": 83120 }, { "epoch": 546.9078947368421, "grad_norm": 0.9508805274963379, "learning_rate": 0.0001, "loss": 0.0107, "step": 83130 }, { "epoch": 546.9736842105264, "grad_norm": 0.7631132006645203, "learning_rate": 0.0001, "loss": 0.0102, "step": 83140 }, { "epoch": 547.0394736842105, "grad_norm": 0.9571039080619812, "learning_rate": 0.0001, "loss": 0.0113, "step": 83150 }, { "epoch": 547.1052631578947, "grad_norm": 0.9324824213981628, "learning_rate": 0.0001, "loss": 0.0109, "step": 83160 }, { "epoch": 547.171052631579, "grad_norm": 1.1061806678771973, "learning_rate": 0.0001, "loss": 0.0118, "step": 83170 }, { "epoch": 547.2368421052631, "grad_norm": 0.9221788048744202, "learning_rate": 0.0001, "loss": 0.0129, "step": 83180 }, { "epoch": 547.3026315789474, "grad_norm": 1.1765472888946533, "learning_rate": 0.0001, "loss": 0.0102, "step": 83190 }, { "epoch": 547.3684210526316, "grad_norm": 1.0740410089492798, "learning_rate": 0.0001, "loss": 0.0132, "step": 83200 }, { "epoch": 547.4342105263158, "grad_norm": 1.1589864492416382, "learning_rate": 0.0001, "loss": 0.0135, "step": 83210 }, { "epoch": 547.5, "grad_norm": 1.1935651302337646, "learning_rate": 0.0001, "loss": 0.0108, "step": 83220 }, { "epoch": 547.5657894736842, "grad_norm": 0.8413220047950745, "learning_rate": 0.0001, "loss": 0.0135, "step": 83230 }, { "epoch": 547.6315789473684, "grad_norm": 0.8780201077461243, "learning_rate": 0.0001, "loss": 0.0106, "step": 83240 }, { "epoch": 547.6973684210526, "grad_norm": 1.1799852848052979, "learning_rate": 0.0001, "loss": 0.0119, "step": 83250 }, { "epoch": 547.7631578947369, "grad_norm": 0.9087440371513367, "learning_rate": 0.0001, "loss": 0.0108, "step": 83260 }, { "epoch": 547.828947368421, "grad_norm": 1.065704584121704, "learning_rate": 0.0001, "loss": 0.011, "step": 83270 }, { "epoch": 547.8947368421053, "grad_norm": 1.0333939790725708, "learning_rate": 0.0001, "loss": 0.0099, "step": 83280 }, { "epoch": 547.9605263157895, "grad_norm": 0.951385498046875, "learning_rate": 0.0001, "loss": 0.0103, "step": 83290 }, { "epoch": 548.0263157894736, "grad_norm": 0.7980932593345642, "learning_rate": 0.0001, "loss": 0.0112, "step": 83300 }, { "epoch": 548.0921052631579, "grad_norm": 1.1457808017730713, "learning_rate": 0.0001, "loss": 0.0114, "step": 83310 }, { "epoch": 548.1578947368421, "grad_norm": 0.9355278015136719, "learning_rate": 0.0001, "loss": 0.0123, "step": 83320 }, { "epoch": 548.2236842105264, "grad_norm": 0.7948923707008362, "learning_rate": 0.0001, "loss": 0.0103, "step": 83330 }, { "epoch": 548.2894736842105, "grad_norm": 0.9692309498786926, "learning_rate": 0.0001, "loss": 0.0114, "step": 83340 }, { "epoch": 548.3552631578947, "grad_norm": 1.2351458072662354, "learning_rate": 0.0001, "loss": 0.0137, "step": 83350 }, { "epoch": 548.421052631579, "grad_norm": 0.8993010520935059, "learning_rate": 0.0001, "loss": 0.0102, "step": 83360 }, { "epoch": 548.4868421052631, "grad_norm": 1.3239619731903076, "learning_rate": 0.0001, "loss": 0.0141, "step": 83370 }, { "epoch": 548.5526315789474, "grad_norm": 1.2244657278060913, "learning_rate": 0.0001, "loss": 0.0113, "step": 83380 }, { "epoch": 548.6184210526316, "grad_norm": 1.208211064338684, "learning_rate": 0.0001, "loss": 0.0099, "step": 83390 }, { "epoch": 548.6842105263158, "grad_norm": 1.225913405418396, "learning_rate": 0.0001, "loss": 0.0118, "step": 83400 }, { "epoch": 548.75, "grad_norm": 1.019867181777954, "learning_rate": 0.0001, "loss": 0.012, "step": 83410 }, { "epoch": 548.8157894736842, "grad_norm": 1.138984203338623, "learning_rate": 0.0001, "loss": 0.011, "step": 83420 }, { "epoch": 548.8815789473684, "grad_norm": 1.0787227153778076, "learning_rate": 0.0001, "loss": 0.0117, "step": 83430 }, { "epoch": 548.9473684210526, "grad_norm": 1.3269912004470825, "learning_rate": 0.0001, "loss": 0.0124, "step": 83440 }, { "epoch": 549.0131578947369, "grad_norm": 1.1982839107513428, "learning_rate": 0.0001, "loss": 0.0107, "step": 83450 }, { "epoch": 549.078947368421, "grad_norm": 1.4429033994674683, "learning_rate": 0.0001, "loss": 0.0106, "step": 83460 }, { "epoch": 549.1447368421053, "grad_norm": 1.1085598468780518, "learning_rate": 0.0001, "loss": 0.0108, "step": 83470 }, { "epoch": 549.2105263157895, "grad_norm": 1.2021725177764893, "learning_rate": 0.0001, "loss": 0.0112, "step": 83480 }, { "epoch": 549.2763157894736, "grad_norm": 0.8998311161994934, "learning_rate": 0.0001, "loss": 0.011, "step": 83490 }, { "epoch": 549.3421052631579, "grad_norm": 1.5632169246673584, "learning_rate": 0.0001, "loss": 0.0124, "step": 83500 }, { "epoch": 549.4078947368421, "grad_norm": 1.549469232559204, "learning_rate": 0.0001, "loss": 0.0119, "step": 83510 }, { "epoch": 549.4736842105264, "grad_norm": 1.4514491558074951, "learning_rate": 0.0001, "loss": 0.0111, "step": 83520 }, { "epoch": 549.5394736842105, "grad_norm": 1.0409729480743408, "learning_rate": 0.0001, "loss": 0.0103, "step": 83530 }, { "epoch": 549.6052631578947, "grad_norm": 1.5041582584381104, "learning_rate": 0.0001, "loss": 0.0112, "step": 83540 }, { "epoch": 549.671052631579, "grad_norm": 0.9774188995361328, "learning_rate": 0.0001, "loss": 0.0107, "step": 83550 }, { "epoch": 549.7368421052631, "grad_norm": 1.5498707294464111, "learning_rate": 0.0001, "loss": 0.0099, "step": 83560 }, { "epoch": 549.8026315789474, "grad_norm": 1.5098849534988403, "learning_rate": 0.0001, "loss": 0.01, "step": 83570 }, { "epoch": 549.8684210526316, "grad_norm": 1.2632389068603516, "learning_rate": 0.0001, "loss": 0.0127, "step": 83580 }, { "epoch": 549.9342105263158, "grad_norm": 1.1932131052017212, "learning_rate": 0.0001, "loss": 0.0112, "step": 83590 }, { "epoch": 550.0, "grad_norm": 1.4617688655853271, "learning_rate": 0.0001, "loss": 0.014, "step": 83600 }, { "epoch": 550.0657894736842, "grad_norm": 1.0467870235443115, "learning_rate": 0.0001, "loss": 0.0119, "step": 83610 }, { "epoch": 550.1315789473684, "grad_norm": 1.3570033311843872, "learning_rate": 0.0001, "loss": 0.0107, "step": 83620 }, { "epoch": 550.1973684210526, "grad_norm": 1.4261776208877563, "learning_rate": 0.0001, "loss": 0.0113, "step": 83630 }, { "epoch": 550.2631578947369, "grad_norm": 1.2804388999938965, "learning_rate": 0.0001, "loss": 0.0115, "step": 83640 }, { "epoch": 550.328947368421, "grad_norm": 1.119381070137024, "learning_rate": 0.0001, "loss": 0.0123, "step": 83650 }, { "epoch": 550.3947368421053, "grad_norm": 1.0963352918624878, "learning_rate": 0.0001, "loss": 0.0102, "step": 83660 }, { "epoch": 550.4605263157895, "grad_norm": 0.940981924533844, "learning_rate": 0.0001, "loss": 0.0114, "step": 83670 }, { "epoch": 550.5263157894736, "grad_norm": 1.4243806600570679, "learning_rate": 0.0001, "loss": 0.0113, "step": 83680 }, { "epoch": 550.5921052631579, "grad_norm": 1.049736738204956, "learning_rate": 0.0001, "loss": 0.0107, "step": 83690 }, { "epoch": 550.6578947368421, "grad_norm": 1.2986576557159424, "learning_rate": 0.0001, "loss": 0.011, "step": 83700 }, { "epoch": 550.7236842105264, "grad_norm": 0.9634553790092468, "learning_rate": 0.0001, "loss": 0.012, "step": 83710 }, { "epoch": 550.7894736842105, "grad_norm": 1.209815263748169, "learning_rate": 0.0001, "loss": 0.0118, "step": 83720 }, { "epoch": 550.8552631578947, "grad_norm": 1.3946318626403809, "learning_rate": 0.0001, "loss": 0.0093, "step": 83730 }, { "epoch": 550.921052631579, "grad_norm": 1.1185719966888428, "learning_rate": 0.0001, "loss": 0.0111, "step": 83740 }, { "epoch": 550.9868421052631, "grad_norm": 0.8527510762214661, "learning_rate": 0.0001, "loss": 0.0094, "step": 83750 }, { "epoch": 551.0526315789474, "grad_norm": 1.235743761062622, "learning_rate": 0.0001, "loss": 0.0131, "step": 83760 }, { "epoch": 551.1184210526316, "grad_norm": 1.2538394927978516, "learning_rate": 0.0001, "loss": 0.0104, "step": 83770 }, { "epoch": 551.1842105263158, "grad_norm": 1.089035987854004, "learning_rate": 0.0001, "loss": 0.0101, "step": 83780 }, { "epoch": 551.25, "grad_norm": 1.1372734308242798, "learning_rate": 0.0001, "loss": 0.0116, "step": 83790 }, { "epoch": 551.3157894736842, "grad_norm": 1.026092290878296, "learning_rate": 0.0001, "loss": 0.0122, "step": 83800 }, { "epoch": 551.3815789473684, "grad_norm": 1.2157622575759888, "learning_rate": 0.0001, "loss": 0.0106, "step": 83810 }, { "epoch": 551.4473684210526, "grad_norm": 1.1005877256393433, "learning_rate": 0.0001, "loss": 0.0114, "step": 83820 }, { "epoch": 551.5131578947369, "grad_norm": 1.059844970703125, "learning_rate": 0.0001, "loss": 0.0103, "step": 83830 }, { "epoch": 551.578947368421, "grad_norm": 1.1124544143676758, "learning_rate": 0.0001, "loss": 0.0121, "step": 83840 }, { "epoch": 551.6447368421053, "grad_norm": 1.0764391422271729, "learning_rate": 0.0001, "loss": 0.0102, "step": 83850 }, { "epoch": 551.7105263157895, "grad_norm": 1.031297206878662, "learning_rate": 0.0001, "loss": 0.0122, "step": 83860 }, { "epoch": 551.7763157894736, "grad_norm": 1.022562026977539, "learning_rate": 0.0001, "loss": 0.01, "step": 83870 }, { "epoch": 551.8421052631579, "grad_norm": 1.1150662899017334, "learning_rate": 0.0001, "loss": 0.0124, "step": 83880 }, { "epoch": 551.9078947368421, "grad_norm": 1.0998282432556152, "learning_rate": 0.0001, "loss": 0.0107, "step": 83890 }, { "epoch": 551.9736842105264, "grad_norm": 1.265975832939148, "learning_rate": 0.0001, "loss": 0.0113, "step": 83900 }, { "epoch": 552.0394736842105, "grad_norm": 1.0744695663452148, "learning_rate": 0.0001, "loss": 0.0105, "step": 83910 }, { "epoch": 552.1052631578947, "grad_norm": 1.1602157354354858, "learning_rate": 0.0001, "loss": 0.011, "step": 83920 }, { "epoch": 552.171052631579, "grad_norm": 1.2619503736495972, "learning_rate": 0.0001, "loss": 0.0114, "step": 83930 }, { "epoch": 552.2368421052631, "grad_norm": 1.3881173133850098, "learning_rate": 0.0001, "loss": 0.0101, "step": 83940 }, { "epoch": 552.3026315789474, "grad_norm": 1.3225895166397095, "learning_rate": 0.0001, "loss": 0.0108, "step": 83950 }, { "epoch": 552.3684210526316, "grad_norm": 1.4558875560760498, "learning_rate": 0.0001, "loss": 0.0109, "step": 83960 }, { "epoch": 552.4342105263158, "grad_norm": 0.8859315514564514, "learning_rate": 0.0001, "loss": 0.0126, "step": 83970 }, { "epoch": 552.5, "grad_norm": 0.9375401735305786, "learning_rate": 0.0001, "loss": 0.0098, "step": 83980 }, { "epoch": 552.5657894736842, "grad_norm": 1.0013401508331299, "learning_rate": 0.0001, "loss": 0.0109, "step": 83990 }, { "epoch": 552.6315789473684, "grad_norm": 0.8222454190254211, "learning_rate": 0.0001, "loss": 0.0111, "step": 84000 }, { "epoch": 552.6973684210526, "grad_norm": 1.0796682834625244, "learning_rate": 0.0001, "loss": 0.011, "step": 84010 }, { "epoch": 552.7631578947369, "grad_norm": 1.3614517450332642, "learning_rate": 0.0001, "loss": 0.013, "step": 84020 }, { "epoch": 552.828947368421, "grad_norm": 1.4528874158859253, "learning_rate": 0.0001, "loss": 0.0112, "step": 84030 }, { "epoch": 552.8947368421053, "grad_norm": 1.2762951850891113, "learning_rate": 0.0001, "loss": 0.0121, "step": 84040 }, { "epoch": 552.9605263157895, "grad_norm": 1.0010347366333008, "learning_rate": 0.0001, "loss": 0.0111, "step": 84050 }, { "epoch": 553.0263157894736, "grad_norm": 1.4103955030441284, "learning_rate": 0.0001, "loss": 0.0109, "step": 84060 }, { "epoch": 553.0921052631579, "grad_norm": 1.2792900800704956, "learning_rate": 0.0001, "loss": 0.0125, "step": 84070 }, { "epoch": 553.1578947368421, "grad_norm": 1.1283965110778809, "learning_rate": 0.0001, "loss": 0.0091, "step": 84080 }, { "epoch": 553.2236842105264, "grad_norm": 1.2378798723220825, "learning_rate": 0.0001, "loss": 0.0115, "step": 84090 }, { "epoch": 553.2894736842105, "grad_norm": 1.0513310432434082, "learning_rate": 0.0001, "loss": 0.0116, "step": 84100 }, { "epoch": 553.3552631578947, "grad_norm": 1.0277812480926514, "learning_rate": 0.0001, "loss": 0.0118, "step": 84110 }, { "epoch": 553.421052631579, "grad_norm": 1.211808681488037, "learning_rate": 0.0001, "loss": 0.0129, "step": 84120 }, { "epoch": 553.4868421052631, "grad_norm": 1.0594956874847412, "learning_rate": 0.0001, "loss": 0.0105, "step": 84130 }, { "epoch": 553.5526315789474, "grad_norm": 1.1370322704315186, "learning_rate": 0.0001, "loss": 0.0114, "step": 84140 }, { "epoch": 553.6184210526316, "grad_norm": 0.897812008857727, "learning_rate": 0.0001, "loss": 0.0102, "step": 84150 }, { "epoch": 553.6842105263158, "grad_norm": 0.9729959964752197, "learning_rate": 0.0001, "loss": 0.0107, "step": 84160 }, { "epoch": 553.75, "grad_norm": 1.1582231521606445, "learning_rate": 0.0001, "loss": 0.0119, "step": 84170 }, { "epoch": 553.8157894736842, "grad_norm": 1.115011215209961, "learning_rate": 0.0001, "loss": 0.0114, "step": 84180 }, { "epoch": 553.8815789473684, "grad_norm": 1.1389967203140259, "learning_rate": 0.0001, "loss": 0.0107, "step": 84190 }, { "epoch": 553.9473684210526, "grad_norm": 1.1534051895141602, "learning_rate": 0.0001, "loss": 0.0111, "step": 84200 }, { "epoch": 554.0131578947369, "grad_norm": 1.1648868322372437, "learning_rate": 0.0001, "loss": 0.012, "step": 84210 }, { "epoch": 554.078947368421, "grad_norm": 0.9899840354919434, "learning_rate": 0.0001, "loss": 0.0111, "step": 84220 }, { "epoch": 554.1447368421053, "grad_norm": 1.097878336906433, "learning_rate": 0.0001, "loss": 0.0113, "step": 84230 }, { "epoch": 554.2105263157895, "grad_norm": 1.1488139629364014, "learning_rate": 0.0001, "loss": 0.0134, "step": 84240 }, { "epoch": 554.2763157894736, "grad_norm": 1.2230936288833618, "learning_rate": 0.0001, "loss": 0.0118, "step": 84250 }, { "epoch": 554.3421052631579, "grad_norm": 1.2412559986114502, "learning_rate": 0.0001, "loss": 0.012, "step": 84260 }, { "epoch": 554.4078947368421, "grad_norm": 1.044669270515442, "learning_rate": 0.0001, "loss": 0.0131, "step": 84270 }, { "epoch": 554.4736842105264, "grad_norm": 0.9399918913841248, "learning_rate": 0.0001, "loss": 0.0104, "step": 84280 }, { "epoch": 554.5394736842105, "grad_norm": 0.8007739782333374, "learning_rate": 0.0001, "loss": 0.0136, "step": 84290 }, { "epoch": 554.6052631578947, "grad_norm": 0.68852698802948, "learning_rate": 0.0001, "loss": 0.0125, "step": 84300 }, { "epoch": 554.671052631579, "grad_norm": 1.3844374418258667, "learning_rate": 0.0001, "loss": 0.0126, "step": 84310 }, { "epoch": 554.7368421052631, "grad_norm": 0.897927463054657, "learning_rate": 0.0001, "loss": 0.0139, "step": 84320 }, { "epoch": 554.8026315789474, "grad_norm": 1.1353065967559814, "learning_rate": 0.0001, "loss": 0.013, "step": 84330 }, { "epoch": 554.8684210526316, "grad_norm": 1.1590604782104492, "learning_rate": 0.0001, "loss": 0.012, "step": 84340 }, { "epoch": 554.9342105263158, "grad_norm": 1.2227908372879028, "learning_rate": 0.0001, "loss": 0.0146, "step": 84350 }, { "epoch": 555.0, "grad_norm": 1.2319128513336182, "learning_rate": 0.0001, "loss": 0.0127, "step": 84360 }, { "epoch": 555.0657894736842, "grad_norm": 1.0367213487625122, "learning_rate": 0.0001, "loss": 0.0131, "step": 84370 }, { "epoch": 555.1315789473684, "grad_norm": 1.069209098815918, "learning_rate": 0.0001, "loss": 0.013, "step": 84380 }, { "epoch": 555.1973684210526, "grad_norm": 1.0245308876037598, "learning_rate": 0.0001, "loss": 0.0125, "step": 84390 }, { "epoch": 555.2631578947369, "grad_norm": 1.2172003984451294, "learning_rate": 0.0001, "loss": 0.011, "step": 84400 }, { "epoch": 555.328947368421, "grad_norm": 1.0955407619476318, "learning_rate": 0.0001, "loss": 0.0116, "step": 84410 }, { "epoch": 555.3947368421053, "grad_norm": 1.0502513647079468, "learning_rate": 0.0001, "loss": 0.0114, "step": 84420 }, { "epoch": 555.4605263157895, "grad_norm": 1.2674477100372314, "learning_rate": 0.0001, "loss": 0.0107, "step": 84430 }, { "epoch": 555.5263157894736, "grad_norm": 1.4851372241973877, "learning_rate": 0.0001, "loss": 0.012, "step": 84440 }, { "epoch": 555.5921052631579, "grad_norm": 1.4355062246322632, "learning_rate": 0.0001, "loss": 0.0119, "step": 84450 }, { "epoch": 555.6578947368421, "grad_norm": 1.2910557985305786, "learning_rate": 0.0001, "loss": 0.0135, "step": 84460 }, { "epoch": 555.7236842105264, "grad_norm": 0.9084872603416443, "learning_rate": 0.0001, "loss": 0.0117, "step": 84470 }, { "epoch": 555.7894736842105, "grad_norm": 1.1866637468338013, "learning_rate": 0.0001, "loss": 0.012, "step": 84480 }, { "epoch": 555.8552631578947, "grad_norm": 1.1410084962844849, "learning_rate": 0.0001, "loss": 0.0105, "step": 84490 }, { "epoch": 555.921052631579, "grad_norm": 1.256248116493225, "learning_rate": 0.0001, "loss": 0.0136, "step": 84500 }, { "epoch": 555.9868421052631, "grad_norm": 1.1614428758621216, "learning_rate": 0.0001, "loss": 0.0123, "step": 84510 }, { "epoch": 556.0526315789474, "grad_norm": 1.1941181421279907, "learning_rate": 0.0001, "loss": 0.0113, "step": 84520 }, { "epoch": 556.1184210526316, "grad_norm": 1.1115273237228394, "learning_rate": 0.0001, "loss": 0.0128, "step": 84530 }, { "epoch": 556.1842105263158, "grad_norm": 0.8539004325866699, "learning_rate": 0.0001, "loss": 0.0127, "step": 84540 }, { "epoch": 556.25, "grad_norm": 1.1488637924194336, "learning_rate": 0.0001, "loss": 0.0121, "step": 84550 }, { "epoch": 556.3157894736842, "grad_norm": 1.1279138326644897, "learning_rate": 0.0001, "loss": 0.0115, "step": 84560 }, { "epoch": 556.3815789473684, "grad_norm": 1.3774553537368774, "learning_rate": 0.0001, "loss": 0.0128, "step": 84570 }, { "epoch": 556.4473684210526, "grad_norm": 1.3250354528427124, "learning_rate": 0.0001, "loss": 0.012, "step": 84580 }, { "epoch": 556.5131578947369, "grad_norm": 1.1280083656311035, "learning_rate": 0.0001, "loss": 0.0119, "step": 84590 }, { "epoch": 556.578947368421, "grad_norm": 0.9719551205635071, "learning_rate": 0.0001, "loss": 0.0124, "step": 84600 }, { "epoch": 556.6447368421053, "grad_norm": 0.8734564781188965, "learning_rate": 0.0001, "loss": 0.0117, "step": 84610 }, { "epoch": 556.7105263157895, "grad_norm": 1.252884864807129, "learning_rate": 0.0001, "loss": 0.0114, "step": 84620 }, { "epoch": 556.7763157894736, "grad_norm": 1.2169554233551025, "learning_rate": 0.0001, "loss": 0.012, "step": 84630 }, { "epoch": 556.8421052631579, "grad_norm": 1.3537654876708984, "learning_rate": 0.0001, "loss": 0.0106, "step": 84640 }, { "epoch": 556.9078947368421, "grad_norm": 1.1690727472305298, "learning_rate": 0.0001, "loss": 0.0107, "step": 84650 }, { "epoch": 556.9736842105264, "grad_norm": 1.4341109991073608, "learning_rate": 0.0001, "loss": 0.0113, "step": 84660 }, { "epoch": 557.0394736842105, "grad_norm": 1.1728025674819946, "learning_rate": 0.0001, "loss": 0.0101, "step": 84670 }, { "epoch": 557.1052631578947, "grad_norm": 1.1351969242095947, "learning_rate": 0.0001, "loss": 0.0111, "step": 84680 }, { "epoch": 557.171052631579, "grad_norm": 1.049938440322876, "learning_rate": 0.0001, "loss": 0.0097, "step": 84690 }, { "epoch": 557.2368421052631, "grad_norm": 1.0241278409957886, "learning_rate": 0.0001, "loss": 0.0107, "step": 84700 }, { "epoch": 557.3026315789474, "grad_norm": 1.357133150100708, "learning_rate": 0.0001, "loss": 0.0106, "step": 84710 }, { "epoch": 557.3684210526316, "grad_norm": 1.2477604150772095, "learning_rate": 0.0001, "loss": 0.0127, "step": 84720 }, { "epoch": 557.4342105263158, "grad_norm": 1.1618016958236694, "learning_rate": 0.0001, "loss": 0.0134, "step": 84730 }, { "epoch": 557.5, "grad_norm": 1.4906044006347656, "learning_rate": 0.0001, "loss": 0.0123, "step": 84740 }, { "epoch": 557.5657894736842, "grad_norm": 1.6575461626052856, "learning_rate": 0.0001, "loss": 0.0108, "step": 84750 }, { "epoch": 557.6315789473684, "grad_norm": 1.5979561805725098, "learning_rate": 0.0001, "loss": 0.01, "step": 84760 }, { "epoch": 557.6973684210526, "grad_norm": 1.49028480052948, "learning_rate": 0.0001, "loss": 0.0104, "step": 84770 }, { "epoch": 557.7631578947369, "grad_norm": 1.476001501083374, "learning_rate": 0.0001, "loss": 0.0119, "step": 84780 }, { "epoch": 557.828947368421, "grad_norm": 1.1101016998291016, "learning_rate": 0.0001, "loss": 0.0108, "step": 84790 }, { "epoch": 557.8947368421053, "grad_norm": 1.2816787958145142, "learning_rate": 0.0001, "loss": 0.0135, "step": 84800 }, { "epoch": 557.9605263157895, "grad_norm": 1.4827154874801636, "learning_rate": 0.0001, "loss": 0.0109, "step": 84810 }, { "epoch": 558.0263157894736, "grad_norm": 1.2179068326950073, "learning_rate": 0.0001, "loss": 0.0101, "step": 84820 }, { "epoch": 558.0921052631579, "grad_norm": 1.0725390911102295, "learning_rate": 0.0001, "loss": 0.0093, "step": 84830 }, { "epoch": 558.1578947368421, "grad_norm": 1.1772629022598267, "learning_rate": 0.0001, "loss": 0.0122, "step": 84840 }, { "epoch": 558.2236842105264, "grad_norm": 1.1219836473464966, "learning_rate": 0.0001, "loss": 0.0099, "step": 84850 }, { "epoch": 558.2894736842105, "grad_norm": 1.1598541736602783, "learning_rate": 0.0001, "loss": 0.0126, "step": 84860 }, { "epoch": 558.3552631578947, "grad_norm": 1.2729027271270752, "learning_rate": 0.0001, "loss": 0.009, "step": 84870 }, { "epoch": 558.421052631579, "grad_norm": 1.107276201248169, "learning_rate": 0.0001, "loss": 0.0131, "step": 84880 }, { "epoch": 558.4868421052631, "grad_norm": 0.9644288420677185, "learning_rate": 0.0001, "loss": 0.0102, "step": 84890 }, { "epoch": 558.5526315789474, "grad_norm": 0.8253970742225647, "learning_rate": 0.0001, "loss": 0.0101, "step": 84900 }, { "epoch": 558.6184210526316, "grad_norm": 0.98391193151474, "learning_rate": 0.0001, "loss": 0.0123, "step": 84910 }, { "epoch": 558.6842105263158, "grad_norm": 1.0832492113113403, "learning_rate": 0.0001, "loss": 0.0147, "step": 84920 }, { "epoch": 558.75, "grad_norm": 0.9063038229942322, "learning_rate": 0.0001, "loss": 0.0104, "step": 84930 }, { "epoch": 558.8157894736842, "grad_norm": 1.0386995077133179, "learning_rate": 0.0001, "loss": 0.0106, "step": 84940 }, { "epoch": 558.8815789473684, "grad_norm": 0.7958237528800964, "learning_rate": 0.0001, "loss": 0.0105, "step": 84950 }, { "epoch": 558.9473684210526, "grad_norm": 0.860396683216095, "learning_rate": 0.0001, "loss": 0.0107, "step": 84960 }, { "epoch": 559.0131578947369, "grad_norm": 0.9728363752365112, "learning_rate": 0.0001, "loss": 0.0138, "step": 84970 }, { "epoch": 559.078947368421, "grad_norm": 1.1759837865829468, "learning_rate": 0.0001, "loss": 0.0117, "step": 84980 }, { "epoch": 559.1447368421053, "grad_norm": 1.0870031118392944, "learning_rate": 0.0001, "loss": 0.0118, "step": 84990 }, { "epoch": 559.2105263157895, "grad_norm": 1.1532487869262695, "learning_rate": 0.0001, "loss": 0.0121, "step": 85000 }, { "epoch": 559.2763157894736, "grad_norm": 1.2575640678405762, "learning_rate": 0.0001, "loss": 0.0116, "step": 85010 }, { "epoch": 559.3421052631579, "grad_norm": 1.3786592483520508, "learning_rate": 0.0001, "loss": 0.0133, "step": 85020 }, { "epoch": 559.4078947368421, "grad_norm": 0.8900845050811768, "learning_rate": 0.0001, "loss": 0.0106, "step": 85030 }, { "epoch": 559.4736842105264, "grad_norm": 1.2417876720428467, "learning_rate": 0.0001, "loss": 0.0112, "step": 85040 }, { "epoch": 559.5394736842105, "grad_norm": 1.2613035440444946, "learning_rate": 0.0001, "loss": 0.0108, "step": 85050 }, { "epoch": 559.6052631578947, "grad_norm": 0.8748085498809814, "learning_rate": 0.0001, "loss": 0.011, "step": 85060 }, { "epoch": 559.671052631579, "grad_norm": 1.059360384941101, "learning_rate": 0.0001, "loss": 0.0107, "step": 85070 }, { "epoch": 559.7368421052631, "grad_norm": 0.8595458269119263, "learning_rate": 0.0001, "loss": 0.0101, "step": 85080 }, { "epoch": 559.8026315789474, "grad_norm": 1.0253055095672607, "learning_rate": 0.0001, "loss": 0.0113, "step": 85090 }, { "epoch": 559.8684210526316, "grad_norm": 1.1457302570343018, "learning_rate": 0.0001, "loss": 0.0127, "step": 85100 }, { "epoch": 559.9342105263158, "grad_norm": 1.5385023355484009, "learning_rate": 0.0001, "loss": 0.0107, "step": 85110 }, { "epoch": 560.0, "grad_norm": 1.041575312614441, "learning_rate": 0.0001, "loss": 0.0113, "step": 85120 }, { "epoch": 560.0657894736842, "grad_norm": 0.9644649624824524, "learning_rate": 0.0001, "loss": 0.0141, "step": 85130 }, { "epoch": 560.1315789473684, "grad_norm": 1.3442351818084717, "learning_rate": 0.0001, "loss": 0.0105, "step": 85140 }, { "epoch": 560.1973684210526, "grad_norm": 1.4101388454437256, "learning_rate": 0.0001, "loss": 0.0108, "step": 85150 }, { "epoch": 560.2631578947369, "grad_norm": 1.3312458992004395, "learning_rate": 0.0001, "loss": 0.0102, "step": 85160 }, { "epoch": 560.328947368421, "grad_norm": 1.2617542743682861, "learning_rate": 0.0001, "loss": 0.0109, "step": 85170 }, { "epoch": 560.3947368421053, "grad_norm": 0.9706904888153076, "learning_rate": 0.0001, "loss": 0.0124, "step": 85180 }, { "epoch": 560.4605263157895, "grad_norm": 1.0391241312026978, "learning_rate": 0.0001, "loss": 0.0119, "step": 85190 }, { "epoch": 560.5263157894736, "grad_norm": 0.7928785085678101, "learning_rate": 0.0001, "loss": 0.0108, "step": 85200 }, { "epoch": 560.5921052631579, "grad_norm": 0.8561776280403137, "learning_rate": 0.0001, "loss": 0.012, "step": 85210 }, { "epoch": 560.6578947368421, "grad_norm": 1.097593903541565, "learning_rate": 0.0001, "loss": 0.0113, "step": 85220 }, { "epoch": 560.7236842105264, "grad_norm": 0.9008827805519104, "learning_rate": 0.0001, "loss": 0.011, "step": 85230 }, { "epoch": 560.7894736842105, "grad_norm": 1.3910067081451416, "learning_rate": 0.0001, "loss": 0.0122, "step": 85240 }, { "epoch": 560.8552631578947, "grad_norm": 1.2383750677108765, "learning_rate": 0.0001, "loss": 0.0107, "step": 85250 }, { "epoch": 560.921052631579, "grad_norm": 1.2001198530197144, "learning_rate": 0.0001, "loss": 0.01, "step": 85260 }, { "epoch": 560.9868421052631, "grad_norm": 1.0926671028137207, "learning_rate": 0.0001, "loss": 0.0101, "step": 85270 }, { "epoch": 561.0526315789474, "grad_norm": 1.1885424852371216, "learning_rate": 0.0001, "loss": 0.0107, "step": 85280 }, { "epoch": 561.1184210526316, "grad_norm": 1.3044886589050293, "learning_rate": 0.0001, "loss": 0.0114, "step": 85290 }, { "epoch": 561.1842105263158, "grad_norm": 1.361910104751587, "learning_rate": 0.0001, "loss": 0.0108, "step": 85300 }, { "epoch": 561.25, "grad_norm": 1.2132842540740967, "learning_rate": 0.0001, "loss": 0.0127, "step": 85310 }, { "epoch": 561.3157894736842, "grad_norm": 1.2285622358322144, "learning_rate": 0.0001, "loss": 0.0119, "step": 85320 }, { "epoch": 561.3815789473684, "grad_norm": 1.0628094673156738, "learning_rate": 0.0001, "loss": 0.012, "step": 85330 }, { "epoch": 561.4473684210526, "grad_norm": 1.0431807041168213, "learning_rate": 0.0001, "loss": 0.0111, "step": 85340 }, { "epoch": 561.5131578947369, "grad_norm": 0.8674909472465515, "learning_rate": 0.0001, "loss": 0.0093, "step": 85350 }, { "epoch": 561.578947368421, "grad_norm": 0.8657084107398987, "learning_rate": 0.0001, "loss": 0.01, "step": 85360 }, { "epoch": 561.6447368421053, "grad_norm": 1.0542387962341309, "learning_rate": 0.0001, "loss": 0.0099, "step": 85370 }, { "epoch": 561.7105263157895, "grad_norm": 1.1342475414276123, "learning_rate": 0.0001, "loss": 0.0123, "step": 85380 }, { "epoch": 561.7763157894736, "grad_norm": 1.0846221446990967, "learning_rate": 0.0001, "loss": 0.0111, "step": 85390 }, { "epoch": 561.8421052631579, "grad_norm": 1.0843048095703125, "learning_rate": 0.0001, "loss": 0.0124, "step": 85400 }, { "epoch": 561.9078947368421, "grad_norm": 1.266693115234375, "learning_rate": 0.0001, "loss": 0.0104, "step": 85410 }, { "epoch": 561.9736842105264, "grad_norm": 1.3362053632736206, "learning_rate": 0.0001, "loss": 0.0097, "step": 85420 }, { "epoch": 562.0394736842105, "grad_norm": 1.1382287740707397, "learning_rate": 0.0001, "loss": 0.0114, "step": 85430 }, { "epoch": 562.1052631578947, "grad_norm": 1.0653843879699707, "learning_rate": 0.0001, "loss": 0.0092, "step": 85440 }, { "epoch": 562.171052631579, "grad_norm": 0.9367539882659912, "learning_rate": 0.0001, "loss": 0.0121, "step": 85450 }, { "epoch": 562.2368421052631, "grad_norm": 0.8476194143295288, "learning_rate": 0.0001, "loss": 0.0113, "step": 85460 }, { "epoch": 562.3026315789474, "grad_norm": 1.0958266258239746, "learning_rate": 0.0001, "loss": 0.0097, "step": 85470 }, { "epoch": 562.3684210526316, "grad_norm": 1.2579114437103271, "learning_rate": 0.0001, "loss": 0.0113, "step": 85480 }, { "epoch": 562.4342105263158, "grad_norm": 1.1456224918365479, "learning_rate": 0.0001, "loss": 0.0124, "step": 85490 }, { "epoch": 562.5, "grad_norm": 1.2678587436676025, "learning_rate": 0.0001, "loss": 0.0121, "step": 85500 }, { "epoch": 562.5657894736842, "grad_norm": 1.0048190355300903, "learning_rate": 0.0001, "loss": 0.0113, "step": 85510 }, { "epoch": 562.6315789473684, "grad_norm": 0.7200008630752563, "learning_rate": 0.0001, "loss": 0.0099, "step": 85520 }, { "epoch": 562.6973684210526, "grad_norm": 1.4088075160980225, "learning_rate": 0.0001, "loss": 0.0115, "step": 85530 }, { "epoch": 562.7631578947369, "grad_norm": 1.2613880634307861, "learning_rate": 0.0001, "loss": 0.0109, "step": 85540 }, { "epoch": 562.828947368421, "grad_norm": 1.0894513130187988, "learning_rate": 0.0001, "loss": 0.0115, "step": 85550 }, { "epoch": 562.8947368421053, "grad_norm": 1.0057373046875, "learning_rate": 0.0001, "loss": 0.0114, "step": 85560 }, { "epoch": 562.9605263157895, "grad_norm": 1.1327048540115356, "learning_rate": 0.0001, "loss": 0.0092, "step": 85570 }, { "epoch": 563.0263157894736, "grad_norm": 1.3295555114746094, "learning_rate": 0.0001, "loss": 0.0109, "step": 85580 }, { "epoch": 563.0921052631579, "grad_norm": 1.098968505859375, "learning_rate": 0.0001, "loss": 0.0126, "step": 85590 }, { "epoch": 563.1578947368421, "grad_norm": 1.1151443719863892, "learning_rate": 0.0001, "loss": 0.0108, "step": 85600 }, { "epoch": 563.2236842105264, "grad_norm": 1.261618971824646, "learning_rate": 0.0001, "loss": 0.0105, "step": 85610 }, { "epoch": 563.2894736842105, "grad_norm": 0.6959012150764465, "learning_rate": 0.0001, "loss": 0.011, "step": 85620 }, { "epoch": 563.3552631578947, "grad_norm": 0.9894868731498718, "learning_rate": 0.0001, "loss": 0.0129, "step": 85630 }, { "epoch": 563.421052631579, "grad_norm": 0.8784093260765076, "learning_rate": 0.0001, "loss": 0.0094, "step": 85640 }, { "epoch": 563.4868421052631, "grad_norm": 0.5873738527297974, "learning_rate": 0.0001, "loss": 0.0126, "step": 85650 }, { "epoch": 563.5526315789474, "grad_norm": 0.7651575803756714, "learning_rate": 0.0001, "loss": 0.012, "step": 85660 }, { "epoch": 563.6184210526316, "grad_norm": 0.6802495718002319, "learning_rate": 0.0001, "loss": 0.0105, "step": 85670 }, { "epoch": 563.6842105263158, "grad_norm": 0.948125422000885, "learning_rate": 0.0001, "loss": 0.0098, "step": 85680 }, { "epoch": 563.75, "grad_norm": 1.1198630332946777, "learning_rate": 0.0001, "loss": 0.0128, "step": 85690 }, { "epoch": 563.8157894736842, "grad_norm": 1.2532132863998413, "learning_rate": 0.0001, "loss": 0.0109, "step": 85700 }, { "epoch": 563.8815789473684, "grad_norm": 0.9288182854652405, "learning_rate": 0.0001, "loss": 0.0119, "step": 85710 }, { "epoch": 563.9473684210526, "grad_norm": 1.1498973369598389, "learning_rate": 0.0001, "loss": 0.0111, "step": 85720 }, { "epoch": 564.0131578947369, "grad_norm": 0.9289548397064209, "learning_rate": 0.0001, "loss": 0.0103, "step": 85730 }, { "epoch": 564.078947368421, "grad_norm": 0.6125806570053101, "learning_rate": 0.0001, "loss": 0.0099, "step": 85740 }, { "epoch": 564.1447368421053, "grad_norm": 0.762289822101593, "learning_rate": 0.0001, "loss": 0.0112, "step": 85750 }, { "epoch": 564.2105263157895, "grad_norm": 1.2654229402542114, "learning_rate": 0.0001, "loss": 0.0113, "step": 85760 }, { "epoch": 564.2763157894736, "grad_norm": 1.2169716358184814, "learning_rate": 0.0001, "loss": 0.0102, "step": 85770 }, { "epoch": 564.3421052631579, "grad_norm": 1.0589436292648315, "learning_rate": 0.0001, "loss": 0.0112, "step": 85780 }, { "epoch": 564.4078947368421, "grad_norm": 0.9136533141136169, "learning_rate": 0.0001, "loss": 0.0104, "step": 85790 }, { "epoch": 564.4736842105264, "grad_norm": 0.831851601600647, "learning_rate": 0.0001, "loss": 0.0113, "step": 85800 }, { "epoch": 564.5394736842105, "grad_norm": 0.9365316033363342, "learning_rate": 0.0001, "loss": 0.0115, "step": 85810 }, { "epoch": 564.6052631578947, "grad_norm": 1.4530662298202515, "learning_rate": 0.0001, "loss": 0.011, "step": 85820 }, { "epoch": 564.671052631579, "grad_norm": 1.1239123344421387, "learning_rate": 0.0001, "loss": 0.0118, "step": 85830 }, { "epoch": 564.7368421052631, "grad_norm": 1.2323276996612549, "learning_rate": 0.0001, "loss": 0.0124, "step": 85840 }, { "epoch": 564.8026315789474, "grad_norm": 0.9077661633491516, "learning_rate": 0.0001, "loss": 0.0101, "step": 85850 }, { "epoch": 564.8684210526316, "grad_norm": 1.2481967210769653, "learning_rate": 0.0001, "loss": 0.0132, "step": 85860 }, { "epoch": 564.9342105263158, "grad_norm": 1.2315006256103516, "learning_rate": 0.0001, "loss": 0.0102, "step": 85870 }, { "epoch": 565.0, "grad_norm": 1.2639683485031128, "learning_rate": 0.0001, "loss": 0.0126, "step": 85880 }, { "epoch": 565.0657894736842, "grad_norm": 1.2742642164230347, "learning_rate": 0.0001, "loss": 0.0111, "step": 85890 }, { "epoch": 565.1315789473684, "grad_norm": 1.1369564533233643, "learning_rate": 0.0001, "loss": 0.0118, "step": 85900 }, { "epoch": 565.1973684210526, "grad_norm": 1.4705597162246704, "learning_rate": 0.0001, "loss": 0.0108, "step": 85910 }, { "epoch": 565.2631578947369, "grad_norm": 1.326097846031189, "learning_rate": 0.0001, "loss": 0.0105, "step": 85920 }, { "epoch": 565.328947368421, "grad_norm": 0.9751961827278137, "learning_rate": 0.0001, "loss": 0.0128, "step": 85930 }, { "epoch": 565.3947368421053, "grad_norm": 1.3746706247329712, "learning_rate": 0.0001, "loss": 0.0123, "step": 85940 }, { "epoch": 565.4605263157895, "grad_norm": 1.2249608039855957, "learning_rate": 0.0001, "loss": 0.0139, "step": 85950 }, { "epoch": 565.5263157894736, "grad_norm": 1.0640735626220703, "learning_rate": 0.0001, "loss": 0.0119, "step": 85960 }, { "epoch": 565.5921052631579, "grad_norm": 1.0786769390106201, "learning_rate": 0.0001, "loss": 0.0126, "step": 85970 }, { "epoch": 565.6578947368421, "grad_norm": 1.5025138854980469, "learning_rate": 0.0001, "loss": 0.011, "step": 85980 }, { "epoch": 565.7236842105264, "grad_norm": 1.311964511871338, "learning_rate": 0.0001, "loss": 0.0111, "step": 85990 }, { "epoch": 565.7894736842105, "grad_norm": 1.1438058614730835, "learning_rate": 0.0001, "loss": 0.0118, "step": 86000 }, { "epoch": 565.8552631578947, "grad_norm": 0.921307384967804, "learning_rate": 0.0001, "loss": 0.014, "step": 86010 }, { "epoch": 565.921052631579, "grad_norm": 1.0109132528305054, "learning_rate": 0.0001, "loss": 0.0123, "step": 86020 }, { "epoch": 565.9868421052631, "grad_norm": 1.3036121129989624, "learning_rate": 0.0001, "loss": 0.0115, "step": 86030 }, { "epoch": 566.0526315789474, "grad_norm": 1.1101711988449097, "learning_rate": 0.0001, "loss": 0.01, "step": 86040 }, { "epoch": 566.1184210526316, "grad_norm": 1.3747954368591309, "learning_rate": 0.0001, "loss": 0.01, "step": 86050 }, { "epoch": 566.1842105263158, "grad_norm": 1.133420705795288, "learning_rate": 0.0001, "loss": 0.0125, "step": 86060 }, { "epoch": 566.25, "grad_norm": 1.0541547536849976, "learning_rate": 0.0001, "loss": 0.0114, "step": 86070 }, { "epoch": 566.3157894736842, "grad_norm": 1.0774303674697876, "learning_rate": 0.0001, "loss": 0.0105, "step": 86080 }, { "epoch": 566.3815789473684, "grad_norm": 1.3107317686080933, "learning_rate": 0.0001, "loss": 0.0127, "step": 86090 }, { "epoch": 566.4473684210526, "grad_norm": 0.8362441658973694, "learning_rate": 0.0001, "loss": 0.0114, "step": 86100 }, { "epoch": 566.5131578947369, "grad_norm": 1.1291522979736328, "learning_rate": 0.0001, "loss": 0.0121, "step": 86110 }, { "epoch": 566.578947368421, "grad_norm": 1.062232494354248, "learning_rate": 0.0001, "loss": 0.0117, "step": 86120 }, { "epoch": 566.6447368421053, "grad_norm": 1.1691906452178955, "learning_rate": 0.0001, "loss": 0.012, "step": 86130 }, { "epoch": 566.7105263157895, "grad_norm": 1.1058992147445679, "learning_rate": 0.0001, "loss": 0.0104, "step": 86140 }, { "epoch": 566.7763157894736, "grad_norm": 0.6713123321533203, "learning_rate": 0.0001, "loss": 0.011, "step": 86150 }, { "epoch": 566.8421052631579, "grad_norm": 0.7876428365707397, "learning_rate": 0.0001, "loss": 0.0111, "step": 86160 }, { "epoch": 566.9078947368421, "grad_norm": 1.0768256187438965, "learning_rate": 0.0001, "loss": 0.012, "step": 86170 }, { "epoch": 566.9736842105264, "grad_norm": 1.6893032789230347, "learning_rate": 0.0001, "loss": 0.011, "step": 86180 }, { "epoch": 567.0394736842105, "grad_norm": 0.7960553765296936, "learning_rate": 0.0001, "loss": 0.0105, "step": 86190 }, { "epoch": 567.1052631578947, "grad_norm": 0.6956373453140259, "learning_rate": 0.0001, "loss": 0.0124, "step": 86200 }, { "epoch": 567.171052631579, "grad_norm": 1.1536741256713867, "learning_rate": 0.0001, "loss": 0.0132, "step": 86210 }, { "epoch": 567.2368421052631, "grad_norm": 0.8399643301963806, "learning_rate": 0.0001, "loss": 0.0128, "step": 86220 }, { "epoch": 567.3026315789474, "grad_norm": 0.8098105192184448, "learning_rate": 0.0001, "loss": 0.012, "step": 86230 }, { "epoch": 567.3684210526316, "grad_norm": 0.8427768349647522, "learning_rate": 0.0001, "loss": 0.0091, "step": 86240 }, { "epoch": 567.4342105263158, "grad_norm": 0.9141896963119507, "learning_rate": 0.0001, "loss": 0.011, "step": 86250 }, { "epoch": 567.5, "grad_norm": 1.6003632545471191, "learning_rate": 0.0001, "loss": 0.01, "step": 86260 }, { "epoch": 567.5657894736842, "grad_norm": 1.1154911518096924, "learning_rate": 0.0001, "loss": 0.0134, "step": 86270 }, { "epoch": 567.6315789473684, "grad_norm": 1.1013606786727905, "learning_rate": 0.0001, "loss": 0.0113, "step": 86280 }, { "epoch": 567.6973684210526, "grad_norm": 1.0994194746017456, "learning_rate": 0.0001, "loss": 0.0101, "step": 86290 }, { "epoch": 567.7631578947369, "grad_norm": 0.9053112864494324, "learning_rate": 0.0001, "loss": 0.0115, "step": 86300 }, { "epoch": 567.828947368421, "grad_norm": 1.4429203271865845, "learning_rate": 0.0001, "loss": 0.0106, "step": 86310 }, { "epoch": 567.8947368421053, "grad_norm": 1.0973800420761108, "learning_rate": 0.0001, "loss": 0.0113, "step": 86320 }, { "epoch": 567.9605263157895, "grad_norm": 1.2074874639511108, "learning_rate": 0.0001, "loss": 0.0112, "step": 86330 }, { "epoch": 568.0263157894736, "grad_norm": 1.1671373844146729, "learning_rate": 0.0001, "loss": 0.0108, "step": 86340 }, { "epoch": 568.0921052631579, "grad_norm": 1.0695538520812988, "learning_rate": 0.0001, "loss": 0.0105, "step": 86350 }, { "epoch": 568.1578947368421, "grad_norm": 1.335986852645874, "learning_rate": 0.0001, "loss": 0.0112, "step": 86360 }, { "epoch": 568.2236842105264, "grad_norm": 1.4699814319610596, "learning_rate": 0.0001, "loss": 0.0102, "step": 86370 }, { "epoch": 568.2894736842105, "grad_norm": 1.071112871170044, "learning_rate": 0.0001, "loss": 0.0106, "step": 86380 }, { "epoch": 568.3552631578947, "grad_norm": 1.1350902318954468, "learning_rate": 0.0001, "loss": 0.0118, "step": 86390 }, { "epoch": 568.421052631579, "grad_norm": 1.3547534942626953, "learning_rate": 0.0001, "loss": 0.0118, "step": 86400 }, { "epoch": 568.4868421052631, "grad_norm": 1.5289572477340698, "learning_rate": 0.0001, "loss": 0.013, "step": 86410 }, { "epoch": 568.5526315789474, "grad_norm": 1.0102062225341797, "learning_rate": 0.0001, "loss": 0.0133, "step": 86420 }, { "epoch": 568.6184210526316, "grad_norm": 0.8501527309417725, "learning_rate": 0.0001, "loss": 0.0117, "step": 86430 }, { "epoch": 568.6842105263158, "grad_norm": 0.9667913317680359, "learning_rate": 0.0001, "loss": 0.0093, "step": 86440 }, { "epoch": 568.75, "grad_norm": 1.1724212169647217, "learning_rate": 0.0001, "loss": 0.0106, "step": 86450 }, { "epoch": 568.8157894736842, "grad_norm": 1.0859756469726562, "learning_rate": 0.0001, "loss": 0.0122, "step": 86460 }, { "epoch": 568.8815789473684, "grad_norm": 1.2196053266525269, "learning_rate": 0.0001, "loss": 0.0126, "step": 86470 }, { "epoch": 568.9473684210526, "grad_norm": 0.9872120022773743, "learning_rate": 0.0001, "loss": 0.0105, "step": 86480 }, { "epoch": 569.0131578947369, "grad_norm": 1.1090824604034424, "learning_rate": 0.0001, "loss": 0.0108, "step": 86490 }, { "epoch": 569.078947368421, "grad_norm": 1.0156619548797607, "learning_rate": 0.0001, "loss": 0.0094, "step": 86500 }, { "epoch": 569.1447368421053, "grad_norm": 0.6294759511947632, "learning_rate": 0.0001, "loss": 0.0115, "step": 86510 }, { "epoch": 569.2105263157895, "grad_norm": 0.8278417587280273, "learning_rate": 0.0001, "loss": 0.0112, "step": 86520 }, { "epoch": 569.2763157894736, "grad_norm": 0.8335527777671814, "learning_rate": 0.0001, "loss": 0.0103, "step": 86530 }, { "epoch": 569.3421052631579, "grad_norm": 0.728941798210144, "learning_rate": 0.0001, "loss": 0.0106, "step": 86540 }, { "epoch": 569.4078947368421, "grad_norm": 1.1835122108459473, "learning_rate": 0.0001, "loss": 0.011, "step": 86550 }, { "epoch": 569.4736842105264, "grad_norm": 1.3987102508544922, "learning_rate": 0.0001, "loss": 0.0124, "step": 86560 }, { "epoch": 569.5394736842105, "grad_norm": 1.288018822669983, "learning_rate": 0.0001, "loss": 0.0135, "step": 86570 }, { "epoch": 569.6052631578947, "grad_norm": 1.1396912336349487, "learning_rate": 0.0001, "loss": 0.0104, "step": 86580 }, { "epoch": 569.671052631579, "grad_norm": 0.8793830871582031, "learning_rate": 0.0001, "loss": 0.0121, "step": 86590 }, { "epoch": 569.7368421052631, "grad_norm": 0.8946493268013, "learning_rate": 0.0001, "loss": 0.0099, "step": 86600 }, { "epoch": 569.8026315789474, "grad_norm": 0.8746724724769592, "learning_rate": 0.0001, "loss": 0.0116, "step": 86610 }, { "epoch": 569.8684210526316, "grad_norm": 0.7413333654403687, "learning_rate": 0.0001, "loss": 0.0104, "step": 86620 }, { "epoch": 569.9342105263158, "grad_norm": 1.0053696632385254, "learning_rate": 0.0001, "loss": 0.0112, "step": 86630 }, { "epoch": 570.0, "grad_norm": 1.109494924545288, "learning_rate": 0.0001, "loss": 0.0117, "step": 86640 }, { "epoch": 570.0657894736842, "grad_norm": 1.0057399272918701, "learning_rate": 0.0001, "loss": 0.0096, "step": 86650 }, { "epoch": 570.1315789473684, "grad_norm": 0.9789668321609497, "learning_rate": 0.0001, "loss": 0.0125, "step": 86660 }, { "epoch": 570.1973684210526, "grad_norm": 1.0310949087142944, "learning_rate": 0.0001, "loss": 0.0101, "step": 86670 }, { "epoch": 570.2631578947369, "grad_norm": 0.9389269351959229, "learning_rate": 0.0001, "loss": 0.0101, "step": 86680 }, { "epoch": 570.328947368421, "grad_norm": 0.8357144594192505, "learning_rate": 0.0001, "loss": 0.0138, "step": 86690 }, { "epoch": 570.3947368421053, "grad_norm": 0.850374698638916, "learning_rate": 0.0001, "loss": 0.0122, "step": 86700 }, { "epoch": 570.4605263157895, "grad_norm": 0.8948403596878052, "learning_rate": 0.0001, "loss": 0.0112, "step": 86710 }, { "epoch": 570.5263157894736, "grad_norm": 1.1358330249786377, "learning_rate": 0.0001, "loss": 0.0114, "step": 86720 }, { "epoch": 570.5921052631579, "grad_norm": 1.393713355064392, "learning_rate": 0.0001, "loss": 0.0111, "step": 86730 }, { "epoch": 570.6578947368421, "grad_norm": 1.0343717336654663, "learning_rate": 0.0001, "loss": 0.0115, "step": 86740 }, { "epoch": 570.7236842105264, "grad_norm": 0.8011754751205444, "learning_rate": 0.0001, "loss": 0.0118, "step": 86750 }, { "epoch": 570.7894736842105, "grad_norm": 1.0685820579528809, "learning_rate": 0.0001, "loss": 0.0107, "step": 86760 }, { "epoch": 570.8552631578947, "grad_norm": 1.2427990436553955, "learning_rate": 0.0001, "loss": 0.0122, "step": 86770 }, { "epoch": 570.921052631579, "grad_norm": 1.2266359329223633, "learning_rate": 0.0001, "loss": 0.0096, "step": 86780 }, { "epoch": 570.9868421052631, "grad_norm": 1.432370662689209, "learning_rate": 0.0001, "loss": 0.0117, "step": 86790 }, { "epoch": 571.0526315789474, "grad_norm": 1.2090051174163818, "learning_rate": 0.0001, "loss": 0.0116, "step": 86800 }, { "epoch": 571.1184210526316, "grad_norm": 0.8846063017845154, "learning_rate": 0.0001, "loss": 0.0113, "step": 86810 }, { "epoch": 571.1842105263158, "grad_norm": 1.1082240343093872, "learning_rate": 0.0001, "loss": 0.0111, "step": 86820 }, { "epoch": 571.25, "grad_norm": 1.0534160137176514, "learning_rate": 0.0001, "loss": 0.0115, "step": 86830 }, { "epoch": 571.3157894736842, "grad_norm": 1.1436423063278198, "learning_rate": 0.0001, "loss": 0.0117, "step": 86840 }, { "epoch": 571.3815789473684, "grad_norm": 0.8122831583023071, "learning_rate": 0.0001, "loss": 0.0098, "step": 86850 }, { "epoch": 571.4473684210526, "grad_norm": 1.0820256471633911, "learning_rate": 0.0001, "loss": 0.0094, "step": 86860 }, { "epoch": 571.5131578947369, "grad_norm": 0.848965585231781, "learning_rate": 0.0001, "loss": 0.0112, "step": 86870 }, { "epoch": 571.578947368421, "grad_norm": 1.2625044584274292, "learning_rate": 0.0001, "loss": 0.0105, "step": 86880 }, { "epoch": 571.6447368421053, "grad_norm": 1.3473550081253052, "learning_rate": 0.0001, "loss": 0.0098, "step": 86890 }, { "epoch": 571.7105263157895, "grad_norm": 1.336229920387268, "learning_rate": 0.0001, "loss": 0.0112, "step": 86900 }, { "epoch": 571.7763157894736, "grad_norm": 0.9691115617752075, "learning_rate": 0.0001, "loss": 0.0114, "step": 86910 }, { "epoch": 571.8421052631579, "grad_norm": 1.2057015895843506, "learning_rate": 0.0001, "loss": 0.0107, "step": 86920 }, { "epoch": 571.9078947368421, "grad_norm": 1.0640344619750977, "learning_rate": 0.0001, "loss": 0.0108, "step": 86930 }, { "epoch": 571.9736842105264, "grad_norm": 1.1055980920791626, "learning_rate": 0.0001, "loss": 0.0103, "step": 86940 }, { "epoch": 572.0394736842105, "grad_norm": 0.6833211779594421, "learning_rate": 0.0001, "loss": 0.013, "step": 86950 }, { "epoch": 572.1052631578947, "grad_norm": 1.1310468912124634, "learning_rate": 0.0001, "loss": 0.0091, "step": 86960 }, { "epoch": 572.171052631579, "grad_norm": 1.280174970626831, "learning_rate": 0.0001, "loss": 0.011, "step": 86970 }, { "epoch": 572.2368421052631, "grad_norm": 1.0595585107803345, "learning_rate": 0.0001, "loss": 0.0109, "step": 86980 }, { "epoch": 572.3026315789474, "grad_norm": 1.1463285684585571, "learning_rate": 0.0001, "loss": 0.0119, "step": 86990 }, { "epoch": 572.3684210526316, "grad_norm": 1.0951709747314453, "learning_rate": 0.0001, "loss": 0.0096, "step": 87000 }, { "epoch": 572.4342105263158, "grad_norm": 1.1616261005401611, "learning_rate": 0.0001, "loss": 0.0121, "step": 87010 }, { "epoch": 572.5, "grad_norm": 1.0127562284469604, "learning_rate": 0.0001, "loss": 0.0105, "step": 87020 }, { "epoch": 572.5657894736842, "grad_norm": 1.3694489002227783, "learning_rate": 0.0001, "loss": 0.0101, "step": 87030 }, { "epoch": 572.6315789473684, "grad_norm": 1.5818332433700562, "learning_rate": 0.0001, "loss": 0.0118, "step": 87040 }, { "epoch": 572.6973684210526, "grad_norm": 1.6021660566329956, "learning_rate": 0.0001, "loss": 0.0127, "step": 87050 }, { "epoch": 572.7631578947369, "grad_norm": 1.4695814847946167, "learning_rate": 0.0001, "loss": 0.0097, "step": 87060 }, { "epoch": 572.828947368421, "grad_norm": 1.1281092166900635, "learning_rate": 0.0001, "loss": 0.0115, "step": 87070 }, { "epoch": 572.8947368421053, "grad_norm": 1.168941617012024, "learning_rate": 0.0001, "loss": 0.0109, "step": 87080 }, { "epoch": 572.9605263157895, "grad_norm": 1.1322778463363647, "learning_rate": 0.0001, "loss": 0.0104, "step": 87090 }, { "epoch": 573.0263157894736, "grad_norm": 1.4014652967453003, "learning_rate": 0.0001, "loss": 0.0112, "step": 87100 }, { "epoch": 573.0921052631579, "grad_norm": 1.2505130767822266, "learning_rate": 0.0001, "loss": 0.0119, "step": 87110 }, { "epoch": 573.1578947368421, "grad_norm": 1.029102087020874, "learning_rate": 0.0001, "loss": 0.0099, "step": 87120 }, { "epoch": 573.2236842105264, "grad_norm": 1.2195322513580322, "learning_rate": 0.0001, "loss": 0.012, "step": 87130 }, { "epoch": 573.2894736842105, "grad_norm": 1.3658270835876465, "learning_rate": 0.0001, "loss": 0.011, "step": 87140 }, { "epoch": 573.3552631578947, "grad_norm": 1.0435492992401123, "learning_rate": 0.0001, "loss": 0.0097, "step": 87150 }, { "epoch": 573.421052631579, "grad_norm": 1.1018818616867065, "learning_rate": 0.0001, "loss": 0.0089, "step": 87160 }, { "epoch": 573.4868421052631, "grad_norm": 0.9364510774612427, "learning_rate": 0.0001, "loss": 0.0094, "step": 87170 }, { "epoch": 573.5526315789474, "grad_norm": 1.258358359336853, "learning_rate": 0.0001, "loss": 0.0144, "step": 87180 }, { "epoch": 573.6184210526316, "grad_norm": 1.2375657558441162, "learning_rate": 0.0001, "loss": 0.0105, "step": 87190 }, { "epoch": 573.6842105263158, "grad_norm": 1.2008768320083618, "learning_rate": 0.0001, "loss": 0.0098, "step": 87200 }, { "epoch": 573.75, "grad_norm": 0.818011999130249, "learning_rate": 0.0001, "loss": 0.0113, "step": 87210 }, { "epoch": 573.8157894736842, "grad_norm": 1.2211856842041016, "learning_rate": 0.0001, "loss": 0.0122, "step": 87220 }, { "epoch": 573.8815789473684, "grad_norm": 1.0101572275161743, "learning_rate": 0.0001, "loss": 0.0113, "step": 87230 }, { "epoch": 573.9473684210526, "grad_norm": 1.2342889308929443, "learning_rate": 0.0001, "loss": 0.0106, "step": 87240 }, { "epoch": 574.0131578947369, "grad_norm": 0.9465478658676147, "learning_rate": 0.0001, "loss": 0.011, "step": 87250 }, { "epoch": 574.078947368421, "grad_norm": 0.9019796848297119, "learning_rate": 0.0001, "loss": 0.0102, "step": 87260 }, { "epoch": 574.1447368421053, "grad_norm": 1.0364303588867188, "learning_rate": 0.0001, "loss": 0.0089, "step": 87270 }, { "epoch": 574.2105263157895, "grad_norm": 1.4639605283737183, "learning_rate": 0.0001, "loss": 0.012, "step": 87280 }, { "epoch": 574.2763157894736, "grad_norm": 1.2540547847747803, "learning_rate": 0.0001, "loss": 0.0129, "step": 87290 }, { "epoch": 574.3421052631579, "grad_norm": 1.2065496444702148, "learning_rate": 0.0001, "loss": 0.0098, "step": 87300 }, { "epoch": 574.4078947368421, "grad_norm": 1.3892549276351929, "learning_rate": 0.0001, "loss": 0.0129, "step": 87310 }, { "epoch": 574.4736842105264, "grad_norm": 1.1445990800857544, "learning_rate": 0.0001, "loss": 0.0109, "step": 87320 }, { "epoch": 574.5394736842105, "grad_norm": 1.227105975151062, "learning_rate": 0.0001, "loss": 0.0111, "step": 87330 }, { "epoch": 574.6052631578947, "grad_norm": 1.262370228767395, "learning_rate": 0.0001, "loss": 0.0123, "step": 87340 }, { "epoch": 574.671052631579, "grad_norm": 1.0574274063110352, "learning_rate": 0.0001, "loss": 0.0108, "step": 87350 }, { "epoch": 574.7368421052631, "grad_norm": 0.9377652406692505, "learning_rate": 0.0001, "loss": 0.0095, "step": 87360 }, { "epoch": 574.8026315789474, "grad_norm": 0.9124358296394348, "learning_rate": 0.0001, "loss": 0.0097, "step": 87370 }, { "epoch": 574.8684210526316, "grad_norm": 1.0424607992172241, "learning_rate": 0.0001, "loss": 0.0115, "step": 87380 }, { "epoch": 574.9342105263158, "grad_norm": 1.2752419710159302, "learning_rate": 0.0001, "loss": 0.0128, "step": 87390 }, { "epoch": 575.0, "grad_norm": 1.1712169647216797, "learning_rate": 0.0001, "loss": 0.009, "step": 87400 }, { "epoch": 575.0657894736842, "grad_norm": 1.0214799642562866, "learning_rate": 0.0001, "loss": 0.0095, "step": 87410 }, { "epoch": 575.1315789473684, "grad_norm": 0.9856082797050476, "learning_rate": 0.0001, "loss": 0.0116, "step": 87420 }, { "epoch": 575.1973684210526, "grad_norm": 1.4361339807510376, "learning_rate": 0.0001, "loss": 0.0119, "step": 87430 }, { "epoch": 575.2631578947369, "grad_norm": 1.2261438369750977, "learning_rate": 0.0001, "loss": 0.0111, "step": 87440 }, { "epoch": 575.328947368421, "grad_norm": 1.267825722694397, "learning_rate": 0.0001, "loss": 0.011, "step": 87450 }, { "epoch": 575.3947368421053, "grad_norm": 1.1174856424331665, "learning_rate": 0.0001, "loss": 0.0102, "step": 87460 }, { "epoch": 575.4605263157895, "grad_norm": 1.3079813718795776, "learning_rate": 0.0001, "loss": 0.0106, "step": 87470 }, { "epoch": 575.5263157894736, "grad_norm": 1.29449462890625, "learning_rate": 0.0001, "loss": 0.0115, "step": 87480 }, { "epoch": 575.5921052631579, "grad_norm": 1.4480912685394287, "learning_rate": 0.0001, "loss": 0.0094, "step": 87490 }, { "epoch": 575.6578947368421, "grad_norm": 1.2274054288864136, "learning_rate": 0.0001, "loss": 0.0104, "step": 87500 }, { "epoch": 575.7236842105264, "grad_norm": 1.114770770072937, "learning_rate": 0.0001, "loss": 0.0093, "step": 87510 }, { "epoch": 575.7894736842105, "grad_norm": 1.2212456464767456, "learning_rate": 0.0001, "loss": 0.011, "step": 87520 }, { "epoch": 575.8552631578947, "grad_norm": 0.8942835927009583, "learning_rate": 0.0001, "loss": 0.0116, "step": 87530 }, { "epoch": 575.921052631579, "grad_norm": 1.293633222579956, "learning_rate": 0.0001, "loss": 0.0114, "step": 87540 }, { "epoch": 575.9868421052631, "grad_norm": 1.2598819732666016, "learning_rate": 0.0001, "loss": 0.0123, "step": 87550 }, { "epoch": 576.0526315789474, "grad_norm": 1.32999849319458, "learning_rate": 0.0001, "loss": 0.011, "step": 87560 }, { "epoch": 576.1184210526316, "grad_norm": 1.2802047729492188, "learning_rate": 0.0001, "loss": 0.0104, "step": 87570 }, { "epoch": 576.1842105263158, "grad_norm": 1.7709383964538574, "learning_rate": 0.0001, "loss": 0.0118, "step": 87580 }, { "epoch": 576.25, "grad_norm": 1.3085800409317017, "learning_rate": 0.0001, "loss": 0.0113, "step": 87590 }, { "epoch": 576.3157894736842, "grad_norm": 1.2775263786315918, "learning_rate": 0.0001, "loss": 0.0106, "step": 87600 }, { "epoch": 576.3815789473684, "grad_norm": 1.5602463483810425, "learning_rate": 0.0001, "loss": 0.0106, "step": 87610 }, { "epoch": 576.4473684210526, "grad_norm": 1.5633126497268677, "learning_rate": 0.0001, "loss": 0.01, "step": 87620 }, { "epoch": 576.5131578947369, "grad_norm": 1.0983535051345825, "learning_rate": 0.0001, "loss": 0.0104, "step": 87630 }, { "epoch": 576.578947368421, "grad_norm": 1.0312081575393677, "learning_rate": 0.0001, "loss": 0.0136, "step": 87640 }, { "epoch": 576.6447368421053, "grad_norm": 1.3446412086486816, "learning_rate": 0.0001, "loss": 0.0107, "step": 87650 }, { "epoch": 576.7105263157895, "grad_norm": 1.1297847032546997, "learning_rate": 0.0001, "loss": 0.0103, "step": 87660 }, { "epoch": 576.7763157894736, "grad_norm": 1.5967837572097778, "learning_rate": 0.0001, "loss": 0.0095, "step": 87670 }, { "epoch": 576.8421052631579, "grad_norm": 1.1126961708068848, "learning_rate": 0.0001, "loss": 0.0109, "step": 87680 }, { "epoch": 576.9078947368421, "grad_norm": 1.0181384086608887, "learning_rate": 0.0001, "loss": 0.0113, "step": 87690 }, { "epoch": 576.9736842105264, "grad_norm": 0.9504018425941467, "learning_rate": 0.0001, "loss": 0.0116, "step": 87700 }, { "epoch": 577.0394736842105, "grad_norm": 1.2466793060302734, "learning_rate": 0.0001, "loss": 0.0102, "step": 87710 }, { "epoch": 577.1052631578947, "grad_norm": 0.9084358215332031, "learning_rate": 0.0001, "loss": 0.0134, "step": 87720 }, { "epoch": 577.171052631579, "grad_norm": 1.0614901781082153, "learning_rate": 0.0001, "loss": 0.0113, "step": 87730 }, { "epoch": 577.2368421052631, "grad_norm": 1.036545991897583, "learning_rate": 0.0001, "loss": 0.0105, "step": 87740 }, { "epoch": 577.3026315789474, "grad_norm": 0.8779444098472595, "learning_rate": 0.0001, "loss": 0.0119, "step": 87750 }, { "epoch": 577.3684210526316, "grad_norm": 0.7535855174064636, "learning_rate": 0.0001, "loss": 0.0102, "step": 87760 }, { "epoch": 577.4342105263158, "grad_norm": 1.1626191139221191, "learning_rate": 0.0001, "loss": 0.0118, "step": 87770 }, { "epoch": 577.5, "grad_norm": 1.0589649677276611, "learning_rate": 0.0001, "loss": 0.0104, "step": 87780 }, { "epoch": 577.5657894736842, "grad_norm": 1.277850866317749, "learning_rate": 0.0001, "loss": 0.01, "step": 87790 }, { "epoch": 577.6315789473684, "grad_norm": 0.9819774031639099, "learning_rate": 0.0001, "loss": 0.0105, "step": 87800 }, { "epoch": 577.6973684210526, "grad_norm": 0.9045615792274475, "learning_rate": 0.0001, "loss": 0.0095, "step": 87810 }, { "epoch": 577.7631578947369, "grad_norm": 1.1192935705184937, "learning_rate": 0.0001, "loss": 0.0127, "step": 87820 }, { "epoch": 577.828947368421, "grad_norm": 1.2856429815292358, "learning_rate": 0.0001, "loss": 0.0102, "step": 87830 }, { "epoch": 577.8947368421053, "grad_norm": 1.3235225677490234, "learning_rate": 0.0001, "loss": 0.0129, "step": 87840 }, { "epoch": 577.9605263157895, "grad_norm": 0.8897786140441895, "learning_rate": 0.0001, "loss": 0.0104, "step": 87850 }, { "epoch": 578.0263157894736, "grad_norm": 0.9557298421859741, "learning_rate": 0.0001, "loss": 0.0104, "step": 87860 }, { "epoch": 578.0921052631579, "grad_norm": 1.0050173997879028, "learning_rate": 0.0001, "loss": 0.0098, "step": 87870 }, { "epoch": 578.1578947368421, "grad_norm": 0.8767371773719788, "learning_rate": 0.0001, "loss": 0.0107, "step": 87880 }, { "epoch": 578.2236842105264, "grad_norm": 1.2130120992660522, "learning_rate": 0.0001, "loss": 0.0128, "step": 87890 }, { "epoch": 578.2894736842105, "grad_norm": 1.0449823141098022, "learning_rate": 0.0001, "loss": 0.0105, "step": 87900 }, { "epoch": 578.3552631578947, "grad_norm": 0.840207040309906, "learning_rate": 0.0001, "loss": 0.01, "step": 87910 }, { "epoch": 578.421052631579, "grad_norm": 0.847531795501709, "learning_rate": 0.0001, "loss": 0.0105, "step": 87920 }, { "epoch": 578.4868421052631, "grad_norm": 1.5373563766479492, "learning_rate": 0.0001, "loss": 0.0097, "step": 87930 }, { "epoch": 578.5526315789474, "grad_norm": 1.208318829536438, "learning_rate": 0.0001, "loss": 0.0115, "step": 87940 }, { "epoch": 578.6184210526316, "grad_norm": 1.1403499841690063, "learning_rate": 0.0001, "loss": 0.0134, "step": 87950 }, { "epoch": 578.6842105263158, "grad_norm": 1.375062108039856, "learning_rate": 0.0001, "loss": 0.0097, "step": 87960 }, { "epoch": 578.75, "grad_norm": 0.9031561017036438, "learning_rate": 0.0001, "loss": 0.0105, "step": 87970 }, { "epoch": 578.8157894736842, "grad_norm": 1.3210184574127197, "learning_rate": 0.0001, "loss": 0.0116, "step": 87980 }, { "epoch": 578.8815789473684, "grad_norm": 1.013850450515747, "learning_rate": 0.0001, "loss": 0.0103, "step": 87990 }, { "epoch": 578.9473684210526, "grad_norm": 1.104063630104065, "learning_rate": 0.0001, "loss": 0.0127, "step": 88000 }, { "epoch": 579.0131578947369, "grad_norm": 0.8655098080635071, "learning_rate": 0.0001, "loss": 0.0104, "step": 88010 }, { "epoch": 579.078947368421, "grad_norm": 1.320491075515747, "learning_rate": 0.0001, "loss": 0.0104, "step": 88020 }, { "epoch": 579.1447368421053, "grad_norm": 1.221070408821106, "learning_rate": 0.0001, "loss": 0.0121, "step": 88030 }, { "epoch": 579.2105263157895, "grad_norm": 1.0820084810256958, "learning_rate": 0.0001, "loss": 0.0123, "step": 88040 }, { "epoch": 579.2763157894736, "grad_norm": 1.2061529159545898, "learning_rate": 0.0001, "loss": 0.0108, "step": 88050 }, { "epoch": 579.3421052631579, "grad_norm": 0.9127936959266663, "learning_rate": 0.0001, "loss": 0.0111, "step": 88060 }, { "epoch": 579.4078947368421, "grad_norm": 1.4120718240737915, "learning_rate": 0.0001, "loss": 0.0126, "step": 88070 }, { "epoch": 579.4736842105264, "grad_norm": 1.0153974294662476, "learning_rate": 0.0001, "loss": 0.0116, "step": 88080 }, { "epoch": 579.5394736842105, "grad_norm": 1.3774847984313965, "learning_rate": 0.0001, "loss": 0.0113, "step": 88090 }, { "epoch": 579.6052631578947, "grad_norm": 1.1112881898880005, "learning_rate": 0.0001, "loss": 0.0105, "step": 88100 }, { "epoch": 579.671052631579, "grad_norm": 1.1673177480697632, "learning_rate": 0.0001, "loss": 0.0106, "step": 88110 }, { "epoch": 579.7368421052631, "grad_norm": 1.160556674003601, "learning_rate": 0.0001, "loss": 0.0115, "step": 88120 }, { "epoch": 579.8026315789474, "grad_norm": 1.118118405342102, "learning_rate": 0.0001, "loss": 0.0129, "step": 88130 }, { "epoch": 579.8684210526316, "grad_norm": 0.7637477517127991, "learning_rate": 0.0001, "loss": 0.0095, "step": 88140 }, { "epoch": 579.9342105263158, "grad_norm": 1.3707367181777954, "learning_rate": 0.0001, "loss": 0.0105, "step": 88150 }, { "epoch": 580.0, "grad_norm": 1.009476900100708, "learning_rate": 0.0001, "loss": 0.0102, "step": 88160 }, { "epoch": 580.0657894736842, "grad_norm": 1.3438063859939575, "learning_rate": 0.0001, "loss": 0.0108, "step": 88170 }, { "epoch": 580.1315789473684, "grad_norm": 1.126423716545105, "learning_rate": 0.0001, "loss": 0.0101, "step": 88180 }, { "epoch": 580.1973684210526, "grad_norm": 1.1746346950531006, "learning_rate": 0.0001, "loss": 0.0102, "step": 88190 }, { "epoch": 580.2631578947369, "grad_norm": 1.0661026239395142, "learning_rate": 0.0001, "loss": 0.0105, "step": 88200 }, { "epoch": 580.328947368421, "grad_norm": 0.9947375059127808, "learning_rate": 0.0001, "loss": 0.0106, "step": 88210 }, { "epoch": 580.3947368421053, "grad_norm": 1.178560733795166, "learning_rate": 0.0001, "loss": 0.011, "step": 88220 }, { "epoch": 580.4605263157895, "grad_norm": 1.3273268938064575, "learning_rate": 0.0001, "loss": 0.0125, "step": 88230 }, { "epoch": 580.5263157894736, "grad_norm": 1.040791392326355, "learning_rate": 0.0001, "loss": 0.011, "step": 88240 }, { "epoch": 580.5921052631579, "grad_norm": 1.4010847806930542, "learning_rate": 0.0001, "loss": 0.0113, "step": 88250 }, { "epoch": 580.6578947368421, "grad_norm": 1.2355036735534668, "learning_rate": 0.0001, "loss": 0.0109, "step": 88260 }, { "epoch": 580.7236842105264, "grad_norm": 0.9420546889305115, "learning_rate": 0.0001, "loss": 0.0127, "step": 88270 }, { "epoch": 580.7894736842105, "grad_norm": 1.1292775869369507, "learning_rate": 0.0001, "loss": 0.0117, "step": 88280 }, { "epoch": 580.8552631578947, "grad_norm": 1.118950366973877, "learning_rate": 0.0001, "loss": 0.0115, "step": 88290 }, { "epoch": 580.921052631579, "grad_norm": 1.3669413328170776, "learning_rate": 0.0001, "loss": 0.0103, "step": 88300 }, { "epoch": 580.9868421052631, "grad_norm": 1.1475030183792114, "learning_rate": 0.0001, "loss": 0.0108, "step": 88310 }, { "epoch": 581.0526315789474, "grad_norm": 0.9832250475883484, "learning_rate": 0.0001, "loss": 0.0121, "step": 88320 }, { "epoch": 581.1184210526316, "grad_norm": 1.0179389715194702, "learning_rate": 0.0001, "loss": 0.0109, "step": 88330 }, { "epoch": 581.1842105263158, "grad_norm": 1.026995301246643, "learning_rate": 0.0001, "loss": 0.0117, "step": 88340 }, { "epoch": 581.25, "grad_norm": 1.0577194690704346, "learning_rate": 0.0001, "loss": 0.0097, "step": 88350 }, { "epoch": 581.3157894736842, "grad_norm": 0.8815690875053406, "learning_rate": 0.0001, "loss": 0.0109, "step": 88360 }, { "epoch": 581.3815789473684, "grad_norm": 0.9633040428161621, "learning_rate": 0.0001, "loss": 0.011, "step": 88370 }, { "epoch": 581.4473684210526, "grad_norm": 1.289242148399353, "learning_rate": 0.0001, "loss": 0.0103, "step": 88380 }, { "epoch": 581.5131578947369, "grad_norm": 0.8594449758529663, "learning_rate": 0.0001, "loss": 0.0126, "step": 88390 }, { "epoch": 581.578947368421, "grad_norm": 0.8207187652587891, "learning_rate": 0.0001, "loss": 0.0109, "step": 88400 }, { "epoch": 581.6447368421053, "grad_norm": 1.0943405628204346, "learning_rate": 0.0001, "loss": 0.0118, "step": 88410 }, { "epoch": 581.7105263157895, "grad_norm": 0.834723711013794, "learning_rate": 0.0001, "loss": 0.0133, "step": 88420 }, { "epoch": 581.7763157894736, "grad_norm": 1.02744722366333, "learning_rate": 0.0001, "loss": 0.0115, "step": 88430 }, { "epoch": 581.8421052631579, "grad_norm": 1.1640406847000122, "learning_rate": 0.0001, "loss": 0.0131, "step": 88440 }, { "epoch": 581.9078947368421, "grad_norm": 1.0775976181030273, "learning_rate": 0.0001, "loss": 0.0115, "step": 88450 }, { "epoch": 581.9736842105264, "grad_norm": 1.163527250289917, "learning_rate": 0.0001, "loss": 0.0138, "step": 88460 }, { "epoch": 582.0394736842105, "grad_norm": 0.8929876089096069, "learning_rate": 0.0001, "loss": 0.0132, "step": 88470 }, { "epoch": 582.1052631578947, "grad_norm": 1.3309000730514526, "learning_rate": 0.0001, "loss": 0.0145, "step": 88480 }, { "epoch": 582.171052631579, "grad_norm": 1.0838176012039185, "learning_rate": 0.0001, "loss": 0.0124, "step": 88490 }, { "epoch": 582.2368421052631, "grad_norm": 1.5269906520843506, "learning_rate": 0.0001, "loss": 0.0135, "step": 88500 }, { "epoch": 582.3026315789474, "grad_norm": 0.8997194170951843, "learning_rate": 0.0001, "loss": 0.0124, "step": 88510 }, { "epoch": 582.3684210526316, "grad_norm": 1.0836443901062012, "learning_rate": 0.0001, "loss": 0.0116, "step": 88520 }, { "epoch": 582.4342105263158, "grad_norm": 0.7760714888572693, "learning_rate": 0.0001, "loss": 0.0106, "step": 88530 }, { "epoch": 582.5, "grad_norm": 1.0541595220565796, "learning_rate": 0.0001, "loss": 0.012, "step": 88540 }, { "epoch": 582.5657894736842, "grad_norm": 1.0740338563919067, "learning_rate": 0.0001, "loss": 0.0114, "step": 88550 }, { "epoch": 582.6315789473684, "grad_norm": 1.234104871749878, "learning_rate": 0.0001, "loss": 0.0108, "step": 88560 }, { "epoch": 582.6973684210526, "grad_norm": 1.1293065547943115, "learning_rate": 0.0001, "loss": 0.0107, "step": 88570 }, { "epoch": 582.7631578947369, "grad_norm": 0.9713568091392517, "learning_rate": 0.0001, "loss": 0.0121, "step": 88580 }, { "epoch": 582.828947368421, "grad_norm": 0.9887568950653076, "learning_rate": 0.0001, "loss": 0.0138, "step": 88590 }, { "epoch": 582.8947368421053, "grad_norm": 1.0662295818328857, "learning_rate": 0.0001, "loss": 0.0115, "step": 88600 }, { "epoch": 582.9605263157895, "grad_norm": 1.09245765209198, "learning_rate": 0.0001, "loss": 0.0111, "step": 88610 }, { "epoch": 583.0263157894736, "grad_norm": 1.12832510471344, "learning_rate": 0.0001, "loss": 0.0133, "step": 88620 }, { "epoch": 583.0921052631579, "grad_norm": 1.6330440044403076, "learning_rate": 0.0001, "loss": 0.0134, "step": 88630 }, { "epoch": 583.1578947368421, "grad_norm": 1.4116557836532593, "learning_rate": 0.0001, "loss": 0.012, "step": 88640 }, { "epoch": 583.2236842105264, "grad_norm": 1.313237190246582, "learning_rate": 0.0001, "loss": 0.0143, "step": 88650 }, { "epoch": 583.2894736842105, "grad_norm": 1.496812105178833, "learning_rate": 0.0001, "loss": 0.0128, "step": 88660 }, { "epoch": 583.3552631578947, "grad_norm": 1.3389391899108887, "learning_rate": 0.0001, "loss": 0.0121, "step": 88670 }, { "epoch": 583.421052631579, "grad_norm": 0.9950214624404907, "learning_rate": 0.0001, "loss": 0.0112, "step": 88680 }, { "epoch": 583.4868421052631, "grad_norm": 1.0509663820266724, "learning_rate": 0.0001, "loss": 0.0105, "step": 88690 }, { "epoch": 583.5526315789474, "grad_norm": 1.2093925476074219, "learning_rate": 0.0001, "loss": 0.0122, "step": 88700 }, { "epoch": 583.6184210526316, "grad_norm": 1.2887691259384155, "learning_rate": 0.0001, "loss": 0.0146, "step": 88710 }, { "epoch": 583.6842105263158, "grad_norm": 1.2735077142715454, "learning_rate": 0.0001, "loss": 0.0119, "step": 88720 }, { "epoch": 583.75, "grad_norm": 1.4364982843399048, "learning_rate": 0.0001, "loss": 0.0109, "step": 88730 }, { "epoch": 583.8157894736842, "grad_norm": 1.1002883911132812, "learning_rate": 0.0001, "loss": 0.0123, "step": 88740 }, { "epoch": 583.8815789473684, "grad_norm": 0.9703975915908813, "learning_rate": 0.0001, "loss": 0.0117, "step": 88750 }, { "epoch": 583.9473684210526, "grad_norm": 0.8523468971252441, "learning_rate": 0.0001, "loss": 0.0101, "step": 88760 }, { "epoch": 584.0131578947369, "grad_norm": 1.2249200344085693, "learning_rate": 0.0001, "loss": 0.0091, "step": 88770 }, { "epoch": 584.078947368421, "grad_norm": 1.7450804710388184, "learning_rate": 0.0001, "loss": 0.012, "step": 88780 }, { "epoch": 584.1447368421053, "grad_norm": 1.176170825958252, "learning_rate": 0.0001, "loss": 0.0114, "step": 88790 }, { "epoch": 584.2105263157895, "grad_norm": 0.7471736073493958, "learning_rate": 0.0001, "loss": 0.0111, "step": 88800 }, { "epoch": 584.2763157894736, "grad_norm": 1.824398159980774, "learning_rate": 0.0001, "loss": 0.01, "step": 88810 }, { "epoch": 584.3421052631579, "grad_norm": 1.3423678874969482, "learning_rate": 0.0001, "loss": 0.0127, "step": 88820 }, { "epoch": 584.4078947368421, "grad_norm": 1.2872895002365112, "learning_rate": 0.0001, "loss": 0.0106, "step": 88830 }, { "epoch": 584.4736842105264, "grad_norm": 1.6336323022842407, "learning_rate": 0.0001, "loss": 0.014, "step": 88840 }, { "epoch": 584.5394736842105, "grad_norm": 1.0463390350341797, "learning_rate": 0.0001, "loss": 0.0113, "step": 88850 }, { "epoch": 584.6052631578947, "grad_norm": 1.3081940412521362, "learning_rate": 0.0001, "loss": 0.0122, "step": 88860 }, { "epoch": 584.671052631579, "grad_norm": 1.1893011331558228, "learning_rate": 0.0001, "loss": 0.0094, "step": 88870 }, { "epoch": 584.7368421052631, "grad_norm": 1.2873516082763672, "learning_rate": 0.0001, "loss": 0.0126, "step": 88880 }, { "epoch": 584.8026315789474, "grad_norm": 0.9724097847938538, "learning_rate": 0.0001, "loss": 0.0122, "step": 88890 }, { "epoch": 584.8684210526316, "grad_norm": 0.8140742778778076, "learning_rate": 0.0001, "loss": 0.0134, "step": 88900 }, { "epoch": 584.9342105263158, "grad_norm": 1.4383323192596436, "learning_rate": 0.0001, "loss": 0.0135, "step": 88910 }, { "epoch": 585.0, "grad_norm": 0.8941511511802673, "learning_rate": 0.0001, "loss": 0.0141, "step": 88920 }, { "epoch": 585.0657894736842, "grad_norm": 1.3522826433181763, "learning_rate": 0.0001, "loss": 0.0152, "step": 88930 }, { "epoch": 585.1315789473684, "grad_norm": 1.0675338506698608, "learning_rate": 0.0001, "loss": 0.0141, "step": 88940 }, { "epoch": 585.1973684210526, "grad_norm": 1.4727058410644531, "learning_rate": 0.0001, "loss": 0.0126, "step": 88950 }, { "epoch": 585.2631578947369, "grad_norm": 1.2497466802597046, "learning_rate": 0.0001, "loss": 0.0132, "step": 88960 }, { "epoch": 585.328947368421, "grad_norm": 0.8785649538040161, "learning_rate": 0.0001, "loss": 0.0131, "step": 88970 }, { "epoch": 585.3947368421053, "grad_norm": 1.2095212936401367, "learning_rate": 0.0001, "loss": 0.013, "step": 88980 }, { "epoch": 585.4605263157895, "grad_norm": 1.3274887800216675, "learning_rate": 0.0001, "loss": 0.0131, "step": 88990 }, { "epoch": 585.5263157894736, "grad_norm": 1.0932297706604004, "learning_rate": 0.0001, "loss": 0.0134, "step": 89000 }, { "epoch": 585.5921052631579, "grad_norm": 1.1494570970535278, "learning_rate": 0.0001, "loss": 0.0123, "step": 89010 }, { "epoch": 585.6578947368421, "grad_norm": 1.5100291967391968, "learning_rate": 0.0001, "loss": 0.0112, "step": 89020 }, { "epoch": 585.7236842105264, "grad_norm": 1.3086682558059692, "learning_rate": 0.0001, "loss": 0.0127, "step": 89030 }, { "epoch": 585.7894736842105, "grad_norm": 1.1666781902313232, "learning_rate": 0.0001, "loss": 0.0146, "step": 89040 }, { "epoch": 585.8552631578947, "grad_norm": 0.9592698216438293, "learning_rate": 0.0001, "loss": 0.0105, "step": 89050 }, { "epoch": 585.921052631579, "grad_norm": 0.6694148182868958, "learning_rate": 0.0001, "loss": 0.0126, "step": 89060 }, { "epoch": 585.9868421052631, "grad_norm": 1.0263710021972656, "learning_rate": 0.0001, "loss": 0.0127, "step": 89070 }, { "epoch": 586.0526315789474, "grad_norm": 0.6993970274925232, "learning_rate": 0.0001, "loss": 0.0127, "step": 89080 }, { "epoch": 586.1184210526316, "grad_norm": 0.8670745491981506, "learning_rate": 0.0001, "loss": 0.0121, "step": 89090 }, { "epoch": 586.1842105263158, "grad_norm": 0.7880876064300537, "learning_rate": 0.0001, "loss": 0.0114, "step": 89100 }, { "epoch": 586.25, "grad_norm": 0.9657894968986511, "learning_rate": 0.0001, "loss": 0.0138, "step": 89110 }, { "epoch": 586.3157894736842, "grad_norm": 0.7474163174629211, "learning_rate": 0.0001, "loss": 0.0117, "step": 89120 }, { "epoch": 586.3815789473684, "grad_norm": 1.1458895206451416, "learning_rate": 0.0001, "loss": 0.0123, "step": 89130 }, { "epoch": 586.4473684210526, "grad_norm": 1.0431797504425049, "learning_rate": 0.0001, "loss": 0.0126, "step": 89140 }, { "epoch": 586.5131578947369, "grad_norm": 0.9211791157722473, "learning_rate": 0.0001, "loss": 0.0119, "step": 89150 }, { "epoch": 586.578947368421, "grad_norm": 1.1954262256622314, "learning_rate": 0.0001, "loss": 0.0135, "step": 89160 }, { "epoch": 586.6447368421053, "grad_norm": 1.164695143699646, "learning_rate": 0.0001, "loss": 0.0125, "step": 89170 }, { "epoch": 586.7105263157895, "grad_norm": 1.2543039321899414, "learning_rate": 0.0001, "loss": 0.0137, "step": 89180 }, { "epoch": 586.7763157894736, "grad_norm": 1.077324390411377, "learning_rate": 0.0001, "loss": 0.012, "step": 89190 }, { "epoch": 586.8421052631579, "grad_norm": 0.7734859585762024, "learning_rate": 0.0001, "loss": 0.0117, "step": 89200 }, { "epoch": 586.9078947368421, "grad_norm": 1.3108208179473877, "learning_rate": 0.0001, "loss": 0.0137, "step": 89210 }, { "epoch": 586.9736842105264, "grad_norm": 1.516038179397583, "learning_rate": 0.0001, "loss": 0.0113, "step": 89220 }, { "epoch": 587.0394736842105, "grad_norm": 1.5509519577026367, "learning_rate": 0.0001, "loss": 0.0106, "step": 89230 }, { "epoch": 587.1052631578947, "grad_norm": 1.3086330890655518, "learning_rate": 0.0001, "loss": 0.0095, "step": 89240 }, { "epoch": 587.171052631579, "grad_norm": 1.3632980585098267, "learning_rate": 0.0001, "loss": 0.0139, "step": 89250 }, { "epoch": 587.2368421052631, "grad_norm": 1.1639214754104614, "learning_rate": 0.0001, "loss": 0.013, "step": 89260 }, { "epoch": 587.3026315789474, "grad_norm": 1.0251145362854004, "learning_rate": 0.0001, "loss": 0.0125, "step": 89270 }, { "epoch": 587.3684210526316, "grad_norm": 1.2207335233688354, "learning_rate": 0.0001, "loss": 0.015, "step": 89280 }, { "epoch": 587.4342105263158, "grad_norm": 1.3697402477264404, "learning_rate": 0.0001, "loss": 0.0125, "step": 89290 }, { "epoch": 587.5, "grad_norm": 1.4069492816925049, "learning_rate": 0.0001, "loss": 0.0122, "step": 89300 }, { "epoch": 587.5657894736842, "grad_norm": 1.0171831846237183, "learning_rate": 0.0001, "loss": 0.0125, "step": 89310 }, { "epoch": 587.6315789473684, "grad_norm": 1.1951297521591187, "learning_rate": 0.0001, "loss": 0.0105, "step": 89320 }, { "epoch": 587.6973684210526, "grad_norm": 0.9013453125953674, "learning_rate": 0.0001, "loss": 0.014, "step": 89330 }, { "epoch": 587.7631578947369, "grad_norm": 1.1464810371398926, "learning_rate": 0.0001, "loss": 0.0108, "step": 89340 }, { "epoch": 587.828947368421, "grad_norm": 1.0271261930465698, "learning_rate": 0.0001, "loss": 0.0106, "step": 89350 }, { "epoch": 587.8947368421053, "grad_norm": 1.106770396232605, "learning_rate": 0.0001, "loss": 0.0145, "step": 89360 }, { "epoch": 587.9605263157895, "grad_norm": 0.9565857648849487, "learning_rate": 0.0001, "loss": 0.0117, "step": 89370 }, { "epoch": 588.0263157894736, "grad_norm": 1.2642189264297485, "learning_rate": 0.0001, "loss": 0.0119, "step": 89380 }, { "epoch": 588.0921052631579, "grad_norm": 1.1177488565444946, "learning_rate": 0.0001, "loss": 0.012, "step": 89390 }, { "epoch": 588.1578947368421, "grad_norm": 0.8865790367126465, "learning_rate": 0.0001, "loss": 0.0125, "step": 89400 }, { "epoch": 588.2236842105264, "grad_norm": 1.007317066192627, "learning_rate": 0.0001, "loss": 0.0126, "step": 89410 }, { "epoch": 588.2894736842105, "grad_norm": 1.041730523109436, "learning_rate": 0.0001, "loss": 0.0119, "step": 89420 }, { "epoch": 588.3552631578947, "grad_norm": 1.389570713043213, "learning_rate": 0.0001, "loss": 0.0131, "step": 89430 }, { "epoch": 588.421052631579, "grad_norm": 1.1564185619354248, "learning_rate": 0.0001, "loss": 0.012, "step": 89440 }, { "epoch": 588.4868421052631, "grad_norm": 1.2922906875610352, "learning_rate": 0.0001, "loss": 0.0108, "step": 89450 }, { "epoch": 588.5526315789474, "grad_norm": 1.2938793897628784, "learning_rate": 0.0001, "loss": 0.0121, "step": 89460 }, { "epoch": 588.6184210526316, "grad_norm": 0.9030840992927551, "learning_rate": 0.0001, "loss": 0.0144, "step": 89470 }, { "epoch": 588.6842105263158, "grad_norm": 1.131744384765625, "learning_rate": 0.0001, "loss": 0.0112, "step": 89480 }, { "epoch": 588.75, "grad_norm": 1.0331127643585205, "learning_rate": 0.0001, "loss": 0.013, "step": 89490 }, { "epoch": 588.8157894736842, "grad_norm": 0.9357874989509583, "learning_rate": 0.0001, "loss": 0.011, "step": 89500 }, { "epoch": 588.8815789473684, "grad_norm": 1.4031165838241577, "learning_rate": 0.0001, "loss": 0.0127, "step": 89510 }, { "epoch": 588.9473684210526, "grad_norm": 1.0815694332122803, "learning_rate": 0.0001, "loss": 0.0131, "step": 89520 }, { "epoch": 589.0131578947369, "grad_norm": 1.360446810722351, "learning_rate": 0.0001, "loss": 0.0136, "step": 89530 }, { "epoch": 589.078947368421, "grad_norm": 1.0539952516555786, "learning_rate": 0.0001, "loss": 0.0101, "step": 89540 }, { "epoch": 589.1447368421053, "grad_norm": 0.9572492837905884, "learning_rate": 0.0001, "loss": 0.0111, "step": 89550 }, { "epoch": 589.2105263157895, "grad_norm": 1.1082371473312378, "learning_rate": 0.0001, "loss": 0.0116, "step": 89560 }, { "epoch": 589.2763157894736, "grad_norm": 1.2691287994384766, "learning_rate": 0.0001, "loss": 0.0125, "step": 89570 }, { "epoch": 589.3421052631579, "grad_norm": 1.6313940286636353, "learning_rate": 0.0001, "loss": 0.0113, "step": 89580 }, { "epoch": 589.4078947368421, "grad_norm": 0.9120645523071289, "learning_rate": 0.0001, "loss": 0.0111, "step": 89590 }, { "epoch": 589.4736842105264, "grad_norm": 1.050746202468872, "learning_rate": 0.0001, "loss": 0.0116, "step": 89600 }, { "epoch": 589.5394736842105, "grad_norm": 1.3637466430664062, "learning_rate": 0.0001, "loss": 0.0125, "step": 89610 }, { "epoch": 589.6052631578947, "grad_norm": 1.2993441820144653, "learning_rate": 0.0001, "loss": 0.0099, "step": 89620 }, { "epoch": 589.671052631579, "grad_norm": 1.2083455324172974, "learning_rate": 0.0001, "loss": 0.0103, "step": 89630 }, { "epoch": 589.7368421052631, "grad_norm": 0.9543986320495605, "learning_rate": 0.0001, "loss": 0.012, "step": 89640 }, { "epoch": 589.8026315789474, "grad_norm": 1.0169148445129395, "learning_rate": 0.0001, "loss": 0.0121, "step": 89650 }, { "epoch": 589.8684210526316, "grad_norm": 0.9722794890403748, "learning_rate": 0.0001, "loss": 0.0105, "step": 89660 }, { "epoch": 589.9342105263158, "grad_norm": 1.2600023746490479, "learning_rate": 0.0001, "loss": 0.0116, "step": 89670 }, { "epoch": 590.0, "grad_norm": 1.084512710571289, "learning_rate": 0.0001, "loss": 0.0107, "step": 89680 }, { "epoch": 590.0657894736842, "grad_norm": 0.9002571105957031, "learning_rate": 0.0001, "loss": 0.0114, "step": 89690 }, { "epoch": 590.1315789473684, "grad_norm": 1.2132498025894165, "learning_rate": 0.0001, "loss": 0.0111, "step": 89700 }, { "epoch": 590.1973684210526, "grad_norm": 1.4505194425582886, "learning_rate": 0.0001, "loss": 0.0103, "step": 89710 }, { "epoch": 590.2631578947369, "grad_norm": 1.2071669101715088, "learning_rate": 0.0001, "loss": 0.011, "step": 89720 }, { "epoch": 590.328947368421, "grad_norm": 1.1249711513519287, "learning_rate": 0.0001, "loss": 0.0102, "step": 89730 }, { "epoch": 590.3947368421053, "grad_norm": 0.9531528353691101, "learning_rate": 0.0001, "loss": 0.0103, "step": 89740 }, { "epoch": 590.4605263157895, "grad_norm": 1.5327131748199463, "learning_rate": 0.0001, "loss": 0.0099, "step": 89750 }, { "epoch": 590.5263157894736, "grad_norm": 1.2193597555160522, "learning_rate": 0.0001, "loss": 0.0108, "step": 89760 }, { "epoch": 590.5921052631579, "grad_norm": 1.114341378211975, "learning_rate": 0.0001, "loss": 0.0111, "step": 89770 }, { "epoch": 590.6578947368421, "grad_norm": 0.7572506070137024, "learning_rate": 0.0001, "loss": 0.0093, "step": 89780 }, { "epoch": 590.7236842105264, "grad_norm": 1.0355122089385986, "learning_rate": 0.0001, "loss": 0.013, "step": 89790 }, { "epoch": 590.7894736842105, "grad_norm": 1.0882655382156372, "learning_rate": 0.0001, "loss": 0.0108, "step": 89800 }, { "epoch": 590.8552631578947, "grad_norm": 1.2581169605255127, "learning_rate": 0.0001, "loss": 0.011, "step": 89810 }, { "epoch": 590.921052631579, "grad_norm": 1.334328055381775, "learning_rate": 0.0001, "loss": 0.0117, "step": 89820 }, { "epoch": 590.9868421052631, "grad_norm": 1.0767223834991455, "learning_rate": 0.0001, "loss": 0.0116, "step": 89830 }, { "epoch": 591.0526315789474, "grad_norm": 1.1012510061264038, "learning_rate": 0.0001, "loss": 0.0113, "step": 89840 }, { "epoch": 591.1184210526316, "grad_norm": 0.7526869773864746, "learning_rate": 0.0001, "loss": 0.0093, "step": 89850 }, { "epoch": 591.1842105263158, "grad_norm": 1.0117813348770142, "learning_rate": 0.0001, "loss": 0.0116, "step": 89860 }, { "epoch": 591.25, "grad_norm": 1.2263928651809692, "learning_rate": 0.0001, "loss": 0.01, "step": 89870 }, { "epoch": 591.3157894736842, "grad_norm": 0.8752433657646179, "learning_rate": 0.0001, "loss": 0.0099, "step": 89880 }, { "epoch": 591.3815789473684, "grad_norm": 1.4674887657165527, "learning_rate": 0.0001, "loss": 0.0117, "step": 89890 }, { "epoch": 591.4473684210526, "grad_norm": 1.1503700017929077, "learning_rate": 0.0001, "loss": 0.0114, "step": 89900 }, { "epoch": 591.5131578947369, "grad_norm": 1.218532681465149, "learning_rate": 0.0001, "loss": 0.0114, "step": 89910 }, { "epoch": 591.578947368421, "grad_norm": 1.5845961570739746, "learning_rate": 0.0001, "loss": 0.0092, "step": 89920 }, { "epoch": 591.6447368421053, "grad_norm": 1.585045576095581, "learning_rate": 0.0001, "loss": 0.0109, "step": 89930 }, { "epoch": 591.7105263157895, "grad_norm": 1.2569578886032104, "learning_rate": 0.0001, "loss": 0.0136, "step": 89940 }, { "epoch": 591.7763157894736, "grad_norm": 1.1375163793563843, "learning_rate": 0.0001, "loss": 0.0105, "step": 89950 }, { "epoch": 591.8421052631579, "grad_norm": 0.8566263914108276, "learning_rate": 0.0001, "loss": 0.0107, "step": 89960 }, { "epoch": 591.9078947368421, "grad_norm": 1.2207688093185425, "learning_rate": 0.0001, "loss": 0.0108, "step": 89970 }, { "epoch": 591.9736842105264, "grad_norm": 1.099183440208435, "learning_rate": 0.0001, "loss": 0.0093, "step": 89980 }, { "epoch": 592.0394736842105, "grad_norm": 1.3385899066925049, "learning_rate": 0.0001, "loss": 0.0098, "step": 89990 }, { "epoch": 592.1052631578947, "grad_norm": 1.2732150554656982, "learning_rate": 0.0001, "loss": 0.0101, "step": 90000 }, { "epoch": 592.171052631579, "grad_norm": 1.053571105003357, "learning_rate": 0.0001, "loss": 0.0107, "step": 90010 }, { "epoch": 592.2368421052631, "grad_norm": 0.9966652393341064, "learning_rate": 0.0001, "loss": 0.0114, "step": 90020 }, { "epoch": 592.3026315789474, "grad_norm": 1.0630309581756592, "learning_rate": 0.0001, "loss": 0.0104, "step": 90030 }, { "epoch": 592.3684210526316, "grad_norm": 1.1324174404144287, "learning_rate": 0.0001, "loss": 0.0112, "step": 90040 }, { "epoch": 592.4342105263158, "grad_norm": 1.2332628965377808, "learning_rate": 0.0001, "loss": 0.0098, "step": 90050 }, { "epoch": 592.5, "grad_norm": 1.151996374130249, "learning_rate": 0.0001, "loss": 0.0118, "step": 90060 }, { "epoch": 592.5657894736842, "grad_norm": 0.7464222311973572, "learning_rate": 0.0001, "loss": 0.0111, "step": 90070 }, { "epoch": 592.6315789473684, "grad_norm": 0.8995870351791382, "learning_rate": 0.0001, "loss": 0.0099, "step": 90080 }, { "epoch": 592.6973684210526, "grad_norm": 1.467040777206421, "learning_rate": 0.0001, "loss": 0.0097, "step": 90090 }, { "epoch": 592.7631578947369, "grad_norm": 0.7274995446205139, "learning_rate": 0.0001, "loss": 0.0114, "step": 90100 }, { "epoch": 592.828947368421, "grad_norm": 1.107151985168457, "learning_rate": 0.0001, "loss": 0.0109, "step": 90110 }, { "epoch": 592.8947368421053, "grad_norm": 1.1053555011749268, "learning_rate": 0.0001, "loss": 0.0127, "step": 90120 }, { "epoch": 592.9605263157895, "grad_norm": 0.884556770324707, "learning_rate": 0.0001, "loss": 0.0103, "step": 90130 }, { "epoch": 593.0263157894736, "grad_norm": 1.09501051902771, "learning_rate": 0.0001, "loss": 0.011, "step": 90140 }, { "epoch": 593.0921052631579, "grad_norm": 1.0981241464614868, "learning_rate": 0.0001, "loss": 0.0111, "step": 90150 }, { "epoch": 593.1578947368421, "grad_norm": 0.5903021097183228, "learning_rate": 0.0001, "loss": 0.0108, "step": 90160 }, { "epoch": 593.2236842105264, "grad_norm": 0.7272729873657227, "learning_rate": 0.0001, "loss": 0.0109, "step": 90170 }, { "epoch": 593.2894736842105, "grad_norm": 1.2663187980651855, "learning_rate": 0.0001, "loss": 0.0105, "step": 90180 }, { "epoch": 593.3552631578947, "grad_norm": 1.210874080657959, "learning_rate": 0.0001, "loss": 0.0104, "step": 90190 }, { "epoch": 593.421052631579, "grad_norm": 1.0026808977127075, "learning_rate": 0.0001, "loss": 0.0102, "step": 90200 }, { "epoch": 593.4868421052631, "grad_norm": 1.1828995943069458, "learning_rate": 0.0001, "loss": 0.0098, "step": 90210 }, { "epoch": 593.5526315789474, "grad_norm": 1.2319470643997192, "learning_rate": 0.0001, "loss": 0.0107, "step": 90220 }, { "epoch": 593.6184210526316, "grad_norm": 1.5883439779281616, "learning_rate": 0.0001, "loss": 0.0123, "step": 90230 }, { "epoch": 593.6842105263158, "grad_norm": 1.1823478937149048, "learning_rate": 0.0001, "loss": 0.0113, "step": 90240 }, { "epoch": 593.75, "grad_norm": 1.1434694528579712, "learning_rate": 0.0001, "loss": 0.0098, "step": 90250 }, { "epoch": 593.8157894736842, "grad_norm": 0.8349093794822693, "learning_rate": 0.0001, "loss": 0.0098, "step": 90260 }, { "epoch": 593.8815789473684, "grad_norm": 1.1716779470443726, "learning_rate": 0.0001, "loss": 0.0111, "step": 90270 }, { "epoch": 593.9473684210526, "grad_norm": 0.9748353362083435, "learning_rate": 0.0001, "loss": 0.0107, "step": 90280 }, { "epoch": 594.0131578947369, "grad_norm": 1.1021101474761963, "learning_rate": 0.0001, "loss": 0.0123, "step": 90290 }, { "epoch": 594.078947368421, "grad_norm": 1.007872223854065, "learning_rate": 0.0001, "loss": 0.0107, "step": 90300 }, { "epoch": 594.1447368421053, "grad_norm": 0.8488720059394836, "learning_rate": 0.0001, "loss": 0.0098, "step": 90310 }, { "epoch": 594.2105263157895, "grad_norm": 0.9892115592956543, "learning_rate": 0.0001, "loss": 0.0106, "step": 90320 }, { "epoch": 594.2763157894736, "grad_norm": 1.2533457279205322, "learning_rate": 0.0001, "loss": 0.0109, "step": 90330 }, { "epoch": 594.3421052631579, "grad_norm": 1.1002429723739624, "learning_rate": 0.0001, "loss": 0.0104, "step": 90340 }, { "epoch": 594.4078947368421, "grad_norm": 1.0091798305511475, "learning_rate": 0.0001, "loss": 0.0099, "step": 90350 }, { "epoch": 594.4736842105264, "grad_norm": 1.074102520942688, "learning_rate": 0.0001, "loss": 0.0094, "step": 90360 }, { "epoch": 594.5394736842105, "grad_norm": 0.9539269804954529, "learning_rate": 0.0001, "loss": 0.0126, "step": 90370 }, { "epoch": 594.6052631578947, "grad_norm": 1.0919673442840576, "learning_rate": 0.0001, "loss": 0.0104, "step": 90380 }, { "epoch": 594.671052631579, "grad_norm": 1.65645170211792, "learning_rate": 0.0001, "loss": 0.0114, "step": 90390 }, { "epoch": 594.7368421052631, "grad_norm": 1.5577173233032227, "learning_rate": 0.0001, "loss": 0.0123, "step": 90400 }, { "epoch": 594.8026315789474, "grad_norm": 0.9961990714073181, "learning_rate": 0.0001, "loss": 0.011, "step": 90410 }, { "epoch": 594.8684210526316, "grad_norm": 1.1721851825714111, "learning_rate": 0.0001, "loss": 0.0102, "step": 90420 }, { "epoch": 594.9342105263158, "grad_norm": 1.203532099723816, "learning_rate": 0.0001, "loss": 0.0106, "step": 90430 }, { "epoch": 595.0, "grad_norm": 1.3049278259277344, "learning_rate": 0.0001, "loss": 0.01, "step": 90440 }, { "epoch": 595.0657894736842, "grad_norm": 1.4618241786956787, "learning_rate": 0.0001, "loss": 0.0113, "step": 90450 }, { "epoch": 595.1315789473684, "grad_norm": 1.3204786777496338, "learning_rate": 0.0001, "loss": 0.01, "step": 90460 }, { "epoch": 595.1973684210526, "grad_norm": 1.2331825494766235, "learning_rate": 0.0001, "loss": 0.0099, "step": 90470 }, { "epoch": 595.2631578947369, "grad_norm": 1.2492101192474365, "learning_rate": 0.0001, "loss": 0.01, "step": 90480 }, { "epoch": 595.328947368421, "grad_norm": 1.1810252666473389, "learning_rate": 0.0001, "loss": 0.0132, "step": 90490 }, { "epoch": 595.3947368421053, "grad_norm": 1.0970319509506226, "learning_rate": 0.0001, "loss": 0.0098, "step": 90500 }, { "epoch": 595.4605263157895, "grad_norm": 1.2086048126220703, "learning_rate": 0.0001, "loss": 0.0098, "step": 90510 }, { "epoch": 595.5263157894736, "grad_norm": 0.9637195467948914, "learning_rate": 0.0001, "loss": 0.012, "step": 90520 }, { "epoch": 595.5921052631579, "grad_norm": 1.4612727165222168, "learning_rate": 0.0001, "loss": 0.0103, "step": 90530 }, { "epoch": 595.6578947368421, "grad_norm": 1.3866544961929321, "learning_rate": 0.0001, "loss": 0.0091, "step": 90540 }, { "epoch": 595.7236842105264, "grad_norm": 1.3224684000015259, "learning_rate": 0.0001, "loss": 0.0091, "step": 90550 }, { "epoch": 595.7894736842105, "grad_norm": 1.1373405456542969, "learning_rate": 0.0001, "loss": 0.0095, "step": 90560 }, { "epoch": 595.8552631578947, "grad_norm": 0.909879207611084, "learning_rate": 0.0001, "loss": 0.0108, "step": 90570 }, { "epoch": 595.921052631579, "grad_norm": 0.965241551399231, "learning_rate": 0.0001, "loss": 0.0099, "step": 90580 }, { "epoch": 595.9868421052631, "grad_norm": 1.0295147895812988, "learning_rate": 0.0001, "loss": 0.0113, "step": 90590 }, { "epoch": 596.0526315789474, "grad_norm": 1.1842830181121826, "learning_rate": 0.0001, "loss": 0.0109, "step": 90600 }, { "epoch": 596.1184210526316, "grad_norm": 1.119468331336975, "learning_rate": 0.0001, "loss": 0.012, "step": 90610 }, { "epoch": 596.1842105263158, "grad_norm": 1.0015408992767334, "learning_rate": 0.0001, "loss": 0.0107, "step": 90620 }, { "epoch": 596.25, "grad_norm": 1.0252635478973389, "learning_rate": 0.0001, "loss": 0.0105, "step": 90630 }, { "epoch": 596.3157894736842, "grad_norm": 1.0679528713226318, "learning_rate": 0.0001, "loss": 0.0094, "step": 90640 }, { "epoch": 596.3815789473684, "grad_norm": 1.1197189092636108, "learning_rate": 0.0001, "loss": 0.0105, "step": 90650 }, { "epoch": 596.4473684210526, "grad_norm": 1.1711959838867188, "learning_rate": 0.0001, "loss": 0.0103, "step": 90660 }, { "epoch": 596.5131578947369, "grad_norm": 0.8267062306404114, "learning_rate": 0.0001, "loss": 0.0108, "step": 90670 }, { "epoch": 596.578947368421, "grad_norm": 0.9089040756225586, "learning_rate": 0.0001, "loss": 0.0112, "step": 90680 }, { "epoch": 596.6447368421053, "grad_norm": 1.1336896419525146, "learning_rate": 0.0001, "loss": 0.0108, "step": 90690 }, { "epoch": 596.7105263157895, "grad_norm": 1.5351234674453735, "learning_rate": 0.0001, "loss": 0.0102, "step": 90700 }, { "epoch": 596.7763157894736, "grad_norm": 1.4574313163757324, "learning_rate": 0.0001, "loss": 0.0102, "step": 90710 }, { "epoch": 596.8421052631579, "grad_norm": 1.3572088479995728, "learning_rate": 0.0001, "loss": 0.0113, "step": 90720 }, { "epoch": 596.9078947368421, "grad_norm": 1.556077480316162, "learning_rate": 0.0001, "loss": 0.0127, "step": 90730 }, { "epoch": 596.9736842105264, "grad_norm": 1.4981848001480103, "learning_rate": 0.0001, "loss": 0.0099, "step": 90740 }, { "epoch": 597.0394736842105, "grad_norm": 1.3176113367080688, "learning_rate": 0.0001, "loss": 0.0093, "step": 90750 }, { "epoch": 597.1052631578947, "grad_norm": 1.1030333042144775, "learning_rate": 0.0001, "loss": 0.0118, "step": 90760 }, { "epoch": 597.171052631579, "grad_norm": 1.3109503984451294, "learning_rate": 0.0001, "loss": 0.0107, "step": 90770 }, { "epoch": 597.2368421052631, "grad_norm": 1.4163578748703003, "learning_rate": 0.0001, "loss": 0.0105, "step": 90780 }, { "epoch": 597.3026315789474, "grad_norm": 1.3910415172576904, "learning_rate": 0.0001, "loss": 0.0095, "step": 90790 }, { "epoch": 597.3684210526316, "grad_norm": 1.3340750932693481, "learning_rate": 0.0001, "loss": 0.0108, "step": 90800 }, { "epoch": 597.4342105263158, "grad_norm": 0.9749845862388611, "learning_rate": 0.0001, "loss": 0.0107, "step": 90810 }, { "epoch": 597.5, "grad_norm": 0.9810044169425964, "learning_rate": 0.0001, "loss": 0.0096, "step": 90820 }, { "epoch": 597.5657894736842, "grad_norm": 1.392261266708374, "learning_rate": 0.0001, "loss": 0.0086, "step": 90830 }, { "epoch": 597.6315789473684, "grad_norm": 1.2727516889572144, "learning_rate": 0.0001, "loss": 0.0119, "step": 90840 }, { "epoch": 597.6973684210526, "grad_norm": 1.2531763315200806, "learning_rate": 0.0001, "loss": 0.0106, "step": 90850 }, { "epoch": 597.7631578947369, "grad_norm": 1.1090060472488403, "learning_rate": 0.0001, "loss": 0.0107, "step": 90860 }, { "epoch": 597.828947368421, "grad_norm": 1.283146619796753, "learning_rate": 0.0001, "loss": 0.0109, "step": 90870 }, { "epoch": 597.8947368421053, "grad_norm": 0.8939560055732727, "learning_rate": 0.0001, "loss": 0.0115, "step": 90880 }, { "epoch": 597.9605263157895, "grad_norm": 0.7372931838035583, "learning_rate": 0.0001, "loss": 0.0099, "step": 90890 }, { "epoch": 598.0263157894736, "grad_norm": 0.992813229560852, "learning_rate": 0.0001, "loss": 0.0102, "step": 90900 }, { "epoch": 598.0921052631579, "grad_norm": 1.0757131576538086, "learning_rate": 0.0001, "loss": 0.0101, "step": 90910 }, { "epoch": 598.1578947368421, "grad_norm": 1.3756095170974731, "learning_rate": 0.0001, "loss": 0.0125, "step": 90920 }, { "epoch": 598.2236842105264, "grad_norm": 1.1609660387039185, "learning_rate": 0.0001, "loss": 0.0091, "step": 90930 }, { "epoch": 598.2894736842105, "grad_norm": 1.197822093963623, "learning_rate": 0.0001, "loss": 0.0112, "step": 90940 }, { "epoch": 598.3552631578947, "grad_norm": 1.0572071075439453, "learning_rate": 0.0001, "loss": 0.0092, "step": 90950 }, { "epoch": 598.421052631579, "grad_norm": 1.026586890220642, "learning_rate": 0.0001, "loss": 0.0119, "step": 90960 }, { "epoch": 598.4868421052631, "grad_norm": 1.0258917808532715, "learning_rate": 0.0001, "loss": 0.0107, "step": 90970 }, { "epoch": 598.5526315789474, "grad_norm": 1.1755049228668213, "learning_rate": 0.0001, "loss": 0.0103, "step": 90980 }, { "epoch": 598.6184210526316, "grad_norm": 1.2228240966796875, "learning_rate": 0.0001, "loss": 0.0115, "step": 90990 }, { "epoch": 598.6842105263158, "grad_norm": 1.14155912399292, "learning_rate": 0.0001, "loss": 0.0101, "step": 91000 }, { "epoch": 598.75, "grad_norm": 1.355281114578247, "learning_rate": 0.0001, "loss": 0.0111, "step": 91010 }, { "epoch": 598.8157894736842, "grad_norm": 0.7199444770812988, "learning_rate": 0.0001, "loss": 0.0124, "step": 91020 }, { "epoch": 598.8815789473684, "grad_norm": 0.8066603541374207, "learning_rate": 0.0001, "loss": 0.0103, "step": 91030 }, { "epoch": 598.9473684210526, "grad_norm": 0.8587542772293091, "learning_rate": 0.0001, "loss": 0.0108, "step": 91040 }, { "epoch": 599.0131578947369, "grad_norm": 0.8503077030181885, "learning_rate": 0.0001, "loss": 0.0098, "step": 91050 }, { "epoch": 599.078947368421, "grad_norm": 0.5482606887817383, "learning_rate": 0.0001, "loss": 0.0105, "step": 91060 }, { "epoch": 599.1447368421053, "grad_norm": 1.3587243556976318, "learning_rate": 0.0001, "loss": 0.0117, "step": 91070 }, { "epoch": 599.2105263157895, "grad_norm": 1.3960087299346924, "learning_rate": 0.0001, "loss": 0.0092, "step": 91080 }, { "epoch": 599.2763157894736, "grad_norm": 1.0723680257797241, "learning_rate": 0.0001, "loss": 0.0112, "step": 91090 }, { "epoch": 599.3421052631579, "grad_norm": 0.911795437335968, "learning_rate": 0.0001, "loss": 0.0111, "step": 91100 }, { "epoch": 599.4078947368421, "grad_norm": 1.3153270483016968, "learning_rate": 0.0001, "loss": 0.0126, "step": 91110 }, { "epoch": 599.4736842105264, "grad_norm": 1.3088375329971313, "learning_rate": 0.0001, "loss": 0.0128, "step": 91120 }, { "epoch": 599.5394736842105, "grad_norm": 1.0322259664535522, "learning_rate": 0.0001, "loss": 0.012, "step": 91130 }, { "epoch": 599.6052631578947, "grad_norm": 0.8440555334091187, "learning_rate": 0.0001, "loss": 0.0102, "step": 91140 }, { "epoch": 599.671052631579, "grad_norm": 1.4241567850112915, "learning_rate": 0.0001, "loss": 0.012, "step": 91150 }, { "epoch": 599.7368421052631, "grad_norm": 1.298012375831604, "learning_rate": 0.0001, "loss": 0.0096, "step": 91160 }, { "epoch": 599.8026315789474, "grad_norm": 2.0893771648406982, "learning_rate": 0.0001, "loss": 0.0108, "step": 91170 }, { "epoch": 599.8684210526316, "grad_norm": 1.2253968715667725, "learning_rate": 0.0001, "loss": 0.0112, "step": 91180 }, { "epoch": 599.9342105263158, "grad_norm": 1.2711297273635864, "learning_rate": 0.0001, "loss": 0.0106, "step": 91190 }, { "epoch": 600.0, "grad_norm": 0.9054577350616455, "learning_rate": 0.0001, "loss": 0.0095, "step": 91200 }, { "epoch": 600.0657894736842, "grad_norm": 0.9020711779594421, "learning_rate": 0.0001, "loss": 0.0105, "step": 91210 }, { "epoch": 600.1315789473684, "grad_norm": 1.009748935699463, "learning_rate": 0.0001, "loss": 0.0112, "step": 91220 }, { "epoch": 600.1973684210526, "grad_norm": 1.466187834739685, "learning_rate": 0.0001, "loss": 0.009, "step": 91230 }, { "epoch": 600.2631578947369, "grad_norm": 1.3021607398986816, "learning_rate": 0.0001, "loss": 0.012, "step": 91240 }, { "epoch": 600.328947368421, "grad_norm": 1.0254582166671753, "learning_rate": 0.0001, "loss": 0.0107, "step": 91250 }, { "epoch": 600.3947368421053, "grad_norm": 0.8893698453903198, "learning_rate": 0.0001, "loss": 0.0089, "step": 91260 }, { "epoch": 600.4605263157895, "grad_norm": 1.190147042274475, "learning_rate": 0.0001, "loss": 0.0095, "step": 91270 }, { "epoch": 600.5263157894736, "grad_norm": 1.1104352474212646, "learning_rate": 0.0001, "loss": 0.0112, "step": 91280 }, { "epoch": 600.5921052631579, "grad_norm": 1.2664473056793213, "learning_rate": 0.0001, "loss": 0.0111, "step": 91290 }, { "epoch": 600.6578947368421, "grad_norm": 1.136460781097412, "learning_rate": 0.0001, "loss": 0.0109, "step": 91300 }, { "epoch": 600.7236842105264, "grad_norm": 1.4673341512680054, "learning_rate": 0.0001, "loss": 0.0118, "step": 91310 }, { "epoch": 600.7894736842105, "grad_norm": 1.533826231956482, "learning_rate": 0.0001, "loss": 0.0124, "step": 91320 }, { "epoch": 600.8552631578947, "grad_norm": 1.178153157234192, "learning_rate": 0.0001, "loss": 0.0103, "step": 91330 }, { "epoch": 600.921052631579, "grad_norm": 1.365976095199585, "learning_rate": 0.0001, "loss": 0.0129, "step": 91340 }, { "epoch": 600.9868421052631, "grad_norm": 1.1566143035888672, "learning_rate": 0.0001, "loss": 0.0097, "step": 91350 }, { "epoch": 601.0526315789474, "grad_norm": 1.1433640718460083, "learning_rate": 0.0001, "loss": 0.0123, "step": 91360 }, { "epoch": 601.1184210526316, "grad_norm": 0.9018469452857971, "learning_rate": 0.0001, "loss": 0.011, "step": 91370 }, { "epoch": 601.1842105263158, "grad_norm": 1.1393685340881348, "learning_rate": 0.0001, "loss": 0.0106, "step": 91380 }, { "epoch": 601.25, "grad_norm": 1.1455858945846558, "learning_rate": 0.0001, "loss": 0.0126, "step": 91390 }, { "epoch": 601.3157894736842, "grad_norm": 1.4033070802688599, "learning_rate": 0.0001, "loss": 0.0121, "step": 91400 }, { "epoch": 601.3815789473684, "grad_norm": 1.4642856121063232, "learning_rate": 0.0001, "loss": 0.0112, "step": 91410 }, { "epoch": 601.4473684210526, "grad_norm": 1.0535764694213867, "learning_rate": 0.0001, "loss": 0.0101, "step": 91420 }, { "epoch": 601.5131578947369, "grad_norm": 1.0785901546478271, "learning_rate": 0.0001, "loss": 0.0096, "step": 91430 }, { "epoch": 601.578947368421, "grad_norm": 1.2098617553710938, "learning_rate": 0.0001, "loss": 0.011, "step": 91440 }, { "epoch": 601.6447368421053, "grad_norm": 1.4227787256240845, "learning_rate": 0.0001, "loss": 0.0117, "step": 91450 }, { "epoch": 601.7105263157895, "grad_norm": 1.245746374130249, "learning_rate": 0.0001, "loss": 0.0112, "step": 91460 }, { "epoch": 601.7763157894736, "grad_norm": 1.1183278560638428, "learning_rate": 0.0001, "loss": 0.0098, "step": 91470 }, { "epoch": 601.8421052631579, "grad_norm": 1.2257438898086548, "learning_rate": 0.0001, "loss": 0.0107, "step": 91480 }, { "epoch": 601.9078947368421, "grad_norm": 0.8656227588653564, "learning_rate": 0.0001, "loss": 0.01, "step": 91490 }, { "epoch": 601.9736842105264, "grad_norm": 0.9605566263198853, "learning_rate": 0.0001, "loss": 0.0115, "step": 91500 }, { "epoch": 602.0394736842105, "grad_norm": 1.148573637008667, "learning_rate": 0.0001, "loss": 0.0099, "step": 91510 }, { "epoch": 602.1052631578947, "grad_norm": 1.1873161792755127, "learning_rate": 0.0001, "loss": 0.0114, "step": 91520 }, { "epoch": 602.171052631579, "grad_norm": 1.2313774824142456, "learning_rate": 0.0001, "loss": 0.0102, "step": 91530 }, { "epoch": 602.2368421052631, "grad_norm": 0.932478666305542, "learning_rate": 0.0001, "loss": 0.0112, "step": 91540 }, { "epoch": 602.3026315789474, "grad_norm": 1.0965076684951782, "learning_rate": 0.0001, "loss": 0.0124, "step": 91550 }, { "epoch": 602.3684210526316, "grad_norm": 1.0548715591430664, "learning_rate": 0.0001, "loss": 0.0113, "step": 91560 }, { "epoch": 602.4342105263158, "grad_norm": 0.6713628172874451, "learning_rate": 0.0001, "loss": 0.011, "step": 91570 }, { "epoch": 602.5, "grad_norm": 1.1099224090576172, "learning_rate": 0.0001, "loss": 0.0118, "step": 91580 }, { "epoch": 602.5657894736842, "grad_norm": 1.04884672164917, "learning_rate": 0.0001, "loss": 0.0097, "step": 91590 }, { "epoch": 602.6315789473684, "grad_norm": 0.7628430128097534, "learning_rate": 0.0001, "loss": 0.0107, "step": 91600 }, { "epoch": 602.6973684210526, "grad_norm": 0.8330525755882263, "learning_rate": 0.0001, "loss": 0.0098, "step": 91610 }, { "epoch": 602.7631578947369, "grad_norm": 1.0476446151733398, "learning_rate": 0.0001, "loss": 0.0121, "step": 91620 }, { "epoch": 602.828947368421, "grad_norm": 0.9488280415534973, "learning_rate": 0.0001, "loss": 0.0108, "step": 91630 }, { "epoch": 602.8947368421053, "grad_norm": 1.0100157260894775, "learning_rate": 0.0001, "loss": 0.0122, "step": 91640 }, { "epoch": 602.9605263157895, "grad_norm": 1.3274532556533813, "learning_rate": 0.0001, "loss": 0.0092, "step": 91650 }, { "epoch": 603.0263157894736, "grad_norm": 1.0656237602233887, "learning_rate": 0.0001, "loss": 0.0118, "step": 91660 }, { "epoch": 603.0921052631579, "grad_norm": 1.239275336265564, "learning_rate": 0.0001, "loss": 0.0105, "step": 91670 }, { "epoch": 603.1578947368421, "grad_norm": 1.4684178829193115, "learning_rate": 0.0001, "loss": 0.0109, "step": 91680 }, { "epoch": 603.2236842105264, "grad_norm": 1.0184123516082764, "learning_rate": 0.0001, "loss": 0.0106, "step": 91690 }, { "epoch": 603.2894736842105, "grad_norm": 0.9157920479774475, "learning_rate": 0.0001, "loss": 0.012, "step": 91700 }, { "epoch": 603.3552631578947, "grad_norm": 1.0523053407669067, "learning_rate": 0.0001, "loss": 0.0106, "step": 91710 }, { "epoch": 603.421052631579, "grad_norm": 1.4203752279281616, "learning_rate": 0.0001, "loss": 0.0113, "step": 91720 }, { "epoch": 603.4868421052631, "grad_norm": 1.1770908832550049, "learning_rate": 0.0001, "loss": 0.0108, "step": 91730 }, { "epoch": 603.5526315789474, "grad_norm": 1.181809425354004, "learning_rate": 0.0001, "loss": 0.0125, "step": 91740 }, { "epoch": 603.6184210526316, "grad_norm": 1.3628486394882202, "learning_rate": 0.0001, "loss": 0.0099, "step": 91750 }, { "epoch": 603.6842105263158, "grad_norm": 1.0041279792785645, "learning_rate": 0.0001, "loss": 0.0111, "step": 91760 }, { "epoch": 603.75, "grad_norm": 1.1971017122268677, "learning_rate": 0.0001, "loss": 0.0108, "step": 91770 }, { "epoch": 603.8157894736842, "grad_norm": 0.9669057726860046, "learning_rate": 0.0001, "loss": 0.0097, "step": 91780 }, { "epoch": 603.8815789473684, "grad_norm": 1.1059627532958984, "learning_rate": 0.0001, "loss": 0.012, "step": 91790 }, { "epoch": 603.9473684210526, "grad_norm": 1.0041719675064087, "learning_rate": 0.0001, "loss": 0.0116, "step": 91800 }, { "epoch": 604.0131578947369, "grad_norm": 0.8442330360412598, "learning_rate": 0.0001, "loss": 0.0118, "step": 91810 }, { "epoch": 604.078947368421, "grad_norm": 0.9047984480857849, "learning_rate": 0.0001, "loss": 0.0135, "step": 91820 }, { "epoch": 604.1447368421053, "grad_norm": 1.000754714012146, "learning_rate": 0.0001, "loss": 0.0108, "step": 91830 }, { "epoch": 604.2105263157895, "grad_norm": 1.2270958423614502, "learning_rate": 0.0001, "loss": 0.0098, "step": 91840 }, { "epoch": 604.2763157894736, "grad_norm": 0.8679501414299011, "learning_rate": 0.0001, "loss": 0.0107, "step": 91850 }, { "epoch": 604.3421052631579, "grad_norm": 0.7244850397109985, "learning_rate": 0.0001, "loss": 0.0102, "step": 91860 }, { "epoch": 604.4078947368421, "grad_norm": 0.9245374202728271, "learning_rate": 0.0001, "loss": 0.0092, "step": 91870 }, { "epoch": 604.4736842105264, "grad_norm": 0.977651834487915, "learning_rate": 0.0001, "loss": 0.0125, "step": 91880 }, { "epoch": 604.5394736842105, "grad_norm": 0.9714836478233337, "learning_rate": 0.0001, "loss": 0.0109, "step": 91890 }, { "epoch": 604.6052631578947, "grad_norm": 1.11795973777771, "learning_rate": 0.0001, "loss": 0.0113, "step": 91900 }, { "epoch": 604.671052631579, "grad_norm": 1.1345611810684204, "learning_rate": 0.0001, "loss": 0.0112, "step": 91910 }, { "epoch": 604.7368421052631, "grad_norm": 1.1849992275238037, "learning_rate": 0.0001, "loss": 0.0116, "step": 91920 }, { "epoch": 604.8026315789474, "grad_norm": 1.2668788433074951, "learning_rate": 0.0001, "loss": 0.0118, "step": 91930 }, { "epoch": 604.8684210526316, "grad_norm": 1.0477993488311768, "learning_rate": 0.0001, "loss": 0.0103, "step": 91940 }, { "epoch": 604.9342105263158, "grad_norm": 1.0195016860961914, "learning_rate": 0.0001, "loss": 0.0105, "step": 91950 }, { "epoch": 605.0, "grad_norm": 0.9753181338310242, "learning_rate": 0.0001, "loss": 0.0092, "step": 91960 }, { "epoch": 605.0657894736842, "grad_norm": 0.8940220475196838, "learning_rate": 0.0001, "loss": 0.0106, "step": 91970 }, { "epoch": 605.1315789473684, "grad_norm": 0.779227077960968, "learning_rate": 0.0001, "loss": 0.0114, "step": 91980 }, { "epoch": 605.1973684210526, "grad_norm": 1.1637389659881592, "learning_rate": 0.0001, "loss": 0.0099, "step": 91990 }, { "epoch": 605.2631578947369, "grad_norm": 1.1492772102355957, "learning_rate": 0.0001, "loss": 0.0102, "step": 92000 }, { "epoch": 605.328947368421, "grad_norm": 1.265363097190857, "learning_rate": 0.0001, "loss": 0.0099, "step": 92010 }, { "epoch": 605.3947368421053, "grad_norm": 1.1617438793182373, "learning_rate": 0.0001, "loss": 0.0115, "step": 92020 }, { "epoch": 605.4605263157895, "grad_norm": 1.0030673742294312, "learning_rate": 0.0001, "loss": 0.0111, "step": 92030 }, { "epoch": 605.5263157894736, "grad_norm": 1.7177019119262695, "learning_rate": 0.0001, "loss": 0.0102, "step": 92040 }, { "epoch": 605.5921052631579, "grad_norm": 1.1032168865203857, "learning_rate": 0.0001, "loss": 0.0109, "step": 92050 }, { "epoch": 605.6578947368421, "grad_norm": 1.0121941566467285, "learning_rate": 0.0001, "loss": 0.0108, "step": 92060 }, { "epoch": 605.7236842105264, "grad_norm": 1.0975619554519653, "learning_rate": 0.0001, "loss": 0.0122, "step": 92070 }, { "epoch": 605.7894736842105, "grad_norm": 0.9619658589363098, "learning_rate": 0.0001, "loss": 0.0114, "step": 92080 }, { "epoch": 605.8552631578947, "grad_norm": 0.8327913284301758, "learning_rate": 0.0001, "loss": 0.0115, "step": 92090 }, { "epoch": 605.921052631579, "grad_norm": 1.230811357498169, "learning_rate": 0.0001, "loss": 0.0108, "step": 92100 }, { "epoch": 605.9868421052631, "grad_norm": 1.1441431045532227, "learning_rate": 0.0001, "loss": 0.0126, "step": 92110 }, { "epoch": 606.0526315789474, "grad_norm": 0.8227072954177856, "learning_rate": 0.0001, "loss": 0.0134, "step": 92120 }, { "epoch": 606.1184210526316, "grad_norm": 0.9248983860015869, "learning_rate": 0.0001, "loss": 0.0109, "step": 92130 }, { "epoch": 606.1842105263158, "grad_norm": 1.613922119140625, "learning_rate": 0.0001, "loss": 0.0116, "step": 92140 }, { "epoch": 606.25, "grad_norm": 1.2741233110427856, "learning_rate": 0.0001, "loss": 0.0106, "step": 92150 }, { "epoch": 606.3157894736842, "grad_norm": 1.0718578100204468, "learning_rate": 0.0001, "loss": 0.0126, "step": 92160 }, { "epoch": 606.3815789473684, "grad_norm": 1.7179508209228516, "learning_rate": 0.0001, "loss": 0.0109, "step": 92170 }, { "epoch": 606.4473684210526, "grad_norm": 1.6176236867904663, "learning_rate": 0.0001, "loss": 0.0111, "step": 92180 }, { "epoch": 606.5131578947369, "grad_norm": 1.308016300201416, "learning_rate": 0.0001, "loss": 0.0116, "step": 92190 }, { "epoch": 606.578947368421, "grad_norm": 1.2682429552078247, "learning_rate": 0.0001, "loss": 0.0104, "step": 92200 }, { "epoch": 606.6447368421053, "grad_norm": 1.2215003967285156, "learning_rate": 0.0001, "loss": 0.0111, "step": 92210 }, { "epoch": 606.7105263157895, "grad_norm": 1.3663808107376099, "learning_rate": 0.0001, "loss": 0.0122, "step": 92220 }, { "epoch": 606.7763157894736, "grad_norm": 0.8408277630805969, "learning_rate": 0.0001, "loss": 0.0093, "step": 92230 }, { "epoch": 606.8421052631579, "grad_norm": 0.7498778104782104, "learning_rate": 0.0001, "loss": 0.0088, "step": 92240 }, { "epoch": 606.9078947368421, "grad_norm": 1.0909346342086792, "learning_rate": 0.0001, "loss": 0.0105, "step": 92250 }, { "epoch": 606.9736842105264, "grad_norm": 0.9541800618171692, "learning_rate": 0.0001, "loss": 0.0102, "step": 92260 }, { "epoch": 607.0394736842105, "grad_norm": 1.0058972835540771, "learning_rate": 0.0001, "loss": 0.0118, "step": 92270 }, { "epoch": 607.1052631578947, "grad_norm": 0.4746664762496948, "learning_rate": 0.0001, "loss": 0.0123, "step": 92280 }, { "epoch": 607.171052631579, "grad_norm": 0.893031656742096, "learning_rate": 0.0001, "loss": 0.0092, "step": 92290 }, { "epoch": 607.2368421052631, "grad_norm": 1.1034398078918457, "learning_rate": 0.0001, "loss": 0.0103, "step": 92300 }, { "epoch": 607.3026315789474, "grad_norm": 1.0481274127960205, "learning_rate": 0.0001, "loss": 0.0101, "step": 92310 }, { "epoch": 607.3684210526316, "grad_norm": 1.0464096069335938, "learning_rate": 0.0001, "loss": 0.0121, "step": 92320 }, { "epoch": 607.4342105263158, "grad_norm": 1.2462482452392578, "learning_rate": 0.0001, "loss": 0.0095, "step": 92330 }, { "epoch": 607.5, "grad_norm": 1.3764554262161255, "learning_rate": 0.0001, "loss": 0.0123, "step": 92340 }, { "epoch": 607.5657894736842, "grad_norm": 1.0368061065673828, "learning_rate": 0.0001, "loss": 0.0121, "step": 92350 }, { "epoch": 607.6315789473684, "grad_norm": 0.9703469276428223, "learning_rate": 0.0001, "loss": 0.0099, "step": 92360 }, { "epoch": 607.6973684210526, "grad_norm": 1.2898932695388794, "learning_rate": 0.0001, "loss": 0.0099, "step": 92370 }, { "epoch": 607.7631578947369, "grad_norm": 0.900486946105957, "learning_rate": 0.0001, "loss": 0.0111, "step": 92380 }, { "epoch": 607.828947368421, "grad_norm": 1.2100414037704468, "learning_rate": 0.0001, "loss": 0.0119, "step": 92390 }, { "epoch": 607.8947368421053, "grad_norm": 1.1729727983474731, "learning_rate": 0.0001, "loss": 0.0112, "step": 92400 }, { "epoch": 607.9605263157895, "grad_norm": 1.127228856086731, "learning_rate": 0.0001, "loss": 0.0113, "step": 92410 }, { "epoch": 608.0263157894736, "grad_norm": 0.8651881217956543, "learning_rate": 0.0001, "loss": 0.0125, "step": 92420 }, { "epoch": 608.0921052631579, "grad_norm": 0.8712637424468994, "learning_rate": 0.0001, "loss": 0.0093, "step": 92430 }, { "epoch": 608.1578947368421, "grad_norm": 1.1293796300888062, "learning_rate": 0.0001, "loss": 0.0101, "step": 92440 }, { "epoch": 608.2236842105264, "grad_norm": 1.2297700643539429, "learning_rate": 0.0001, "loss": 0.0123, "step": 92450 }, { "epoch": 608.2894736842105, "grad_norm": 1.038426399230957, "learning_rate": 0.0001, "loss": 0.012, "step": 92460 }, { "epoch": 608.3552631578947, "grad_norm": 1.3792036771774292, "learning_rate": 0.0001, "loss": 0.0108, "step": 92470 }, { "epoch": 608.421052631579, "grad_norm": 1.0525060892105103, "learning_rate": 0.0001, "loss": 0.0107, "step": 92480 }, { "epoch": 608.4868421052631, "grad_norm": 1.0307869911193848, "learning_rate": 0.0001, "loss": 0.0095, "step": 92490 }, { "epoch": 608.5526315789474, "grad_norm": 0.7510896325111389, "learning_rate": 0.0001, "loss": 0.0128, "step": 92500 }, { "epoch": 608.6184210526316, "grad_norm": 0.973735511302948, "learning_rate": 0.0001, "loss": 0.0115, "step": 92510 }, { "epoch": 608.6842105263158, "grad_norm": 1.3305103778839111, "learning_rate": 0.0001, "loss": 0.0118, "step": 92520 }, { "epoch": 608.75, "grad_norm": 0.8219487071037292, "learning_rate": 0.0001, "loss": 0.0107, "step": 92530 }, { "epoch": 608.8157894736842, "grad_norm": 0.836087703704834, "learning_rate": 0.0001, "loss": 0.0105, "step": 92540 }, { "epoch": 608.8815789473684, "grad_norm": 1.397853970527649, "learning_rate": 0.0001, "loss": 0.0103, "step": 92550 }, { "epoch": 608.9473684210526, "grad_norm": 1.1062546968460083, "learning_rate": 0.0001, "loss": 0.0107, "step": 92560 }, { "epoch": 609.0131578947369, "grad_norm": 0.8973669409751892, "learning_rate": 0.0001, "loss": 0.0117, "step": 92570 }, { "epoch": 609.078947368421, "grad_norm": 1.047875165939331, "learning_rate": 0.0001, "loss": 0.0115, "step": 92580 }, { "epoch": 609.1447368421053, "grad_norm": 1.2423298358917236, "learning_rate": 0.0001, "loss": 0.0099, "step": 92590 }, { "epoch": 609.2105263157895, "grad_norm": 0.6068233251571655, "learning_rate": 0.0001, "loss": 0.0119, "step": 92600 }, { "epoch": 609.2763157894736, "grad_norm": 1.0823287963867188, "learning_rate": 0.0001, "loss": 0.0101, "step": 92610 }, { "epoch": 609.3421052631579, "grad_norm": 0.9071270227432251, "learning_rate": 0.0001, "loss": 0.0111, "step": 92620 }, { "epoch": 609.4078947368421, "grad_norm": 0.6887641549110413, "learning_rate": 0.0001, "loss": 0.0096, "step": 92630 }, { "epoch": 609.4736842105264, "grad_norm": 1.3606575727462769, "learning_rate": 0.0001, "loss": 0.0105, "step": 92640 }, { "epoch": 609.5394736842105, "grad_norm": 0.9844822287559509, "learning_rate": 0.0001, "loss": 0.01, "step": 92650 }, { "epoch": 609.6052631578947, "grad_norm": 1.0416951179504395, "learning_rate": 0.0001, "loss": 0.0098, "step": 92660 }, { "epoch": 609.671052631579, "grad_norm": 1.3930866718292236, "learning_rate": 0.0001, "loss": 0.0112, "step": 92670 }, { "epoch": 609.7368421052631, "grad_norm": 1.41727614402771, "learning_rate": 0.0001, "loss": 0.0108, "step": 92680 }, { "epoch": 609.8026315789474, "grad_norm": 1.0249476432800293, "learning_rate": 0.0001, "loss": 0.0128, "step": 92690 }, { "epoch": 609.8684210526316, "grad_norm": 1.2941230535507202, "learning_rate": 0.0001, "loss": 0.0097, "step": 92700 }, { "epoch": 609.9342105263158, "grad_norm": 1.1592159271240234, "learning_rate": 0.0001, "loss": 0.0133, "step": 92710 }, { "epoch": 610.0, "grad_norm": 1.3490889072418213, "learning_rate": 0.0001, "loss": 0.0101, "step": 92720 }, { "epoch": 610.0657894736842, "grad_norm": 1.1179836988449097, "learning_rate": 0.0001, "loss": 0.0138, "step": 92730 }, { "epoch": 610.1315789473684, "grad_norm": 1.387890100479126, "learning_rate": 0.0001, "loss": 0.0117, "step": 92740 }, { "epoch": 610.1973684210526, "grad_norm": 1.3158421516418457, "learning_rate": 0.0001, "loss": 0.0117, "step": 92750 }, { "epoch": 610.2631578947369, "grad_norm": 1.1624884605407715, "learning_rate": 0.0001, "loss": 0.011, "step": 92760 }, { "epoch": 610.328947368421, "grad_norm": 0.9471017718315125, "learning_rate": 0.0001, "loss": 0.0106, "step": 92770 }, { "epoch": 610.3947368421053, "grad_norm": 1.1989469528198242, "learning_rate": 0.0001, "loss": 0.0095, "step": 92780 }, { "epoch": 610.4605263157895, "grad_norm": 1.1440564393997192, "learning_rate": 0.0001, "loss": 0.0105, "step": 92790 }, { "epoch": 610.5263157894736, "grad_norm": 1.0892585515975952, "learning_rate": 0.0001, "loss": 0.0118, "step": 92800 }, { "epoch": 610.5921052631579, "grad_norm": 1.0231949090957642, "learning_rate": 0.0001, "loss": 0.0124, "step": 92810 }, { "epoch": 610.6578947368421, "grad_norm": 1.5775789022445679, "learning_rate": 0.0001, "loss": 0.0123, "step": 92820 }, { "epoch": 610.7236842105264, "grad_norm": 1.2640076875686646, "learning_rate": 0.0001, "loss": 0.0128, "step": 92830 }, { "epoch": 610.7894736842105, "grad_norm": 1.052940011024475, "learning_rate": 0.0001, "loss": 0.0138, "step": 92840 }, { "epoch": 610.8552631578947, "grad_norm": 1.000012993812561, "learning_rate": 0.0001, "loss": 0.0125, "step": 92850 }, { "epoch": 610.921052631579, "grad_norm": 1.0184035301208496, "learning_rate": 0.0001, "loss": 0.0135, "step": 92860 }, { "epoch": 610.9868421052631, "grad_norm": 1.1735719442367554, "learning_rate": 0.0001, "loss": 0.0119, "step": 92870 }, { "epoch": 611.0526315789474, "grad_norm": 1.5585322380065918, "learning_rate": 0.0001, "loss": 0.0148, "step": 92880 }, { "epoch": 611.1184210526316, "grad_norm": 1.2368836402893066, "learning_rate": 0.0001, "loss": 0.0109, "step": 92890 }, { "epoch": 611.1842105263158, "grad_norm": 0.9293583035469055, "learning_rate": 0.0001, "loss": 0.0129, "step": 92900 }, { "epoch": 611.25, "grad_norm": 1.3896769285202026, "learning_rate": 0.0001, "loss": 0.0126, "step": 92910 }, { "epoch": 611.3157894736842, "grad_norm": 1.0448602437973022, "learning_rate": 0.0001, "loss": 0.013, "step": 92920 }, { "epoch": 611.3815789473684, "grad_norm": 1.4385607242584229, "learning_rate": 0.0001, "loss": 0.0153, "step": 92930 }, { "epoch": 611.4473684210526, "grad_norm": 1.271680474281311, "learning_rate": 0.0001, "loss": 0.0107, "step": 92940 }, { "epoch": 611.5131578947369, "grad_norm": 1.3433717489242554, "learning_rate": 0.0001, "loss": 0.011, "step": 92950 }, { "epoch": 611.578947368421, "grad_norm": 1.157219409942627, "learning_rate": 0.0001, "loss": 0.0115, "step": 92960 }, { "epoch": 611.6447368421053, "grad_norm": 1.4146461486816406, "learning_rate": 0.0001, "loss": 0.0162, "step": 92970 }, { "epoch": 611.7105263157895, "grad_norm": 1.070226788520813, "learning_rate": 0.0001, "loss": 0.0123, "step": 92980 }, { "epoch": 611.7763157894736, "grad_norm": 1.0087385177612305, "learning_rate": 0.0001, "loss": 0.0135, "step": 92990 }, { "epoch": 611.8421052631579, "grad_norm": 1.1002082824707031, "learning_rate": 0.0001, "loss": 0.012, "step": 93000 }, { "epoch": 611.9078947368421, "grad_norm": 1.0607959032058716, "learning_rate": 0.0001, "loss": 0.0126, "step": 93010 }, { "epoch": 611.9736842105264, "grad_norm": 0.7643837928771973, "learning_rate": 0.0001, "loss": 0.0131, "step": 93020 }, { "epoch": 612.0394736842105, "grad_norm": 1.0315144062042236, "learning_rate": 0.0001, "loss": 0.012, "step": 93030 }, { "epoch": 612.1052631578947, "grad_norm": 1.1846758127212524, "learning_rate": 0.0001, "loss": 0.0134, "step": 93040 }, { "epoch": 612.171052631579, "grad_norm": 1.1276209354400635, "learning_rate": 0.0001, "loss": 0.0116, "step": 93050 }, { "epoch": 612.2368421052631, "grad_norm": 1.1060857772827148, "learning_rate": 0.0001, "loss": 0.0131, "step": 93060 }, { "epoch": 612.3026315789474, "grad_norm": 0.8714224696159363, "learning_rate": 0.0001, "loss": 0.0109, "step": 93070 }, { "epoch": 612.3684210526316, "grad_norm": 1.2322101593017578, "learning_rate": 0.0001, "loss": 0.0124, "step": 93080 }, { "epoch": 612.4342105263158, "grad_norm": 1.2291126251220703, "learning_rate": 0.0001, "loss": 0.0117, "step": 93090 }, { "epoch": 612.5, "grad_norm": 1.4525691270828247, "learning_rate": 0.0001, "loss": 0.0134, "step": 93100 }, { "epoch": 612.5657894736842, "grad_norm": 0.7913857698440552, "learning_rate": 0.0001, "loss": 0.0133, "step": 93110 }, { "epoch": 612.6315789473684, "grad_norm": 1.1557925939559937, "learning_rate": 0.0001, "loss": 0.0128, "step": 93120 }, { "epoch": 612.6973684210526, "grad_norm": 0.9045695066452026, "learning_rate": 0.0001, "loss": 0.0128, "step": 93130 }, { "epoch": 612.7631578947369, "grad_norm": 1.2888610363006592, "learning_rate": 0.0001, "loss": 0.0129, "step": 93140 }, { "epoch": 612.828947368421, "grad_norm": 1.0652800798416138, "learning_rate": 0.0001, "loss": 0.0116, "step": 93150 }, { "epoch": 612.8947368421053, "grad_norm": 1.246463656425476, "learning_rate": 0.0001, "loss": 0.0117, "step": 93160 }, { "epoch": 612.9605263157895, "grad_norm": 0.9784409403800964, "learning_rate": 0.0001, "loss": 0.0102, "step": 93170 }, { "epoch": 613.0263157894736, "grad_norm": 1.0943495035171509, "learning_rate": 0.0001, "loss": 0.0124, "step": 93180 }, { "epoch": 613.0921052631579, "grad_norm": 1.1042827367782593, "learning_rate": 0.0001, "loss": 0.0112, "step": 93190 }, { "epoch": 613.1578947368421, "grad_norm": 1.0616756677627563, "learning_rate": 0.0001, "loss": 0.0121, "step": 93200 }, { "epoch": 613.2236842105264, "grad_norm": 1.2859280109405518, "learning_rate": 0.0001, "loss": 0.0125, "step": 93210 }, { "epoch": 613.2894736842105, "grad_norm": 1.3570408821105957, "learning_rate": 0.0001, "loss": 0.0116, "step": 93220 }, { "epoch": 613.3552631578947, "grad_norm": 0.9719215035438538, "learning_rate": 0.0001, "loss": 0.0093, "step": 93230 }, { "epoch": 613.421052631579, "grad_norm": 1.1417118310928345, "learning_rate": 0.0001, "loss": 0.0123, "step": 93240 }, { "epoch": 613.4868421052631, "grad_norm": 1.616381287574768, "learning_rate": 0.0001, "loss": 0.0128, "step": 93250 }, { "epoch": 613.5526315789474, "grad_norm": 1.248619794845581, "learning_rate": 0.0001, "loss": 0.0114, "step": 93260 }, { "epoch": 613.6184210526316, "grad_norm": 1.056206226348877, "learning_rate": 0.0001, "loss": 0.0125, "step": 93270 }, { "epoch": 613.6842105263158, "grad_norm": 0.8425273299217224, "learning_rate": 0.0001, "loss": 0.0109, "step": 93280 }, { "epoch": 613.75, "grad_norm": 0.8737837076187134, "learning_rate": 0.0001, "loss": 0.0097, "step": 93290 }, { "epoch": 613.8157894736842, "grad_norm": 0.8249803185462952, "learning_rate": 0.0001, "loss": 0.0119, "step": 93300 }, { "epoch": 613.8815789473684, "grad_norm": 1.2743202447891235, "learning_rate": 0.0001, "loss": 0.0131, "step": 93310 }, { "epoch": 613.9473684210526, "grad_norm": 0.8276104927062988, "learning_rate": 0.0001, "loss": 0.0089, "step": 93320 }, { "epoch": 614.0131578947369, "grad_norm": 0.7873020768165588, "learning_rate": 0.0001, "loss": 0.011, "step": 93330 }, { "epoch": 614.078947368421, "grad_norm": 0.9627916812896729, "learning_rate": 0.0001, "loss": 0.0108, "step": 93340 }, { "epoch": 614.1447368421053, "grad_norm": 1.3203983306884766, "learning_rate": 0.0001, "loss": 0.0107, "step": 93350 }, { "epoch": 614.2105263157895, "grad_norm": 0.8902283906936646, "learning_rate": 0.0001, "loss": 0.0114, "step": 93360 }, { "epoch": 614.2763157894736, "grad_norm": 1.3011919260025024, "learning_rate": 0.0001, "loss": 0.0111, "step": 93370 }, { "epoch": 614.3421052631579, "grad_norm": 1.173582673072815, "learning_rate": 0.0001, "loss": 0.0095, "step": 93380 }, { "epoch": 614.4078947368421, "grad_norm": 0.8310754299163818, "learning_rate": 0.0001, "loss": 0.0123, "step": 93390 }, { "epoch": 614.4736842105264, "grad_norm": 1.0314079523086548, "learning_rate": 0.0001, "loss": 0.0123, "step": 93400 }, { "epoch": 614.5394736842105, "grad_norm": 0.9039306640625, "learning_rate": 0.0001, "loss": 0.0117, "step": 93410 }, { "epoch": 614.6052631578947, "grad_norm": 0.8027017116546631, "learning_rate": 0.0001, "loss": 0.0101, "step": 93420 }, { "epoch": 614.671052631579, "grad_norm": 0.8218366503715515, "learning_rate": 0.0001, "loss": 0.0123, "step": 93430 }, { "epoch": 614.7368421052631, "grad_norm": 0.8938267827033997, "learning_rate": 0.0001, "loss": 0.0091, "step": 93440 }, { "epoch": 614.8026315789474, "grad_norm": 0.6793553829193115, "learning_rate": 0.0001, "loss": 0.0106, "step": 93450 }, { "epoch": 614.8684210526316, "grad_norm": 1.1240465641021729, "learning_rate": 0.0001, "loss": 0.0112, "step": 93460 }, { "epoch": 614.9342105263158, "grad_norm": 1.2319480180740356, "learning_rate": 0.0001, "loss": 0.0104, "step": 93470 }, { "epoch": 615.0, "grad_norm": 1.1686429977416992, "learning_rate": 0.0001, "loss": 0.0116, "step": 93480 }, { "epoch": 615.0657894736842, "grad_norm": 1.3799974918365479, "learning_rate": 0.0001, "loss": 0.0106, "step": 93490 }, { "epoch": 615.1315789473684, "grad_norm": 0.9599565863609314, "learning_rate": 0.0001, "loss": 0.0116, "step": 93500 }, { "epoch": 615.1973684210526, "grad_norm": 0.8217589855194092, "learning_rate": 0.0001, "loss": 0.0116, "step": 93510 }, { "epoch": 615.2631578947369, "grad_norm": 1.090279221534729, "learning_rate": 0.0001, "loss": 0.0095, "step": 93520 }, { "epoch": 615.328947368421, "grad_norm": 0.7684426307678223, "learning_rate": 0.0001, "loss": 0.0099, "step": 93530 }, { "epoch": 615.3947368421053, "grad_norm": 1.3158767223358154, "learning_rate": 0.0001, "loss": 0.0099, "step": 93540 }, { "epoch": 615.4605263157895, "grad_norm": 1.1429448127746582, "learning_rate": 0.0001, "loss": 0.0115, "step": 93550 }, { "epoch": 615.5263157894736, "grad_norm": 1.2864338159561157, "learning_rate": 0.0001, "loss": 0.0126, "step": 93560 }, { "epoch": 615.5921052631579, "grad_norm": 1.0945682525634766, "learning_rate": 0.0001, "loss": 0.0115, "step": 93570 }, { "epoch": 615.6578947368421, "grad_norm": 0.966946542263031, "learning_rate": 0.0001, "loss": 0.0102, "step": 93580 }, { "epoch": 615.7236842105264, "grad_norm": 0.7336428761482239, "learning_rate": 0.0001, "loss": 0.0105, "step": 93590 }, { "epoch": 615.7894736842105, "grad_norm": 0.6933264136314392, "learning_rate": 0.0001, "loss": 0.0124, "step": 93600 }, { "epoch": 615.8552631578947, "grad_norm": 1.118443489074707, "learning_rate": 0.0001, "loss": 0.0109, "step": 93610 }, { "epoch": 615.921052631579, "grad_norm": 1.2565184831619263, "learning_rate": 0.0001, "loss": 0.0115, "step": 93620 }, { "epoch": 615.9868421052631, "grad_norm": 1.2820919752120972, "learning_rate": 0.0001, "loss": 0.0093, "step": 93630 }, { "epoch": 616.0526315789474, "grad_norm": 1.0800106525421143, "learning_rate": 0.0001, "loss": 0.0115, "step": 93640 }, { "epoch": 616.1184210526316, "grad_norm": 1.172425627708435, "learning_rate": 0.0001, "loss": 0.0101, "step": 93650 }, { "epoch": 616.1842105263158, "grad_norm": 1.2287852764129639, "learning_rate": 0.0001, "loss": 0.01, "step": 93660 }, { "epoch": 616.25, "grad_norm": 1.2166169881820679, "learning_rate": 0.0001, "loss": 0.011, "step": 93670 }, { "epoch": 616.3157894736842, "grad_norm": 0.8677031397819519, "learning_rate": 0.0001, "loss": 0.011, "step": 93680 }, { "epoch": 616.3815789473684, "grad_norm": 1.3982559442520142, "learning_rate": 0.0001, "loss": 0.0116, "step": 93690 }, { "epoch": 616.4473684210526, "grad_norm": 1.3162497282028198, "learning_rate": 0.0001, "loss": 0.0097, "step": 93700 }, { "epoch": 616.5131578947369, "grad_norm": 1.3209203481674194, "learning_rate": 0.0001, "loss": 0.0101, "step": 93710 }, { "epoch": 616.578947368421, "grad_norm": 1.223204493522644, "learning_rate": 0.0001, "loss": 0.0093, "step": 93720 }, { "epoch": 616.6447368421053, "grad_norm": 1.7229893207550049, "learning_rate": 0.0001, "loss": 0.0104, "step": 93730 }, { "epoch": 616.7105263157895, "grad_norm": 1.5067758560180664, "learning_rate": 0.0001, "loss": 0.01, "step": 93740 }, { "epoch": 616.7763157894736, "grad_norm": 1.7268178462982178, "learning_rate": 0.0001, "loss": 0.0103, "step": 93750 }, { "epoch": 616.8421052631579, "grad_norm": 1.096575140953064, "learning_rate": 0.0001, "loss": 0.0116, "step": 93760 }, { "epoch": 616.9078947368421, "grad_norm": 1.145380973815918, "learning_rate": 0.0001, "loss": 0.01, "step": 93770 }, { "epoch": 616.9736842105264, "grad_norm": 1.3325169086456299, "learning_rate": 0.0001, "loss": 0.011, "step": 93780 }, { "epoch": 617.0394736842105, "grad_norm": 1.6675764322280884, "learning_rate": 0.0001, "loss": 0.0108, "step": 93790 }, { "epoch": 617.1052631578947, "grad_norm": 1.2384144067764282, "learning_rate": 0.0001, "loss": 0.009, "step": 93800 }, { "epoch": 617.171052631579, "grad_norm": 1.222592830657959, "learning_rate": 0.0001, "loss": 0.0114, "step": 93810 }, { "epoch": 617.2368421052631, "grad_norm": 1.2035316228866577, "learning_rate": 0.0001, "loss": 0.0115, "step": 93820 }, { "epoch": 617.3026315789474, "grad_norm": 1.2100892066955566, "learning_rate": 0.0001, "loss": 0.0112, "step": 93830 }, { "epoch": 617.3684210526316, "grad_norm": 1.365777850151062, "learning_rate": 0.0001, "loss": 0.0108, "step": 93840 }, { "epoch": 617.4342105263158, "grad_norm": 1.5900193452835083, "learning_rate": 0.0001, "loss": 0.0099, "step": 93850 }, { "epoch": 617.5, "grad_norm": 1.3592753410339355, "learning_rate": 0.0001, "loss": 0.0096, "step": 93860 }, { "epoch": 617.5657894736842, "grad_norm": 1.417954683303833, "learning_rate": 0.0001, "loss": 0.0111, "step": 93870 }, { "epoch": 617.6315789473684, "grad_norm": 1.517095923423767, "learning_rate": 0.0001, "loss": 0.0104, "step": 93880 }, { "epoch": 617.6973684210526, "grad_norm": 1.3372291326522827, "learning_rate": 0.0001, "loss": 0.0096, "step": 93890 }, { "epoch": 617.7631578947369, "grad_norm": 1.4415357112884521, "learning_rate": 0.0001, "loss": 0.0102, "step": 93900 }, { "epoch": 617.828947368421, "grad_norm": 0.986171543598175, "learning_rate": 0.0001, "loss": 0.0089, "step": 93910 }, { "epoch": 617.8947368421053, "grad_norm": 1.1450518369674683, "learning_rate": 0.0001, "loss": 0.0105, "step": 93920 }, { "epoch": 617.9605263157895, "grad_norm": 1.439809799194336, "learning_rate": 0.0001, "loss": 0.0112, "step": 93930 }, { "epoch": 618.0263157894736, "grad_norm": 1.4576247930526733, "learning_rate": 0.0001, "loss": 0.0093, "step": 93940 }, { "epoch": 618.0921052631579, "grad_norm": 1.307482361793518, "learning_rate": 0.0001, "loss": 0.0097, "step": 93950 }, { "epoch": 618.1578947368421, "grad_norm": 1.5274423360824585, "learning_rate": 0.0001, "loss": 0.0117, "step": 93960 }, { "epoch": 618.2236842105264, "grad_norm": 1.0210192203521729, "learning_rate": 0.0001, "loss": 0.0087, "step": 93970 }, { "epoch": 618.2894736842105, "grad_norm": 1.1291130781173706, "learning_rate": 0.0001, "loss": 0.0095, "step": 93980 }, { "epoch": 618.3552631578947, "grad_norm": 0.7608104944229126, "learning_rate": 0.0001, "loss": 0.0115, "step": 93990 }, { "epoch": 618.421052631579, "grad_norm": 0.9358824491500854, "learning_rate": 0.0001, "loss": 0.0116, "step": 94000 }, { "epoch": 618.4868421052631, "grad_norm": 0.9126713871955872, "learning_rate": 0.0001, "loss": 0.01, "step": 94010 }, { "epoch": 618.5526315789474, "grad_norm": 0.9021777510643005, "learning_rate": 0.0001, "loss": 0.0102, "step": 94020 }, { "epoch": 618.6184210526316, "grad_norm": 0.9748514890670776, "learning_rate": 0.0001, "loss": 0.0096, "step": 94030 }, { "epoch": 618.6842105263158, "grad_norm": 0.7533281445503235, "learning_rate": 0.0001, "loss": 0.0116, "step": 94040 }, { "epoch": 618.75, "grad_norm": 0.7163051962852478, "learning_rate": 0.0001, "loss": 0.0111, "step": 94050 }, { "epoch": 618.8157894736842, "grad_norm": 1.0862754583358765, "learning_rate": 0.0001, "loss": 0.0113, "step": 94060 }, { "epoch": 618.8815789473684, "grad_norm": 0.8490597009658813, "learning_rate": 0.0001, "loss": 0.0105, "step": 94070 }, { "epoch": 618.9473684210526, "grad_norm": 1.2494447231292725, "learning_rate": 0.0001, "loss": 0.0102, "step": 94080 }, { "epoch": 619.0131578947369, "grad_norm": 1.0552870035171509, "learning_rate": 0.0001, "loss": 0.0109, "step": 94090 }, { "epoch": 619.078947368421, "grad_norm": 0.8766233921051025, "learning_rate": 0.0001, "loss": 0.0137, "step": 94100 }, { "epoch": 619.1447368421053, "grad_norm": 1.181583285331726, "learning_rate": 0.0001, "loss": 0.0096, "step": 94110 }, { "epoch": 619.2105263157895, "grad_norm": 1.25386381149292, "learning_rate": 0.0001, "loss": 0.0113, "step": 94120 }, { "epoch": 619.2763157894736, "grad_norm": 1.0907665491104126, "learning_rate": 0.0001, "loss": 0.0096, "step": 94130 }, { "epoch": 619.3421052631579, "grad_norm": 1.258827567100525, "learning_rate": 0.0001, "loss": 0.0104, "step": 94140 }, { "epoch": 619.4078947368421, "grad_norm": 1.2584177255630493, "learning_rate": 0.0001, "loss": 0.0118, "step": 94150 }, { "epoch": 619.4736842105264, "grad_norm": 1.2424556016921997, "learning_rate": 0.0001, "loss": 0.0114, "step": 94160 }, { "epoch": 619.5394736842105, "grad_norm": 1.395603060722351, "learning_rate": 0.0001, "loss": 0.0117, "step": 94170 }, { "epoch": 619.6052631578947, "grad_norm": 0.9532821774482727, "learning_rate": 0.0001, "loss": 0.0107, "step": 94180 }, { "epoch": 619.671052631579, "grad_norm": 1.263140082359314, "learning_rate": 0.0001, "loss": 0.0103, "step": 94190 }, { "epoch": 619.7368421052631, "grad_norm": 0.8259878754615784, "learning_rate": 0.0001, "loss": 0.0112, "step": 94200 }, { "epoch": 619.8026315789474, "grad_norm": 1.4339922666549683, "learning_rate": 0.0001, "loss": 0.0106, "step": 94210 }, { "epoch": 619.8684210526316, "grad_norm": 0.9723814725875854, "learning_rate": 0.0001, "loss": 0.0122, "step": 94220 }, { "epoch": 619.9342105263158, "grad_norm": 0.9586044549942017, "learning_rate": 0.0001, "loss": 0.01, "step": 94230 }, { "epoch": 620.0, "grad_norm": 0.9816604852676392, "learning_rate": 0.0001, "loss": 0.0097, "step": 94240 }, { "epoch": 620.0657894736842, "grad_norm": 0.8294615149497986, "learning_rate": 0.0001, "loss": 0.0099, "step": 94250 }, { "epoch": 620.1315789473684, "grad_norm": 0.8866937160491943, "learning_rate": 0.0001, "loss": 0.0092, "step": 94260 }, { "epoch": 620.1973684210526, "grad_norm": 0.9527875781059265, "learning_rate": 0.0001, "loss": 0.0106, "step": 94270 }, { "epoch": 620.2631578947369, "grad_norm": 1.1666209697723389, "learning_rate": 0.0001, "loss": 0.0114, "step": 94280 }, { "epoch": 620.328947368421, "grad_norm": 0.9513602256774902, "learning_rate": 0.0001, "loss": 0.0118, "step": 94290 }, { "epoch": 620.3947368421053, "grad_norm": 0.7834760546684265, "learning_rate": 0.0001, "loss": 0.011, "step": 94300 }, { "epoch": 620.4605263157895, "grad_norm": 0.9751595258712769, "learning_rate": 0.0001, "loss": 0.0107, "step": 94310 }, { "epoch": 620.5263157894736, "grad_norm": 1.2259643077850342, "learning_rate": 0.0001, "loss": 0.0113, "step": 94320 }, { "epoch": 620.5921052631579, "grad_norm": 0.9753211736679077, "learning_rate": 0.0001, "loss": 0.0106, "step": 94330 }, { "epoch": 620.6578947368421, "grad_norm": 1.5966567993164062, "learning_rate": 0.0001, "loss": 0.0112, "step": 94340 }, { "epoch": 620.7236842105264, "grad_norm": 1.2838525772094727, "learning_rate": 0.0001, "loss": 0.0102, "step": 94350 }, { "epoch": 620.7894736842105, "grad_norm": 1.416764497756958, "learning_rate": 0.0001, "loss": 0.0119, "step": 94360 }, { "epoch": 620.8552631578947, "grad_norm": 1.317114233970642, "learning_rate": 0.0001, "loss": 0.0101, "step": 94370 }, { "epoch": 620.921052631579, "grad_norm": 1.6000310182571411, "learning_rate": 0.0001, "loss": 0.0126, "step": 94380 }, { "epoch": 620.9868421052631, "grad_norm": 1.296604037284851, "learning_rate": 0.0001, "loss": 0.0131, "step": 94390 }, { "epoch": 621.0526315789474, "grad_norm": 1.1899832487106323, "learning_rate": 0.0001, "loss": 0.0108, "step": 94400 }, { "epoch": 621.1184210526316, "grad_norm": 1.3651827573776245, "learning_rate": 0.0001, "loss": 0.0104, "step": 94410 }, { "epoch": 621.1842105263158, "grad_norm": 1.230488657951355, "learning_rate": 0.0001, "loss": 0.0103, "step": 94420 }, { "epoch": 621.25, "grad_norm": 1.2421315908432007, "learning_rate": 0.0001, "loss": 0.0093, "step": 94430 }, { "epoch": 621.3157894736842, "grad_norm": 1.5289520025253296, "learning_rate": 0.0001, "loss": 0.0126, "step": 94440 }, { "epoch": 621.3815789473684, "grad_norm": 1.2667958736419678, "learning_rate": 0.0001, "loss": 0.0097, "step": 94450 }, { "epoch": 621.4473684210526, "grad_norm": 1.2462661266326904, "learning_rate": 0.0001, "loss": 0.0109, "step": 94460 }, { "epoch": 621.5131578947369, "grad_norm": 1.1925386190414429, "learning_rate": 0.0001, "loss": 0.0111, "step": 94470 }, { "epoch": 621.578947368421, "grad_norm": 0.7232915759086609, "learning_rate": 0.0001, "loss": 0.0105, "step": 94480 }, { "epoch": 621.6447368421053, "grad_norm": 0.9784261584281921, "learning_rate": 0.0001, "loss": 0.009, "step": 94490 }, { "epoch": 621.7105263157895, "grad_norm": 0.8584251999855042, "learning_rate": 0.0001, "loss": 0.0103, "step": 94500 }, { "epoch": 621.7763157894736, "grad_norm": 1.0461781024932861, "learning_rate": 0.0001, "loss": 0.0097, "step": 94510 }, { "epoch": 621.8421052631579, "grad_norm": 0.9463827013969421, "learning_rate": 0.0001, "loss": 0.0099, "step": 94520 }, { "epoch": 621.9078947368421, "grad_norm": 0.9517367482185364, "learning_rate": 0.0001, "loss": 0.0107, "step": 94530 }, { "epoch": 621.9736842105264, "grad_norm": 1.1361017227172852, "learning_rate": 0.0001, "loss": 0.0114, "step": 94540 }, { "epoch": 622.0394736842105, "grad_norm": 1.1935226917266846, "learning_rate": 0.0001, "loss": 0.0111, "step": 94550 }, { "epoch": 622.1052631578947, "grad_norm": 1.004470705986023, "learning_rate": 0.0001, "loss": 0.0098, "step": 94560 }, { "epoch": 622.171052631579, "grad_norm": 1.2536550760269165, "learning_rate": 0.0001, "loss": 0.0108, "step": 94570 }, { "epoch": 622.2368421052631, "grad_norm": 1.262121319770813, "learning_rate": 0.0001, "loss": 0.0109, "step": 94580 }, { "epoch": 622.3026315789474, "grad_norm": 1.241873860359192, "learning_rate": 0.0001, "loss": 0.0111, "step": 94590 }, { "epoch": 622.3684210526316, "grad_norm": 0.9200069308280945, "learning_rate": 0.0001, "loss": 0.0098, "step": 94600 }, { "epoch": 622.4342105263158, "grad_norm": 0.8964002132415771, "learning_rate": 0.0001, "loss": 0.0098, "step": 94610 }, { "epoch": 622.5, "grad_norm": 1.2735228538513184, "learning_rate": 0.0001, "loss": 0.0125, "step": 94620 }, { "epoch": 622.5657894736842, "grad_norm": 1.320853590965271, "learning_rate": 0.0001, "loss": 0.0103, "step": 94630 }, { "epoch": 622.6315789473684, "grad_norm": 1.3654546737670898, "learning_rate": 0.0001, "loss": 0.0117, "step": 94640 }, { "epoch": 622.6973684210526, "grad_norm": 0.8524348139762878, "learning_rate": 0.0001, "loss": 0.0094, "step": 94650 }, { "epoch": 622.7631578947369, "grad_norm": 1.1246964931488037, "learning_rate": 0.0001, "loss": 0.0112, "step": 94660 }, { "epoch": 622.828947368421, "grad_norm": 1.0840219259262085, "learning_rate": 0.0001, "loss": 0.0104, "step": 94670 }, { "epoch": 622.8947368421053, "grad_norm": 0.9175322651863098, "learning_rate": 0.0001, "loss": 0.0097, "step": 94680 }, { "epoch": 622.9605263157895, "grad_norm": 0.6897749900817871, "learning_rate": 0.0001, "loss": 0.0097, "step": 94690 }, { "epoch": 623.0263157894736, "grad_norm": 1.313354730606079, "learning_rate": 0.0001, "loss": 0.0114, "step": 94700 }, { "epoch": 623.0921052631579, "grad_norm": 0.9614347815513611, "learning_rate": 0.0001, "loss": 0.0111, "step": 94710 }, { "epoch": 623.1578947368421, "grad_norm": 1.1975575685501099, "learning_rate": 0.0001, "loss": 0.0135, "step": 94720 }, { "epoch": 623.2236842105264, "grad_norm": 1.0742084980010986, "learning_rate": 0.0001, "loss": 0.0107, "step": 94730 }, { "epoch": 623.2894736842105, "grad_norm": 0.9996278882026672, "learning_rate": 0.0001, "loss": 0.0105, "step": 94740 }, { "epoch": 623.3552631578947, "grad_norm": 1.3011069297790527, "learning_rate": 0.0001, "loss": 0.0113, "step": 94750 }, { "epoch": 623.421052631579, "grad_norm": 0.7885226011276245, "learning_rate": 0.0001, "loss": 0.0105, "step": 94760 }, { "epoch": 623.4868421052631, "grad_norm": 0.9483335018157959, "learning_rate": 0.0001, "loss": 0.0095, "step": 94770 }, { "epoch": 623.5526315789474, "grad_norm": 1.2516369819641113, "learning_rate": 0.0001, "loss": 0.0092, "step": 94780 }, { "epoch": 623.6184210526316, "grad_norm": 1.041446566581726, "learning_rate": 0.0001, "loss": 0.0118, "step": 94790 }, { "epoch": 623.6842105263158, "grad_norm": 1.1910645961761475, "learning_rate": 0.0001, "loss": 0.0108, "step": 94800 }, { "epoch": 623.75, "grad_norm": 1.1067719459533691, "learning_rate": 0.0001, "loss": 0.0108, "step": 94810 }, { "epoch": 623.8157894736842, "grad_norm": 1.0154963731765747, "learning_rate": 0.0001, "loss": 0.01, "step": 94820 }, { "epoch": 623.8815789473684, "grad_norm": 1.08232843875885, "learning_rate": 0.0001, "loss": 0.0087, "step": 94830 }, { "epoch": 623.9473684210526, "grad_norm": 1.1486544609069824, "learning_rate": 0.0001, "loss": 0.0122, "step": 94840 }, { "epoch": 624.0131578947369, "grad_norm": 1.1820085048675537, "learning_rate": 0.0001, "loss": 0.0111, "step": 94850 }, { "epoch": 624.078947368421, "grad_norm": 1.095815658569336, "learning_rate": 0.0001, "loss": 0.0116, "step": 94860 }, { "epoch": 624.1447368421053, "grad_norm": 0.9863499999046326, "learning_rate": 0.0001, "loss": 0.0099, "step": 94870 }, { "epoch": 624.2105263157895, "grad_norm": 1.2064094543457031, "learning_rate": 0.0001, "loss": 0.0109, "step": 94880 }, { "epoch": 624.2763157894736, "grad_norm": 0.7593415379524231, "learning_rate": 0.0001, "loss": 0.0103, "step": 94890 }, { "epoch": 624.3421052631579, "grad_norm": 0.7509251832962036, "learning_rate": 0.0001, "loss": 0.011, "step": 94900 }, { "epoch": 624.4078947368421, "grad_norm": 0.8254883289337158, "learning_rate": 0.0001, "loss": 0.0102, "step": 94910 }, { "epoch": 624.4736842105264, "grad_norm": 1.2074943780899048, "learning_rate": 0.0001, "loss": 0.0093, "step": 94920 }, { "epoch": 624.5394736842105, "grad_norm": 1.1988282203674316, "learning_rate": 0.0001, "loss": 0.011, "step": 94930 }, { "epoch": 624.6052631578947, "grad_norm": 1.4289294481277466, "learning_rate": 0.0001, "loss": 0.0101, "step": 94940 }, { "epoch": 624.671052631579, "grad_norm": 1.4623303413391113, "learning_rate": 0.0001, "loss": 0.0105, "step": 94950 }, { "epoch": 624.7368421052631, "grad_norm": 1.0225207805633545, "learning_rate": 0.0001, "loss": 0.0113, "step": 94960 }, { "epoch": 624.8026315789474, "grad_norm": 1.1724567413330078, "learning_rate": 0.0001, "loss": 0.0104, "step": 94970 }, { "epoch": 624.8684210526316, "grad_norm": 1.0636459589004517, "learning_rate": 0.0001, "loss": 0.0102, "step": 94980 }, { "epoch": 624.9342105263158, "grad_norm": 1.551671028137207, "learning_rate": 0.0001, "loss": 0.0136, "step": 94990 }, { "epoch": 625.0, "grad_norm": 1.9914339780807495, "learning_rate": 0.0001, "loss": 0.0113, "step": 95000 }, { "epoch": 625.0657894736842, "grad_norm": 1.3119971752166748, "learning_rate": 0.0001, "loss": 0.0102, "step": 95010 }, { "epoch": 625.1315789473684, "grad_norm": 1.2091944217681885, "learning_rate": 0.0001, "loss": 0.0103, "step": 95020 }, { "epoch": 625.1973684210526, "grad_norm": 1.333477258682251, "learning_rate": 0.0001, "loss": 0.0095, "step": 95030 }, { "epoch": 625.2631578947369, "grad_norm": 1.5082767009735107, "learning_rate": 0.0001, "loss": 0.0108, "step": 95040 }, { "epoch": 625.328947368421, "grad_norm": 1.2716140747070312, "learning_rate": 0.0001, "loss": 0.0105, "step": 95050 }, { "epoch": 625.3947368421053, "grad_norm": 1.2596821784973145, "learning_rate": 0.0001, "loss": 0.0104, "step": 95060 }, { "epoch": 625.4605263157895, "grad_norm": 1.198418140411377, "learning_rate": 0.0001, "loss": 0.009, "step": 95070 }, { "epoch": 625.5263157894736, "grad_norm": 1.1842753887176514, "learning_rate": 0.0001, "loss": 0.0112, "step": 95080 }, { "epoch": 625.5921052631579, "grad_norm": 1.162773847579956, "learning_rate": 0.0001, "loss": 0.0105, "step": 95090 }, { "epoch": 625.6578947368421, "grad_norm": 0.8941752910614014, "learning_rate": 0.0001, "loss": 0.0092, "step": 95100 }, { "epoch": 625.7236842105264, "grad_norm": 0.9903761744499207, "learning_rate": 0.0001, "loss": 0.0116, "step": 95110 }, { "epoch": 625.7894736842105, "grad_norm": 1.0713928937911987, "learning_rate": 0.0001, "loss": 0.0106, "step": 95120 }, { "epoch": 625.8552631578947, "grad_norm": 1.1026970148086548, "learning_rate": 0.0001, "loss": 0.0129, "step": 95130 }, { "epoch": 625.921052631579, "grad_norm": 1.091262936592102, "learning_rate": 0.0001, "loss": 0.0106, "step": 95140 }, { "epoch": 625.9868421052631, "grad_norm": 1.0287009477615356, "learning_rate": 0.0001, "loss": 0.0105, "step": 95150 }, { "epoch": 626.0526315789474, "grad_norm": 0.9312505125999451, "learning_rate": 0.0001, "loss": 0.0101, "step": 95160 }, { "epoch": 626.1184210526316, "grad_norm": 1.1395642757415771, "learning_rate": 0.0001, "loss": 0.0096, "step": 95170 }, { "epoch": 626.1842105263158, "grad_norm": 1.1278079748153687, "learning_rate": 0.0001, "loss": 0.0126, "step": 95180 }, { "epoch": 626.25, "grad_norm": 1.112074613571167, "learning_rate": 0.0001, "loss": 0.013, "step": 95190 }, { "epoch": 626.3157894736842, "grad_norm": 1.41941499710083, "learning_rate": 0.0001, "loss": 0.0093, "step": 95200 }, { "epoch": 626.3815789473684, "grad_norm": 1.5267459154129028, "learning_rate": 0.0001, "loss": 0.0109, "step": 95210 }, { "epoch": 626.4473684210526, "grad_norm": 1.2732293605804443, "learning_rate": 0.0001, "loss": 0.0101, "step": 95220 }, { "epoch": 626.5131578947369, "grad_norm": 1.3131200075149536, "learning_rate": 0.0001, "loss": 0.0097, "step": 95230 }, { "epoch": 626.578947368421, "grad_norm": 1.115815281867981, "learning_rate": 0.0001, "loss": 0.0122, "step": 95240 }, { "epoch": 626.6447368421053, "grad_norm": 1.6979179382324219, "learning_rate": 0.0001, "loss": 0.0098, "step": 95250 }, { "epoch": 626.7105263157895, "grad_norm": 1.280093789100647, "learning_rate": 0.0001, "loss": 0.0099, "step": 95260 }, { "epoch": 626.7763157894736, "grad_norm": 1.349902629852295, "learning_rate": 0.0001, "loss": 0.0121, "step": 95270 }, { "epoch": 626.8421052631579, "grad_norm": 1.4500685930252075, "learning_rate": 0.0001, "loss": 0.0114, "step": 95280 }, { "epoch": 626.9078947368421, "grad_norm": 1.2372456789016724, "learning_rate": 0.0001, "loss": 0.0088, "step": 95290 }, { "epoch": 626.9736842105264, "grad_norm": 1.7116293907165527, "learning_rate": 0.0001, "loss": 0.0097, "step": 95300 }, { "epoch": 627.0394736842105, "grad_norm": 1.3504589796066284, "learning_rate": 0.0001, "loss": 0.0099, "step": 95310 }, { "epoch": 627.1052631578947, "grad_norm": 1.3713061809539795, "learning_rate": 0.0001, "loss": 0.0101, "step": 95320 }, { "epoch": 627.171052631579, "grad_norm": 1.5668679475784302, "learning_rate": 0.0001, "loss": 0.0107, "step": 95330 }, { "epoch": 627.2368421052631, "grad_norm": 1.5698161125183105, "learning_rate": 0.0001, "loss": 0.0097, "step": 95340 }, { "epoch": 627.3026315789474, "grad_norm": 1.002941370010376, "learning_rate": 0.0001, "loss": 0.0118, "step": 95350 }, { "epoch": 627.3684210526316, "grad_norm": 1.125559687614441, "learning_rate": 0.0001, "loss": 0.0091, "step": 95360 }, { "epoch": 627.4342105263158, "grad_norm": 0.9617181420326233, "learning_rate": 0.0001, "loss": 0.0095, "step": 95370 }, { "epoch": 627.5, "grad_norm": 0.9981349110603333, "learning_rate": 0.0001, "loss": 0.011, "step": 95380 }, { "epoch": 627.5657894736842, "grad_norm": 1.1463162899017334, "learning_rate": 0.0001, "loss": 0.0118, "step": 95390 }, { "epoch": 627.6315789473684, "grad_norm": 1.363034725189209, "learning_rate": 0.0001, "loss": 0.0089, "step": 95400 }, { "epoch": 627.6973684210526, "grad_norm": 1.0812790393829346, "learning_rate": 0.0001, "loss": 0.0123, "step": 95410 }, { "epoch": 627.7631578947369, "grad_norm": 0.9944791793823242, "learning_rate": 0.0001, "loss": 0.0108, "step": 95420 }, { "epoch": 627.828947368421, "grad_norm": 1.1499660015106201, "learning_rate": 0.0001, "loss": 0.01, "step": 95430 }, { "epoch": 627.8947368421053, "grad_norm": 1.1649742126464844, "learning_rate": 0.0001, "loss": 0.0106, "step": 95440 }, { "epoch": 627.9605263157895, "grad_norm": 0.9862028360366821, "learning_rate": 0.0001, "loss": 0.0126, "step": 95450 }, { "epoch": 628.0263157894736, "grad_norm": 1.0337918996810913, "learning_rate": 0.0001, "loss": 0.0102, "step": 95460 }, { "epoch": 628.0921052631579, "grad_norm": 1.1768397092819214, "learning_rate": 0.0001, "loss": 0.011, "step": 95470 }, { "epoch": 628.1578947368421, "grad_norm": 1.1246834993362427, "learning_rate": 0.0001, "loss": 0.0109, "step": 95480 }, { "epoch": 628.2236842105264, "grad_norm": 0.6552825570106506, "learning_rate": 0.0001, "loss": 0.0094, "step": 95490 }, { "epoch": 628.2894736842105, "grad_norm": 0.9042338728904724, "learning_rate": 0.0001, "loss": 0.0123, "step": 95500 }, { "epoch": 628.3552631578947, "grad_norm": 1.1680855751037598, "learning_rate": 0.0001, "loss": 0.0119, "step": 95510 }, { "epoch": 628.421052631579, "grad_norm": 1.1862854957580566, "learning_rate": 0.0001, "loss": 0.0113, "step": 95520 }, { "epoch": 628.4868421052631, "grad_norm": 0.9259408712387085, "learning_rate": 0.0001, "loss": 0.0104, "step": 95530 }, { "epoch": 628.5526315789474, "grad_norm": 1.0121794939041138, "learning_rate": 0.0001, "loss": 0.0099, "step": 95540 }, { "epoch": 628.6184210526316, "grad_norm": 1.021821141242981, "learning_rate": 0.0001, "loss": 0.0111, "step": 95550 }, { "epoch": 628.6842105263158, "grad_norm": 1.0887248516082764, "learning_rate": 0.0001, "loss": 0.0104, "step": 95560 }, { "epoch": 628.75, "grad_norm": 1.1664204597473145, "learning_rate": 0.0001, "loss": 0.0117, "step": 95570 }, { "epoch": 628.8157894736842, "grad_norm": 0.9954600930213928, "learning_rate": 0.0001, "loss": 0.0098, "step": 95580 }, { "epoch": 628.8815789473684, "grad_norm": 1.174320101737976, "learning_rate": 0.0001, "loss": 0.0122, "step": 95590 }, { "epoch": 628.9473684210526, "grad_norm": 1.5383800268173218, "learning_rate": 0.0001, "loss": 0.0111, "step": 95600 }, { "epoch": 629.0131578947369, "grad_norm": 1.5063879489898682, "learning_rate": 0.0001, "loss": 0.01, "step": 95610 }, { "epoch": 629.078947368421, "grad_norm": 1.2808583974838257, "learning_rate": 0.0001, "loss": 0.0093, "step": 95620 }, { "epoch": 629.1447368421053, "grad_norm": 1.3587077856063843, "learning_rate": 0.0001, "loss": 0.011, "step": 95630 }, { "epoch": 629.2105263157895, "grad_norm": 1.1514275074005127, "learning_rate": 0.0001, "loss": 0.0121, "step": 95640 }, { "epoch": 629.2763157894736, "grad_norm": 1.3568569421768188, "learning_rate": 0.0001, "loss": 0.0142, "step": 95650 }, { "epoch": 629.3421052631579, "grad_norm": 1.1111990213394165, "learning_rate": 0.0001, "loss": 0.0096, "step": 95660 }, { "epoch": 629.4078947368421, "grad_norm": 1.160679578781128, "learning_rate": 0.0001, "loss": 0.0108, "step": 95670 }, { "epoch": 629.4736842105264, "grad_norm": 0.875325083732605, "learning_rate": 0.0001, "loss": 0.0099, "step": 95680 }, { "epoch": 629.5394736842105, "grad_norm": 0.676986575126648, "learning_rate": 0.0001, "loss": 0.0105, "step": 95690 }, { "epoch": 629.6052631578947, "grad_norm": 1.0533658266067505, "learning_rate": 0.0001, "loss": 0.0113, "step": 95700 }, { "epoch": 629.671052631579, "grad_norm": 1.126704454421997, "learning_rate": 0.0001, "loss": 0.013, "step": 95710 }, { "epoch": 629.7368421052631, "grad_norm": 0.875869631767273, "learning_rate": 0.0001, "loss": 0.0105, "step": 95720 }, { "epoch": 629.8026315789474, "grad_norm": 0.843349277973175, "learning_rate": 0.0001, "loss": 0.0097, "step": 95730 }, { "epoch": 629.8684210526316, "grad_norm": 1.0324947834014893, "learning_rate": 0.0001, "loss": 0.0122, "step": 95740 }, { "epoch": 629.9342105263158, "grad_norm": 1.0827348232269287, "learning_rate": 0.0001, "loss": 0.0102, "step": 95750 }, { "epoch": 630.0, "grad_norm": 0.8079311847686768, "learning_rate": 0.0001, "loss": 0.0098, "step": 95760 }, { "epoch": 630.0657894736842, "grad_norm": 1.0577661991119385, "learning_rate": 0.0001, "loss": 0.0112, "step": 95770 }, { "epoch": 630.1315789473684, "grad_norm": 1.0803674459457397, "learning_rate": 0.0001, "loss": 0.0101, "step": 95780 }, { "epoch": 630.1973684210526, "grad_norm": 1.1568626165390015, "learning_rate": 0.0001, "loss": 0.0113, "step": 95790 }, { "epoch": 630.2631578947369, "grad_norm": 0.7139347195625305, "learning_rate": 0.0001, "loss": 0.0095, "step": 95800 }, { "epoch": 630.328947368421, "grad_norm": 1.54314386844635, "learning_rate": 0.0001, "loss": 0.0108, "step": 95810 }, { "epoch": 630.3947368421053, "grad_norm": 0.8538906574249268, "learning_rate": 0.0001, "loss": 0.0105, "step": 95820 }, { "epoch": 630.4605263157895, "grad_norm": 1.0766382217407227, "learning_rate": 0.0001, "loss": 0.0097, "step": 95830 }, { "epoch": 630.5263157894736, "grad_norm": 0.9496257901191711, "learning_rate": 0.0001, "loss": 0.0104, "step": 95840 }, { "epoch": 630.5921052631579, "grad_norm": 1.2234865427017212, "learning_rate": 0.0001, "loss": 0.013, "step": 95850 }, { "epoch": 630.6578947368421, "grad_norm": 0.8329818248748779, "learning_rate": 0.0001, "loss": 0.0115, "step": 95860 }, { "epoch": 630.7236842105264, "grad_norm": 1.100710153579712, "learning_rate": 0.0001, "loss": 0.0108, "step": 95870 }, { "epoch": 630.7894736842105, "grad_norm": 0.9329621195793152, "learning_rate": 0.0001, "loss": 0.0123, "step": 95880 }, { "epoch": 630.8552631578947, "grad_norm": 1.2335596084594727, "learning_rate": 0.0001, "loss": 0.0099, "step": 95890 }, { "epoch": 630.921052631579, "grad_norm": 1.0950738191604614, "learning_rate": 0.0001, "loss": 0.0116, "step": 95900 }, { "epoch": 630.9868421052631, "grad_norm": 0.9743636846542358, "learning_rate": 0.0001, "loss": 0.0128, "step": 95910 }, { "epoch": 631.0526315789474, "grad_norm": 0.9001186490058899, "learning_rate": 0.0001, "loss": 0.0125, "step": 95920 }, { "epoch": 631.1184210526316, "grad_norm": 1.3222036361694336, "learning_rate": 0.0001, "loss": 0.0122, "step": 95930 }, { "epoch": 631.1842105263158, "grad_norm": 1.2145307064056396, "learning_rate": 0.0001, "loss": 0.0127, "step": 95940 }, { "epoch": 631.25, "grad_norm": 1.1518114805221558, "learning_rate": 0.0001, "loss": 0.0122, "step": 95950 }, { "epoch": 631.3157894736842, "grad_norm": 1.0182396173477173, "learning_rate": 0.0001, "loss": 0.0105, "step": 95960 }, { "epoch": 631.3815789473684, "grad_norm": 0.998622477054596, "learning_rate": 0.0001, "loss": 0.0105, "step": 95970 }, { "epoch": 631.4473684210526, "grad_norm": 0.9129418730735779, "learning_rate": 0.0001, "loss": 0.0116, "step": 95980 }, { "epoch": 631.5131578947369, "grad_norm": 0.8914371728897095, "learning_rate": 0.0001, "loss": 0.01, "step": 95990 }, { "epoch": 631.578947368421, "grad_norm": 1.324182152748108, "learning_rate": 0.0001, "loss": 0.0094, "step": 96000 }, { "epoch": 631.6447368421053, "grad_norm": 1.3271923065185547, "learning_rate": 0.0001, "loss": 0.0103, "step": 96010 }, { "epoch": 631.7105263157895, "grad_norm": 1.2438828945159912, "learning_rate": 0.0001, "loss": 0.0123, "step": 96020 }, { "epoch": 631.7763157894736, "grad_norm": 0.808535635471344, "learning_rate": 0.0001, "loss": 0.0106, "step": 96030 }, { "epoch": 631.8421052631579, "grad_norm": 1.3767280578613281, "learning_rate": 0.0001, "loss": 0.0089, "step": 96040 }, { "epoch": 631.9078947368421, "grad_norm": 1.3440818786621094, "learning_rate": 0.0001, "loss": 0.0119, "step": 96050 }, { "epoch": 631.9736842105264, "grad_norm": 1.1138218641281128, "learning_rate": 0.0001, "loss": 0.0106, "step": 96060 }, { "epoch": 632.0394736842105, "grad_norm": 0.868145227432251, "learning_rate": 0.0001, "loss": 0.0104, "step": 96070 }, { "epoch": 632.1052631578947, "grad_norm": 1.3606065511703491, "learning_rate": 0.0001, "loss": 0.0102, "step": 96080 }, { "epoch": 632.171052631579, "grad_norm": 1.3854315280914307, "learning_rate": 0.0001, "loss": 0.0096, "step": 96090 }, { "epoch": 632.2368421052631, "grad_norm": 1.2756513357162476, "learning_rate": 0.0001, "loss": 0.0094, "step": 96100 }, { "epoch": 632.3026315789474, "grad_norm": 1.1794759035110474, "learning_rate": 0.0001, "loss": 0.0112, "step": 96110 }, { "epoch": 632.3684210526316, "grad_norm": 1.2816321849822998, "learning_rate": 0.0001, "loss": 0.0091, "step": 96120 }, { "epoch": 632.4342105263158, "grad_norm": 1.0967599153518677, "learning_rate": 0.0001, "loss": 0.0121, "step": 96130 }, { "epoch": 632.5, "grad_norm": 1.3079113960266113, "learning_rate": 0.0001, "loss": 0.0109, "step": 96140 }, { "epoch": 632.5657894736842, "grad_norm": 1.0151442289352417, "learning_rate": 0.0001, "loss": 0.0118, "step": 96150 }, { "epoch": 632.6315789473684, "grad_norm": 2.014241933822632, "learning_rate": 0.0001, "loss": 0.0112, "step": 96160 }, { "epoch": 632.6973684210526, "grad_norm": 1.0446006059646606, "learning_rate": 0.0001, "loss": 0.0135, "step": 96170 }, { "epoch": 632.7631578947369, "grad_norm": 1.2573105096817017, "learning_rate": 0.0001, "loss": 0.0111, "step": 96180 }, { "epoch": 632.828947368421, "grad_norm": 1.1705162525177002, "learning_rate": 0.0001, "loss": 0.0104, "step": 96190 }, { "epoch": 632.8947368421053, "grad_norm": 1.3962786197662354, "learning_rate": 0.0001, "loss": 0.0113, "step": 96200 }, { "epoch": 632.9605263157895, "grad_norm": 1.2889972925186157, "learning_rate": 0.0001, "loss": 0.0101, "step": 96210 }, { "epoch": 633.0263157894736, "grad_norm": 1.30446457862854, "learning_rate": 0.0001, "loss": 0.0111, "step": 96220 }, { "epoch": 633.0921052631579, "grad_norm": 1.0984090566635132, "learning_rate": 0.0001, "loss": 0.0098, "step": 96230 }, { "epoch": 633.1578947368421, "grad_norm": 1.0214024782180786, "learning_rate": 0.0001, "loss": 0.0102, "step": 96240 }, { "epoch": 633.2236842105264, "grad_norm": 1.3582532405853271, "learning_rate": 0.0001, "loss": 0.0108, "step": 96250 }, { "epoch": 633.2894736842105, "grad_norm": 1.1658719778060913, "learning_rate": 0.0001, "loss": 0.0095, "step": 96260 }, { "epoch": 633.3552631578947, "grad_norm": 1.1683430671691895, "learning_rate": 0.0001, "loss": 0.0109, "step": 96270 }, { "epoch": 633.421052631579, "grad_norm": 1.399514079093933, "learning_rate": 0.0001, "loss": 0.0108, "step": 96280 }, { "epoch": 633.4868421052631, "grad_norm": 1.0703054666519165, "learning_rate": 0.0001, "loss": 0.0116, "step": 96290 }, { "epoch": 633.5526315789474, "grad_norm": 1.1495949029922485, "learning_rate": 0.0001, "loss": 0.0098, "step": 96300 }, { "epoch": 633.6184210526316, "grad_norm": 1.1547905206680298, "learning_rate": 0.0001, "loss": 0.0128, "step": 96310 }, { "epoch": 633.6842105263158, "grad_norm": 1.3389787673950195, "learning_rate": 0.0001, "loss": 0.0108, "step": 96320 }, { "epoch": 633.75, "grad_norm": 0.622372031211853, "learning_rate": 0.0001, "loss": 0.0101, "step": 96330 }, { "epoch": 633.8157894736842, "grad_norm": 0.7797669768333435, "learning_rate": 0.0001, "loss": 0.0094, "step": 96340 }, { "epoch": 633.8815789473684, "grad_norm": 1.225666880607605, "learning_rate": 0.0001, "loss": 0.0115, "step": 96350 }, { "epoch": 633.9473684210526, "grad_norm": 1.2764816284179688, "learning_rate": 0.0001, "loss": 0.0116, "step": 96360 }, { "epoch": 634.0131578947369, "grad_norm": 1.3104716539382935, "learning_rate": 0.0001, "loss": 0.0105, "step": 96370 }, { "epoch": 634.078947368421, "grad_norm": 1.122225046157837, "learning_rate": 0.0001, "loss": 0.0122, "step": 96380 }, { "epoch": 634.1447368421053, "grad_norm": 1.331106424331665, "learning_rate": 0.0001, "loss": 0.0105, "step": 96390 }, { "epoch": 634.2105263157895, "grad_norm": 1.1977465152740479, "learning_rate": 0.0001, "loss": 0.0095, "step": 96400 }, { "epoch": 634.2763157894736, "grad_norm": 1.4676520824432373, "learning_rate": 0.0001, "loss": 0.0109, "step": 96410 }, { "epoch": 634.3421052631579, "grad_norm": 1.1884233951568604, "learning_rate": 0.0001, "loss": 0.0096, "step": 96420 }, { "epoch": 634.4078947368421, "grad_norm": 1.448885440826416, "learning_rate": 0.0001, "loss": 0.0123, "step": 96430 }, { "epoch": 634.4736842105264, "grad_norm": 1.1831235885620117, "learning_rate": 0.0001, "loss": 0.0109, "step": 96440 }, { "epoch": 634.5394736842105, "grad_norm": 1.1921072006225586, "learning_rate": 0.0001, "loss": 0.0105, "step": 96450 }, { "epoch": 634.6052631578947, "grad_norm": 0.9832690954208374, "learning_rate": 0.0001, "loss": 0.0099, "step": 96460 }, { "epoch": 634.671052631579, "grad_norm": 1.3597534894943237, "learning_rate": 0.0001, "loss": 0.0126, "step": 96470 }, { "epoch": 634.7368421052631, "grad_norm": 1.2656805515289307, "learning_rate": 0.0001, "loss": 0.0088, "step": 96480 }, { "epoch": 634.8026315789474, "grad_norm": 0.9572046995162964, "learning_rate": 0.0001, "loss": 0.0095, "step": 96490 }, { "epoch": 634.8684210526316, "grad_norm": 1.0890827178955078, "learning_rate": 0.0001, "loss": 0.0095, "step": 96500 }, { "epoch": 634.9342105263158, "grad_norm": 1.2112818956375122, "learning_rate": 0.0001, "loss": 0.0132, "step": 96510 }, { "epoch": 635.0, "grad_norm": 1.3493852615356445, "learning_rate": 0.0001, "loss": 0.0121, "step": 96520 }, { "epoch": 635.0657894736842, "grad_norm": 0.8873164057731628, "learning_rate": 0.0001, "loss": 0.0104, "step": 96530 }, { "epoch": 635.1315789473684, "grad_norm": 1.3361443281173706, "learning_rate": 0.0001, "loss": 0.0104, "step": 96540 }, { "epoch": 635.1973684210526, "grad_norm": 1.2855825424194336, "learning_rate": 0.0001, "loss": 0.0108, "step": 96550 }, { "epoch": 635.2631578947369, "grad_norm": 1.0987671613693237, "learning_rate": 0.0001, "loss": 0.0108, "step": 96560 }, { "epoch": 635.328947368421, "grad_norm": 0.7303910255432129, "learning_rate": 0.0001, "loss": 0.0096, "step": 96570 }, { "epoch": 635.3947368421053, "grad_norm": 1.2419792413711548, "learning_rate": 0.0001, "loss": 0.0101, "step": 96580 }, { "epoch": 635.4605263157895, "grad_norm": 1.311336874961853, "learning_rate": 0.0001, "loss": 0.0116, "step": 96590 }, { "epoch": 635.5263157894736, "grad_norm": 1.0877469778060913, "learning_rate": 0.0001, "loss": 0.0129, "step": 96600 }, { "epoch": 635.5921052631579, "grad_norm": 1.0030274391174316, "learning_rate": 0.0001, "loss": 0.0128, "step": 96610 }, { "epoch": 635.6578947368421, "grad_norm": 1.3998702764511108, "learning_rate": 0.0001, "loss": 0.0121, "step": 96620 }, { "epoch": 635.7236842105264, "grad_norm": 0.7409038543701172, "learning_rate": 0.0001, "loss": 0.0111, "step": 96630 }, { "epoch": 635.7894736842105, "grad_norm": 0.9985183477401733, "learning_rate": 0.0001, "loss": 0.0092, "step": 96640 }, { "epoch": 635.8552631578947, "grad_norm": 1.0499337911605835, "learning_rate": 0.0001, "loss": 0.0091, "step": 96650 }, { "epoch": 635.921052631579, "grad_norm": 0.9747737050056458, "learning_rate": 0.0001, "loss": 0.0085, "step": 96660 }, { "epoch": 635.9868421052631, "grad_norm": 1.1201547384262085, "learning_rate": 0.0001, "loss": 0.0138, "step": 96670 }, { "epoch": 636.0526315789474, "grad_norm": 1.0508447885513306, "learning_rate": 0.0001, "loss": 0.01, "step": 96680 }, { "epoch": 636.1184210526316, "grad_norm": 0.8244642019271851, "learning_rate": 0.0001, "loss": 0.01, "step": 96690 }, { "epoch": 636.1842105263158, "grad_norm": 1.1868029832839966, "learning_rate": 0.0001, "loss": 0.0124, "step": 96700 }, { "epoch": 636.25, "grad_norm": 1.2624951601028442, "learning_rate": 0.0001, "loss": 0.0119, "step": 96710 }, { "epoch": 636.3157894736842, "grad_norm": 1.0682224035263062, "learning_rate": 0.0001, "loss": 0.0111, "step": 96720 }, { "epoch": 636.3815789473684, "grad_norm": 1.52485990524292, "learning_rate": 0.0001, "loss": 0.0112, "step": 96730 }, { "epoch": 636.4473684210526, "grad_norm": 1.0763731002807617, "learning_rate": 0.0001, "loss": 0.0092, "step": 96740 }, { "epoch": 636.5131578947369, "grad_norm": 1.3827406167984009, "learning_rate": 0.0001, "loss": 0.0137, "step": 96750 }, { "epoch": 636.578947368421, "grad_norm": 1.2517787218093872, "learning_rate": 0.0001, "loss": 0.0119, "step": 96760 }, { "epoch": 636.6447368421053, "grad_norm": 1.0848556756973267, "learning_rate": 0.0001, "loss": 0.0095, "step": 96770 }, { "epoch": 636.7105263157895, "grad_norm": 0.9772914052009583, "learning_rate": 0.0001, "loss": 0.01, "step": 96780 }, { "epoch": 636.7763157894736, "grad_norm": 1.1397275924682617, "learning_rate": 0.0001, "loss": 0.0106, "step": 96790 }, { "epoch": 636.8421052631579, "grad_norm": 0.940963089466095, "learning_rate": 0.0001, "loss": 0.0106, "step": 96800 }, { "epoch": 636.9078947368421, "grad_norm": 0.8980696797370911, "learning_rate": 0.0001, "loss": 0.0091, "step": 96810 }, { "epoch": 636.9736842105264, "grad_norm": 0.9884552359580994, "learning_rate": 0.0001, "loss": 0.011, "step": 96820 }, { "epoch": 637.0394736842105, "grad_norm": 1.1750383377075195, "learning_rate": 0.0001, "loss": 0.0115, "step": 96830 }, { "epoch": 637.1052631578947, "grad_norm": 1.1034425497055054, "learning_rate": 0.0001, "loss": 0.0099, "step": 96840 }, { "epoch": 637.171052631579, "grad_norm": 0.7074925899505615, "learning_rate": 0.0001, "loss": 0.0098, "step": 96850 }, { "epoch": 637.2368421052631, "grad_norm": 0.977379322052002, "learning_rate": 0.0001, "loss": 0.0113, "step": 96860 }, { "epoch": 637.3026315789474, "grad_norm": 0.942234456539154, "learning_rate": 0.0001, "loss": 0.0128, "step": 96870 }, { "epoch": 637.3684210526316, "grad_norm": 1.336562156677246, "learning_rate": 0.0001, "loss": 0.0122, "step": 96880 }, { "epoch": 637.4342105263158, "grad_norm": 1.1461960077285767, "learning_rate": 0.0001, "loss": 0.0097, "step": 96890 }, { "epoch": 637.5, "grad_norm": 1.023284912109375, "learning_rate": 0.0001, "loss": 0.011, "step": 96900 }, { "epoch": 637.5657894736842, "grad_norm": 1.1981929540634155, "learning_rate": 0.0001, "loss": 0.0104, "step": 96910 }, { "epoch": 637.6315789473684, "grad_norm": 1.3196550607681274, "learning_rate": 0.0001, "loss": 0.0095, "step": 96920 }, { "epoch": 637.6973684210526, "grad_norm": 1.042585015296936, "learning_rate": 0.0001, "loss": 0.0107, "step": 96930 }, { "epoch": 637.7631578947369, "grad_norm": 0.9002341628074646, "learning_rate": 0.0001, "loss": 0.0113, "step": 96940 }, { "epoch": 637.828947368421, "grad_norm": 0.8911952376365662, "learning_rate": 0.0001, "loss": 0.0113, "step": 96950 }, { "epoch": 637.8947368421053, "grad_norm": 1.2505193948745728, "learning_rate": 0.0001, "loss": 0.0104, "step": 96960 }, { "epoch": 637.9605263157895, "grad_norm": 1.0482158660888672, "learning_rate": 0.0001, "loss": 0.0109, "step": 96970 }, { "epoch": 638.0263157894736, "grad_norm": 1.1949135065078735, "learning_rate": 0.0001, "loss": 0.0114, "step": 96980 }, { "epoch": 638.0921052631579, "grad_norm": 1.1491917371749878, "learning_rate": 0.0001, "loss": 0.0099, "step": 96990 }, { "epoch": 638.1578947368421, "grad_norm": 1.0446337461471558, "learning_rate": 0.0001, "loss": 0.0115, "step": 97000 }, { "epoch": 638.2236842105264, "grad_norm": 1.1364582777023315, "learning_rate": 0.0001, "loss": 0.0103, "step": 97010 }, { "epoch": 638.2894736842105, "grad_norm": 0.856816291809082, "learning_rate": 0.0001, "loss": 0.0126, "step": 97020 }, { "epoch": 638.3552631578947, "grad_norm": 0.8455696702003479, "learning_rate": 0.0001, "loss": 0.0102, "step": 97030 }, { "epoch": 638.421052631579, "grad_norm": 1.37261164188385, "learning_rate": 0.0001, "loss": 0.0106, "step": 97040 }, { "epoch": 638.4868421052631, "grad_norm": 1.065354585647583, "learning_rate": 0.0001, "loss": 0.0096, "step": 97050 }, { "epoch": 638.5526315789474, "grad_norm": 1.0042915344238281, "learning_rate": 0.0001, "loss": 0.0112, "step": 97060 }, { "epoch": 638.6184210526316, "grad_norm": 0.9727343320846558, "learning_rate": 0.0001, "loss": 0.0106, "step": 97070 }, { "epoch": 638.6842105263158, "grad_norm": 0.724888026714325, "learning_rate": 0.0001, "loss": 0.0124, "step": 97080 }, { "epoch": 638.75, "grad_norm": 1.3290162086486816, "learning_rate": 0.0001, "loss": 0.0123, "step": 97090 }, { "epoch": 638.8157894736842, "grad_norm": 1.107700228691101, "learning_rate": 0.0001, "loss": 0.0097, "step": 97100 }, { "epoch": 638.8815789473684, "grad_norm": 1.076773762702942, "learning_rate": 0.0001, "loss": 0.0095, "step": 97110 }, { "epoch": 638.9473684210526, "grad_norm": 0.8548887968063354, "learning_rate": 0.0001, "loss": 0.0099, "step": 97120 }, { "epoch": 639.0131578947369, "grad_norm": 1.2806280851364136, "learning_rate": 0.0001, "loss": 0.0108, "step": 97130 }, { "epoch": 639.078947368421, "grad_norm": 0.99432772397995, "learning_rate": 0.0001, "loss": 0.0093, "step": 97140 }, { "epoch": 639.1447368421053, "grad_norm": 1.1284815073013306, "learning_rate": 0.0001, "loss": 0.0113, "step": 97150 }, { "epoch": 639.2105263157895, "grad_norm": 1.4430863857269287, "learning_rate": 0.0001, "loss": 0.0123, "step": 97160 }, { "epoch": 639.2763157894736, "grad_norm": 0.9166154265403748, "learning_rate": 0.0001, "loss": 0.0121, "step": 97170 }, { "epoch": 639.3421052631579, "grad_norm": 1.2260555028915405, "learning_rate": 0.0001, "loss": 0.0141, "step": 97180 }, { "epoch": 639.4078947368421, "grad_norm": 1.1222833395004272, "learning_rate": 0.0001, "loss": 0.0107, "step": 97190 }, { "epoch": 639.4736842105264, "grad_norm": 1.1462934017181396, "learning_rate": 0.0001, "loss": 0.011, "step": 97200 }, { "epoch": 639.5394736842105, "grad_norm": 1.0073168277740479, "learning_rate": 0.0001, "loss": 0.0103, "step": 97210 }, { "epoch": 639.6052631578947, "grad_norm": 1.0227516889572144, "learning_rate": 0.0001, "loss": 0.0116, "step": 97220 }, { "epoch": 639.671052631579, "grad_norm": 0.9892854690551758, "learning_rate": 0.0001, "loss": 0.0103, "step": 97230 }, { "epoch": 639.7368421052631, "grad_norm": 0.918364942073822, "learning_rate": 0.0001, "loss": 0.0093, "step": 97240 }, { "epoch": 639.8026315789474, "grad_norm": 0.8302596211433411, "learning_rate": 0.0001, "loss": 0.0096, "step": 97250 }, { "epoch": 639.8684210526316, "grad_norm": 1.3149603605270386, "learning_rate": 0.0001, "loss": 0.0131, "step": 97260 }, { "epoch": 639.9342105263158, "grad_norm": 1.0401725769042969, "learning_rate": 0.0001, "loss": 0.0117, "step": 97270 }, { "epoch": 640.0, "grad_norm": 0.8277792930603027, "learning_rate": 0.0001, "loss": 0.0088, "step": 97280 }, { "epoch": 640.0657894736842, "grad_norm": 0.8482030034065247, "learning_rate": 0.0001, "loss": 0.0119, "step": 97290 }, { "epoch": 640.1315789473684, "grad_norm": 1.0147836208343506, "learning_rate": 0.0001, "loss": 0.0106, "step": 97300 }, { "epoch": 640.1973684210526, "grad_norm": 1.1419466733932495, "learning_rate": 0.0001, "loss": 0.0102, "step": 97310 }, { "epoch": 640.2631578947369, "grad_norm": 0.7621695399284363, "learning_rate": 0.0001, "loss": 0.012, "step": 97320 }, { "epoch": 640.328947368421, "grad_norm": 1.1174577474594116, "learning_rate": 0.0001, "loss": 0.0093, "step": 97330 }, { "epoch": 640.3947368421053, "grad_norm": 0.8022199273109436, "learning_rate": 0.0001, "loss": 0.0116, "step": 97340 }, { "epoch": 640.4605263157895, "grad_norm": 0.8935840129852295, "learning_rate": 0.0001, "loss": 0.0098, "step": 97350 }, { "epoch": 640.5263157894736, "grad_norm": 1.3368089199066162, "learning_rate": 0.0001, "loss": 0.0117, "step": 97360 }, { "epoch": 640.5921052631579, "grad_norm": 1.0535829067230225, "learning_rate": 0.0001, "loss": 0.0128, "step": 97370 }, { "epoch": 640.6578947368421, "grad_norm": 1.1254475116729736, "learning_rate": 0.0001, "loss": 0.0123, "step": 97380 }, { "epoch": 640.7236842105264, "grad_norm": 1.1946035623550415, "learning_rate": 0.0001, "loss": 0.014, "step": 97390 }, { "epoch": 640.7894736842105, "grad_norm": 1.2451772689819336, "learning_rate": 0.0001, "loss": 0.0126, "step": 97400 }, { "epoch": 640.8552631578947, "grad_norm": 0.8775976896286011, "learning_rate": 0.0001, "loss": 0.009, "step": 97410 }, { "epoch": 640.921052631579, "grad_norm": 1.0031647682189941, "learning_rate": 0.0001, "loss": 0.0088, "step": 97420 }, { "epoch": 640.9868421052631, "grad_norm": 1.1627060174942017, "learning_rate": 0.0001, "loss": 0.0119, "step": 97430 }, { "epoch": 641.0526315789474, "grad_norm": 0.9640074372291565, "learning_rate": 0.0001, "loss": 0.0115, "step": 97440 }, { "epoch": 641.1184210526316, "grad_norm": 1.0985318422317505, "learning_rate": 0.0001, "loss": 0.0101, "step": 97450 }, { "epoch": 641.1842105263158, "grad_norm": 0.9738800525665283, "learning_rate": 0.0001, "loss": 0.0095, "step": 97460 }, { "epoch": 641.25, "grad_norm": 0.9755899906158447, "learning_rate": 0.0001, "loss": 0.0107, "step": 97470 }, { "epoch": 641.3157894736842, "grad_norm": 1.0237241983413696, "learning_rate": 0.0001, "loss": 0.0108, "step": 97480 }, { "epoch": 641.3815789473684, "grad_norm": 0.8942602872848511, "learning_rate": 0.0001, "loss": 0.0104, "step": 97490 }, { "epoch": 641.4473684210526, "grad_norm": 0.7703356742858887, "learning_rate": 0.0001, "loss": 0.0097, "step": 97500 }, { "epoch": 641.5131578947369, "grad_norm": 1.2847163677215576, "learning_rate": 0.0001, "loss": 0.0096, "step": 97510 }, { "epoch": 641.578947368421, "grad_norm": 0.8879604339599609, "learning_rate": 0.0001, "loss": 0.0115, "step": 97520 }, { "epoch": 641.6447368421053, "grad_norm": 1.2082008123397827, "learning_rate": 0.0001, "loss": 0.0111, "step": 97530 }, { "epoch": 641.7105263157895, "grad_norm": 1.3666983842849731, "learning_rate": 0.0001, "loss": 0.0107, "step": 97540 }, { "epoch": 641.7763157894736, "grad_norm": 1.454520583152771, "learning_rate": 0.0001, "loss": 0.0124, "step": 97550 }, { "epoch": 641.8421052631579, "grad_norm": 1.0853079557418823, "learning_rate": 0.0001, "loss": 0.0106, "step": 97560 }, { "epoch": 641.9078947368421, "grad_norm": 0.8811721205711365, "learning_rate": 0.0001, "loss": 0.0135, "step": 97570 }, { "epoch": 641.9736842105264, "grad_norm": 1.1882359981536865, "learning_rate": 0.0001, "loss": 0.0136, "step": 97580 }, { "epoch": 642.0394736842105, "grad_norm": 1.194444179534912, "learning_rate": 0.0001, "loss": 0.0107, "step": 97590 }, { "epoch": 642.1052631578947, "grad_norm": 1.390655517578125, "learning_rate": 0.0001, "loss": 0.0106, "step": 97600 }, { "epoch": 642.171052631579, "grad_norm": 0.7747687697410583, "learning_rate": 0.0001, "loss": 0.013, "step": 97610 }, { "epoch": 642.2368421052631, "grad_norm": 0.9693976640701294, "learning_rate": 0.0001, "loss": 0.0135, "step": 97620 }, { "epoch": 642.3026315789474, "grad_norm": 1.255947470664978, "learning_rate": 0.0001, "loss": 0.0112, "step": 97630 }, { "epoch": 642.3684210526316, "grad_norm": 1.0807551145553589, "learning_rate": 0.0001, "loss": 0.0143, "step": 97640 }, { "epoch": 642.4342105263158, "grad_norm": 1.077979326248169, "learning_rate": 0.0001, "loss": 0.0108, "step": 97650 }, { "epoch": 642.5, "grad_norm": 0.8917891979217529, "learning_rate": 0.0001, "loss": 0.0109, "step": 97660 }, { "epoch": 642.5657894736842, "grad_norm": 1.0527293682098389, "learning_rate": 0.0001, "loss": 0.0102, "step": 97670 }, { "epoch": 642.6315789473684, "grad_norm": 0.8535796403884888, "learning_rate": 0.0001, "loss": 0.0114, "step": 97680 }, { "epoch": 642.6973684210526, "grad_norm": 0.9337286949157715, "learning_rate": 0.0001, "loss": 0.0099, "step": 97690 }, { "epoch": 642.7631578947369, "grad_norm": 1.0351840257644653, "learning_rate": 0.0001, "loss": 0.0127, "step": 97700 }, { "epoch": 642.828947368421, "grad_norm": 0.8320999145507812, "learning_rate": 0.0001, "loss": 0.0097, "step": 97710 }, { "epoch": 642.8947368421053, "grad_norm": 1.2720904350280762, "learning_rate": 0.0001, "loss": 0.013, "step": 97720 }, { "epoch": 642.9605263157895, "grad_norm": 1.19514799118042, "learning_rate": 0.0001, "loss": 0.0109, "step": 97730 }, { "epoch": 643.0263157894736, "grad_norm": 1.1567554473876953, "learning_rate": 0.0001, "loss": 0.0112, "step": 97740 }, { "epoch": 643.0921052631579, "grad_norm": 0.992160975933075, "learning_rate": 0.0001, "loss": 0.0107, "step": 97750 }, { "epoch": 643.1578947368421, "grad_norm": 0.9545310735702515, "learning_rate": 0.0001, "loss": 0.0127, "step": 97760 }, { "epoch": 643.2236842105264, "grad_norm": 0.5216129422187805, "learning_rate": 0.0001, "loss": 0.0106, "step": 97770 }, { "epoch": 643.2894736842105, "grad_norm": 1.1363471746444702, "learning_rate": 0.0001, "loss": 0.0112, "step": 97780 }, { "epoch": 643.3552631578947, "grad_norm": 1.0840483903884888, "learning_rate": 0.0001, "loss": 0.0115, "step": 97790 }, { "epoch": 643.421052631579, "grad_norm": 0.9298526048660278, "learning_rate": 0.0001, "loss": 0.0108, "step": 97800 }, { "epoch": 643.4868421052631, "grad_norm": 0.9495653510093689, "learning_rate": 0.0001, "loss": 0.0126, "step": 97810 }, { "epoch": 643.5526315789474, "grad_norm": 0.9317263960838318, "learning_rate": 0.0001, "loss": 0.0126, "step": 97820 }, { "epoch": 643.6184210526316, "grad_norm": 1.0475777387619019, "learning_rate": 0.0001, "loss": 0.0127, "step": 97830 }, { "epoch": 643.6842105263158, "grad_norm": 1.4607027769088745, "learning_rate": 0.0001, "loss": 0.0119, "step": 97840 }, { "epoch": 643.75, "grad_norm": 0.8541473150253296, "learning_rate": 0.0001, "loss": 0.0108, "step": 97850 }, { "epoch": 643.8157894736842, "grad_norm": 0.600893497467041, "learning_rate": 0.0001, "loss": 0.0117, "step": 97860 }, { "epoch": 643.8815789473684, "grad_norm": 0.9018236994743347, "learning_rate": 0.0001, "loss": 0.0139, "step": 97870 }, { "epoch": 643.9473684210526, "grad_norm": 0.7661747932434082, "learning_rate": 0.0001, "loss": 0.0124, "step": 97880 }, { "epoch": 644.0131578947369, "grad_norm": 0.8002960085868835, "learning_rate": 0.0001, "loss": 0.0105, "step": 97890 }, { "epoch": 644.078947368421, "grad_norm": 0.8436306715011597, "learning_rate": 0.0001, "loss": 0.0147, "step": 97900 }, { "epoch": 644.1447368421053, "grad_norm": 1.3472542762756348, "learning_rate": 0.0001, "loss": 0.0106, "step": 97910 }, { "epoch": 644.2105263157895, "grad_norm": 0.8664248585700989, "learning_rate": 0.0001, "loss": 0.0115, "step": 97920 }, { "epoch": 644.2763157894736, "grad_norm": 0.9880051612854004, "learning_rate": 0.0001, "loss": 0.0118, "step": 97930 }, { "epoch": 644.3421052631579, "grad_norm": 1.1191985607147217, "learning_rate": 0.0001, "loss": 0.0145, "step": 97940 }, { "epoch": 644.4078947368421, "grad_norm": 1.2010605335235596, "learning_rate": 0.0001, "loss": 0.0115, "step": 97950 }, { "epoch": 644.4736842105264, "grad_norm": 1.250929594039917, "learning_rate": 0.0001, "loss": 0.0104, "step": 97960 }, { "epoch": 644.5394736842105, "grad_norm": 1.0687397718429565, "learning_rate": 0.0001, "loss": 0.0107, "step": 97970 }, { "epoch": 644.6052631578947, "grad_norm": 0.9296792149543762, "learning_rate": 0.0001, "loss": 0.0126, "step": 97980 }, { "epoch": 644.671052631579, "grad_norm": 1.3585340976715088, "learning_rate": 0.0001, "loss": 0.0115, "step": 97990 }, { "epoch": 644.7368421052631, "grad_norm": 1.1659291982650757, "learning_rate": 0.0001, "loss": 0.009, "step": 98000 }, { "epoch": 644.8026315789474, "grad_norm": 1.242610216140747, "learning_rate": 0.0001, "loss": 0.0112, "step": 98010 }, { "epoch": 644.8684210526316, "grad_norm": 1.096759557723999, "learning_rate": 0.0001, "loss": 0.0123, "step": 98020 }, { "epoch": 644.9342105263158, "grad_norm": 1.0130127668380737, "learning_rate": 0.0001, "loss": 0.011, "step": 98030 }, { "epoch": 645.0, "grad_norm": 1.1907083988189697, "learning_rate": 0.0001, "loss": 0.0125, "step": 98040 }, { "epoch": 645.0657894736842, "grad_norm": 1.232130765914917, "learning_rate": 0.0001, "loss": 0.0115, "step": 98050 }, { "epoch": 645.1315789473684, "grad_norm": 1.1238503456115723, "learning_rate": 0.0001, "loss": 0.0113, "step": 98060 }, { "epoch": 645.1973684210526, "grad_norm": 0.9487515091896057, "learning_rate": 0.0001, "loss": 0.0104, "step": 98070 }, { "epoch": 645.2631578947369, "grad_norm": 0.9860408902168274, "learning_rate": 0.0001, "loss": 0.0101, "step": 98080 }, { "epoch": 645.328947368421, "grad_norm": 0.7561169862747192, "learning_rate": 0.0001, "loss": 0.0113, "step": 98090 }, { "epoch": 645.3947368421053, "grad_norm": 0.9736877679824829, "learning_rate": 0.0001, "loss": 0.0122, "step": 98100 }, { "epoch": 645.4605263157895, "grad_norm": 0.6840393543243408, "learning_rate": 0.0001, "loss": 0.0101, "step": 98110 }, { "epoch": 645.5263157894736, "grad_norm": 1.2354158163070679, "learning_rate": 0.0001, "loss": 0.0122, "step": 98120 }, { "epoch": 645.5921052631579, "grad_norm": 1.306350827217102, "learning_rate": 0.0001, "loss": 0.012, "step": 98130 }, { "epoch": 645.6578947368421, "grad_norm": 1.1757615804672241, "learning_rate": 0.0001, "loss": 0.009, "step": 98140 }, { "epoch": 645.7236842105264, "grad_norm": 1.1148210763931274, "learning_rate": 0.0001, "loss": 0.0112, "step": 98150 }, { "epoch": 645.7894736842105, "grad_norm": 1.281851887702942, "learning_rate": 0.0001, "loss": 0.0103, "step": 98160 }, { "epoch": 645.8552631578947, "grad_norm": 0.8599856495857239, "learning_rate": 0.0001, "loss": 0.0099, "step": 98170 }, { "epoch": 645.921052631579, "grad_norm": 0.9599358439445496, "learning_rate": 0.0001, "loss": 0.0103, "step": 98180 }, { "epoch": 645.9868421052631, "grad_norm": 1.0771713256835938, "learning_rate": 0.0001, "loss": 0.012, "step": 98190 }, { "epoch": 646.0526315789474, "grad_norm": 0.7740371227264404, "learning_rate": 0.0001, "loss": 0.012, "step": 98200 }, { "epoch": 646.1184210526316, "grad_norm": 1.125326156616211, "learning_rate": 0.0001, "loss": 0.0098, "step": 98210 }, { "epoch": 646.1842105263158, "grad_norm": 1.2309529781341553, "learning_rate": 0.0001, "loss": 0.0121, "step": 98220 }, { "epoch": 646.25, "grad_norm": 1.0347673892974854, "learning_rate": 0.0001, "loss": 0.0091, "step": 98230 }, { "epoch": 646.3157894736842, "grad_norm": 1.0844132900238037, "learning_rate": 0.0001, "loss": 0.0101, "step": 98240 }, { "epoch": 646.3815789473684, "grad_norm": 0.7745186686515808, "learning_rate": 0.0001, "loss": 0.0119, "step": 98250 }, { "epoch": 646.4473684210526, "grad_norm": 1.0603491067886353, "learning_rate": 0.0001, "loss": 0.0131, "step": 98260 }, { "epoch": 646.5131578947369, "grad_norm": 1.0565520524978638, "learning_rate": 0.0001, "loss": 0.0111, "step": 98270 }, { "epoch": 646.578947368421, "grad_norm": 1.1339266300201416, "learning_rate": 0.0001, "loss": 0.0105, "step": 98280 }, { "epoch": 646.6447368421053, "grad_norm": 1.2667466402053833, "learning_rate": 0.0001, "loss": 0.0112, "step": 98290 }, { "epoch": 646.7105263157895, "grad_norm": 1.1827952861785889, "learning_rate": 0.0001, "loss": 0.0113, "step": 98300 }, { "epoch": 646.7763157894736, "grad_norm": 1.1136367321014404, "learning_rate": 0.0001, "loss": 0.0109, "step": 98310 }, { "epoch": 646.8421052631579, "grad_norm": 0.8930481672286987, "learning_rate": 0.0001, "loss": 0.0102, "step": 98320 }, { "epoch": 646.9078947368421, "grad_norm": 1.1677948236465454, "learning_rate": 0.0001, "loss": 0.011, "step": 98330 }, { "epoch": 646.9736842105264, "grad_norm": 1.003639817237854, "learning_rate": 0.0001, "loss": 0.011, "step": 98340 }, { "epoch": 647.0394736842105, "grad_norm": 0.9792555570602417, "learning_rate": 0.0001, "loss": 0.01, "step": 98350 }, { "epoch": 647.1052631578947, "grad_norm": 1.3798192739486694, "learning_rate": 0.0001, "loss": 0.0115, "step": 98360 }, { "epoch": 647.171052631579, "grad_norm": 0.9844328165054321, "learning_rate": 0.0001, "loss": 0.0109, "step": 98370 }, { "epoch": 647.2368421052631, "grad_norm": 1.2482646703720093, "learning_rate": 0.0001, "loss": 0.0128, "step": 98380 }, { "epoch": 647.3026315789474, "grad_norm": 0.796341598033905, "learning_rate": 0.0001, "loss": 0.0095, "step": 98390 }, { "epoch": 647.3684210526316, "grad_norm": 0.8844649791717529, "learning_rate": 0.0001, "loss": 0.0118, "step": 98400 }, { "epoch": 647.4342105263158, "grad_norm": 1.4429231882095337, "learning_rate": 0.0001, "loss": 0.011, "step": 98410 }, { "epoch": 647.5, "grad_norm": 1.1434082984924316, "learning_rate": 0.0001, "loss": 0.0091, "step": 98420 }, { "epoch": 647.5657894736842, "grad_norm": 1.153358817100525, "learning_rate": 0.0001, "loss": 0.011, "step": 98430 }, { "epoch": 647.6315789473684, "grad_norm": 1.1155855655670166, "learning_rate": 0.0001, "loss": 0.0097, "step": 98440 }, { "epoch": 647.6973684210526, "grad_norm": 0.8420267701148987, "learning_rate": 0.0001, "loss": 0.0108, "step": 98450 }, { "epoch": 647.7631578947369, "grad_norm": 0.9819120764732361, "learning_rate": 0.0001, "loss": 0.0103, "step": 98460 }, { "epoch": 647.828947368421, "grad_norm": 1.6169120073318481, "learning_rate": 0.0001, "loss": 0.0098, "step": 98470 }, { "epoch": 647.8947368421053, "grad_norm": 1.1635892391204834, "learning_rate": 0.0001, "loss": 0.0095, "step": 98480 }, { "epoch": 647.9605263157895, "grad_norm": 1.0484620332717896, "learning_rate": 0.0001, "loss": 0.0102, "step": 98490 }, { "epoch": 648.0263157894736, "grad_norm": 0.8859636783599854, "learning_rate": 0.0001, "loss": 0.0118, "step": 98500 }, { "epoch": 648.0921052631579, "grad_norm": 0.9191065430641174, "learning_rate": 0.0001, "loss": 0.0134, "step": 98510 }, { "epoch": 648.1578947368421, "grad_norm": 1.2865121364593506, "learning_rate": 0.0001, "loss": 0.0099, "step": 98520 }, { "epoch": 648.2236842105264, "grad_norm": 1.1271616220474243, "learning_rate": 0.0001, "loss": 0.0089, "step": 98530 }, { "epoch": 648.2894736842105, "grad_norm": 1.0781117677688599, "learning_rate": 0.0001, "loss": 0.009, "step": 98540 }, { "epoch": 648.3552631578947, "grad_norm": 0.8963111639022827, "learning_rate": 0.0001, "loss": 0.0129, "step": 98550 }, { "epoch": 648.421052631579, "grad_norm": 0.8486378192901611, "learning_rate": 0.0001, "loss": 0.0092, "step": 98560 }, { "epoch": 648.4868421052631, "grad_norm": 0.7802874445915222, "learning_rate": 0.0001, "loss": 0.0129, "step": 98570 }, { "epoch": 648.5526315789474, "grad_norm": 0.9380501508712769, "learning_rate": 0.0001, "loss": 0.0108, "step": 98580 }, { "epoch": 648.6184210526316, "grad_norm": 1.1357520818710327, "learning_rate": 0.0001, "loss": 0.0105, "step": 98590 }, { "epoch": 648.6842105263158, "grad_norm": 0.9585915207862854, "learning_rate": 0.0001, "loss": 0.0109, "step": 98600 }, { "epoch": 648.75, "grad_norm": 1.170572280883789, "learning_rate": 0.0001, "loss": 0.0104, "step": 98610 }, { "epoch": 648.8157894736842, "grad_norm": 0.7782623767852783, "learning_rate": 0.0001, "loss": 0.0104, "step": 98620 }, { "epoch": 648.8815789473684, "grad_norm": 1.2383381128311157, "learning_rate": 0.0001, "loss": 0.0108, "step": 98630 }, { "epoch": 648.9473684210526, "grad_norm": 1.1335386037826538, "learning_rate": 0.0001, "loss": 0.009, "step": 98640 }, { "epoch": 649.0131578947369, "grad_norm": 1.0189485549926758, "learning_rate": 0.0001, "loss": 0.0103, "step": 98650 }, { "epoch": 649.078947368421, "grad_norm": 1.0290753841400146, "learning_rate": 0.0001, "loss": 0.0102, "step": 98660 }, { "epoch": 649.1447368421053, "grad_norm": 1.2437363862991333, "learning_rate": 0.0001, "loss": 0.0109, "step": 98670 }, { "epoch": 649.2105263157895, "grad_norm": 0.781124472618103, "learning_rate": 0.0001, "loss": 0.01, "step": 98680 }, { "epoch": 649.2763157894736, "grad_norm": 1.2116012573242188, "learning_rate": 0.0001, "loss": 0.0101, "step": 98690 }, { "epoch": 649.3421052631579, "grad_norm": 0.9548416137695312, "learning_rate": 0.0001, "loss": 0.01, "step": 98700 }, { "epoch": 649.4078947368421, "grad_norm": 0.7918787002563477, "learning_rate": 0.0001, "loss": 0.0096, "step": 98710 }, { "epoch": 649.4736842105264, "grad_norm": 0.8988084197044373, "learning_rate": 0.0001, "loss": 0.0125, "step": 98720 }, { "epoch": 649.5394736842105, "grad_norm": 0.9593010544776917, "learning_rate": 0.0001, "loss": 0.0098, "step": 98730 }, { "epoch": 649.6052631578947, "grad_norm": 1.205809235572815, "learning_rate": 0.0001, "loss": 0.0124, "step": 98740 }, { "epoch": 649.671052631579, "grad_norm": 1.1642436981201172, "learning_rate": 0.0001, "loss": 0.0117, "step": 98750 }, { "epoch": 649.7368421052631, "grad_norm": 0.8834133744239807, "learning_rate": 0.0001, "loss": 0.0121, "step": 98760 }, { "epoch": 649.8026315789474, "grad_norm": 1.1076525449752808, "learning_rate": 0.0001, "loss": 0.0099, "step": 98770 }, { "epoch": 649.8684210526316, "grad_norm": 1.2798569202423096, "learning_rate": 0.0001, "loss": 0.0115, "step": 98780 }, { "epoch": 649.9342105263158, "grad_norm": 0.937585175037384, "learning_rate": 0.0001, "loss": 0.0101, "step": 98790 }, { "epoch": 650.0, "grad_norm": 1.0356471538543701, "learning_rate": 0.0001, "loss": 0.0095, "step": 98800 }, { "epoch": 650.0657894736842, "grad_norm": 1.1217914819717407, "learning_rate": 0.0001, "loss": 0.0122, "step": 98810 }, { "epoch": 650.1315789473684, "grad_norm": 0.9628757238388062, "learning_rate": 0.0001, "loss": 0.0115, "step": 98820 }, { "epoch": 650.1973684210526, "grad_norm": 1.3987805843353271, "learning_rate": 0.0001, "loss": 0.0105, "step": 98830 }, { "epoch": 650.2631578947369, "grad_norm": 1.4442050457000732, "learning_rate": 0.0001, "loss": 0.0098, "step": 98840 }, { "epoch": 650.328947368421, "grad_norm": 1.1787656545639038, "learning_rate": 0.0001, "loss": 0.0119, "step": 98850 }, { "epoch": 650.3947368421053, "grad_norm": 1.3678606748580933, "learning_rate": 0.0001, "loss": 0.0102, "step": 98860 }, { "epoch": 650.4605263157895, "grad_norm": 1.1847083568572998, "learning_rate": 0.0001, "loss": 0.0111, "step": 98870 }, { "epoch": 650.5263157894736, "grad_norm": 1.195621132850647, "learning_rate": 0.0001, "loss": 0.0098, "step": 98880 }, { "epoch": 650.5921052631579, "grad_norm": 1.2353485822677612, "learning_rate": 0.0001, "loss": 0.0111, "step": 98890 }, { "epoch": 650.6578947368421, "grad_norm": 0.8940504789352417, "learning_rate": 0.0001, "loss": 0.011, "step": 98900 }, { "epoch": 650.7236842105264, "grad_norm": 1.0895377397537231, "learning_rate": 0.0001, "loss": 0.0096, "step": 98910 }, { "epoch": 650.7894736842105, "grad_norm": 1.0993573665618896, "learning_rate": 0.0001, "loss": 0.0091, "step": 98920 }, { "epoch": 650.8552631578947, "grad_norm": 1.2110718488693237, "learning_rate": 0.0001, "loss": 0.0098, "step": 98930 }, { "epoch": 650.921052631579, "grad_norm": 0.9158070683479309, "learning_rate": 0.0001, "loss": 0.0092, "step": 98940 }, { "epoch": 650.9868421052631, "grad_norm": 1.1650333404541016, "learning_rate": 0.0001, "loss": 0.0108, "step": 98950 }, { "epoch": 651.0526315789474, "grad_norm": 1.2735977172851562, "learning_rate": 0.0001, "loss": 0.0106, "step": 98960 }, { "epoch": 651.1184210526316, "grad_norm": 1.1164957284927368, "learning_rate": 0.0001, "loss": 0.0101, "step": 98970 }, { "epoch": 651.1842105263158, "grad_norm": 1.1054691076278687, "learning_rate": 0.0001, "loss": 0.0096, "step": 98980 }, { "epoch": 651.25, "grad_norm": 1.2364662885665894, "learning_rate": 0.0001, "loss": 0.0095, "step": 98990 }, { "epoch": 651.3157894736842, "grad_norm": 1.0556890964508057, "learning_rate": 0.0001, "loss": 0.0108, "step": 99000 }, { "epoch": 651.3815789473684, "grad_norm": 1.3006346225738525, "learning_rate": 0.0001, "loss": 0.0111, "step": 99010 }, { "epoch": 651.4473684210526, "grad_norm": 1.3828588724136353, "learning_rate": 0.0001, "loss": 0.0091, "step": 99020 }, { "epoch": 651.5131578947369, "grad_norm": 1.1058744192123413, "learning_rate": 0.0001, "loss": 0.0102, "step": 99030 }, { "epoch": 651.578947368421, "grad_norm": 1.0962953567504883, "learning_rate": 0.0001, "loss": 0.0093, "step": 99040 }, { "epoch": 651.6447368421053, "grad_norm": 1.1675875186920166, "learning_rate": 0.0001, "loss": 0.0111, "step": 99050 }, { "epoch": 651.7105263157895, "grad_norm": 1.486832857131958, "learning_rate": 0.0001, "loss": 0.0116, "step": 99060 }, { "epoch": 651.7763157894736, "grad_norm": 0.9721072912216187, "learning_rate": 0.0001, "loss": 0.0106, "step": 99070 }, { "epoch": 651.8421052631579, "grad_norm": 1.1138964891433716, "learning_rate": 0.0001, "loss": 0.0104, "step": 99080 }, { "epoch": 651.9078947368421, "grad_norm": 0.9725443124771118, "learning_rate": 0.0001, "loss": 0.0096, "step": 99090 }, { "epoch": 651.9736842105264, "grad_norm": 1.2599581480026245, "learning_rate": 0.0001, "loss": 0.0106, "step": 99100 }, { "epoch": 652.0394736842105, "grad_norm": 0.8848244547843933, "learning_rate": 0.0001, "loss": 0.0103, "step": 99110 }, { "epoch": 652.1052631578947, "grad_norm": 1.0993847846984863, "learning_rate": 0.0001, "loss": 0.0097, "step": 99120 }, { "epoch": 652.171052631579, "grad_norm": 1.1078648567199707, "learning_rate": 0.0001, "loss": 0.0086, "step": 99130 }, { "epoch": 652.2368421052631, "grad_norm": 1.1154159307479858, "learning_rate": 0.0001, "loss": 0.0109, "step": 99140 }, { "epoch": 652.3026315789474, "grad_norm": 1.2033315896987915, "learning_rate": 0.0001, "loss": 0.0096, "step": 99150 }, { "epoch": 652.3684210526316, "grad_norm": 0.8731604218482971, "learning_rate": 0.0001, "loss": 0.012, "step": 99160 }, { "epoch": 652.4342105263158, "grad_norm": 1.3480206727981567, "learning_rate": 0.0001, "loss": 0.0104, "step": 99170 }, { "epoch": 652.5, "grad_norm": 1.2460421323776245, "learning_rate": 0.0001, "loss": 0.0103, "step": 99180 }, { "epoch": 652.5657894736842, "grad_norm": 1.5589008331298828, "learning_rate": 0.0001, "loss": 0.0102, "step": 99190 }, { "epoch": 652.6315789473684, "grad_norm": 1.400813341140747, "learning_rate": 0.0001, "loss": 0.01, "step": 99200 }, { "epoch": 652.6973684210526, "grad_norm": 1.5220674276351929, "learning_rate": 0.0001, "loss": 0.0103, "step": 99210 }, { "epoch": 652.7631578947369, "grad_norm": 1.3454396724700928, "learning_rate": 0.0001, "loss": 0.0098, "step": 99220 }, { "epoch": 652.828947368421, "grad_norm": 1.3195019960403442, "learning_rate": 0.0001, "loss": 0.011, "step": 99230 }, { "epoch": 652.8947368421053, "grad_norm": 1.0843956470489502, "learning_rate": 0.0001, "loss": 0.0109, "step": 99240 }, { "epoch": 652.9605263157895, "grad_norm": 1.2075053453445435, "learning_rate": 0.0001, "loss": 0.0116, "step": 99250 }, { "epoch": 653.0263157894736, "grad_norm": 1.3347957134246826, "learning_rate": 0.0001, "loss": 0.0083, "step": 99260 }, { "epoch": 653.0921052631579, "grad_norm": 1.1405830383300781, "learning_rate": 0.0001, "loss": 0.0092, "step": 99270 }, { "epoch": 653.1578947368421, "grad_norm": 0.9808728098869324, "learning_rate": 0.0001, "loss": 0.009, "step": 99280 }, { "epoch": 653.2236842105264, "grad_norm": 1.1668258905410767, "learning_rate": 0.0001, "loss": 0.0093, "step": 99290 }, { "epoch": 653.2894736842105, "grad_norm": 1.2372167110443115, "learning_rate": 0.0001, "loss": 0.0096, "step": 99300 }, { "epoch": 653.3552631578947, "grad_norm": 1.0494773387908936, "learning_rate": 0.0001, "loss": 0.0113, "step": 99310 }, { "epoch": 653.421052631579, "grad_norm": 1.203221082687378, "learning_rate": 0.0001, "loss": 0.0112, "step": 99320 }, { "epoch": 653.4868421052631, "grad_norm": 1.0435203313827515, "learning_rate": 0.0001, "loss": 0.0101, "step": 99330 }, { "epoch": 653.5526315789474, "grad_norm": 1.1974724531173706, "learning_rate": 0.0001, "loss": 0.0102, "step": 99340 }, { "epoch": 653.6184210526316, "grad_norm": 0.9907897710800171, "learning_rate": 0.0001, "loss": 0.0096, "step": 99350 }, { "epoch": 653.6842105263158, "grad_norm": 0.9244227409362793, "learning_rate": 0.0001, "loss": 0.0118, "step": 99360 }, { "epoch": 653.75, "grad_norm": 0.8877261877059937, "learning_rate": 0.0001, "loss": 0.0102, "step": 99370 }, { "epoch": 653.8157894736842, "grad_norm": 0.8094327449798584, "learning_rate": 0.0001, "loss": 0.0116, "step": 99380 }, { "epoch": 653.8815789473684, "grad_norm": 1.055003046989441, "learning_rate": 0.0001, "loss": 0.0135, "step": 99390 }, { "epoch": 653.9473684210526, "grad_norm": 1.1381503343582153, "learning_rate": 0.0001, "loss": 0.011, "step": 99400 }, { "epoch": 654.0131578947369, "grad_norm": 0.7804838418960571, "learning_rate": 0.0001, "loss": 0.0104, "step": 99410 }, { "epoch": 654.078947368421, "grad_norm": 1.3052879571914673, "learning_rate": 0.0001, "loss": 0.0131, "step": 99420 }, { "epoch": 654.1447368421053, "grad_norm": 0.9438977241516113, "learning_rate": 0.0001, "loss": 0.0112, "step": 99430 }, { "epoch": 654.2105263157895, "grad_norm": 1.324198603630066, "learning_rate": 0.0001, "loss": 0.0116, "step": 99440 }, { "epoch": 654.2763157894736, "grad_norm": 1.0651750564575195, "learning_rate": 0.0001, "loss": 0.0106, "step": 99450 }, { "epoch": 654.3421052631579, "grad_norm": 1.2382241487503052, "learning_rate": 0.0001, "loss": 0.0095, "step": 99460 }, { "epoch": 654.4078947368421, "grad_norm": 1.0247857570648193, "learning_rate": 0.0001, "loss": 0.0098, "step": 99470 }, { "epoch": 654.4736842105264, "grad_norm": 0.8664435744285583, "learning_rate": 0.0001, "loss": 0.0098, "step": 99480 }, { "epoch": 654.5394736842105, "grad_norm": 1.1170216798782349, "learning_rate": 0.0001, "loss": 0.0112, "step": 99490 }, { "epoch": 654.6052631578947, "grad_norm": 0.9370788931846619, "learning_rate": 0.0001, "loss": 0.01, "step": 99500 }, { "epoch": 654.671052631579, "grad_norm": 1.2386893033981323, "learning_rate": 0.0001, "loss": 0.0094, "step": 99510 }, { "epoch": 654.7368421052631, "grad_norm": 1.4200820922851562, "learning_rate": 0.0001, "loss": 0.0096, "step": 99520 }, { "epoch": 654.8026315789474, "grad_norm": 1.3450512886047363, "learning_rate": 0.0001, "loss": 0.0113, "step": 99530 }, { "epoch": 654.8684210526316, "grad_norm": 1.1321581602096558, "learning_rate": 0.0001, "loss": 0.0114, "step": 99540 }, { "epoch": 654.9342105263158, "grad_norm": 1.3326202630996704, "learning_rate": 0.0001, "loss": 0.0106, "step": 99550 }, { "epoch": 655.0, "grad_norm": 0.9542607665061951, "learning_rate": 0.0001, "loss": 0.0107, "step": 99560 }, { "epoch": 655.0657894736842, "grad_norm": 1.4707971811294556, "learning_rate": 0.0001, "loss": 0.011, "step": 99570 }, { "epoch": 655.1315789473684, "grad_norm": 1.3422001600265503, "learning_rate": 0.0001, "loss": 0.0092, "step": 99580 }, { "epoch": 655.1973684210526, "grad_norm": 1.2660077810287476, "learning_rate": 0.0001, "loss": 0.0098, "step": 99590 }, { "epoch": 655.2631578947369, "grad_norm": 1.3600062131881714, "learning_rate": 0.0001, "loss": 0.0099, "step": 99600 }, { "epoch": 655.328947368421, "grad_norm": 1.2667269706726074, "learning_rate": 0.0001, "loss": 0.0115, "step": 99610 }, { "epoch": 655.3947368421053, "grad_norm": 1.327304482460022, "learning_rate": 0.0001, "loss": 0.0105, "step": 99620 }, { "epoch": 655.4605263157895, "grad_norm": 1.309645414352417, "learning_rate": 0.0001, "loss": 0.0097, "step": 99630 }, { "epoch": 655.5263157894736, "grad_norm": 0.8108488917350769, "learning_rate": 0.0001, "loss": 0.0094, "step": 99640 }, { "epoch": 655.5921052631579, "grad_norm": 0.985109806060791, "learning_rate": 0.0001, "loss": 0.0111, "step": 99650 }, { "epoch": 655.6578947368421, "grad_norm": 0.8888941407203674, "learning_rate": 0.0001, "loss": 0.0105, "step": 99660 }, { "epoch": 655.7236842105264, "grad_norm": 1.006456971168518, "learning_rate": 0.0001, "loss": 0.0101, "step": 99670 }, { "epoch": 655.7894736842105, "grad_norm": 1.1159669160842896, "learning_rate": 0.0001, "loss": 0.0119, "step": 99680 }, { "epoch": 655.8552631578947, "grad_norm": 0.9909751415252686, "learning_rate": 0.0001, "loss": 0.01, "step": 99690 }, { "epoch": 655.921052631579, "grad_norm": 1.045996904373169, "learning_rate": 0.0001, "loss": 0.0107, "step": 99700 }, { "epoch": 655.9868421052631, "grad_norm": 1.0817979574203491, "learning_rate": 0.0001, "loss": 0.0116, "step": 99710 }, { "epoch": 656.0526315789474, "grad_norm": 1.2081537246704102, "learning_rate": 0.0001, "loss": 0.01, "step": 99720 }, { "epoch": 656.1184210526316, "grad_norm": 0.9195104241371155, "learning_rate": 0.0001, "loss": 0.0096, "step": 99730 }, { "epoch": 656.1842105263158, "grad_norm": 1.1696057319641113, "learning_rate": 0.0001, "loss": 0.0094, "step": 99740 }, { "epoch": 656.25, "grad_norm": 1.0604538917541504, "learning_rate": 0.0001, "loss": 0.0108, "step": 99750 }, { "epoch": 656.3157894736842, "grad_norm": 1.1938915252685547, "learning_rate": 0.0001, "loss": 0.0098, "step": 99760 }, { "epoch": 656.3815789473684, "grad_norm": 1.0641862154006958, "learning_rate": 0.0001, "loss": 0.01, "step": 99770 }, { "epoch": 656.4473684210526, "grad_norm": 0.7114510536193848, "learning_rate": 0.0001, "loss": 0.0111, "step": 99780 }, { "epoch": 656.5131578947369, "grad_norm": 1.3345415592193604, "learning_rate": 0.0001, "loss": 0.0123, "step": 99790 }, { "epoch": 656.578947368421, "grad_norm": 1.2074189186096191, "learning_rate": 0.0001, "loss": 0.0118, "step": 99800 }, { "epoch": 656.6447368421053, "grad_norm": 1.2852963209152222, "learning_rate": 0.0001, "loss": 0.0102, "step": 99810 }, { "epoch": 656.7105263157895, "grad_norm": 0.9789184331893921, "learning_rate": 0.0001, "loss": 0.0097, "step": 99820 }, { "epoch": 656.7763157894736, "grad_norm": 1.1512428522109985, "learning_rate": 0.0001, "loss": 0.0113, "step": 99830 }, { "epoch": 656.8421052631579, "grad_norm": 0.9603427052497864, "learning_rate": 0.0001, "loss": 0.01, "step": 99840 }, { "epoch": 656.9078947368421, "grad_norm": 0.7649758458137512, "learning_rate": 0.0001, "loss": 0.013, "step": 99850 }, { "epoch": 656.9736842105264, "grad_norm": 1.2759963274002075, "learning_rate": 0.0001, "loss": 0.0105, "step": 99860 }, { "epoch": 657.0394736842105, "grad_norm": 0.8978010416030884, "learning_rate": 0.0001, "loss": 0.0092, "step": 99870 }, { "epoch": 657.1052631578947, "grad_norm": 1.0761810541152954, "learning_rate": 0.0001, "loss": 0.0096, "step": 99880 }, { "epoch": 657.171052631579, "grad_norm": 1.0069093704223633, "learning_rate": 0.0001, "loss": 0.0095, "step": 99890 }, { "epoch": 657.2368421052631, "grad_norm": 1.188727855682373, "learning_rate": 0.0001, "loss": 0.012, "step": 99900 }, { "epoch": 657.3026315789474, "grad_norm": 1.0814998149871826, "learning_rate": 0.0001, "loss": 0.0096, "step": 99910 }, { "epoch": 657.3684210526316, "grad_norm": 1.085719347000122, "learning_rate": 0.0001, "loss": 0.0106, "step": 99920 }, { "epoch": 657.4342105263158, "grad_norm": 1.0952857732772827, "learning_rate": 0.0001, "loss": 0.011, "step": 99930 }, { "epoch": 657.5, "grad_norm": 0.7331225275993347, "learning_rate": 0.0001, "loss": 0.0134, "step": 99940 }, { "epoch": 657.5657894736842, "grad_norm": 1.202723503112793, "learning_rate": 0.0001, "loss": 0.0123, "step": 99950 }, { "epoch": 657.6315789473684, "grad_norm": 1.0389039516448975, "learning_rate": 0.0001, "loss": 0.0102, "step": 99960 }, { "epoch": 657.6973684210526, "grad_norm": 0.9595016241073608, "learning_rate": 0.0001, "loss": 0.0099, "step": 99970 }, { "epoch": 657.7631578947369, "grad_norm": 0.8788406848907471, "learning_rate": 0.0001, "loss": 0.0117, "step": 99980 }, { "epoch": 657.828947368421, "grad_norm": 1.0672800540924072, "learning_rate": 0.0001, "loss": 0.0108, "step": 99990 }, { "epoch": 657.8947368421053, "grad_norm": 1.2291871309280396, "learning_rate": 0.0001, "loss": 0.0096, "step": 100000 }, { "epoch": 657.9605263157895, "grad_norm": 1.470028042793274, "learning_rate": 0.0001, "loss": 0.0111, "step": 100010 }, { "epoch": 658.0263157894736, "grad_norm": 1.5565192699432373, "learning_rate": 0.0001, "loss": 0.0113, "step": 100020 }, { "epoch": 658.0921052631579, "grad_norm": 1.5067261457443237, "learning_rate": 0.0001, "loss": 0.0089, "step": 100030 }, { "epoch": 658.1578947368421, "grad_norm": 1.442273497581482, "learning_rate": 0.0001, "loss": 0.0103, "step": 100040 }, { "epoch": 658.2236842105264, "grad_norm": 1.4687352180480957, "learning_rate": 0.0001, "loss": 0.0125, "step": 100050 }, { "epoch": 658.2894736842105, "grad_norm": 1.2669801712036133, "learning_rate": 0.0001, "loss": 0.01, "step": 100060 }, { "epoch": 658.3552631578947, "grad_norm": 0.9612995386123657, "learning_rate": 0.0001, "loss": 0.0096, "step": 100070 }, { "epoch": 658.421052631579, "grad_norm": 1.0884873867034912, "learning_rate": 0.0001, "loss": 0.0092, "step": 100080 }, { "epoch": 658.4868421052631, "grad_norm": 1.34589421749115, "learning_rate": 0.0001, "loss": 0.0106, "step": 100090 }, { "epoch": 658.5526315789474, "grad_norm": 1.1699200868606567, "learning_rate": 0.0001, "loss": 0.0113, "step": 100100 }, { "epoch": 658.6184210526316, "grad_norm": 0.9280645847320557, "learning_rate": 0.0001, "loss": 0.0123, "step": 100110 }, { "epoch": 658.6842105263158, "grad_norm": 1.307676911354065, "learning_rate": 0.0001, "loss": 0.0113, "step": 100120 }, { "epoch": 658.75, "grad_norm": 1.0661935806274414, "learning_rate": 0.0001, "loss": 0.0122, "step": 100130 }, { "epoch": 658.8157894736842, "grad_norm": 0.9746012687683105, "learning_rate": 0.0001, "loss": 0.0104, "step": 100140 }, { "epoch": 658.8815789473684, "grad_norm": 0.9165105223655701, "learning_rate": 0.0001, "loss": 0.0108, "step": 100150 }, { "epoch": 658.9473684210526, "grad_norm": 1.2205272912979126, "learning_rate": 0.0001, "loss": 0.0113, "step": 100160 }, { "epoch": 659.0131578947369, "grad_norm": 0.8076446652412415, "learning_rate": 0.0001, "loss": 0.0104, "step": 100170 }, { "epoch": 659.078947368421, "grad_norm": 0.9755529165267944, "learning_rate": 0.0001, "loss": 0.0105, "step": 100180 }, { "epoch": 659.1447368421053, "grad_norm": 1.021377682685852, "learning_rate": 0.0001, "loss": 0.0111, "step": 100190 }, { "epoch": 659.2105263157895, "grad_norm": 1.2089534997940063, "learning_rate": 0.0001, "loss": 0.0113, "step": 100200 }, { "epoch": 659.2763157894736, "grad_norm": 1.0684491395950317, "learning_rate": 0.0001, "loss": 0.0089, "step": 100210 }, { "epoch": 659.3421052631579, "grad_norm": 1.0578230619430542, "learning_rate": 0.0001, "loss": 0.0113, "step": 100220 }, { "epoch": 659.4078947368421, "grad_norm": 1.1062239408493042, "learning_rate": 0.0001, "loss": 0.0102, "step": 100230 }, { "epoch": 659.4736842105264, "grad_norm": 1.1410943269729614, "learning_rate": 0.0001, "loss": 0.011, "step": 100240 }, { "epoch": 659.5394736842105, "grad_norm": 1.0092296600341797, "learning_rate": 0.0001, "loss": 0.0105, "step": 100250 }, { "epoch": 659.6052631578947, "grad_norm": 1.231850266456604, "learning_rate": 0.0001, "loss": 0.011, "step": 100260 }, { "epoch": 659.671052631579, "grad_norm": 1.1187002658843994, "learning_rate": 0.0001, "loss": 0.0104, "step": 100270 }, { "epoch": 659.7368421052631, "grad_norm": 1.5265558958053589, "learning_rate": 0.0001, "loss": 0.0112, "step": 100280 }, { "epoch": 659.8026315789474, "grad_norm": 1.3116233348846436, "learning_rate": 0.0001, "loss": 0.0116, "step": 100290 }, { "epoch": 659.8684210526316, "grad_norm": 1.1131277084350586, "learning_rate": 0.0001, "loss": 0.0103, "step": 100300 }, { "epoch": 659.9342105263158, "grad_norm": 0.9748517274856567, "learning_rate": 0.0001, "loss": 0.0099, "step": 100310 }, { "epoch": 660.0, "grad_norm": 0.9195544719696045, "learning_rate": 0.0001, "loss": 0.0118, "step": 100320 }, { "epoch": 660.0657894736842, "grad_norm": 1.3856765031814575, "learning_rate": 0.0001, "loss": 0.0094, "step": 100330 }, { "epoch": 660.1315789473684, "grad_norm": 1.0203367471694946, "learning_rate": 0.0001, "loss": 0.0091, "step": 100340 }, { "epoch": 660.1973684210526, "grad_norm": 1.344549536705017, "learning_rate": 0.0001, "loss": 0.0101, "step": 100350 }, { "epoch": 660.2631578947369, "grad_norm": 1.2398855686187744, "learning_rate": 0.0001, "loss": 0.0101, "step": 100360 }, { "epoch": 660.328947368421, "grad_norm": 1.2883145809173584, "learning_rate": 0.0001, "loss": 0.0106, "step": 100370 }, { "epoch": 660.3947368421053, "grad_norm": 1.2523137331008911, "learning_rate": 0.0001, "loss": 0.0098, "step": 100380 }, { "epoch": 660.4605263157895, "grad_norm": 0.7788773775100708, "learning_rate": 0.0001, "loss": 0.0115, "step": 100390 }, { "epoch": 660.5263157894736, "grad_norm": 1.165600061416626, "learning_rate": 0.0001, "loss": 0.0105, "step": 100400 }, { "epoch": 660.5921052631579, "grad_norm": 0.8959761261940002, "learning_rate": 0.0001, "loss": 0.0113, "step": 100410 }, { "epoch": 660.6578947368421, "grad_norm": 0.9459083676338196, "learning_rate": 0.0001, "loss": 0.0103, "step": 100420 }, { "epoch": 660.7236842105264, "grad_norm": 1.0725597143173218, "learning_rate": 0.0001, "loss": 0.0109, "step": 100430 }, { "epoch": 660.7894736842105, "grad_norm": 1.0163500308990479, "learning_rate": 0.0001, "loss": 0.0108, "step": 100440 }, { "epoch": 660.8552631578947, "grad_norm": 1.0939759016036987, "learning_rate": 0.0001, "loss": 0.0104, "step": 100450 }, { "epoch": 660.921052631579, "grad_norm": 1.2891944646835327, "learning_rate": 0.0001, "loss": 0.0118, "step": 100460 }, { "epoch": 660.9868421052631, "grad_norm": 0.9678558111190796, "learning_rate": 0.0001, "loss": 0.0115, "step": 100470 }, { "epoch": 661.0526315789474, "grad_norm": 0.6295179724693298, "learning_rate": 0.0001, "loss": 0.0089, "step": 100480 }, { "epoch": 661.1184210526316, "grad_norm": 1.092713475227356, "learning_rate": 0.0001, "loss": 0.0141, "step": 100490 }, { "epoch": 661.1842105263158, "grad_norm": 0.7096255421638489, "learning_rate": 0.0001, "loss": 0.0103, "step": 100500 }, { "epoch": 661.25, "grad_norm": 1.1376711130142212, "learning_rate": 0.0001, "loss": 0.01, "step": 100510 }, { "epoch": 661.3157894736842, "grad_norm": 1.0104148387908936, "learning_rate": 0.0001, "loss": 0.0093, "step": 100520 }, { "epoch": 661.3815789473684, "grad_norm": 0.9783036112785339, "learning_rate": 0.0001, "loss": 0.0106, "step": 100530 }, { "epoch": 661.4473684210526, "grad_norm": 1.1273771524429321, "learning_rate": 0.0001, "loss": 0.0106, "step": 100540 }, { "epoch": 661.5131578947369, "grad_norm": 1.2987968921661377, "learning_rate": 0.0001, "loss": 0.0109, "step": 100550 }, { "epoch": 661.578947368421, "grad_norm": 1.0228668451309204, "learning_rate": 0.0001, "loss": 0.0106, "step": 100560 }, { "epoch": 661.6447368421053, "grad_norm": 0.901421844959259, "learning_rate": 0.0001, "loss": 0.01, "step": 100570 }, { "epoch": 661.7105263157895, "grad_norm": 1.1462628841400146, "learning_rate": 0.0001, "loss": 0.0129, "step": 100580 }, { "epoch": 661.7763157894736, "grad_norm": 0.7442108392715454, "learning_rate": 0.0001, "loss": 0.011, "step": 100590 }, { "epoch": 661.8421052631579, "grad_norm": 1.2875144481658936, "learning_rate": 0.0001, "loss": 0.0119, "step": 100600 }, { "epoch": 661.9078947368421, "grad_norm": 1.1649748086929321, "learning_rate": 0.0001, "loss": 0.0098, "step": 100610 }, { "epoch": 661.9736842105264, "grad_norm": 1.0540515184402466, "learning_rate": 0.0001, "loss": 0.0099, "step": 100620 }, { "epoch": 662.0394736842105, "grad_norm": 1.0686025619506836, "learning_rate": 0.0001, "loss": 0.0094, "step": 100630 }, { "epoch": 662.1052631578947, "grad_norm": 0.9818456768989563, "learning_rate": 0.0001, "loss": 0.0106, "step": 100640 }, { "epoch": 662.171052631579, "grad_norm": 0.9439074397087097, "learning_rate": 0.0001, "loss": 0.0104, "step": 100650 }, { "epoch": 662.2368421052631, "grad_norm": 0.9941381216049194, "learning_rate": 0.0001, "loss": 0.0097, "step": 100660 }, { "epoch": 662.3026315789474, "grad_norm": 1.1093741655349731, "learning_rate": 0.0001, "loss": 0.0117, "step": 100670 }, { "epoch": 662.3684210526316, "grad_norm": 1.1444611549377441, "learning_rate": 0.0001, "loss": 0.0089, "step": 100680 }, { "epoch": 662.4342105263158, "grad_norm": 1.0933109521865845, "learning_rate": 0.0001, "loss": 0.0103, "step": 100690 }, { "epoch": 662.5, "grad_norm": 1.2394286394119263, "learning_rate": 0.0001, "loss": 0.0122, "step": 100700 }, { "epoch": 662.5657894736842, "grad_norm": 1.3169022798538208, "learning_rate": 0.0001, "loss": 0.0099, "step": 100710 }, { "epoch": 662.6315789473684, "grad_norm": 0.8559953570365906, "learning_rate": 0.0001, "loss": 0.0122, "step": 100720 }, { "epoch": 662.6973684210526, "grad_norm": 1.2426526546478271, "learning_rate": 0.0001, "loss": 0.0109, "step": 100730 }, { "epoch": 662.7631578947369, "grad_norm": 1.1118953227996826, "learning_rate": 0.0001, "loss": 0.0097, "step": 100740 }, { "epoch": 662.828947368421, "grad_norm": 0.955522894859314, "learning_rate": 0.0001, "loss": 0.0124, "step": 100750 }, { "epoch": 662.8947368421053, "grad_norm": 1.0989824533462524, "learning_rate": 0.0001, "loss": 0.0126, "step": 100760 }, { "epoch": 662.9605263157895, "grad_norm": 1.0677485466003418, "learning_rate": 0.0001, "loss": 0.012, "step": 100770 }, { "epoch": 663.0263157894736, "grad_norm": 0.7644389271736145, "learning_rate": 0.0001, "loss": 0.0099, "step": 100780 }, { "epoch": 663.0921052631579, "grad_norm": 1.139380931854248, "learning_rate": 0.0001, "loss": 0.0106, "step": 100790 }, { "epoch": 663.1578947368421, "grad_norm": 0.9676651954650879, "learning_rate": 0.0001, "loss": 0.0119, "step": 100800 }, { "epoch": 663.2236842105264, "grad_norm": 1.2177777290344238, "learning_rate": 0.0001, "loss": 0.0104, "step": 100810 }, { "epoch": 663.2894736842105, "grad_norm": 1.339358925819397, "learning_rate": 0.0001, "loss": 0.0107, "step": 100820 }, { "epoch": 663.3552631578947, "grad_norm": 1.3607467412948608, "learning_rate": 0.0001, "loss": 0.0083, "step": 100830 }, { "epoch": 663.421052631579, "grad_norm": 1.0772532224655151, "learning_rate": 0.0001, "loss": 0.0135, "step": 100840 }, { "epoch": 663.4868421052631, "grad_norm": 1.1806657314300537, "learning_rate": 0.0001, "loss": 0.0112, "step": 100850 }, { "epoch": 663.5526315789474, "grad_norm": 0.8904920220375061, "learning_rate": 0.0001, "loss": 0.0107, "step": 100860 }, { "epoch": 663.6184210526316, "grad_norm": 1.1465924978256226, "learning_rate": 0.0001, "loss": 0.01, "step": 100870 }, { "epoch": 663.6842105263158, "grad_norm": 1.0607256889343262, "learning_rate": 0.0001, "loss": 0.0117, "step": 100880 }, { "epoch": 663.75, "grad_norm": 1.294001579284668, "learning_rate": 0.0001, "loss": 0.0102, "step": 100890 }, { "epoch": 663.8157894736842, "grad_norm": 1.168131947517395, "learning_rate": 0.0001, "loss": 0.0111, "step": 100900 }, { "epoch": 663.8815789473684, "grad_norm": 0.9382193684577942, "learning_rate": 0.0001, "loss": 0.0089, "step": 100910 }, { "epoch": 663.9473684210526, "grad_norm": 1.172577142715454, "learning_rate": 0.0001, "loss": 0.0098, "step": 100920 }, { "epoch": 664.0131578947369, "grad_norm": 1.1000980138778687, "learning_rate": 0.0001, "loss": 0.0111, "step": 100930 }, { "epoch": 664.078947368421, "grad_norm": 1.3834381103515625, "learning_rate": 0.0001, "loss": 0.0099, "step": 100940 }, { "epoch": 664.1447368421053, "grad_norm": 1.354061484336853, "learning_rate": 0.0001, "loss": 0.0117, "step": 100950 }, { "epoch": 664.2105263157895, "grad_norm": 1.3133666515350342, "learning_rate": 0.0001, "loss": 0.0104, "step": 100960 }, { "epoch": 664.2763157894736, "grad_norm": 0.8378098011016846, "learning_rate": 0.0001, "loss": 0.0105, "step": 100970 }, { "epoch": 664.3421052631579, "grad_norm": 0.8867501616477966, "learning_rate": 0.0001, "loss": 0.0111, "step": 100980 }, { "epoch": 664.4078947368421, "grad_norm": 1.0622562170028687, "learning_rate": 0.0001, "loss": 0.0111, "step": 100990 }, { "epoch": 664.4736842105264, "grad_norm": 0.6690018773078918, "learning_rate": 0.0001, "loss": 0.0115, "step": 101000 }, { "epoch": 664.5394736842105, "grad_norm": 0.7047531008720398, "learning_rate": 0.0001, "loss": 0.0112, "step": 101010 }, { "epoch": 664.6052631578947, "grad_norm": 1.0882129669189453, "learning_rate": 0.0001, "loss": 0.0103, "step": 101020 }, { "epoch": 664.671052631579, "grad_norm": 0.8649616241455078, "learning_rate": 0.0001, "loss": 0.0101, "step": 101030 }, { "epoch": 664.7368421052631, "grad_norm": 0.8693536520004272, "learning_rate": 0.0001, "loss": 0.0103, "step": 101040 }, { "epoch": 664.8026315789474, "grad_norm": 1.3576804399490356, "learning_rate": 0.0001, "loss": 0.011, "step": 101050 }, { "epoch": 664.8684210526316, "grad_norm": 1.0976057052612305, "learning_rate": 0.0001, "loss": 0.0098, "step": 101060 }, { "epoch": 664.9342105263158, "grad_norm": 1.4118653535842896, "learning_rate": 0.0001, "loss": 0.0104, "step": 101070 }, { "epoch": 665.0, "grad_norm": 0.913599967956543, "learning_rate": 0.0001, "loss": 0.0107, "step": 101080 }, { "epoch": 665.0657894736842, "grad_norm": 1.0430513620376587, "learning_rate": 0.0001, "loss": 0.012, "step": 101090 }, { "epoch": 665.1315789473684, "grad_norm": 1.3427150249481201, "learning_rate": 0.0001, "loss": 0.0116, "step": 101100 }, { "epoch": 665.1973684210526, "grad_norm": 1.5117545127868652, "learning_rate": 0.0001, "loss": 0.0102, "step": 101110 }, { "epoch": 665.2631578947369, "grad_norm": 0.9170265197753906, "learning_rate": 0.0001, "loss": 0.0086, "step": 101120 }, { "epoch": 665.328947368421, "grad_norm": 1.115020513534546, "learning_rate": 0.0001, "loss": 0.0112, "step": 101130 }, { "epoch": 665.3947368421053, "grad_norm": 0.7325767874717712, "learning_rate": 0.0001, "loss": 0.0099, "step": 101140 }, { "epoch": 665.4605263157895, "grad_norm": 1.1029810905456543, "learning_rate": 0.0001, "loss": 0.0092, "step": 101150 }, { "epoch": 665.5263157894736, "grad_norm": 1.1868432760238647, "learning_rate": 0.0001, "loss": 0.01, "step": 101160 }, { "epoch": 665.5921052631579, "grad_norm": 0.9201452136039734, "learning_rate": 0.0001, "loss": 0.0105, "step": 101170 }, { "epoch": 665.6578947368421, "grad_norm": 1.1828328371047974, "learning_rate": 0.0001, "loss": 0.0129, "step": 101180 }, { "epoch": 665.7236842105264, "grad_norm": 1.1864783763885498, "learning_rate": 0.0001, "loss": 0.0107, "step": 101190 }, { "epoch": 665.7894736842105, "grad_norm": 1.2099254131317139, "learning_rate": 0.0001, "loss": 0.0126, "step": 101200 }, { "epoch": 665.8552631578947, "grad_norm": 1.1975241899490356, "learning_rate": 0.0001, "loss": 0.0097, "step": 101210 }, { "epoch": 665.921052631579, "grad_norm": 1.1442780494689941, "learning_rate": 0.0001, "loss": 0.01, "step": 101220 }, { "epoch": 665.9868421052631, "grad_norm": 1.2307586669921875, "learning_rate": 0.0001, "loss": 0.0102, "step": 101230 }, { "epoch": 666.0526315789474, "grad_norm": 0.816516101360321, "learning_rate": 0.0001, "loss": 0.01, "step": 101240 }, { "epoch": 666.1184210526316, "grad_norm": 1.2257626056671143, "learning_rate": 0.0001, "loss": 0.0105, "step": 101250 }, { "epoch": 666.1842105263158, "grad_norm": 1.1634641885757446, "learning_rate": 0.0001, "loss": 0.0107, "step": 101260 }, { "epoch": 666.25, "grad_norm": 1.2208856344223022, "learning_rate": 0.0001, "loss": 0.0098, "step": 101270 }, { "epoch": 666.3157894736842, "grad_norm": 0.8225875496864319, "learning_rate": 0.0001, "loss": 0.0109, "step": 101280 }, { "epoch": 666.3815789473684, "grad_norm": 1.0692633390426636, "learning_rate": 0.0001, "loss": 0.0116, "step": 101290 }, { "epoch": 666.4473684210526, "grad_norm": 1.2944279909133911, "learning_rate": 0.0001, "loss": 0.0105, "step": 101300 }, { "epoch": 666.5131578947369, "grad_norm": 1.2648913860321045, "learning_rate": 0.0001, "loss": 0.0108, "step": 101310 }, { "epoch": 666.578947368421, "grad_norm": 1.435753345489502, "learning_rate": 0.0001, "loss": 0.0108, "step": 101320 }, { "epoch": 666.6447368421053, "grad_norm": 1.2368513345718384, "learning_rate": 0.0001, "loss": 0.0097, "step": 101330 }, { "epoch": 666.7105263157895, "grad_norm": 1.1920435428619385, "learning_rate": 0.0001, "loss": 0.0094, "step": 101340 }, { "epoch": 666.7763157894736, "grad_norm": 1.2127162218093872, "learning_rate": 0.0001, "loss": 0.0108, "step": 101350 }, { "epoch": 666.8421052631579, "grad_norm": 1.2593845129013062, "learning_rate": 0.0001, "loss": 0.0106, "step": 101360 }, { "epoch": 666.9078947368421, "grad_norm": 1.0275031328201294, "learning_rate": 0.0001, "loss": 0.0091, "step": 101370 }, { "epoch": 666.9736842105264, "grad_norm": 1.224514126777649, "learning_rate": 0.0001, "loss": 0.0129, "step": 101380 }, { "epoch": 667.0394736842105, "grad_norm": 1.1807442903518677, "learning_rate": 0.0001, "loss": 0.0112, "step": 101390 }, { "epoch": 667.1052631578947, "grad_norm": 0.8523136377334595, "learning_rate": 0.0001, "loss": 0.0089, "step": 101400 }, { "epoch": 667.171052631579, "grad_norm": 1.0002000331878662, "learning_rate": 0.0001, "loss": 0.0107, "step": 101410 }, { "epoch": 667.2368421052631, "grad_norm": 1.0773143768310547, "learning_rate": 0.0001, "loss": 0.0123, "step": 101420 }, { "epoch": 667.3026315789474, "grad_norm": 1.2809265851974487, "learning_rate": 0.0001, "loss": 0.0118, "step": 101430 }, { "epoch": 667.3684210526316, "grad_norm": 1.272909164428711, "learning_rate": 0.0001, "loss": 0.0104, "step": 101440 }, { "epoch": 667.4342105263158, "grad_norm": 1.0183367729187012, "learning_rate": 0.0001, "loss": 0.0109, "step": 101450 }, { "epoch": 667.5, "grad_norm": 1.0988458395004272, "learning_rate": 0.0001, "loss": 0.0099, "step": 101460 }, { "epoch": 667.5657894736842, "grad_norm": 1.024822473526001, "learning_rate": 0.0001, "loss": 0.0119, "step": 101470 }, { "epoch": 667.6315789473684, "grad_norm": 1.2332916259765625, "learning_rate": 0.0001, "loss": 0.0111, "step": 101480 }, { "epoch": 667.6973684210526, "grad_norm": 1.0866150856018066, "learning_rate": 0.0001, "loss": 0.0113, "step": 101490 }, { "epoch": 667.7631578947369, "grad_norm": 1.0552276372909546, "learning_rate": 0.0001, "loss": 0.0138, "step": 101500 }, { "epoch": 667.828947368421, "grad_norm": 1.0964107513427734, "learning_rate": 0.0001, "loss": 0.0099, "step": 101510 }, { "epoch": 667.8947368421053, "grad_norm": 1.264575719833374, "learning_rate": 0.0001, "loss": 0.0121, "step": 101520 }, { "epoch": 667.9605263157895, "grad_norm": 1.4266283512115479, "learning_rate": 0.0001, "loss": 0.009, "step": 101530 }, { "epoch": 668.0263157894736, "grad_norm": 0.7991641759872437, "learning_rate": 0.0001, "loss": 0.0112, "step": 101540 }, { "epoch": 668.0921052631579, "grad_norm": 1.0575485229492188, "learning_rate": 0.0001, "loss": 0.0096, "step": 101550 }, { "epoch": 668.1578947368421, "grad_norm": 1.040031909942627, "learning_rate": 0.0001, "loss": 0.0119, "step": 101560 }, { "epoch": 668.2236842105264, "grad_norm": 0.8988039493560791, "learning_rate": 0.0001, "loss": 0.0101, "step": 101570 }, { "epoch": 668.2894736842105, "grad_norm": 1.1382830142974854, "learning_rate": 0.0001, "loss": 0.0115, "step": 101580 }, { "epoch": 668.3552631578947, "grad_norm": 1.2425233125686646, "learning_rate": 0.0001, "loss": 0.0111, "step": 101590 }, { "epoch": 668.421052631579, "grad_norm": 1.2461456060409546, "learning_rate": 0.0001, "loss": 0.0106, "step": 101600 }, { "epoch": 668.4868421052631, "grad_norm": 0.828624427318573, "learning_rate": 0.0001, "loss": 0.0097, "step": 101610 }, { "epoch": 668.5526315789474, "grad_norm": 0.6338958144187927, "learning_rate": 0.0001, "loss": 0.009, "step": 101620 }, { "epoch": 668.6184210526316, "grad_norm": 0.9959868788719177, "learning_rate": 0.0001, "loss": 0.0102, "step": 101630 }, { "epoch": 668.6842105263158, "grad_norm": 0.9457986354827881, "learning_rate": 0.0001, "loss": 0.0124, "step": 101640 }, { "epoch": 668.75, "grad_norm": 1.2366830110549927, "learning_rate": 0.0001, "loss": 0.0113, "step": 101650 }, { "epoch": 668.8157894736842, "grad_norm": 1.192163348197937, "learning_rate": 0.0001, "loss": 0.0099, "step": 101660 }, { "epoch": 668.8815789473684, "grad_norm": 0.9253160953521729, "learning_rate": 0.0001, "loss": 0.0107, "step": 101670 }, { "epoch": 668.9473684210526, "grad_norm": 0.8594035506248474, "learning_rate": 0.0001, "loss": 0.0113, "step": 101680 }, { "epoch": 669.0131578947369, "grad_norm": 1.1185978651046753, "learning_rate": 0.0001, "loss": 0.0127, "step": 101690 }, { "epoch": 669.078947368421, "grad_norm": 0.9524565935134888, "learning_rate": 0.0001, "loss": 0.0124, "step": 101700 }, { "epoch": 669.1447368421053, "grad_norm": 1.0104931592941284, "learning_rate": 0.0001, "loss": 0.0134, "step": 101710 }, { "epoch": 669.2105263157895, "grad_norm": 1.1492105722427368, "learning_rate": 0.0001, "loss": 0.0115, "step": 101720 }, { "epoch": 669.2763157894736, "grad_norm": 1.1701027154922485, "learning_rate": 0.0001, "loss": 0.0106, "step": 101730 }, { "epoch": 669.3421052631579, "grad_norm": 1.386569619178772, "learning_rate": 0.0001, "loss": 0.0122, "step": 101740 }, { "epoch": 669.4078947368421, "grad_norm": 1.1431201696395874, "learning_rate": 0.0001, "loss": 0.0107, "step": 101750 }, { "epoch": 669.4736842105264, "grad_norm": 1.031060814857483, "learning_rate": 0.0001, "loss": 0.0113, "step": 101760 }, { "epoch": 669.5394736842105, "grad_norm": 0.8447512984275818, "learning_rate": 0.0001, "loss": 0.0115, "step": 101770 }, { "epoch": 669.6052631578947, "grad_norm": 1.1162457466125488, "learning_rate": 0.0001, "loss": 0.0111, "step": 101780 }, { "epoch": 669.671052631579, "grad_norm": 1.321597933769226, "learning_rate": 0.0001, "loss": 0.0114, "step": 101790 }, { "epoch": 669.7368421052631, "grad_norm": 1.5164763927459717, "learning_rate": 0.0001, "loss": 0.0114, "step": 101800 }, { "epoch": 669.8026315789474, "grad_norm": 1.3314334154129028, "learning_rate": 0.0001, "loss": 0.0126, "step": 101810 }, { "epoch": 669.8684210526316, "grad_norm": 1.0497831106185913, "learning_rate": 0.0001, "loss": 0.0138, "step": 101820 }, { "epoch": 669.9342105263158, "grad_norm": 1.1639678478240967, "learning_rate": 0.0001, "loss": 0.0115, "step": 101830 }, { "epoch": 670.0, "grad_norm": 1.2581437826156616, "learning_rate": 0.0001, "loss": 0.0118, "step": 101840 }, { "epoch": 670.0657894736842, "grad_norm": 1.3124868869781494, "learning_rate": 0.0001, "loss": 0.0099, "step": 101850 }, { "epoch": 670.1315789473684, "grad_norm": 1.232029676437378, "learning_rate": 0.0001, "loss": 0.011, "step": 101860 }, { "epoch": 670.1973684210526, "grad_norm": 1.5936830043792725, "learning_rate": 0.0001, "loss": 0.0133, "step": 101870 }, { "epoch": 670.2631578947369, "grad_norm": 1.1136298179626465, "learning_rate": 0.0001, "loss": 0.0122, "step": 101880 }, { "epoch": 670.328947368421, "grad_norm": 1.2041126489639282, "learning_rate": 0.0001, "loss": 0.0127, "step": 101890 }, { "epoch": 670.3947368421053, "grad_norm": 1.0076220035552979, "learning_rate": 0.0001, "loss": 0.0122, "step": 101900 }, { "epoch": 670.4605263157895, "grad_norm": 1.001729965209961, "learning_rate": 0.0001, "loss": 0.0128, "step": 101910 }, { "epoch": 670.5263157894736, "grad_norm": 1.1370996236801147, "learning_rate": 0.0001, "loss": 0.0113, "step": 101920 }, { "epoch": 670.5921052631579, "grad_norm": 1.3312040567398071, "learning_rate": 0.0001, "loss": 0.0103, "step": 101930 }, { "epoch": 670.6578947368421, "grad_norm": 1.0733331441879272, "learning_rate": 0.0001, "loss": 0.0113, "step": 101940 }, { "epoch": 670.7236842105264, "grad_norm": 1.2168687582015991, "learning_rate": 0.0001, "loss": 0.012, "step": 101950 }, { "epoch": 670.7894736842105, "grad_norm": 1.0879305601119995, "learning_rate": 0.0001, "loss": 0.0109, "step": 101960 }, { "epoch": 670.8552631578947, "grad_norm": 1.084376573562622, "learning_rate": 0.0001, "loss": 0.0109, "step": 101970 }, { "epoch": 670.921052631579, "grad_norm": 1.1504342555999756, "learning_rate": 0.0001, "loss": 0.0109, "step": 101980 }, { "epoch": 670.9868421052631, "grad_norm": 1.2068742513656616, "learning_rate": 0.0001, "loss": 0.0123, "step": 101990 }, { "epoch": 671.0526315789474, "grad_norm": 1.264833688735962, "learning_rate": 0.0001, "loss": 0.0135, "step": 102000 }, { "epoch": 671.1184210526316, "grad_norm": 0.7933449149131775, "learning_rate": 0.0001, "loss": 0.011, "step": 102010 }, { "epoch": 671.1842105263158, "grad_norm": 0.9847843647003174, "learning_rate": 0.0001, "loss": 0.0105, "step": 102020 }, { "epoch": 671.25, "grad_norm": 1.2704505920410156, "learning_rate": 0.0001, "loss": 0.01, "step": 102030 }, { "epoch": 671.3157894736842, "grad_norm": 1.4738961458206177, "learning_rate": 0.0001, "loss": 0.0104, "step": 102040 }, { "epoch": 671.3815789473684, "grad_norm": 1.338627576828003, "learning_rate": 0.0001, "loss": 0.0109, "step": 102050 }, { "epoch": 671.4473684210526, "grad_norm": 1.2926578521728516, "learning_rate": 0.0001, "loss": 0.0105, "step": 102060 }, { "epoch": 671.5131578947369, "grad_norm": 1.442939043045044, "learning_rate": 0.0001, "loss": 0.0105, "step": 102070 }, { "epoch": 671.578947368421, "grad_norm": 1.1999456882476807, "learning_rate": 0.0001, "loss": 0.01, "step": 102080 }, { "epoch": 671.6447368421053, "grad_norm": 1.2807422876358032, "learning_rate": 0.0001, "loss": 0.0103, "step": 102090 }, { "epoch": 671.7105263157895, "grad_norm": 1.0529870986938477, "learning_rate": 0.0001, "loss": 0.0129, "step": 102100 }, { "epoch": 671.7763157894736, "grad_norm": 1.044051170349121, "learning_rate": 0.0001, "loss": 0.0109, "step": 102110 }, { "epoch": 671.8421052631579, "grad_norm": 0.934209406375885, "learning_rate": 0.0001, "loss": 0.0104, "step": 102120 }, { "epoch": 671.9078947368421, "grad_norm": 1.350449562072754, "learning_rate": 0.0001, "loss": 0.0093, "step": 102130 }, { "epoch": 671.9736842105264, "grad_norm": 1.2345993518829346, "learning_rate": 0.0001, "loss": 0.01, "step": 102140 }, { "epoch": 672.0394736842105, "grad_norm": 1.41543447971344, "learning_rate": 0.0001, "loss": 0.011, "step": 102150 }, { "epoch": 672.1052631578947, "grad_norm": 0.7719045877456665, "learning_rate": 0.0001, "loss": 0.0119, "step": 102160 }, { "epoch": 672.171052631579, "grad_norm": 1.0525262355804443, "learning_rate": 0.0001, "loss": 0.013, "step": 102170 }, { "epoch": 672.2368421052631, "grad_norm": 1.1869667768478394, "learning_rate": 0.0001, "loss": 0.0107, "step": 102180 }, { "epoch": 672.3026315789474, "grad_norm": 1.0475441217422485, "learning_rate": 0.0001, "loss": 0.01, "step": 102190 }, { "epoch": 672.3684210526316, "grad_norm": 1.173707127571106, "learning_rate": 0.0001, "loss": 0.011, "step": 102200 }, { "epoch": 672.4342105263158, "grad_norm": 1.1738572120666504, "learning_rate": 0.0001, "loss": 0.0118, "step": 102210 }, { "epoch": 672.5, "grad_norm": 1.0514370203018188, "learning_rate": 0.0001, "loss": 0.0096, "step": 102220 }, { "epoch": 672.5657894736842, "grad_norm": 1.058618426322937, "learning_rate": 0.0001, "loss": 0.0108, "step": 102230 }, { "epoch": 672.6315789473684, "grad_norm": 1.6183222532272339, "learning_rate": 0.0001, "loss": 0.0122, "step": 102240 }, { "epoch": 672.6973684210526, "grad_norm": 1.2186968326568604, "learning_rate": 0.0001, "loss": 0.0101, "step": 102250 }, { "epoch": 672.7631578947369, "grad_norm": 0.9119374752044678, "learning_rate": 0.0001, "loss": 0.0096, "step": 102260 }, { "epoch": 672.828947368421, "grad_norm": 1.2468377351760864, "learning_rate": 0.0001, "loss": 0.0105, "step": 102270 }, { "epoch": 672.8947368421053, "grad_norm": 0.8254573941230774, "learning_rate": 0.0001, "loss": 0.0091, "step": 102280 }, { "epoch": 672.9605263157895, "grad_norm": 0.8752316832542419, "learning_rate": 0.0001, "loss": 0.0097, "step": 102290 }, { "epoch": 673.0263157894736, "grad_norm": 0.8799929618835449, "learning_rate": 0.0001, "loss": 0.0108, "step": 102300 }, { "epoch": 673.0921052631579, "grad_norm": 1.0996203422546387, "learning_rate": 0.0001, "loss": 0.0096, "step": 102310 }, { "epoch": 673.1578947368421, "grad_norm": 1.4787687063217163, "learning_rate": 0.0001, "loss": 0.0106, "step": 102320 }, { "epoch": 673.2236842105264, "grad_norm": 0.9628654718399048, "learning_rate": 0.0001, "loss": 0.0113, "step": 102330 }, { "epoch": 673.2894736842105, "grad_norm": 1.1669740676879883, "learning_rate": 0.0001, "loss": 0.0102, "step": 102340 }, { "epoch": 673.3552631578947, "grad_norm": 0.9935758709907532, "learning_rate": 0.0001, "loss": 0.0099, "step": 102350 }, { "epoch": 673.421052631579, "grad_norm": 1.2170686721801758, "learning_rate": 0.0001, "loss": 0.011, "step": 102360 }, { "epoch": 673.4868421052631, "grad_norm": 1.3537840843200684, "learning_rate": 0.0001, "loss": 0.0091, "step": 102370 }, { "epoch": 673.5526315789474, "grad_norm": 1.2032088041305542, "learning_rate": 0.0001, "loss": 0.0107, "step": 102380 }, { "epoch": 673.6184210526316, "grad_norm": 1.0807600021362305, "learning_rate": 0.0001, "loss": 0.0104, "step": 102390 }, { "epoch": 673.6842105263158, "grad_norm": 1.3539401292800903, "learning_rate": 0.0001, "loss": 0.0108, "step": 102400 }, { "epoch": 673.75, "grad_norm": 0.9600739479064941, "learning_rate": 0.0001, "loss": 0.0119, "step": 102410 }, { "epoch": 673.8157894736842, "grad_norm": 0.8437861204147339, "learning_rate": 0.0001, "loss": 0.0123, "step": 102420 }, { "epoch": 673.8815789473684, "grad_norm": 1.0616252422332764, "learning_rate": 0.0001, "loss": 0.0114, "step": 102430 }, { "epoch": 673.9473684210526, "grad_norm": 1.1971665620803833, "learning_rate": 0.0001, "loss": 0.009, "step": 102440 }, { "epoch": 674.0131578947369, "grad_norm": 1.0292773246765137, "learning_rate": 0.0001, "loss": 0.0094, "step": 102450 }, { "epoch": 674.078947368421, "grad_norm": 1.0635182857513428, "learning_rate": 0.0001, "loss": 0.0115, "step": 102460 }, { "epoch": 674.1447368421053, "grad_norm": 1.1411423683166504, "learning_rate": 0.0001, "loss": 0.0109, "step": 102470 }, { "epoch": 674.2105263157895, "grad_norm": 1.0148522853851318, "learning_rate": 0.0001, "loss": 0.0098, "step": 102480 }, { "epoch": 674.2763157894736, "grad_norm": 1.2934777736663818, "learning_rate": 0.0001, "loss": 0.0099, "step": 102490 }, { "epoch": 674.3421052631579, "grad_norm": 1.151720404624939, "learning_rate": 0.0001, "loss": 0.009, "step": 102500 }, { "epoch": 674.4078947368421, "grad_norm": 0.8841307759284973, "learning_rate": 0.0001, "loss": 0.0102, "step": 102510 }, { "epoch": 674.4736842105264, "grad_norm": 0.8646332621574402, "learning_rate": 0.0001, "loss": 0.0107, "step": 102520 }, { "epoch": 674.5394736842105, "grad_norm": 0.8606105446815491, "learning_rate": 0.0001, "loss": 0.0097, "step": 102530 }, { "epoch": 674.6052631578947, "grad_norm": 0.7609214186668396, "learning_rate": 0.0001, "loss": 0.0105, "step": 102540 }, { "epoch": 674.671052631579, "grad_norm": 1.0096818208694458, "learning_rate": 0.0001, "loss": 0.0109, "step": 102550 }, { "epoch": 674.7368421052631, "grad_norm": 1.0138216018676758, "learning_rate": 0.0001, "loss": 0.0107, "step": 102560 }, { "epoch": 674.8026315789474, "grad_norm": 1.2651227712631226, "learning_rate": 0.0001, "loss": 0.0112, "step": 102570 }, { "epoch": 674.8684210526316, "grad_norm": 1.3024859428405762, "learning_rate": 0.0001, "loss": 0.0101, "step": 102580 }, { "epoch": 674.9342105263158, "grad_norm": 0.9451261758804321, "learning_rate": 0.0001, "loss": 0.0136, "step": 102590 }, { "epoch": 675.0, "grad_norm": 1.1993613243103027, "learning_rate": 0.0001, "loss": 0.0095, "step": 102600 }, { "epoch": 675.0657894736842, "grad_norm": 1.2021538019180298, "learning_rate": 0.0001, "loss": 0.0099, "step": 102610 }, { "epoch": 675.1315789473684, "grad_norm": 0.7005917429924011, "learning_rate": 0.0001, "loss": 0.0097, "step": 102620 }, { "epoch": 675.1973684210526, "grad_norm": 0.9240332245826721, "learning_rate": 0.0001, "loss": 0.0093, "step": 102630 }, { "epoch": 675.2631578947369, "grad_norm": 1.2300150394439697, "learning_rate": 0.0001, "loss": 0.0106, "step": 102640 }, { "epoch": 675.328947368421, "grad_norm": 1.2758888006210327, "learning_rate": 0.0001, "loss": 0.0106, "step": 102650 }, { "epoch": 675.3947368421053, "grad_norm": 1.0356707572937012, "learning_rate": 0.0001, "loss": 0.011, "step": 102660 }, { "epoch": 675.4605263157895, "grad_norm": 0.9405995011329651, "learning_rate": 0.0001, "loss": 0.0099, "step": 102670 }, { "epoch": 675.5263157894736, "grad_norm": 1.1242586374282837, "learning_rate": 0.0001, "loss": 0.0103, "step": 102680 }, { "epoch": 675.5921052631579, "grad_norm": 1.0802112817764282, "learning_rate": 0.0001, "loss": 0.0108, "step": 102690 }, { "epoch": 675.6578947368421, "grad_norm": 0.9207102656364441, "learning_rate": 0.0001, "loss": 0.0102, "step": 102700 }, { "epoch": 675.7236842105264, "grad_norm": 0.9859259128570557, "learning_rate": 0.0001, "loss": 0.0132, "step": 102710 }, { "epoch": 675.7894736842105, "grad_norm": 1.63200044631958, "learning_rate": 0.0001, "loss": 0.0099, "step": 102720 }, { "epoch": 675.8552631578947, "grad_norm": 1.0292948484420776, "learning_rate": 0.0001, "loss": 0.0093, "step": 102730 }, { "epoch": 675.921052631579, "grad_norm": 0.9589559435844421, "learning_rate": 0.0001, "loss": 0.0121, "step": 102740 }, { "epoch": 675.9868421052631, "grad_norm": 1.2861278057098389, "learning_rate": 0.0001, "loss": 0.01, "step": 102750 }, { "epoch": 676.0526315789474, "grad_norm": 1.23167884349823, "learning_rate": 0.0001, "loss": 0.0099, "step": 102760 }, { "epoch": 676.1184210526316, "grad_norm": 1.2074878215789795, "learning_rate": 0.0001, "loss": 0.0097, "step": 102770 }, { "epoch": 676.1842105263158, "grad_norm": 1.1100069284439087, "learning_rate": 0.0001, "loss": 0.0104, "step": 102780 }, { "epoch": 676.25, "grad_norm": 1.2543814182281494, "learning_rate": 0.0001, "loss": 0.0107, "step": 102790 }, { "epoch": 676.3157894736842, "grad_norm": 1.0590333938598633, "learning_rate": 0.0001, "loss": 0.0112, "step": 102800 }, { "epoch": 676.3815789473684, "grad_norm": 1.1635903120040894, "learning_rate": 0.0001, "loss": 0.0103, "step": 102810 }, { "epoch": 676.4473684210526, "grad_norm": 1.191482663154602, "learning_rate": 0.0001, "loss": 0.0113, "step": 102820 }, { "epoch": 676.5131578947369, "grad_norm": 1.2780073881149292, "learning_rate": 0.0001, "loss": 0.009, "step": 102830 }, { "epoch": 676.578947368421, "grad_norm": 1.0318385362625122, "learning_rate": 0.0001, "loss": 0.0115, "step": 102840 }, { "epoch": 676.6447368421053, "grad_norm": 1.411617636680603, "learning_rate": 0.0001, "loss": 0.0113, "step": 102850 }, { "epoch": 676.7105263157895, "grad_norm": 1.007462739944458, "learning_rate": 0.0001, "loss": 0.0105, "step": 102860 }, { "epoch": 676.7763157894736, "grad_norm": 1.653554916381836, "learning_rate": 0.0001, "loss": 0.0104, "step": 102870 }, { "epoch": 676.8421052631579, "grad_norm": 1.147131085395813, "learning_rate": 0.0001, "loss": 0.0104, "step": 102880 }, { "epoch": 676.9078947368421, "grad_norm": 0.8459810018539429, "learning_rate": 0.0001, "loss": 0.01, "step": 102890 }, { "epoch": 676.9736842105264, "grad_norm": 1.1392914056777954, "learning_rate": 0.0001, "loss": 0.0103, "step": 102900 }, { "epoch": 677.0394736842105, "grad_norm": 1.1632038354873657, "learning_rate": 0.0001, "loss": 0.0096, "step": 102910 }, { "epoch": 677.1052631578947, "grad_norm": 1.4160902500152588, "learning_rate": 0.0001, "loss": 0.009, "step": 102920 }, { "epoch": 677.171052631579, "grad_norm": 1.1566230058670044, "learning_rate": 0.0001, "loss": 0.0098, "step": 102930 }, { "epoch": 677.2368421052631, "grad_norm": 0.9785585403442383, "learning_rate": 0.0001, "loss": 0.0092, "step": 102940 }, { "epoch": 677.3026315789474, "grad_norm": 0.8845245242118835, "learning_rate": 0.0001, "loss": 0.0089, "step": 102950 }, { "epoch": 677.3684210526316, "grad_norm": 0.8199566006660461, "learning_rate": 0.0001, "loss": 0.0106, "step": 102960 }, { "epoch": 677.4342105263158, "grad_norm": 0.8677225708961487, "learning_rate": 0.0001, "loss": 0.0105, "step": 102970 }, { "epoch": 677.5, "grad_norm": 1.06745445728302, "learning_rate": 0.0001, "loss": 0.0107, "step": 102980 }, { "epoch": 677.5657894736842, "grad_norm": 1.011222004890442, "learning_rate": 0.0001, "loss": 0.0107, "step": 102990 }, { "epoch": 677.6315789473684, "grad_norm": 1.0868799686431885, "learning_rate": 0.0001, "loss": 0.01, "step": 103000 }, { "epoch": 677.6973684210526, "grad_norm": 0.8394410610198975, "learning_rate": 0.0001, "loss": 0.011, "step": 103010 }, { "epoch": 677.7631578947369, "grad_norm": 1.1427472829818726, "learning_rate": 0.0001, "loss": 0.0144, "step": 103020 }, { "epoch": 677.828947368421, "grad_norm": 1.2967380285263062, "learning_rate": 0.0001, "loss": 0.0107, "step": 103030 }, { "epoch": 677.8947368421053, "grad_norm": 1.249349594116211, "learning_rate": 0.0001, "loss": 0.0108, "step": 103040 }, { "epoch": 677.9605263157895, "grad_norm": 1.3048033714294434, "learning_rate": 0.0001, "loss": 0.0098, "step": 103050 }, { "epoch": 678.0263157894736, "grad_norm": 1.421112298965454, "learning_rate": 0.0001, "loss": 0.0098, "step": 103060 }, { "epoch": 678.0921052631579, "grad_norm": 1.029354214668274, "learning_rate": 0.0001, "loss": 0.0086, "step": 103070 }, { "epoch": 678.1578947368421, "grad_norm": 0.6975602507591248, "learning_rate": 0.0001, "loss": 0.0118, "step": 103080 }, { "epoch": 678.2236842105264, "grad_norm": 1.1515804529190063, "learning_rate": 0.0001, "loss": 0.0113, "step": 103090 }, { "epoch": 678.2894736842105, "grad_norm": 1.4265718460083008, "learning_rate": 0.0001, "loss": 0.0091, "step": 103100 }, { "epoch": 678.3552631578947, "grad_norm": 1.0537846088409424, "learning_rate": 0.0001, "loss": 0.0092, "step": 103110 }, { "epoch": 678.421052631579, "grad_norm": 1.3082852363586426, "learning_rate": 0.0001, "loss": 0.0116, "step": 103120 }, { "epoch": 678.4868421052631, "grad_norm": 1.1465588808059692, "learning_rate": 0.0001, "loss": 0.0114, "step": 103130 }, { "epoch": 678.5526315789474, "grad_norm": 1.254515290260315, "learning_rate": 0.0001, "loss": 0.0107, "step": 103140 }, { "epoch": 678.6184210526316, "grad_norm": 0.8402789831161499, "learning_rate": 0.0001, "loss": 0.01, "step": 103150 }, { "epoch": 678.6842105263158, "grad_norm": 1.0215173959732056, "learning_rate": 0.0001, "loss": 0.0092, "step": 103160 }, { "epoch": 678.75, "grad_norm": 1.0827964544296265, "learning_rate": 0.0001, "loss": 0.0107, "step": 103170 }, { "epoch": 678.8157894736842, "grad_norm": 1.0201925039291382, "learning_rate": 0.0001, "loss": 0.0097, "step": 103180 }, { "epoch": 678.8815789473684, "grad_norm": 0.9389650821685791, "learning_rate": 0.0001, "loss": 0.0119, "step": 103190 }, { "epoch": 678.9473684210526, "grad_norm": 0.8603837490081787, "learning_rate": 0.0001, "loss": 0.0089, "step": 103200 }, { "epoch": 679.0131578947369, "grad_norm": 1.0995327234268188, "learning_rate": 0.0001, "loss": 0.0109, "step": 103210 }, { "epoch": 679.078947368421, "grad_norm": 1.0585341453552246, "learning_rate": 0.0001, "loss": 0.0119, "step": 103220 }, { "epoch": 679.1447368421053, "grad_norm": 1.1244828701019287, "learning_rate": 0.0001, "loss": 0.0103, "step": 103230 }, { "epoch": 679.2105263157895, "grad_norm": 1.1929583549499512, "learning_rate": 0.0001, "loss": 0.0093, "step": 103240 }, { "epoch": 679.2763157894736, "grad_norm": 1.2523797750473022, "learning_rate": 0.0001, "loss": 0.0095, "step": 103250 }, { "epoch": 679.3421052631579, "grad_norm": 1.0820051431655884, "learning_rate": 0.0001, "loss": 0.0107, "step": 103260 }, { "epoch": 679.4078947368421, "grad_norm": 1.3185031414031982, "learning_rate": 0.0001, "loss": 0.0097, "step": 103270 }, { "epoch": 679.4736842105264, "grad_norm": 1.0388109683990479, "learning_rate": 0.0001, "loss": 0.0113, "step": 103280 }, { "epoch": 679.5394736842105, "grad_norm": 1.443453073501587, "learning_rate": 0.0001, "loss": 0.0115, "step": 103290 }, { "epoch": 679.6052631578947, "grad_norm": 0.7869113087654114, "learning_rate": 0.0001, "loss": 0.0091, "step": 103300 }, { "epoch": 679.671052631579, "grad_norm": 1.2342416048049927, "learning_rate": 0.0001, "loss": 0.0106, "step": 103310 }, { "epoch": 679.7368421052631, "grad_norm": 1.1199147701263428, "learning_rate": 0.0001, "loss": 0.011, "step": 103320 }, { "epoch": 679.8026315789474, "grad_norm": 0.9589998126029968, "learning_rate": 0.0001, "loss": 0.01, "step": 103330 }, { "epoch": 679.8684210526316, "grad_norm": 1.0581178665161133, "learning_rate": 0.0001, "loss": 0.01, "step": 103340 }, { "epoch": 679.9342105263158, "grad_norm": 0.9367907643318176, "learning_rate": 0.0001, "loss": 0.0106, "step": 103350 }, { "epoch": 680.0, "grad_norm": 0.9200935363769531, "learning_rate": 0.0001, "loss": 0.0118, "step": 103360 }, { "epoch": 680.0657894736842, "grad_norm": 1.1545464992523193, "learning_rate": 0.0001, "loss": 0.01, "step": 103370 }, { "epoch": 680.1315789473684, "grad_norm": 0.9586318731307983, "learning_rate": 0.0001, "loss": 0.01, "step": 103380 }, { "epoch": 680.1973684210526, "grad_norm": 1.0395398139953613, "learning_rate": 0.0001, "loss": 0.0097, "step": 103390 }, { "epoch": 680.2631578947369, "grad_norm": 1.193278431892395, "learning_rate": 0.0001, "loss": 0.0112, "step": 103400 }, { "epoch": 680.328947368421, "grad_norm": 0.9284635186195374, "learning_rate": 0.0001, "loss": 0.0108, "step": 103410 }, { "epoch": 680.3947368421053, "grad_norm": 1.0416210889816284, "learning_rate": 0.0001, "loss": 0.0107, "step": 103420 }, { "epoch": 680.4605263157895, "grad_norm": 0.880230188369751, "learning_rate": 0.0001, "loss": 0.0104, "step": 103430 }, { "epoch": 680.5263157894736, "grad_norm": 1.139072299003601, "learning_rate": 0.0001, "loss": 0.0091, "step": 103440 }, { "epoch": 680.5921052631579, "grad_norm": 0.859987735748291, "learning_rate": 0.0001, "loss": 0.01, "step": 103450 }, { "epoch": 680.6578947368421, "grad_norm": 1.0812146663665771, "learning_rate": 0.0001, "loss": 0.011, "step": 103460 }, { "epoch": 680.7236842105264, "grad_norm": 0.973387598991394, "learning_rate": 0.0001, "loss": 0.0108, "step": 103470 }, { "epoch": 680.7894736842105, "grad_norm": 1.28573739528656, "learning_rate": 0.0001, "loss": 0.0117, "step": 103480 }, { "epoch": 680.8552631578947, "grad_norm": 1.2095247507095337, "learning_rate": 0.0001, "loss": 0.0112, "step": 103490 }, { "epoch": 680.921052631579, "grad_norm": 1.1945544481277466, "learning_rate": 0.0001, "loss": 0.0109, "step": 103500 }, { "epoch": 680.9868421052631, "grad_norm": 1.0555740594863892, "learning_rate": 0.0001, "loss": 0.0102, "step": 103510 }, { "epoch": 681.0526315789474, "grad_norm": 1.0830953121185303, "learning_rate": 0.0001, "loss": 0.0099, "step": 103520 }, { "epoch": 681.1184210526316, "grad_norm": 1.1533199548721313, "learning_rate": 0.0001, "loss": 0.0102, "step": 103530 }, { "epoch": 681.1842105263158, "grad_norm": 1.2779477834701538, "learning_rate": 0.0001, "loss": 0.0098, "step": 103540 }, { "epoch": 681.25, "grad_norm": 1.3541412353515625, "learning_rate": 0.0001, "loss": 0.0109, "step": 103550 }, { "epoch": 681.3157894736842, "grad_norm": 1.2908533811569214, "learning_rate": 0.0001, "loss": 0.0099, "step": 103560 }, { "epoch": 681.3815789473684, "grad_norm": 1.2065902948379517, "learning_rate": 0.0001, "loss": 0.0115, "step": 103570 }, { "epoch": 681.4473684210526, "grad_norm": 0.755403995513916, "learning_rate": 0.0001, "loss": 0.0102, "step": 103580 }, { "epoch": 681.5131578947369, "grad_norm": 1.087852954864502, "learning_rate": 0.0001, "loss": 0.0112, "step": 103590 }, { "epoch": 681.578947368421, "grad_norm": 0.9179571270942688, "learning_rate": 0.0001, "loss": 0.0101, "step": 103600 }, { "epoch": 681.6447368421053, "grad_norm": 1.2405117750167847, "learning_rate": 0.0001, "loss": 0.0103, "step": 103610 }, { "epoch": 681.7105263157895, "grad_norm": 0.9832314252853394, "learning_rate": 0.0001, "loss": 0.0102, "step": 103620 }, { "epoch": 681.7763157894736, "grad_norm": 1.0479704141616821, "learning_rate": 0.0001, "loss": 0.0112, "step": 103630 }, { "epoch": 681.8421052631579, "grad_norm": 1.2243036031723022, "learning_rate": 0.0001, "loss": 0.0104, "step": 103640 }, { "epoch": 681.9078947368421, "grad_norm": 0.8835751414299011, "learning_rate": 0.0001, "loss": 0.0105, "step": 103650 }, { "epoch": 681.9736842105264, "grad_norm": 0.982793927192688, "learning_rate": 0.0001, "loss": 0.0122, "step": 103660 }, { "epoch": 682.0394736842105, "grad_norm": 1.2806956768035889, "learning_rate": 0.0001, "loss": 0.0099, "step": 103670 }, { "epoch": 682.1052631578947, "grad_norm": 1.2741878032684326, "learning_rate": 0.0001, "loss": 0.0112, "step": 103680 }, { "epoch": 682.171052631579, "grad_norm": 0.92679762840271, "learning_rate": 0.0001, "loss": 0.0113, "step": 103690 }, { "epoch": 682.2368421052631, "grad_norm": 1.367266058921814, "learning_rate": 0.0001, "loss": 0.0104, "step": 103700 }, { "epoch": 682.3026315789474, "grad_norm": 0.7617621421813965, "learning_rate": 0.0001, "loss": 0.0109, "step": 103710 }, { "epoch": 682.3684210526316, "grad_norm": 0.740490734577179, "learning_rate": 0.0001, "loss": 0.0121, "step": 103720 }, { "epoch": 682.4342105263158, "grad_norm": 1.2656317949295044, "learning_rate": 0.0001, "loss": 0.0096, "step": 103730 }, { "epoch": 682.5, "grad_norm": 1.286602258682251, "learning_rate": 0.0001, "loss": 0.0101, "step": 103740 }, { "epoch": 682.5657894736842, "grad_norm": 1.0341092348098755, "learning_rate": 0.0001, "loss": 0.012, "step": 103750 }, { "epoch": 682.6315789473684, "grad_norm": 1.1948630809783936, "learning_rate": 0.0001, "loss": 0.0092, "step": 103760 }, { "epoch": 682.6973684210526, "grad_norm": 1.1949267387390137, "learning_rate": 0.0001, "loss": 0.0117, "step": 103770 }, { "epoch": 682.7631578947369, "grad_norm": 1.2278507947921753, "learning_rate": 0.0001, "loss": 0.0103, "step": 103780 }, { "epoch": 682.828947368421, "grad_norm": 1.1813137531280518, "learning_rate": 0.0001, "loss": 0.011, "step": 103790 }, { "epoch": 682.8947368421053, "grad_norm": 1.0005921125411987, "learning_rate": 0.0001, "loss": 0.0105, "step": 103800 }, { "epoch": 682.9605263157895, "grad_norm": 0.7229200005531311, "learning_rate": 0.0001, "loss": 0.0113, "step": 103810 }, { "epoch": 683.0263157894736, "grad_norm": 0.7326030135154724, "learning_rate": 0.0001, "loss": 0.0097, "step": 103820 }, { "epoch": 683.0921052631579, "grad_norm": 0.5357984900474548, "learning_rate": 0.0001, "loss": 0.011, "step": 103830 }, { "epoch": 683.1578947368421, "grad_norm": 0.856106162071228, "learning_rate": 0.0001, "loss": 0.0114, "step": 103840 }, { "epoch": 683.2236842105264, "grad_norm": 0.9525607228279114, "learning_rate": 0.0001, "loss": 0.0113, "step": 103850 }, { "epoch": 683.2894736842105, "grad_norm": 0.6489855647087097, "learning_rate": 0.0001, "loss": 0.0108, "step": 103860 }, { "epoch": 683.3552631578947, "grad_norm": 0.7851767539978027, "learning_rate": 0.0001, "loss": 0.0098, "step": 103870 }, { "epoch": 683.421052631579, "grad_norm": 1.2372000217437744, "learning_rate": 0.0001, "loss": 0.0095, "step": 103880 }, { "epoch": 683.4868421052631, "grad_norm": 1.1379144191741943, "learning_rate": 0.0001, "loss": 0.0096, "step": 103890 }, { "epoch": 683.5526315789474, "grad_norm": 1.1280021667480469, "learning_rate": 0.0001, "loss": 0.0108, "step": 103900 }, { "epoch": 683.6184210526316, "grad_norm": 1.049955129623413, "learning_rate": 0.0001, "loss": 0.0131, "step": 103910 }, { "epoch": 683.6842105263158, "grad_norm": 1.1678504943847656, "learning_rate": 0.0001, "loss": 0.0102, "step": 103920 }, { "epoch": 683.75, "grad_norm": 1.31575345993042, "learning_rate": 0.0001, "loss": 0.0091, "step": 103930 }, { "epoch": 683.8157894736842, "grad_norm": 1.1992285251617432, "learning_rate": 0.0001, "loss": 0.012, "step": 103940 }, { "epoch": 683.8815789473684, "grad_norm": 1.1740840673446655, "learning_rate": 0.0001, "loss": 0.01, "step": 103950 }, { "epoch": 683.9473684210526, "grad_norm": 1.3050742149353027, "learning_rate": 0.0001, "loss": 0.0109, "step": 103960 }, { "epoch": 684.0131578947369, "grad_norm": 1.0441380739212036, "learning_rate": 0.0001, "loss": 0.0102, "step": 103970 }, { "epoch": 684.078947368421, "grad_norm": 0.8772562146186829, "learning_rate": 0.0001, "loss": 0.0103, "step": 103980 }, { "epoch": 684.1447368421053, "grad_norm": 1.1009024381637573, "learning_rate": 0.0001, "loss": 0.0109, "step": 103990 }, { "epoch": 684.2105263157895, "grad_norm": 1.1562106609344482, "learning_rate": 0.0001, "loss": 0.0125, "step": 104000 }, { "epoch": 684.2763157894736, "grad_norm": 1.0368518829345703, "learning_rate": 0.0001, "loss": 0.0097, "step": 104010 }, { "epoch": 684.3421052631579, "grad_norm": 0.992682933807373, "learning_rate": 0.0001, "loss": 0.0103, "step": 104020 }, { "epoch": 684.4078947368421, "grad_norm": 0.7431752681732178, "learning_rate": 0.0001, "loss": 0.0105, "step": 104030 }, { "epoch": 684.4736842105264, "grad_norm": 1.2687865495681763, "learning_rate": 0.0001, "loss": 0.0106, "step": 104040 }, { "epoch": 684.5394736842105, "grad_norm": 1.1064850091934204, "learning_rate": 0.0001, "loss": 0.0107, "step": 104050 }, { "epoch": 684.6052631578947, "grad_norm": 1.0989872217178345, "learning_rate": 0.0001, "loss": 0.0121, "step": 104060 }, { "epoch": 684.671052631579, "grad_norm": 0.931109607219696, "learning_rate": 0.0001, "loss": 0.0086, "step": 104070 }, { "epoch": 684.7368421052631, "grad_norm": 1.1777174472808838, "learning_rate": 0.0001, "loss": 0.0109, "step": 104080 }, { "epoch": 684.8026315789474, "grad_norm": 1.218532919883728, "learning_rate": 0.0001, "loss": 0.0104, "step": 104090 }, { "epoch": 684.8684210526316, "grad_norm": 1.0839264392852783, "learning_rate": 0.0001, "loss": 0.01, "step": 104100 }, { "epoch": 684.9342105263158, "grad_norm": 1.5299314260482788, "learning_rate": 0.0001, "loss": 0.012, "step": 104110 }, { "epoch": 685.0, "grad_norm": 0.85472172498703, "learning_rate": 0.0001, "loss": 0.0094, "step": 104120 }, { "epoch": 685.0657894736842, "grad_norm": 1.038342833518982, "learning_rate": 0.0001, "loss": 0.0125, "step": 104130 }, { "epoch": 685.1315789473684, "grad_norm": 1.3401973247528076, "learning_rate": 0.0001, "loss": 0.0108, "step": 104140 }, { "epoch": 685.1973684210526, "grad_norm": 1.3317383527755737, "learning_rate": 0.0001, "loss": 0.0095, "step": 104150 }, { "epoch": 685.2631578947369, "grad_norm": 1.0047595500946045, "learning_rate": 0.0001, "loss": 0.0111, "step": 104160 }, { "epoch": 685.328947368421, "grad_norm": 1.0131652355194092, "learning_rate": 0.0001, "loss": 0.0095, "step": 104170 }, { "epoch": 685.3947368421053, "grad_norm": 0.9121875166893005, "learning_rate": 0.0001, "loss": 0.0106, "step": 104180 }, { "epoch": 685.4605263157895, "grad_norm": 1.079406976699829, "learning_rate": 0.0001, "loss": 0.0097, "step": 104190 }, { "epoch": 685.5263157894736, "grad_norm": 1.008108139038086, "learning_rate": 0.0001, "loss": 0.0107, "step": 104200 }, { "epoch": 685.5921052631579, "grad_norm": 0.8241605758666992, "learning_rate": 0.0001, "loss": 0.0085, "step": 104210 }, { "epoch": 685.6578947368421, "grad_norm": 0.8410286903381348, "learning_rate": 0.0001, "loss": 0.0105, "step": 104220 }, { "epoch": 685.7236842105264, "grad_norm": 0.8288211226463318, "learning_rate": 0.0001, "loss": 0.0118, "step": 104230 }, { "epoch": 685.7894736842105, "grad_norm": 0.8989503383636475, "learning_rate": 0.0001, "loss": 0.0108, "step": 104240 }, { "epoch": 685.8552631578947, "grad_norm": 1.1565848588943481, "learning_rate": 0.0001, "loss": 0.0109, "step": 104250 }, { "epoch": 685.921052631579, "grad_norm": 1.376077651977539, "learning_rate": 0.0001, "loss": 0.0117, "step": 104260 }, { "epoch": 685.9868421052631, "grad_norm": 0.8136918544769287, "learning_rate": 0.0001, "loss": 0.0091, "step": 104270 }, { "epoch": 686.0526315789474, "grad_norm": 1.2481287717819214, "learning_rate": 0.0001, "loss": 0.0109, "step": 104280 }, { "epoch": 686.1184210526316, "grad_norm": 1.1149044036865234, "learning_rate": 0.0001, "loss": 0.0102, "step": 104290 }, { "epoch": 686.1842105263158, "grad_norm": 1.1859511137008667, "learning_rate": 0.0001, "loss": 0.0097, "step": 104300 }, { "epoch": 686.25, "grad_norm": 1.0750795602798462, "learning_rate": 0.0001, "loss": 0.0087, "step": 104310 }, { "epoch": 686.3157894736842, "grad_norm": 1.4063059091567993, "learning_rate": 0.0001, "loss": 0.009, "step": 104320 }, { "epoch": 686.3815789473684, "grad_norm": 1.4898396730422974, "learning_rate": 0.0001, "loss": 0.0113, "step": 104330 }, { "epoch": 686.4473684210526, "grad_norm": 1.1081674098968506, "learning_rate": 0.0001, "loss": 0.0104, "step": 104340 }, { "epoch": 686.5131578947369, "grad_norm": 1.1555651426315308, "learning_rate": 0.0001, "loss": 0.0116, "step": 104350 }, { "epoch": 686.578947368421, "grad_norm": 1.2798792123794556, "learning_rate": 0.0001, "loss": 0.0108, "step": 104360 }, { "epoch": 686.6447368421053, "grad_norm": 0.9958081841468811, "learning_rate": 0.0001, "loss": 0.0089, "step": 104370 }, { "epoch": 686.7105263157895, "grad_norm": 1.3393999338150024, "learning_rate": 0.0001, "loss": 0.0108, "step": 104380 }, { "epoch": 686.7763157894736, "grad_norm": 1.0677820444107056, "learning_rate": 0.0001, "loss": 0.0127, "step": 104390 }, { "epoch": 686.8421052631579, "grad_norm": 1.1201198101043701, "learning_rate": 0.0001, "loss": 0.0107, "step": 104400 }, { "epoch": 686.9078947368421, "grad_norm": 0.9454928040504456, "learning_rate": 0.0001, "loss": 0.0116, "step": 104410 }, { "epoch": 686.9736842105264, "grad_norm": 1.056349515914917, "learning_rate": 0.0001, "loss": 0.0099, "step": 104420 }, { "epoch": 687.0394736842105, "grad_norm": 0.84330153465271, "learning_rate": 0.0001, "loss": 0.01, "step": 104430 }, { "epoch": 687.1052631578947, "grad_norm": 0.9636723399162292, "learning_rate": 0.0001, "loss": 0.0102, "step": 104440 }, { "epoch": 687.171052631579, "grad_norm": 1.4594964981079102, "learning_rate": 0.0001, "loss": 0.0103, "step": 104450 }, { "epoch": 687.2368421052631, "grad_norm": 0.8333598375320435, "learning_rate": 0.0001, "loss": 0.0101, "step": 104460 }, { "epoch": 687.3026315789474, "grad_norm": 0.9445642828941345, "learning_rate": 0.0001, "loss": 0.0105, "step": 104470 }, { "epoch": 687.3684210526316, "grad_norm": 1.2898783683776855, "learning_rate": 0.0001, "loss": 0.0092, "step": 104480 }, { "epoch": 687.4342105263158, "grad_norm": 1.1669278144836426, "learning_rate": 0.0001, "loss": 0.013, "step": 104490 }, { "epoch": 687.5, "grad_norm": 0.9558044075965881, "learning_rate": 0.0001, "loss": 0.0097, "step": 104500 }, { "epoch": 687.5657894736842, "grad_norm": 1.2216863632202148, "learning_rate": 0.0001, "loss": 0.0098, "step": 104510 }, { "epoch": 687.6315789473684, "grad_norm": 1.3271684646606445, "learning_rate": 0.0001, "loss": 0.0099, "step": 104520 }, { "epoch": 687.6973684210526, "grad_norm": 1.0442551374435425, "learning_rate": 0.0001, "loss": 0.011, "step": 104530 }, { "epoch": 687.7631578947369, "grad_norm": 0.6984831094741821, "learning_rate": 0.0001, "loss": 0.0103, "step": 104540 }, { "epoch": 687.828947368421, "grad_norm": 1.0056344270706177, "learning_rate": 0.0001, "loss": 0.0101, "step": 104550 }, { "epoch": 687.8947368421053, "grad_norm": 1.0924835205078125, "learning_rate": 0.0001, "loss": 0.0111, "step": 104560 }, { "epoch": 687.9605263157895, "grad_norm": 1.1241261959075928, "learning_rate": 0.0001, "loss": 0.011, "step": 104570 }, { "epoch": 688.0263157894736, "grad_norm": 0.9026856422424316, "learning_rate": 0.0001, "loss": 0.0109, "step": 104580 }, { "epoch": 688.0921052631579, "grad_norm": 0.7907440662384033, "learning_rate": 0.0001, "loss": 0.0098, "step": 104590 }, { "epoch": 688.1578947368421, "grad_norm": 1.012350082397461, "learning_rate": 0.0001, "loss": 0.0108, "step": 104600 }, { "epoch": 688.2236842105264, "grad_norm": 1.0234317779541016, "learning_rate": 0.0001, "loss": 0.0097, "step": 104610 }, { "epoch": 688.2894736842105, "grad_norm": 0.9007984399795532, "learning_rate": 0.0001, "loss": 0.0093, "step": 104620 }, { "epoch": 688.3552631578947, "grad_norm": 1.339667797088623, "learning_rate": 0.0001, "loss": 0.0101, "step": 104630 }, { "epoch": 688.421052631579, "grad_norm": 1.140592336654663, "learning_rate": 0.0001, "loss": 0.0127, "step": 104640 }, { "epoch": 688.4868421052631, "grad_norm": 1.2752139568328857, "learning_rate": 0.0001, "loss": 0.0095, "step": 104650 }, { "epoch": 688.5526315789474, "grad_norm": 1.20328950881958, "learning_rate": 0.0001, "loss": 0.0114, "step": 104660 }, { "epoch": 688.6184210526316, "grad_norm": 1.0453274250030518, "learning_rate": 0.0001, "loss": 0.011, "step": 104670 }, { "epoch": 688.6842105263158, "grad_norm": 1.0228806734085083, "learning_rate": 0.0001, "loss": 0.0116, "step": 104680 }, { "epoch": 688.75, "grad_norm": 1.1263914108276367, "learning_rate": 0.0001, "loss": 0.0105, "step": 104690 }, { "epoch": 688.8157894736842, "grad_norm": 1.1457710266113281, "learning_rate": 0.0001, "loss": 0.0113, "step": 104700 }, { "epoch": 688.8815789473684, "grad_norm": 1.1575244665145874, "learning_rate": 0.0001, "loss": 0.0103, "step": 104710 }, { "epoch": 688.9473684210526, "grad_norm": 1.1374318599700928, "learning_rate": 0.0001, "loss": 0.0106, "step": 104720 }, { "epoch": 689.0131578947369, "grad_norm": 1.1628844738006592, "learning_rate": 0.0001, "loss": 0.0093, "step": 104730 }, { "epoch": 689.078947368421, "grad_norm": 1.267177939414978, "learning_rate": 0.0001, "loss": 0.0107, "step": 104740 }, { "epoch": 689.1447368421053, "grad_norm": 1.0953381061553955, "learning_rate": 0.0001, "loss": 0.0103, "step": 104750 }, { "epoch": 689.2105263157895, "grad_norm": 0.6190617680549622, "learning_rate": 0.0001, "loss": 0.0106, "step": 104760 }, { "epoch": 689.2763157894736, "grad_norm": 1.0876604318618774, "learning_rate": 0.0001, "loss": 0.0114, "step": 104770 }, { "epoch": 689.3421052631579, "grad_norm": 1.1195549964904785, "learning_rate": 0.0001, "loss": 0.0134, "step": 104780 }, { "epoch": 689.4078947368421, "grad_norm": 1.1582069396972656, "learning_rate": 0.0001, "loss": 0.0097, "step": 104790 }, { "epoch": 689.4736842105264, "grad_norm": 1.3708412647247314, "learning_rate": 0.0001, "loss": 0.011, "step": 104800 }, { "epoch": 689.5394736842105, "grad_norm": 1.0523473024368286, "learning_rate": 0.0001, "loss": 0.0099, "step": 104810 }, { "epoch": 689.6052631578947, "grad_norm": 1.0299116373062134, "learning_rate": 0.0001, "loss": 0.0111, "step": 104820 }, { "epoch": 689.671052631579, "grad_norm": 1.1898280382156372, "learning_rate": 0.0001, "loss": 0.009, "step": 104830 }, { "epoch": 689.7368421052631, "grad_norm": 1.1457937955856323, "learning_rate": 0.0001, "loss": 0.0116, "step": 104840 }, { "epoch": 689.8026315789474, "grad_norm": 1.2077155113220215, "learning_rate": 0.0001, "loss": 0.009, "step": 104850 }, { "epoch": 689.8684210526316, "grad_norm": 1.064317226409912, "learning_rate": 0.0001, "loss": 0.0082, "step": 104860 }, { "epoch": 689.9342105263158, "grad_norm": 1.3832712173461914, "learning_rate": 0.0001, "loss": 0.012, "step": 104870 }, { "epoch": 690.0, "grad_norm": 1.2490742206573486, "learning_rate": 0.0001, "loss": 0.0098, "step": 104880 }, { "epoch": 690.0657894736842, "grad_norm": 0.9746840000152588, "learning_rate": 0.0001, "loss": 0.0095, "step": 104890 }, { "epoch": 690.1315789473684, "grad_norm": 1.291522741317749, "learning_rate": 0.0001, "loss": 0.0104, "step": 104900 }, { "epoch": 690.1973684210526, "grad_norm": 1.422899603843689, "learning_rate": 0.0001, "loss": 0.0117, "step": 104910 }, { "epoch": 690.2631578947369, "grad_norm": 1.692259669303894, "learning_rate": 0.0001, "loss": 0.0102, "step": 104920 }, { "epoch": 690.328947368421, "grad_norm": 1.206990361213684, "learning_rate": 0.0001, "loss": 0.0084, "step": 104930 }, { "epoch": 690.3947368421053, "grad_norm": 1.1877973079681396, "learning_rate": 0.0001, "loss": 0.0095, "step": 104940 }, { "epoch": 690.4605263157895, "grad_norm": 0.9999209642410278, "learning_rate": 0.0001, "loss": 0.0113, "step": 104950 }, { "epoch": 690.5263157894736, "grad_norm": 0.9556674361228943, "learning_rate": 0.0001, "loss": 0.0107, "step": 104960 }, { "epoch": 690.5921052631579, "grad_norm": 1.3551650047302246, "learning_rate": 0.0001, "loss": 0.0102, "step": 104970 }, { "epoch": 690.6578947368421, "grad_norm": 0.9526252746582031, "learning_rate": 0.0001, "loss": 0.01, "step": 104980 }, { "epoch": 690.7236842105264, "grad_norm": 1.16046941280365, "learning_rate": 0.0001, "loss": 0.0121, "step": 104990 }, { "epoch": 690.7894736842105, "grad_norm": 1.410556674003601, "learning_rate": 0.0001, "loss": 0.01, "step": 105000 }, { "epoch": 690.8552631578947, "grad_norm": 1.1336820125579834, "learning_rate": 0.0001, "loss": 0.013, "step": 105010 }, { "epoch": 690.921052631579, "grad_norm": 1.0210304260253906, "learning_rate": 0.0001, "loss": 0.0101, "step": 105020 }, { "epoch": 690.9868421052631, "grad_norm": 0.7669985890388489, "learning_rate": 0.0001, "loss": 0.0115, "step": 105030 }, { "epoch": 691.0526315789474, "grad_norm": 1.2497632503509521, "learning_rate": 0.0001, "loss": 0.0103, "step": 105040 }, { "epoch": 691.1184210526316, "grad_norm": 0.9652649760246277, "learning_rate": 0.0001, "loss": 0.011, "step": 105050 }, { "epoch": 691.1842105263158, "grad_norm": 1.0892058610916138, "learning_rate": 0.0001, "loss": 0.0136, "step": 105060 }, { "epoch": 691.25, "grad_norm": 1.201240062713623, "learning_rate": 0.0001, "loss": 0.0118, "step": 105070 }, { "epoch": 691.3157894736842, "grad_norm": 1.2225768566131592, "learning_rate": 0.0001, "loss": 0.0114, "step": 105080 }, { "epoch": 691.3815789473684, "grad_norm": 1.4184614419937134, "learning_rate": 0.0001, "loss": 0.0147, "step": 105090 }, { "epoch": 691.4473684210526, "grad_norm": 0.9505151510238647, "learning_rate": 0.0001, "loss": 0.0118, "step": 105100 }, { "epoch": 691.5131578947369, "grad_norm": 0.7423208355903625, "learning_rate": 0.0001, "loss": 0.011, "step": 105110 }, { "epoch": 691.578947368421, "grad_norm": 1.2950553894042969, "learning_rate": 0.0001, "loss": 0.0125, "step": 105120 }, { "epoch": 691.6447368421053, "grad_norm": 1.050185203552246, "learning_rate": 0.0001, "loss": 0.0105, "step": 105130 }, { "epoch": 691.7105263157895, "grad_norm": 1.229802131652832, "learning_rate": 0.0001, "loss": 0.0111, "step": 105140 }, { "epoch": 691.7763157894736, "grad_norm": 1.34513258934021, "learning_rate": 0.0001, "loss": 0.0118, "step": 105150 }, { "epoch": 691.8421052631579, "grad_norm": 0.9867190718650818, "learning_rate": 0.0001, "loss": 0.0108, "step": 105160 }, { "epoch": 691.9078947368421, "grad_norm": 1.0777831077575684, "learning_rate": 0.0001, "loss": 0.0107, "step": 105170 }, { "epoch": 691.9736842105264, "grad_norm": 0.8535577058792114, "learning_rate": 0.0001, "loss": 0.012, "step": 105180 }, { "epoch": 692.0394736842105, "grad_norm": 1.4894434213638306, "learning_rate": 0.0001, "loss": 0.011, "step": 105190 }, { "epoch": 692.1052631578947, "grad_norm": 1.5268137454986572, "learning_rate": 0.0001, "loss": 0.0139, "step": 105200 }, { "epoch": 692.171052631579, "grad_norm": 0.9532687067985535, "learning_rate": 0.0001, "loss": 0.0103, "step": 105210 }, { "epoch": 692.2368421052631, "grad_norm": 1.5903687477111816, "learning_rate": 0.0001, "loss": 0.0109, "step": 105220 }, { "epoch": 692.3026315789474, "grad_norm": 1.4972559213638306, "learning_rate": 0.0001, "loss": 0.0117, "step": 105230 }, { "epoch": 692.3684210526316, "grad_norm": 1.5704691410064697, "learning_rate": 0.0001, "loss": 0.0128, "step": 105240 }, { "epoch": 692.4342105263158, "grad_norm": 1.3427348136901855, "learning_rate": 0.0001, "loss": 0.0103, "step": 105250 }, { "epoch": 692.5, "grad_norm": 1.3334673643112183, "learning_rate": 0.0001, "loss": 0.0124, "step": 105260 }, { "epoch": 692.5657894736842, "grad_norm": 1.3983582258224487, "learning_rate": 0.0001, "loss": 0.0102, "step": 105270 }, { "epoch": 692.6315789473684, "grad_norm": 1.0505462884902954, "learning_rate": 0.0001, "loss": 0.0109, "step": 105280 }, { "epoch": 692.6973684210526, "grad_norm": 1.273409128189087, "learning_rate": 0.0001, "loss": 0.0126, "step": 105290 }, { "epoch": 692.7631578947369, "grad_norm": 1.1249748468399048, "learning_rate": 0.0001, "loss": 0.0129, "step": 105300 }, { "epoch": 692.828947368421, "grad_norm": 1.0098894834518433, "learning_rate": 0.0001, "loss": 0.0116, "step": 105310 }, { "epoch": 692.8947368421053, "grad_norm": 0.9849072694778442, "learning_rate": 0.0001, "loss": 0.0102, "step": 105320 }, { "epoch": 692.9605263157895, "grad_norm": 1.1056095361709595, "learning_rate": 0.0001, "loss": 0.013, "step": 105330 }, { "epoch": 693.0263157894736, "grad_norm": 1.139883041381836, "learning_rate": 0.0001, "loss": 0.0117, "step": 105340 }, { "epoch": 693.0921052631579, "grad_norm": 1.1363656520843506, "learning_rate": 0.0001, "loss": 0.0131, "step": 105350 }, { "epoch": 693.1578947368421, "grad_norm": 1.1857936382293701, "learning_rate": 0.0001, "loss": 0.0109, "step": 105360 }, { "epoch": 693.2236842105264, "grad_norm": 0.88065105676651, "learning_rate": 0.0001, "loss": 0.015, "step": 105370 }, { "epoch": 693.2894736842105, "grad_norm": 0.8013133406639099, "learning_rate": 0.0001, "loss": 0.0106, "step": 105380 }, { "epoch": 693.3552631578947, "grad_norm": 1.1217681169509888, "learning_rate": 0.0001, "loss": 0.0118, "step": 105390 }, { "epoch": 693.421052631579, "grad_norm": 1.2208178043365479, "learning_rate": 0.0001, "loss": 0.0105, "step": 105400 }, { "epoch": 693.4868421052631, "grad_norm": 1.139967918395996, "learning_rate": 0.0001, "loss": 0.0103, "step": 105410 }, { "epoch": 693.5526315789474, "grad_norm": 1.063003659248352, "learning_rate": 0.0001, "loss": 0.0158, "step": 105420 }, { "epoch": 693.6184210526316, "grad_norm": 1.4261283874511719, "learning_rate": 0.0001, "loss": 0.012, "step": 105430 }, { "epoch": 693.6842105263158, "grad_norm": 1.0778611898422241, "learning_rate": 0.0001, "loss": 0.0104, "step": 105440 }, { "epoch": 693.75, "grad_norm": 1.0890920162200928, "learning_rate": 0.0001, "loss": 0.0106, "step": 105450 }, { "epoch": 693.8157894736842, "grad_norm": 1.4902729988098145, "learning_rate": 0.0001, "loss": 0.0114, "step": 105460 }, { "epoch": 693.8815789473684, "grad_norm": 1.185309886932373, "learning_rate": 0.0001, "loss": 0.0103, "step": 105470 }, { "epoch": 693.9473684210526, "grad_norm": 1.2872449159622192, "learning_rate": 0.0001, "loss": 0.0124, "step": 105480 }, { "epoch": 694.0131578947369, "grad_norm": 1.1898432970046997, "learning_rate": 0.0001, "loss": 0.0114, "step": 105490 }, { "epoch": 694.078947368421, "grad_norm": 1.2668589353561401, "learning_rate": 0.0001, "loss": 0.0117, "step": 105500 }, { "epoch": 694.1447368421053, "grad_norm": 1.3832660913467407, "learning_rate": 0.0001, "loss": 0.0111, "step": 105510 }, { "epoch": 694.2105263157895, "grad_norm": 1.297754168510437, "learning_rate": 0.0001, "loss": 0.0114, "step": 105520 }, { "epoch": 694.2763157894736, "grad_norm": 1.1962448358535767, "learning_rate": 0.0001, "loss": 0.0095, "step": 105530 }, { "epoch": 694.3421052631579, "grad_norm": 1.3533624410629272, "learning_rate": 0.0001, "loss": 0.0106, "step": 105540 }, { "epoch": 694.4078947368421, "grad_norm": 1.2034482955932617, "learning_rate": 0.0001, "loss": 0.0113, "step": 105550 }, { "epoch": 694.4736842105264, "grad_norm": 1.1491233110427856, "learning_rate": 0.0001, "loss": 0.009, "step": 105560 }, { "epoch": 694.5394736842105, "grad_norm": 1.1336445808410645, "learning_rate": 0.0001, "loss": 0.0101, "step": 105570 }, { "epoch": 694.6052631578947, "grad_norm": 1.028238296508789, "learning_rate": 0.0001, "loss": 0.0112, "step": 105580 }, { "epoch": 694.671052631579, "grad_norm": 1.0678424835205078, "learning_rate": 0.0001, "loss": 0.0105, "step": 105590 }, { "epoch": 694.7368421052631, "grad_norm": 1.087962031364441, "learning_rate": 0.0001, "loss": 0.0118, "step": 105600 }, { "epoch": 694.8026315789474, "grad_norm": 1.0385785102844238, "learning_rate": 0.0001, "loss": 0.0106, "step": 105610 }, { "epoch": 694.8684210526316, "grad_norm": 1.196866512298584, "learning_rate": 0.0001, "loss": 0.0108, "step": 105620 }, { "epoch": 694.9342105263158, "grad_norm": 1.0130568742752075, "learning_rate": 0.0001, "loss": 0.0115, "step": 105630 }, { "epoch": 695.0, "grad_norm": 0.7995595932006836, "learning_rate": 0.0001, "loss": 0.0105, "step": 105640 }, { "epoch": 695.0657894736842, "grad_norm": 1.008529782295227, "learning_rate": 0.0001, "loss": 0.0099, "step": 105650 }, { "epoch": 695.1315789473684, "grad_norm": 0.806501030921936, "learning_rate": 0.0001, "loss": 0.01, "step": 105660 }, { "epoch": 695.1973684210526, "grad_norm": 0.8974860906600952, "learning_rate": 0.0001, "loss": 0.0114, "step": 105670 }, { "epoch": 695.2631578947369, "grad_norm": 1.2004984617233276, "learning_rate": 0.0001, "loss": 0.0107, "step": 105680 }, { "epoch": 695.328947368421, "grad_norm": 1.1275862455368042, "learning_rate": 0.0001, "loss": 0.01, "step": 105690 }, { "epoch": 695.3947368421053, "grad_norm": 0.9173672199249268, "learning_rate": 0.0001, "loss": 0.0111, "step": 105700 }, { "epoch": 695.4605263157895, "grad_norm": 0.8183532953262329, "learning_rate": 0.0001, "loss": 0.0115, "step": 105710 }, { "epoch": 695.5263157894736, "grad_norm": 0.8390060663223267, "learning_rate": 0.0001, "loss": 0.011, "step": 105720 }, { "epoch": 695.5921052631579, "grad_norm": 1.1580220460891724, "learning_rate": 0.0001, "loss": 0.0096, "step": 105730 }, { "epoch": 695.6578947368421, "grad_norm": 0.9561638832092285, "learning_rate": 0.0001, "loss": 0.0107, "step": 105740 }, { "epoch": 695.7236842105264, "grad_norm": 1.305795431137085, "learning_rate": 0.0001, "loss": 0.0113, "step": 105750 }, { "epoch": 695.7894736842105, "grad_norm": 1.044447422027588, "learning_rate": 0.0001, "loss": 0.0103, "step": 105760 }, { "epoch": 695.8552631578947, "grad_norm": 1.3991165161132812, "learning_rate": 0.0001, "loss": 0.0105, "step": 105770 }, { "epoch": 695.921052631579, "grad_norm": 0.8383837342262268, "learning_rate": 0.0001, "loss": 0.0095, "step": 105780 }, { "epoch": 695.9868421052631, "grad_norm": 0.9691088795661926, "learning_rate": 0.0001, "loss": 0.0097, "step": 105790 }, { "epoch": 696.0526315789474, "grad_norm": 1.0391768217086792, "learning_rate": 0.0001, "loss": 0.0113, "step": 105800 }, { "epoch": 696.1184210526316, "grad_norm": 0.8894152641296387, "learning_rate": 0.0001, "loss": 0.0106, "step": 105810 }, { "epoch": 696.1842105263158, "grad_norm": 0.7640244364738464, "learning_rate": 0.0001, "loss": 0.0087, "step": 105820 }, { "epoch": 696.25, "grad_norm": 0.6968852877616882, "learning_rate": 0.0001, "loss": 0.0095, "step": 105830 }, { "epoch": 696.3157894736842, "grad_norm": 0.7941989302635193, "learning_rate": 0.0001, "loss": 0.0095, "step": 105840 }, { "epoch": 696.3815789473684, "grad_norm": 1.2468845844268799, "learning_rate": 0.0001, "loss": 0.01, "step": 105850 }, { "epoch": 696.4473684210526, "grad_norm": 1.1141244173049927, "learning_rate": 0.0001, "loss": 0.0095, "step": 105860 }, { "epoch": 696.5131578947369, "grad_norm": 0.9799408912658691, "learning_rate": 0.0001, "loss": 0.0107, "step": 105870 }, { "epoch": 696.578947368421, "grad_norm": 0.7694076299667358, "learning_rate": 0.0001, "loss": 0.0089, "step": 105880 }, { "epoch": 696.6447368421053, "grad_norm": 1.0681445598602295, "learning_rate": 0.0001, "loss": 0.0113, "step": 105890 }, { "epoch": 696.7105263157895, "grad_norm": 0.7481711506843567, "learning_rate": 0.0001, "loss": 0.0113, "step": 105900 }, { "epoch": 696.7763157894736, "grad_norm": 0.9661341309547424, "learning_rate": 0.0001, "loss": 0.0103, "step": 105910 }, { "epoch": 696.8421052631579, "grad_norm": 1.1508374214172363, "learning_rate": 0.0001, "loss": 0.0119, "step": 105920 }, { "epoch": 696.9078947368421, "grad_norm": 1.2137349843978882, "learning_rate": 0.0001, "loss": 0.012, "step": 105930 }, { "epoch": 696.9736842105264, "grad_norm": 1.2096316814422607, "learning_rate": 0.0001, "loss": 0.0119, "step": 105940 }, { "epoch": 697.0394736842105, "grad_norm": 1.1053109169006348, "learning_rate": 0.0001, "loss": 0.0116, "step": 105950 }, { "epoch": 697.1052631578947, "grad_norm": 1.0717750787734985, "learning_rate": 0.0001, "loss": 0.0097, "step": 105960 }, { "epoch": 697.171052631579, "grad_norm": 1.2490699291229248, "learning_rate": 0.0001, "loss": 0.0115, "step": 105970 }, { "epoch": 697.2368421052631, "grad_norm": 1.2428292036056519, "learning_rate": 0.0001, "loss": 0.01, "step": 105980 }, { "epoch": 697.3026315789474, "grad_norm": 1.023769736289978, "learning_rate": 0.0001, "loss": 0.0105, "step": 105990 }, { "epoch": 697.3684210526316, "grad_norm": 1.1357252597808838, "learning_rate": 0.0001, "loss": 0.0095, "step": 106000 }, { "epoch": 697.4342105263158, "grad_norm": 1.178993582725525, "learning_rate": 0.0001, "loss": 0.0106, "step": 106010 }, { "epoch": 697.5, "grad_norm": 0.642010509967804, "learning_rate": 0.0001, "loss": 0.012, "step": 106020 }, { "epoch": 697.5657894736842, "grad_norm": 0.8353654742240906, "learning_rate": 0.0001, "loss": 0.0106, "step": 106030 }, { "epoch": 697.6315789473684, "grad_norm": 0.9867295622825623, "learning_rate": 0.0001, "loss": 0.0092, "step": 106040 }, { "epoch": 697.6973684210526, "grad_norm": 1.1230249404907227, "learning_rate": 0.0001, "loss": 0.0093, "step": 106050 }, { "epoch": 697.7631578947369, "grad_norm": 1.3012531995773315, "learning_rate": 0.0001, "loss": 0.0113, "step": 106060 }, { "epoch": 697.828947368421, "grad_norm": 1.0835860967636108, "learning_rate": 0.0001, "loss": 0.0105, "step": 106070 }, { "epoch": 697.8947368421053, "grad_norm": 1.1090937852859497, "learning_rate": 0.0001, "loss": 0.0104, "step": 106080 }, { "epoch": 697.9605263157895, "grad_norm": 1.16708505153656, "learning_rate": 0.0001, "loss": 0.0111, "step": 106090 }, { "epoch": 698.0263157894736, "grad_norm": 1.0519673824310303, "learning_rate": 0.0001, "loss": 0.0105, "step": 106100 }, { "epoch": 698.0921052631579, "grad_norm": 1.2105119228363037, "learning_rate": 0.0001, "loss": 0.0096, "step": 106110 }, { "epoch": 698.1578947368421, "grad_norm": 1.2713031768798828, "learning_rate": 0.0001, "loss": 0.0098, "step": 106120 }, { "epoch": 698.2236842105264, "grad_norm": 0.7641732692718506, "learning_rate": 0.0001, "loss": 0.0101, "step": 106130 }, { "epoch": 698.2894736842105, "grad_norm": 1.30423903465271, "learning_rate": 0.0001, "loss": 0.0112, "step": 106140 }, { "epoch": 698.3552631578947, "grad_norm": 1.076094388961792, "learning_rate": 0.0001, "loss": 0.0104, "step": 106150 }, { "epoch": 698.421052631579, "grad_norm": 1.1429264545440674, "learning_rate": 0.0001, "loss": 0.0105, "step": 106160 }, { "epoch": 698.4868421052631, "grad_norm": 1.1242903470993042, "learning_rate": 0.0001, "loss": 0.0113, "step": 106170 }, { "epoch": 698.5526315789474, "grad_norm": 0.9787787199020386, "learning_rate": 0.0001, "loss": 0.0091, "step": 106180 }, { "epoch": 698.6184210526316, "grad_norm": 1.1341170072555542, "learning_rate": 0.0001, "loss": 0.0124, "step": 106190 }, { "epoch": 698.6842105263158, "grad_norm": 0.9176144003868103, "learning_rate": 0.0001, "loss": 0.0087, "step": 106200 }, { "epoch": 698.75, "grad_norm": 1.141721487045288, "learning_rate": 0.0001, "loss": 0.0097, "step": 106210 }, { "epoch": 698.8157894736842, "grad_norm": 0.9301637411117554, "learning_rate": 0.0001, "loss": 0.0111, "step": 106220 }, { "epoch": 698.8815789473684, "grad_norm": 1.2631868124008179, "learning_rate": 0.0001, "loss": 0.0108, "step": 106230 }, { "epoch": 698.9473684210526, "grad_norm": 0.9568750262260437, "learning_rate": 0.0001, "loss": 0.0107, "step": 106240 }, { "epoch": 699.0131578947369, "grad_norm": 1.0320165157318115, "learning_rate": 0.0001, "loss": 0.009, "step": 106250 }, { "epoch": 699.078947368421, "grad_norm": 0.9375261664390564, "learning_rate": 0.0001, "loss": 0.0121, "step": 106260 }, { "epoch": 699.1447368421053, "grad_norm": 1.236198902130127, "learning_rate": 0.0001, "loss": 0.0106, "step": 106270 }, { "epoch": 699.2105263157895, "grad_norm": 0.9655860662460327, "learning_rate": 0.0001, "loss": 0.0107, "step": 106280 }, { "epoch": 699.2763157894736, "grad_norm": 0.9262257814407349, "learning_rate": 0.0001, "loss": 0.0098, "step": 106290 }, { "epoch": 699.3421052631579, "grad_norm": 1.303225040435791, "learning_rate": 0.0001, "loss": 0.0103, "step": 106300 }, { "epoch": 699.4078947368421, "grad_norm": 1.1554994583129883, "learning_rate": 0.0001, "loss": 0.0095, "step": 106310 }, { "epoch": 699.4736842105264, "grad_norm": 1.3132860660552979, "learning_rate": 0.0001, "loss": 0.0109, "step": 106320 }, { "epoch": 699.5394736842105, "grad_norm": 0.9363493919372559, "learning_rate": 0.0001, "loss": 0.0104, "step": 106330 }, { "epoch": 699.6052631578947, "grad_norm": 1.1558855772018433, "learning_rate": 0.0001, "loss": 0.0095, "step": 106340 }, { "epoch": 699.671052631579, "grad_norm": 1.206477403640747, "learning_rate": 0.0001, "loss": 0.0112, "step": 106350 }, { "epoch": 699.7368421052631, "grad_norm": 1.2269618511199951, "learning_rate": 0.0001, "loss": 0.0095, "step": 106360 }, { "epoch": 699.8026315789474, "grad_norm": 1.058182716369629, "learning_rate": 0.0001, "loss": 0.0118, "step": 106370 }, { "epoch": 699.8684210526316, "grad_norm": 1.0236554145812988, "learning_rate": 0.0001, "loss": 0.0105, "step": 106380 }, { "epoch": 699.9342105263158, "grad_norm": 1.0067118406295776, "learning_rate": 0.0001, "loss": 0.0099, "step": 106390 }, { "epoch": 700.0, "grad_norm": 1.0427459478378296, "learning_rate": 0.0001, "loss": 0.0097, "step": 106400 }, { "epoch": 700.0657894736842, "grad_norm": 1.3142248392105103, "learning_rate": 0.0001, "loss": 0.0102, "step": 106410 }, { "epoch": 700.1315789473684, "grad_norm": 0.9090234041213989, "learning_rate": 0.0001, "loss": 0.0114, "step": 106420 }, { "epoch": 700.1973684210526, "grad_norm": 0.9273867607116699, "learning_rate": 0.0001, "loss": 0.0099, "step": 106430 }, { "epoch": 700.2631578947369, "grad_norm": 0.958476722240448, "learning_rate": 0.0001, "loss": 0.0091, "step": 106440 }, { "epoch": 700.328947368421, "grad_norm": 1.0677157640457153, "learning_rate": 0.0001, "loss": 0.0124, "step": 106450 }, { "epoch": 700.3947368421053, "grad_norm": 1.2199952602386475, "learning_rate": 0.0001, "loss": 0.0115, "step": 106460 }, { "epoch": 700.4605263157895, "grad_norm": 1.244336724281311, "learning_rate": 0.0001, "loss": 0.0118, "step": 106470 }, { "epoch": 700.5263157894736, "grad_norm": 1.1296188831329346, "learning_rate": 0.0001, "loss": 0.0084, "step": 106480 }, { "epoch": 700.5921052631579, "grad_norm": 1.4885672330856323, "learning_rate": 0.0001, "loss": 0.0101, "step": 106490 }, { "epoch": 700.6578947368421, "grad_norm": 1.0214803218841553, "learning_rate": 0.0001, "loss": 0.0094, "step": 106500 }, { "epoch": 700.7236842105264, "grad_norm": 1.2202999591827393, "learning_rate": 0.0001, "loss": 0.0081, "step": 106510 }, { "epoch": 700.7894736842105, "grad_norm": 0.9407332539558411, "learning_rate": 0.0001, "loss": 0.0122, "step": 106520 }, { "epoch": 700.8552631578947, "grad_norm": 0.8566510677337646, "learning_rate": 0.0001, "loss": 0.0089, "step": 106530 }, { "epoch": 700.921052631579, "grad_norm": 0.9324583411216736, "learning_rate": 0.0001, "loss": 0.011, "step": 106540 }, { "epoch": 700.9868421052631, "grad_norm": 1.044553279876709, "learning_rate": 0.0001, "loss": 0.0119, "step": 106550 }, { "epoch": 701.0526315789474, "grad_norm": 1.0946259498596191, "learning_rate": 0.0001, "loss": 0.0087, "step": 106560 }, { "epoch": 701.1184210526316, "grad_norm": 1.0644162893295288, "learning_rate": 0.0001, "loss": 0.0094, "step": 106570 }, { "epoch": 701.1842105263158, "grad_norm": 1.1168071031570435, "learning_rate": 0.0001, "loss": 0.0106, "step": 106580 }, { "epoch": 701.25, "grad_norm": 1.2700804471969604, "learning_rate": 0.0001, "loss": 0.0107, "step": 106590 }, { "epoch": 701.3157894736842, "grad_norm": 1.0045099258422852, "learning_rate": 0.0001, "loss": 0.0108, "step": 106600 }, { "epoch": 701.3815789473684, "grad_norm": 1.2132641077041626, "learning_rate": 0.0001, "loss": 0.0116, "step": 106610 }, { "epoch": 701.4473684210526, "grad_norm": 0.9734482169151306, "learning_rate": 0.0001, "loss": 0.0095, "step": 106620 }, { "epoch": 701.5131578947369, "grad_norm": 0.9646698832511902, "learning_rate": 0.0001, "loss": 0.012, "step": 106630 }, { "epoch": 701.578947368421, "grad_norm": 1.0157219171524048, "learning_rate": 0.0001, "loss": 0.0097, "step": 106640 }, { "epoch": 701.6447368421053, "grad_norm": 0.8552806973457336, "learning_rate": 0.0001, "loss": 0.0113, "step": 106650 }, { "epoch": 701.7105263157895, "grad_norm": 0.9953233003616333, "learning_rate": 0.0001, "loss": 0.0113, "step": 106660 }, { "epoch": 701.7763157894736, "grad_norm": 1.2354696989059448, "learning_rate": 0.0001, "loss": 0.0117, "step": 106670 }, { "epoch": 701.8421052631579, "grad_norm": 1.019411563873291, "learning_rate": 0.0001, "loss": 0.012, "step": 106680 }, { "epoch": 701.9078947368421, "grad_norm": 1.2094115018844604, "learning_rate": 0.0001, "loss": 0.0107, "step": 106690 }, { "epoch": 701.9736842105264, "grad_norm": 1.2663511037826538, "learning_rate": 0.0001, "loss": 0.0094, "step": 106700 }, { "epoch": 702.0394736842105, "grad_norm": 1.2119556665420532, "learning_rate": 0.0001, "loss": 0.0106, "step": 106710 }, { "epoch": 702.1052631578947, "grad_norm": 1.0109612941741943, "learning_rate": 0.0001, "loss": 0.0107, "step": 106720 }, { "epoch": 702.171052631579, "grad_norm": 1.126444697380066, "learning_rate": 0.0001, "loss": 0.0099, "step": 106730 }, { "epoch": 702.2368421052631, "grad_norm": 1.0462061166763306, "learning_rate": 0.0001, "loss": 0.0115, "step": 106740 }, { "epoch": 702.3026315789474, "grad_norm": 1.3411667346954346, "learning_rate": 0.0001, "loss": 0.011, "step": 106750 }, { "epoch": 702.3684210526316, "grad_norm": 1.215775728225708, "learning_rate": 0.0001, "loss": 0.0117, "step": 106760 }, { "epoch": 702.4342105263158, "grad_norm": 1.4780688285827637, "learning_rate": 0.0001, "loss": 0.011, "step": 106770 }, { "epoch": 702.5, "grad_norm": 1.3486248254776, "learning_rate": 0.0001, "loss": 0.0104, "step": 106780 }, { "epoch": 702.5657894736842, "grad_norm": 1.1051580905914307, "learning_rate": 0.0001, "loss": 0.0097, "step": 106790 }, { "epoch": 702.6315789473684, "grad_norm": 1.1159751415252686, "learning_rate": 0.0001, "loss": 0.0101, "step": 106800 }, { "epoch": 702.6973684210526, "grad_norm": 0.8592166900634766, "learning_rate": 0.0001, "loss": 0.0089, "step": 106810 }, { "epoch": 702.7631578947369, "grad_norm": 1.0782039165496826, "learning_rate": 0.0001, "loss": 0.0078, "step": 106820 }, { "epoch": 702.828947368421, "grad_norm": 1.10999596118927, "learning_rate": 0.0001, "loss": 0.0097, "step": 106830 }, { "epoch": 702.8947368421053, "grad_norm": 1.2560298442840576, "learning_rate": 0.0001, "loss": 0.0118, "step": 106840 }, { "epoch": 702.9605263157895, "grad_norm": 1.123194694519043, "learning_rate": 0.0001, "loss": 0.0087, "step": 106850 }, { "epoch": 703.0263157894736, "grad_norm": 1.0512282848358154, "learning_rate": 0.0001, "loss": 0.0123, "step": 106860 }, { "epoch": 703.0921052631579, "grad_norm": 1.1006485223770142, "learning_rate": 0.0001, "loss": 0.0093, "step": 106870 }, { "epoch": 703.1578947368421, "grad_norm": 0.8787376284599304, "learning_rate": 0.0001, "loss": 0.0102, "step": 106880 }, { "epoch": 703.2236842105264, "grad_norm": 1.178478479385376, "learning_rate": 0.0001, "loss": 0.0091, "step": 106890 }, { "epoch": 703.2894736842105, "grad_norm": 1.2921770811080933, "learning_rate": 0.0001, "loss": 0.01, "step": 106900 }, { "epoch": 703.3552631578947, "grad_norm": 1.2834266424179077, "learning_rate": 0.0001, "loss": 0.009, "step": 106910 }, { "epoch": 703.421052631579, "grad_norm": 1.2144252061843872, "learning_rate": 0.0001, "loss": 0.0101, "step": 106920 }, { "epoch": 703.4868421052631, "grad_norm": 1.4216984510421753, "learning_rate": 0.0001, "loss": 0.0101, "step": 106930 }, { "epoch": 703.5526315789474, "grad_norm": 1.094687581062317, "learning_rate": 0.0001, "loss": 0.0118, "step": 106940 }, { "epoch": 703.6184210526316, "grad_norm": 0.8012235164642334, "learning_rate": 0.0001, "loss": 0.0117, "step": 106950 }, { "epoch": 703.6842105263158, "grad_norm": 1.1857279539108276, "learning_rate": 0.0001, "loss": 0.0105, "step": 106960 }, { "epoch": 703.75, "grad_norm": 1.2665016651153564, "learning_rate": 0.0001, "loss": 0.0101, "step": 106970 }, { "epoch": 703.8157894736842, "grad_norm": 0.9577199220657349, "learning_rate": 0.0001, "loss": 0.0091, "step": 106980 }, { "epoch": 703.8815789473684, "grad_norm": 0.9756731986999512, "learning_rate": 0.0001, "loss": 0.0121, "step": 106990 }, { "epoch": 703.9473684210526, "grad_norm": 1.3572697639465332, "learning_rate": 0.0001, "loss": 0.0103, "step": 107000 }, { "epoch": 704.0131578947369, "grad_norm": 0.9276780486106873, "learning_rate": 0.0001, "loss": 0.0114, "step": 107010 }, { "epoch": 704.078947368421, "grad_norm": 0.9649149775505066, "learning_rate": 0.0001, "loss": 0.0095, "step": 107020 }, { "epoch": 704.1447368421053, "grad_norm": 0.9579582810401917, "learning_rate": 0.0001, "loss": 0.0092, "step": 107030 }, { "epoch": 704.2105263157895, "grad_norm": 1.0553480386734009, "learning_rate": 0.0001, "loss": 0.0113, "step": 107040 }, { "epoch": 704.2763157894736, "grad_norm": 1.0931501388549805, "learning_rate": 0.0001, "loss": 0.0101, "step": 107050 }, { "epoch": 704.3421052631579, "grad_norm": 1.0343230962753296, "learning_rate": 0.0001, "loss": 0.0086, "step": 107060 }, { "epoch": 704.4078947368421, "grad_norm": 1.1571636199951172, "learning_rate": 0.0001, "loss": 0.0109, "step": 107070 }, { "epoch": 704.4736842105264, "grad_norm": 1.0774343013763428, "learning_rate": 0.0001, "loss": 0.01, "step": 107080 }, { "epoch": 704.5394736842105, "grad_norm": 0.9952540397644043, "learning_rate": 0.0001, "loss": 0.0137, "step": 107090 }, { "epoch": 704.6052631578947, "grad_norm": 1.2612237930297852, "learning_rate": 0.0001, "loss": 0.0088, "step": 107100 }, { "epoch": 704.671052631579, "grad_norm": 1.1099755764007568, "learning_rate": 0.0001, "loss": 0.0119, "step": 107110 }, { "epoch": 704.7368421052631, "grad_norm": 1.1933823823928833, "learning_rate": 0.0001, "loss": 0.0096, "step": 107120 }, { "epoch": 704.8026315789474, "grad_norm": 0.8420557379722595, "learning_rate": 0.0001, "loss": 0.0114, "step": 107130 }, { "epoch": 704.8684210526316, "grad_norm": 0.7003465294837952, "learning_rate": 0.0001, "loss": 0.0093, "step": 107140 }, { "epoch": 704.9342105263158, "grad_norm": 1.202143907546997, "learning_rate": 0.0001, "loss": 0.0091, "step": 107150 }, { "epoch": 705.0, "grad_norm": 0.8362906575202942, "learning_rate": 0.0001, "loss": 0.0086, "step": 107160 }, { "epoch": 705.0657894736842, "grad_norm": 1.445649266242981, "learning_rate": 0.0001, "loss": 0.0104, "step": 107170 }, { "epoch": 705.1315789473684, "grad_norm": 1.4059619903564453, "learning_rate": 0.0001, "loss": 0.0097, "step": 107180 }, { "epoch": 705.1973684210526, "grad_norm": 1.178699016571045, "learning_rate": 0.0001, "loss": 0.01, "step": 107190 }, { "epoch": 705.2631578947369, "grad_norm": 1.397766351699829, "learning_rate": 0.0001, "loss": 0.0121, "step": 107200 }, { "epoch": 705.328947368421, "grad_norm": 1.5875097513198853, "learning_rate": 0.0001, "loss": 0.009, "step": 107210 }, { "epoch": 705.3947368421053, "grad_norm": 1.234609842300415, "learning_rate": 0.0001, "loss": 0.0105, "step": 107220 }, { "epoch": 705.4605263157895, "grad_norm": 1.3890899419784546, "learning_rate": 0.0001, "loss": 0.0082, "step": 107230 }, { "epoch": 705.5263157894736, "grad_norm": 1.17513906955719, "learning_rate": 0.0001, "loss": 0.0099, "step": 107240 }, { "epoch": 705.5921052631579, "grad_norm": 0.8957661986351013, "learning_rate": 0.0001, "loss": 0.0113, "step": 107250 }, { "epoch": 705.6578947368421, "grad_norm": 0.950888991355896, "learning_rate": 0.0001, "loss": 0.0095, "step": 107260 }, { "epoch": 705.7236842105264, "grad_norm": 1.2250102758407593, "learning_rate": 0.0001, "loss": 0.012, "step": 107270 }, { "epoch": 705.7894736842105, "grad_norm": 1.09373939037323, "learning_rate": 0.0001, "loss": 0.0097, "step": 107280 }, { "epoch": 705.8552631578947, "grad_norm": 1.0987354516983032, "learning_rate": 0.0001, "loss": 0.0085, "step": 107290 }, { "epoch": 705.921052631579, "grad_norm": 0.9803661704063416, "learning_rate": 0.0001, "loss": 0.0106, "step": 107300 }, { "epoch": 705.9868421052631, "grad_norm": 0.8128421306610107, "learning_rate": 0.0001, "loss": 0.0109, "step": 107310 }, { "epoch": 706.0526315789474, "grad_norm": 0.9702147841453552, "learning_rate": 0.0001, "loss": 0.0099, "step": 107320 }, { "epoch": 706.1184210526316, "grad_norm": 1.0280282497406006, "learning_rate": 0.0001, "loss": 0.0115, "step": 107330 }, { "epoch": 706.1842105263158, "grad_norm": 1.0692169666290283, "learning_rate": 0.0001, "loss": 0.0128, "step": 107340 }, { "epoch": 706.25, "grad_norm": 1.2259516716003418, "learning_rate": 0.0001, "loss": 0.01, "step": 107350 }, { "epoch": 706.3157894736842, "grad_norm": 1.2134467363357544, "learning_rate": 0.0001, "loss": 0.0097, "step": 107360 }, { "epoch": 706.3815789473684, "grad_norm": 1.4001606702804565, "learning_rate": 0.0001, "loss": 0.0094, "step": 107370 }, { "epoch": 706.4473684210526, "grad_norm": 0.8867390155792236, "learning_rate": 0.0001, "loss": 0.0106, "step": 107380 }, { "epoch": 706.5131578947369, "grad_norm": 1.1521034240722656, "learning_rate": 0.0001, "loss": 0.0104, "step": 107390 }, { "epoch": 706.578947368421, "grad_norm": 1.07185959815979, "learning_rate": 0.0001, "loss": 0.0098, "step": 107400 }, { "epoch": 706.6447368421053, "grad_norm": 0.9920993447303772, "learning_rate": 0.0001, "loss": 0.0093, "step": 107410 }, { "epoch": 706.7105263157895, "grad_norm": 1.0084408521652222, "learning_rate": 0.0001, "loss": 0.0097, "step": 107420 }, { "epoch": 706.7763157894736, "grad_norm": 1.267495036125183, "learning_rate": 0.0001, "loss": 0.0116, "step": 107430 }, { "epoch": 706.8421052631579, "grad_norm": 1.3698124885559082, "learning_rate": 0.0001, "loss": 0.0098, "step": 107440 }, { "epoch": 706.9078947368421, "grad_norm": 1.062820315361023, "learning_rate": 0.0001, "loss": 0.0115, "step": 107450 }, { "epoch": 706.9736842105264, "grad_norm": 1.5721125602722168, "learning_rate": 0.0001, "loss": 0.0099, "step": 107460 }, { "epoch": 707.0394736842105, "grad_norm": 1.1249845027923584, "learning_rate": 0.0001, "loss": 0.0111, "step": 107470 }, { "epoch": 707.1052631578947, "grad_norm": 1.3193303346633911, "learning_rate": 0.0001, "loss": 0.0107, "step": 107480 }, { "epoch": 707.171052631579, "grad_norm": 1.1248865127563477, "learning_rate": 0.0001, "loss": 0.0115, "step": 107490 }, { "epoch": 707.2368421052631, "grad_norm": 1.0584726333618164, "learning_rate": 0.0001, "loss": 0.0102, "step": 107500 }, { "epoch": 707.3026315789474, "grad_norm": 1.1499197483062744, "learning_rate": 0.0001, "loss": 0.0107, "step": 107510 }, { "epoch": 707.3684210526316, "grad_norm": 1.3799810409545898, "learning_rate": 0.0001, "loss": 0.0096, "step": 107520 }, { "epoch": 707.4342105263158, "grad_norm": 1.3305660486221313, "learning_rate": 0.0001, "loss": 0.009, "step": 107530 }, { "epoch": 707.5, "grad_norm": 1.3056739568710327, "learning_rate": 0.0001, "loss": 0.0097, "step": 107540 }, { "epoch": 707.5657894736842, "grad_norm": 0.9053949117660522, "learning_rate": 0.0001, "loss": 0.0089, "step": 107550 }, { "epoch": 707.6315789473684, "grad_norm": 0.6791939735412598, "learning_rate": 0.0001, "loss": 0.0106, "step": 107560 }, { "epoch": 707.6973684210526, "grad_norm": 1.0320830345153809, "learning_rate": 0.0001, "loss": 0.0104, "step": 107570 }, { "epoch": 707.7631578947369, "grad_norm": 0.8447424173355103, "learning_rate": 0.0001, "loss": 0.0087, "step": 107580 }, { "epoch": 707.828947368421, "grad_norm": 0.9377061724662781, "learning_rate": 0.0001, "loss": 0.012, "step": 107590 }, { "epoch": 707.8947368421053, "grad_norm": 1.2281724214553833, "learning_rate": 0.0001, "loss": 0.0095, "step": 107600 }, { "epoch": 707.9605263157895, "grad_norm": 1.1441106796264648, "learning_rate": 0.0001, "loss": 0.011, "step": 107610 }, { "epoch": 708.0263157894736, "grad_norm": 1.3841345310211182, "learning_rate": 0.0001, "loss": 0.0109, "step": 107620 }, { "epoch": 708.0921052631579, "grad_norm": 1.2582640647888184, "learning_rate": 0.0001, "loss": 0.0108, "step": 107630 }, { "epoch": 708.1578947368421, "grad_norm": 1.3789803981781006, "learning_rate": 0.0001, "loss": 0.0101, "step": 107640 }, { "epoch": 708.2236842105264, "grad_norm": 1.2278392314910889, "learning_rate": 0.0001, "loss": 0.0112, "step": 107650 }, { "epoch": 708.2894736842105, "grad_norm": 1.1727294921875, "learning_rate": 0.0001, "loss": 0.0112, "step": 107660 }, { "epoch": 708.3552631578947, "grad_norm": 0.9205034375190735, "learning_rate": 0.0001, "loss": 0.0103, "step": 107670 }, { "epoch": 708.421052631579, "grad_norm": 0.9984912872314453, "learning_rate": 0.0001, "loss": 0.0108, "step": 107680 }, { "epoch": 708.4868421052631, "grad_norm": 0.7507139444351196, "learning_rate": 0.0001, "loss": 0.0095, "step": 107690 }, { "epoch": 708.5526315789474, "grad_norm": 1.054671287536621, "learning_rate": 0.0001, "loss": 0.0095, "step": 107700 }, { "epoch": 708.6184210526316, "grad_norm": 0.9641456604003906, "learning_rate": 0.0001, "loss": 0.0107, "step": 107710 }, { "epoch": 708.6842105263158, "grad_norm": 1.3411998748779297, "learning_rate": 0.0001, "loss": 0.0092, "step": 107720 }, { "epoch": 708.75, "grad_norm": 0.993855357170105, "learning_rate": 0.0001, "loss": 0.011, "step": 107730 }, { "epoch": 708.8157894736842, "grad_norm": 0.6834460496902466, "learning_rate": 0.0001, "loss": 0.0096, "step": 107740 }, { "epoch": 708.8815789473684, "grad_norm": 0.7746645212173462, "learning_rate": 0.0001, "loss": 0.0111, "step": 107750 }, { "epoch": 708.9473684210526, "grad_norm": 0.9106453061103821, "learning_rate": 0.0001, "loss": 0.0094, "step": 107760 }, { "epoch": 709.0131578947369, "grad_norm": 1.1743462085723877, "learning_rate": 0.0001, "loss": 0.0107, "step": 107770 }, { "epoch": 709.078947368421, "grad_norm": 1.0879472494125366, "learning_rate": 0.0001, "loss": 0.0104, "step": 107780 }, { "epoch": 709.1447368421053, "grad_norm": 0.9474685192108154, "learning_rate": 0.0001, "loss": 0.0112, "step": 107790 }, { "epoch": 709.2105263157895, "grad_norm": 1.2010844945907593, "learning_rate": 0.0001, "loss": 0.0106, "step": 107800 }, { "epoch": 709.2763157894736, "grad_norm": 1.3946421146392822, "learning_rate": 0.0001, "loss": 0.0092, "step": 107810 }, { "epoch": 709.3421052631579, "grad_norm": 1.2751479148864746, "learning_rate": 0.0001, "loss": 0.0101, "step": 107820 }, { "epoch": 709.4078947368421, "grad_norm": 1.0376313924789429, "learning_rate": 0.0001, "loss": 0.0099, "step": 107830 }, { "epoch": 709.4736842105264, "grad_norm": 0.96278977394104, "learning_rate": 0.0001, "loss": 0.0114, "step": 107840 }, { "epoch": 709.5394736842105, "grad_norm": 0.7817369699478149, "learning_rate": 0.0001, "loss": 0.0097, "step": 107850 }, { "epoch": 709.6052631578947, "grad_norm": 1.010752558708191, "learning_rate": 0.0001, "loss": 0.0095, "step": 107860 }, { "epoch": 709.671052631579, "grad_norm": 1.0647060871124268, "learning_rate": 0.0001, "loss": 0.0114, "step": 107870 }, { "epoch": 709.7368421052631, "grad_norm": 0.7408843636512756, "learning_rate": 0.0001, "loss": 0.0104, "step": 107880 }, { "epoch": 709.8026315789474, "grad_norm": 0.9281281232833862, "learning_rate": 0.0001, "loss": 0.0106, "step": 107890 }, { "epoch": 709.8684210526316, "grad_norm": 1.3812792301177979, "learning_rate": 0.0001, "loss": 0.0118, "step": 107900 }, { "epoch": 709.9342105263158, "grad_norm": 1.2687792778015137, "learning_rate": 0.0001, "loss": 0.0108, "step": 107910 }, { "epoch": 710.0, "grad_norm": 0.8348847031593323, "learning_rate": 0.0001, "loss": 0.0108, "step": 107920 }, { "epoch": 710.0657894736842, "grad_norm": 1.199946641921997, "learning_rate": 0.0001, "loss": 0.0097, "step": 107930 }, { "epoch": 710.1315789473684, "grad_norm": 0.9090976119041443, "learning_rate": 0.0001, "loss": 0.011, "step": 107940 }, { "epoch": 710.1973684210526, "grad_norm": 1.042677879333496, "learning_rate": 0.0001, "loss": 0.0105, "step": 107950 }, { "epoch": 710.2631578947369, "grad_norm": 1.2170066833496094, "learning_rate": 0.0001, "loss": 0.0121, "step": 107960 }, { "epoch": 710.328947368421, "grad_norm": 1.3705898523330688, "learning_rate": 0.0001, "loss": 0.0116, "step": 107970 }, { "epoch": 710.3947368421053, "grad_norm": 1.2710314989089966, "learning_rate": 0.0001, "loss": 0.0126, "step": 107980 }, { "epoch": 710.4605263157895, "grad_norm": 1.175453543663025, "learning_rate": 0.0001, "loss": 0.0132, "step": 107990 }, { "epoch": 710.5263157894736, "grad_norm": 0.9529065489768982, "learning_rate": 0.0001, "loss": 0.0126, "step": 108000 }, { "epoch": 710.5921052631579, "grad_norm": 1.1746102571487427, "learning_rate": 0.0001, "loss": 0.0137, "step": 108010 }, { "epoch": 710.6578947368421, "grad_norm": 1.0855262279510498, "learning_rate": 0.0001, "loss": 0.0111, "step": 108020 }, { "epoch": 710.7236842105264, "grad_norm": 1.1520380973815918, "learning_rate": 0.0001, "loss": 0.0138, "step": 108030 }, { "epoch": 710.7894736842105, "grad_norm": 0.9170784950256348, "learning_rate": 0.0001, "loss": 0.0132, "step": 108040 }, { "epoch": 710.8552631578947, "grad_norm": 1.1136395931243896, "learning_rate": 0.0001, "loss": 0.0131, "step": 108050 }, { "epoch": 710.921052631579, "grad_norm": 1.207287073135376, "learning_rate": 0.0001, "loss": 0.0149, "step": 108060 }, { "epoch": 710.9868421052631, "grad_norm": 1.001600980758667, "learning_rate": 0.0001, "loss": 0.0137, "step": 108070 }, { "epoch": 711.0526315789474, "grad_norm": 0.9510000348091125, "learning_rate": 0.0001, "loss": 0.0125, "step": 108080 }, { "epoch": 711.1184210526316, "grad_norm": 0.7180987000465393, "learning_rate": 0.0001, "loss": 0.0114, "step": 108090 }, { "epoch": 711.1842105263158, "grad_norm": 0.977580189704895, "learning_rate": 0.0001, "loss": 0.0137, "step": 108100 }, { "epoch": 711.25, "grad_norm": 0.843988299369812, "learning_rate": 0.0001, "loss": 0.0109, "step": 108110 }, { "epoch": 711.3157894736842, "grad_norm": 1.3870481252670288, "learning_rate": 0.0001, "loss": 0.0126, "step": 108120 }, { "epoch": 711.3815789473684, "grad_norm": 1.0824686288833618, "learning_rate": 0.0001, "loss": 0.011, "step": 108130 }, { "epoch": 711.4473684210526, "grad_norm": 1.089383602142334, "learning_rate": 0.0001, "loss": 0.0118, "step": 108140 }, { "epoch": 711.5131578947369, "grad_norm": 0.9979451298713684, "learning_rate": 0.0001, "loss": 0.0118, "step": 108150 }, { "epoch": 711.578947368421, "grad_norm": 0.9893368482589722, "learning_rate": 0.0001, "loss": 0.0105, "step": 108160 }, { "epoch": 711.6447368421053, "grad_norm": 1.0383979082107544, "learning_rate": 0.0001, "loss": 0.0104, "step": 108170 }, { "epoch": 711.7105263157895, "grad_norm": 1.280486822128296, "learning_rate": 0.0001, "loss": 0.0109, "step": 108180 }, { "epoch": 711.7763157894736, "grad_norm": 1.3183673620224, "learning_rate": 0.0001, "loss": 0.0103, "step": 108190 }, { "epoch": 711.8421052631579, "grad_norm": 1.2419326305389404, "learning_rate": 0.0001, "loss": 0.0143, "step": 108200 }, { "epoch": 711.9078947368421, "grad_norm": 1.3799538612365723, "learning_rate": 0.0001, "loss": 0.011, "step": 108210 }, { "epoch": 711.9736842105264, "grad_norm": 0.9520628452301025, "learning_rate": 0.0001, "loss": 0.0112, "step": 108220 }, { "epoch": 712.0394736842105, "grad_norm": 0.9893721342086792, "learning_rate": 0.0001, "loss": 0.0116, "step": 108230 }, { "epoch": 712.1052631578947, "grad_norm": 1.260324239730835, "learning_rate": 0.0001, "loss": 0.0098, "step": 108240 }, { "epoch": 712.171052631579, "grad_norm": 1.377862572669983, "learning_rate": 0.0001, "loss": 0.0111, "step": 108250 }, { "epoch": 712.2368421052631, "grad_norm": 1.4345449209213257, "learning_rate": 0.0001, "loss": 0.0114, "step": 108260 }, { "epoch": 712.3026315789474, "grad_norm": 1.0738847255706787, "learning_rate": 0.0001, "loss": 0.0099, "step": 108270 }, { "epoch": 712.3684210526316, "grad_norm": 1.1670902967453003, "learning_rate": 0.0001, "loss": 0.0123, "step": 108280 }, { "epoch": 712.4342105263158, "grad_norm": 1.362013578414917, "learning_rate": 0.0001, "loss": 0.0112, "step": 108290 }, { "epoch": 712.5, "grad_norm": 1.2504547834396362, "learning_rate": 0.0001, "loss": 0.0108, "step": 108300 }, { "epoch": 712.5657894736842, "grad_norm": 1.6184645891189575, "learning_rate": 0.0001, "loss": 0.01, "step": 108310 }, { "epoch": 712.6315789473684, "grad_norm": 1.2186678647994995, "learning_rate": 0.0001, "loss": 0.0111, "step": 108320 }, { "epoch": 712.6973684210526, "grad_norm": 1.0406875610351562, "learning_rate": 0.0001, "loss": 0.0096, "step": 108330 }, { "epoch": 712.7631578947369, "grad_norm": 0.9314107894897461, "learning_rate": 0.0001, "loss": 0.0099, "step": 108340 }, { "epoch": 712.828947368421, "grad_norm": 0.996528148651123, "learning_rate": 0.0001, "loss": 0.0096, "step": 108350 }, { "epoch": 712.8947368421053, "grad_norm": 0.9486305117607117, "learning_rate": 0.0001, "loss": 0.0099, "step": 108360 }, { "epoch": 712.9605263157895, "grad_norm": 0.9168351292610168, "learning_rate": 0.0001, "loss": 0.0111, "step": 108370 }, { "epoch": 713.0263157894736, "grad_norm": 0.9039626717567444, "learning_rate": 0.0001, "loss": 0.0097, "step": 108380 }, { "epoch": 713.0921052631579, "grad_norm": 0.9132272601127625, "learning_rate": 0.0001, "loss": 0.0124, "step": 108390 }, { "epoch": 713.1578947368421, "grad_norm": 0.9673351049423218, "learning_rate": 0.0001, "loss": 0.0105, "step": 108400 }, { "epoch": 713.2236842105264, "grad_norm": 0.8779450058937073, "learning_rate": 0.0001, "loss": 0.0106, "step": 108410 }, { "epoch": 713.2894736842105, "grad_norm": 1.4076229333877563, "learning_rate": 0.0001, "loss": 0.0093, "step": 108420 }, { "epoch": 713.3552631578947, "grad_norm": 1.1083792448043823, "learning_rate": 0.0001, "loss": 0.0097, "step": 108430 }, { "epoch": 713.421052631579, "grad_norm": 1.1614620685577393, "learning_rate": 0.0001, "loss": 0.0103, "step": 108440 }, { "epoch": 713.4868421052631, "grad_norm": 1.1655492782592773, "learning_rate": 0.0001, "loss": 0.0104, "step": 108450 }, { "epoch": 713.5526315789474, "grad_norm": 0.9747868776321411, "learning_rate": 0.0001, "loss": 0.0105, "step": 108460 }, { "epoch": 713.6184210526316, "grad_norm": 1.1151975393295288, "learning_rate": 0.0001, "loss": 0.0105, "step": 108470 }, { "epoch": 713.6842105263158, "grad_norm": 1.0362054109573364, "learning_rate": 0.0001, "loss": 0.0108, "step": 108480 }, { "epoch": 713.75, "grad_norm": 1.196274757385254, "learning_rate": 0.0001, "loss": 0.0102, "step": 108490 }, { "epoch": 713.8157894736842, "grad_norm": 0.6207993030548096, "learning_rate": 0.0001, "loss": 0.0102, "step": 108500 }, { "epoch": 713.8815789473684, "grad_norm": 0.8537484407424927, "learning_rate": 0.0001, "loss": 0.0106, "step": 108510 }, { "epoch": 713.9473684210526, "grad_norm": 0.7432962656021118, "learning_rate": 0.0001, "loss": 0.0097, "step": 108520 }, { "epoch": 714.0131578947369, "grad_norm": 1.1963402032852173, "learning_rate": 0.0001, "loss": 0.0102, "step": 108530 }, { "epoch": 714.078947368421, "grad_norm": 1.0920330286026, "learning_rate": 0.0001, "loss": 0.0086, "step": 108540 }, { "epoch": 714.1447368421053, "grad_norm": 0.5513865351676941, "learning_rate": 0.0001, "loss": 0.0115, "step": 108550 }, { "epoch": 714.2105263157895, "grad_norm": 1.2096713781356812, "learning_rate": 0.0001, "loss": 0.0099, "step": 108560 }, { "epoch": 714.2763157894736, "grad_norm": 0.7468422651290894, "learning_rate": 0.0001, "loss": 0.0105, "step": 108570 }, { "epoch": 714.3421052631579, "grad_norm": 1.296449899673462, "learning_rate": 0.0001, "loss": 0.0107, "step": 108580 }, { "epoch": 714.4078947368421, "grad_norm": 1.2958561182022095, "learning_rate": 0.0001, "loss": 0.0108, "step": 108590 }, { "epoch": 714.4736842105264, "grad_norm": 1.0128918886184692, "learning_rate": 0.0001, "loss": 0.0095, "step": 108600 }, { "epoch": 714.5394736842105, "grad_norm": 0.9160696864128113, "learning_rate": 0.0001, "loss": 0.0086, "step": 108610 }, { "epoch": 714.6052631578947, "grad_norm": 0.9371721148490906, "learning_rate": 0.0001, "loss": 0.0104, "step": 108620 }, { "epoch": 714.671052631579, "grad_norm": 0.8780196905136108, "learning_rate": 0.0001, "loss": 0.0093, "step": 108630 }, { "epoch": 714.7368421052631, "grad_norm": 0.8234481811523438, "learning_rate": 0.0001, "loss": 0.0127, "step": 108640 }, { "epoch": 714.8026315789474, "grad_norm": 0.9354177713394165, "learning_rate": 0.0001, "loss": 0.0107, "step": 108650 }, { "epoch": 714.8684210526316, "grad_norm": 1.4554506540298462, "learning_rate": 0.0001, "loss": 0.0097, "step": 108660 }, { "epoch": 714.9342105263158, "grad_norm": 0.8353884220123291, "learning_rate": 0.0001, "loss": 0.0118, "step": 108670 }, { "epoch": 715.0, "grad_norm": 1.3016752004623413, "learning_rate": 0.0001, "loss": 0.0114, "step": 108680 }, { "epoch": 715.0657894736842, "grad_norm": 1.0093426704406738, "learning_rate": 0.0001, "loss": 0.0097, "step": 108690 }, { "epoch": 715.1315789473684, "grad_norm": 0.8471440672874451, "learning_rate": 0.0001, "loss": 0.0112, "step": 108700 }, { "epoch": 715.1973684210526, "grad_norm": 1.381130576133728, "learning_rate": 0.0001, "loss": 0.0096, "step": 108710 }, { "epoch": 715.2631578947369, "grad_norm": 1.0571134090423584, "learning_rate": 0.0001, "loss": 0.0109, "step": 108720 }, { "epoch": 715.328947368421, "grad_norm": 1.1896861791610718, "learning_rate": 0.0001, "loss": 0.0109, "step": 108730 }, { "epoch": 715.3947368421053, "grad_norm": 0.7548841834068298, "learning_rate": 0.0001, "loss": 0.0102, "step": 108740 }, { "epoch": 715.4605263157895, "grad_norm": 0.652093231678009, "learning_rate": 0.0001, "loss": 0.0099, "step": 108750 }, { "epoch": 715.5263157894736, "grad_norm": 0.6288610696792603, "learning_rate": 0.0001, "loss": 0.0093, "step": 108760 }, { "epoch": 715.5921052631579, "grad_norm": 0.9039884209632874, "learning_rate": 0.0001, "loss": 0.0098, "step": 108770 }, { "epoch": 715.6578947368421, "grad_norm": 0.7594443559646606, "learning_rate": 0.0001, "loss": 0.0118, "step": 108780 }, { "epoch": 715.7236842105264, "grad_norm": 1.3326084613800049, "learning_rate": 0.0001, "loss": 0.0097, "step": 108790 }, { "epoch": 715.7894736842105, "grad_norm": 0.7681350708007812, "learning_rate": 0.0001, "loss": 0.0105, "step": 108800 }, { "epoch": 715.8552631578947, "grad_norm": 0.6910384893417358, "learning_rate": 0.0001, "loss": 0.0088, "step": 108810 }, { "epoch": 715.921052631579, "grad_norm": 0.9494452476501465, "learning_rate": 0.0001, "loss": 0.0118, "step": 108820 }, { "epoch": 715.9868421052631, "grad_norm": 1.248620867729187, "learning_rate": 0.0001, "loss": 0.0103, "step": 108830 }, { "epoch": 716.0526315789474, "grad_norm": 1.2778465747833252, "learning_rate": 0.0001, "loss": 0.0092, "step": 108840 }, { "epoch": 716.1184210526316, "grad_norm": 1.229967474937439, "learning_rate": 0.0001, "loss": 0.0108, "step": 108850 }, { "epoch": 716.1842105263158, "grad_norm": 1.2221544981002808, "learning_rate": 0.0001, "loss": 0.0105, "step": 108860 }, { "epoch": 716.25, "grad_norm": 0.9114399552345276, "learning_rate": 0.0001, "loss": 0.011, "step": 108870 }, { "epoch": 716.3157894736842, "grad_norm": 1.2433297634124756, "learning_rate": 0.0001, "loss": 0.0098, "step": 108880 }, { "epoch": 716.3815789473684, "grad_norm": 1.0392719507217407, "learning_rate": 0.0001, "loss": 0.0095, "step": 108890 }, { "epoch": 716.4473684210526, "grad_norm": 1.3560720682144165, "learning_rate": 0.0001, "loss": 0.0119, "step": 108900 }, { "epoch": 716.5131578947369, "grad_norm": 1.7873417139053345, "learning_rate": 0.0001, "loss": 0.0118, "step": 108910 }, { "epoch": 716.578947368421, "grad_norm": 1.2388242483139038, "learning_rate": 0.0001, "loss": 0.0105, "step": 108920 }, { "epoch": 716.6447368421053, "grad_norm": 1.307459831237793, "learning_rate": 0.0001, "loss": 0.0087, "step": 108930 }, { "epoch": 716.7105263157895, "grad_norm": 1.3190605640411377, "learning_rate": 0.0001, "loss": 0.0096, "step": 108940 }, { "epoch": 716.7763157894736, "grad_norm": 1.2416794300079346, "learning_rate": 0.0001, "loss": 0.0093, "step": 108950 }, { "epoch": 716.8421052631579, "grad_norm": 1.4050918817520142, "learning_rate": 0.0001, "loss": 0.0098, "step": 108960 }, { "epoch": 716.9078947368421, "grad_norm": 0.8353002667427063, "learning_rate": 0.0001, "loss": 0.0096, "step": 108970 }, { "epoch": 716.9736842105264, "grad_norm": 1.285539984703064, "learning_rate": 0.0001, "loss": 0.0098, "step": 108980 }, { "epoch": 717.0394736842105, "grad_norm": 0.93118816614151, "learning_rate": 0.0001, "loss": 0.0102, "step": 108990 }, { "epoch": 717.1052631578947, "grad_norm": 1.2005360126495361, "learning_rate": 0.0001, "loss": 0.0088, "step": 109000 }, { "epoch": 717.171052631579, "grad_norm": 1.0540269613265991, "learning_rate": 0.0001, "loss": 0.0106, "step": 109010 }, { "epoch": 717.2368421052631, "grad_norm": 1.0876415967941284, "learning_rate": 0.0001, "loss": 0.0099, "step": 109020 }, { "epoch": 717.3026315789474, "grad_norm": 1.1824675798416138, "learning_rate": 0.0001, "loss": 0.0085, "step": 109030 }, { "epoch": 717.3684210526316, "grad_norm": 0.9963378310203552, "learning_rate": 0.0001, "loss": 0.0105, "step": 109040 }, { "epoch": 717.4342105263158, "grad_norm": 1.1265755891799927, "learning_rate": 0.0001, "loss": 0.0111, "step": 109050 }, { "epoch": 717.5, "grad_norm": 0.9112656712532043, "learning_rate": 0.0001, "loss": 0.0104, "step": 109060 }, { "epoch": 717.5657894736842, "grad_norm": 0.6385452151298523, "learning_rate": 0.0001, "loss": 0.0111, "step": 109070 }, { "epoch": 717.6315789473684, "grad_norm": 1.0386406183242798, "learning_rate": 0.0001, "loss": 0.0094, "step": 109080 }, { "epoch": 717.6973684210526, "grad_norm": 0.5495930314064026, "learning_rate": 0.0001, "loss": 0.0102, "step": 109090 }, { "epoch": 717.7631578947369, "grad_norm": 1.1156800985336304, "learning_rate": 0.0001, "loss": 0.01, "step": 109100 }, { "epoch": 717.828947368421, "grad_norm": 1.2771714925765991, "learning_rate": 0.0001, "loss": 0.0111, "step": 109110 }, { "epoch": 717.8947368421053, "grad_norm": 1.0908472537994385, "learning_rate": 0.0001, "loss": 0.0114, "step": 109120 }, { "epoch": 717.9605263157895, "grad_norm": 0.8165896534919739, "learning_rate": 0.0001, "loss": 0.0082, "step": 109130 }, { "epoch": 718.0263157894736, "grad_norm": 1.1818957328796387, "learning_rate": 0.0001, "loss": 0.0093, "step": 109140 }, { "epoch": 718.0921052631579, "grad_norm": 1.2370299100875854, "learning_rate": 0.0001, "loss": 0.0087, "step": 109150 }, { "epoch": 718.1578947368421, "grad_norm": 1.1179412603378296, "learning_rate": 0.0001, "loss": 0.0112, "step": 109160 }, { "epoch": 718.2236842105264, "grad_norm": 1.040429711341858, "learning_rate": 0.0001, "loss": 0.0095, "step": 109170 }, { "epoch": 718.2894736842105, "grad_norm": 1.090136170387268, "learning_rate": 0.0001, "loss": 0.0098, "step": 109180 }, { "epoch": 718.3552631578947, "grad_norm": 1.2007248401641846, "learning_rate": 0.0001, "loss": 0.0093, "step": 109190 }, { "epoch": 718.421052631579, "grad_norm": 1.092756986618042, "learning_rate": 0.0001, "loss": 0.0093, "step": 109200 }, { "epoch": 718.4868421052631, "grad_norm": 0.8665720820426941, "learning_rate": 0.0001, "loss": 0.0102, "step": 109210 }, { "epoch": 718.5526315789474, "grad_norm": 1.117387056350708, "learning_rate": 0.0001, "loss": 0.0106, "step": 109220 }, { "epoch": 718.6184210526316, "grad_norm": 1.2737882137298584, "learning_rate": 0.0001, "loss": 0.01, "step": 109230 }, { "epoch": 718.6842105263158, "grad_norm": 0.8910441398620605, "learning_rate": 0.0001, "loss": 0.0101, "step": 109240 }, { "epoch": 718.75, "grad_norm": 1.0049889087677002, "learning_rate": 0.0001, "loss": 0.0131, "step": 109250 }, { "epoch": 718.8157894736842, "grad_norm": 1.1752616167068481, "learning_rate": 0.0001, "loss": 0.0122, "step": 109260 }, { "epoch": 718.8815789473684, "grad_norm": 0.8606551885604858, "learning_rate": 0.0001, "loss": 0.0103, "step": 109270 }, { "epoch": 718.9473684210526, "grad_norm": 0.8159741163253784, "learning_rate": 0.0001, "loss": 0.0101, "step": 109280 }, { "epoch": 719.0131578947369, "grad_norm": 1.0030544996261597, "learning_rate": 0.0001, "loss": 0.0089, "step": 109290 }, { "epoch": 719.078947368421, "grad_norm": 0.8993750810623169, "learning_rate": 0.0001, "loss": 0.0092, "step": 109300 }, { "epoch": 719.1447368421053, "grad_norm": 1.2870763540267944, "learning_rate": 0.0001, "loss": 0.009, "step": 109310 }, { "epoch": 719.2105263157895, "grad_norm": 0.9233137369155884, "learning_rate": 0.0001, "loss": 0.0091, "step": 109320 }, { "epoch": 719.2763157894736, "grad_norm": 0.928329586982727, "learning_rate": 0.0001, "loss": 0.0094, "step": 109330 }, { "epoch": 719.3421052631579, "grad_norm": 1.117807388305664, "learning_rate": 0.0001, "loss": 0.0104, "step": 109340 }, { "epoch": 719.4078947368421, "grad_norm": 1.040723204612732, "learning_rate": 0.0001, "loss": 0.0099, "step": 109350 }, { "epoch": 719.4736842105264, "grad_norm": 1.066552996635437, "learning_rate": 0.0001, "loss": 0.0094, "step": 109360 }, { "epoch": 719.5394736842105, "grad_norm": 0.8651625514030457, "learning_rate": 0.0001, "loss": 0.0098, "step": 109370 }, { "epoch": 719.6052631578947, "grad_norm": 1.3908514976501465, "learning_rate": 0.0001, "loss": 0.0099, "step": 109380 }, { "epoch": 719.671052631579, "grad_norm": 1.2088861465454102, "learning_rate": 0.0001, "loss": 0.0105, "step": 109390 }, { "epoch": 719.7368421052631, "grad_norm": 1.2017627954483032, "learning_rate": 0.0001, "loss": 0.0111, "step": 109400 }, { "epoch": 719.8026315789474, "grad_norm": 1.2163819074630737, "learning_rate": 0.0001, "loss": 0.0089, "step": 109410 }, { "epoch": 719.8684210526316, "grad_norm": 0.8632364273071289, "learning_rate": 0.0001, "loss": 0.0128, "step": 109420 }, { "epoch": 719.9342105263158, "grad_norm": 1.2523313760757446, "learning_rate": 0.0001, "loss": 0.0126, "step": 109430 }, { "epoch": 720.0, "grad_norm": 1.2396034002304077, "learning_rate": 0.0001, "loss": 0.0121, "step": 109440 }, { "epoch": 720.0657894736842, "grad_norm": 1.3992606401443481, "learning_rate": 0.0001, "loss": 0.0121, "step": 109450 }, { "epoch": 720.1315789473684, "grad_norm": 1.1208072900772095, "learning_rate": 0.0001, "loss": 0.0106, "step": 109460 }, { "epoch": 720.1973684210526, "grad_norm": 1.3922314643859863, "learning_rate": 0.0001, "loss": 0.0101, "step": 109470 }, { "epoch": 720.2631578947369, "grad_norm": 1.2266921997070312, "learning_rate": 0.0001, "loss": 0.01, "step": 109480 }, { "epoch": 720.328947368421, "grad_norm": 0.8874131441116333, "learning_rate": 0.0001, "loss": 0.0111, "step": 109490 }, { "epoch": 720.3947368421053, "grad_norm": 0.9725931882858276, "learning_rate": 0.0001, "loss": 0.0101, "step": 109500 }, { "epoch": 720.4605263157895, "grad_norm": 1.0284030437469482, "learning_rate": 0.0001, "loss": 0.0113, "step": 109510 }, { "epoch": 720.5263157894736, "grad_norm": 0.9212563633918762, "learning_rate": 0.0001, "loss": 0.0107, "step": 109520 }, { "epoch": 720.5921052631579, "grad_norm": 1.033521294593811, "learning_rate": 0.0001, "loss": 0.0104, "step": 109530 }, { "epoch": 720.6578947368421, "grad_norm": 0.933686375617981, "learning_rate": 0.0001, "loss": 0.0101, "step": 109540 }, { "epoch": 720.7236842105264, "grad_norm": 1.2563905715942383, "learning_rate": 0.0001, "loss": 0.01, "step": 109550 }, { "epoch": 720.7894736842105, "grad_norm": 1.322197675704956, "learning_rate": 0.0001, "loss": 0.0104, "step": 109560 }, { "epoch": 720.8552631578947, "grad_norm": 1.221634864807129, "learning_rate": 0.0001, "loss": 0.0102, "step": 109570 }, { "epoch": 720.921052631579, "grad_norm": 1.2580702304840088, "learning_rate": 0.0001, "loss": 0.0095, "step": 109580 }, { "epoch": 720.9868421052631, "grad_norm": 0.6149132251739502, "learning_rate": 0.0001, "loss": 0.0109, "step": 109590 }, { "epoch": 721.0526315789474, "grad_norm": 1.1910752058029175, "learning_rate": 0.0001, "loss": 0.012, "step": 109600 }, { "epoch": 721.1184210526316, "grad_norm": 0.7073631286621094, "learning_rate": 0.0001, "loss": 0.0096, "step": 109610 }, { "epoch": 721.1842105263158, "grad_norm": 1.3418338298797607, "learning_rate": 0.0001, "loss": 0.0103, "step": 109620 }, { "epoch": 721.25, "grad_norm": 1.2676738500595093, "learning_rate": 0.0001, "loss": 0.01, "step": 109630 }, { "epoch": 721.3157894736842, "grad_norm": 1.0815623998641968, "learning_rate": 0.0001, "loss": 0.0099, "step": 109640 }, { "epoch": 721.3815789473684, "grad_norm": 0.6487982273101807, "learning_rate": 0.0001, "loss": 0.0106, "step": 109650 }, { "epoch": 721.4473684210526, "grad_norm": 1.0579499006271362, "learning_rate": 0.0001, "loss": 0.0109, "step": 109660 }, { "epoch": 721.5131578947369, "grad_norm": 1.0880662202835083, "learning_rate": 0.0001, "loss": 0.0098, "step": 109670 }, { "epoch": 721.578947368421, "grad_norm": 1.020585298538208, "learning_rate": 0.0001, "loss": 0.0108, "step": 109680 }, { "epoch": 721.6447368421053, "grad_norm": 0.9497626423835754, "learning_rate": 0.0001, "loss": 0.0117, "step": 109690 }, { "epoch": 721.7105263157895, "grad_norm": 1.3067365884780884, "learning_rate": 0.0001, "loss": 0.0101, "step": 109700 }, { "epoch": 721.7763157894736, "grad_norm": 1.3649437427520752, "learning_rate": 0.0001, "loss": 0.0101, "step": 109710 }, { "epoch": 721.8421052631579, "grad_norm": 1.4548853635787964, "learning_rate": 0.0001, "loss": 0.0102, "step": 109720 }, { "epoch": 721.9078947368421, "grad_norm": 1.0740547180175781, "learning_rate": 0.0001, "loss": 0.0108, "step": 109730 }, { "epoch": 721.9736842105264, "grad_norm": 1.0808460712432861, "learning_rate": 0.0001, "loss": 0.0107, "step": 109740 }, { "epoch": 722.0394736842105, "grad_norm": 0.738896906375885, "learning_rate": 0.0001, "loss": 0.0093, "step": 109750 }, { "epoch": 722.1052631578947, "grad_norm": 1.3755104541778564, "learning_rate": 0.0001, "loss": 0.0121, "step": 109760 }, { "epoch": 722.171052631579, "grad_norm": 1.0921390056610107, "learning_rate": 0.0001, "loss": 0.01, "step": 109770 }, { "epoch": 722.2368421052631, "grad_norm": 1.5075722932815552, "learning_rate": 0.0001, "loss": 0.0114, "step": 109780 }, { "epoch": 722.3026315789474, "grad_norm": 1.332755446434021, "learning_rate": 0.0001, "loss": 0.0108, "step": 109790 }, { "epoch": 722.3684210526316, "grad_norm": 0.9924520254135132, "learning_rate": 0.0001, "loss": 0.0108, "step": 109800 }, { "epoch": 722.4342105263158, "grad_norm": 1.1538974046707153, "learning_rate": 0.0001, "loss": 0.0096, "step": 109810 }, { "epoch": 722.5, "grad_norm": 0.8574804663658142, "learning_rate": 0.0001, "loss": 0.0087, "step": 109820 }, { "epoch": 722.5657894736842, "grad_norm": 1.1876112222671509, "learning_rate": 0.0001, "loss": 0.0089, "step": 109830 }, { "epoch": 722.6315789473684, "grad_norm": 1.1805399656295776, "learning_rate": 0.0001, "loss": 0.0093, "step": 109840 }, { "epoch": 722.6973684210526, "grad_norm": 1.1132640838623047, "learning_rate": 0.0001, "loss": 0.0118, "step": 109850 }, { "epoch": 722.7631578947369, "grad_norm": 1.2226154804229736, "learning_rate": 0.0001, "loss": 0.0098, "step": 109860 }, { "epoch": 722.828947368421, "grad_norm": 1.0198978185653687, "learning_rate": 0.0001, "loss": 0.01, "step": 109870 }, { "epoch": 722.8947368421053, "grad_norm": 0.758260190486908, "learning_rate": 0.0001, "loss": 0.0091, "step": 109880 }, { "epoch": 722.9605263157895, "grad_norm": 1.051596999168396, "learning_rate": 0.0001, "loss": 0.0115, "step": 109890 }, { "epoch": 723.0263157894736, "grad_norm": 1.2912297248840332, "learning_rate": 0.0001, "loss": 0.0117, "step": 109900 }, { "epoch": 723.0921052631579, "grad_norm": 1.3294340372085571, "learning_rate": 0.0001, "loss": 0.0087, "step": 109910 }, { "epoch": 723.1578947368421, "grad_norm": 1.156272053718567, "learning_rate": 0.0001, "loss": 0.0097, "step": 109920 }, { "epoch": 723.2236842105264, "grad_norm": 0.9503754377365112, "learning_rate": 0.0001, "loss": 0.0093, "step": 109930 }, { "epoch": 723.2894736842105, "grad_norm": 1.5646758079528809, "learning_rate": 0.0001, "loss": 0.0129, "step": 109940 }, { "epoch": 723.3552631578947, "grad_norm": 1.065543532371521, "learning_rate": 0.0001, "loss": 0.0097, "step": 109950 }, { "epoch": 723.421052631579, "grad_norm": 1.1717690229415894, "learning_rate": 0.0001, "loss": 0.011, "step": 109960 }, { "epoch": 723.4868421052631, "grad_norm": 0.8114318251609802, "learning_rate": 0.0001, "loss": 0.0092, "step": 109970 }, { "epoch": 723.5526315789474, "grad_norm": 1.155068278312683, "learning_rate": 0.0001, "loss": 0.0139, "step": 109980 }, { "epoch": 723.6184210526316, "grad_norm": 1.220639705657959, "learning_rate": 0.0001, "loss": 0.0094, "step": 109990 }, { "epoch": 723.6842105263158, "grad_norm": 0.996774435043335, "learning_rate": 0.0001, "loss": 0.0106, "step": 110000 }, { "epoch": 723.75, "grad_norm": 1.1410325765609741, "learning_rate": 0.0001, "loss": 0.0114, "step": 110010 }, { "epoch": 723.8157894736842, "grad_norm": 1.0671559572219849, "learning_rate": 0.0001, "loss": 0.0093, "step": 110020 }, { "epoch": 723.8815789473684, "grad_norm": 1.0174076557159424, "learning_rate": 0.0001, "loss": 0.0098, "step": 110030 }, { "epoch": 723.9473684210526, "grad_norm": 1.0742820501327515, "learning_rate": 0.0001, "loss": 0.0101, "step": 110040 }, { "epoch": 724.0131578947369, "grad_norm": 0.996814489364624, "learning_rate": 0.0001, "loss": 0.0101, "step": 110050 }, { "epoch": 724.078947368421, "grad_norm": 1.142468810081482, "learning_rate": 0.0001, "loss": 0.0105, "step": 110060 }, { "epoch": 724.1447368421053, "grad_norm": 1.2725366353988647, "learning_rate": 0.0001, "loss": 0.0106, "step": 110070 }, { "epoch": 724.2105263157895, "grad_norm": 1.052403211593628, "learning_rate": 0.0001, "loss": 0.0106, "step": 110080 }, { "epoch": 724.2763157894736, "grad_norm": 1.1229180097579956, "learning_rate": 0.0001, "loss": 0.0114, "step": 110090 }, { "epoch": 724.3421052631579, "grad_norm": 0.9793723821640015, "learning_rate": 0.0001, "loss": 0.0108, "step": 110100 }, { "epoch": 724.4078947368421, "grad_norm": 0.8089613318443298, "learning_rate": 0.0001, "loss": 0.0096, "step": 110110 }, { "epoch": 724.4736842105264, "grad_norm": 1.0036156177520752, "learning_rate": 0.0001, "loss": 0.0098, "step": 110120 }, { "epoch": 724.5394736842105, "grad_norm": 1.4579956531524658, "learning_rate": 0.0001, "loss": 0.0099, "step": 110130 }, { "epoch": 724.6052631578947, "grad_norm": 1.225488543510437, "learning_rate": 0.0001, "loss": 0.0107, "step": 110140 }, { "epoch": 724.671052631579, "grad_norm": 0.904151201248169, "learning_rate": 0.0001, "loss": 0.0112, "step": 110150 }, { "epoch": 724.7368421052631, "grad_norm": 0.8515450954437256, "learning_rate": 0.0001, "loss": 0.0099, "step": 110160 }, { "epoch": 724.8026315789474, "grad_norm": 0.9605996608734131, "learning_rate": 0.0001, "loss": 0.0104, "step": 110170 }, { "epoch": 724.8684210526316, "grad_norm": 0.8468832969665527, "learning_rate": 0.0001, "loss": 0.0099, "step": 110180 }, { "epoch": 724.9342105263158, "grad_norm": 1.0464105606079102, "learning_rate": 0.0001, "loss": 0.0092, "step": 110190 }, { "epoch": 725.0, "grad_norm": 0.886685848236084, "learning_rate": 0.0001, "loss": 0.0116, "step": 110200 }, { "epoch": 725.0657894736842, "grad_norm": 1.0475046634674072, "learning_rate": 0.0001, "loss": 0.0112, "step": 110210 }, { "epoch": 725.1315789473684, "grad_norm": 1.0469486713409424, "learning_rate": 0.0001, "loss": 0.0119, "step": 110220 }, { "epoch": 725.1973684210526, "grad_norm": 1.3123482465744019, "learning_rate": 0.0001, "loss": 0.0102, "step": 110230 }, { "epoch": 725.2631578947369, "grad_norm": 1.2292877435684204, "learning_rate": 0.0001, "loss": 0.0099, "step": 110240 }, { "epoch": 725.328947368421, "grad_norm": 1.54679536819458, "learning_rate": 0.0001, "loss": 0.0107, "step": 110250 }, { "epoch": 725.3947368421053, "grad_norm": 1.5144623517990112, "learning_rate": 0.0001, "loss": 0.0095, "step": 110260 }, { "epoch": 725.4605263157895, "grad_norm": 1.4638532400131226, "learning_rate": 0.0001, "loss": 0.0102, "step": 110270 }, { "epoch": 725.5263157894736, "grad_norm": 1.037933349609375, "learning_rate": 0.0001, "loss": 0.0091, "step": 110280 }, { "epoch": 725.5921052631579, "grad_norm": 0.8391295671463013, "learning_rate": 0.0001, "loss": 0.0117, "step": 110290 }, { "epoch": 725.6578947368421, "grad_norm": 1.3161094188690186, "learning_rate": 0.0001, "loss": 0.0113, "step": 110300 }, { "epoch": 725.7236842105264, "grad_norm": 1.3753902912139893, "learning_rate": 0.0001, "loss": 0.0118, "step": 110310 }, { "epoch": 725.7894736842105, "grad_norm": 1.369210124015808, "learning_rate": 0.0001, "loss": 0.0106, "step": 110320 }, { "epoch": 725.8552631578947, "grad_norm": 0.8835994005203247, "learning_rate": 0.0001, "loss": 0.0087, "step": 110330 }, { "epoch": 725.921052631579, "grad_norm": 1.2269823551177979, "learning_rate": 0.0001, "loss": 0.0097, "step": 110340 }, { "epoch": 725.9868421052631, "grad_norm": 1.24722158908844, "learning_rate": 0.0001, "loss": 0.0079, "step": 110350 }, { "epoch": 726.0526315789474, "grad_norm": 1.065712571144104, "learning_rate": 0.0001, "loss": 0.0102, "step": 110360 }, { "epoch": 726.1184210526316, "grad_norm": 1.5890785455703735, "learning_rate": 0.0001, "loss": 0.0089, "step": 110370 }, { "epoch": 726.1842105263158, "grad_norm": 0.9685258269309998, "learning_rate": 0.0001, "loss": 0.0102, "step": 110380 }, { "epoch": 726.25, "grad_norm": 1.2705445289611816, "learning_rate": 0.0001, "loss": 0.0094, "step": 110390 }, { "epoch": 726.3157894736842, "grad_norm": 1.1773557662963867, "learning_rate": 0.0001, "loss": 0.0116, "step": 110400 }, { "epoch": 726.3815789473684, "grad_norm": 1.0990701913833618, "learning_rate": 0.0001, "loss": 0.0127, "step": 110410 }, { "epoch": 726.4473684210526, "grad_norm": 1.3985167741775513, "learning_rate": 0.0001, "loss": 0.0091, "step": 110420 }, { "epoch": 726.5131578947369, "grad_norm": 1.0265055894851685, "learning_rate": 0.0001, "loss": 0.0112, "step": 110430 }, { "epoch": 726.578947368421, "grad_norm": 0.6000217199325562, "learning_rate": 0.0001, "loss": 0.0095, "step": 110440 }, { "epoch": 726.6447368421053, "grad_norm": 1.1134717464447021, "learning_rate": 0.0001, "loss": 0.0113, "step": 110450 }, { "epoch": 726.7105263157895, "grad_norm": 1.1539477109909058, "learning_rate": 0.0001, "loss": 0.0108, "step": 110460 }, { "epoch": 726.7763157894736, "grad_norm": 1.103757619857788, "learning_rate": 0.0001, "loss": 0.0105, "step": 110470 }, { "epoch": 726.8421052631579, "grad_norm": 0.9701244235038757, "learning_rate": 0.0001, "loss": 0.0088, "step": 110480 }, { "epoch": 726.9078947368421, "grad_norm": 1.1969598531723022, "learning_rate": 0.0001, "loss": 0.01, "step": 110490 }, { "epoch": 726.9736842105264, "grad_norm": 1.2793946266174316, "learning_rate": 0.0001, "loss": 0.0101, "step": 110500 }, { "epoch": 727.0394736842105, "grad_norm": 1.4208569526672363, "learning_rate": 0.0001, "loss": 0.0089, "step": 110510 }, { "epoch": 727.1052631578947, "grad_norm": 1.100865364074707, "learning_rate": 0.0001, "loss": 0.0089, "step": 110520 }, { "epoch": 727.171052631579, "grad_norm": 1.3485084772109985, "learning_rate": 0.0001, "loss": 0.0109, "step": 110530 }, { "epoch": 727.2368421052631, "grad_norm": 1.2594733238220215, "learning_rate": 0.0001, "loss": 0.0088, "step": 110540 }, { "epoch": 727.3026315789474, "grad_norm": 1.0424339771270752, "learning_rate": 0.0001, "loss": 0.0093, "step": 110550 }, { "epoch": 727.3684210526316, "grad_norm": 0.9489828944206238, "learning_rate": 0.0001, "loss": 0.0103, "step": 110560 }, { "epoch": 727.4342105263158, "grad_norm": 1.0845290422439575, "learning_rate": 0.0001, "loss": 0.0087, "step": 110570 }, { "epoch": 727.5, "grad_norm": 1.0237302780151367, "learning_rate": 0.0001, "loss": 0.0105, "step": 110580 }, { "epoch": 727.5657894736842, "grad_norm": 0.6209089756011963, "learning_rate": 0.0001, "loss": 0.0104, "step": 110590 }, { "epoch": 727.6315789473684, "grad_norm": 0.8243921995162964, "learning_rate": 0.0001, "loss": 0.0108, "step": 110600 }, { "epoch": 727.6973684210526, "grad_norm": 1.0948792695999146, "learning_rate": 0.0001, "loss": 0.0107, "step": 110610 }, { "epoch": 727.7631578947369, "grad_norm": 1.0297280550003052, "learning_rate": 0.0001, "loss": 0.0104, "step": 110620 }, { "epoch": 727.828947368421, "grad_norm": 1.0834107398986816, "learning_rate": 0.0001, "loss": 0.0137, "step": 110630 }, { "epoch": 727.8947368421053, "grad_norm": 1.2613449096679688, "learning_rate": 0.0001, "loss": 0.0109, "step": 110640 }, { "epoch": 727.9605263157895, "grad_norm": 1.1736477613449097, "learning_rate": 0.0001, "loss": 0.0104, "step": 110650 }, { "epoch": 728.0263157894736, "grad_norm": 0.9048214554786682, "learning_rate": 0.0001, "loss": 0.0127, "step": 110660 }, { "epoch": 728.0921052631579, "grad_norm": 0.9805360436439514, "learning_rate": 0.0001, "loss": 0.0109, "step": 110670 }, { "epoch": 728.1578947368421, "grad_norm": 0.9766744375228882, "learning_rate": 0.0001, "loss": 0.0106, "step": 110680 }, { "epoch": 728.2236842105264, "grad_norm": 1.171241283416748, "learning_rate": 0.0001, "loss": 0.0103, "step": 110690 }, { "epoch": 728.2894736842105, "grad_norm": 1.0495202541351318, "learning_rate": 0.0001, "loss": 0.011, "step": 110700 }, { "epoch": 728.3552631578947, "grad_norm": 1.1976003646850586, "learning_rate": 0.0001, "loss": 0.0108, "step": 110710 }, { "epoch": 728.421052631579, "grad_norm": 1.323385238647461, "learning_rate": 0.0001, "loss": 0.0105, "step": 110720 }, { "epoch": 728.4868421052631, "grad_norm": 0.8279552459716797, "learning_rate": 0.0001, "loss": 0.0103, "step": 110730 }, { "epoch": 728.5526315789474, "grad_norm": 1.1523821353912354, "learning_rate": 0.0001, "loss": 0.0118, "step": 110740 }, { "epoch": 728.6184210526316, "grad_norm": 1.1618291139602661, "learning_rate": 0.0001, "loss": 0.0095, "step": 110750 }, { "epoch": 728.6842105263158, "grad_norm": 1.517456293106079, "learning_rate": 0.0001, "loss": 0.0093, "step": 110760 }, { "epoch": 728.75, "grad_norm": 1.259458303451538, "learning_rate": 0.0001, "loss": 0.0096, "step": 110770 }, { "epoch": 728.8157894736842, "grad_norm": 1.2021534442901611, "learning_rate": 0.0001, "loss": 0.0096, "step": 110780 }, { "epoch": 728.8815789473684, "grad_norm": 0.7205750346183777, "learning_rate": 0.0001, "loss": 0.012, "step": 110790 }, { "epoch": 728.9473684210526, "grad_norm": 0.7980080246925354, "learning_rate": 0.0001, "loss": 0.0099, "step": 110800 }, { "epoch": 729.0131578947369, "grad_norm": 1.2943599224090576, "learning_rate": 0.0001, "loss": 0.0102, "step": 110810 }, { "epoch": 729.078947368421, "grad_norm": 1.2890563011169434, "learning_rate": 0.0001, "loss": 0.0114, "step": 110820 }, { "epoch": 729.1447368421053, "grad_norm": 0.8411272764205933, "learning_rate": 0.0001, "loss": 0.0111, "step": 110830 }, { "epoch": 729.2105263157895, "grad_norm": 1.1245949268341064, "learning_rate": 0.0001, "loss": 0.0111, "step": 110840 }, { "epoch": 729.2763157894736, "grad_norm": 0.9203550815582275, "learning_rate": 0.0001, "loss": 0.0093, "step": 110850 }, { "epoch": 729.3421052631579, "grad_norm": 1.0467205047607422, "learning_rate": 0.0001, "loss": 0.01, "step": 110860 }, { "epoch": 729.4078947368421, "grad_norm": 1.1412601470947266, "learning_rate": 0.0001, "loss": 0.0101, "step": 110870 }, { "epoch": 729.4736842105264, "grad_norm": 0.8333537578582764, "learning_rate": 0.0001, "loss": 0.0116, "step": 110880 }, { "epoch": 729.5394736842105, "grad_norm": 0.9673253893852234, "learning_rate": 0.0001, "loss": 0.011, "step": 110890 }, { "epoch": 729.6052631578947, "grad_norm": 0.9576820135116577, "learning_rate": 0.0001, "loss": 0.0108, "step": 110900 }, { "epoch": 729.671052631579, "grad_norm": 1.1713014841079712, "learning_rate": 0.0001, "loss": 0.0094, "step": 110910 }, { "epoch": 729.7368421052631, "grad_norm": 1.1785231828689575, "learning_rate": 0.0001, "loss": 0.011, "step": 110920 }, { "epoch": 729.8026315789474, "grad_norm": 1.2538374662399292, "learning_rate": 0.0001, "loss": 0.0101, "step": 110930 }, { "epoch": 729.8684210526316, "grad_norm": 1.0921484231948853, "learning_rate": 0.0001, "loss": 0.0119, "step": 110940 }, { "epoch": 729.9342105263158, "grad_norm": 1.2144492864608765, "learning_rate": 0.0001, "loss": 0.0103, "step": 110950 }, { "epoch": 730.0, "grad_norm": 1.5016517639160156, "learning_rate": 0.0001, "loss": 0.0091, "step": 110960 }, { "epoch": 730.0657894736842, "grad_norm": 1.3069437742233276, "learning_rate": 0.0001, "loss": 0.0102, "step": 110970 }, { "epoch": 730.1315789473684, "grad_norm": 1.0793986320495605, "learning_rate": 0.0001, "loss": 0.0109, "step": 110980 }, { "epoch": 730.1973684210526, "grad_norm": 0.8832784295082092, "learning_rate": 0.0001, "loss": 0.0109, "step": 110990 }, { "epoch": 730.2631578947369, "grad_norm": 0.7944328188896179, "learning_rate": 0.0001, "loss": 0.0095, "step": 111000 }, { "epoch": 730.328947368421, "grad_norm": 1.1215214729309082, "learning_rate": 0.0001, "loss": 0.0095, "step": 111010 }, { "epoch": 730.3947368421053, "grad_norm": 1.332367181777954, "learning_rate": 0.0001, "loss": 0.01, "step": 111020 }, { "epoch": 730.4605263157895, "grad_norm": 1.3931553363800049, "learning_rate": 0.0001, "loss": 0.0132, "step": 111030 }, { "epoch": 730.5263157894736, "grad_norm": 1.4220699071884155, "learning_rate": 0.0001, "loss": 0.0108, "step": 111040 }, { "epoch": 730.5921052631579, "grad_norm": 1.1629732847213745, "learning_rate": 0.0001, "loss": 0.0114, "step": 111050 }, { "epoch": 730.6578947368421, "grad_norm": 1.2412759065628052, "learning_rate": 0.0001, "loss": 0.0105, "step": 111060 }, { "epoch": 730.7236842105264, "grad_norm": 1.228760838508606, "learning_rate": 0.0001, "loss": 0.0136, "step": 111070 }, { "epoch": 730.7894736842105, "grad_norm": 1.2496362924575806, "learning_rate": 0.0001, "loss": 0.0122, "step": 111080 }, { "epoch": 730.8552631578947, "grad_norm": 0.9673229455947876, "learning_rate": 0.0001, "loss": 0.0104, "step": 111090 }, { "epoch": 730.921052631579, "grad_norm": 1.0482990741729736, "learning_rate": 0.0001, "loss": 0.011, "step": 111100 }, { "epoch": 730.9868421052631, "grad_norm": 1.0728371143341064, "learning_rate": 0.0001, "loss": 0.0105, "step": 111110 }, { "epoch": 731.0526315789474, "grad_norm": 0.8858627080917358, "learning_rate": 0.0001, "loss": 0.0109, "step": 111120 }, { "epoch": 731.1184210526316, "grad_norm": 1.158208966255188, "learning_rate": 0.0001, "loss": 0.0115, "step": 111130 }, { "epoch": 731.1842105263158, "grad_norm": 1.145789623260498, "learning_rate": 0.0001, "loss": 0.0119, "step": 111140 }, { "epoch": 731.25, "grad_norm": 1.142052412033081, "learning_rate": 0.0001, "loss": 0.0112, "step": 111150 }, { "epoch": 731.3157894736842, "grad_norm": 1.0644676685333252, "learning_rate": 0.0001, "loss": 0.0125, "step": 111160 }, { "epoch": 731.3815789473684, "grad_norm": 1.0623435974121094, "learning_rate": 0.0001, "loss": 0.0106, "step": 111170 }, { "epoch": 731.4473684210526, "grad_norm": 1.053824543952942, "learning_rate": 0.0001, "loss": 0.0089, "step": 111180 }, { "epoch": 731.5131578947369, "grad_norm": 1.2041244506835938, "learning_rate": 0.0001, "loss": 0.012, "step": 111190 }, { "epoch": 731.578947368421, "grad_norm": 1.0749359130859375, "learning_rate": 0.0001, "loss": 0.012, "step": 111200 }, { "epoch": 731.6447368421053, "grad_norm": 1.109894037246704, "learning_rate": 0.0001, "loss": 0.0102, "step": 111210 }, { "epoch": 731.7105263157895, "grad_norm": 1.581756830215454, "learning_rate": 0.0001, "loss": 0.0129, "step": 111220 }, { "epoch": 731.7763157894736, "grad_norm": 0.937778115272522, "learning_rate": 0.0001, "loss": 0.0099, "step": 111230 }, { "epoch": 731.8421052631579, "grad_norm": 1.134636402130127, "learning_rate": 0.0001, "loss": 0.0109, "step": 111240 }, { "epoch": 731.9078947368421, "grad_norm": 1.137024998664856, "learning_rate": 0.0001, "loss": 0.01, "step": 111250 }, { "epoch": 731.9736842105264, "grad_norm": 0.980426013469696, "learning_rate": 0.0001, "loss": 0.0101, "step": 111260 }, { "epoch": 732.0394736842105, "grad_norm": 1.3249105215072632, "learning_rate": 0.0001, "loss": 0.0115, "step": 111270 }, { "epoch": 732.1052631578947, "grad_norm": 1.0460824966430664, "learning_rate": 0.0001, "loss": 0.0119, "step": 111280 }, { "epoch": 732.171052631579, "grad_norm": 1.2906813621520996, "learning_rate": 0.0001, "loss": 0.0108, "step": 111290 }, { "epoch": 732.2368421052631, "grad_norm": 0.8858234286308289, "learning_rate": 0.0001, "loss": 0.0131, "step": 111300 }, { "epoch": 732.3026315789474, "grad_norm": 1.170811653137207, "learning_rate": 0.0001, "loss": 0.0089, "step": 111310 }, { "epoch": 732.3684210526316, "grad_norm": 1.3404710292816162, "learning_rate": 0.0001, "loss": 0.0105, "step": 111320 }, { "epoch": 732.4342105263158, "grad_norm": 1.3341257572174072, "learning_rate": 0.0001, "loss": 0.0096, "step": 111330 }, { "epoch": 732.5, "grad_norm": 1.3876317739486694, "learning_rate": 0.0001, "loss": 0.0107, "step": 111340 }, { "epoch": 732.5657894736842, "grad_norm": 0.7830882668495178, "learning_rate": 0.0001, "loss": 0.0093, "step": 111350 }, { "epoch": 732.6315789473684, "grad_norm": 1.436397910118103, "learning_rate": 0.0001, "loss": 0.0109, "step": 111360 }, { "epoch": 732.6973684210526, "grad_norm": 0.7687216997146606, "learning_rate": 0.0001, "loss": 0.0127, "step": 111370 }, { "epoch": 732.7631578947369, "grad_norm": 0.7303744554519653, "learning_rate": 0.0001, "loss": 0.0117, "step": 111380 }, { "epoch": 732.828947368421, "grad_norm": 1.1733373403549194, "learning_rate": 0.0001, "loss": 0.0094, "step": 111390 }, { "epoch": 732.8947368421053, "grad_norm": 0.9514874219894409, "learning_rate": 0.0001, "loss": 0.0106, "step": 111400 }, { "epoch": 732.9605263157895, "grad_norm": 0.594891369342804, "learning_rate": 0.0001, "loss": 0.0104, "step": 111410 }, { "epoch": 733.0263157894736, "grad_norm": 0.945779025554657, "learning_rate": 0.0001, "loss": 0.0094, "step": 111420 }, { "epoch": 733.0921052631579, "grad_norm": 0.6920212507247925, "learning_rate": 0.0001, "loss": 0.0115, "step": 111430 }, { "epoch": 733.1578947368421, "grad_norm": 0.796112060546875, "learning_rate": 0.0001, "loss": 0.0096, "step": 111440 }, { "epoch": 733.2236842105264, "grad_norm": 0.9636341333389282, "learning_rate": 0.0001, "loss": 0.0095, "step": 111450 }, { "epoch": 733.2894736842105, "grad_norm": 1.2237485647201538, "learning_rate": 0.0001, "loss": 0.011, "step": 111460 }, { "epoch": 733.3552631578947, "grad_norm": 1.8574937582015991, "learning_rate": 0.0001, "loss": 0.0115, "step": 111470 }, { "epoch": 733.421052631579, "grad_norm": 1.866210699081421, "learning_rate": 0.0001, "loss": 0.0111, "step": 111480 }, { "epoch": 733.4868421052631, "grad_norm": 1.6350382566452026, "learning_rate": 0.0001, "loss": 0.0126, "step": 111490 }, { "epoch": 733.5526315789474, "grad_norm": 1.0349119901657104, "learning_rate": 0.0001, "loss": 0.0114, "step": 111500 }, { "epoch": 733.6184210526316, "grad_norm": 1.5205327272415161, "learning_rate": 0.0001, "loss": 0.0101, "step": 111510 }, { "epoch": 733.6842105263158, "grad_norm": 1.2245243787765503, "learning_rate": 0.0001, "loss": 0.011, "step": 111520 }, { "epoch": 733.75, "grad_norm": 1.118973731994629, "learning_rate": 0.0001, "loss": 0.0091, "step": 111530 }, { "epoch": 733.8157894736842, "grad_norm": 1.170804500579834, "learning_rate": 0.0001, "loss": 0.0089, "step": 111540 }, { "epoch": 733.8815789473684, "grad_norm": 1.130577802658081, "learning_rate": 0.0001, "loss": 0.0119, "step": 111550 }, { "epoch": 733.9473684210526, "grad_norm": 1.4491939544677734, "learning_rate": 0.0001, "loss": 0.0099, "step": 111560 }, { "epoch": 734.0131578947369, "grad_norm": 1.1310603618621826, "learning_rate": 0.0001, "loss": 0.0114, "step": 111570 }, { "epoch": 734.078947368421, "grad_norm": 0.9815219640731812, "learning_rate": 0.0001, "loss": 0.0088, "step": 111580 }, { "epoch": 734.1447368421053, "grad_norm": 1.254376769065857, "learning_rate": 0.0001, "loss": 0.009, "step": 111590 }, { "epoch": 734.2105263157895, "grad_norm": 0.9605775475502014, "learning_rate": 0.0001, "loss": 0.0122, "step": 111600 }, { "epoch": 734.2763157894736, "grad_norm": 1.2415673732757568, "learning_rate": 0.0001, "loss": 0.0104, "step": 111610 }, { "epoch": 734.3421052631579, "grad_norm": 1.124882698059082, "learning_rate": 0.0001, "loss": 0.0093, "step": 111620 }, { "epoch": 734.4078947368421, "grad_norm": 0.7746277451515198, "learning_rate": 0.0001, "loss": 0.0103, "step": 111630 }, { "epoch": 734.4736842105264, "grad_norm": 1.1267927885055542, "learning_rate": 0.0001, "loss": 0.0111, "step": 111640 }, { "epoch": 734.5394736842105, "grad_norm": 0.798928439617157, "learning_rate": 0.0001, "loss": 0.0108, "step": 111650 }, { "epoch": 734.6052631578947, "grad_norm": 1.297399878501892, "learning_rate": 0.0001, "loss": 0.0111, "step": 111660 }, { "epoch": 734.671052631579, "grad_norm": 0.66205894947052, "learning_rate": 0.0001, "loss": 0.0098, "step": 111670 }, { "epoch": 734.7368421052631, "grad_norm": 0.7813223004341125, "learning_rate": 0.0001, "loss": 0.0117, "step": 111680 }, { "epoch": 734.8026315789474, "grad_norm": 0.9377914667129517, "learning_rate": 0.0001, "loss": 0.0116, "step": 111690 }, { "epoch": 734.8684210526316, "grad_norm": 1.1338937282562256, "learning_rate": 0.0001, "loss": 0.009, "step": 111700 }, { "epoch": 734.9342105263158, "grad_norm": 1.4318599700927734, "learning_rate": 0.0001, "loss": 0.0096, "step": 111710 }, { "epoch": 735.0, "grad_norm": 1.221707820892334, "learning_rate": 0.0001, "loss": 0.0091, "step": 111720 }, { "epoch": 735.0657894736842, "grad_norm": 1.2542157173156738, "learning_rate": 0.0001, "loss": 0.0119, "step": 111730 }, { "epoch": 735.1315789473684, "grad_norm": 1.02726149559021, "learning_rate": 0.0001, "loss": 0.0115, "step": 111740 }, { "epoch": 735.1973684210526, "grad_norm": 0.7633141875267029, "learning_rate": 0.0001, "loss": 0.0108, "step": 111750 }, { "epoch": 735.2631578947369, "grad_norm": 1.315930962562561, "learning_rate": 0.0001, "loss": 0.0089, "step": 111760 }, { "epoch": 735.328947368421, "grad_norm": 1.430780291557312, "learning_rate": 0.0001, "loss": 0.0101, "step": 111770 }, { "epoch": 735.3947368421053, "grad_norm": 1.1580687761306763, "learning_rate": 0.0001, "loss": 0.0095, "step": 111780 }, { "epoch": 735.4605263157895, "grad_norm": 1.3785609006881714, "learning_rate": 0.0001, "loss": 0.0098, "step": 111790 }, { "epoch": 735.5263157894736, "grad_norm": 1.3708674907684326, "learning_rate": 0.0001, "loss": 0.0105, "step": 111800 }, { "epoch": 735.5921052631579, "grad_norm": 1.1644020080566406, "learning_rate": 0.0001, "loss": 0.0087, "step": 111810 }, { "epoch": 735.6578947368421, "grad_norm": 1.1379879713058472, "learning_rate": 0.0001, "loss": 0.0095, "step": 111820 }, { "epoch": 735.7236842105264, "grad_norm": 0.817884087562561, "learning_rate": 0.0001, "loss": 0.0111, "step": 111830 }, { "epoch": 735.7894736842105, "grad_norm": 0.846930742263794, "learning_rate": 0.0001, "loss": 0.0113, "step": 111840 }, { "epoch": 735.8552631578947, "grad_norm": 0.9146661162376404, "learning_rate": 0.0001, "loss": 0.0114, "step": 111850 }, { "epoch": 735.921052631579, "grad_norm": 1.0802667140960693, "learning_rate": 0.0001, "loss": 0.0109, "step": 111860 }, { "epoch": 735.9868421052631, "grad_norm": 0.7806511521339417, "learning_rate": 0.0001, "loss": 0.0096, "step": 111870 }, { "epoch": 736.0526315789474, "grad_norm": 0.9969675540924072, "learning_rate": 0.0001, "loss": 0.0094, "step": 111880 }, { "epoch": 736.1184210526316, "grad_norm": 1.1050714254379272, "learning_rate": 0.0001, "loss": 0.0106, "step": 111890 }, { "epoch": 736.1842105263158, "grad_norm": 1.1047745943069458, "learning_rate": 0.0001, "loss": 0.0107, "step": 111900 }, { "epoch": 736.25, "grad_norm": 1.2120915651321411, "learning_rate": 0.0001, "loss": 0.0087, "step": 111910 }, { "epoch": 736.3157894736842, "grad_norm": 0.9838027358055115, "learning_rate": 0.0001, "loss": 0.0101, "step": 111920 }, { "epoch": 736.3815789473684, "grad_norm": 1.0756142139434814, "learning_rate": 0.0001, "loss": 0.0104, "step": 111930 }, { "epoch": 736.4473684210526, "grad_norm": 0.8673560619354248, "learning_rate": 0.0001, "loss": 0.0102, "step": 111940 }, { "epoch": 736.5131578947369, "grad_norm": 0.8475706577301025, "learning_rate": 0.0001, "loss": 0.0097, "step": 111950 }, { "epoch": 736.578947368421, "grad_norm": 0.9812502264976501, "learning_rate": 0.0001, "loss": 0.0129, "step": 111960 }, { "epoch": 736.6447368421053, "grad_norm": 1.0416502952575684, "learning_rate": 0.0001, "loss": 0.0092, "step": 111970 }, { "epoch": 736.7105263157895, "grad_norm": 1.1706211566925049, "learning_rate": 0.0001, "loss": 0.0105, "step": 111980 }, { "epoch": 736.7763157894736, "grad_norm": 1.0563849210739136, "learning_rate": 0.0001, "loss": 0.0137, "step": 111990 }, { "epoch": 736.8421052631579, "grad_norm": 1.2770580053329468, "learning_rate": 0.0001, "loss": 0.0107, "step": 112000 }, { "epoch": 736.9078947368421, "grad_norm": 1.0203648805618286, "learning_rate": 0.0001, "loss": 0.0102, "step": 112010 }, { "epoch": 736.9736842105264, "grad_norm": 0.937138557434082, "learning_rate": 0.0001, "loss": 0.0087, "step": 112020 }, { "epoch": 737.0394736842105, "grad_norm": 1.422008752822876, "learning_rate": 0.0001, "loss": 0.0094, "step": 112030 }, { "epoch": 737.1052631578947, "grad_norm": 1.0362669229507446, "learning_rate": 0.0001, "loss": 0.0096, "step": 112040 }, { "epoch": 737.171052631579, "grad_norm": 0.7629768252372742, "learning_rate": 0.0001, "loss": 0.0088, "step": 112050 }, { "epoch": 737.2368421052631, "grad_norm": 0.8714403510093689, "learning_rate": 0.0001, "loss": 0.0107, "step": 112060 }, { "epoch": 737.3026315789474, "grad_norm": 1.0577564239501953, "learning_rate": 0.0001, "loss": 0.0096, "step": 112070 }, { "epoch": 737.3684210526316, "grad_norm": 1.1749887466430664, "learning_rate": 0.0001, "loss": 0.0096, "step": 112080 }, { "epoch": 737.4342105263158, "grad_norm": 1.071063756942749, "learning_rate": 0.0001, "loss": 0.0094, "step": 112090 }, { "epoch": 737.5, "grad_norm": 1.1474461555480957, "learning_rate": 0.0001, "loss": 0.012, "step": 112100 }, { "epoch": 737.5657894736842, "grad_norm": 1.089721441268921, "learning_rate": 0.0001, "loss": 0.0118, "step": 112110 }, { "epoch": 737.6315789473684, "grad_norm": 1.0660192966461182, "learning_rate": 0.0001, "loss": 0.0114, "step": 112120 }, { "epoch": 737.6973684210526, "grad_norm": 0.9403685331344604, "learning_rate": 0.0001, "loss": 0.0086, "step": 112130 }, { "epoch": 737.7631578947369, "grad_norm": 0.7262328267097473, "learning_rate": 0.0001, "loss": 0.0095, "step": 112140 }, { "epoch": 737.828947368421, "grad_norm": 0.9046671986579895, "learning_rate": 0.0001, "loss": 0.0119, "step": 112150 }, { "epoch": 737.8947368421053, "grad_norm": 1.0580425262451172, "learning_rate": 0.0001, "loss": 0.0106, "step": 112160 }, { "epoch": 737.9605263157895, "grad_norm": 0.8231117129325867, "learning_rate": 0.0001, "loss": 0.0114, "step": 112170 }, { "epoch": 738.0263157894736, "grad_norm": 0.9785894751548767, "learning_rate": 0.0001, "loss": 0.0097, "step": 112180 }, { "epoch": 738.0921052631579, "grad_norm": 1.0782703161239624, "learning_rate": 0.0001, "loss": 0.0119, "step": 112190 }, { "epoch": 738.1578947368421, "grad_norm": 1.0809273719787598, "learning_rate": 0.0001, "loss": 0.0098, "step": 112200 }, { "epoch": 738.2236842105264, "grad_norm": 1.4004234075546265, "learning_rate": 0.0001, "loss": 0.0097, "step": 112210 }, { "epoch": 738.2894736842105, "grad_norm": 1.5561052560806274, "learning_rate": 0.0001, "loss": 0.011, "step": 112220 }, { "epoch": 738.3552631578947, "grad_norm": 1.2708110809326172, "learning_rate": 0.0001, "loss": 0.0104, "step": 112230 }, { "epoch": 738.421052631579, "grad_norm": 1.1724753379821777, "learning_rate": 0.0001, "loss": 0.0109, "step": 112240 }, { "epoch": 738.4868421052631, "grad_norm": 1.3249428272247314, "learning_rate": 0.0001, "loss": 0.0113, "step": 112250 }, { "epoch": 738.5526315789474, "grad_norm": 1.2601690292358398, "learning_rate": 0.0001, "loss": 0.0097, "step": 112260 }, { "epoch": 738.6184210526316, "grad_norm": 1.4754626750946045, "learning_rate": 0.0001, "loss": 0.0092, "step": 112270 }, { "epoch": 738.6842105263158, "grad_norm": 1.0365551710128784, "learning_rate": 0.0001, "loss": 0.0115, "step": 112280 }, { "epoch": 738.75, "grad_norm": 1.195425033569336, "learning_rate": 0.0001, "loss": 0.0091, "step": 112290 }, { "epoch": 738.8157894736842, "grad_norm": 0.8115053772926331, "learning_rate": 0.0001, "loss": 0.0091, "step": 112300 }, { "epoch": 738.8815789473684, "grad_norm": 1.1184942722320557, "learning_rate": 0.0001, "loss": 0.0104, "step": 112310 }, { "epoch": 738.9473684210526, "grad_norm": 0.7952884435653687, "learning_rate": 0.0001, "loss": 0.0101, "step": 112320 }, { "epoch": 739.0131578947369, "grad_norm": 0.909673810005188, "learning_rate": 0.0001, "loss": 0.0112, "step": 112330 }, { "epoch": 739.078947368421, "grad_norm": 0.9384917616844177, "learning_rate": 0.0001, "loss": 0.0104, "step": 112340 }, { "epoch": 739.1447368421053, "grad_norm": 0.9624817967414856, "learning_rate": 0.0001, "loss": 0.0109, "step": 112350 }, { "epoch": 739.2105263157895, "grad_norm": 1.2342602014541626, "learning_rate": 0.0001, "loss": 0.0085, "step": 112360 }, { "epoch": 739.2763157894736, "grad_norm": 0.8785709142684937, "learning_rate": 0.0001, "loss": 0.0095, "step": 112370 }, { "epoch": 739.3421052631579, "grad_norm": 1.1421648263931274, "learning_rate": 0.0001, "loss": 0.0104, "step": 112380 }, { "epoch": 739.4078947368421, "grad_norm": 0.7869369387626648, "learning_rate": 0.0001, "loss": 0.0112, "step": 112390 }, { "epoch": 739.4736842105264, "grad_norm": 0.9102826118469238, "learning_rate": 0.0001, "loss": 0.0088, "step": 112400 }, { "epoch": 739.5394736842105, "grad_norm": 1.3796066045761108, "learning_rate": 0.0001, "loss": 0.0106, "step": 112410 }, { "epoch": 739.6052631578947, "grad_norm": 0.9464783072471619, "learning_rate": 0.0001, "loss": 0.0119, "step": 112420 }, { "epoch": 739.671052631579, "grad_norm": 1.226456642150879, "learning_rate": 0.0001, "loss": 0.0106, "step": 112430 }, { "epoch": 739.7368421052631, "grad_norm": 0.8695907592773438, "learning_rate": 0.0001, "loss": 0.0101, "step": 112440 }, { "epoch": 739.8026315789474, "grad_norm": 1.0778448581695557, "learning_rate": 0.0001, "loss": 0.0113, "step": 112450 }, { "epoch": 739.8684210526316, "grad_norm": 1.033981204032898, "learning_rate": 0.0001, "loss": 0.0088, "step": 112460 }, { "epoch": 739.9342105263158, "grad_norm": 1.3811094760894775, "learning_rate": 0.0001, "loss": 0.0109, "step": 112470 }, { "epoch": 740.0, "grad_norm": 1.0558196306228638, "learning_rate": 0.0001, "loss": 0.01, "step": 112480 }, { "epoch": 740.0657894736842, "grad_norm": 1.4138654470443726, "learning_rate": 0.0001, "loss": 0.013, "step": 112490 }, { "epoch": 740.1315789473684, "grad_norm": 1.170053482055664, "learning_rate": 0.0001, "loss": 0.0095, "step": 112500 }, { "epoch": 740.1973684210526, "grad_norm": 1.0176713466644287, "learning_rate": 0.0001, "loss": 0.0117, "step": 112510 }, { "epoch": 740.2631578947369, "grad_norm": 1.2900663614273071, "learning_rate": 0.0001, "loss": 0.0108, "step": 112520 }, { "epoch": 740.328947368421, "grad_norm": 1.0003573894500732, "learning_rate": 0.0001, "loss": 0.0093, "step": 112530 }, { "epoch": 740.3947368421053, "grad_norm": 1.079323410987854, "learning_rate": 0.0001, "loss": 0.0099, "step": 112540 }, { "epoch": 740.4605263157895, "grad_norm": 0.878826916217804, "learning_rate": 0.0001, "loss": 0.0104, "step": 112550 }, { "epoch": 740.5263157894736, "grad_norm": 0.9983590841293335, "learning_rate": 0.0001, "loss": 0.0101, "step": 112560 }, { "epoch": 740.5921052631579, "grad_norm": 1.1599466800689697, "learning_rate": 0.0001, "loss": 0.0099, "step": 112570 }, { "epoch": 740.6578947368421, "grad_norm": 1.4644721746444702, "learning_rate": 0.0001, "loss": 0.0087, "step": 112580 }, { "epoch": 740.7236842105264, "grad_norm": 1.0327931642532349, "learning_rate": 0.0001, "loss": 0.0116, "step": 112590 }, { "epoch": 740.7894736842105, "grad_norm": 1.1933623552322388, "learning_rate": 0.0001, "loss": 0.011, "step": 112600 }, { "epoch": 740.8552631578947, "grad_norm": 1.1514612436294556, "learning_rate": 0.0001, "loss": 0.0099, "step": 112610 }, { "epoch": 740.921052631579, "grad_norm": 1.074176549911499, "learning_rate": 0.0001, "loss": 0.0087, "step": 112620 }, { "epoch": 740.9868421052631, "grad_norm": 1.3565348386764526, "learning_rate": 0.0001, "loss": 0.0102, "step": 112630 }, { "epoch": 741.0526315789474, "grad_norm": 1.487715721130371, "learning_rate": 0.0001, "loss": 0.0096, "step": 112640 }, { "epoch": 741.1184210526316, "grad_norm": 1.228196620941162, "learning_rate": 0.0001, "loss": 0.0093, "step": 112650 }, { "epoch": 741.1842105263158, "grad_norm": 0.9726716876029968, "learning_rate": 0.0001, "loss": 0.0097, "step": 112660 }, { "epoch": 741.25, "grad_norm": 1.1228713989257812, "learning_rate": 0.0001, "loss": 0.0096, "step": 112670 }, { "epoch": 741.3157894736842, "grad_norm": 1.3492858409881592, "learning_rate": 0.0001, "loss": 0.0108, "step": 112680 }, { "epoch": 741.3815789473684, "grad_norm": 1.2448452711105347, "learning_rate": 0.0001, "loss": 0.011, "step": 112690 }, { "epoch": 741.4473684210526, "grad_norm": 1.2772343158721924, "learning_rate": 0.0001, "loss": 0.0104, "step": 112700 }, { "epoch": 741.5131578947369, "grad_norm": 0.9293051958084106, "learning_rate": 0.0001, "loss": 0.0094, "step": 112710 }, { "epoch": 741.578947368421, "grad_norm": 0.7797158360481262, "learning_rate": 0.0001, "loss": 0.009, "step": 112720 }, { "epoch": 741.6447368421053, "grad_norm": 1.2524482011795044, "learning_rate": 0.0001, "loss": 0.0106, "step": 112730 }, { "epoch": 741.7105263157895, "grad_norm": 1.0010104179382324, "learning_rate": 0.0001, "loss": 0.0103, "step": 112740 }, { "epoch": 741.7763157894736, "grad_norm": 0.8157704472541809, "learning_rate": 0.0001, "loss": 0.0101, "step": 112750 }, { "epoch": 741.8421052631579, "grad_norm": 0.7338435649871826, "learning_rate": 0.0001, "loss": 0.0108, "step": 112760 }, { "epoch": 741.9078947368421, "grad_norm": 0.8441011905670166, "learning_rate": 0.0001, "loss": 0.0093, "step": 112770 }, { "epoch": 741.9736842105264, "grad_norm": 0.5987163186073303, "learning_rate": 0.0001, "loss": 0.011, "step": 112780 }, { "epoch": 742.0394736842105, "grad_norm": 0.5863604545593262, "learning_rate": 0.0001, "loss": 0.0103, "step": 112790 }, { "epoch": 742.1052631578947, "grad_norm": 1.1553442478179932, "learning_rate": 0.0001, "loss": 0.0118, "step": 112800 }, { "epoch": 742.171052631579, "grad_norm": 1.0437456369400024, "learning_rate": 0.0001, "loss": 0.01, "step": 112810 }, { "epoch": 742.2368421052631, "grad_norm": 1.0978097915649414, "learning_rate": 0.0001, "loss": 0.0096, "step": 112820 }, { "epoch": 742.3026315789474, "grad_norm": 1.2839778661727905, "learning_rate": 0.0001, "loss": 0.0099, "step": 112830 }, { "epoch": 742.3684210526316, "grad_norm": 0.8955215215682983, "learning_rate": 0.0001, "loss": 0.0089, "step": 112840 }, { "epoch": 742.4342105263158, "grad_norm": 0.9545695781707764, "learning_rate": 0.0001, "loss": 0.0095, "step": 112850 }, { "epoch": 742.5, "grad_norm": 0.9076725840568542, "learning_rate": 0.0001, "loss": 0.0111, "step": 112860 }, { "epoch": 742.5657894736842, "grad_norm": 0.9868484735488892, "learning_rate": 0.0001, "loss": 0.0093, "step": 112870 }, { "epoch": 742.6315789473684, "grad_norm": 0.9689452052116394, "learning_rate": 0.0001, "loss": 0.0114, "step": 112880 }, { "epoch": 742.6973684210526, "grad_norm": 0.9611213207244873, "learning_rate": 0.0001, "loss": 0.0089, "step": 112890 }, { "epoch": 742.7631578947369, "grad_norm": 1.3017029762268066, "learning_rate": 0.0001, "loss": 0.0101, "step": 112900 }, { "epoch": 742.828947368421, "grad_norm": 1.010025978088379, "learning_rate": 0.0001, "loss": 0.0107, "step": 112910 }, { "epoch": 742.8947368421053, "grad_norm": 1.1082737445831299, "learning_rate": 0.0001, "loss": 0.0116, "step": 112920 }, { "epoch": 742.9605263157895, "grad_norm": 1.2095134258270264, "learning_rate": 0.0001, "loss": 0.0092, "step": 112930 }, { "epoch": 743.0263157894736, "grad_norm": 0.7735888361930847, "learning_rate": 0.0001, "loss": 0.0106, "step": 112940 }, { "epoch": 743.0921052631579, "grad_norm": 0.8232780694961548, "learning_rate": 0.0001, "loss": 0.0106, "step": 112950 }, { "epoch": 743.1578947368421, "grad_norm": 1.1837214231491089, "learning_rate": 0.0001, "loss": 0.0099, "step": 112960 }, { "epoch": 743.2236842105264, "grad_norm": 1.0164154767990112, "learning_rate": 0.0001, "loss": 0.01, "step": 112970 }, { "epoch": 743.2894736842105, "grad_norm": 0.9214721918106079, "learning_rate": 0.0001, "loss": 0.0113, "step": 112980 }, { "epoch": 743.3552631578947, "grad_norm": 1.178781270980835, "learning_rate": 0.0001, "loss": 0.0104, "step": 112990 }, { "epoch": 743.421052631579, "grad_norm": 0.8632880449295044, "learning_rate": 0.0001, "loss": 0.0103, "step": 113000 }, { "epoch": 743.4868421052631, "grad_norm": 0.8442752957344055, "learning_rate": 0.0001, "loss": 0.01, "step": 113010 }, { "epoch": 743.5526315789474, "grad_norm": 1.3869895935058594, "learning_rate": 0.0001, "loss": 0.0088, "step": 113020 }, { "epoch": 743.6184210526316, "grad_norm": 0.642612636089325, "learning_rate": 0.0001, "loss": 0.0119, "step": 113030 }, { "epoch": 743.6842105263158, "grad_norm": 0.9901149868965149, "learning_rate": 0.0001, "loss": 0.0103, "step": 113040 }, { "epoch": 743.75, "grad_norm": 0.6913559436798096, "learning_rate": 0.0001, "loss": 0.0114, "step": 113050 }, { "epoch": 743.8157894736842, "grad_norm": 1.3106902837753296, "learning_rate": 0.0001, "loss": 0.0085, "step": 113060 }, { "epoch": 743.8815789473684, "grad_norm": 0.6828902959823608, "learning_rate": 0.0001, "loss": 0.0107, "step": 113070 }, { "epoch": 743.9473684210526, "grad_norm": 1.2042633295059204, "learning_rate": 0.0001, "loss": 0.0099, "step": 113080 }, { "epoch": 744.0131578947369, "grad_norm": 1.0320390462875366, "learning_rate": 0.0001, "loss": 0.0104, "step": 113090 }, { "epoch": 744.078947368421, "grad_norm": 1.210471272468567, "learning_rate": 0.0001, "loss": 0.011, "step": 113100 }, { "epoch": 744.1447368421053, "grad_norm": 0.7670679092407227, "learning_rate": 0.0001, "loss": 0.0116, "step": 113110 }, { "epoch": 744.2105263157895, "grad_norm": 1.2954962253570557, "learning_rate": 0.0001, "loss": 0.0099, "step": 113120 }, { "epoch": 744.2763157894736, "grad_norm": 1.0254065990447998, "learning_rate": 0.0001, "loss": 0.0099, "step": 113130 }, { "epoch": 744.3421052631579, "grad_norm": 0.9328817129135132, "learning_rate": 0.0001, "loss": 0.0119, "step": 113140 }, { "epoch": 744.4078947368421, "grad_norm": 1.0787217617034912, "learning_rate": 0.0001, "loss": 0.01, "step": 113150 }, { "epoch": 744.4736842105264, "grad_norm": 1.0308198928833008, "learning_rate": 0.0001, "loss": 0.0112, "step": 113160 }, { "epoch": 744.5394736842105, "grad_norm": 1.011286973953247, "learning_rate": 0.0001, "loss": 0.0096, "step": 113170 }, { "epoch": 744.6052631578947, "grad_norm": 0.8020488023757935, "learning_rate": 0.0001, "loss": 0.0131, "step": 113180 }, { "epoch": 744.671052631579, "grad_norm": 1.1139671802520752, "learning_rate": 0.0001, "loss": 0.0093, "step": 113190 }, { "epoch": 744.7368421052631, "grad_norm": 1.1352884769439697, "learning_rate": 0.0001, "loss": 0.0085, "step": 113200 }, { "epoch": 744.8026315789474, "grad_norm": 0.8926935791969299, "learning_rate": 0.0001, "loss": 0.0084, "step": 113210 }, { "epoch": 744.8684210526316, "grad_norm": 1.2045756578445435, "learning_rate": 0.0001, "loss": 0.0106, "step": 113220 }, { "epoch": 744.9342105263158, "grad_norm": 1.2373918294906616, "learning_rate": 0.0001, "loss": 0.0104, "step": 113230 }, { "epoch": 745.0, "grad_norm": 0.9970014095306396, "learning_rate": 0.0001, "loss": 0.01, "step": 113240 }, { "epoch": 745.0657894736842, "grad_norm": 0.9644217491149902, "learning_rate": 0.0001, "loss": 0.0112, "step": 113250 }, { "epoch": 745.1315789473684, "grad_norm": 1.1484285593032837, "learning_rate": 0.0001, "loss": 0.0098, "step": 113260 }, { "epoch": 745.1973684210526, "grad_norm": 0.9639143347740173, "learning_rate": 0.0001, "loss": 0.012, "step": 113270 }, { "epoch": 745.2631578947369, "grad_norm": 1.1494964361190796, "learning_rate": 0.0001, "loss": 0.0089, "step": 113280 }, { "epoch": 745.328947368421, "grad_norm": 1.025691032409668, "learning_rate": 0.0001, "loss": 0.0097, "step": 113290 }, { "epoch": 745.3947368421053, "grad_norm": 1.154371976852417, "learning_rate": 0.0001, "loss": 0.0103, "step": 113300 }, { "epoch": 745.4605263157895, "grad_norm": 1.3313137292861938, "learning_rate": 0.0001, "loss": 0.0113, "step": 113310 }, { "epoch": 745.5263157894736, "grad_norm": 1.094545841217041, "learning_rate": 0.0001, "loss": 0.0092, "step": 113320 }, { "epoch": 745.5921052631579, "grad_norm": 1.2096467018127441, "learning_rate": 0.0001, "loss": 0.0097, "step": 113330 }, { "epoch": 745.6578947368421, "grad_norm": 1.1005749702453613, "learning_rate": 0.0001, "loss": 0.0116, "step": 113340 }, { "epoch": 745.7236842105264, "grad_norm": 1.1122779846191406, "learning_rate": 0.0001, "loss": 0.0096, "step": 113350 }, { "epoch": 745.7894736842105, "grad_norm": 1.2181123495101929, "learning_rate": 0.0001, "loss": 0.009, "step": 113360 }, { "epoch": 745.8552631578947, "grad_norm": 1.4004229307174683, "learning_rate": 0.0001, "loss": 0.0114, "step": 113370 }, { "epoch": 745.921052631579, "grad_norm": 0.9129208922386169, "learning_rate": 0.0001, "loss": 0.0088, "step": 113380 }, { "epoch": 745.9868421052631, "grad_norm": 1.1694635152816772, "learning_rate": 0.0001, "loss": 0.0109, "step": 113390 }, { "epoch": 746.0526315789474, "grad_norm": 1.1359633207321167, "learning_rate": 0.0001, "loss": 0.0109, "step": 113400 }, { "epoch": 746.1184210526316, "grad_norm": 1.1821686029434204, "learning_rate": 0.0001, "loss": 0.0101, "step": 113410 }, { "epoch": 746.1842105263158, "grad_norm": 1.0882134437561035, "learning_rate": 0.0001, "loss": 0.0094, "step": 113420 }, { "epoch": 746.25, "grad_norm": 1.2553226947784424, "learning_rate": 0.0001, "loss": 0.0084, "step": 113430 }, { "epoch": 746.3157894736842, "grad_norm": 0.9115428924560547, "learning_rate": 0.0001, "loss": 0.0115, "step": 113440 }, { "epoch": 746.3815789473684, "grad_norm": 1.281915307044983, "learning_rate": 0.0001, "loss": 0.0111, "step": 113450 }, { "epoch": 746.4473684210526, "grad_norm": 1.3433202505111694, "learning_rate": 0.0001, "loss": 0.0114, "step": 113460 }, { "epoch": 746.5131578947369, "grad_norm": 1.5758907794952393, "learning_rate": 0.0001, "loss": 0.0089, "step": 113470 }, { "epoch": 746.578947368421, "grad_norm": 1.3677822351455688, "learning_rate": 0.0001, "loss": 0.0101, "step": 113480 }, { "epoch": 746.6447368421053, "grad_norm": 1.343830943107605, "learning_rate": 0.0001, "loss": 0.0112, "step": 113490 }, { "epoch": 746.7105263157895, "grad_norm": 1.2050989866256714, "learning_rate": 0.0001, "loss": 0.0094, "step": 113500 }, { "epoch": 746.7763157894736, "grad_norm": 0.8132672905921936, "learning_rate": 0.0001, "loss": 0.0106, "step": 113510 }, { "epoch": 746.8421052631579, "grad_norm": 1.0796236991882324, "learning_rate": 0.0001, "loss": 0.0094, "step": 113520 }, { "epoch": 746.9078947368421, "grad_norm": 0.9713119268417358, "learning_rate": 0.0001, "loss": 0.011, "step": 113530 }, { "epoch": 746.9736842105264, "grad_norm": 1.068225383758545, "learning_rate": 0.0001, "loss": 0.0107, "step": 113540 }, { "epoch": 747.0394736842105, "grad_norm": 0.9908331036567688, "learning_rate": 0.0001, "loss": 0.0087, "step": 113550 }, { "epoch": 747.1052631578947, "grad_norm": 0.8865001797676086, "learning_rate": 0.0001, "loss": 0.0129, "step": 113560 }, { "epoch": 747.171052631579, "grad_norm": 0.9770333766937256, "learning_rate": 0.0001, "loss": 0.0109, "step": 113570 }, { "epoch": 747.2368421052631, "grad_norm": 0.9718663692474365, "learning_rate": 0.0001, "loss": 0.0124, "step": 113580 }, { "epoch": 747.3026315789474, "grad_norm": 0.9248496890068054, "learning_rate": 0.0001, "loss": 0.0089, "step": 113590 }, { "epoch": 747.3684210526316, "grad_norm": 0.9006205797195435, "learning_rate": 0.0001, "loss": 0.01, "step": 113600 }, { "epoch": 747.4342105263158, "grad_norm": 1.0532220602035522, "learning_rate": 0.0001, "loss": 0.0102, "step": 113610 }, { "epoch": 747.5, "grad_norm": 1.0480901002883911, "learning_rate": 0.0001, "loss": 0.0113, "step": 113620 }, { "epoch": 747.5657894736842, "grad_norm": 1.1913716793060303, "learning_rate": 0.0001, "loss": 0.0089, "step": 113630 }, { "epoch": 747.6315789473684, "grad_norm": 1.3170071840286255, "learning_rate": 0.0001, "loss": 0.0104, "step": 113640 }, { "epoch": 747.6973684210526, "grad_norm": 0.8311954140663147, "learning_rate": 0.0001, "loss": 0.0092, "step": 113650 }, { "epoch": 747.7631578947369, "grad_norm": 1.102514624595642, "learning_rate": 0.0001, "loss": 0.0093, "step": 113660 }, { "epoch": 747.828947368421, "grad_norm": 1.3776170015335083, "learning_rate": 0.0001, "loss": 0.0105, "step": 113670 }, { "epoch": 747.8947368421053, "grad_norm": 1.1094131469726562, "learning_rate": 0.0001, "loss": 0.0096, "step": 113680 }, { "epoch": 747.9605263157895, "grad_norm": 1.3613204956054688, "learning_rate": 0.0001, "loss": 0.0105, "step": 113690 }, { "epoch": 748.0263157894736, "grad_norm": 1.2817715406417847, "learning_rate": 0.0001, "loss": 0.0093, "step": 113700 }, { "epoch": 748.0921052631579, "grad_norm": 1.1699529886245728, "learning_rate": 0.0001, "loss": 0.012, "step": 113710 }, { "epoch": 748.1578947368421, "grad_norm": 1.2629177570343018, "learning_rate": 0.0001, "loss": 0.0095, "step": 113720 }, { "epoch": 748.2236842105264, "grad_norm": 1.0418188571929932, "learning_rate": 0.0001, "loss": 0.0114, "step": 113730 }, { "epoch": 748.2894736842105, "grad_norm": 1.2210304737091064, "learning_rate": 0.0001, "loss": 0.0105, "step": 113740 }, { "epoch": 748.3552631578947, "grad_norm": 0.6313312649726868, "learning_rate": 0.0001, "loss": 0.0107, "step": 113750 }, { "epoch": 748.421052631579, "grad_norm": 1.2036254405975342, "learning_rate": 0.0001, "loss": 0.0101, "step": 113760 }, { "epoch": 748.4868421052631, "grad_norm": 0.9804714918136597, "learning_rate": 0.0001, "loss": 0.0091, "step": 113770 }, { "epoch": 748.5526315789474, "grad_norm": 0.9679644107818604, "learning_rate": 0.0001, "loss": 0.0098, "step": 113780 }, { "epoch": 748.6184210526316, "grad_norm": 0.7930268049240112, "learning_rate": 0.0001, "loss": 0.0091, "step": 113790 }, { "epoch": 748.6842105263158, "grad_norm": 1.0785940885543823, "learning_rate": 0.0001, "loss": 0.009, "step": 113800 }, { "epoch": 748.75, "grad_norm": 1.1710313558578491, "learning_rate": 0.0001, "loss": 0.009, "step": 113810 }, { "epoch": 748.8157894736842, "grad_norm": 0.9718161225318909, "learning_rate": 0.0001, "loss": 0.011, "step": 113820 }, { "epoch": 748.8815789473684, "grad_norm": 1.169264316558838, "learning_rate": 0.0001, "loss": 0.0098, "step": 113830 }, { "epoch": 748.9473684210526, "grad_norm": 0.9664254188537598, "learning_rate": 0.0001, "loss": 0.0127, "step": 113840 }, { "epoch": 749.0131578947369, "grad_norm": 0.7254254817962646, "learning_rate": 0.0001, "loss": 0.0119, "step": 113850 }, { "epoch": 749.078947368421, "grad_norm": 0.9984303116798401, "learning_rate": 0.0001, "loss": 0.0097, "step": 113860 }, { "epoch": 749.1447368421053, "grad_norm": 1.2939634323120117, "learning_rate": 0.0001, "loss": 0.0101, "step": 113870 }, { "epoch": 749.2105263157895, "grad_norm": 1.12336003780365, "learning_rate": 0.0001, "loss": 0.012, "step": 113880 }, { "epoch": 749.2763157894736, "grad_norm": 1.6549626588821411, "learning_rate": 0.0001, "loss": 0.0128, "step": 113890 }, { "epoch": 749.3421052631579, "grad_norm": 1.5198620557785034, "learning_rate": 0.0001, "loss": 0.0114, "step": 113900 }, { "epoch": 749.4078947368421, "grad_norm": 1.2456414699554443, "learning_rate": 0.0001, "loss": 0.0094, "step": 113910 }, { "epoch": 749.4736842105264, "grad_norm": 1.2373472452163696, "learning_rate": 0.0001, "loss": 0.0098, "step": 113920 }, { "epoch": 749.5394736842105, "grad_norm": 1.309753179550171, "learning_rate": 0.0001, "loss": 0.0109, "step": 113930 }, { "epoch": 749.6052631578947, "grad_norm": 0.9735221862792969, "learning_rate": 0.0001, "loss": 0.0085, "step": 113940 }, { "epoch": 749.671052631579, "grad_norm": 0.6393201351165771, "learning_rate": 0.0001, "loss": 0.0118, "step": 113950 }, { "epoch": 749.7368421052631, "grad_norm": 1.0009740591049194, "learning_rate": 0.0001, "loss": 0.0092, "step": 113960 }, { "epoch": 749.8026315789474, "grad_norm": 0.8014363646507263, "learning_rate": 0.0001, "loss": 0.0101, "step": 113970 }, { "epoch": 749.8684210526316, "grad_norm": 0.9935765266418457, "learning_rate": 0.0001, "loss": 0.0102, "step": 113980 }, { "epoch": 749.9342105263158, "grad_norm": 1.1239501237869263, "learning_rate": 0.0001, "loss": 0.0114, "step": 113990 }, { "epoch": 750.0, "grad_norm": 1.2619990110397339, "learning_rate": 0.0001, "loss": 0.0102, "step": 114000 }, { "epoch": 750.0657894736842, "grad_norm": 0.8633625507354736, "learning_rate": 0.0001, "loss": 0.0101, "step": 114010 }, { "epoch": 750.1315789473684, "grad_norm": 1.195512294769287, "learning_rate": 0.0001, "loss": 0.0102, "step": 114020 }, { "epoch": 750.1973684210526, "grad_norm": 1.052501916885376, "learning_rate": 0.0001, "loss": 0.0105, "step": 114030 }, { "epoch": 750.2631578947369, "grad_norm": 0.899624228477478, "learning_rate": 0.0001, "loss": 0.0128, "step": 114040 }, { "epoch": 750.328947368421, "grad_norm": 0.7613295316696167, "learning_rate": 0.0001, "loss": 0.0094, "step": 114050 }, { "epoch": 750.3947368421053, "grad_norm": 1.224990725517273, "learning_rate": 0.0001, "loss": 0.0091, "step": 114060 }, { "epoch": 750.4605263157895, "grad_norm": 1.157004475593567, "learning_rate": 0.0001, "loss": 0.0115, "step": 114070 }, { "epoch": 750.5263157894736, "grad_norm": 1.021988034248352, "learning_rate": 0.0001, "loss": 0.0099, "step": 114080 }, { "epoch": 750.5921052631579, "grad_norm": 1.2240087985992432, "learning_rate": 0.0001, "loss": 0.0111, "step": 114090 }, { "epoch": 750.6578947368421, "grad_norm": 0.8899611830711365, "learning_rate": 0.0001, "loss": 0.0107, "step": 114100 }, { "epoch": 750.7236842105264, "grad_norm": 1.063750982284546, "learning_rate": 0.0001, "loss": 0.011, "step": 114110 }, { "epoch": 750.7894736842105, "grad_norm": 0.6514599919319153, "learning_rate": 0.0001, "loss": 0.0109, "step": 114120 }, { "epoch": 750.8552631578947, "grad_norm": 0.8705139756202698, "learning_rate": 0.0001, "loss": 0.0105, "step": 114130 }, { "epoch": 750.921052631579, "grad_norm": 1.2214490175247192, "learning_rate": 0.0001, "loss": 0.0102, "step": 114140 }, { "epoch": 750.9868421052631, "grad_norm": 1.1715788841247559, "learning_rate": 0.0001, "loss": 0.0103, "step": 114150 }, { "epoch": 751.0526315789474, "grad_norm": 1.076686143875122, "learning_rate": 0.0001, "loss": 0.0111, "step": 114160 }, { "epoch": 751.1184210526316, "grad_norm": 1.0638906955718994, "learning_rate": 0.0001, "loss": 0.0095, "step": 114170 }, { "epoch": 751.1842105263158, "grad_norm": 0.9506366848945618, "learning_rate": 0.0001, "loss": 0.0099, "step": 114180 }, { "epoch": 751.25, "grad_norm": 0.9974748492240906, "learning_rate": 0.0001, "loss": 0.0109, "step": 114190 }, { "epoch": 751.3157894736842, "grad_norm": 1.044983983039856, "learning_rate": 0.0001, "loss": 0.0108, "step": 114200 }, { "epoch": 751.3815789473684, "grad_norm": 0.7726526856422424, "learning_rate": 0.0001, "loss": 0.0098, "step": 114210 }, { "epoch": 751.4473684210526, "grad_norm": 0.9929822683334351, "learning_rate": 0.0001, "loss": 0.0103, "step": 114220 }, { "epoch": 751.5131578947369, "grad_norm": 1.2815465927124023, "learning_rate": 0.0001, "loss": 0.0124, "step": 114230 }, { "epoch": 751.578947368421, "grad_norm": 1.2115155458450317, "learning_rate": 0.0001, "loss": 0.0091, "step": 114240 }, { "epoch": 751.6447368421053, "grad_norm": 1.184339165687561, "learning_rate": 0.0001, "loss": 0.011, "step": 114250 }, { "epoch": 751.7105263157895, "grad_norm": 1.2363086938858032, "learning_rate": 0.0001, "loss": 0.0097, "step": 114260 }, { "epoch": 751.7763157894736, "grad_norm": 1.2934776544570923, "learning_rate": 0.0001, "loss": 0.0105, "step": 114270 }, { "epoch": 751.8421052631579, "grad_norm": 1.1832584142684937, "learning_rate": 0.0001, "loss": 0.0089, "step": 114280 }, { "epoch": 751.9078947368421, "grad_norm": 1.02647066116333, "learning_rate": 0.0001, "loss": 0.0105, "step": 114290 }, { "epoch": 751.9736842105264, "grad_norm": 1.5524499416351318, "learning_rate": 0.0001, "loss": 0.0129, "step": 114300 }, { "epoch": 752.0394736842105, "grad_norm": 1.193410038948059, "learning_rate": 0.0001, "loss": 0.0108, "step": 114310 }, { "epoch": 752.1052631578947, "grad_norm": 1.1393669843673706, "learning_rate": 0.0001, "loss": 0.0089, "step": 114320 }, { "epoch": 752.171052631579, "grad_norm": 0.7503915429115295, "learning_rate": 0.0001, "loss": 0.0096, "step": 114330 }, { "epoch": 752.2368421052631, "grad_norm": 1.088619351387024, "learning_rate": 0.0001, "loss": 0.0087, "step": 114340 }, { "epoch": 752.3026315789474, "grad_norm": 0.9867507815361023, "learning_rate": 0.0001, "loss": 0.0132, "step": 114350 }, { "epoch": 752.3684210526316, "grad_norm": 1.2592015266418457, "learning_rate": 0.0001, "loss": 0.0104, "step": 114360 }, { "epoch": 752.4342105263158, "grad_norm": 1.365531086921692, "learning_rate": 0.0001, "loss": 0.0093, "step": 114370 }, { "epoch": 752.5, "grad_norm": 1.318558931350708, "learning_rate": 0.0001, "loss": 0.0096, "step": 114380 }, { "epoch": 752.5657894736842, "grad_norm": 0.9499292373657227, "learning_rate": 0.0001, "loss": 0.0111, "step": 114390 }, { "epoch": 752.6315789473684, "grad_norm": 1.3132816553115845, "learning_rate": 0.0001, "loss": 0.0109, "step": 114400 }, { "epoch": 752.6973684210526, "grad_norm": 1.6993619203567505, "learning_rate": 0.0001, "loss": 0.01, "step": 114410 }, { "epoch": 752.7631578947369, "grad_norm": 1.5415050983428955, "learning_rate": 0.0001, "loss": 0.0113, "step": 114420 }, { "epoch": 752.828947368421, "grad_norm": 1.364233136177063, "learning_rate": 0.0001, "loss": 0.0101, "step": 114430 }, { "epoch": 752.8947368421053, "grad_norm": 1.6695607900619507, "learning_rate": 0.0001, "loss": 0.0107, "step": 114440 }, { "epoch": 752.9605263157895, "grad_norm": 1.379301905632019, "learning_rate": 0.0001, "loss": 0.0096, "step": 114450 }, { "epoch": 753.0263157894736, "grad_norm": 1.2977728843688965, "learning_rate": 0.0001, "loss": 0.01, "step": 114460 }, { "epoch": 753.0921052631579, "grad_norm": 1.069703459739685, "learning_rate": 0.0001, "loss": 0.0109, "step": 114470 }, { "epoch": 753.1578947368421, "grad_norm": 1.064315915107727, "learning_rate": 0.0001, "loss": 0.0098, "step": 114480 }, { "epoch": 753.2236842105264, "grad_norm": 1.2300894260406494, "learning_rate": 0.0001, "loss": 0.0105, "step": 114490 }, { "epoch": 753.2894736842105, "grad_norm": 1.0416491031646729, "learning_rate": 0.0001, "loss": 0.0083, "step": 114500 }, { "epoch": 753.3552631578947, "grad_norm": 1.2523179054260254, "learning_rate": 0.0001, "loss": 0.0089, "step": 114510 }, { "epoch": 753.421052631579, "grad_norm": 1.2075591087341309, "learning_rate": 0.0001, "loss": 0.0114, "step": 114520 }, { "epoch": 753.4868421052631, "grad_norm": 1.3077597618103027, "learning_rate": 0.0001, "loss": 0.0108, "step": 114530 }, { "epoch": 753.5526315789474, "grad_norm": 0.7280270457267761, "learning_rate": 0.0001, "loss": 0.0106, "step": 114540 }, { "epoch": 753.6184210526316, "grad_norm": 1.2432559728622437, "learning_rate": 0.0001, "loss": 0.0097, "step": 114550 }, { "epoch": 753.6842105263158, "grad_norm": 1.0678569078445435, "learning_rate": 0.0001, "loss": 0.0103, "step": 114560 }, { "epoch": 753.75, "grad_norm": 0.9261232614517212, "learning_rate": 0.0001, "loss": 0.0091, "step": 114570 }, { "epoch": 753.8157894736842, "grad_norm": 1.2366281747817993, "learning_rate": 0.0001, "loss": 0.0095, "step": 114580 }, { "epoch": 753.8815789473684, "grad_norm": 1.1832107305526733, "learning_rate": 0.0001, "loss": 0.0082, "step": 114590 }, { "epoch": 753.9473684210526, "grad_norm": 1.4531916379928589, "learning_rate": 0.0001, "loss": 0.0103, "step": 114600 }, { "epoch": 754.0131578947369, "grad_norm": 1.1988731622695923, "learning_rate": 0.0001, "loss": 0.0118, "step": 114610 }, { "epoch": 754.078947368421, "grad_norm": 1.1996492147445679, "learning_rate": 0.0001, "loss": 0.0098, "step": 114620 }, { "epoch": 754.1447368421053, "grad_norm": 1.2475368976593018, "learning_rate": 0.0001, "loss": 0.0123, "step": 114630 }, { "epoch": 754.2105263157895, "grad_norm": 0.7737396359443665, "learning_rate": 0.0001, "loss": 0.0092, "step": 114640 }, { "epoch": 754.2763157894736, "grad_norm": 1.1298565864562988, "learning_rate": 0.0001, "loss": 0.009, "step": 114650 }, { "epoch": 754.3421052631579, "grad_norm": 0.8025405406951904, "learning_rate": 0.0001, "loss": 0.0099, "step": 114660 }, { "epoch": 754.4078947368421, "grad_norm": 1.1314117908477783, "learning_rate": 0.0001, "loss": 0.0099, "step": 114670 }, { "epoch": 754.4736842105264, "grad_norm": 1.032278299331665, "learning_rate": 0.0001, "loss": 0.0102, "step": 114680 }, { "epoch": 754.5394736842105, "grad_norm": 1.1019865274429321, "learning_rate": 0.0001, "loss": 0.0129, "step": 114690 }, { "epoch": 754.6052631578947, "grad_norm": 1.3335840702056885, "learning_rate": 0.0001, "loss": 0.0122, "step": 114700 }, { "epoch": 754.671052631579, "grad_norm": 0.6844580769538879, "learning_rate": 0.0001, "loss": 0.0093, "step": 114710 }, { "epoch": 754.7368421052631, "grad_norm": 0.8246498107910156, "learning_rate": 0.0001, "loss": 0.0107, "step": 114720 }, { "epoch": 754.8026315789474, "grad_norm": 1.2790837287902832, "learning_rate": 0.0001, "loss": 0.0103, "step": 114730 }, { "epoch": 754.8684210526316, "grad_norm": 1.169844388961792, "learning_rate": 0.0001, "loss": 0.01, "step": 114740 }, { "epoch": 754.9342105263158, "grad_norm": 0.9610524773597717, "learning_rate": 0.0001, "loss": 0.0098, "step": 114750 }, { "epoch": 755.0, "grad_norm": 0.9811365604400635, "learning_rate": 0.0001, "loss": 0.0095, "step": 114760 }, { "epoch": 755.0657894736842, "grad_norm": 1.2406882047653198, "learning_rate": 0.0001, "loss": 0.0094, "step": 114770 }, { "epoch": 755.1315789473684, "grad_norm": 1.348677635192871, "learning_rate": 0.0001, "loss": 0.01, "step": 114780 }, { "epoch": 755.1973684210526, "grad_norm": 1.0763145685195923, "learning_rate": 0.0001, "loss": 0.0114, "step": 114790 }, { "epoch": 755.2631578947369, "grad_norm": 0.8977418541908264, "learning_rate": 0.0001, "loss": 0.0106, "step": 114800 }, { "epoch": 755.328947368421, "grad_norm": 1.2123111486434937, "learning_rate": 0.0001, "loss": 0.0113, "step": 114810 }, { "epoch": 755.3947368421053, "grad_norm": 1.1371431350708008, "learning_rate": 0.0001, "loss": 0.0105, "step": 114820 }, { "epoch": 755.4605263157895, "grad_norm": 1.1556872129440308, "learning_rate": 0.0001, "loss": 0.0109, "step": 114830 }, { "epoch": 755.5263157894736, "grad_norm": 1.5574454069137573, "learning_rate": 0.0001, "loss": 0.0088, "step": 114840 }, { "epoch": 755.5921052631579, "grad_norm": 1.1535435914993286, "learning_rate": 0.0001, "loss": 0.0129, "step": 114850 }, { "epoch": 755.6578947368421, "grad_norm": 1.2058866024017334, "learning_rate": 0.0001, "loss": 0.0115, "step": 114860 }, { "epoch": 755.7236842105264, "grad_norm": 0.8264251947402954, "learning_rate": 0.0001, "loss": 0.0103, "step": 114870 }, { "epoch": 755.7894736842105, "grad_norm": 1.1852251291275024, "learning_rate": 0.0001, "loss": 0.0091, "step": 114880 }, { "epoch": 755.8552631578947, "grad_norm": 0.5516109466552734, "learning_rate": 0.0001, "loss": 0.0094, "step": 114890 }, { "epoch": 755.921052631579, "grad_norm": 0.8325181007385254, "learning_rate": 0.0001, "loss": 0.01, "step": 114900 }, { "epoch": 755.9868421052631, "grad_norm": 0.7445228695869446, "learning_rate": 0.0001, "loss": 0.0103, "step": 114910 }, { "epoch": 756.0526315789474, "grad_norm": 1.4654862880706787, "learning_rate": 0.0001, "loss": 0.0115, "step": 114920 }, { "epoch": 756.1184210526316, "grad_norm": 1.1833246946334839, "learning_rate": 0.0001, "loss": 0.0122, "step": 114930 }, { "epoch": 756.1842105263158, "grad_norm": 0.8253543972969055, "learning_rate": 0.0001, "loss": 0.0112, "step": 114940 }, { "epoch": 756.25, "grad_norm": 1.0825392007827759, "learning_rate": 0.0001, "loss": 0.0099, "step": 114950 }, { "epoch": 756.3157894736842, "grad_norm": 0.9824175834655762, "learning_rate": 0.0001, "loss": 0.0099, "step": 114960 }, { "epoch": 756.3815789473684, "grad_norm": 1.0814141035079956, "learning_rate": 0.0001, "loss": 0.0097, "step": 114970 }, { "epoch": 756.4473684210526, "grad_norm": 0.9042399525642395, "learning_rate": 0.0001, "loss": 0.009, "step": 114980 }, { "epoch": 756.5131578947369, "grad_norm": 1.2757545709609985, "learning_rate": 0.0001, "loss": 0.0118, "step": 114990 }, { "epoch": 756.578947368421, "grad_norm": 1.336796760559082, "learning_rate": 0.0001, "loss": 0.0096, "step": 115000 }, { "epoch": 756.6447368421053, "grad_norm": 1.2338722944259644, "learning_rate": 0.0001, "loss": 0.011, "step": 115010 }, { "epoch": 756.7105263157895, "grad_norm": 0.8805379271507263, "learning_rate": 0.0001, "loss": 0.01, "step": 115020 }, { "epoch": 756.7763157894736, "grad_norm": 1.1899020671844482, "learning_rate": 0.0001, "loss": 0.0103, "step": 115030 }, { "epoch": 756.8421052631579, "grad_norm": 1.0832873582839966, "learning_rate": 0.0001, "loss": 0.0113, "step": 115040 }, { "epoch": 756.9078947368421, "grad_norm": 0.9258546233177185, "learning_rate": 0.0001, "loss": 0.01, "step": 115050 }, { "epoch": 756.9736842105264, "grad_norm": 0.8678163290023804, "learning_rate": 0.0001, "loss": 0.0118, "step": 115060 }, { "epoch": 757.0394736842105, "grad_norm": 0.9894230365753174, "learning_rate": 0.0001, "loss": 0.0111, "step": 115070 }, { "epoch": 757.1052631578947, "grad_norm": 1.470314383506775, "learning_rate": 0.0001, "loss": 0.0095, "step": 115080 }, { "epoch": 757.171052631579, "grad_norm": 1.0584030151367188, "learning_rate": 0.0001, "loss": 0.0099, "step": 115090 }, { "epoch": 757.2368421052631, "grad_norm": 1.0429503917694092, "learning_rate": 0.0001, "loss": 0.0114, "step": 115100 }, { "epoch": 757.3026315789474, "grad_norm": 1.2286428213119507, "learning_rate": 0.0001, "loss": 0.0101, "step": 115110 }, { "epoch": 757.3684210526316, "grad_norm": 1.1292539834976196, "learning_rate": 0.0001, "loss": 0.0112, "step": 115120 }, { "epoch": 757.4342105263158, "grad_norm": 0.7512161135673523, "learning_rate": 0.0001, "loss": 0.0115, "step": 115130 }, { "epoch": 757.5, "grad_norm": 1.228721261024475, "learning_rate": 0.0001, "loss": 0.0099, "step": 115140 }, { "epoch": 757.5657894736842, "grad_norm": 1.2214051485061646, "learning_rate": 0.0001, "loss": 0.0097, "step": 115150 }, { "epoch": 757.6315789473684, "grad_norm": 1.1014398336410522, "learning_rate": 0.0001, "loss": 0.0111, "step": 115160 }, { "epoch": 757.6973684210526, "grad_norm": 0.9443309307098389, "learning_rate": 0.0001, "loss": 0.0105, "step": 115170 }, { "epoch": 757.7631578947369, "grad_norm": 0.7845607995986938, "learning_rate": 0.0001, "loss": 0.0096, "step": 115180 }, { "epoch": 757.828947368421, "grad_norm": 1.0893877744674683, "learning_rate": 0.0001, "loss": 0.0114, "step": 115190 }, { "epoch": 757.8947368421053, "grad_norm": 0.7762365937232971, "learning_rate": 0.0001, "loss": 0.0106, "step": 115200 }, { "epoch": 757.9605263157895, "grad_norm": 0.9474076628684998, "learning_rate": 0.0001, "loss": 0.0117, "step": 115210 }, { "epoch": 758.0263157894736, "grad_norm": 1.041471242904663, "learning_rate": 0.0001, "loss": 0.0095, "step": 115220 }, { "epoch": 758.0921052631579, "grad_norm": 1.2159472703933716, "learning_rate": 0.0001, "loss": 0.0106, "step": 115230 }, { "epoch": 758.1578947368421, "grad_norm": 1.0471974611282349, "learning_rate": 0.0001, "loss": 0.0101, "step": 115240 }, { "epoch": 758.2236842105264, "grad_norm": 0.8706479072570801, "learning_rate": 0.0001, "loss": 0.0124, "step": 115250 }, { "epoch": 758.2894736842105, "grad_norm": 0.847398042678833, "learning_rate": 0.0001, "loss": 0.0115, "step": 115260 }, { "epoch": 758.3552631578947, "grad_norm": 1.139387845993042, "learning_rate": 0.0001, "loss": 0.0101, "step": 115270 }, { "epoch": 758.421052631579, "grad_norm": 1.2678453922271729, "learning_rate": 0.0001, "loss": 0.0099, "step": 115280 }, { "epoch": 758.4868421052631, "grad_norm": 1.1725306510925293, "learning_rate": 0.0001, "loss": 0.0102, "step": 115290 }, { "epoch": 758.5526315789474, "grad_norm": 1.2793757915496826, "learning_rate": 0.0001, "loss": 0.0117, "step": 115300 }, { "epoch": 758.6184210526316, "grad_norm": 1.028630018234253, "learning_rate": 0.0001, "loss": 0.0131, "step": 115310 }, { "epoch": 758.6842105263158, "grad_norm": 1.1012976169586182, "learning_rate": 0.0001, "loss": 0.0098, "step": 115320 }, { "epoch": 758.75, "grad_norm": 0.8987807035446167, "learning_rate": 0.0001, "loss": 0.0083, "step": 115330 }, { "epoch": 758.8157894736842, "grad_norm": 1.493779182434082, "learning_rate": 0.0001, "loss": 0.0097, "step": 115340 }, { "epoch": 758.8815789473684, "grad_norm": 1.4419828653335571, "learning_rate": 0.0001, "loss": 0.0128, "step": 115350 }, { "epoch": 758.9473684210526, "grad_norm": 1.4344781637191772, "learning_rate": 0.0001, "loss": 0.0094, "step": 115360 }, { "epoch": 759.0131578947369, "grad_norm": 0.9399645924568176, "learning_rate": 0.0001, "loss": 0.0101, "step": 115370 }, { "epoch": 759.078947368421, "grad_norm": 0.8678160309791565, "learning_rate": 0.0001, "loss": 0.0112, "step": 115380 }, { "epoch": 759.1447368421053, "grad_norm": 1.2153128385543823, "learning_rate": 0.0001, "loss": 0.0094, "step": 115390 }, { "epoch": 759.2105263157895, "grad_norm": 1.31186044216156, "learning_rate": 0.0001, "loss": 0.0115, "step": 115400 }, { "epoch": 759.2763157894736, "grad_norm": 1.637511968612671, "learning_rate": 0.0001, "loss": 0.0098, "step": 115410 }, { "epoch": 759.3421052631579, "grad_norm": 1.3562475442886353, "learning_rate": 0.0001, "loss": 0.0089, "step": 115420 }, { "epoch": 759.4078947368421, "grad_norm": 1.192093014717102, "learning_rate": 0.0001, "loss": 0.0099, "step": 115430 }, { "epoch": 759.4736842105264, "grad_norm": 1.5335882902145386, "learning_rate": 0.0001, "loss": 0.011, "step": 115440 }, { "epoch": 759.5394736842105, "grad_norm": 1.1234040260314941, "learning_rate": 0.0001, "loss": 0.0101, "step": 115450 }, { "epoch": 759.6052631578947, "grad_norm": 1.1577470302581787, "learning_rate": 0.0001, "loss": 0.0114, "step": 115460 }, { "epoch": 759.671052631579, "grad_norm": 1.1491925716400146, "learning_rate": 0.0001, "loss": 0.01, "step": 115470 }, { "epoch": 759.7368421052631, "grad_norm": 1.103284239768982, "learning_rate": 0.0001, "loss": 0.0086, "step": 115480 }, { "epoch": 759.8026315789474, "grad_norm": 1.1908904314041138, "learning_rate": 0.0001, "loss": 0.0118, "step": 115490 }, { "epoch": 759.8684210526316, "grad_norm": 1.180321216583252, "learning_rate": 0.0001, "loss": 0.0106, "step": 115500 }, { "epoch": 759.9342105263158, "grad_norm": 1.2648427486419678, "learning_rate": 0.0001, "loss": 0.0092, "step": 115510 }, { "epoch": 760.0, "grad_norm": 1.4859260320663452, "learning_rate": 0.0001, "loss": 0.0104, "step": 115520 }, { "epoch": 760.0657894736842, "grad_norm": 1.3933957815170288, "learning_rate": 0.0001, "loss": 0.0094, "step": 115530 }, { "epoch": 760.1315789473684, "grad_norm": 1.4522415399551392, "learning_rate": 0.0001, "loss": 0.0102, "step": 115540 }, { "epoch": 760.1973684210526, "grad_norm": 1.2445298433303833, "learning_rate": 0.0001, "loss": 0.0122, "step": 115550 }, { "epoch": 760.2631578947369, "grad_norm": 1.222784399986267, "learning_rate": 0.0001, "loss": 0.0104, "step": 115560 }, { "epoch": 760.328947368421, "grad_norm": 0.9933916330337524, "learning_rate": 0.0001, "loss": 0.0085, "step": 115570 }, { "epoch": 760.3947368421053, "grad_norm": 1.02178955078125, "learning_rate": 0.0001, "loss": 0.0104, "step": 115580 }, { "epoch": 760.4605263157895, "grad_norm": 1.0598787069320679, "learning_rate": 0.0001, "loss": 0.0097, "step": 115590 }, { "epoch": 760.5263157894736, "grad_norm": 1.264055609703064, "learning_rate": 0.0001, "loss": 0.009, "step": 115600 }, { "epoch": 760.5921052631579, "grad_norm": 1.1147860288619995, "learning_rate": 0.0001, "loss": 0.0089, "step": 115610 }, { "epoch": 760.6578947368421, "grad_norm": 1.159653902053833, "learning_rate": 0.0001, "loss": 0.0089, "step": 115620 }, { "epoch": 760.7236842105264, "grad_norm": 0.6607643961906433, "learning_rate": 0.0001, "loss": 0.0103, "step": 115630 }, { "epoch": 760.7894736842105, "grad_norm": 0.9183317422866821, "learning_rate": 0.0001, "loss": 0.0099, "step": 115640 }, { "epoch": 760.8552631578947, "grad_norm": 1.034013032913208, "learning_rate": 0.0001, "loss": 0.01, "step": 115650 }, { "epoch": 760.921052631579, "grad_norm": 0.9568067789077759, "learning_rate": 0.0001, "loss": 0.0119, "step": 115660 }, { "epoch": 760.9868421052631, "grad_norm": 1.138213872909546, "learning_rate": 0.0001, "loss": 0.0099, "step": 115670 }, { "epoch": 761.0526315789474, "grad_norm": 1.0073471069335938, "learning_rate": 0.0001, "loss": 0.0094, "step": 115680 }, { "epoch": 761.1184210526316, "grad_norm": 1.5636231899261475, "learning_rate": 0.0001, "loss": 0.01, "step": 115690 }, { "epoch": 761.1842105263158, "grad_norm": 1.0607483386993408, "learning_rate": 0.0001, "loss": 0.0129, "step": 115700 }, { "epoch": 761.25, "grad_norm": 1.3943238258361816, "learning_rate": 0.0001, "loss": 0.0099, "step": 115710 }, { "epoch": 761.3157894736842, "grad_norm": 0.7510659694671631, "learning_rate": 0.0001, "loss": 0.0098, "step": 115720 }, { "epoch": 761.3815789473684, "grad_norm": 1.033464789390564, "learning_rate": 0.0001, "loss": 0.0106, "step": 115730 }, { "epoch": 761.4473684210526, "grad_norm": 1.1388704776763916, "learning_rate": 0.0001, "loss": 0.0087, "step": 115740 }, { "epoch": 761.5131578947369, "grad_norm": 1.0201410055160522, "learning_rate": 0.0001, "loss": 0.0116, "step": 115750 }, { "epoch": 761.578947368421, "grad_norm": 0.690221905708313, "learning_rate": 0.0001, "loss": 0.0097, "step": 115760 }, { "epoch": 761.6447368421053, "grad_norm": 0.7212555408477783, "learning_rate": 0.0001, "loss": 0.0092, "step": 115770 }, { "epoch": 761.7105263157895, "grad_norm": 1.3436615467071533, "learning_rate": 0.0001, "loss": 0.0096, "step": 115780 }, { "epoch": 761.7763157894736, "grad_norm": 1.2216511964797974, "learning_rate": 0.0001, "loss": 0.0098, "step": 115790 }, { "epoch": 761.8421052631579, "grad_norm": 1.0771437883377075, "learning_rate": 0.0001, "loss": 0.0096, "step": 115800 }, { "epoch": 761.9078947368421, "grad_norm": 1.3642146587371826, "learning_rate": 0.0001, "loss": 0.0082, "step": 115810 }, { "epoch": 761.9736842105264, "grad_norm": 1.320507287979126, "learning_rate": 0.0001, "loss": 0.013, "step": 115820 }, { "epoch": 762.0394736842105, "grad_norm": 0.8073870539665222, "learning_rate": 0.0001, "loss": 0.0085, "step": 115830 }, { "epoch": 762.1052631578947, "grad_norm": 1.161726951599121, "learning_rate": 0.0001, "loss": 0.0104, "step": 115840 }, { "epoch": 762.171052631579, "grad_norm": 1.1861587762832642, "learning_rate": 0.0001, "loss": 0.0086, "step": 115850 }, { "epoch": 762.2368421052631, "grad_norm": 0.9289435744285583, "learning_rate": 0.0001, "loss": 0.0097, "step": 115860 }, { "epoch": 762.3026315789474, "grad_norm": 1.0481675863265991, "learning_rate": 0.0001, "loss": 0.0098, "step": 115870 }, { "epoch": 762.3684210526316, "grad_norm": 1.2852026224136353, "learning_rate": 0.0001, "loss": 0.0105, "step": 115880 }, { "epoch": 762.4342105263158, "grad_norm": 1.4661617279052734, "learning_rate": 0.0001, "loss": 0.0132, "step": 115890 }, { "epoch": 762.5, "grad_norm": 1.2878676652908325, "learning_rate": 0.0001, "loss": 0.011, "step": 115900 }, { "epoch": 762.5657894736842, "grad_norm": 1.1841388940811157, "learning_rate": 0.0001, "loss": 0.009, "step": 115910 }, { "epoch": 762.6315789473684, "grad_norm": 1.1691356897354126, "learning_rate": 0.0001, "loss": 0.0095, "step": 115920 }, { "epoch": 762.6973684210526, "grad_norm": 1.0297003984451294, "learning_rate": 0.0001, "loss": 0.0116, "step": 115930 }, { "epoch": 762.7631578947369, "grad_norm": 1.1073315143585205, "learning_rate": 0.0001, "loss": 0.0088, "step": 115940 }, { "epoch": 762.828947368421, "grad_norm": 1.3465665578842163, "learning_rate": 0.0001, "loss": 0.0093, "step": 115950 }, { "epoch": 762.8947368421053, "grad_norm": 1.0311391353607178, "learning_rate": 0.0001, "loss": 0.0095, "step": 115960 }, { "epoch": 762.9605263157895, "grad_norm": 0.8130193948745728, "learning_rate": 0.0001, "loss": 0.0112, "step": 115970 }, { "epoch": 763.0263157894736, "grad_norm": 0.788371741771698, "learning_rate": 0.0001, "loss": 0.0099, "step": 115980 }, { "epoch": 763.0921052631579, "grad_norm": 1.0673997402191162, "learning_rate": 0.0001, "loss": 0.0097, "step": 115990 }, { "epoch": 763.1578947368421, "grad_norm": 0.967298150062561, "learning_rate": 0.0001, "loss": 0.0089, "step": 116000 }, { "epoch": 763.2236842105264, "grad_norm": 0.8940585255622864, "learning_rate": 0.0001, "loss": 0.0108, "step": 116010 }, { "epoch": 763.2894736842105, "grad_norm": 0.8840467929840088, "learning_rate": 0.0001, "loss": 0.0098, "step": 116020 }, { "epoch": 763.3552631578947, "grad_norm": 1.0176132917404175, "learning_rate": 0.0001, "loss": 0.0101, "step": 116030 }, { "epoch": 763.421052631579, "grad_norm": 1.144092321395874, "learning_rate": 0.0001, "loss": 0.0105, "step": 116040 }, { "epoch": 763.4868421052631, "grad_norm": 1.1976016759872437, "learning_rate": 0.0001, "loss": 0.0089, "step": 116050 }, { "epoch": 763.5526315789474, "grad_norm": 1.0607638359069824, "learning_rate": 0.0001, "loss": 0.0105, "step": 116060 }, { "epoch": 763.6184210526316, "grad_norm": 1.0160369873046875, "learning_rate": 0.0001, "loss": 0.0099, "step": 116070 }, { "epoch": 763.6842105263158, "grad_norm": 0.8879520297050476, "learning_rate": 0.0001, "loss": 0.0102, "step": 116080 }, { "epoch": 763.75, "grad_norm": 1.279135823249817, "learning_rate": 0.0001, "loss": 0.0099, "step": 116090 }, { "epoch": 763.8157894736842, "grad_norm": 0.8777012228965759, "learning_rate": 0.0001, "loss": 0.011, "step": 116100 }, { "epoch": 763.8815789473684, "grad_norm": 1.0412824153900146, "learning_rate": 0.0001, "loss": 0.0106, "step": 116110 }, { "epoch": 763.9473684210526, "grad_norm": 1.1568728685379028, "learning_rate": 0.0001, "loss": 0.0094, "step": 116120 }, { "epoch": 764.0131578947369, "grad_norm": 0.8233476281166077, "learning_rate": 0.0001, "loss": 0.0111, "step": 116130 }, { "epoch": 764.078947368421, "grad_norm": 0.7248982191085815, "learning_rate": 0.0001, "loss": 0.0118, "step": 116140 }, { "epoch": 764.1447368421053, "grad_norm": 0.712303876876831, "learning_rate": 0.0001, "loss": 0.0096, "step": 116150 }, { "epoch": 764.2105263157895, "grad_norm": 1.0194206237792969, "learning_rate": 0.0001, "loss": 0.0095, "step": 116160 }, { "epoch": 764.2763157894736, "grad_norm": 1.2041058540344238, "learning_rate": 0.0001, "loss": 0.0092, "step": 116170 }, { "epoch": 764.3421052631579, "grad_norm": 0.784016489982605, "learning_rate": 0.0001, "loss": 0.0114, "step": 116180 }, { "epoch": 764.4078947368421, "grad_norm": 1.4073185920715332, "learning_rate": 0.0001, "loss": 0.01, "step": 116190 }, { "epoch": 764.4736842105264, "grad_norm": 1.0075128078460693, "learning_rate": 0.0001, "loss": 0.0095, "step": 116200 }, { "epoch": 764.5394736842105, "grad_norm": 0.6819977164268494, "learning_rate": 0.0001, "loss": 0.0129, "step": 116210 }, { "epoch": 764.6052631578947, "grad_norm": 1.2221415042877197, "learning_rate": 0.0001, "loss": 0.0104, "step": 116220 }, { "epoch": 764.671052631579, "grad_norm": 1.6122931241989136, "learning_rate": 0.0001, "loss": 0.0096, "step": 116230 }, { "epoch": 764.7368421052631, "grad_norm": 1.30339515209198, "learning_rate": 0.0001, "loss": 0.0102, "step": 116240 }, { "epoch": 764.8026315789474, "grad_norm": 1.2761629819869995, "learning_rate": 0.0001, "loss": 0.0112, "step": 116250 }, { "epoch": 764.8684210526316, "grad_norm": 1.287692904472351, "learning_rate": 0.0001, "loss": 0.009, "step": 116260 }, { "epoch": 764.9342105263158, "grad_norm": 0.9369876384735107, "learning_rate": 0.0001, "loss": 0.0092, "step": 116270 }, { "epoch": 765.0, "grad_norm": 0.9917391538619995, "learning_rate": 0.0001, "loss": 0.0089, "step": 116280 }, { "epoch": 765.0657894736842, "grad_norm": 0.8295028805732727, "learning_rate": 0.0001, "loss": 0.0114, "step": 116290 }, { "epoch": 765.1315789473684, "grad_norm": 1.1534699201583862, "learning_rate": 0.0001, "loss": 0.0104, "step": 116300 }, { "epoch": 765.1973684210526, "grad_norm": 0.8612220883369446, "learning_rate": 0.0001, "loss": 0.01, "step": 116310 }, { "epoch": 765.2631578947369, "grad_norm": 1.1963870525360107, "learning_rate": 0.0001, "loss": 0.0116, "step": 116320 }, { "epoch": 765.328947368421, "grad_norm": 1.3065626621246338, "learning_rate": 0.0001, "loss": 0.0088, "step": 116330 }, { "epoch": 765.3947368421053, "grad_norm": 0.979164719581604, "learning_rate": 0.0001, "loss": 0.0088, "step": 116340 }, { "epoch": 765.4605263157895, "grad_norm": 0.9245379567146301, "learning_rate": 0.0001, "loss": 0.0104, "step": 116350 }, { "epoch": 765.5263157894736, "grad_norm": 0.8577090501785278, "learning_rate": 0.0001, "loss": 0.0087, "step": 116360 }, { "epoch": 765.5921052631579, "grad_norm": 0.9338501691818237, "learning_rate": 0.0001, "loss": 0.0091, "step": 116370 }, { "epoch": 765.6578947368421, "grad_norm": 1.0019291639328003, "learning_rate": 0.0001, "loss": 0.0101, "step": 116380 }, { "epoch": 765.7236842105264, "grad_norm": 1.2914475202560425, "learning_rate": 0.0001, "loss": 0.0127, "step": 116390 }, { "epoch": 765.7894736842105, "grad_norm": 1.1972627639770508, "learning_rate": 0.0001, "loss": 0.0094, "step": 116400 }, { "epoch": 765.8552631578947, "grad_norm": 1.0993999242782593, "learning_rate": 0.0001, "loss": 0.0095, "step": 116410 }, { "epoch": 765.921052631579, "grad_norm": 0.9448077082633972, "learning_rate": 0.0001, "loss": 0.01, "step": 116420 }, { "epoch": 765.9868421052631, "grad_norm": 0.893061637878418, "learning_rate": 0.0001, "loss": 0.0114, "step": 116430 }, { "epoch": 766.0526315789474, "grad_norm": 1.3330209255218506, "learning_rate": 0.0001, "loss": 0.0099, "step": 116440 }, { "epoch": 766.1184210526316, "grad_norm": 1.363829255104065, "learning_rate": 0.0001, "loss": 0.0114, "step": 116450 }, { "epoch": 766.1842105263158, "grad_norm": 1.1652305126190186, "learning_rate": 0.0001, "loss": 0.0109, "step": 116460 }, { "epoch": 766.25, "grad_norm": 1.1729694604873657, "learning_rate": 0.0001, "loss": 0.0103, "step": 116470 }, { "epoch": 766.3157894736842, "grad_norm": 0.8965724110603333, "learning_rate": 0.0001, "loss": 0.009, "step": 116480 }, { "epoch": 766.3815789473684, "grad_norm": 0.8969491720199585, "learning_rate": 0.0001, "loss": 0.0104, "step": 116490 }, { "epoch": 766.4473684210526, "grad_norm": 1.0770326852798462, "learning_rate": 0.0001, "loss": 0.0105, "step": 116500 }, { "epoch": 766.5131578947369, "grad_norm": 0.9563101530075073, "learning_rate": 0.0001, "loss": 0.0109, "step": 116510 }, { "epoch": 766.578947368421, "grad_norm": 1.18818199634552, "learning_rate": 0.0001, "loss": 0.01, "step": 116520 }, { "epoch": 766.6447368421053, "grad_norm": 0.7398898005485535, "learning_rate": 0.0001, "loss": 0.0097, "step": 116530 }, { "epoch": 766.7105263157895, "grad_norm": 0.6611107587814331, "learning_rate": 0.0001, "loss": 0.009, "step": 116540 }, { "epoch": 766.7763157894736, "grad_norm": 0.8535279631614685, "learning_rate": 0.0001, "loss": 0.0104, "step": 116550 }, { "epoch": 766.8421052631579, "grad_norm": 1.0214197635650635, "learning_rate": 0.0001, "loss": 0.01, "step": 116560 }, { "epoch": 766.9078947368421, "grad_norm": 1.0022668838500977, "learning_rate": 0.0001, "loss": 0.0104, "step": 116570 }, { "epoch": 766.9736842105264, "grad_norm": 1.2458581924438477, "learning_rate": 0.0001, "loss": 0.0109, "step": 116580 }, { "epoch": 767.0394736842105, "grad_norm": 1.3370084762573242, "learning_rate": 0.0001, "loss": 0.0111, "step": 116590 }, { "epoch": 767.1052631578947, "grad_norm": 0.9627024531364441, "learning_rate": 0.0001, "loss": 0.0107, "step": 116600 }, { "epoch": 767.171052631579, "grad_norm": 1.2679235935211182, "learning_rate": 0.0001, "loss": 0.011, "step": 116610 }, { "epoch": 767.2368421052631, "grad_norm": 0.9105582237243652, "learning_rate": 0.0001, "loss": 0.0112, "step": 116620 }, { "epoch": 767.3026315789474, "grad_norm": 0.9305436015129089, "learning_rate": 0.0001, "loss": 0.0089, "step": 116630 }, { "epoch": 767.3684210526316, "grad_norm": 0.8848668336868286, "learning_rate": 0.0001, "loss": 0.0097, "step": 116640 }, { "epoch": 767.4342105263158, "grad_norm": 1.0530251264572144, "learning_rate": 0.0001, "loss": 0.009, "step": 116650 }, { "epoch": 767.5, "grad_norm": 0.8415973782539368, "learning_rate": 0.0001, "loss": 0.0115, "step": 116660 }, { "epoch": 767.5657894736842, "grad_norm": 1.0781415700912476, "learning_rate": 0.0001, "loss": 0.0095, "step": 116670 }, { "epoch": 767.6315789473684, "grad_norm": 0.8499521017074585, "learning_rate": 0.0001, "loss": 0.0093, "step": 116680 }, { "epoch": 767.6973684210526, "grad_norm": 1.271653175354004, "learning_rate": 0.0001, "loss": 0.0099, "step": 116690 }, { "epoch": 767.7631578947369, "grad_norm": 1.3526909351348877, "learning_rate": 0.0001, "loss": 0.0105, "step": 116700 }, { "epoch": 767.828947368421, "grad_norm": 1.1030946969985962, "learning_rate": 0.0001, "loss": 0.012, "step": 116710 }, { "epoch": 767.8947368421053, "grad_norm": 1.2905566692352295, "learning_rate": 0.0001, "loss": 0.0095, "step": 116720 }, { "epoch": 767.9605263157895, "grad_norm": 1.2587246894836426, "learning_rate": 0.0001, "loss": 0.0095, "step": 116730 }, { "epoch": 768.0263157894736, "grad_norm": 1.263112187385559, "learning_rate": 0.0001, "loss": 0.0106, "step": 116740 }, { "epoch": 768.0921052631579, "grad_norm": 1.2640330791473389, "learning_rate": 0.0001, "loss": 0.0105, "step": 116750 }, { "epoch": 768.1578947368421, "grad_norm": 0.9966381788253784, "learning_rate": 0.0001, "loss": 0.0094, "step": 116760 }, { "epoch": 768.2236842105264, "grad_norm": 1.5357413291931152, "learning_rate": 0.0001, "loss": 0.0089, "step": 116770 }, { "epoch": 768.2894736842105, "grad_norm": 1.3589855432510376, "learning_rate": 0.0001, "loss": 0.0116, "step": 116780 }, { "epoch": 768.3552631578947, "grad_norm": 1.3303495645523071, "learning_rate": 0.0001, "loss": 0.01, "step": 116790 }, { "epoch": 768.421052631579, "grad_norm": 1.531052827835083, "learning_rate": 0.0001, "loss": 0.0083, "step": 116800 }, { "epoch": 768.4868421052631, "grad_norm": 1.2647902965545654, "learning_rate": 0.0001, "loss": 0.0093, "step": 116810 }, { "epoch": 768.5526315789474, "grad_norm": 1.2003931999206543, "learning_rate": 0.0001, "loss": 0.0082, "step": 116820 }, { "epoch": 768.6184210526316, "grad_norm": 1.2271924018859863, "learning_rate": 0.0001, "loss": 0.0103, "step": 116830 }, { "epoch": 768.6842105263158, "grad_norm": 0.8011854887008667, "learning_rate": 0.0001, "loss": 0.0092, "step": 116840 }, { "epoch": 768.75, "grad_norm": 1.0051759481430054, "learning_rate": 0.0001, "loss": 0.0102, "step": 116850 }, { "epoch": 768.8157894736842, "grad_norm": 1.0065410137176514, "learning_rate": 0.0001, "loss": 0.0105, "step": 116860 }, { "epoch": 768.8815789473684, "grad_norm": 1.094523310661316, "learning_rate": 0.0001, "loss": 0.0105, "step": 116870 }, { "epoch": 768.9473684210526, "grad_norm": 0.9108685255050659, "learning_rate": 0.0001, "loss": 0.01, "step": 116880 }, { "epoch": 769.0131578947369, "grad_norm": 1.0537774562835693, "learning_rate": 0.0001, "loss": 0.0127, "step": 116890 }, { "epoch": 769.078947368421, "grad_norm": 0.8847028613090515, "learning_rate": 0.0001, "loss": 0.0088, "step": 116900 }, { "epoch": 769.1447368421053, "grad_norm": 0.8502101302146912, "learning_rate": 0.0001, "loss": 0.0104, "step": 116910 }, { "epoch": 769.2105263157895, "grad_norm": 1.3079115152359009, "learning_rate": 0.0001, "loss": 0.0108, "step": 116920 }, { "epoch": 769.2763157894736, "grad_norm": 0.997676432132721, "learning_rate": 0.0001, "loss": 0.0112, "step": 116930 }, { "epoch": 769.3421052631579, "grad_norm": 0.9060066342353821, "learning_rate": 0.0001, "loss": 0.0102, "step": 116940 }, { "epoch": 769.4078947368421, "grad_norm": 0.9685508012771606, "learning_rate": 0.0001, "loss": 0.0096, "step": 116950 }, { "epoch": 769.4736842105264, "grad_norm": 1.053354024887085, "learning_rate": 0.0001, "loss": 0.0103, "step": 116960 }, { "epoch": 769.5394736842105, "grad_norm": 0.901861310005188, "learning_rate": 0.0001, "loss": 0.0101, "step": 116970 }, { "epoch": 769.6052631578947, "grad_norm": 1.1374804973602295, "learning_rate": 0.0001, "loss": 0.0096, "step": 116980 }, { "epoch": 769.671052631579, "grad_norm": 1.060011386871338, "learning_rate": 0.0001, "loss": 0.0115, "step": 116990 }, { "epoch": 769.7368421052631, "grad_norm": 0.8617300391197205, "learning_rate": 0.0001, "loss": 0.0088, "step": 117000 }, { "epoch": 769.8026315789474, "grad_norm": 0.7569366097450256, "learning_rate": 0.0001, "loss": 0.0098, "step": 117010 }, { "epoch": 769.8684210526316, "grad_norm": 0.9865979552268982, "learning_rate": 0.0001, "loss": 0.0116, "step": 117020 }, { "epoch": 769.9342105263158, "grad_norm": 0.6194046139717102, "learning_rate": 0.0001, "loss": 0.0102, "step": 117030 }, { "epoch": 770.0, "grad_norm": 1.2223562002182007, "learning_rate": 0.0001, "loss": 0.0102, "step": 117040 }, { "epoch": 770.0657894736842, "grad_norm": 0.9539296627044678, "learning_rate": 0.0001, "loss": 0.0101, "step": 117050 }, { "epoch": 770.1315789473684, "grad_norm": 0.7826547622680664, "learning_rate": 0.0001, "loss": 0.0119, "step": 117060 }, { "epoch": 770.1973684210526, "grad_norm": 1.4138188362121582, "learning_rate": 0.0001, "loss": 0.0106, "step": 117070 }, { "epoch": 770.2631578947369, "grad_norm": 1.2407159805297852, "learning_rate": 0.0001, "loss": 0.0104, "step": 117080 }, { "epoch": 770.328947368421, "grad_norm": 1.1824477910995483, "learning_rate": 0.0001, "loss": 0.011, "step": 117090 }, { "epoch": 770.3947368421053, "grad_norm": 0.9199476838111877, "learning_rate": 0.0001, "loss": 0.0099, "step": 117100 }, { "epoch": 770.4605263157895, "grad_norm": 0.7619976997375488, "learning_rate": 0.0001, "loss": 0.009, "step": 117110 }, { "epoch": 770.5263157894736, "grad_norm": 0.8713205456733704, "learning_rate": 0.0001, "loss": 0.0117, "step": 117120 }, { "epoch": 770.5921052631579, "grad_norm": 0.819759726524353, "learning_rate": 0.0001, "loss": 0.0104, "step": 117130 }, { "epoch": 770.6578947368421, "grad_norm": 1.4600467681884766, "learning_rate": 0.0001, "loss": 0.0108, "step": 117140 }, { "epoch": 770.7236842105264, "grad_norm": 0.7182674407958984, "learning_rate": 0.0001, "loss": 0.0097, "step": 117150 }, { "epoch": 770.7894736842105, "grad_norm": 1.2647074460983276, "learning_rate": 0.0001, "loss": 0.0111, "step": 117160 }, { "epoch": 770.8552631578947, "grad_norm": 1.4599173069000244, "learning_rate": 0.0001, "loss": 0.0116, "step": 117170 }, { "epoch": 770.921052631579, "grad_norm": 1.161414384841919, "learning_rate": 0.0001, "loss": 0.0087, "step": 117180 }, { "epoch": 770.9868421052631, "grad_norm": 0.9526516199111938, "learning_rate": 0.0001, "loss": 0.0093, "step": 117190 }, { "epoch": 771.0526315789474, "grad_norm": 1.2501287460327148, "learning_rate": 0.0001, "loss": 0.012, "step": 117200 }, { "epoch": 771.1184210526316, "grad_norm": 1.2030541896820068, "learning_rate": 0.0001, "loss": 0.0094, "step": 117210 }, { "epoch": 771.1842105263158, "grad_norm": 1.1028085947036743, "learning_rate": 0.0001, "loss": 0.0114, "step": 117220 }, { "epoch": 771.25, "grad_norm": 1.2376686334609985, "learning_rate": 0.0001, "loss": 0.0118, "step": 117230 }, { "epoch": 771.3157894736842, "grad_norm": 1.1704808473587036, "learning_rate": 0.0001, "loss": 0.0122, "step": 117240 }, { "epoch": 771.3815789473684, "grad_norm": 0.899161159992218, "learning_rate": 0.0001, "loss": 0.0096, "step": 117250 }, { "epoch": 771.4473684210526, "grad_norm": 1.2865201234817505, "learning_rate": 0.0001, "loss": 0.0095, "step": 117260 }, { "epoch": 771.5131578947369, "grad_norm": 1.0590287446975708, "learning_rate": 0.0001, "loss": 0.0088, "step": 117270 }, { "epoch": 771.578947368421, "grad_norm": 0.7633146047592163, "learning_rate": 0.0001, "loss": 0.0104, "step": 117280 }, { "epoch": 771.6447368421053, "grad_norm": 0.6437667012214661, "learning_rate": 0.0001, "loss": 0.0096, "step": 117290 }, { "epoch": 771.7105263157895, "grad_norm": 1.379522442817688, "learning_rate": 0.0001, "loss": 0.0104, "step": 117300 }, { "epoch": 771.7763157894736, "grad_norm": 1.1769373416900635, "learning_rate": 0.0001, "loss": 0.0102, "step": 117310 }, { "epoch": 771.8421052631579, "grad_norm": 1.229821801185608, "learning_rate": 0.0001, "loss": 0.0107, "step": 117320 }, { "epoch": 771.9078947368421, "grad_norm": 0.8484160304069519, "learning_rate": 0.0001, "loss": 0.0082, "step": 117330 }, { "epoch": 771.9736842105264, "grad_norm": 0.7620605230331421, "learning_rate": 0.0001, "loss": 0.0109, "step": 117340 }, { "epoch": 772.0394736842105, "grad_norm": 1.064082384109497, "learning_rate": 0.0001, "loss": 0.0124, "step": 117350 }, { "epoch": 772.1052631578947, "grad_norm": 0.9610821604728699, "learning_rate": 0.0001, "loss": 0.01, "step": 117360 }, { "epoch": 772.171052631579, "grad_norm": 1.0888431072235107, "learning_rate": 0.0001, "loss": 0.0103, "step": 117370 }, { "epoch": 772.2368421052631, "grad_norm": 0.9147794842720032, "learning_rate": 0.0001, "loss": 0.0088, "step": 117380 }, { "epoch": 772.3026315789474, "grad_norm": 0.930996298789978, "learning_rate": 0.0001, "loss": 0.0096, "step": 117390 }, { "epoch": 772.3684210526316, "grad_norm": 0.9899475574493408, "learning_rate": 0.0001, "loss": 0.0116, "step": 117400 }, { "epoch": 772.4342105263158, "grad_norm": 1.1888105869293213, "learning_rate": 0.0001, "loss": 0.0086, "step": 117410 }, { "epoch": 772.5, "grad_norm": 1.1909250020980835, "learning_rate": 0.0001, "loss": 0.0116, "step": 117420 }, { "epoch": 772.5657894736842, "grad_norm": 1.144278645515442, "learning_rate": 0.0001, "loss": 0.012, "step": 117430 }, { "epoch": 772.6315789473684, "grad_norm": 1.2057489156723022, "learning_rate": 0.0001, "loss": 0.0089, "step": 117440 }, { "epoch": 772.6973684210526, "grad_norm": 1.240365982055664, "learning_rate": 0.0001, "loss": 0.0112, "step": 117450 }, { "epoch": 772.7631578947369, "grad_norm": 1.1910370588302612, "learning_rate": 0.0001, "loss": 0.0095, "step": 117460 }, { "epoch": 772.828947368421, "grad_norm": 1.0955934524536133, "learning_rate": 0.0001, "loss": 0.0105, "step": 117470 }, { "epoch": 772.8947368421053, "grad_norm": 1.1824718713760376, "learning_rate": 0.0001, "loss": 0.0088, "step": 117480 }, { "epoch": 772.9605263157895, "grad_norm": 1.323282241821289, "learning_rate": 0.0001, "loss": 0.0098, "step": 117490 }, { "epoch": 773.0263157894736, "grad_norm": 0.9789089560508728, "learning_rate": 0.0001, "loss": 0.0096, "step": 117500 }, { "epoch": 773.0921052631579, "grad_norm": 1.0450570583343506, "learning_rate": 0.0001, "loss": 0.0117, "step": 117510 }, { "epoch": 773.1578947368421, "grad_norm": 1.3305141925811768, "learning_rate": 0.0001, "loss": 0.0105, "step": 117520 }, { "epoch": 773.2236842105264, "grad_norm": 1.228081226348877, "learning_rate": 0.0001, "loss": 0.0097, "step": 117530 }, { "epoch": 773.2894736842105, "grad_norm": 1.1966365575790405, "learning_rate": 0.0001, "loss": 0.0093, "step": 117540 }, { "epoch": 773.3552631578947, "grad_norm": 1.154728651046753, "learning_rate": 0.0001, "loss": 0.0093, "step": 117550 }, { "epoch": 773.421052631579, "grad_norm": 1.119430422782898, "learning_rate": 0.0001, "loss": 0.0107, "step": 117560 }, { "epoch": 773.4868421052631, "grad_norm": 1.3745265007019043, "learning_rate": 0.0001, "loss": 0.0109, "step": 117570 }, { "epoch": 773.5526315789474, "grad_norm": 1.3830089569091797, "learning_rate": 0.0001, "loss": 0.0082, "step": 117580 }, { "epoch": 773.6184210526316, "grad_norm": 1.2149348258972168, "learning_rate": 0.0001, "loss": 0.0104, "step": 117590 }, { "epoch": 773.6842105263158, "grad_norm": 1.084110975265503, "learning_rate": 0.0001, "loss": 0.011, "step": 117600 }, { "epoch": 773.75, "grad_norm": 1.2367154359817505, "learning_rate": 0.0001, "loss": 0.0107, "step": 117610 }, { "epoch": 773.8157894736842, "grad_norm": 1.1649274826049805, "learning_rate": 0.0001, "loss": 0.0096, "step": 117620 }, { "epoch": 773.8815789473684, "grad_norm": 1.4875725507736206, "learning_rate": 0.0001, "loss": 0.0108, "step": 117630 }, { "epoch": 773.9473684210526, "grad_norm": 0.9839734435081482, "learning_rate": 0.0001, "loss": 0.009, "step": 117640 }, { "epoch": 774.0131578947369, "grad_norm": 1.041127324104309, "learning_rate": 0.0001, "loss": 0.0108, "step": 117650 }, { "epoch": 774.078947368421, "grad_norm": 1.2891390323638916, "learning_rate": 0.0001, "loss": 0.0088, "step": 117660 }, { "epoch": 774.1447368421053, "grad_norm": 1.126435399055481, "learning_rate": 0.0001, "loss": 0.0103, "step": 117670 }, { "epoch": 774.2105263157895, "grad_norm": 1.295599341392517, "learning_rate": 0.0001, "loss": 0.0088, "step": 117680 }, { "epoch": 774.2763157894736, "grad_norm": 1.0001301765441895, "learning_rate": 0.0001, "loss": 0.0098, "step": 117690 }, { "epoch": 774.3421052631579, "grad_norm": 0.824128270149231, "learning_rate": 0.0001, "loss": 0.0108, "step": 117700 }, { "epoch": 774.4078947368421, "grad_norm": 1.071433663368225, "learning_rate": 0.0001, "loss": 0.0086, "step": 117710 }, { "epoch": 774.4736842105264, "grad_norm": 0.8961533308029175, "learning_rate": 0.0001, "loss": 0.0098, "step": 117720 }, { "epoch": 774.5394736842105, "grad_norm": 0.9159356355667114, "learning_rate": 0.0001, "loss": 0.0109, "step": 117730 }, { "epoch": 774.6052631578947, "grad_norm": 0.8452175259590149, "learning_rate": 0.0001, "loss": 0.0123, "step": 117740 }, { "epoch": 774.671052631579, "grad_norm": 0.9821557998657227, "learning_rate": 0.0001, "loss": 0.0094, "step": 117750 }, { "epoch": 774.7368421052631, "grad_norm": 1.0779387950897217, "learning_rate": 0.0001, "loss": 0.0099, "step": 117760 }, { "epoch": 774.8026315789474, "grad_norm": 1.1578203439712524, "learning_rate": 0.0001, "loss": 0.0095, "step": 117770 }, { "epoch": 774.8684210526316, "grad_norm": 1.1006511449813843, "learning_rate": 0.0001, "loss": 0.0106, "step": 117780 }, { "epoch": 774.9342105263158, "grad_norm": 1.2114787101745605, "learning_rate": 0.0001, "loss": 0.0114, "step": 117790 }, { "epoch": 775.0, "grad_norm": 0.8628996014595032, "learning_rate": 0.0001, "loss": 0.0086, "step": 117800 }, { "epoch": 775.0657894736842, "grad_norm": 1.1864206790924072, "learning_rate": 0.0001, "loss": 0.0119, "step": 117810 }, { "epoch": 775.1315789473684, "grad_norm": 1.050344467163086, "learning_rate": 0.0001, "loss": 0.0099, "step": 117820 }, { "epoch": 775.1973684210526, "grad_norm": 1.0661941766738892, "learning_rate": 0.0001, "loss": 0.0108, "step": 117830 }, { "epoch": 775.2631578947369, "grad_norm": 0.982932984828949, "learning_rate": 0.0001, "loss": 0.0088, "step": 117840 }, { "epoch": 775.328947368421, "grad_norm": 1.0128767490386963, "learning_rate": 0.0001, "loss": 0.0114, "step": 117850 }, { "epoch": 775.3947368421053, "grad_norm": 1.2008044719696045, "learning_rate": 0.0001, "loss": 0.0108, "step": 117860 }, { "epoch": 775.4605263157895, "grad_norm": 1.156033992767334, "learning_rate": 0.0001, "loss": 0.0092, "step": 117870 }, { "epoch": 775.5263157894736, "grad_norm": 1.492291808128357, "learning_rate": 0.0001, "loss": 0.0106, "step": 117880 }, { "epoch": 775.5921052631579, "grad_norm": 0.656265914440155, "learning_rate": 0.0001, "loss": 0.0094, "step": 117890 }, { "epoch": 775.6578947368421, "grad_norm": 1.0817625522613525, "learning_rate": 0.0001, "loss": 0.0114, "step": 117900 }, { "epoch": 775.7236842105264, "grad_norm": 0.741335391998291, "learning_rate": 0.0001, "loss": 0.0096, "step": 117910 }, { "epoch": 775.7894736842105, "grad_norm": 0.9007006883621216, "learning_rate": 0.0001, "loss": 0.0098, "step": 117920 }, { "epoch": 775.8552631578947, "grad_norm": 0.8411262035369873, "learning_rate": 0.0001, "loss": 0.009, "step": 117930 }, { "epoch": 775.921052631579, "grad_norm": 0.8096522688865662, "learning_rate": 0.0001, "loss": 0.0103, "step": 117940 }, { "epoch": 775.9868421052631, "grad_norm": 0.6779792308807373, "learning_rate": 0.0001, "loss": 0.0112, "step": 117950 }, { "epoch": 776.0526315789474, "grad_norm": 0.9828864336013794, "learning_rate": 0.0001, "loss": 0.0101, "step": 117960 }, { "epoch": 776.1184210526316, "grad_norm": 1.536733627319336, "learning_rate": 0.0001, "loss": 0.0109, "step": 117970 }, { "epoch": 776.1842105263158, "grad_norm": 1.2087494134902954, "learning_rate": 0.0001, "loss": 0.0083, "step": 117980 }, { "epoch": 776.25, "grad_norm": 0.7931889295578003, "learning_rate": 0.0001, "loss": 0.0128, "step": 117990 }, { "epoch": 776.3157894736842, "grad_norm": 0.8839361667633057, "learning_rate": 0.0001, "loss": 0.0103, "step": 118000 }, { "epoch": 776.3815789473684, "grad_norm": 1.1379063129425049, "learning_rate": 0.0001, "loss": 0.0101, "step": 118010 }, { "epoch": 776.4473684210526, "grad_norm": 1.1438642740249634, "learning_rate": 0.0001, "loss": 0.0094, "step": 118020 }, { "epoch": 776.5131578947369, "grad_norm": 0.9373273253440857, "learning_rate": 0.0001, "loss": 0.011, "step": 118030 }, { "epoch": 776.578947368421, "grad_norm": 1.2530593872070312, "learning_rate": 0.0001, "loss": 0.0091, "step": 118040 }, { "epoch": 776.6447368421053, "grad_norm": 1.2131603956222534, "learning_rate": 0.0001, "loss": 0.0119, "step": 118050 }, { "epoch": 776.7105263157895, "grad_norm": 0.7886523008346558, "learning_rate": 0.0001, "loss": 0.0143, "step": 118060 }, { "epoch": 776.7763157894736, "grad_norm": 0.6613753437995911, "learning_rate": 0.0001, "loss": 0.0102, "step": 118070 }, { "epoch": 776.8421052631579, "grad_norm": 1.1990413665771484, "learning_rate": 0.0001, "loss": 0.0101, "step": 118080 }, { "epoch": 776.9078947368421, "grad_norm": 1.0473389625549316, "learning_rate": 0.0001, "loss": 0.0102, "step": 118090 }, { "epoch": 776.9736842105264, "grad_norm": 0.9223041534423828, "learning_rate": 0.0001, "loss": 0.0121, "step": 118100 }, { "epoch": 777.0394736842105, "grad_norm": 0.7598844170570374, "learning_rate": 0.0001, "loss": 0.0119, "step": 118110 }, { "epoch": 777.1052631578947, "grad_norm": 1.1341654062271118, "learning_rate": 0.0001, "loss": 0.012, "step": 118120 }, { "epoch": 777.171052631579, "grad_norm": 0.5702991485595703, "learning_rate": 0.0001, "loss": 0.0129, "step": 118130 }, { "epoch": 777.2368421052631, "grad_norm": 1.0409194231033325, "learning_rate": 0.0001, "loss": 0.0138, "step": 118140 }, { "epoch": 777.3026315789474, "grad_norm": 1.469417691230774, "learning_rate": 0.0001, "loss": 0.0135, "step": 118150 }, { "epoch": 777.3684210526316, "grad_norm": 1.1192512512207031, "learning_rate": 0.0001, "loss": 0.0163, "step": 118160 }, { "epoch": 777.4342105263158, "grad_norm": 1.1567856073379517, "learning_rate": 0.0001, "loss": 0.0134, "step": 118170 }, { "epoch": 777.5, "grad_norm": 0.8657984733581543, "learning_rate": 0.0001, "loss": 0.0163, "step": 118180 }, { "epoch": 777.5657894736842, "grad_norm": 0.8693804740905762, "learning_rate": 0.0001, "loss": 0.0138, "step": 118190 }, { "epoch": 777.6315789473684, "grad_norm": 1.0661799907684326, "learning_rate": 0.0001, "loss": 0.0136, "step": 118200 }, { "epoch": 777.6973684210526, "grad_norm": 0.9497994184494019, "learning_rate": 0.0001, "loss": 0.0115, "step": 118210 }, { "epoch": 777.7631578947369, "grad_norm": 1.2563682794570923, "learning_rate": 0.0001, "loss": 0.0138, "step": 118220 }, { "epoch": 777.828947368421, "grad_norm": 0.7826574444770813, "learning_rate": 0.0001, "loss": 0.0136, "step": 118230 }, { "epoch": 777.8947368421053, "grad_norm": 1.0407278537750244, "learning_rate": 0.0001, "loss": 0.0144, "step": 118240 }, { "epoch": 777.9605263157895, "grad_norm": 1.288908839225769, "learning_rate": 0.0001, "loss": 0.0149, "step": 118250 }, { "epoch": 778.0263157894736, "grad_norm": 1.0953668355941772, "learning_rate": 0.0001, "loss": 0.0129, "step": 118260 }, { "epoch": 778.0921052631579, "grad_norm": 1.315590500831604, "learning_rate": 0.0001, "loss": 0.0124, "step": 118270 }, { "epoch": 778.1578947368421, "grad_norm": 1.1540820598602295, "learning_rate": 0.0001, "loss": 0.0141, "step": 118280 }, { "epoch": 778.2236842105264, "grad_norm": 1.0182602405548096, "learning_rate": 0.0001, "loss": 0.0112, "step": 118290 }, { "epoch": 778.2894736842105, "grad_norm": 1.1212661266326904, "learning_rate": 0.0001, "loss": 0.0114, "step": 118300 }, { "epoch": 778.3552631578947, "grad_norm": 0.8415754437446594, "learning_rate": 0.0001, "loss": 0.0107, "step": 118310 }, { "epoch": 778.421052631579, "grad_norm": 0.8753746747970581, "learning_rate": 0.0001, "loss": 0.0119, "step": 118320 }, { "epoch": 778.4868421052631, "grad_norm": 1.123030662536621, "learning_rate": 0.0001, "loss": 0.012, "step": 118330 }, { "epoch": 778.5526315789474, "grad_norm": 1.058523416519165, "learning_rate": 0.0001, "loss": 0.0114, "step": 118340 }, { "epoch": 778.6184210526316, "grad_norm": 1.2867323160171509, "learning_rate": 0.0001, "loss": 0.0111, "step": 118350 }, { "epoch": 778.6842105263158, "grad_norm": 1.0465610027313232, "learning_rate": 0.0001, "loss": 0.0103, "step": 118360 }, { "epoch": 778.75, "grad_norm": 1.1913409233093262, "learning_rate": 0.0001, "loss": 0.0114, "step": 118370 }, { "epoch": 778.8157894736842, "grad_norm": 1.0035961866378784, "learning_rate": 0.0001, "loss": 0.0127, "step": 118380 }, { "epoch": 778.8815789473684, "grad_norm": 1.2081618309020996, "learning_rate": 0.0001, "loss": 0.012, "step": 118390 }, { "epoch": 778.9473684210526, "grad_norm": 0.9412795901298523, "learning_rate": 0.0001, "loss": 0.0115, "step": 118400 }, { "epoch": 779.0131578947369, "grad_norm": 1.1386052370071411, "learning_rate": 0.0001, "loss": 0.012, "step": 118410 }, { "epoch": 779.078947368421, "grad_norm": 1.215187907218933, "learning_rate": 0.0001, "loss": 0.0143, "step": 118420 }, { "epoch": 779.1447368421053, "grad_norm": 1.0938812494277954, "learning_rate": 0.0001, "loss": 0.0109, "step": 118430 }, { "epoch": 779.2105263157895, "grad_norm": 1.063369631767273, "learning_rate": 0.0001, "loss": 0.0132, "step": 118440 }, { "epoch": 779.2763157894736, "grad_norm": 1.0048210620880127, "learning_rate": 0.0001, "loss": 0.0094, "step": 118450 }, { "epoch": 779.3421052631579, "grad_norm": 1.2882784605026245, "learning_rate": 0.0001, "loss": 0.0107, "step": 118460 }, { "epoch": 779.4078947368421, "grad_norm": 0.9867761731147766, "learning_rate": 0.0001, "loss": 0.0108, "step": 118470 }, { "epoch": 779.4736842105264, "grad_norm": 1.0761276483535767, "learning_rate": 0.0001, "loss": 0.011, "step": 118480 }, { "epoch": 779.5394736842105, "grad_norm": 0.9819127321243286, "learning_rate": 0.0001, "loss": 0.0115, "step": 118490 }, { "epoch": 779.6052631578947, "grad_norm": 1.317988395690918, "learning_rate": 0.0001, "loss": 0.0095, "step": 118500 }, { "epoch": 779.671052631579, "grad_norm": 0.8267877101898193, "learning_rate": 0.0001, "loss": 0.0123, "step": 118510 }, { "epoch": 779.7368421052631, "grad_norm": 1.0201878547668457, "learning_rate": 0.0001, "loss": 0.0115, "step": 118520 }, { "epoch": 779.8026315789474, "grad_norm": 0.907877504825592, "learning_rate": 0.0001, "loss": 0.0089, "step": 118530 }, { "epoch": 779.8684210526316, "grad_norm": 0.8943087458610535, "learning_rate": 0.0001, "loss": 0.0092, "step": 118540 }, { "epoch": 779.9342105263158, "grad_norm": 1.0807287693023682, "learning_rate": 0.0001, "loss": 0.0108, "step": 118550 }, { "epoch": 780.0, "grad_norm": 1.523391842842102, "learning_rate": 0.0001, "loss": 0.0127, "step": 118560 }, { "epoch": 780.0657894736842, "grad_norm": 0.7130796313285828, "learning_rate": 0.0001, "loss": 0.0104, "step": 118570 }, { "epoch": 780.1315789473684, "grad_norm": 1.3061423301696777, "learning_rate": 0.0001, "loss": 0.0121, "step": 118580 }, { "epoch": 780.1973684210526, "grad_norm": 1.1210944652557373, "learning_rate": 0.0001, "loss": 0.0107, "step": 118590 }, { "epoch": 780.2631578947369, "grad_norm": 1.2665170431137085, "learning_rate": 0.0001, "loss": 0.0107, "step": 118600 }, { "epoch": 780.328947368421, "grad_norm": 1.1752995252609253, "learning_rate": 0.0001, "loss": 0.013, "step": 118610 }, { "epoch": 780.3947368421053, "grad_norm": 1.102726936340332, "learning_rate": 0.0001, "loss": 0.0096, "step": 118620 }, { "epoch": 780.4605263157895, "grad_norm": 1.2804216146469116, "learning_rate": 0.0001, "loss": 0.0104, "step": 118630 }, { "epoch": 780.5263157894736, "grad_norm": 1.0606364011764526, "learning_rate": 0.0001, "loss": 0.0108, "step": 118640 }, { "epoch": 780.5921052631579, "grad_norm": 0.9021885991096497, "learning_rate": 0.0001, "loss": 0.0105, "step": 118650 }, { "epoch": 780.6578947368421, "grad_norm": 1.3025189638137817, "learning_rate": 0.0001, "loss": 0.0118, "step": 118660 }, { "epoch": 780.7236842105264, "grad_norm": 1.1469581127166748, "learning_rate": 0.0001, "loss": 0.0091, "step": 118670 }, { "epoch": 780.7894736842105, "grad_norm": 1.1277015209197998, "learning_rate": 0.0001, "loss": 0.0101, "step": 118680 }, { "epoch": 780.8552631578947, "grad_norm": 1.2387878894805908, "learning_rate": 0.0001, "loss": 0.0102, "step": 118690 }, { "epoch": 780.921052631579, "grad_norm": 0.9669885635375977, "learning_rate": 0.0001, "loss": 0.009, "step": 118700 }, { "epoch": 780.9868421052631, "grad_norm": 1.0439066886901855, "learning_rate": 0.0001, "loss": 0.0122, "step": 118710 }, { "epoch": 781.0526315789474, "grad_norm": 0.8005104660987854, "learning_rate": 0.0001, "loss": 0.0099, "step": 118720 }, { "epoch": 781.1184210526316, "grad_norm": 1.0265685319900513, "learning_rate": 0.0001, "loss": 0.0128, "step": 118730 }, { "epoch": 781.1842105263158, "grad_norm": 1.4636842012405396, "learning_rate": 0.0001, "loss": 0.0102, "step": 118740 }, { "epoch": 781.25, "grad_norm": 1.312772512435913, "learning_rate": 0.0001, "loss": 0.0108, "step": 118750 }, { "epoch": 781.3157894736842, "grad_norm": 1.1143829822540283, "learning_rate": 0.0001, "loss": 0.0122, "step": 118760 }, { "epoch": 781.3815789473684, "grad_norm": 1.134764313697815, "learning_rate": 0.0001, "loss": 0.0092, "step": 118770 }, { "epoch": 781.4473684210526, "grad_norm": 1.432435393333435, "learning_rate": 0.0001, "loss": 0.0102, "step": 118780 }, { "epoch": 781.5131578947369, "grad_norm": 1.2253336906433105, "learning_rate": 0.0001, "loss": 0.0109, "step": 118790 }, { "epoch": 781.578947368421, "grad_norm": 1.319845199584961, "learning_rate": 0.0001, "loss": 0.0086, "step": 118800 }, { "epoch": 781.6447368421053, "grad_norm": 1.283031940460205, "learning_rate": 0.0001, "loss": 0.0095, "step": 118810 }, { "epoch": 781.7105263157895, "grad_norm": 1.425012469291687, "learning_rate": 0.0001, "loss": 0.0101, "step": 118820 }, { "epoch": 781.7763157894736, "grad_norm": 0.8615345358848572, "learning_rate": 0.0001, "loss": 0.0093, "step": 118830 }, { "epoch": 781.8421052631579, "grad_norm": 1.3266713619232178, "learning_rate": 0.0001, "loss": 0.0107, "step": 118840 }, { "epoch": 781.9078947368421, "grad_norm": 1.3085308074951172, "learning_rate": 0.0001, "loss": 0.009, "step": 118850 }, { "epoch": 781.9736842105264, "grad_norm": 1.1384379863739014, "learning_rate": 0.0001, "loss": 0.0106, "step": 118860 }, { "epoch": 782.0394736842105, "grad_norm": 1.471619963645935, "learning_rate": 0.0001, "loss": 0.0093, "step": 118870 }, { "epoch": 782.1052631578947, "grad_norm": 1.2752647399902344, "learning_rate": 0.0001, "loss": 0.011, "step": 118880 }, { "epoch": 782.171052631579, "grad_norm": 1.337410569190979, "learning_rate": 0.0001, "loss": 0.0097, "step": 118890 }, { "epoch": 782.2368421052631, "grad_norm": 1.0045582056045532, "learning_rate": 0.0001, "loss": 0.0118, "step": 118900 }, { "epoch": 782.3026315789474, "grad_norm": 1.1922293901443481, "learning_rate": 0.0001, "loss": 0.0098, "step": 118910 }, { "epoch": 782.3684210526316, "grad_norm": 1.145751714706421, "learning_rate": 0.0001, "loss": 0.0089, "step": 118920 }, { "epoch": 782.4342105263158, "grad_norm": 1.2008522748947144, "learning_rate": 0.0001, "loss": 0.0092, "step": 118930 }, { "epoch": 782.5, "grad_norm": 1.2413567304611206, "learning_rate": 0.0001, "loss": 0.0097, "step": 118940 }, { "epoch": 782.5657894736842, "grad_norm": 0.9739688634872437, "learning_rate": 0.0001, "loss": 0.0089, "step": 118950 }, { "epoch": 782.6315789473684, "grad_norm": 0.9391964077949524, "learning_rate": 0.0001, "loss": 0.0108, "step": 118960 }, { "epoch": 782.6973684210526, "grad_norm": 1.4125076532363892, "learning_rate": 0.0001, "loss": 0.0095, "step": 118970 }, { "epoch": 782.7631578947369, "grad_norm": 1.2959308624267578, "learning_rate": 0.0001, "loss": 0.0123, "step": 118980 }, { "epoch": 782.828947368421, "grad_norm": 1.301820993423462, "learning_rate": 0.0001, "loss": 0.0103, "step": 118990 }, { "epoch": 782.8947368421053, "grad_norm": 1.200832724571228, "learning_rate": 0.0001, "loss": 0.008, "step": 119000 }, { "epoch": 782.9605263157895, "grad_norm": 0.8034687042236328, "learning_rate": 0.0001, "loss": 0.009, "step": 119010 }, { "epoch": 783.0263157894736, "grad_norm": 1.1737399101257324, "learning_rate": 0.0001, "loss": 0.0109, "step": 119020 }, { "epoch": 783.0921052631579, "grad_norm": 1.0292094945907593, "learning_rate": 0.0001, "loss": 0.0096, "step": 119030 }, { "epoch": 783.1578947368421, "grad_norm": 0.9858736395835876, "learning_rate": 0.0001, "loss": 0.0101, "step": 119040 }, { "epoch": 783.2236842105264, "grad_norm": 1.2504321336746216, "learning_rate": 0.0001, "loss": 0.0122, "step": 119050 }, { "epoch": 783.2894736842105, "grad_norm": 1.0716443061828613, "learning_rate": 0.0001, "loss": 0.0111, "step": 119060 }, { "epoch": 783.3552631578947, "grad_norm": 0.9858033657073975, "learning_rate": 0.0001, "loss": 0.0097, "step": 119070 }, { "epoch": 783.421052631579, "grad_norm": 1.0344947576522827, "learning_rate": 0.0001, "loss": 0.0093, "step": 119080 }, { "epoch": 783.4868421052631, "grad_norm": 1.1620103120803833, "learning_rate": 0.0001, "loss": 0.0087, "step": 119090 }, { "epoch": 783.5526315789474, "grad_norm": 1.2477577924728394, "learning_rate": 0.0001, "loss": 0.0118, "step": 119100 }, { "epoch": 783.6184210526316, "grad_norm": 0.96187824010849, "learning_rate": 0.0001, "loss": 0.0113, "step": 119110 }, { "epoch": 783.6842105263158, "grad_norm": 0.9657493829727173, "learning_rate": 0.0001, "loss": 0.01, "step": 119120 }, { "epoch": 783.75, "grad_norm": 1.1220613718032837, "learning_rate": 0.0001, "loss": 0.0105, "step": 119130 }, { "epoch": 783.8157894736842, "grad_norm": 0.9884085059165955, "learning_rate": 0.0001, "loss": 0.0092, "step": 119140 }, { "epoch": 783.8815789473684, "grad_norm": 1.1578776836395264, "learning_rate": 0.0001, "loss": 0.0099, "step": 119150 }, { "epoch": 783.9473684210526, "grad_norm": 1.015238642692566, "learning_rate": 0.0001, "loss": 0.0091, "step": 119160 }, { "epoch": 784.0131578947369, "grad_norm": 1.1379379034042358, "learning_rate": 0.0001, "loss": 0.0097, "step": 119170 }, { "epoch": 784.078947368421, "grad_norm": 1.041599154472351, "learning_rate": 0.0001, "loss": 0.01, "step": 119180 }, { "epoch": 784.1447368421053, "grad_norm": 1.3538576364517212, "learning_rate": 0.0001, "loss": 0.0099, "step": 119190 }, { "epoch": 784.2105263157895, "grad_norm": 1.2984648942947388, "learning_rate": 0.0001, "loss": 0.0098, "step": 119200 }, { "epoch": 784.2763157894736, "grad_norm": 1.5270977020263672, "learning_rate": 0.0001, "loss": 0.0083, "step": 119210 }, { "epoch": 784.3421052631579, "grad_norm": 0.9184948801994324, "learning_rate": 0.0001, "loss": 0.0086, "step": 119220 }, { "epoch": 784.4078947368421, "grad_norm": 1.3463494777679443, "learning_rate": 0.0001, "loss": 0.0076, "step": 119230 }, { "epoch": 784.4736842105264, "grad_norm": 1.1868815422058105, "learning_rate": 0.0001, "loss": 0.0119, "step": 119240 }, { "epoch": 784.5394736842105, "grad_norm": 1.198378324508667, "learning_rate": 0.0001, "loss": 0.0092, "step": 119250 }, { "epoch": 784.6052631578947, "grad_norm": 1.215132236480713, "learning_rate": 0.0001, "loss": 0.0097, "step": 119260 }, { "epoch": 784.671052631579, "grad_norm": 1.1473467350006104, "learning_rate": 0.0001, "loss": 0.0105, "step": 119270 }, { "epoch": 784.7368421052631, "grad_norm": 1.5379295349121094, "learning_rate": 0.0001, "loss": 0.0114, "step": 119280 }, { "epoch": 784.8026315789474, "grad_norm": 1.4966095685958862, "learning_rate": 0.0001, "loss": 0.0098, "step": 119290 }, { "epoch": 784.8684210526316, "grad_norm": 1.0885952711105347, "learning_rate": 0.0001, "loss": 0.0116, "step": 119300 }, { "epoch": 784.9342105263158, "grad_norm": 0.980161726474762, "learning_rate": 0.0001, "loss": 0.0079, "step": 119310 }, { "epoch": 785.0, "grad_norm": 0.916150689125061, "learning_rate": 0.0001, "loss": 0.0105, "step": 119320 }, { "epoch": 785.0657894736842, "grad_norm": 1.2834523916244507, "learning_rate": 0.0001, "loss": 0.0079, "step": 119330 }, { "epoch": 785.1315789473684, "grad_norm": 1.2613166570663452, "learning_rate": 0.0001, "loss": 0.0139, "step": 119340 }, { "epoch": 785.1973684210526, "grad_norm": 1.0938130617141724, "learning_rate": 0.0001, "loss": 0.0101, "step": 119350 }, { "epoch": 785.2631578947369, "grad_norm": 1.0228303670883179, "learning_rate": 0.0001, "loss": 0.0097, "step": 119360 }, { "epoch": 785.328947368421, "grad_norm": 0.9948782920837402, "learning_rate": 0.0001, "loss": 0.0077, "step": 119370 }, { "epoch": 785.3947368421053, "grad_norm": 1.0518125295639038, "learning_rate": 0.0001, "loss": 0.0103, "step": 119380 }, { "epoch": 785.4605263157895, "grad_norm": 0.9449158906936646, "learning_rate": 0.0001, "loss": 0.0083, "step": 119390 }, { "epoch": 785.5263157894736, "grad_norm": 0.914734959602356, "learning_rate": 0.0001, "loss": 0.0112, "step": 119400 }, { "epoch": 785.5921052631579, "grad_norm": 1.529738426208496, "learning_rate": 0.0001, "loss": 0.0103, "step": 119410 }, { "epoch": 785.6578947368421, "grad_norm": 1.1134679317474365, "learning_rate": 0.0001, "loss": 0.01, "step": 119420 }, { "epoch": 785.7236842105264, "grad_norm": 0.9557541012763977, "learning_rate": 0.0001, "loss": 0.01, "step": 119430 }, { "epoch": 785.7894736842105, "grad_norm": 1.2106385231018066, "learning_rate": 0.0001, "loss": 0.01, "step": 119440 }, { "epoch": 785.8552631578947, "grad_norm": 0.9082741141319275, "learning_rate": 0.0001, "loss": 0.0107, "step": 119450 }, { "epoch": 785.921052631579, "grad_norm": 1.3996853828430176, "learning_rate": 0.0001, "loss": 0.0092, "step": 119460 }, { "epoch": 785.9868421052631, "grad_norm": 1.2628298997879028, "learning_rate": 0.0001, "loss": 0.0086, "step": 119470 }, { "epoch": 786.0526315789474, "grad_norm": 1.445421814918518, "learning_rate": 0.0001, "loss": 0.0105, "step": 119480 }, { "epoch": 786.1184210526316, "grad_norm": 0.8487147092819214, "learning_rate": 0.0001, "loss": 0.0091, "step": 119490 }, { "epoch": 786.1842105263158, "grad_norm": 0.9393507242202759, "learning_rate": 0.0001, "loss": 0.0097, "step": 119500 }, { "epoch": 786.25, "grad_norm": 1.010326862335205, "learning_rate": 0.0001, "loss": 0.0088, "step": 119510 }, { "epoch": 786.3157894736842, "grad_norm": 1.1670511960983276, "learning_rate": 0.0001, "loss": 0.0092, "step": 119520 }, { "epoch": 786.3815789473684, "grad_norm": 0.9680734872817993, "learning_rate": 0.0001, "loss": 0.01, "step": 119530 }, { "epoch": 786.4473684210526, "grad_norm": 1.2515125274658203, "learning_rate": 0.0001, "loss": 0.011, "step": 119540 }, { "epoch": 786.5131578947369, "grad_norm": 1.0361891984939575, "learning_rate": 0.0001, "loss": 0.0085, "step": 119550 }, { "epoch": 786.578947368421, "grad_norm": 0.6271700859069824, "learning_rate": 0.0001, "loss": 0.009, "step": 119560 }, { "epoch": 786.6447368421053, "grad_norm": 0.9543516635894775, "learning_rate": 0.0001, "loss": 0.0102, "step": 119570 }, { "epoch": 786.7105263157895, "grad_norm": 1.0879392623901367, "learning_rate": 0.0001, "loss": 0.0116, "step": 119580 }, { "epoch": 786.7763157894736, "grad_norm": 0.9335300922393799, "learning_rate": 0.0001, "loss": 0.0102, "step": 119590 }, { "epoch": 786.8421052631579, "grad_norm": 1.244016408920288, "learning_rate": 0.0001, "loss": 0.0084, "step": 119600 }, { "epoch": 786.9078947368421, "grad_norm": 1.3877922296524048, "learning_rate": 0.0001, "loss": 0.0119, "step": 119610 }, { "epoch": 786.9736842105264, "grad_norm": 1.0246111154556274, "learning_rate": 0.0001, "loss": 0.0107, "step": 119620 }, { "epoch": 787.0394736842105, "grad_norm": 1.1666784286499023, "learning_rate": 0.0001, "loss": 0.0087, "step": 119630 }, { "epoch": 787.1052631578947, "grad_norm": 1.422247052192688, "learning_rate": 0.0001, "loss": 0.0101, "step": 119640 }, { "epoch": 787.171052631579, "grad_norm": 1.2223421335220337, "learning_rate": 0.0001, "loss": 0.0125, "step": 119650 }, { "epoch": 787.2368421052631, "grad_norm": 1.0068063735961914, "learning_rate": 0.0001, "loss": 0.0103, "step": 119660 }, { "epoch": 787.3026315789474, "grad_norm": 1.4940046072006226, "learning_rate": 0.0001, "loss": 0.0102, "step": 119670 }, { "epoch": 787.3684210526316, "grad_norm": 0.9652587175369263, "learning_rate": 0.0001, "loss": 0.0085, "step": 119680 }, { "epoch": 787.4342105263158, "grad_norm": 0.7590309977531433, "learning_rate": 0.0001, "loss": 0.0095, "step": 119690 }, { "epoch": 787.5, "grad_norm": 1.0170762538909912, "learning_rate": 0.0001, "loss": 0.0118, "step": 119700 }, { "epoch": 787.5657894736842, "grad_norm": 1.0947462320327759, "learning_rate": 0.0001, "loss": 0.0104, "step": 119710 }, { "epoch": 787.6315789473684, "grad_norm": 0.9564235806465149, "learning_rate": 0.0001, "loss": 0.0098, "step": 119720 }, { "epoch": 787.6973684210526, "grad_norm": 0.8482645153999329, "learning_rate": 0.0001, "loss": 0.0099, "step": 119730 }, { "epoch": 787.7631578947369, "grad_norm": 1.2533555030822754, "learning_rate": 0.0001, "loss": 0.0096, "step": 119740 }, { "epoch": 787.828947368421, "grad_norm": 1.3869478702545166, "learning_rate": 0.0001, "loss": 0.0101, "step": 119750 }, { "epoch": 787.8947368421053, "grad_norm": 1.1893796920776367, "learning_rate": 0.0001, "loss": 0.0092, "step": 119760 }, { "epoch": 787.9605263157895, "grad_norm": 1.3825172185897827, "learning_rate": 0.0001, "loss": 0.0119, "step": 119770 }, { "epoch": 788.0263157894736, "grad_norm": 1.1921755075454712, "learning_rate": 0.0001, "loss": 0.0102, "step": 119780 }, { "epoch": 788.0921052631579, "grad_norm": 1.175467610359192, "learning_rate": 0.0001, "loss": 0.0084, "step": 119790 }, { "epoch": 788.1578947368421, "grad_norm": 1.1586354970932007, "learning_rate": 0.0001, "loss": 0.0105, "step": 119800 }, { "epoch": 788.2236842105264, "grad_norm": 0.9912821054458618, "learning_rate": 0.0001, "loss": 0.0102, "step": 119810 }, { "epoch": 788.2894736842105, "grad_norm": 0.786872923374176, "learning_rate": 0.0001, "loss": 0.0084, "step": 119820 }, { "epoch": 788.3552631578947, "grad_norm": 0.9939967393875122, "learning_rate": 0.0001, "loss": 0.0111, "step": 119830 }, { "epoch": 788.421052631579, "grad_norm": 0.7620839476585388, "learning_rate": 0.0001, "loss": 0.0088, "step": 119840 }, { "epoch": 788.4868421052631, "grad_norm": 1.0876280069351196, "learning_rate": 0.0001, "loss": 0.0111, "step": 119850 }, { "epoch": 788.5526315789474, "grad_norm": 0.9076918959617615, "learning_rate": 0.0001, "loss": 0.0112, "step": 119860 }, { "epoch": 788.6184210526316, "grad_norm": 1.2486871480941772, "learning_rate": 0.0001, "loss": 0.0097, "step": 119870 }, { "epoch": 788.6842105263158, "grad_norm": 0.9349822402000427, "learning_rate": 0.0001, "loss": 0.0093, "step": 119880 }, { "epoch": 788.75, "grad_norm": 1.160534381866455, "learning_rate": 0.0001, "loss": 0.0109, "step": 119890 }, { "epoch": 788.8157894736842, "grad_norm": 1.1158519983291626, "learning_rate": 0.0001, "loss": 0.0092, "step": 119900 }, { "epoch": 788.8815789473684, "grad_norm": 1.0565320253372192, "learning_rate": 0.0001, "loss": 0.0119, "step": 119910 }, { "epoch": 788.9473684210526, "grad_norm": 1.1070889234542847, "learning_rate": 0.0001, "loss": 0.009, "step": 119920 }, { "epoch": 789.0131578947369, "grad_norm": 1.0946141481399536, "learning_rate": 0.0001, "loss": 0.0099, "step": 119930 }, { "epoch": 789.078947368421, "grad_norm": 1.1808589696884155, "learning_rate": 0.0001, "loss": 0.0087, "step": 119940 }, { "epoch": 789.1447368421053, "grad_norm": 1.0160439014434814, "learning_rate": 0.0001, "loss": 0.0082, "step": 119950 }, { "epoch": 789.2105263157895, "grad_norm": 1.3254737854003906, "learning_rate": 0.0001, "loss": 0.0089, "step": 119960 }, { "epoch": 789.2763157894736, "grad_norm": 1.3260244131088257, "learning_rate": 0.0001, "loss": 0.011, "step": 119970 }, { "epoch": 789.3421052631579, "grad_norm": 1.212821125984192, "learning_rate": 0.0001, "loss": 0.0094, "step": 119980 }, { "epoch": 789.4078947368421, "grad_norm": 1.2427418231964111, "learning_rate": 0.0001, "loss": 0.0081, "step": 119990 }, { "epoch": 789.4736842105264, "grad_norm": 0.9601447582244873, "learning_rate": 0.0001, "loss": 0.0091, "step": 120000 }, { "epoch": 789.5394736842105, "grad_norm": 1.4246236085891724, "learning_rate": 0.0001, "loss": 0.0125, "step": 120010 }, { "epoch": 789.6052631578947, "grad_norm": 1.162190556526184, "learning_rate": 0.0001, "loss": 0.0099, "step": 120020 }, { "epoch": 789.671052631579, "grad_norm": 1.3025704622268677, "learning_rate": 0.0001, "loss": 0.0101, "step": 120030 }, { "epoch": 789.7368421052631, "grad_norm": 1.1935677528381348, "learning_rate": 0.0001, "loss": 0.0098, "step": 120040 }, { "epoch": 789.8026315789474, "grad_norm": 2.3716793060302734, "learning_rate": 0.0001, "loss": 0.0121, "step": 120050 }, { "epoch": 789.8684210526316, "grad_norm": 1.407984733581543, "learning_rate": 0.0001, "loss": 0.011, "step": 120060 }, { "epoch": 789.9342105263158, "grad_norm": 1.2061392068862915, "learning_rate": 0.0001, "loss": 0.0097, "step": 120070 }, { "epoch": 790.0, "grad_norm": 1.1397764682769775, "learning_rate": 0.0001, "loss": 0.0101, "step": 120080 }, { "epoch": 790.0657894736842, "grad_norm": 1.4993038177490234, "learning_rate": 0.0001, "loss": 0.0089, "step": 120090 }, { "epoch": 790.1315789473684, "grad_norm": 1.1924669742584229, "learning_rate": 0.0001, "loss": 0.0078, "step": 120100 }, { "epoch": 790.1973684210526, "grad_norm": 1.2470619678497314, "learning_rate": 0.0001, "loss": 0.0121, "step": 120110 }, { "epoch": 790.2631578947369, "grad_norm": 1.23881196975708, "learning_rate": 0.0001, "loss": 0.0094, "step": 120120 }, { "epoch": 790.328947368421, "grad_norm": 1.0482486486434937, "learning_rate": 0.0001, "loss": 0.0101, "step": 120130 }, { "epoch": 790.3947368421053, "grad_norm": 1.1907408237457275, "learning_rate": 0.0001, "loss": 0.0113, "step": 120140 }, { "epoch": 790.4605263157895, "grad_norm": 1.216784119606018, "learning_rate": 0.0001, "loss": 0.0084, "step": 120150 }, { "epoch": 790.5263157894736, "grad_norm": 0.947479248046875, "learning_rate": 0.0001, "loss": 0.01, "step": 120160 }, { "epoch": 790.5921052631579, "grad_norm": 1.0760152339935303, "learning_rate": 0.0001, "loss": 0.01, "step": 120170 }, { "epoch": 790.6578947368421, "grad_norm": 0.7800330519676208, "learning_rate": 0.0001, "loss": 0.0105, "step": 120180 }, { "epoch": 790.7236842105264, "grad_norm": 1.2009683847427368, "learning_rate": 0.0001, "loss": 0.0089, "step": 120190 }, { "epoch": 790.7894736842105, "grad_norm": 0.9596481919288635, "learning_rate": 0.0001, "loss": 0.0108, "step": 120200 }, { "epoch": 790.8552631578947, "grad_norm": 1.0723578929901123, "learning_rate": 0.0001, "loss": 0.0087, "step": 120210 }, { "epoch": 790.921052631579, "grad_norm": 0.8646221160888672, "learning_rate": 0.0001, "loss": 0.0099, "step": 120220 }, { "epoch": 790.9868421052631, "grad_norm": 0.9819400310516357, "learning_rate": 0.0001, "loss": 0.0087, "step": 120230 }, { "epoch": 791.0526315789474, "grad_norm": 0.9730360507965088, "learning_rate": 0.0001, "loss": 0.0092, "step": 120240 }, { "epoch": 791.1184210526316, "grad_norm": 1.2054215669631958, "learning_rate": 0.0001, "loss": 0.0102, "step": 120250 }, { "epoch": 791.1842105263158, "grad_norm": 1.0366238355636597, "learning_rate": 0.0001, "loss": 0.0099, "step": 120260 }, { "epoch": 791.25, "grad_norm": 0.992953360080719, "learning_rate": 0.0001, "loss": 0.0095, "step": 120270 }, { "epoch": 791.3157894736842, "grad_norm": 0.7910591959953308, "learning_rate": 0.0001, "loss": 0.0092, "step": 120280 }, { "epoch": 791.3815789473684, "grad_norm": 0.9786763191223145, "learning_rate": 0.0001, "loss": 0.011, "step": 120290 }, { "epoch": 791.4473684210526, "grad_norm": 0.9105993509292603, "learning_rate": 0.0001, "loss": 0.0123, "step": 120300 }, { "epoch": 791.5131578947369, "grad_norm": 1.005461573600769, "learning_rate": 0.0001, "loss": 0.0082, "step": 120310 }, { "epoch": 791.578947368421, "grad_norm": 0.9919746518135071, "learning_rate": 0.0001, "loss": 0.0105, "step": 120320 }, { "epoch": 791.6447368421053, "grad_norm": 0.6113591194152832, "learning_rate": 0.0001, "loss": 0.0108, "step": 120330 }, { "epoch": 791.7105263157895, "grad_norm": 0.7895347476005554, "learning_rate": 0.0001, "loss": 0.0084, "step": 120340 }, { "epoch": 791.7763157894736, "grad_norm": 1.1017674207687378, "learning_rate": 0.0001, "loss": 0.0103, "step": 120350 }, { "epoch": 791.8421052631579, "grad_norm": 0.9249036908149719, "learning_rate": 0.0001, "loss": 0.009, "step": 120360 }, { "epoch": 791.9078947368421, "grad_norm": 1.0315765142440796, "learning_rate": 0.0001, "loss": 0.0112, "step": 120370 }, { "epoch": 791.9736842105264, "grad_norm": 0.9553092122077942, "learning_rate": 0.0001, "loss": 0.0113, "step": 120380 }, { "epoch": 792.0394736842105, "grad_norm": 0.9283358454704285, "learning_rate": 0.0001, "loss": 0.0128, "step": 120390 }, { "epoch": 792.1052631578947, "grad_norm": 1.0717108249664307, "learning_rate": 0.0001, "loss": 0.0085, "step": 120400 }, { "epoch": 792.171052631579, "grad_norm": 1.008732557296753, "learning_rate": 0.0001, "loss": 0.011, "step": 120410 }, { "epoch": 792.2368421052631, "grad_norm": 0.6750414967536926, "learning_rate": 0.0001, "loss": 0.0101, "step": 120420 }, { "epoch": 792.3026315789474, "grad_norm": 0.9144452810287476, "learning_rate": 0.0001, "loss": 0.0093, "step": 120430 }, { "epoch": 792.3684210526316, "grad_norm": 0.8606979250907898, "learning_rate": 0.0001, "loss": 0.01, "step": 120440 }, { "epoch": 792.4342105263158, "grad_norm": 0.9034025073051453, "learning_rate": 0.0001, "loss": 0.0117, "step": 120450 }, { "epoch": 792.5, "grad_norm": 0.8836445808410645, "learning_rate": 0.0001, "loss": 0.0114, "step": 120460 }, { "epoch": 792.5657894736842, "grad_norm": 1.0012311935424805, "learning_rate": 0.0001, "loss": 0.0092, "step": 120470 }, { "epoch": 792.6315789473684, "grad_norm": 1.7558934688568115, "learning_rate": 0.0001, "loss": 0.0123, "step": 120480 }, { "epoch": 792.6973684210526, "grad_norm": 1.1554054021835327, "learning_rate": 0.0001, "loss": 0.0092, "step": 120490 }, { "epoch": 792.7631578947369, "grad_norm": 0.9315431714057922, "learning_rate": 0.0001, "loss": 0.0107, "step": 120500 }, { "epoch": 792.828947368421, "grad_norm": 1.0039079189300537, "learning_rate": 0.0001, "loss": 0.01, "step": 120510 }, { "epoch": 792.8947368421053, "grad_norm": 0.9370085597038269, "learning_rate": 0.0001, "loss": 0.009, "step": 120520 }, { "epoch": 792.9605263157895, "grad_norm": 1.2092498540878296, "learning_rate": 0.0001, "loss": 0.0103, "step": 120530 }, { "epoch": 793.0263157894736, "grad_norm": 1.0734901428222656, "learning_rate": 0.0001, "loss": 0.0086, "step": 120540 }, { "epoch": 793.0921052631579, "grad_norm": 1.0039688348770142, "learning_rate": 0.0001, "loss": 0.0092, "step": 120550 }, { "epoch": 793.1578947368421, "grad_norm": 0.904447078704834, "learning_rate": 0.0001, "loss": 0.0097, "step": 120560 }, { "epoch": 793.2236842105264, "grad_norm": 0.8521278500556946, "learning_rate": 0.0001, "loss": 0.011, "step": 120570 }, { "epoch": 793.2894736842105, "grad_norm": 0.7753221988677979, "learning_rate": 0.0001, "loss": 0.0099, "step": 120580 }, { "epoch": 793.3552631578947, "grad_norm": 1.0764966011047363, "learning_rate": 0.0001, "loss": 0.0096, "step": 120590 }, { "epoch": 793.421052631579, "grad_norm": 1.086124062538147, "learning_rate": 0.0001, "loss": 0.0102, "step": 120600 }, { "epoch": 793.4868421052631, "grad_norm": 0.8036643862724304, "learning_rate": 0.0001, "loss": 0.0098, "step": 120610 }, { "epoch": 793.5526315789474, "grad_norm": 1.0477961301803589, "learning_rate": 0.0001, "loss": 0.01, "step": 120620 }, { "epoch": 793.6184210526316, "grad_norm": 0.8894155621528625, "learning_rate": 0.0001, "loss": 0.011, "step": 120630 }, { "epoch": 793.6842105263158, "grad_norm": 1.6042909622192383, "learning_rate": 0.0001, "loss": 0.0095, "step": 120640 }, { "epoch": 793.75, "grad_norm": 0.8697584867477417, "learning_rate": 0.0001, "loss": 0.0093, "step": 120650 }, { "epoch": 793.8157894736842, "grad_norm": 1.1683377027511597, "learning_rate": 0.0001, "loss": 0.0106, "step": 120660 }, { "epoch": 793.8815789473684, "grad_norm": 1.1286085844039917, "learning_rate": 0.0001, "loss": 0.0112, "step": 120670 }, { "epoch": 793.9473684210526, "grad_norm": 1.295102834701538, "learning_rate": 0.0001, "loss": 0.0107, "step": 120680 }, { "epoch": 794.0131578947369, "grad_norm": 1.4578384160995483, "learning_rate": 0.0001, "loss": 0.0094, "step": 120690 }, { "epoch": 794.078947368421, "grad_norm": 1.1735413074493408, "learning_rate": 0.0001, "loss": 0.0099, "step": 120700 }, { "epoch": 794.1447368421053, "grad_norm": 0.7384294867515564, "learning_rate": 0.0001, "loss": 0.0117, "step": 120710 }, { "epoch": 794.2105263157895, "grad_norm": 0.9748033285140991, "learning_rate": 0.0001, "loss": 0.0092, "step": 120720 }, { "epoch": 794.2763157894736, "grad_norm": 1.08341646194458, "learning_rate": 0.0001, "loss": 0.0108, "step": 120730 }, { "epoch": 794.3421052631579, "grad_norm": 1.2374165058135986, "learning_rate": 0.0001, "loss": 0.0093, "step": 120740 }, { "epoch": 794.4078947368421, "grad_norm": 0.8181259632110596, "learning_rate": 0.0001, "loss": 0.0119, "step": 120750 }, { "epoch": 794.4736842105264, "grad_norm": 1.0374884605407715, "learning_rate": 0.0001, "loss": 0.0096, "step": 120760 }, { "epoch": 794.5394736842105, "grad_norm": 1.3677926063537598, "learning_rate": 0.0001, "loss": 0.0098, "step": 120770 }, { "epoch": 794.6052631578947, "grad_norm": 0.8347147107124329, "learning_rate": 0.0001, "loss": 0.0105, "step": 120780 }, { "epoch": 794.671052631579, "grad_norm": 0.8443951606750488, "learning_rate": 0.0001, "loss": 0.0089, "step": 120790 }, { "epoch": 794.7368421052631, "grad_norm": 0.8409653902053833, "learning_rate": 0.0001, "loss": 0.009, "step": 120800 }, { "epoch": 794.8026315789474, "grad_norm": 0.8954428434371948, "learning_rate": 0.0001, "loss": 0.009, "step": 120810 }, { "epoch": 794.8684210526316, "grad_norm": 1.0372549295425415, "learning_rate": 0.0001, "loss": 0.009, "step": 120820 }, { "epoch": 794.9342105263158, "grad_norm": 0.8429449796676636, "learning_rate": 0.0001, "loss": 0.0098, "step": 120830 }, { "epoch": 795.0, "grad_norm": 0.6495620608329773, "learning_rate": 0.0001, "loss": 0.0099, "step": 120840 }, { "epoch": 795.0657894736842, "grad_norm": 1.2131069898605347, "learning_rate": 0.0001, "loss": 0.0117, "step": 120850 }, { "epoch": 795.1315789473684, "grad_norm": 1.1015368700027466, "learning_rate": 0.0001, "loss": 0.0101, "step": 120860 }, { "epoch": 795.1973684210526, "grad_norm": 0.9811515212059021, "learning_rate": 0.0001, "loss": 0.009, "step": 120870 }, { "epoch": 795.2631578947369, "grad_norm": 1.299547791481018, "learning_rate": 0.0001, "loss": 0.0115, "step": 120880 }, { "epoch": 795.328947368421, "grad_norm": 1.1710044145584106, "learning_rate": 0.0001, "loss": 0.01, "step": 120890 }, { "epoch": 795.3947368421053, "grad_norm": 1.1657590866088867, "learning_rate": 0.0001, "loss": 0.0119, "step": 120900 }, { "epoch": 795.4605263157895, "grad_norm": 0.8403371572494507, "learning_rate": 0.0001, "loss": 0.0116, "step": 120910 }, { "epoch": 795.5263157894736, "grad_norm": 0.9806166887283325, "learning_rate": 0.0001, "loss": 0.0106, "step": 120920 }, { "epoch": 795.5921052631579, "grad_norm": 1.4901376962661743, "learning_rate": 0.0001, "loss": 0.0087, "step": 120930 }, { "epoch": 795.6578947368421, "grad_norm": 0.8995088338851929, "learning_rate": 0.0001, "loss": 0.0102, "step": 120940 }, { "epoch": 795.7236842105264, "grad_norm": 1.037126064300537, "learning_rate": 0.0001, "loss": 0.011, "step": 120950 }, { "epoch": 795.7894736842105, "grad_norm": 1.0357317924499512, "learning_rate": 0.0001, "loss": 0.0098, "step": 120960 }, { "epoch": 795.8552631578947, "grad_norm": 0.8257767558097839, "learning_rate": 0.0001, "loss": 0.0094, "step": 120970 }, { "epoch": 795.921052631579, "grad_norm": 0.8299721479415894, "learning_rate": 0.0001, "loss": 0.0095, "step": 120980 }, { "epoch": 795.9868421052631, "grad_norm": 1.0660762786865234, "learning_rate": 0.0001, "loss": 0.0091, "step": 120990 }, { "epoch": 796.0526315789474, "grad_norm": 1.2139171361923218, "learning_rate": 0.0001, "loss": 0.0095, "step": 121000 }, { "epoch": 796.1184210526316, "grad_norm": 1.1926602125167847, "learning_rate": 0.0001, "loss": 0.008, "step": 121010 }, { "epoch": 796.1842105263158, "grad_norm": 1.40194571018219, "learning_rate": 0.0001, "loss": 0.0098, "step": 121020 }, { "epoch": 796.25, "grad_norm": 1.023759126663208, "learning_rate": 0.0001, "loss": 0.0109, "step": 121030 }, { "epoch": 796.3157894736842, "grad_norm": 1.3803359270095825, "learning_rate": 0.0001, "loss": 0.0106, "step": 121040 }, { "epoch": 796.3815789473684, "grad_norm": 1.2790648937225342, "learning_rate": 0.0001, "loss": 0.0107, "step": 121050 }, { "epoch": 796.4473684210526, "grad_norm": 0.9783958792686462, "learning_rate": 0.0001, "loss": 0.0088, "step": 121060 }, { "epoch": 796.5131578947369, "grad_norm": 1.3217369318008423, "learning_rate": 0.0001, "loss": 0.0096, "step": 121070 }, { "epoch": 796.578947368421, "grad_norm": 1.2040969133377075, "learning_rate": 0.0001, "loss": 0.0094, "step": 121080 }, { "epoch": 796.6447368421053, "grad_norm": 1.263850212097168, "learning_rate": 0.0001, "loss": 0.011, "step": 121090 }, { "epoch": 796.7105263157895, "grad_norm": 0.976997971534729, "learning_rate": 0.0001, "loss": 0.0114, "step": 121100 }, { "epoch": 796.7763157894736, "grad_norm": 1.194297194480896, "learning_rate": 0.0001, "loss": 0.0094, "step": 121110 }, { "epoch": 796.8421052631579, "grad_norm": 1.2234728336334229, "learning_rate": 0.0001, "loss": 0.0091, "step": 121120 }, { "epoch": 796.9078947368421, "grad_norm": 1.7128822803497314, "learning_rate": 0.0001, "loss": 0.0121, "step": 121130 }, { "epoch": 796.9736842105264, "grad_norm": 1.269959807395935, "learning_rate": 0.0001, "loss": 0.0093, "step": 121140 }, { "epoch": 797.0394736842105, "grad_norm": 1.3292056322097778, "learning_rate": 0.0001, "loss": 0.0103, "step": 121150 }, { "epoch": 797.1052631578947, "grad_norm": 0.8442802429199219, "learning_rate": 0.0001, "loss": 0.0104, "step": 121160 }, { "epoch": 797.171052631579, "grad_norm": 0.9467982649803162, "learning_rate": 0.0001, "loss": 0.0085, "step": 121170 }, { "epoch": 797.2368421052631, "grad_norm": 0.8293236494064331, "learning_rate": 0.0001, "loss": 0.0081, "step": 121180 }, { "epoch": 797.3026315789474, "grad_norm": 0.5392968654632568, "learning_rate": 0.0001, "loss": 0.0109, "step": 121190 }, { "epoch": 797.3684210526316, "grad_norm": 0.9286937713623047, "learning_rate": 0.0001, "loss": 0.0109, "step": 121200 }, { "epoch": 797.4342105263158, "grad_norm": 0.6233976483345032, "learning_rate": 0.0001, "loss": 0.0103, "step": 121210 }, { "epoch": 797.5, "grad_norm": 0.9368897080421448, "learning_rate": 0.0001, "loss": 0.0094, "step": 121220 }, { "epoch": 797.5657894736842, "grad_norm": 0.8141809105873108, "learning_rate": 0.0001, "loss": 0.0107, "step": 121230 }, { "epoch": 797.6315789473684, "grad_norm": 0.7223671674728394, "learning_rate": 0.0001, "loss": 0.0088, "step": 121240 }, { "epoch": 797.6973684210526, "grad_norm": 1.1252926588058472, "learning_rate": 0.0001, "loss": 0.0111, "step": 121250 }, { "epoch": 797.7631578947369, "grad_norm": 0.971419095993042, "learning_rate": 0.0001, "loss": 0.0097, "step": 121260 }, { "epoch": 797.828947368421, "grad_norm": 1.5512263774871826, "learning_rate": 0.0001, "loss": 0.0113, "step": 121270 }, { "epoch": 797.8947368421053, "grad_norm": 2.1177520751953125, "learning_rate": 0.0001, "loss": 0.0088, "step": 121280 }, { "epoch": 797.9605263157895, "grad_norm": 1.3696571588516235, "learning_rate": 0.0001, "loss": 0.0108, "step": 121290 }, { "epoch": 798.0263157894736, "grad_norm": 2.183379888534546, "learning_rate": 0.0001, "loss": 0.0093, "step": 121300 }, { "epoch": 798.0921052631579, "grad_norm": 1.6594129800796509, "learning_rate": 0.0001, "loss": 0.0106, "step": 121310 }, { "epoch": 798.1578947368421, "grad_norm": 1.3081175088882446, "learning_rate": 0.0001, "loss": 0.011, "step": 121320 }, { "epoch": 798.2236842105264, "grad_norm": 1.0478174686431885, "learning_rate": 0.0001, "loss": 0.0097, "step": 121330 }, { "epoch": 798.2894736842105, "grad_norm": 1.081154227256775, "learning_rate": 0.0001, "loss": 0.0107, "step": 121340 }, { "epoch": 798.3552631578947, "grad_norm": 1.172153353691101, "learning_rate": 0.0001, "loss": 0.008, "step": 121350 }, { "epoch": 798.421052631579, "grad_norm": 0.9921495318412781, "learning_rate": 0.0001, "loss": 0.0091, "step": 121360 }, { "epoch": 798.4868421052631, "grad_norm": 0.8902541399002075, "learning_rate": 0.0001, "loss": 0.0099, "step": 121370 }, { "epoch": 798.5526315789474, "grad_norm": 1.2591131925582886, "learning_rate": 0.0001, "loss": 0.0099, "step": 121380 }, { "epoch": 798.6184210526316, "grad_norm": 1.258408546447754, "learning_rate": 0.0001, "loss": 0.0101, "step": 121390 }, { "epoch": 798.6842105263158, "grad_norm": 0.9958367943763733, "learning_rate": 0.0001, "loss": 0.01, "step": 121400 }, { "epoch": 798.75, "grad_norm": 1.3845791816711426, "learning_rate": 0.0001, "loss": 0.0101, "step": 121410 }, { "epoch": 798.8157894736842, "grad_norm": 1.2385156154632568, "learning_rate": 0.0001, "loss": 0.0102, "step": 121420 }, { "epoch": 798.8815789473684, "grad_norm": 0.9330929517745972, "learning_rate": 0.0001, "loss": 0.0107, "step": 121430 }, { "epoch": 798.9473684210526, "grad_norm": 1.2166731357574463, "learning_rate": 0.0001, "loss": 0.0098, "step": 121440 }, { "epoch": 799.0131578947369, "grad_norm": 1.1683727502822876, "learning_rate": 0.0001, "loss": 0.0127, "step": 121450 }, { "epoch": 799.078947368421, "grad_norm": 1.0285605192184448, "learning_rate": 0.0001, "loss": 0.0098, "step": 121460 }, { "epoch": 799.1447368421053, "grad_norm": 0.9653977155685425, "learning_rate": 0.0001, "loss": 0.0104, "step": 121470 }, { "epoch": 799.2105263157895, "grad_norm": 1.072914481163025, "learning_rate": 0.0001, "loss": 0.0091, "step": 121480 }, { "epoch": 799.2763157894736, "grad_norm": 1.1126468181610107, "learning_rate": 0.0001, "loss": 0.0104, "step": 121490 }, { "epoch": 799.3421052631579, "grad_norm": 1.0155147314071655, "learning_rate": 0.0001, "loss": 0.0103, "step": 121500 }, { "epoch": 799.4078947368421, "grad_norm": 1.1577136516571045, "learning_rate": 0.0001, "loss": 0.0107, "step": 121510 }, { "epoch": 799.4736842105264, "grad_norm": 1.2593013048171997, "learning_rate": 0.0001, "loss": 0.0084, "step": 121520 }, { "epoch": 799.5394736842105, "grad_norm": 1.4376327991485596, "learning_rate": 0.0001, "loss": 0.0128, "step": 121530 }, { "epoch": 799.6052631578947, "grad_norm": 1.1606967449188232, "learning_rate": 0.0001, "loss": 0.0094, "step": 121540 }, { "epoch": 799.671052631579, "grad_norm": 1.1336442232131958, "learning_rate": 0.0001, "loss": 0.0104, "step": 121550 }, { "epoch": 799.7368421052631, "grad_norm": 0.9730342626571655, "learning_rate": 0.0001, "loss": 0.01, "step": 121560 }, { "epoch": 799.8026315789474, "grad_norm": 1.030070185661316, "learning_rate": 0.0001, "loss": 0.01, "step": 121570 }, { "epoch": 799.8684210526316, "grad_norm": 0.6735216975212097, "learning_rate": 0.0001, "loss": 0.0121, "step": 121580 }, { "epoch": 799.9342105263158, "grad_norm": 1.4537135362625122, "learning_rate": 0.0001, "loss": 0.0104, "step": 121590 }, { "epoch": 800.0, "grad_norm": 0.9319224953651428, "learning_rate": 0.0001, "loss": 0.0111, "step": 121600 }, { "epoch": 800.0657894736842, "grad_norm": 0.779762864112854, "learning_rate": 0.0001, "loss": 0.0101, "step": 121610 }, { "epoch": 800.1315789473684, "grad_norm": 0.7982974648475647, "learning_rate": 0.0001, "loss": 0.0102, "step": 121620 }, { "epoch": 800.1973684210526, "grad_norm": 0.9684889316558838, "learning_rate": 0.0001, "loss": 0.0117, "step": 121630 }, { "epoch": 800.2631578947369, "grad_norm": 0.9351498484611511, "learning_rate": 0.0001, "loss": 0.0101, "step": 121640 }, { "epoch": 800.328947368421, "grad_norm": 0.9390810132026672, "learning_rate": 0.0001, "loss": 0.0142, "step": 121650 }, { "epoch": 800.3947368421053, "grad_norm": 0.9394250512123108, "learning_rate": 0.0001, "loss": 0.0104, "step": 121660 }, { "epoch": 800.4605263157895, "grad_norm": 0.8497784733772278, "learning_rate": 0.0001, "loss": 0.01, "step": 121670 }, { "epoch": 800.5263157894736, "grad_norm": 0.9997221827507019, "learning_rate": 0.0001, "loss": 0.0117, "step": 121680 }, { "epoch": 800.5921052631579, "grad_norm": 1.0118328332901, "learning_rate": 0.0001, "loss": 0.0108, "step": 121690 }, { "epoch": 800.6578947368421, "grad_norm": 1.0781736373901367, "learning_rate": 0.0001, "loss": 0.012, "step": 121700 }, { "epoch": 800.7236842105264, "grad_norm": 0.9491978287696838, "learning_rate": 0.0001, "loss": 0.009, "step": 121710 }, { "epoch": 800.7894736842105, "grad_norm": 1.0116422176361084, "learning_rate": 0.0001, "loss": 0.0111, "step": 121720 }, { "epoch": 800.8552631578947, "grad_norm": 1.0374714136123657, "learning_rate": 0.0001, "loss": 0.01, "step": 121730 }, { "epoch": 800.921052631579, "grad_norm": 0.8975595831871033, "learning_rate": 0.0001, "loss": 0.0095, "step": 121740 }, { "epoch": 800.9868421052631, "grad_norm": 1.0354864597320557, "learning_rate": 0.0001, "loss": 0.0098, "step": 121750 }, { "epoch": 801.0526315789474, "grad_norm": 1.3337525129318237, "learning_rate": 0.0001, "loss": 0.0105, "step": 121760 }, { "epoch": 801.1184210526316, "grad_norm": 1.3484176397323608, "learning_rate": 0.0001, "loss": 0.011, "step": 121770 }, { "epoch": 801.1842105263158, "grad_norm": 0.9428836703300476, "learning_rate": 0.0001, "loss": 0.0109, "step": 121780 }, { "epoch": 801.25, "grad_norm": 1.2294591665267944, "learning_rate": 0.0001, "loss": 0.0107, "step": 121790 }, { "epoch": 801.3157894736842, "grad_norm": 1.2167091369628906, "learning_rate": 0.0001, "loss": 0.0094, "step": 121800 }, { "epoch": 801.3815789473684, "grad_norm": 1.2219659090042114, "learning_rate": 0.0001, "loss": 0.0093, "step": 121810 }, { "epoch": 801.4473684210526, "grad_norm": 1.1897832155227661, "learning_rate": 0.0001, "loss": 0.0098, "step": 121820 }, { "epoch": 801.5131578947369, "grad_norm": 1.3615158796310425, "learning_rate": 0.0001, "loss": 0.0111, "step": 121830 }, { "epoch": 801.578947368421, "grad_norm": 0.8604697585105896, "learning_rate": 0.0001, "loss": 0.0108, "step": 121840 }, { "epoch": 801.6447368421053, "grad_norm": 1.149682879447937, "learning_rate": 0.0001, "loss": 0.0099, "step": 121850 }, { "epoch": 801.7105263157895, "grad_norm": 0.803564190864563, "learning_rate": 0.0001, "loss": 0.0098, "step": 121860 }, { "epoch": 801.7763157894736, "grad_norm": 1.223574161529541, "learning_rate": 0.0001, "loss": 0.0088, "step": 121870 }, { "epoch": 801.8421052631579, "grad_norm": 0.9793342351913452, "learning_rate": 0.0001, "loss": 0.009, "step": 121880 }, { "epoch": 801.9078947368421, "grad_norm": 0.9008523225784302, "learning_rate": 0.0001, "loss": 0.0123, "step": 121890 }, { "epoch": 801.9736842105264, "grad_norm": 0.9227433204650879, "learning_rate": 0.0001, "loss": 0.0091, "step": 121900 }, { "epoch": 802.0394736842105, "grad_norm": 0.8591435551643372, "learning_rate": 0.0001, "loss": 0.0114, "step": 121910 }, { "epoch": 802.1052631578947, "grad_norm": 1.1136678457260132, "learning_rate": 0.0001, "loss": 0.0099, "step": 121920 }, { "epoch": 802.171052631579, "grad_norm": 0.9162068367004395, "learning_rate": 0.0001, "loss": 0.01, "step": 121930 }, { "epoch": 802.2368421052631, "grad_norm": 0.9551861882209778, "learning_rate": 0.0001, "loss": 0.0096, "step": 121940 }, { "epoch": 802.3026315789474, "grad_norm": 0.9675922989845276, "learning_rate": 0.0001, "loss": 0.0107, "step": 121950 }, { "epoch": 802.3684210526316, "grad_norm": 1.1318920850753784, "learning_rate": 0.0001, "loss": 0.0103, "step": 121960 }, { "epoch": 802.4342105263158, "grad_norm": 0.9588920474052429, "learning_rate": 0.0001, "loss": 0.0116, "step": 121970 }, { "epoch": 802.5, "grad_norm": 0.6656215190887451, "learning_rate": 0.0001, "loss": 0.0094, "step": 121980 }, { "epoch": 802.5657894736842, "grad_norm": 1.1019165515899658, "learning_rate": 0.0001, "loss": 0.0105, "step": 121990 }, { "epoch": 802.6315789473684, "grad_norm": 1.2802983522415161, "learning_rate": 0.0001, "loss": 0.0095, "step": 122000 }, { "epoch": 802.6973684210526, "grad_norm": 1.1623402833938599, "learning_rate": 0.0001, "loss": 0.0105, "step": 122010 }, { "epoch": 802.7631578947369, "grad_norm": 1.039617896080017, "learning_rate": 0.0001, "loss": 0.0103, "step": 122020 }, { "epoch": 802.828947368421, "grad_norm": 0.8688013553619385, "learning_rate": 0.0001, "loss": 0.0103, "step": 122030 }, { "epoch": 802.8947368421053, "grad_norm": 1.22115957736969, "learning_rate": 0.0001, "loss": 0.01, "step": 122040 }, { "epoch": 802.9605263157895, "grad_norm": 0.9164858460426331, "learning_rate": 0.0001, "loss": 0.0108, "step": 122050 }, { "epoch": 803.0263157894736, "grad_norm": 0.8192176818847656, "learning_rate": 0.0001, "loss": 0.0115, "step": 122060 }, { "epoch": 803.0921052631579, "grad_norm": 1.2811557054519653, "learning_rate": 0.0001, "loss": 0.0122, "step": 122070 }, { "epoch": 803.1578947368421, "grad_norm": 1.3830119371414185, "learning_rate": 0.0001, "loss": 0.0117, "step": 122080 }, { "epoch": 803.2236842105264, "grad_norm": 1.146261215209961, "learning_rate": 0.0001, "loss": 0.0097, "step": 122090 }, { "epoch": 803.2894736842105, "grad_norm": 0.9297433495521545, "learning_rate": 0.0001, "loss": 0.0119, "step": 122100 }, { "epoch": 803.3552631578947, "grad_norm": 0.8575513958930969, "learning_rate": 0.0001, "loss": 0.011, "step": 122110 }, { "epoch": 803.421052631579, "grad_norm": 1.1129348278045654, "learning_rate": 0.0001, "loss": 0.0098, "step": 122120 }, { "epoch": 803.4868421052631, "grad_norm": 1.1914259195327759, "learning_rate": 0.0001, "loss": 0.0112, "step": 122130 }, { "epoch": 803.5526315789474, "grad_norm": 0.8768858909606934, "learning_rate": 0.0001, "loss": 0.0098, "step": 122140 }, { "epoch": 803.6184210526316, "grad_norm": 0.8389908671379089, "learning_rate": 0.0001, "loss": 0.0094, "step": 122150 }, { "epoch": 803.6842105263158, "grad_norm": 1.1169975996017456, "learning_rate": 0.0001, "loss": 0.0109, "step": 122160 }, { "epoch": 803.75, "grad_norm": 0.8531729578971863, "learning_rate": 0.0001, "loss": 0.0103, "step": 122170 }, { "epoch": 803.8157894736842, "grad_norm": 1.1040211915969849, "learning_rate": 0.0001, "loss": 0.0121, "step": 122180 }, { "epoch": 803.8815789473684, "grad_norm": 0.9061696529388428, "learning_rate": 0.0001, "loss": 0.0088, "step": 122190 }, { "epoch": 803.9473684210526, "grad_norm": 0.9023482799530029, "learning_rate": 0.0001, "loss": 0.0099, "step": 122200 }, { "epoch": 804.0131578947369, "grad_norm": 1.1508461236953735, "learning_rate": 0.0001, "loss": 0.011, "step": 122210 }, { "epoch": 804.078947368421, "grad_norm": 1.0282151699066162, "learning_rate": 0.0001, "loss": 0.0113, "step": 122220 }, { "epoch": 804.1447368421053, "grad_norm": 1.2764521837234497, "learning_rate": 0.0001, "loss": 0.0102, "step": 122230 }, { "epoch": 804.2105263157895, "grad_norm": 1.1191545724868774, "learning_rate": 0.0001, "loss": 0.0114, "step": 122240 }, { "epoch": 804.2763157894736, "grad_norm": 0.988823413848877, "learning_rate": 0.0001, "loss": 0.0107, "step": 122250 }, { "epoch": 804.3421052631579, "grad_norm": 1.095762848854065, "learning_rate": 0.0001, "loss": 0.009, "step": 122260 }, { "epoch": 804.4078947368421, "grad_norm": 1.3492151498794556, "learning_rate": 0.0001, "loss": 0.0084, "step": 122270 }, { "epoch": 804.4736842105264, "grad_norm": 1.2106963396072388, "learning_rate": 0.0001, "loss": 0.0099, "step": 122280 }, { "epoch": 804.5394736842105, "grad_norm": 1.2582029104232788, "learning_rate": 0.0001, "loss": 0.0106, "step": 122290 }, { "epoch": 804.6052631578947, "grad_norm": 1.3207285404205322, "learning_rate": 0.0001, "loss": 0.0123, "step": 122300 }, { "epoch": 804.671052631579, "grad_norm": 1.0762017965316772, "learning_rate": 0.0001, "loss": 0.011, "step": 122310 }, { "epoch": 804.7368421052631, "grad_norm": 0.871727466583252, "learning_rate": 0.0001, "loss": 0.0099, "step": 122320 }, { "epoch": 804.8026315789474, "grad_norm": 0.9750553369522095, "learning_rate": 0.0001, "loss": 0.0102, "step": 122330 }, { "epoch": 804.8684210526316, "grad_norm": 1.4661288261413574, "learning_rate": 0.0001, "loss": 0.0115, "step": 122340 }, { "epoch": 804.9342105263158, "grad_norm": 1.1938623189926147, "learning_rate": 0.0001, "loss": 0.0091, "step": 122350 }, { "epoch": 805.0, "grad_norm": 1.210606575012207, "learning_rate": 0.0001, "loss": 0.0093, "step": 122360 }, { "epoch": 805.0657894736842, "grad_norm": 0.9685527086257935, "learning_rate": 0.0001, "loss": 0.0101, "step": 122370 }, { "epoch": 805.1315789473684, "grad_norm": 0.7988755702972412, "learning_rate": 0.0001, "loss": 0.0107, "step": 122380 }, { "epoch": 805.1973684210526, "grad_norm": 1.0523570775985718, "learning_rate": 0.0001, "loss": 0.0111, "step": 122390 }, { "epoch": 805.2631578947369, "grad_norm": 0.9532704949378967, "learning_rate": 0.0001, "loss": 0.0094, "step": 122400 }, { "epoch": 805.328947368421, "grad_norm": 0.774535059928894, "learning_rate": 0.0001, "loss": 0.0101, "step": 122410 }, { "epoch": 805.3947368421053, "grad_norm": 1.0820705890655518, "learning_rate": 0.0001, "loss": 0.0085, "step": 122420 }, { "epoch": 805.4605263157895, "grad_norm": 1.2834467887878418, "learning_rate": 0.0001, "loss": 0.0094, "step": 122430 }, { "epoch": 805.5263157894736, "grad_norm": 1.2776272296905518, "learning_rate": 0.0001, "loss": 0.0099, "step": 122440 }, { "epoch": 805.5921052631579, "grad_norm": 1.7061843872070312, "learning_rate": 0.0001, "loss": 0.0117, "step": 122450 }, { "epoch": 805.6578947368421, "grad_norm": 1.2151910066604614, "learning_rate": 0.0001, "loss": 0.0091, "step": 122460 }, { "epoch": 805.7236842105264, "grad_norm": 0.9397733211517334, "learning_rate": 0.0001, "loss": 0.0108, "step": 122470 }, { "epoch": 805.7894736842105, "grad_norm": 1.0783069133758545, "learning_rate": 0.0001, "loss": 0.011, "step": 122480 }, { "epoch": 805.8552631578947, "grad_norm": 0.965991735458374, "learning_rate": 0.0001, "loss": 0.011, "step": 122490 }, { "epoch": 805.921052631579, "grad_norm": 1.1885125637054443, "learning_rate": 0.0001, "loss": 0.009, "step": 122500 }, { "epoch": 805.9868421052631, "grad_norm": 1.4462835788726807, "learning_rate": 0.0001, "loss": 0.0098, "step": 122510 }, { "epoch": 806.0526315789474, "grad_norm": 1.4112342596054077, "learning_rate": 0.0001, "loss": 0.0095, "step": 122520 }, { "epoch": 806.1184210526316, "grad_norm": 1.3368967771530151, "learning_rate": 0.0001, "loss": 0.0089, "step": 122530 }, { "epoch": 806.1842105263158, "grad_norm": 0.9588228464126587, "learning_rate": 0.0001, "loss": 0.011, "step": 122540 }, { "epoch": 806.25, "grad_norm": 1.0012880563735962, "learning_rate": 0.0001, "loss": 0.0097, "step": 122550 }, { "epoch": 806.3157894736842, "grad_norm": 1.0490740537643433, "learning_rate": 0.0001, "loss": 0.012, "step": 122560 }, { "epoch": 806.3815789473684, "grad_norm": 1.1824147701263428, "learning_rate": 0.0001, "loss": 0.0113, "step": 122570 }, { "epoch": 806.4473684210526, "grad_norm": 1.1107277870178223, "learning_rate": 0.0001, "loss": 0.0106, "step": 122580 }, { "epoch": 806.5131578947369, "grad_norm": 0.9902931451797485, "learning_rate": 0.0001, "loss": 0.0092, "step": 122590 }, { "epoch": 806.578947368421, "grad_norm": 1.0864098072052002, "learning_rate": 0.0001, "loss": 0.0097, "step": 122600 }, { "epoch": 806.6447368421053, "grad_norm": 1.0239638090133667, "learning_rate": 0.0001, "loss": 0.0091, "step": 122610 }, { "epoch": 806.7105263157895, "grad_norm": 0.9125197529792786, "learning_rate": 0.0001, "loss": 0.0116, "step": 122620 }, { "epoch": 806.7763157894736, "grad_norm": 0.8427323698997498, "learning_rate": 0.0001, "loss": 0.0093, "step": 122630 }, { "epoch": 806.8421052631579, "grad_norm": 0.6944894790649414, "learning_rate": 0.0001, "loss": 0.0093, "step": 122640 }, { "epoch": 806.9078947368421, "grad_norm": 0.6341553926467896, "learning_rate": 0.0001, "loss": 0.0111, "step": 122650 }, { "epoch": 806.9736842105264, "grad_norm": 1.362230896949768, "learning_rate": 0.0001, "loss": 0.0098, "step": 122660 }, { "epoch": 807.0394736842105, "grad_norm": 1.2482671737670898, "learning_rate": 0.0001, "loss": 0.0098, "step": 122670 }, { "epoch": 807.1052631578947, "grad_norm": 1.413817048072815, "learning_rate": 0.0001, "loss": 0.0124, "step": 122680 }, { "epoch": 807.171052631579, "grad_norm": 1.4257447719573975, "learning_rate": 0.0001, "loss": 0.0094, "step": 122690 }, { "epoch": 807.2368421052631, "grad_norm": 1.0907763242721558, "learning_rate": 0.0001, "loss": 0.009, "step": 122700 }, { "epoch": 807.3026315789474, "grad_norm": 1.1896060705184937, "learning_rate": 0.0001, "loss": 0.0094, "step": 122710 }, { "epoch": 807.3684210526316, "grad_norm": 0.9209076166152954, "learning_rate": 0.0001, "loss": 0.0112, "step": 122720 }, { "epoch": 807.4342105263158, "grad_norm": 0.7475315928459167, "learning_rate": 0.0001, "loss": 0.0094, "step": 122730 }, { "epoch": 807.5, "grad_norm": 1.2551970481872559, "learning_rate": 0.0001, "loss": 0.0102, "step": 122740 }, { "epoch": 807.5657894736842, "grad_norm": 1.2004441022872925, "learning_rate": 0.0001, "loss": 0.0087, "step": 122750 }, { "epoch": 807.6315789473684, "grad_norm": 1.0100356340408325, "learning_rate": 0.0001, "loss": 0.0088, "step": 122760 }, { "epoch": 807.6973684210526, "grad_norm": 1.3553928136825562, "learning_rate": 0.0001, "loss": 0.011, "step": 122770 }, { "epoch": 807.7631578947369, "grad_norm": 0.8858333826065063, "learning_rate": 0.0001, "loss": 0.011, "step": 122780 }, { "epoch": 807.828947368421, "grad_norm": 1.5102218389511108, "learning_rate": 0.0001, "loss": 0.0096, "step": 122790 }, { "epoch": 807.8947368421053, "grad_norm": 1.0069150924682617, "learning_rate": 0.0001, "loss": 0.0114, "step": 122800 }, { "epoch": 807.9605263157895, "grad_norm": 0.7301906943321228, "learning_rate": 0.0001, "loss": 0.0114, "step": 122810 }, { "epoch": 808.0263157894736, "grad_norm": 1.0068855285644531, "learning_rate": 0.0001, "loss": 0.0115, "step": 122820 }, { "epoch": 808.0921052631579, "grad_norm": 0.9534813165664673, "learning_rate": 0.0001, "loss": 0.0101, "step": 122830 }, { "epoch": 808.1578947368421, "grad_norm": 0.9146772027015686, "learning_rate": 0.0001, "loss": 0.0108, "step": 122840 }, { "epoch": 808.2236842105264, "grad_norm": 0.9505394697189331, "learning_rate": 0.0001, "loss": 0.0089, "step": 122850 }, { "epoch": 808.2894736842105, "grad_norm": 1.0315290689468384, "learning_rate": 0.0001, "loss": 0.0114, "step": 122860 }, { "epoch": 808.3552631578947, "grad_norm": 1.236918568611145, "learning_rate": 0.0001, "loss": 0.0089, "step": 122870 }, { "epoch": 808.421052631579, "grad_norm": 1.0912704467773438, "learning_rate": 0.0001, "loss": 0.0106, "step": 122880 }, { "epoch": 808.4868421052631, "grad_norm": 0.8193346261978149, "learning_rate": 0.0001, "loss": 0.0087, "step": 122890 }, { "epoch": 808.5526315789474, "grad_norm": 1.54730224609375, "learning_rate": 0.0001, "loss": 0.0107, "step": 122900 }, { "epoch": 808.6184210526316, "grad_norm": 1.441839575767517, "learning_rate": 0.0001, "loss": 0.0123, "step": 122910 }, { "epoch": 808.6842105263158, "grad_norm": 1.055781602859497, "learning_rate": 0.0001, "loss": 0.0108, "step": 122920 }, { "epoch": 808.75, "grad_norm": 1.353714108467102, "learning_rate": 0.0001, "loss": 0.0094, "step": 122930 }, { "epoch": 808.8157894736842, "grad_norm": 1.2414506673812866, "learning_rate": 0.0001, "loss": 0.0091, "step": 122940 }, { "epoch": 808.8815789473684, "grad_norm": 1.2906715869903564, "learning_rate": 0.0001, "loss": 0.0103, "step": 122950 }, { "epoch": 808.9473684210526, "grad_norm": 0.7939502596855164, "learning_rate": 0.0001, "loss": 0.0109, "step": 122960 }, { "epoch": 809.0131578947369, "grad_norm": 0.9539815783500671, "learning_rate": 0.0001, "loss": 0.0098, "step": 122970 }, { "epoch": 809.078947368421, "grad_norm": 0.7728420495986938, "learning_rate": 0.0001, "loss": 0.0112, "step": 122980 }, { "epoch": 809.1447368421053, "grad_norm": 0.9012200236320496, "learning_rate": 0.0001, "loss": 0.0098, "step": 122990 }, { "epoch": 809.2105263157895, "grad_norm": 0.874159574508667, "learning_rate": 0.0001, "loss": 0.0106, "step": 123000 }, { "epoch": 809.2763157894736, "grad_norm": 1.1063437461853027, "learning_rate": 0.0001, "loss": 0.011, "step": 123010 }, { "epoch": 809.3421052631579, "grad_norm": 1.1049076318740845, "learning_rate": 0.0001, "loss": 0.0099, "step": 123020 }, { "epoch": 809.4078947368421, "grad_norm": 1.4535138607025146, "learning_rate": 0.0001, "loss": 0.0104, "step": 123030 }, { "epoch": 809.4736842105264, "grad_norm": 1.022336483001709, "learning_rate": 0.0001, "loss": 0.0092, "step": 123040 }, { "epoch": 809.5394736842105, "grad_norm": 1.1350669860839844, "learning_rate": 0.0001, "loss": 0.0101, "step": 123050 }, { "epoch": 809.6052631578947, "grad_norm": 1.5300946235656738, "learning_rate": 0.0001, "loss": 0.0133, "step": 123060 }, { "epoch": 809.671052631579, "grad_norm": 1.1498500108718872, "learning_rate": 0.0001, "loss": 0.0091, "step": 123070 }, { "epoch": 809.7368421052631, "grad_norm": 0.6810249090194702, "learning_rate": 0.0001, "loss": 0.0086, "step": 123080 }, { "epoch": 809.8026315789474, "grad_norm": 0.9116702675819397, "learning_rate": 0.0001, "loss": 0.0085, "step": 123090 }, { "epoch": 809.8684210526316, "grad_norm": 1.095033049583435, "learning_rate": 0.0001, "loss": 0.0092, "step": 123100 }, { "epoch": 809.9342105263158, "grad_norm": 0.9382158517837524, "learning_rate": 0.0001, "loss": 0.0112, "step": 123110 }, { "epoch": 810.0, "grad_norm": 1.0285440683364868, "learning_rate": 0.0001, "loss": 0.0098, "step": 123120 }, { "epoch": 810.0657894736842, "grad_norm": 0.9891853332519531, "learning_rate": 0.0001, "loss": 0.0101, "step": 123130 }, { "epoch": 810.1315789473684, "grad_norm": 0.9508638381958008, "learning_rate": 0.0001, "loss": 0.0096, "step": 123140 }, { "epoch": 810.1973684210526, "grad_norm": 0.9087507724761963, "learning_rate": 0.0001, "loss": 0.0119, "step": 123150 }, { "epoch": 810.2631578947369, "grad_norm": 1.0318292379379272, "learning_rate": 0.0001, "loss": 0.0106, "step": 123160 }, { "epoch": 810.328947368421, "grad_norm": 0.8435489535331726, "learning_rate": 0.0001, "loss": 0.0101, "step": 123170 }, { "epoch": 810.3947368421053, "grad_norm": 0.7418573498725891, "learning_rate": 0.0001, "loss": 0.0089, "step": 123180 }, { "epoch": 810.4605263157895, "grad_norm": 1.2154449224472046, "learning_rate": 0.0001, "loss": 0.0109, "step": 123190 }, { "epoch": 810.5263157894736, "grad_norm": 1.1838921308517456, "learning_rate": 0.0001, "loss": 0.0107, "step": 123200 }, { "epoch": 810.5921052631579, "grad_norm": 0.6878968477249146, "learning_rate": 0.0001, "loss": 0.0096, "step": 123210 }, { "epoch": 810.6578947368421, "grad_norm": 1.2046258449554443, "learning_rate": 0.0001, "loss": 0.0091, "step": 123220 }, { "epoch": 810.7236842105264, "grad_norm": 1.1671987771987915, "learning_rate": 0.0001, "loss": 0.0105, "step": 123230 }, { "epoch": 810.7894736842105, "grad_norm": 1.0558589696884155, "learning_rate": 0.0001, "loss": 0.0105, "step": 123240 }, { "epoch": 810.8552631578947, "grad_norm": 1.035437822341919, "learning_rate": 0.0001, "loss": 0.0114, "step": 123250 }, { "epoch": 810.921052631579, "grad_norm": 1.2086669206619263, "learning_rate": 0.0001, "loss": 0.0133, "step": 123260 }, { "epoch": 810.9868421052631, "grad_norm": 1.086695909500122, "learning_rate": 0.0001, "loss": 0.0086, "step": 123270 }, { "epoch": 811.0526315789474, "grad_norm": 1.3100385665893555, "learning_rate": 0.0001, "loss": 0.0096, "step": 123280 }, { "epoch": 811.1184210526316, "grad_norm": 1.3624625205993652, "learning_rate": 0.0001, "loss": 0.0099, "step": 123290 }, { "epoch": 811.1842105263158, "grad_norm": 1.1712080240249634, "learning_rate": 0.0001, "loss": 0.0102, "step": 123300 }, { "epoch": 811.25, "grad_norm": 1.2696281671524048, "learning_rate": 0.0001, "loss": 0.0104, "step": 123310 }, { "epoch": 811.3157894736842, "grad_norm": 0.7880839109420776, "learning_rate": 0.0001, "loss": 0.0096, "step": 123320 }, { "epoch": 811.3815789473684, "grad_norm": 1.3143435716629028, "learning_rate": 0.0001, "loss": 0.0088, "step": 123330 }, { "epoch": 811.4473684210526, "grad_norm": 1.0152329206466675, "learning_rate": 0.0001, "loss": 0.0105, "step": 123340 }, { "epoch": 811.5131578947369, "grad_norm": 0.9424976110458374, "learning_rate": 0.0001, "loss": 0.0118, "step": 123350 }, { "epoch": 811.578947368421, "grad_norm": 1.0948466062545776, "learning_rate": 0.0001, "loss": 0.0119, "step": 123360 }, { "epoch": 811.6447368421053, "grad_norm": 0.9606415033340454, "learning_rate": 0.0001, "loss": 0.0099, "step": 123370 }, { "epoch": 811.7105263157895, "grad_norm": 1.1549152135849, "learning_rate": 0.0001, "loss": 0.0104, "step": 123380 }, { "epoch": 811.7763157894736, "grad_norm": 0.8370715975761414, "learning_rate": 0.0001, "loss": 0.0093, "step": 123390 }, { "epoch": 811.8421052631579, "grad_norm": 1.1757169961929321, "learning_rate": 0.0001, "loss": 0.0108, "step": 123400 }, { "epoch": 811.9078947368421, "grad_norm": 1.3213396072387695, "learning_rate": 0.0001, "loss": 0.0106, "step": 123410 }, { "epoch": 811.9736842105264, "grad_norm": 1.3721901178359985, "learning_rate": 0.0001, "loss": 0.009, "step": 123420 }, { "epoch": 812.0394736842105, "grad_norm": 1.2278730869293213, "learning_rate": 0.0001, "loss": 0.0103, "step": 123430 }, { "epoch": 812.1052631578947, "grad_norm": 0.8597956895828247, "learning_rate": 0.0001, "loss": 0.0089, "step": 123440 }, { "epoch": 812.171052631579, "grad_norm": 1.0379050970077515, "learning_rate": 0.0001, "loss": 0.0097, "step": 123450 }, { "epoch": 812.2368421052631, "grad_norm": 1.107707142829895, "learning_rate": 0.0001, "loss": 0.0098, "step": 123460 }, { "epoch": 812.3026315789474, "grad_norm": 1.37968909740448, "learning_rate": 0.0001, "loss": 0.0096, "step": 123470 }, { "epoch": 812.3684210526316, "grad_norm": 1.6168029308319092, "learning_rate": 0.0001, "loss": 0.0095, "step": 123480 }, { "epoch": 812.4342105263158, "grad_norm": 1.3366464376449585, "learning_rate": 0.0001, "loss": 0.0085, "step": 123490 }, { "epoch": 812.5, "grad_norm": 1.2438991069793701, "learning_rate": 0.0001, "loss": 0.0109, "step": 123500 }, { "epoch": 812.5657894736842, "grad_norm": 1.4187861680984497, "learning_rate": 0.0001, "loss": 0.009, "step": 123510 }, { "epoch": 812.6315789473684, "grad_norm": 1.350807547569275, "learning_rate": 0.0001, "loss": 0.0107, "step": 123520 }, { "epoch": 812.6973684210526, "grad_norm": 1.1984448432922363, "learning_rate": 0.0001, "loss": 0.01, "step": 123530 }, { "epoch": 812.7631578947369, "grad_norm": 0.9718255400657654, "learning_rate": 0.0001, "loss": 0.01, "step": 123540 }, { "epoch": 812.828947368421, "grad_norm": 0.8007934093475342, "learning_rate": 0.0001, "loss": 0.0099, "step": 123550 }, { "epoch": 812.8947368421053, "grad_norm": 1.1120939254760742, "learning_rate": 0.0001, "loss": 0.009, "step": 123560 }, { "epoch": 812.9605263157895, "grad_norm": 0.8073641657829285, "learning_rate": 0.0001, "loss": 0.0112, "step": 123570 }, { "epoch": 813.0263157894736, "grad_norm": 1.3328661918640137, "learning_rate": 0.0001, "loss": 0.01, "step": 123580 }, { "epoch": 813.0921052631579, "grad_norm": 1.2822130918502808, "learning_rate": 0.0001, "loss": 0.013, "step": 123590 }, { "epoch": 813.1578947368421, "grad_norm": 1.1979964971542358, "learning_rate": 0.0001, "loss": 0.0093, "step": 123600 }, { "epoch": 813.2236842105264, "grad_norm": 1.2304644584655762, "learning_rate": 0.0001, "loss": 0.0094, "step": 123610 }, { "epoch": 813.2894736842105, "grad_norm": 0.8856472373008728, "learning_rate": 0.0001, "loss": 0.0105, "step": 123620 }, { "epoch": 813.3552631578947, "grad_norm": 1.14380943775177, "learning_rate": 0.0001, "loss": 0.0104, "step": 123630 }, { "epoch": 813.421052631579, "grad_norm": 1.169329047203064, "learning_rate": 0.0001, "loss": 0.0088, "step": 123640 }, { "epoch": 813.4868421052631, "grad_norm": 1.1275250911712646, "learning_rate": 0.0001, "loss": 0.009, "step": 123650 }, { "epoch": 813.5526315789474, "grad_norm": 0.9524691700935364, "learning_rate": 0.0001, "loss": 0.0097, "step": 123660 }, { "epoch": 813.6184210526316, "grad_norm": 1.1175603866577148, "learning_rate": 0.0001, "loss": 0.0093, "step": 123670 }, { "epoch": 813.6842105263158, "grad_norm": 1.0256012678146362, "learning_rate": 0.0001, "loss": 0.0091, "step": 123680 }, { "epoch": 813.75, "grad_norm": 0.9166593551635742, "learning_rate": 0.0001, "loss": 0.0087, "step": 123690 }, { "epoch": 813.8157894736842, "grad_norm": 1.1852658987045288, "learning_rate": 0.0001, "loss": 0.0117, "step": 123700 }, { "epoch": 813.8815789473684, "grad_norm": 1.0424803495407104, "learning_rate": 0.0001, "loss": 0.009, "step": 123710 }, { "epoch": 813.9473684210526, "grad_norm": 0.9205971956253052, "learning_rate": 0.0001, "loss": 0.0091, "step": 123720 }, { "epoch": 814.0131578947369, "grad_norm": 0.8064239621162415, "learning_rate": 0.0001, "loss": 0.011, "step": 123730 }, { "epoch": 814.078947368421, "grad_norm": 1.1456804275512695, "learning_rate": 0.0001, "loss": 0.0085, "step": 123740 }, { "epoch": 814.1447368421053, "grad_norm": 1.1536784172058105, "learning_rate": 0.0001, "loss": 0.0093, "step": 123750 }, { "epoch": 814.2105263157895, "grad_norm": 0.9535488486289978, "learning_rate": 0.0001, "loss": 0.0116, "step": 123760 }, { "epoch": 814.2763157894736, "grad_norm": 0.8925880193710327, "learning_rate": 0.0001, "loss": 0.0094, "step": 123770 }, { "epoch": 814.3421052631579, "grad_norm": 0.7770874500274658, "learning_rate": 0.0001, "loss": 0.0101, "step": 123780 }, { "epoch": 814.4078947368421, "grad_norm": 0.7658482193946838, "learning_rate": 0.0001, "loss": 0.0093, "step": 123790 }, { "epoch": 814.4736842105264, "grad_norm": 1.1080727577209473, "learning_rate": 0.0001, "loss": 0.0098, "step": 123800 }, { "epoch": 814.5394736842105, "grad_norm": 0.9930034279823303, "learning_rate": 0.0001, "loss": 0.0127, "step": 123810 }, { "epoch": 814.6052631578947, "grad_norm": 1.04108464717865, "learning_rate": 0.0001, "loss": 0.01, "step": 123820 }, { "epoch": 814.671052631579, "grad_norm": 1.4060074090957642, "learning_rate": 0.0001, "loss": 0.0101, "step": 123830 }, { "epoch": 814.7368421052631, "grad_norm": 1.135053277015686, "learning_rate": 0.0001, "loss": 0.0088, "step": 123840 }, { "epoch": 814.8026315789474, "grad_norm": 1.102472186088562, "learning_rate": 0.0001, "loss": 0.0106, "step": 123850 }, { "epoch": 814.8684210526316, "grad_norm": 1.0958255529403687, "learning_rate": 0.0001, "loss": 0.0104, "step": 123860 }, { "epoch": 814.9342105263158, "grad_norm": 1.2109631299972534, "learning_rate": 0.0001, "loss": 0.0096, "step": 123870 }, { "epoch": 815.0, "grad_norm": 0.8327732086181641, "learning_rate": 0.0001, "loss": 0.0092, "step": 123880 }, { "epoch": 815.0657894736842, "grad_norm": 0.830962598323822, "learning_rate": 0.0001, "loss": 0.0103, "step": 123890 }, { "epoch": 815.1315789473684, "grad_norm": 1.0585317611694336, "learning_rate": 0.0001, "loss": 0.0112, "step": 123900 }, { "epoch": 815.1973684210526, "grad_norm": 1.095977783203125, "learning_rate": 0.0001, "loss": 0.0097, "step": 123910 }, { "epoch": 815.2631578947369, "grad_norm": 1.2704319953918457, "learning_rate": 0.0001, "loss": 0.0096, "step": 123920 }, { "epoch": 815.328947368421, "grad_norm": 0.957330584526062, "learning_rate": 0.0001, "loss": 0.0097, "step": 123930 }, { "epoch": 815.3947368421053, "grad_norm": 0.8996625542640686, "learning_rate": 0.0001, "loss": 0.0109, "step": 123940 }, { "epoch": 815.4605263157895, "grad_norm": 1.2398024797439575, "learning_rate": 0.0001, "loss": 0.0116, "step": 123950 }, { "epoch": 815.5263157894736, "grad_norm": 1.1726700067520142, "learning_rate": 0.0001, "loss": 0.0096, "step": 123960 }, { "epoch": 815.5921052631579, "grad_norm": 1.1765739917755127, "learning_rate": 0.0001, "loss": 0.0091, "step": 123970 }, { "epoch": 815.6578947368421, "grad_norm": 1.4318251609802246, "learning_rate": 0.0001, "loss": 0.0086, "step": 123980 }, { "epoch": 815.7236842105264, "grad_norm": 1.3720372915267944, "learning_rate": 0.0001, "loss": 0.0096, "step": 123990 }, { "epoch": 815.7894736842105, "grad_norm": 0.9442108273506165, "learning_rate": 0.0001, "loss": 0.011, "step": 124000 }, { "epoch": 815.8552631578947, "grad_norm": 1.4249804019927979, "learning_rate": 0.0001, "loss": 0.0112, "step": 124010 }, { "epoch": 815.921052631579, "grad_norm": 1.5111039876937866, "learning_rate": 0.0001, "loss": 0.0108, "step": 124020 }, { "epoch": 815.9868421052631, "grad_norm": 1.065873146057129, "learning_rate": 0.0001, "loss": 0.0119, "step": 124030 }, { "epoch": 816.0526315789474, "grad_norm": 1.224717617034912, "learning_rate": 0.0001, "loss": 0.0107, "step": 124040 }, { "epoch": 816.1184210526316, "grad_norm": 0.8708517551422119, "learning_rate": 0.0001, "loss": 0.0096, "step": 124050 }, { "epoch": 816.1842105263158, "grad_norm": 1.0297471284866333, "learning_rate": 0.0001, "loss": 0.0116, "step": 124060 }, { "epoch": 816.25, "grad_norm": 1.1556639671325684, "learning_rate": 0.0001, "loss": 0.0102, "step": 124070 }, { "epoch": 816.3157894736842, "grad_norm": 1.0219916105270386, "learning_rate": 0.0001, "loss": 0.0104, "step": 124080 }, { "epoch": 816.3815789473684, "grad_norm": 0.9908708930015564, "learning_rate": 0.0001, "loss": 0.0115, "step": 124090 }, { "epoch": 816.4473684210526, "grad_norm": 0.8453224897384644, "learning_rate": 0.0001, "loss": 0.0127, "step": 124100 }, { "epoch": 816.5131578947369, "grad_norm": 0.6190137267112732, "learning_rate": 0.0001, "loss": 0.01, "step": 124110 }, { "epoch": 816.578947368421, "grad_norm": 1.4555507898330688, "learning_rate": 0.0001, "loss": 0.0108, "step": 124120 }, { "epoch": 816.6447368421053, "grad_norm": 1.2438726425170898, "learning_rate": 0.0001, "loss": 0.0104, "step": 124130 }, { "epoch": 816.7105263157895, "grad_norm": 1.2123976945877075, "learning_rate": 0.0001, "loss": 0.0135, "step": 124140 }, { "epoch": 816.7763157894736, "grad_norm": 1.1773884296417236, "learning_rate": 0.0001, "loss": 0.0114, "step": 124150 }, { "epoch": 816.8421052631579, "grad_norm": 1.0776981115341187, "learning_rate": 0.0001, "loss": 0.0136, "step": 124160 }, { "epoch": 816.9078947368421, "grad_norm": 1.0977414846420288, "learning_rate": 0.0001, "loss": 0.0118, "step": 124170 }, { "epoch": 816.9736842105264, "grad_norm": 0.9133142232894897, "learning_rate": 0.0001, "loss": 0.0119, "step": 124180 }, { "epoch": 817.0394736842105, "grad_norm": 1.3177136182785034, "learning_rate": 0.0001, "loss": 0.0126, "step": 124190 }, { "epoch": 817.1052631578947, "grad_norm": 1.1052703857421875, "learning_rate": 0.0001, "loss": 0.0113, "step": 124200 }, { "epoch": 817.171052631579, "grad_norm": 1.2231838703155518, "learning_rate": 0.0001, "loss": 0.0125, "step": 124210 }, { "epoch": 817.2368421052631, "grad_norm": 1.0773632526397705, "learning_rate": 0.0001, "loss": 0.011, "step": 124220 }, { "epoch": 817.3026315789474, "grad_norm": 0.7591347098350525, "learning_rate": 0.0001, "loss": 0.0112, "step": 124230 }, { "epoch": 817.3684210526316, "grad_norm": 1.1627017259597778, "learning_rate": 0.0001, "loss": 0.0145, "step": 124240 }, { "epoch": 817.4342105263158, "grad_norm": 0.88426274061203, "learning_rate": 0.0001, "loss": 0.0131, "step": 124250 }, { "epoch": 817.5, "grad_norm": 1.0880540609359741, "learning_rate": 0.0001, "loss": 0.0138, "step": 124260 }, { "epoch": 817.5657894736842, "grad_norm": 1.1447336673736572, "learning_rate": 0.0001, "loss": 0.0122, "step": 124270 }, { "epoch": 817.6315789473684, "grad_norm": 0.9627189636230469, "learning_rate": 0.0001, "loss": 0.0106, "step": 124280 }, { "epoch": 817.6973684210526, "grad_norm": 0.8809316158294678, "learning_rate": 0.0001, "loss": 0.0127, "step": 124290 }, { "epoch": 817.7631578947369, "grad_norm": 1.1471461057662964, "learning_rate": 0.0001, "loss": 0.0123, "step": 124300 }, { "epoch": 817.828947368421, "grad_norm": 0.9346367716789246, "learning_rate": 0.0001, "loss": 0.011, "step": 124310 }, { "epoch": 817.8947368421053, "grad_norm": 1.247454047203064, "learning_rate": 0.0001, "loss": 0.014, "step": 124320 }, { "epoch": 817.9605263157895, "grad_norm": 1.3746150732040405, "learning_rate": 0.0001, "loss": 0.0113, "step": 124330 }, { "epoch": 818.0263157894736, "grad_norm": 1.183838963508606, "learning_rate": 0.0001, "loss": 0.0115, "step": 124340 }, { "epoch": 818.0921052631579, "grad_norm": 0.9270862936973572, "learning_rate": 0.0001, "loss": 0.0089, "step": 124350 }, { "epoch": 818.1578947368421, "grad_norm": 0.9272816777229309, "learning_rate": 0.0001, "loss": 0.0107, "step": 124360 }, { "epoch": 818.2236842105264, "grad_norm": 1.0511780977249146, "learning_rate": 0.0001, "loss": 0.0139, "step": 124370 }, { "epoch": 818.2894736842105, "grad_norm": 1.2871315479278564, "learning_rate": 0.0001, "loss": 0.0112, "step": 124380 }, { "epoch": 818.3552631578947, "grad_norm": 1.2518093585968018, "learning_rate": 0.0001, "loss": 0.0112, "step": 124390 }, { "epoch": 818.421052631579, "grad_norm": 1.498147964477539, "learning_rate": 0.0001, "loss": 0.0101, "step": 124400 }, { "epoch": 818.4868421052631, "grad_norm": 2.1815390586853027, "learning_rate": 0.0001, "loss": 0.0114, "step": 124410 }, { "epoch": 818.5526315789474, "grad_norm": 1.512572169303894, "learning_rate": 0.0001, "loss": 0.0121, "step": 124420 }, { "epoch": 818.6184210526316, "grad_norm": 1.39220130443573, "learning_rate": 0.0001, "loss": 0.0096, "step": 124430 }, { "epoch": 818.6842105263158, "grad_norm": 1.40886390209198, "learning_rate": 0.0001, "loss": 0.0121, "step": 124440 }, { "epoch": 818.75, "grad_norm": 1.2492808103561401, "learning_rate": 0.0001, "loss": 0.0106, "step": 124450 }, { "epoch": 818.8157894736842, "grad_norm": 1.3465094566345215, "learning_rate": 0.0001, "loss": 0.012, "step": 124460 }, { "epoch": 818.8815789473684, "grad_norm": 1.4258430004119873, "learning_rate": 0.0001, "loss": 0.0098, "step": 124470 }, { "epoch": 818.9473684210526, "grad_norm": 1.4436920881271362, "learning_rate": 0.0001, "loss": 0.0113, "step": 124480 }, { "epoch": 819.0131578947369, "grad_norm": 1.0473710298538208, "learning_rate": 0.0001, "loss": 0.0095, "step": 124490 }, { "epoch": 819.078947368421, "grad_norm": 1.4626469612121582, "learning_rate": 0.0001, "loss": 0.0109, "step": 124500 }, { "epoch": 819.1447368421053, "grad_norm": 1.2455971240997314, "learning_rate": 0.0001, "loss": 0.0109, "step": 124510 }, { "epoch": 819.2105263157895, "grad_norm": 0.8882140517234802, "learning_rate": 0.0001, "loss": 0.0099, "step": 124520 }, { "epoch": 819.2763157894736, "grad_norm": 1.2379828691482544, "learning_rate": 0.0001, "loss": 0.0105, "step": 124530 }, { "epoch": 819.3421052631579, "grad_norm": 0.7983285188674927, "learning_rate": 0.0001, "loss": 0.0096, "step": 124540 }, { "epoch": 819.4078947368421, "grad_norm": 0.8106863498687744, "learning_rate": 0.0001, "loss": 0.0098, "step": 124550 }, { "epoch": 819.4736842105264, "grad_norm": 1.1622852087020874, "learning_rate": 0.0001, "loss": 0.0094, "step": 124560 }, { "epoch": 819.5394736842105, "grad_norm": 0.733343243598938, "learning_rate": 0.0001, "loss": 0.0104, "step": 124570 }, { "epoch": 819.6052631578947, "grad_norm": 0.8690012693405151, "learning_rate": 0.0001, "loss": 0.0095, "step": 124580 }, { "epoch": 819.671052631579, "grad_norm": 0.7145095467567444, "learning_rate": 0.0001, "loss": 0.0102, "step": 124590 }, { "epoch": 819.7368421052631, "grad_norm": 1.0075265169143677, "learning_rate": 0.0001, "loss": 0.012, "step": 124600 }, { "epoch": 819.8026315789474, "grad_norm": 1.1366093158721924, "learning_rate": 0.0001, "loss": 0.0094, "step": 124610 }, { "epoch": 819.8684210526316, "grad_norm": 0.9843047261238098, "learning_rate": 0.0001, "loss": 0.0095, "step": 124620 }, { "epoch": 819.9342105263158, "grad_norm": 0.7279674410820007, "learning_rate": 0.0001, "loss": 0.0107, "step": 124630 }, { "epoch": 820.0, "grad_norm": 1.1037843227386475, "learning_rate": 0.0001, "loss": 0.0108, "step": 124640 }, { "epoch": 820.0657894736842, "grad_norm": 0.7516036033630371, "learning_rate": 0.0001, "loss": 0.0106, "step": 124650 }, { "epoch": 820.1315789473684, "grad_norm": 1.1608238220214844, "learning_rate": 0.0001, "loss": 0.0099, "step": 124660 }, { "epoch": 820.1973684210526, "grad_norm": 1.3053244352340698, "learning_rate": 0.0001, "loss": 0.0108, "step": 124670 }, { "epoch": 820.2631578947369, "grad_norm": 1.0844759941101074, "learning_rate": 0.0001, "loss": 0.0098, "step": 124680 }, { "epoch": 820.328947368421, "grad_norm": 0.9442291259765625, "learning_rate": 0.0001, "loss": 0.0111, "step": 124690 }, { "epoch": 820.3947368421053, "grad_norm": 1.1379389762878418, "learning_rate": 0.0001, "loss": 0.0102, "step": 124700 }, { "epoch": 820.4605263157895, "grad_norm": 0.6525788903236389, "learning_rate": 0.0001, "loss": 0.0104, "step": 124710 }, { "epoch": 820.5263157894736, "grad_norm": 1.278929352760315, "learning_rate": 0.0001, "loss": 0.0102, "step": 124720 }, { "epoch": 820.5921052631579, "grad_norm": 1.4533745050430298, "learning_rate": 0.0001, "loss": 0.008, "step": 124730 }, { "epoch": 820.6578947368421, "grad_norm": 1.0017926692962646, "learning_rate": 0.0001, "loss": 0.0096, "step": 124740 }, { "epoch": 820.7236842105264, "grad_norm": 0.9450610876083374, "learning_rate": 0.0001, "loss": 0.0102, "step": 124750 }, { "epoch": 820.7894736842105, "grad_norm": 0.6079602837562561, "learning_rate": 0.0001, "loss": 0.0097, "step": 124760 }, { "epoch": 820.8552631578947, "grad_norm": 1.112248420715332, "learning_rate": 0.0001, "loss": 0.0109, "step": 124770 }, { "epoch": 820.921052631579, "grad_norm": 1.2943469285964966, "learning_rate": 0.0001, "loss": 0.0104, "step": 124780 }, { "epoch": 820.9868421052631, "grad_norm": 1.1327763795852661, "learning_rate": 0.0001, "loss": 0.01, "step": 124790 }, { "epoch": 821.0526315789474, "grad_norm": 1.0445799827575684, "learning_rate": 0.0001, "loss": 0.011, "step": 124800 }, { "epoch": 821.1184210526316, "grad_norm": 1.081302285194397, "learning_rate": 0.0001, "loss": 0.0114, "step": 124810 }, { "epoch": 821.1842105263158, "grad_norm": 1.0378952026367188, "learning_rate": 0.0001, "loss": 0.0113, "step": 124820 }, { "epoch": 821.25, "grad_norm": 0.7026200890541077, "learning_rate": 0.0001, "loss": 0.009, "step": 124830 }, { "epoch": 821.3157894736842, "grad_norm": 0.709734320640564, "learning_rate": 0.0001, "loss": 0.0097, "step": 124840 }, { "epoch": 821.3815789473684, "grad_norm": 1.214118480682373, "learning_rate": 0.0001, "loss": 0.0083, "step": 124850 }, { "epoch": 821.4473684210526, "grad_norm": 1.0140697956085205, "learning_rate": 0.0001, "loss": 0.0109, "step": 124860 }, { "epoch": 821.5131578947369, "grad_norm": 1.1845144033432007, "learning_rate": 0.0001, "loss": 0.0087, "step": 124870 }, { "epoch": 821.578947368421, "grad_norm": 1.131510853767395, "learning_rate": 0.0001, "loss": 0.008, "step": 124880 }, { "epoch": 821.6447368421053, "grad_norm": 1.0710123777389526, "learning_rate": 0.0001, "loss": 0.0098, "step": 124890 }, { "epoch": 821.7105263157895, "grad_norm": 0.9462993741035461, "learning_rate": 0.0001, "loss": 0.0088, "step": 124900 }, { "epoch": 821.7763157894736, "grad_norm": 1.1488789319992065, "learning_rate": 0.0001, "loss": 0.0076, "step": 124910 }, { "epoch": 821.8421052631579, "grad_norm": 0.8153425455093384, "learning_rate": 0.0001, "loss": 0.0095, "step": 124920 }, { "epoch": 821.9078947368421, "grad_norm": 1.136583924293518, "learning_rate": 0.0001, "loss": 0.0098, "step": 124930 }, { "epoch": 821.9736842105264, "grad_norm": 1.1596461534500122, "learning_rate": 0.0001, "loss": 0.0113, "step": 124940 }, { "epoch": 822.0394736842105, "grad_norm": 1.1383651494979858, "learning_rate": 0.0001, "loss": 0.0117, "step": 124950 }, { "epoch": 822.1052631578947, "grad_norm": 1.0268265008926392, "learning_rate": 0.0001, "loss": 0.0114, "step": 124960 }, { "epoch": 822.171052631579, "grad_norm": 1.239668607711792, "learning_rate": 0.0001, "loss": 0.0092, "step": 124970 }, { "epoch": 822.2368421052631, "grad_norm": 1.1452547311782837, "learning_rate": 0.0001, "loss": 0.009, "step": 124980 }, { "epoch": 822.3026315789474, "grad_norm": 0.804125189781189, "learning_rate": 0.0001, "loss": 0.0108, "step": 124990 }, { "epoch": 822.3684210526316, "grad_norm": 1.5917028188705444, "learning_rate": 0.0001, "loss": 0.0093, "step": 125000 }, { "epoch": 822.4342105263158, "grad_norm": 1.280947208404541, "learning_rate": 0.0001, "loss": 0.0104, "step": 125010 }, { "epoch": 822.5, "grad_norm": 0.9528753161430359, "learning_rate": 0.0001, "loss": 0.0096, "step": 125020 }, { "epoch": 822.5657894736842, "grad_norm": 0.9198108911514282, "learning_rate": 0.0001, "loss": 0.0112, "step": 125030 }, { "epoch": 822.6315789473684, "grad_norm": 0.8453339338302612, "learning_rate": 0.0001, "loss": 0.0093, "step": 125040 }, { "epoch": 822.6973684210526, "grad_norm": 1.1574821472167969, "learning_rate": 0.0001, "loss": 0.0092, "step": 125050 }, { "epoch": 822.7631578947369, "grad_norm": 1.1364651918411255, "learning_rate": 0.0001, "loss": 0.0095, "step": 125060 }, { "epoch": 822.828947368421, "grad_norm": 0.9139201641082764, "learning_rate": 0.0001, "loss": 0.0082, "step": 125070 }, { "epoch": 822.8947368421053, "grad_norm": 0.8016061782836914, "learning_rate": 0.0001, "loss": 0.0094, "step": 125080 }, { "epoch": 822.9605263157895, "grad_norm": 1.3205960988998413, "learning_rate": 0.0001, "loss": 0.0092, "step": 125090 }, { "epoch": 823.0263157894736, "grad_norm": 0.8638978600502014, "learning_rate": 0.0001, "loss": 0.011, "step": 125100 }, { "epoch": 823.0921052631579, "grad_norm": 0.964411199092865, "learning_rate": 0.0001, "loss": 0.0092, "step": 125110 }, { "epoch": 823.1578947368421, "grad_norm": 1.1022132635116577, "learning_rate": 0.0001, "loss": 0.0098, "step": 125120 }, { "epoch": 823.2236842105264, "grad_norm": 1.2288185358047485, "learning_rate": 0.0001, "loss": 0.01, "step": 125130 }, { "epoch": 823.2894736842105, "grad_norm": 1.1592532396316528, "learning_rate": 0.0001, "loss": 0.0102, "step": 125140 }, { "epoch": 823.3552631578947, "grad_norm": 0.9944072961807251, "learning_rate": 0.0001, "loss": 0.0097, "step": 125150 }, { "epoch": 823.421052631579, "grad_norm": 0.8100464344024658, "learning_rate": 0.0001, "loss": 0.0098, "step": 125160 }, { "epoch": 823.4868421052631, "grad_norm": 0.9860333204269409, "learning_rate": 0.0001, "loss": 0.0095, "step": 125170 }, { "epoch": 823.5526315789474, "grad_norm": 1.0454355478286743, "learning_rate": 0.0001, "loss": 0.0078, "step": 125180 }, { "epoch": 823.6184210526316, "grad_norm": 1.0982762575149536, "learning_rate": 0.0001, "loss": 0.0081, "step": 125190 }, { "epoch": 823.6842105263158, "grad_norm": 1.372549057006836, "learning_rate": 0.0001, "loss": 0.0097, "step": 125200 }, { "epoch": 823.75, "grad_norm": 1.4132963418960571, "learning_rate": 0.0001, "loss": 0.0113, "step": 125210 }, { "epoch": 823.8157894736842, "grad_norm": 1.2512835264205933, "learning_rate": 0.0001, "loss": 0.0088, "step": 125220 }, { "epoch": 823.8815789473684, "grad_norm": 1.114558219909668, "learning_rate": 0.0001, "loss": 0.0092, "step": 125230 }, { "epoch": 823.9473684210526, "grad_norm": 1.0687578916549683, "learning_rate": 0.0001, "loss": 0.0107, "step": 125240 }, { "epoch": 824.0131578947369, "grad_norm": 1.5718590021133423, "learning_rate": 0.0001, "loss": 0.0087, "step": 125250 }, { "epoch": 824.078947368421, "grad_norm": 1.0337969064712524, "learning_rate": 0.0001, "loss": 0.01, "step": 125260 }, { "epoch": 824.1447368421053, "grad_norm": 1.0111421346664429, "learning_rate": 0.0001, "loss": 0.0111, "step": 125270 }, { "epoch": 824.2105263157895, "grad_norm": 1.1369024515151978, "learning_rate": 0.0001, "loss": 0.0081, "step": 125280 }, { "epoch": 824.2763157894736, "grad_norm": 0.7964611649513245, "learning_rate": 0.0001, "loss": 0.0084, "step": 125290 }, { "epoch": 824.3421052631579, "grad_norm": 1.2055116891860962, "learning_rate": 0.0001, "loss": 0.0092, "step": 125300 }, { "epoch": 824.4078947368421, "grad_norm": 1.0731807947158813, "learning_rate": 0.0001, "loss": 0.0089, "step": 125310 }, { "epoch": 824.4736842105264, "grad_norm": 1.1437373161315918, "learning_rate": 0.0001, "loss": 0.0081, "step": 125320 }, { "epoch": 824.5394736842105, "grad_norm": 1.1605925559997559, "learning_rate": 0.0001, "loss": 0.0082, "step": 125330 }, { "epoch": 824.6052631578947, "grad_norm": 1.286482334136963, "learning_rate": 0.0001, "loss": 0.0108, "step": 125340 }, { "epoch": 824.671052631579, "grad_norm": 0.9673534035682678, "learning_rate": 0.0001, "loss": 0.0098, "step": 125350 }, { "epoch": 824.7368421052631, "grad_norm": 1.3695217370986938, "learning_rate": 0.0001, "loss": 0.0087, "step": 125360 }, { "epoch": 824.8026315789474, "grad_norm": 1.1328647136688232, "learning_rate": 0.0001, "loss": 0.0121, "step": 125370 }, { "epoch": 824.8684210526316, "grad_norm": 1.0601879358291626, "learning_rate": 0.0001, "loss": 0.0122, "step": 125380 }, { "epoch": 824.9342105263158, "grad_norm": 0.9178951978683472, "learning_rate": 0.0001, "loss": 0.0103, "step": 125390 }, { "epoch": 825.0, "grad_norm": 0.9925839900970459, "learning_rate": 0.0001, "loss": 0.0092, "step": 125400 }, { "epoch": 825.0657894736842, "grad_norm": 1.2685902118682861, "learning_rate": 0.0001, "loss": 0.0082, "step": 125410 }, { "epoch": 825.1315789473684, "grad_norm": 1.3963377475738525, "learning_rate": 0.0001, "loss": 0.0088, "step": 125420 }, { "epoch": 825.1973684210526, "grad_norm": 1.2463805675506592, "learning_rate": 0.0001, "loss": 0.0084, "step": 125430 }, { "epoch": 825.2631578947369, "grad_norm": 0.8058486580848694, "learning_rate": 0.0001, "loss": 0.0085, "step": 125440 }, { "epoch": 825.328947368421, "grad_norm": 1.0413463115692139, "learning_rate": 0.0001, "loss": 0.0106, "step": 125450 }, { "epoch": 825.3947368421053, "grad_norm": 1.3171190023422241, "learning_rate": 0.0001, "loss": 0.0096, "step": 125460 }, { "epoch": 825.4605263157895, "grad_norm": 1.252544641494751, "learning_rate": 0.0001, "loss": 0.0101, "step": 125470 }, { "epoch": 825.5263157894736, "grad_norm": 1.0242067575454712, "learning_rate": 0.0001, "loss": 0.0095, "step": 125480 }, { "epoch": 825.5921052631579, "grad_norm": 0.9414500594139099, "learning_rate": 0.0001, "loss": 0.0098, "step": 125490 }, { "epoch": 825.6578947368421, "grad_norm": 1.213117003440857, "learning_rate": 0.0001, "loss": 0.01, "step": 125500 }, { "epoch": 825.7236842105264, "grad_norm": 1.1076947450637817, "learning_rate": 0.0001, "loss": 0.0106, "step": 125510 }, { "epoch": 825.7894736842105, "grad_norm": 1.1053651571273804, "learning_rate": 0.0001, "loss": 0.0115, "step": 125520 }, { "epoch": 825.8552631578947, "grad_norm": 0.7674948573112488, "learning_rate": 0.0001, "loss": 0.0115, "step": 125530 }, { "epoch": 825.921052631579, "grad_norm": 1.0354715585708618, "learning_rate": 0.0001, "loss": 0.009, "step": 125540 }, { "epoch": 825.9868421052631, "grad_norm": 0.8403087854385376, "learning_rate": 0.0001, "loss": 0.0096, "step": 125550 }, { "epoch": 826.0526315789474, "grad_norm": 1.164014458656311, "learning_rate": 0.0001, "loss": 0.0094, "step": 125560 }, { "epoch": 826.1184210526316, "grad_norm": 0.902004599571228, "learning_rate": 0.0001, "loss": 0.0089, "step": 125570 }, { "epoch": 826.1842105263158, "grad_norm": 1.306944489479065, "learning_rate": 0.0001, "loss": 0.0077, "step": 125580 }, { "epoch": 826.25, "grad_norm": 1.1703041791915894, "learning_rate": 0.0001, "loss": 0.0119, "step": 125590 }, { "epoch": 826.3157894736842, "grad_norm": 1.309346079826355, "learning_rate": 0.0001, "loss": 0.0095, "step": 125600 }, { "epoch": 826.3815789473684, "grad_norm": 0.9344452619552612, "learning_rate": 0.0001, "loss": 0.0093, "step": 125610 }, { "epoch": 826.4473684210526, "grad_norm": 1.1551661491394043, "learning_rate": 0.0001, "loss": 0.0091, "step": 125620 }, { "epoch": 826.5131578947369, "grad_norm": 0.8393580913543701, "learning_rate": 0.0001, "loss": 0.0084, "step": 125630 }, { "epoch": 826.578947368421, "grad_norm": 1.1995357275009155, "learning_rate": 0.0001, "loss": 0.0118, "step": 125640 }, { "epoch": 826.6447368421053, "grad_norm": 1.1376581192016602, "learning_rate": 0.0001, "loss": 0.0097, "step": 125650 }, { "epoch": 826.7105263157895, "grad_norm": 1.103240966796875, "learning_rate": 0.0001, "loss": 0.0099, "step": 125660 }, { "epoch": 826.7763157894736, "grad_norm": 0.8122648000717163, "learning_rate": 0.0001, "loss": 0.0092, "step": 125670 }, { "epoch": 826.8421052631579, "grad_norm": 0.9855820536613464, "learning_rate": 0.0001, "loss": 0.0093, "step": 125680 }, { "epoch": 826.9078947368421, "grad_norm": 1.3283075094223022, "learning_rate": 0.0001, "loss": 0.0107, "step": 125690 }, { "epoch": 826.9736842105264, "grad_norm": 1.198071837425232, "learning_rate": 0.0001, "loss": 0.0132, "step": 125700 }, { "epoch": 827.0394736842105, "grad_norm": 1.1746746301651, "learning_rate": 0.0001, "loss": 0.0113, "step": 125710 }, { "epoch": 827.1052631578947, "grad_norm": 1.1474931240081787, "learning_rate": 0.0001, "loss": 0.0099, "step": 125720 }, { "epoch": 827.171052631579, "grad_norm": 0.8816200494766235, "learning_rate": 0.0001, "loss": 0.0092, "step": 125730 }, { "epoch": 827.2368421052631, "grad_norm": 1.0781453847885132, "learning_rate": 0.0001, "loss": 0.0094, "step": 125740 }, { "epoch": 827.3026315789474, "grad_norm": 0.9956436157226562, "learning_rate": 0.0001, "loss": 0.0096, "step": 125750 }, { "epoch": 827.3684210526316, "grad_norm": 0.9633864164352417, "learning_rate": 0.0001, "loss": 0.0097, "step": 125760 }, { "epoch": 827.4342105263158, "grad_norm": 0.7504329085350037, "learning_rate": 0.0001, "loss": 0.0115, "step": 125770 }, { "epoch": 827.5, "grad_norm": 0.7806046009063721, "learning_rate": 0.0001, "loss": 0.0088, "step": 125780 }, { "epoch": 827.5657894736842, "grad_norm": 1.3514283895492554, "learning_rate": 0.0001, "loss": 0.0092, "step": 125790 }, { "epoch": 827.6315789473684, "grad_norm": 0.7963207960128784, "learning_rate": 0.0001, "loss": 0.0095, "step": 125800 }, { "epoch": 827.6973684210526, "grad_norm": 1.0604304075241089, "learning_rate": 0.0001, "loss": 0.009, "step": 125810 }, { "epoch": 827.7631578947369, "grad_norm": 1.343090295791626, "learning_rate": 0.0001, "loss": 0.0083, "step": 125820 }, { "epoch": 827.828947368421, "grad_norm": 1.3664075136184692, "learning_rate": 0.0001, "loss": 0.0097, "step": 125830 }, { "epoch": 827.8947368421053, "grad_norm": 1.1324230432510376, "learning_rate": 0.0001, "loss": 0.0104, "step": 125840 }, { "epoch": 827.9605263157895, "grad_norm": 1.1731352806091309, "learning_rate": 0.0001, "loss": 0.0097, "step": 125850 }, { "epoch": 828.0263157894736, "grad_norm": 1.25088369846344, "learning_rate": 0.0001, "loss": 0.0107, "step": 125860 }, { "epoch": 828.0921052631579, "grad_norm": 0.9906530976295471, "learning_rate": 0.0001, "loss": 0.011, "step": 125870 }, { "epoch": 828.1578947368421, "grad_norm": 0.8870477676391602, "learning_rate": 0.0001, "loss": 0.0086, "step": 125880 }, { "epoch": 828.2236842105264, "grad_norm": 0.904126763343811, "learning_rate": 0.0001, "loss": 0.0089, "step": 125890 }, { "epoch": 828.2894736842105, "grad_norm": 1.1464385986328125, "learning_rate": 0.0001, "loss": 0.0104, "step": 125900 }, { "epoch": 828.3552631578947, "grad_norm": 0.9195071458816528, "learning_rate": 0.0001, "loss": 0.011, "step": 125910 }, { "epoch": 828.421052631579, "grad_norm": 1.2309972047805786, "learning_rate": 0.0001, "loss": 0.0104, "step": 125920 }, { "epoch": 828.4868421052631, "grad_norm": 0.7952999472618103, "learning_rate": 0.0001, "loss": 0.012, "step": 125930 }, { "epoch": 828.5526315789474, "grad_norm": 1.1572850942611694, "learning_rate": 0.0001, "loss": 0.0111, "step": 125940 }, { "epoch": 828.6184210526316, "grad_norm": 1.0016225576400757, "learning_rate": 0.0001, "loss": 0.0095, "step": 125950 }, { "epoch": 828.6842105263158, "grad_norm": 1.1131701469421387, "learning_rate": 0.0001, "loss": 0.0084, "step": 125960 }, { "epoch": 828.75, "grad_norm": 1.075382113456726, "learning_rate": 0.0001, "loss": 0.0092, "step": 125970 }, { "epoch": 828.8157894736842, "grad_norm": 1.0743154287338257, "learning_rate": 0.0001, "loss": 0.0104, "step": 125980 }, { "epoch": 828.8815789473684, "grad_norm": 0.8774272203445435, "learning_rate": 0.0001, "loss": 0.0087, "step": 125990 }, { "epoch": 828.9473684210526, "grad_norm": 0.7976047396659851, "learning_rate": 0.0001, "loss": 0.0104, "step": 126000 }, { "epoch": 829.0131578947369, "grad_norm": 0.9983826875686646, "learning_rate": 0.0001, "loss": 0.0082, "step": 126010 }, { "epoch": 829.078947368421, "grad_norm": 1.0128384828567505, "learning_rate": 0.0001, "loss": 0.0089, "step": 126020 }, { "epoch": 829.1447368421053, "grad_norm": 1.2371277809143066, "learning_rate": 0.0001, "loss": 0.0092, "step": 126030 }, { "epoch": 829.2105263157895, "grad_norm": 1.121740460395813, "learning_rate": 0.0001, "loss": 0.0104, "step": 126040 }, { "epoch": 829.2763157894736, "grad_norm": 1.0720088481903076, "learning_rate": 0.0001, "loss": 0.01, "step": 126050 }, { "epoch": 829.3421052631579, "grad_norm": 0.8553463816642761, "learning_rate": 0.0001, "loss": 0.0086, "step": 126060 }, { "epoch": 829.4078947368421, "grad_norm": 0.9494578838348389, "learning_rate": 0.0001, "loss": 0.0099, "step": 126070 }, { "epoch": 829.4736842105264, "grad_norm": 0.7240245342254639, "learning_rate": 0.0001, "loss": 0.0093, "step": 126080 }, { "epoch": 829.5394736842105, "grad_norm": 0.9537514448165894, "learning_rate": 0.0001, "loss": 0.0098, "step": 126090 }, { "epoch": 829.6052631578947, "grad_norm": 0.9568378329277039, "learning_rate": 0.0001, "loss": 0.0117, "step": 126100 }, { "epoch": 829.671052631579, "grad_norm": 1.081212043762207, "learning_rate": 0.0001, "loss": 0.0098, "step": 126110 }, { "epoch": 829.7368421052631, "grad_norm": 0.7988075017929077, "learning_rate": 0.0001, "loss": 0.0115, "step": 126120 }, { "epoch": 829.8026315789474, "grad_norm": 0.926447868347168, "learning_rate": 0.0001, "loss": 0.0104, "step": 126130 }, { "epoch": 829.8684210526316, "grad_norm": 0.8456553816795349, "learning_rate": 0.0001, "loss": 0.0097, "step": 126140 }, { "epoch": 829.9342105263158, "grad_norm": 0.9388708472251892, "learning_rate": 0.0001, "loss": 0.0092, "step": 126150 }, { "epoch": 830.0, "grad_norm": 1.1751067638397217, "learning_rate": 0.0001, "loss": 0.0096, "step": 126160 }, { "epoch": 830.0657894736842, "grad_norm": 0.8441213369369507, "learning_rate": 0.0001, "loss": 0.0089, "step": 126170 }, { "epoch": 830.1315789473684, "grad_norm": 0.8216992020606995, "learning_rate": 0.0001, "loss": 0.0095, "step": 126180 }, { "epoch": 830.1973684210526, "grad_norm": 1.0158005952835083, "learning_rate": 0.0001, "loss": 0.0107, "step": 126190 }, { "epoch": 830.2631578947369, "grad_norm": 1.3386634588241577, "learning_rate": 0.0001, "loss": 0.0133, "step": 126200 }, { "epoch": 830.328947368421, "grad_norm": 1.026985764503479, "learning_rate": 0.0001, "loss": 0.0104, "step": 126210 }, { "epoch": 830.3947368421053, "grad_norm": 1.194299578666687, "learning_rate": 0.0001, "loss": 0.0096, "step": 126220 }, { "epoch": 830.4605263157895, "grad_norm": 1.0776842832565308, "learning_rate": 0.0001, "loss": 0.0107, "step": 126230 }, { "epoch": 830.5263157894736, "grad_norm": 1.1848527193069458, "learning_rate": 0.0001, "loss": 0.0076, "step": 126240 }, { "epoch": 830.5921052631579, "grad_norm": 1.341431736946106, "learning_rate": 0.0001, "loss": 0.0099, "step": 126250 }, { "epoch": 830.6578947368421, "grad_norm": 1.150313138961792, "learning_rate": 0.0001, "loss": 0.0089, "step": 126260 }, { "epoch": 830.7236842105264, "grad_norm": 1.0594727993011475, "learning_rate": 0.0001, "loss": 0.0089, "step": 126270 }, { "epoch": 830.7894736842105, "grad_norm": 0.9882242679595947, "learning_rate": 0.0001, "loss": 0.0079, "step": 126280 }, { "epoch": 830.8552631578947, "grad_norm": 1.1118570566177368, "learning_rate": 0.0001, "loss": 0.0086, "step": 126290 }, { "epoch": 830.921052631579, "grad_norm": 1.4360451698303223, "learning_rate": 0.0001, "loss": 0.0098, "step": 126300 }, { "epoch": 830.9868421052631, "grad_norm": 1.0150532722473145, "learning_rate": 0.0001, "loss": 0.0117, "step": 126310 }, { "epoch": 831.0526315789474, "grad_norm": 1.010431170463562, "learning_rate": 0.0001, "loss": 0.0095, "step": 126320 }, { "epoch": 831.1184210526316, "grad_norm": 1.2884689569473267, "learning_rate": 0.0001, "loss": 0.0122, "step": 126330 }, { "epoch": 831.1842105263158, "grad_norm": 1.212661623954773, "learning_rate": 0.0001, "loss": 0.011, "step": 126340 }, { "epoch": 831.25, "grad_norm": 1.3419666290283203, "learning_rate": 0.0001, "loss": 0.0089, "step": 126350 }, { "epoch": 831.3157894736842, "grad_norm": 1.1499714851379395, "learning_rate": 0.0001, "loss": 0.0093, "step": 126360 }, { "epoch": 831.3815789473684, "grad_norm": 1.0944184064865112, "learning_rate": 0.0001, "loss": 0.0094, "step": 126370 }, { "epoch": 831.4473684210526, "grad_norm": 1.3042875528335571, "learning_rate": 0.0001, "loss": 0.011, "step": 126380 }, { "epoch": 831.5131578947369, "grad_norm": 1.3170181512832642, "learning_rate": 0.0001, "loss": 0.0097, "step": 126390 }, { "epoch": 831.578947368421, "grad_norm": 1.0547919273376465, "learning_rate": 0.0001, "loss": 0.01, "step": 126400 }, { "epoch": 831.6447368421053, "grad_norm": 1.129002571105957, "learning_rate": 0.0001, "loss": 0.0075, "step": 126410 }, { "epoch": 831.7105263157895, "grad_norm": 1.3356235027313232, "learning_rate": 0.0001, "loss": 0.0106, "step": 126420 }, { "epoch": 831.7763157894736, "grad_norm": 1.2888990640640259, "learning_rate": 0.0001, "loss": 0.0099, "step": 126430 }, { "epoch": 831.8421052631579, "grad_norm": 1.0148605108261108, "learning_rate": 0.0001, "loss": 0.0085, "step": 126440 }, { "epoch": 831.9078947368421, "grad_norm": 0.9079254865646362, "learning_rate": 0.0001, "loss": 0.0118, "step": 126450 }, { "epoch": 831.9736842105264, "grad_norm": 1.0899399518966675, "learning_rate": 0.0001, "loss": 0.0086, "step": 126460 }, { "epoch": 832.0394736842105, "grad_norm": 0.915304958820343, "learning_rate": 0.0001, "loss": 0.0091, "step": 126470 }, { "epoch": 832.1052631578947, "grad_norm": 1.0055623054504395, "learning_rate": 0.0001, "loss": 0.0092, "step": 126480 }, { "epoch": 832.171052631579, "grad_norm": 0.8581055998802185, "learning_rate": 0.0001, "loss": 0.0094, "step": 126490 }, { "epoch": 832.2368421052631, "grad_norm": 0.8408956527709961, "learning_rate": 0.0001, "loss": 0.0089, "step": 126500 }, { "epoch": 832.3026315789474, "grad_norm": 0.7524006366729736, "learning_rate": 0.0001, "loss": 0.0094, "step": 126510 }, { "epoch": 832.3684210526316, "grad_norm": 0.799748420715332, "learning_rate": 0.0001, "loss": 0.0109, "step": 126520 }, { "epoch": 832.4342105263158, "grad_norm": 0.9387583136558533, "learning_rate": 0.0001, "loss": 0.0111, "step": 126530 }, { "epoch": 832.5, "grad_norm": 0.9900626540184021, "learning_rate": 0.0001, "loss": 0.0092, "step": 126540 }, { "epoch": 832.5657894736842, "grad_norm": 1.16420578956604, "learning_rate": 0.0001, "loss": 0.0089, "step": 126550 }, { "epoch": 832.6315789473684, "grad_norm": 1.023986577987671, "learning_rate": 0.0001, "loss": 0.0111, "step": 126560 }, { "epoch": 832.6973684210526, "grad_norm": 0.950329601764679, "learning_rate": 0.0001, "loss": 0.0095, "step": 126570 }, { "epoch": 832.7631578947369, "grad_norm": 1.0447871685028076, "learning_rate": 0.0001, "loss": 0.0108, "step": 126580 }, { "epoch": 832.828947368421, "grad_norm": 0.6059691905975342, "learning_rate": 0.0001, "loss": 0.0098, "step": 126590 }, { "epoch": 832.8947368421053, "grad_norm": 1.1729786396026611, "learning_rate": 0.0001, "loss": 0.0109, "step": 126600 }, { "epoch": 832.9605263157895, "grad_norm": 1.0164276361465454, "learning_rate": 0.0001, "loss": 0.011, "step": 126610 }, { "epoch": 833.0263157894736, "grad_norm": 1.1366938352584839, "learning_rate": 0.0001, "loss": 0.0096, "step": 126620 }, { "epoch": 833.0921052631579, "grad_norm": 1.0589661598205566, "learning_rate": 0.0001, "loss": 0.0102, "step": 126630 }, { "epoch": 833.1578947368421, "grad_norm": 0.9996610879898071, "learning_rate": 0.0001, "loss": 0.01, "step": 126640 }, { "epoch": 833.2236842105264, "grad_norm": 1.13376784324646, "learning_rate": 0.0001, "loss": 0.0102, "step": 126650 }, { "epoch": 833.2894736842105, "grad_norm": 1.0717856884002686, "learning_rate": 0.0001, "loss": 0.0093, "step": 126660 }, { "epoch": 833.3552631578947, "grad_norm": 0.7824698090553284, "learning_rate": 0.0001, "loss": 0.0091, "step": 126670 }, { "epoch": 833.421052631579, "grad_norm": 1.0761967897415161, "learning_rate": 0.0001, "loss": 0.0101, "step": 126680 }, { "epoch": 833.4868421052631, "grad_norm": 1.2236111164093018, "learning_rate": 0.0001, "loss": 0.0104, "step": 126690 }, { "epoch": 833.5526315789474, "grad_norm": 1.143056035041809, "learning_rate": 0.0001, "loss": 0.0105, "step": 126700 }, { "epoch": 833.6184210526316, "grad_norm": 1.0561918020248413, "learning_rate": 0.0001, "loss": 0.0114, "step": 126710 }, { "epoch": 833.6842105263158, "grad_norm": 1.1367357969284058, "learning_rate": 0.0001, "loss": 0.0094, "step": 126720 }, { "epoch": 833.75, "grad_norm": 0.7675701379776001, "learning_rate": 0.0001, "loss": 0.0087, "step": 126730 }, { "epoch": 833.8157894736842, "grad_norm": 1.025897741317749, "learning_rate": 0.0001, "loss": 0.0103, "step": 126740 }, { "epoch": 833.8815789473684, "grad_norm": 0.9761922359466553, "learning_rate": 0.0001, "loss": 0.011, "step": 126750 }, { "epoch": 833.9473684210526, "grad_norm": 0.9018344879150391, "learning_rate": 0.0001, "loss": 0.0099, "step": 126760 }, { "epoch": 834.0131578947369, "grad_norm": 1.203495979309082, "learning_rate": 0.0001, "loss": 0.0094, "step": 126770 }, { "epoch": 834.078947368421, "grad_norm": 1.1950404644012451, "learning_rate": 0.0001, "loss": 0.0113, "step": 126780 }, { "epoch": 834.1447368421053, "grad_norm": 0.943814218044281, "learning_rate": 0.0001, "loss": 0.0106, "step": 126790 }, { "epoch": 834.2105263157895, "grad_norm": 1.0648579597473145, "learning_rate": 0.0001, "loss": 0.0116, "step": 126800 }, { "epoch": 834.2763157894736, "grad_norm": 1.0198975801467896, "learning_rate": 0.0001, "loss": 0.0098, "step": 126810 }, { "epoch": 834.3421052631579, "grad_norm": 1.195588231086731, "learning_rate": 0.0001, "loss": 0.0101, "step": 126820 }, { "epoch": 834.4078947368421, "grad_norm": 0.9909479022026062, "learning_rate": 0.0001, "loss": 0.0103, "step": 126830 }, { "epoch": 834.4736842105264, "grad_norm": 0.8295359015464783, "learning_rate": 0.0001, "loss": 0.0091, "step": 126840 }, { "epoch": 834.5394736842105, "grad_norm": 1.1163146495819092, "learning_rate": 0.0001, "loss": 0.0113, "step": 126850 }, { "epoch": 834.6052631578947, "grad_norm": 0.9339954853057861, "learning_rate": 0.0001, "loss": 0.0117, "step": 126860 }, { "epoch": 834.671052631579, "grad_norm": 1.0907495021820068, "learning_rate": 0.0001, "loss": 0.0101, "step": 126870 }, { "epoch": 834.7368421052631, "grad_norm": 0.7249103784561157, "learning_rate": 0.0001, "loss": 0.0098, "step": 126880 }, { "epoch": 834.8026315789474, "grad_norm": 0.9385533928871155, "learning_rate": 0.0001, "loss": 0.0094, "step": 126890 }, { "epoch": 834.8684210526316, "grad_norm": 1.03643000125885, "learning_rate": 0.0001, "loss": 0.0088, "step": 126900 }, { "epoch": 834.9342105263158, "grad_norm": 1.1739977598190308, "learning_rate": 0.0001, "loss": 0.0087, "step": 126910 }, { "epoch": 835.0, "grad_norm": 0.7921239733695984, "learning_rate": 0.0001, "loss": 0.0081, "step": 126920 }, { "epoch": 835.0657894736842, "grad_norm": 0.7429852485656738, "learning_rate": 0.0001, "loss": 0.0102, "step": 126930 }, { "epoch": 835.1315789473684, "grad_norm": 0.8239948153495789, "learning_rate": 0.0001, "loss": 0.0097, "step": 126940 }, { "epoch": 835.1973684210526, "grad_norm": 0.9873680472373962, "learning_rate": 0.0001, "loss": 0.0103, "step": 126950 }, { "epoch": 835.2631578947369, "grad_norm": 0.8534329533576965, "learning_rate": 0.0001, "loss": 0.0091, "step": 126960 }, { "epoch": 835.328947368421, "grad_norm": 1.498870849609375, "learning_rate": 0.0001, "loss": 0.0086, "step": 126970 }, { "epoch": 835.3947368421053, "grad_norm": 0.9700299501419067, "learning_rate": 0.0001, "loss": 0.0098, "step": 126980 }, { "epoch": 835.4605263157895, "grad_norm": 0.9935576915740967, "learning_rate": 0.0001, "loss": 0.0106, "step": 126990 }, { "epoch": 835.5263157894736, "grad_norm": 1.1261264085769653, "learning_rate": 0.0001, "loss": 0.0122, "step": 127000 }, { "epoch": 835.5921052631579, "grad_norm": 1.2722344398498535, "learning_rate": 0.0001, "loss": 0.0094, "step": 127010 }, { "epoch": 835.6578947368421, "grad_norm": 1.2571996450424194, "learning_rate": 0.0001, "loss": 0.0094, "step": 127020 }, { "epoch": 835.7236842105264, "grad_norm": 1.4331727027893066, "learning_rate": 0.0001, "loss": 0.0096, "step": 127030 }, { "epoch": 835.7894736842105, "grad_norm": 1.5384089946746826, "learning_rate": 0.0001, "loss": 0.0083, "step": 127040 }, { "epoch": 835.8552631578947, "grad_norm": 1.356110692024231, "learning_rate": 0.0001, "loss": 0.0098, "step": 127050 }, { "epoch": 835.921052631579, "grad_norm": 0.8997991681098938, "learning_rate": 0.0001, "loss": 0.0102, "step": 127060 }, { "epoch": 835.9868421052631, "grad_norm": 0.8456292152404785, "learning_rate": 0.0001, "loss": 0.0125, "step": 127070 }, { "epoch": 836.0526315789474, "grad_norm": 1.1669671535491943, "learning_rate": 0.0001, "loss": 0.0099, "step": 127080 }, { "epoch": 836.1184210526316, "grad_norm": 1.0110538005828857, "learning_rate": 0.0001, "loss": 0.0095, "step": 127090 }, { "epoch": 836.1842105263158, "grad_norm": 1.2209850549697876, "learning_rate": 0.0001, "loss": 0.0108, "step": 127100 }, { "epoch": 836.25, "grad_norm": 1.2250803709030151, "learning_rate": 0.0001, "loss": 0.0102, "step": 127110 }, { "epoch": 836.3157894736842, "grad_norm": 0.9316526055335999, "learning_rate": 0.0001, "loss": 0.0084, "step": 127120 }, { "epoch": 836.3815789473684, "grad_norm": 0.9690991044044495, "learning_rate": 0.0001, "loss": 0.0104, "step": 127130 }, { "epoch": 836.4473684210526, "grad_norm": 0.7517539858818054, "learning_rate": 0.0001, "loss": 0.0084, "step": 127140 }, { "epoch": 836.5131578947369, "grad_norm": 1.0882537364959717, "learning_rate": 0.0001, "loss": 0.0115, "step": 127150 }, { "epoch": 836.578947368421, "grad_norm": 1.2422233819961548, "learning_rate": 0.0001, "loss": 0.0109, "step": 127160 }, { "epoch": 836.6447368421053, "grad_norm": 1.1308391094207764, "learning_rate": 0.0001, "loss": 0.0095, "step": 127170 }, { "epoch": 836.7105263157895, "grad_norm": 1.181129813194275, "learning_rate": 0.0001, "loss": 0.0114, "step": 127180 }, { "epoch": 836.7763157894736, "grad_norm": 1.0255203247070312, "learning_rate": 0.0001, "loss": 0.0121, "step": 127190 }, { "epoch": 836.8421052631579, "grad_norm": 1.0835888385772705, "learning_rate": 0.0001, "loss": 0.0096, "step": 127200 }, { "epoch": 836.9078947368421, "grad_norm": 1.4097410440444946, "learning_rate": 0.0001, "loss": 0.0089, "step": 127210 }, { "epoch": 836.9736842105264, "grad_norm": 0.8870857954025269, "learning_rate": 0.0001, "loss": 0.0093, "step": 127220 }, { "epoch": 837.0394736842105, "grad_norm": 0.9946476221084595, "learning_rate": 0.0001, "loss": 0.0088, "step": 127230 }, { "epoch": 837.1052631578947, "grad_norm": 1.268383264541626, "learning_rate": 0.0001, "loss": 0.0095, "step": 127240 }, { "epoch": 837.171052631579, "grad_norm": 1.1119439601898193, "learning_rate": 0.0001, "loss": 0.0124, "step": 127250 }, { "epoch": 837.2368421052631, "grad_norm": 1.3202515840530396, "learning_rate": 0.0001, "loss": 0.0083, "step": 127260 }, { "epoch": 837.3026315789474, "grad_norm": 1.216822624206543, "learning_rate": 0.0001, "loss": 0.0094, "step": 127270 }, { "epoch": 837.3684210526316, "grad_norm": 1.5065245628356934, "learning_rate": 0.0001, "loss": 0.0106, "step": 127280 }, { "epoch": 837.4342105263158, "grad_norm": 1.358711838722229, "learning_rate": 0.0001, "loss": 0.011, "step": 127290 }, { "epoch": 837.5, "grad_norm": 1.0266478061676025, "learning_rate": 0.0001, "loss": 0.0097, "step": 127300 }, { "epoch": 837.5657894736842, "grad_norm": 1.2015262842178345, "learning_rate": 0.0001, "loss": 0.0093, "step": 127310 }, { "epoch": 837.6315789473684, "grad_norm": 0.8070095777511597, "learning_rate": 0.0001, "loss": 0.0115, "step": 127320 }, { "epoch": 837.6973684210526, "grad_norm": 1.3548043966293335, "learning_rate": 0.0001, "loss": 0.01, "step": 127330 }, { "epoch": 837.7631578947369, "grad_norm": 1.2841241359710693, "learning_rate": 0.0001, "loss": 0.0105, "step": 127340 }, { "epoch": 837.828947368421, "grad_norm": 1.2051345109939575, "learning_rate": 0.0001, "loss": 0.0102, "step": 127350 }, { "epoch": 837.8947368421053, "grad_norm": 1.303202748298645, "learning_rate": 0.0001, "loss": 0.0089, "step": 127360 }, { "epoch": 837.9605263157895, "grad_norm": 0.9011682271957397, "learning_rate": 0.0001, "loss": 0.0086, "step": 127370 }, { "epoch": 838.0263157894736, "grad_norm": 1.1267516613006592, "learning_rate": 0.0001, "loss": 0.0103, "step": 127380 }, { "epoch": 838.0921052631579, "grad_norm": 1.045426368713379, "learning_rate": 0.0001, "loss": 0.0098, "step": 127390 }, { "epoch": 838.1578947368421, "grad_norm": 1.0214307308197021, "learning_rate": 0.0001, "loss": 0.0099, "step": 127400 }, { "epoch": 838.2236842105264, "grad_norm": 1.4649877548217773, "learning_rate": 0.0001, "loss": 0.0088, "step": 127410 }, { "epoch": 838.2894736842105, "grad_norm": 1.314589262008667, "learning_rate": 0.0001, "loss": 0.0112, "step": 127420 }, { "epoch": 838.3552631578947, "grad_norm": 1.1437108516693115, "learning_rate": 0.0001, "loss": 0.0102, "step": 127430 }, { "epoch": 838.421052631579, "grad_norm": 1.1604773998260498, "learning_rate": 0.0001, "loss": 0.0101, "step": 127440 }, { "epoch": 838.4868421052631, "grad_norm": 1.3208593130111694, "learning_rate": 0.0001, "loss": 0.0097, "step": 127450 }, { "epoch": 838.5526315789474, "grad_norm": 1.03213369846344, "learning_rate": 0.0001, "loss": 0.0101, "step": 127460 }, { "epoch": 838.6184210526316, "grad_norm": 1.0011709928512573, "learning_rate": 0.0001, "loss": 0.0111, "step": 127470 }, { "epoch": 838.6842105263158, "grad_norm": 1.0269711017608643, "learning_rate": 0.0001, "loss": 0.0092, "step": 127480 }, { "epoch": 838.75, "grad_norm": 1.070455551147461, "learning_rate": 0.0001, "loss": 0.0103, "step": 127490 }, { "epoch": 838.8157894736842, "grad_norm": 0.789607584476471, "learning_rate": 0.0001, "loss": 0.0093, "step": 127500 }, { "epoch": 838.8815789473684, "grad_norm": 0.8468894958496094, "learning_rate": 0.0001, "loss": 0.0092, "step": 127510 }, { "epoch": 838.9473684210526, "grad_norm": 0.7091095447540283, "learning_rate": 0.0001, "loss": 0.0092, "step": 127520 }, { "epoch": 839.0131578947369, "grad_norm": 1.0333023071289062, "learning_rate": 0.0001, "loss": 0.0109, "step": 127530 }, { "epoch": 839.078947368421, "grad_norm": 1.031185269355774, "learning_rate": 0.0001, "loss": 0.0096, "step": 127540 }, { "epoch": 839.1447368421053, "grad_norm": 0.9632649421691895, "learning_rate": 0.0001, "loss": 0.0101, "step": 127550 }, { "epoch": 839.2105263157895, "grad_norm": 0.7596598863601685, "learning_rate": 0.0001, "loss": 0.0097, "step": 127560 }, { "epoch": 839.2763157894736, "grad_norm": 0.7665521502494812, "learning_rate": 0.0001, "loss": 0.0091, "step": 127570 }, { "epoch": 839.3421052631579, "grad_norm": 0.8565236926078796, "learning_rate": 0.0001, "loss": 0.0133, "step": 127580 }, { "epoch": 839.4078947368421, "grad_norm": 1.3483563661575317, "learning_rate": 0.0001, "loss": 0.0118, "step": 127590 }, { "epoch": 839.4736842105264, "grad_norm": 1.4640027284622192, "learning_rate": 0.0001, "loss": 0.0115, "step": 127600 }, { "epoch": 839.5394736842105, "grad_norm": 1.413806438446045, "learning_rate": 0.0001, "loss": 0.0122, "step": 127610 }, { "epoch": 839.6052631578947, "grad_norm": 1.3078571557998657, "learning_rate": 0.0001, "loss": 0.0107, "step": 127620 }, { "epoch": 839.671052631579, "grad_norm": 0.8377428650856018, "learning_rate": 0.0001, "loss": 0.0101, "step": 127630 }, { "epoch": 839.7368421052631, "grad_norm": 1.2343833446502686, "learning_rate": 0.0001, "loss": 0.0142, "step": 127640 }, { "epoch": 839.8026315789474, "grad_norm": 0.7730600833892822, "learning_rate": 0.0001, "loss": 0.0118, "step": 127650 }, { "epoch": 839.8684210526316, "grad_norm": 0.9938012957572937, "learning_rate": 0.0001, "loss": 0.0108, "step": 127660 }, { "epoch": 839.9342105263158, "grad_norm": 0.7523202896118164, "learning_rate": 0.0001, "loss": 0.0102, "step": 127670 }, { "epoch": 840.0, "grad_norm": 0.7400522828102112, "learning_rate": 0.0001, "loss": 0.0105, "step": 127680 }, { "epoch": 840.0657894736842, "grad_norm": 0.9633582234382629, "learning_rate": 0.0001, "loss": 0.01, "step": 127690 }, { "epoch": 840.1315789473684, "grad_norm": 1.1232542991638184, "learning_rate": 0.0001, "loss": 0.0115, "step": 127700 }, { "epoch": 840.1973684210526, "grad_norm": 1.1191346645355225, "learning_rate": 0.0001, "loss": 0.0108, "step": 127710 }, { "epoch": 840.2631578947369, "grad_norm": 1.43672513961792, "learning_rate": 0.0001, "loss": 0.0107, "step": 127720 }, { "epoch": 840.328947368421, "grad_norm": 0.8802378177642822, "learning_rate": 0.0001, "loss": 0.0107, "step": 127730 }, { "epoch": 840.3947368421053, "grad_norm": 1.0515570640563965, "learning_rate": 0.0001, "loss": 0.0092, "step": 127740 }, { "epoch": 840.4605263157895, "grad_norm": 0.9687642455101013, "learning_rate": 0.0001, "loss": 0.0109, "step": 127750 }, { "epoch": 840.5263157894736, "grad_norm": 0.8646338582038879, "learning_rate": 0.0001, "loss": 0.0081, "step": 127760 }, { "epoch": 840.5921052631579, "grad_norm": 0.8392863869667053, "learning_rate": 0.0001, "loss": 0.0097, "step": 127770 }, { "epoch": 840.6578947368421, "grad_norm": 0.8507763147354126, "learning_rate": 0.0001, "loss": 0.0102, "step": 127780 }, { "epoch": 840.7236842105264, "grad_norm": 0.9287623167037964, "learning_rate": 0.0001, "loss": 0.0102, "step": 127790 }, { "epoch": 840.7894736842105, "grad_norm": 0.7224423289299011, "learning_rate": 0.0001, "loss": 0.0111, "step": 127800 }, { "epoch": 840.8552631578947, "grad_norm": 0.9184572100639343, "learning_rate": 0.0001, "loss": 0.0111, "step": 127810 }, { "epoch": 840.921052631579, "grad_norm": 1.360131859779358, "learning_rate": 0.0001, "loss": 0.0105, "step": 127820 }, { "epoch": 840.9868421052631, "grad_norm": 1.099462628364563, "learning_rate": 0.0001, "loss": 0.0119, "step": 127830 }, { "epoch": 841.0526315789474, "grad_norm": 1.0513808727264404, "learning_rate": 0.0001, "loss": 0.009, "step": 127840 }, { "epoch": 841.1184210526316, "grad_norm": 1.0300471782684326, "learning_rate": 0.0001, "loss": 0.0103, "step": 127850 }, { "epoch": 841.1842105263158, "grad_norm": 1.2935221195220947, "learning_rate": 0.0001, "loss": 0.0103, "step": 127860 }, { "epoch": 841.25, "grad_norm": 1.2731430530548096, "learning_rate": 0.0001, "loss": 0.0104, "step": 127870 }, { "epoch": 841.3157894736842, "grad_norm": 0.963191032409668, "learning_rate": 0.0001, "loss": 0.0107, "step": 127880 }, { "epoch": 841.3815789473684, "grad_norm": 1.1909409761428833, "learning_rate": 0.0001, "loss": 0.0101, "step": 127890 }, { "epoch": 841.4473684210526, "grad_norm": 1.1539998054504395, "learning_rate": 0.0001, "loss": 0.0108, "step": 127900 }, { "epoch": 841.5131578947369, "grad_norm": 1.3037039041519165, "learning_rate": 0.0001, "loss": 0.0119, "step": 127910 }, { "epoch": 841.578947368421, "grad_norm": 1.0298134088516235, "learning_rate": 0.0001, "loss": 0.0108, "step": 127920 }, { "epoch": 841.6447368421053, "grad_norm": 0.9822731018066406, "learning_rate": 0.0001, "loss": 0.0109, "step": 127930 }, { "epoch": 841.7105263157895, "grad_norm": 0.9702892303466797, "learning_rate": 0.0001, "loss": 0.0083, "step": 127940 }, { "epoch": 841.7763157894736, "grad_norm": 0.8605859279632568, "learning_rate": 0.0001, "loss": 0.0101, "step": 127950 }, { "epoch": 841.8421052631579, "grad_norm": 1.282814383506775, "learning_rate": 0.0001, "loss": 0.0104, "step": 127960 }, { "epoch": 841.9078947368421, "grad_norm": 0.9928876161575317, "learning_rate": 0.0001, "loss": 0.011, "step": 127970 }, { "epoch": 841.9736842105264, "grad_norm": 1.6558021306991577, "learning_rate": 0.0001, "loss": 0.0096, "step": 127980 }, { "epoch": 842.0394736842105, "grad_norm": 0.8925629258155823, "learning_rate": 0.0001, "loss": 0.0112, "step": 127990 }, { "epoch": 842.1052631578947, "grad_norm": 0.8495152592658997, "learning_rate": 0.0001, "loss": 0.01, "step": 128000 }, { "epoch": 842.171052631579, "grad_norm": 0.8504860997200012, "learning_rate": 0.0001, "loss": 0.0095, "step": 128010 }, { "epoch": 842.2368421052631, "grad_norm": 1.1367663145065308, "learning_rate": 0.0001, "loss": 0.0112, "step": 128020 }, { "epoch": 842.3026315789474, "grad_norm": 0.9067413210868835, "learning_rate": 0.0001, "loss": 0.009, "step": 128030 }, { "epoch": 842.3684210526316, "grad_norm": 1.0044593811035156, "learning_rate": 0.0001, "loss": 0.0102, "step": 128040 }, { "epoch": 842.4342105263158, "grad_norm": 1.2345927953720093, "learning_rate": 0.0001, "loss": 0.0097, "step": 128050 }, { "epoch": 842.5, "grad_norm": 1.3313305377960205, "learning_rate": 0.0001, "loss": 0.01, "step": 128060 }, { "epoch": 842.5657894736842, "grad_norm": 1.3206311464309692, "learning_rate": 0.0001, "loss": 0.0094, "step": 128070 }, { "epoch": 842.6315789473684, "grad_norm": 0.9550430774688721, "learning_rate": 0.0001, "loss": 0.0104, "step": 128080 }, { "epoch": 842.6973684210526, "grad_norm": 1.156746745109558, "learning_rate": 0.0001, "loss": 0.0108, "step": 128090 }, { "epoch": 842.7631578947369, "grad_norm": 1.0178402662277222, "learning_rate": 0.0001, "loss": 0.0112, "step": 128100 }, { "epoch": 842.828947368421, "grad_norm": 1.0084766149520874, "learning_rate": 0.0001, "loss": 0.0088, "step": 128110 }, { "epoch": 842.8947368421053, "grad_norm": 1.0070236921310425, "learning_rate": 0.0001, "loss": 0.0119, "step": 128120 }, { "epoch": 842.9605263157895, "grad_norm": 0.9018028378486633, "learning_rate": 0.0001, "loss": 0.009, "step": 128130 }, { "epoch": 843.0263157894736, "grad_norm": 0.7280605435371399, "learning_rate": 0.0001, "loss": 0.0115, "step": 128140 }, { "epoch": 843.0921052631579, "grad_norm": 1.2799079418182373, "learning_rate": 0.0001, "loss": 0.0102, "step": 128150 }, { "epoch": 843.1578947368421, "grad_norm": 1.1699326038360596, "learning_rate": 0.0001, "loss": 0.0113, "step": 128160 }, { "epoch": 843.2236842105264, "grad_norm": 1.3281631469726562, "learning_rate": 0.0001, "loss": 0.01, "step": 128170 }, { "epoch": 843.2894736842105, "grad_norm": 1.5978515148162842, "learning_rate": 0.0001, "loss": 0.0099, "step": 128180 }, { "epoch": 843.3552631578947, "grad_norm": 1.2502210140228271, "learning_rate": 0.0001, "loss": 0.0084, "step": 128190 }, { "epoch": 843.421052631579, "grad_norm": 1.4333372116088867, "learning_rate": 0.0001, "loss": 0.0091, "step": 128200 }, { "epoch": 843.4868421052631, "grad_norm": 0.8813333511352539, "learning_rate": 0.0001, "loss": 0.0088, "step": 128210 }, { "epoch": 843.5526315789474, "grad_norm": 1.101881980895996, "learning_rate": 0.0001, "loss": 0.0119, "step": 128220 }, { "epoch": 843.6184210526316, "grad_norm": 0.9753175377845764, "learning_rate": 0.0001, "loss": 0.011, "step": 128230 }, { "epoch": 843.6842105263158, "grad_norm": 1.0207269191741943, "learning_rate": 0.0001, "loss": 0.0093, "step": 128240 }, { "epoch": 843.75, "grad_norm": 0.8684105277061462, "learning_rate": 0.0001, "loss": 0.0106, "step": 128250 }, { "epoch": 843.8157894736842, "grad_norm": 0.8891376852989197, "learning_rate": 0.0001, "loss": 0.0098, "step": 128260 }, { "epoch": 843.8815789473684, "grad_norm": 0.7383885979652405, "learning_rate": 0.0001, "loss": 0.0087, "step": 128270 }, { "epoch": 843.9473684210526, "grad_norm": 0.9492682814598083, "learning_rate": 0.0001, "loss": 0.0099, "step": 128280 }, { "epoch": 844.0131578947369, "grad_norm": 0.815500020980835, "learning_rate": 0.0001, "loss": 0.011, "step": 128290 }, { "epoch": 844.078947368421, "grad_norm": 0.8752801418304443, "learning_rate": 0.0001, "loss": 0.011, "step": 128300 }, { "epoch": 844.1447368421053, "grad_norm": 1.1680850982666016, "learning_rate": 0.0001, "loss": 0.0096, "step": 128310 }, { "epoch": 844.2105263157895, "grad_norm": 1.0762124061584473, "learning_rate": 0.0001, "loss": 0.0102, "step": 128320 }, { "epoch": 844.2763157894736, "grad_norm": 1.2257249355316162, "learning_rate": 0.0001, "loss": 0.0109, "step": 128330 }, { "epoch": 844.3421052631579, "grad_norm": 1.2510292530059814, "learning_rate": 0.0001, "loss": 0.0101, "step": 128340 }, { "epoch": 844.4078947368421, "grad_norm": 1.0363690853118896, "learning_rate": 0.0001, "loss": 0.0097, "step": 128350 }, { "epoch": 844.4736842105264, "grad_norm": 1.1200023889541626, "learning_rate": 0.0001, "loss": 0.0095, "step": 128360 }, { "epoch": 844.5394736842105, "grad_norm": 1.0740878582000732, "learning_rate": 0.0001, "loss": 0.0087, "step": 128370 }, { "epoch": 844.6052631578947, "grad_norm": 1.1896917819976807, "learning_rate": 0.0001, "loss": 0.0104, "step": 128380 }, { "epoch": 844.671052631579, "grad_norm": 1.0779705047607422, "learning_rate": 0.0001, "loss": 0.0103, "step": 128390 }, { "epoch": 844.7368421052631, "grad_norm": 0.555634617805481, "learning_rate": 0.0001, "loss": 0.0104, "step": 128400 }, { "epoch": 844.8026315789474, "grad_norm": 0.9556017518043518, "learning_rate": 0.0001, "loss": 0.0092, "step": 128410 }, { "epoch": 844.8684210526316, "grad_norm": 0.8419582843780518, "learning_rate": 0.0001, "loss": 0.0084, "step": 128420 }, { "epoch": 844.9342105263158, "grad_norm": 1.0701279640197754, "learning_rate": 0.0001, "loss": 0.0098, "step": 128430 }, { "epoch": 845.0, "grad_norm": 1.3151577711105347, "learning_rate": 0.0001, "loss": 0.0084, "step": 128440 }, { "epoch": 845.0657894736842, "grad_norm": 1.086671233177185, "learning_rate": 0.0001, "loss": 0.0099, "step": 128450 }, { "epoch": 845.1315789473684, "grad_norm": 1.2113654613494873, "learning_rate": 0.0001, "loss": 0.0101, "step": 128460 }, { "epoch": 845.1973684210526, "grad_norm": 1.2643672227859497, "learning_rate": 0.0001, "loss": 0.0112, "step": 128470 }, { "epoch": 845.2631578947369, "grad_norm": 1.0714949369430542, "learning_rate": 0.0001, "loss": 0.0092, "step": 128480 }, { "epoch": 845.328947368421, "grad_norm": 0.7351527214050293, "learning_rate": 0.0001, "loss": 0.0094, "step": 128490 }, { "epoch": 845.3947368421053, "grad_norm": 1.0877636671066284, "learning_rate": 0.0001, "loss": 0.0103, "step": 128500 }, { "epoch": 845.4605263157895, "grad_norm": 0.6363227963447571, "learning_rate": 0.0001, "loss": 0.0115, "step": 128510 }, { "epoch": 845.5263157894736, "grad_norm": 0.7102157473564148, "learning_rate": 0.0001, "loss": 0.0106, "step": 128520 }, { "epoch": 845.5921052631579, "grad_norm": 0.9936819076538086, "learning_rate": 0.0001, "loss": 0.0105, "step": 128530 }, { "epoch": 845.6578947368421, "grad_norm": 0.7047979831695557, "learning_rate": 0.0001, "loss": 0.0083, "step": 128540 }, { "epoch": 845.7236842105264, "grad_norm": 1.017305850982666, "learning_rate": 0.0001, "loss": 0.0098, "step": 128550 }, { "epoch": 845.7894736842105, "grad_norm": 1.3176766633987427, "learning_rate": 0.0001, "loss": 0.009, "step": 128560 }, { "epoch": 845.8552631578947, "grad_norm": 1.1096950769424438, "learning_rate": 0.0001, "loss": 0.0107, "step": 128570 }, { "epoch": 845.921052631579, "grad_norm": 1.1976255178451538, "learning_rate": 0.0001, "loss": 0.0112, "step": 128580 }, { "epoch": 845.9868421052631, "grad_norm": 1.0794117450714111, "learning_rate": 0.0001, "loss": 0.0103, "step": 128590 }, { "epoch": 846.0526315789474, "grad_norm": 0.8604280948638916, "learning_rate": 0.0001, "loss": 0.0109, "step": 128600 }, { "epoch": 846.1184210526316, "grad_norm": 0.8841588497161865, "learning_rate": 0.0001, "loss": 0.0109, "step": 128610 }, { "epoch": 846.1842105263158, "grad_norm": 0.9039338827133179, "learning_rate": 0.0001, "loss": 0.009, "step": 128620 }, { "epoch": 846.25, "grad_norm": 0.7028020024299622, "learning_rate": 0.0001, "loss": 0.0088, "step": 128630 }, { "epoch": 846.3157894736842, "grad_norm": 1.018131136894226, "learning_rate": 0.0001, "loss": 0.0098, "step": 128640 }, { "epoch": 846.3815789473684, "grad_norm": 1.4011059999465942, "learning_rate": 0.0001, "loss": 0.0102, "step": 128650 }, { "epoch": 846.4473684210526, "grad_norm": 1.176498293876648, "learning_rate": 0.0001, "loss": 0.0101, "step": 128660 }, { "epoch": 846.5131578947369, "grad_norm": 1.150488257408142, "learning_rate": 0.0001, "loss": 0.01, "step": 128670 }, { "epoch": 846.578947368421, "grad_norm": 1.1439628601074219, "learning_rate": 0.0001, "loss": 0.0101, "step": 128680 }, { "epoch": 846.6447368421053, "grad_norm": 1.0313776731491089, "learning_rate": 0.0001, "loss": 0.0086, "step": 128690 }, { "epoch": 846.7105263157895, "grad_norm": 0.8566111922264099, "learning_rate": 0.0001, "loss": 0.0082, "step": 128700 }, { "epoch": 846.7763157894736, "grad_norm": 1.240422010421753, "learning_rate": 0.0001, "loss": 0.0116, "step": 128710 }, { "epoch": 846.8421052631579, "grad_norm": 0.8956599831581116, "learning_rate": 0.0001, "loss": 0.0108, "step": 128720 }, { "epoch": 846.9078947368421, "grad_norm": 1.075857400894165, "learning_rate": 0.0001, "loss": 0.0098, "step": 128730 }, { "epoch": 846.9736842105264, "grad_norm": 1.1086944341659546, "learning_rate": 0.0001, "loss": 0.0091, "step": 128740 }, { "epoch": 847.0394736842105, "grad_norm": 0.7189145684242249, "learning_rate": 0.0001, "loss": 0.0111, "step": 128750 }, { "epoch": 847.1052631578947, "grad_norm": 1.0675982236862183, "learning_rate": 0.0001, "loss": 0.0101, "step": 128760 }, { "epoch": 847.171052631579, "grad_norm": 1.0329749584197998, "learning_rate": 0.0001, "loss": 0.0101, "step": 128770 }, { "epoch": 847.2368421052631, "grad_norm": 0.8450028896331787, "learning_rate": 0.0001, "loss": 0.0074, "step": 128780 }, { "epoch": 847.3026315789474, "grad_norm": 1.0242254734039307, "learning_rate": 0.0001, "loss": 0.011, "step": 128790 }, { "epoch": 847.3684210526316, "grad_norm": 1.0955355167388916, "learning_rate": 0.0001, "loss": 0.0101, "step": 128800 }, { "epoch": 847.4342105263158, "grad_norm": 0.9389364123344421, "learning_rate": 0.0001, "loss": 0.0088, "step": 128810 }, { "epoch": 847.5, "grad_norm": 0.7814875245094299, "learning_rate": 0.0001, "loss": 0.0109, "step": 128820 }, { "epoch": 847.5657894736842, "grad_norm": 1.0729169845581055, "learning_rate": 0.0001, "loss": 0.011, "step": 128830 }, { "epoch": 847.6315789473684, "grad_norm": 1.7011371850967407, "learning_rate": 0.0001, "loss": 0.0099, "step": 128840 }, { "epoch": 847.6973684210526, "grad_norm": 1.1857264041900635, "learning_rate": 0.0001, "loss": 0.0091, "step": 128850 }, { "epoch": 847.7631578947369, "grad_norm": 1.284214973449707, "learning_rate": 0.0001, "loss": 0.0117, "step": 128860 }, { "epoch": 847.828947368421, "grad_norm": 1.111433982849121, "learning_rate": 0.0001, "loss": 0.0111, "step": 128870 }, { "epoch": 847.8947368421053, "grad_norm": 0.7958429455757141, "learning_rate": 0.0001, "loss": 0.0078, "step": 128880 }, { "epoch": 847.9605263157895, "grad_norm": 0.8999338150024414, "learning_rate": 0.0001, "loss": 0.009, "step": 128890 }, { "epoch": 848.0263157894736, "grad_norm": 1.1462279558181763, "learning_rate": 0.0001, "loss": 0.0094, "step": 128900 }, { "epoch": 848.0921052631579, "grad_norm": 1.2270578145980835, "learning_rate": 0.0001, "loss": 0.0111, "step": 128910 }, { "epoch": 848.1578947368421, "grad_norm": 1.2196954488754272, "learning_rate": 0.0001, "loss": 0.0093, "step": 128920 }, { "epoch": 848.2236842105264, "grad_norm": 1.0865422487258911, "learning_rate": 0.0001, "loss": 0.0089, "step": 128930 }, { "epoch": 848.2894736842105, "grad_norm": 0.8074581623077393, "learning_rate": 0.0001, "loss": 0.0098, "step": 128940 }, { "epoch": 848.3552631578947, "grad_norm": 1.2923625707626343, "learning_rate": 0.0001, "loss": 0.0084, "step": 128950 }, { "epoch": 848.421052631579, "grad_norm": 1.020020842552185, "learning_rate": 0.0001, "loss": 0.01, "step": 128960 }, { "epoch": 848.4868421052631, "grad_norm": 0.9365369081497192, "learning_rate": 0.0001, "loss": 0.0115, "step": 128970 }, { "epoch": 848.5526315789474, "grad_norm": 1.216340184211731, "learning_rate": 0.0001, "loss": 0.0094, "step": 128980 }, { "epoch": 848.6184210526316, "grad_norm": 1.2271922826766968, "learning_rate": 0.0001, "loss": 0.0088, "step": 128990 }, { "epoch": 848.6842105263158, "grad_norm": 1.1109647750854492, "learning_rate": 0.0001, "loss": 0.01, "step": 129000 }, { "epoch": 848.75, "grad_norm": 1.0088245868682861, "learning_rate": 0.0001, "loss": 0.0088, "step": 129010 }, { "epoch": 848.8157894736842, "grad_norm": 1.1272104978561401, "learning_rate": 0.0001, "loss": 0.0099, "step": 129020 }, { "epoch": 848.8815789473684, "grad_norm": 1.0856529474258423, "learning_rate": 0.0001, "loss": 0.01, "step": 129030 }, { "epoch": 848.9473684210526, "grad_norm": 0.8239693641662598, "learning_rate": 0.0001, "loss": 0.01, "step": 129040 }, { "epoch": 849.0131578947369, "grad_norm": 1.0656987428665161, "learning_rate": 0.0001, "loss": 0.0111, "step": 129050 }, { "epoch": 849.078947368421, "grad_norm": 1.2168710231781006, "learning_rate": 0.0001, "loss": 0.0102, "step": 129060 }, { "epoch": 849.1447368421053, "grad_norm": 1.1486916542053223, "learning_rate": 0.0001, "loss": 0.0115, "step": 129070 }, { "epoch": 849.2105263157895, "grad_norm": 1.0890756845474243, "learning_rate": 0.0001, "loss": 0.0095, "step": 129080 }, { "epoch": 849.2763157894736, "grad_norm": 1.3131881952285767, "learning_rate": 0.0001, "loss": 0.0111, "step": 129090 }, { "epoch": 849.3421052631579, "grad_norm": 0.8249554634094238, "learning_rate": 0.0001, "loss": 0.0092, "step": 129100 }, { "epoch": 849.4078947368421, "grad_norm": 0.9621846079826355, "learning_rate": 0.0001, "loss": 0.0088, "step": 129110 }, { "epoch": 849.4736842105264, "grad_norm": 0.8949623703956604, "learning_rate": 0.0001, "loss": 0.0078, "step": 129120 }, { "epoch": 849.5394736842105, "grad_norm": 0.9129248261451721, "learning_rate": 0.0001, "loss": 0.0114, "step": 129130 }, { "epoch": 849.6052631578947, "grad_norm": 1.3539313077926636, "learning_rate": 0.0001, "loss": 0.0101, "step": 129140 }, { "epoch": 849.671052631579, "grad_norm": 1.445449948310852, "learning_rate": 0.0001, "loss": 0.0094, "step": 129150 }, { "epoch": 849.7368421052631, "grad_norm": 1.0846349000930786, "learning_rate": 0.0001, "loss": 0.0111, "step": 129160 }, { "epoch": 849.8026315789474, "grad_norm": 1.1930545568466187, "learning_rate": 0.0001, "loss": 0.0096, "step": 129170 }, { "epoch": 849.8684210526316, "grad_norm": 1.0064821243286133, "learning_rate": 0.0001, "loss": 0.0102, "step": 129180 }, { "epoch": 849.9342105263158, "grad_norm": 0.9630594849586487, "learning_rate": 0.0001, "loss": 0.0084, "step": 129190 }, { "epoch": 850.0, "grad_norm": 1.0653307437896729, "learning_rate": 0.0001, "loss": 0.0094, "step": 129200 }, { "epoch": 850.0657894736842, "grad_norm": 0.947507381439209, "learning_rate": 0.0001, "loss": 0.0107, "step": 129210 }, { "epoch": 850.1315789473684, "grad_norm": 0.9130157232284546, "learning_rate": 0.0001, "loss": 0.0098, "step": 129220 }, { "epoch": 850.1973684210526, "grad_norm": 0.8457781076431274, "learning_rate": 0.0001, "loss": 0.0104, "step": 129230 }, { "epoch": 850.2631578947369, "grad_norm": 0.7123493552207947, "learning_rate": 0.0001, "loss": 0.0093, "step": 129240 }, { "epoch": 850.328947368421, "grad_norm": 0.9977707862854004, "learning_rate": 0.0001, "loss": 0.0126, "step": 129250 }, { "epoch": 850.3947368421053, "grad_norm": 0.7980003356933594, "learning_rate": 0.0001, "loss": 0.0098, "step": 129260 }, { "epoch": 850.4605263157895, "grad_norm": 1.2628130912780762, "learning_rate": 0.0001, "loss": 0.0084, "step": 129270 }, { "epoch": 850.5263157894736, "grad_norm": 1.2784351110458374, "learning_rate": 0.0001, "loss": 0.0095, "step": 129280 }, { "epoch": 850.5921052631579, "grad_norm": 0.8135380148887634, "learning_rate": 0.0001, "loss": 0.0104, "step": 129290 }, { "epoch": 850.6578947368421, "grad_norm": 1.2080432176589966, "learning_rate": 0.0001, "loss": 0.0109, "step": 129300 }, { "epoch": 850.7236842105264, "grad_norm": 1.1570539474487305, "learning_rate": 0.0001, "loss": 0.0092, "step": 129310 }, { "epoch": 850.7894736842105, "grad_norm": 0.8722181916236877, "learning_rate": 0.0001, "loss": 0.0094, "step": 129320 }, { "epoch": 850.8552631578947, "grad_norm": 0.9665051698684692, "learning_rate": 0.0001, "loss": 0.0093, "step": 129330 }, { "epoch": 850.921052631579, "grad_norm": 1.156315565109253, "learning_rate": 0.0001, "loss": 0.0105, "step": 129340 }, { "epoch": 850.9868421052631, "grad_norm": 1.3095405101776123, "learning_rate": 0.0001, "loss": 0.0087, "step": 129350 }, { "epoch": 851.0526315789474, "grad_norm": 1.2862575054168701, "learning_rate": 0.0001, "loss": 0.0105, "step": 129360 }, { "epoch": 851.1184210526316, "grad_norm": 1.2178730964660645, "learning_rate": 0.0001, "loss": 0.0104, "step": 129370 }, { "epoch": 851.1842105263158, "grad_norm": 1.331790804862976, "learning_rate": 0.0001, "loss": 0.0107, "step": 129380 }, { "epoch": 851.25, "grad_norm": 1.1525365114212036, "learning_rate": 0.0001, "loss": 0.0083, "step": 129390 }, { "epoch": 851.3157894736842, "grad_norm": 1.2628202438354492, "learning_rate": 0.0001, "loss": 0.0117, "step": 129400 }, { "epoch": 851.3815789473684, "grad_norm": 0.9051490426063538, "learning_rate": 0.0001, "loss": 0.0092, "step": 129410 }, { "epoch": 851.4473684210526, "grad_norm": 0.9689465165138245, "learning_rate": 0.0001, "loss": 0.0095, "step": 129420 }, { "epoch": 851.5131578947369, "grad_norm": 0.8615492582321167, "learning_rate": 0.0001, "loss": 0.0087, "step": 129430 }, { "epoch": 851.578947368421, "grad_norm": 0.9767460823059082, "learning_rate": 0.0001, "loss": 0.0099, "step": 129440 }, { "epoch": 851.6447368421053, "grad_norm": 0.9622799158096313, "learning_rate": 0.0001, "loss": 0.0091, "step": 129450 }, { "epoch": 851.7105263157895, "grad_norm": 1.0682460069656372, "learning_rate": 0.0001, "loss": 0.0092, "step": 129460 }, { "epoch": 851.7763157894736, "grad_norm": 0.5867149233818054, "learning_rate": 0.0001, "loss": 0.0088, "step": 129470 }, { "epoch": 851.8421052631579, "grad_norm": 0.9323984384536743, "learning_rate": 0.0001, "loss": 0.0091, "step": 129480 }, { "epoch": 851.9078947368421, "grad_norm": 1.0186138153076172, "learning_rate": 0.0001, "loss": 0.0113, "step": 129490 }, { "epoch": 851.9736842105264, "grad_norm": 1.1094682216644287, "learning_rate": 0.0001, "loss": 0.0103, "step": 129500 }, { "epoch": 852.0394736842105, "grad_norm": 1.0459119081497192, "learning_rate": 0.0001, "loss": 0.0096, "step": 129510 }, { "epoch": 852.1052631578947, "grad_norm": 0.9179003238677979, "learning_rate": 0.0001, "loss": 0.0087, "step": 129520 }, { "epoch": 852.171052631579, "grad_norm": 1.0056957006454468, "learning_rate": 0.0001, "loss": 0.0112, "step": 129530 }, { "epoch": 852.2368421052631, "grad_norm": 0.6981986165046692, "learning_rate": 0.0001, "loss": 0.0111, "step": 129540 }, { "epoch": 852.3026315789474, "grad_norm": 1.2573353052139282, "learning_rate": 0.0001, "loss": 0.0102, "step": 129550 }, { "epoch": 852.3684210526316, "grad_norm": 0.9952992796897888, "learning_rate": 0.0001, "loss": 0.0117, "step": 129560 }, { "epoch": 852.4342105263158, "grad_norm": 0.9949916005134583, "learning_rate": 0.0001, "loss": 0.0117, "step": 129570 }, { "epoch": 852.5, "grad_norm": 1.2157580852508545, "learning_rate": 0.0001, "loss": 0.0094, "step": 129580 }, { "epoch": 852.5657894736842, "grad_norm": 1.0513194799423218, "learning_rate": 0.0001, "loss": 0.0101, "step": 129590 }, { "epoch": 852.6315789473684, "grad_norm": 1.3504682779312134, "learning_rate": 0.0001, "loss": 0.0095, "step": 129600 }, { "epoch": 852.6973684210526, "grad_norm": 0.9804072380065918, "learning_rate": 0.0001, "loss": 0.0097, "step": 129610 }, { "epoch": 852.7631578947369, "grad_norm": 1.3309262990951538, "learning_rate": 0.0001, "loss": 0.0088, "step": 129620 }, { "epoch": 852.828947368421, "grad_norm": 1.3208234310150146, "learning_rate": 0.0001, "loss": 0.01, "step": 129630 }, { "epoch": 852.8947368421053, "grad_norm": 1.8901704549789429, "learning_rate": 0.0001, "loss": 0.01, "step": 129640 }, { "epoch": 852.9605263157895, "grad_norm": 1.7452239990234375, "learning_rate": 0.0001, "loss": 0.0097, "step": 129650 }, { "epoch": 853.0263157894736, "grad_norm": 1.2675105333328247, "learning_rate": 0.0001, "loss": 0.009, "step": 129660 }, { "epoch": 853.0921052631579, "grad_norm": 0.9307915568351746, "learning_rate": 0.0001, "loss": 0.0096, "step": 129670 }, { "epoch": 853.1578947368421, "grad_norm": 1.136796236038208, "learning_rate": 0.0001, "loss": 0.0098, "step": 129680 }, { "epoch": 853.2236842105264, "grad_norm": 1.2172702550888062, "learning_rate": 0.0001, "loss": 0.0088, "step": 129690 }, { "epoch": 853.2894736842105, "grad_norm": 1.3970834016799927, "learning_rate": 0.0001, "loss": 0.0097, "step": 129700 }, { "epoch": 853.3552631578947, "grad_norm": 1.3139710426330566, "learning_rate": 0.0001, "loss": 0.0081, "step": 129710 }, { "epoch": 853.421052631579, "grad_norm": 1.085668683052063, "learning_rate": 0.0001, "loss": 0.0087, "step": 129720 }, { "epoch": 853.4868421052631, "grad_norm": 1.054573655128479, "learning_rate": 0.0001, "loss": 0.0098, "step": 129730 }, { "epoch": 853.5526315789474, "grad_norm": 1.22200345993042, "learning_rate": 0.0001, "loss": 0.0101, "step": 129740 }, { "epoch": 853.6184210526316, "grad_norm": 1.0596305131912231, "learning_rate": 0.0001, "loss": 0.011, "step": 129750 }, { "epoch": 853.6842105263158, "grad_norm": 1.0495727062225342, "learning_rate": 0.0001, "loss": 0.0115, "step": 129760 }, { "epoch": 853.75, "grad_norm": 1.4085642099380493, "learning_rate": 0.0001, "loss": 0.0102, "step": 129770 }, { "epoch": 853.8157894736842, "grad_norm": 1.1661970615386963, "learning_rate": 0.0001, "loss": 0.0087, "step": 129780 }, { "epoch": 853.8815789473684, "grad_norm": 1.3750501871109009, "learning_rate": 0.0001, "loss": 0.0104, "step": 129790 }, { "epoch": 853.9473684210526, "grad_norm": 1.349413275718689, "learning_rate": 0.0001, "loss": 0.0098, "step": 129800 }, { "epoch": 854.0131578947369, "grad_norm": 1.1775285005569458, "learning_rate": 0.0001, "loss": 0.0095, "step": 129810 }, { "epoch": 854.078947368421, "grad_norm": 1.127249836921692, "learning_rate": 0.0001, "loss": 0.0109, "step": 129820 }, { "epoch": 854.1447368421053, "grad_norm": 1.1303577423095703, "learning_rate": 0.0001, "loss": 0.0095, "step": 129830 }, { "epoch": 854.2105263157895, "grad_norm": 1.0692039728164673, "learning_rate": 0.0001, "loss": 0.0105, "step": 129840 }, { "epoch": 854.2763157894736, "grad_norm": 1.2041025161743164, "learning_rate": 0.0001, "loss": 0.009, "step": 129850 }, { "epoch": 854.3421052631579, "grad_norm": 0.8241481184959412, "learning_rate": 0.0001, "loss": 0.0091, "step": 129860 }, { "epoch": 854.4078947368421, "grad_norm": 0.9176292419433594, "learning_rate": 0.0001, "loss": 0.0089, "step": 129870 }, { "epoch": 854.4736842105264, "grad_norm": 1.2800862789154053, "learning_rate": 0.0001, "loss": 0.009, "step": 129880 }, { "epoch": 854.5394736842105, "grad_norm": 0.7797620892524719, "learning_rate": 0.0001, "loss": 0.0095, "step": 129890 }, { "epoch": 854.6052631578947, "grad_norm": 1.4259511232376099, "learning_rate": 0.0001, "loss": 0.0105, "step": 129900 }, { "epoch": 854.671052631579, "grad_norm": 1.1848866939544678, "learning_rate": 0.0001, "loss": 0.0095, "step": 129910 }, { "epoch": 854.7368421052631, "grad_norm": 0.8558827042579651, "learning_rate": 0.0001, "loss": 0.0107, "step": 129920 }, { "epoch": 854.8026315789474, "grad_norm": 0.9844433069229126, "learning_rate": 0.0001, "loss": 0.0105, "step": 129930 }, { "epoch": 854.8684210526316, "grad_norm": 1.2203624248504639, "learning_rate": 0.0001, "loss": 0.0093, "step": 129940 }, { "epoch": 854.9342105263158, "grad_norm": 1.2477107048034668, "learning_rate": 0.0001, "loss": 0.0102, "step": 129950 }, { "epoch": 855.0, "grad_norm": 1.1950006484985352, "learning_rate": 0.0001, "loss": 0.0088, "step": 129960 }, { "epoch": 855.0657894736842, "grad_norm": 0.8927521109580994, "learning_rate": 0.0001, "loss": 0.0081, "step": 129970 }, { "epoch": 855.1315789473684, "grad_norm": 1.3346248865127563, "learning_rate": 0.0001, "loss": 0.0103, "step": 129980 }, { "epoch": 855.1973684210526, "grad_norm": 1.2857704162597656, "learning_rate": 0.0001, "loss": 0.0091, "step": 129990 }, { "epoch": 855.2631578947369, "grad_norm": 1.3358838558197021, "learning_rate": 0.0001, "loss": 0.0077, "step": 130000 }, { "epoch": 855.328947368421, "grad_norm": 1.1128495931625366, "learning_rate": 0.0001, "loss": 0.011, "step": 130010 }, { "epoch": 855.3947368421053, "grad_norm": 1.34784734249115, "learning_rate": 0.0001, "loss": 0.0092, "step": 130020 }, { "epoch": 855.4605263157895, "grad_norm": 1.322873592376709, "learning_rate": 0.0001, "loss": 0.0087, "step": 130030 }, { "epoch": 855.5263157894736, "grad_norm": 0.7924045324325562, "learning_rate": 0.0001, "loss": 0.0117, "step": 130040 }, { "epoch": 855.5921052631579, "grad_norm": 0.9343613982200623, "learning_rate": 0.0001, "loss": 0.0105, "step": 130050 }, { "epoch": 855.6578947368421, "grad_norm": 0.847163736820221, "learning_rate": 0.0001, "loss": 0.0089, "step": 130060 }, { "epoch": 855.7236842105264, "grad_norm": 0.8668592572212219, "learning_rate": 0.0001, "loss": 0.0104, "step": 130070 }, { "epoch": 855.7894736842105, "grad_norm": 1.1902350187301636, "learning_rate": 0.0001, "loss": 0.0126, "step": 130080 }, { "epoch": 855.8552631578947, "grad_norm": 0.9959059357643127, "learning_rate": 0.0001, "loss": 0.0117, "step": 130090 }, { "epoch": 855.921052631579, "grad_norm": 0.9965035319328308, "learning_rate": 0.0001, "loss": 0.0094, "step": 130100 }, { "epoch": 855.9868421052631, "grad_norm": 0.8111009001731873, "learning_rate": 0.0001, "loss": 0.0097, "step": 130110 }, { "epoch": 856.0526315789474, "grad_norm": 0.8355821967124939, "learning_rate": 0.0001, "loss": 0.0087, "step": 130120 }, { "epoch": 856.1184210526316, "grad_norm": 0.9395269155502319, "learning_rate": 0.0001, "loss": 0.0107, "step": 130130 }, { "epoch": 856.1842105263158, "grad_norm": 0.7983924150466919, "learning_rate": 0.0001, "loss": 0.0095, "step": 130140 }, { "epoch": 856.25, "grad_norm": 1.0904862880706787, "learning_rate": 0.0001, "loss": 0.0096, "step": 130150 }, { "epoch": 856.3157894736842, "grad_norm": 0.8742513656616211, "learning_rate": 0.0001, "loss": 0.0122, "step": 130160 }, { "epoch": 856.3815789473684, "grad_norm": 1.308323860168457, "learning_rate": 0.0001, "loss": 0.0099, "step": 130170 }, { "epoch": 856.4473684210526, "grad_norm": 0.660823404788971, "learning_rate": 0.0001, "loss": 0.0103, "step": 130180 }, { "epoch": 856.5131578947369, "grad_norm": 0.7933557033538818, "learning_rate": 0.0001, "loss": 0.0106, "step": 130190 }, { "epoch": 856.578947368421, "grad_norm": 1.5122641324996948, "learning_rate": 0.0001, "loss": 0.0113, "step": 130200 }, { "epoch": 856.6447368421053, "grad_norm": 1.1528301239013672, "learning_rate": 0.0001, "loss": 0.0087, "step": 130210 }, { "epoch": 856.7105263157895, "grad_norm": 0.9372194409370422, "learning_rate": 0.0001, "loss": 0.0102, "step": 130220 }, { "epoch": 856.7763157894736, "grad_norm": 1.312410831451416, "learning_rate": 0.0001, "loss": 0.0088, "step": 130230 }, { "epoch": 856.8421052631579, "grad_norm": 0.9367921948432922, "learning_rate": 0.0001, "loss": 0.0084, "step": 130240 }, { "epoch": 856.9078947368421, "grad_norm": 1.0946214199066162, "learning_rate": 0.0001, "loss": 0.01, "step": 130250 }, { "epoch": 856.9736842105264, "grad_norm": 1.2462278604507446, "learning_rate": 0.0001, "loss": 0.0087, "step": 130260 }, { "epoch": 857.0394736842105, "grad_norm": 0.9840075373649597, "learning_rate": 0.0001, "loss": 0.0107, "step": 130270 }, { "epoch": 857.1052631578947, "grad_norm": 1.2592564821243286, "learning_rate": 0.0001, "loss": 0.0093, "step": 130280 }, { "epoch": 857.171052631579, "grad_norm": 1.078933835029602, "learning_rate": 0.0001, "loss": 0.0106, "step": 130290 }, { "epoch": 857.2368421052631, "grad_norm": 1.3728272914886475, "learning_rate": 0.0001, "loss": 0.01, "step": 130300 }, { "epoch": 857.3026315789474, "grad_norm": 1.05517578125, "learning_rate": 0.0001, "loss": 0.0092, "step": 130310 }, { "epoch": 857.3684210526316, "grad_norm": 1.3198639154434204, "learning_rate": 0.0001, "loss": 0.008, "step": 130320 }, { "epoch": 857.4342105263158, "grad_norm": 1.3628894090652466, "learning_rate": 0.0001, "loss": 0.0094, "step": 130330 }, { "epoch": 857.5, "grad_norm": 1.2717257738113403, "learning_rate": 0.0001, "loss": 0.0094, "step": 130340 }, { "epoch": 857.5657894736842, "grad_norm": 1.429996371269226, "learning_rate": 0.0001, "loss": 0.0105, "step": 130350 }, { "epoch": 857.6315789473684, "grad_norm": 0.9314458966255188, "learning_rate": 0.0001, "loss": 0.0109, "step": 130360 }, { "epoch": 857.6973684210526, "grad_norm": 1.0293179750442505, "learning_rate": 0.0001, "loss": 0.0092, "step": 130370 }, { "epoch": 857.7631578947369, "grad_norm": 0.7930873036384583, "learning_rate": 0.0001, "loss": 0.0114, "step": 130380 }, { "epoch": 857.828947368421, "grad_norm": 1.0124858617782593, "learning_rate": 0.0001, "loss": 0.0102, "step": 130390 }, { "epoch": 857.8947368421053, "grad_norm": 0.6720251441001892, "learning_rate": 0.0001, "loss": 0.0088, "step": 130400 }, { "epoch": 857.9605263157895, "grad_norm": 1.2420949935913086, "learning_rate": 0.0001, "loss": 0.0096, "step": 130410 }, { "epoch": 858.0263157894736, "grad_norm": 1.295690655708313, "learning_rate": 0.0001, "loss": 0.0117, "step": 130420 }, { "epoch": 858.0921052631579, "grad_norm": 0.9636945128440857, "learning_rate": 0.0001, "loss": 0.0086, "step": 130430 }, { "epoch": 858.1578947368421, "grad_norm": 0.9246024489402771, "learning_rate": 0.0001, "loss": 0.0094, "step": 130440 }, { "epoch": 858.2236842105264, "grad_norm": 1.116898536682129, "learning_rate": 0.0001, "loss": 0.0099, "step": 130450 }, { "epoch": 858.2894736842105, "grad_norm": 1.295310378074646, "learning_rate": 0.0001, "loss": 0.0103, "step": 130460 }, { "epoch": 858.3552631578947, "grad_norm": 1.0627254247665405, "learning_rate": 0.0001, "loss": 0.0098, "step": 130470 }, { "epoch": 858.421052631579, "grad_norm": 0.8008264899253845, "learning_rate": 0.0001, "loss": 0.0096, "step": 130480 }, { "epoch": 858.4868421052631, "grad_norm": 1.239762783050537, "learning_rate": 0.0001, "loss": 0.0104, "step": 130490 }, { "epoch": 858.5526315789474, "grad_norm": 0.9881368279457092, "learning_rate": 0.0001, "loss": 0.0107, "step": 130500 }, { "epoch": 858.6184210526316, "grad_norm": 1.0925045013427734, "learning_rate": 0.0001, "loss": 0.0103, "step": 130510 }, { "epoch": 858.6842105263158, "grad_norm": 0.9376081824302673, "learning_rate": 0.0001, "loss": 0.0095, "step": 130520 }, { "epoch": 858.75, "grad_norm": 0.8281996250152588, "learning_rate": 0.0001, "loss": 0.0097, "step": 130530 }, { "epoch": 858.8157894736842, "grad_norm": 0.5148002505302429, "learning_rate": 0.0001, "loss": 0.0102, "step": 130540 }, { "epoch": 858.8815789473684, "grad_norm": 0.9477270245552063, "learning_rate": 0.0001, "loss": 0.0103, "step": 130550 }, { "epoch": 858.9473684210526, "grad_norm": 1.0532981157302856, "learning_rate": 0.0001, "loss": 0.0087, "step": 130560 }, { "epoch": 859.0131578947369, "grad_norm": 1.0642023086547852, "learning_rate": 0.0001, "loss": 0.0112, "step": 130570 }, { "epoch": 859.078947368421, "grad_norm": 1.0734174251556396, "learning_rate": 0.0001, "loss": 0.0111, "step": 130580 }, { "epoch": 859.1447368421053, "grad_norm": 1.0683674812316895, "learning_rate": 0.0001, "loss": 0.013, "step": 130590 }, { "epoch": 859.2105263157895, "grad_norm": 0.8307974934577942, "learning_rate": 0.0001, "loss": 0.0094, "step": 130600 }, { "epoch": 859.2763157894736, "grad_norm": 0.9671717882156372, "learning_rate": 0.0001, "loss": 0.0102, "step": 130610 }, { "epoch": 859.3421052631579, "grad_norm": 0.8904266953468323, "learning_rate": 0.0001, "loss": 0.0097, "step": 130620 }, { "epoch": 859.4078947368421, "grad_norm": 1.097537875175476, "learning_rate": 0.0001, "loss": 0.0108, "step": 130630 }, { "epoch": 859.4736842105264, "grad_norm": 0.6235876679420471, "learning_rate": 0.0001, "loss": 0.0097, "step": 130640 }, { "epoch": 859.5394736842105, "grad_norm": 1.1195687055587769, "learning_rate": 0.0001, "loss": 0.0091, "step": 130650 }, { "epoch": 859.6052631578947, "grad_norm": 1.0346473455429077, "learning_rate": 0.0001, "loss": 0.0089, "step": 130660 }, { "epoch": 859.671052631579, "grad_norm": 1.2493486404418945, "learning_rate": 0.0001, "loss": 0.011, "step": 130670 }, { "epoch": 859.7368421052631, "grad_norm": 1.1328728199005127, "learning_rate": 0.0001, "loss": 0.0093, "step": 130680 }, { "epoch": 859.8026315789474, "grad_norm": 1.057497501373291, "learning_rate": 0.0001, "loss": 0.0087, "step": 130690 }, { "epoch": 859.8684210526316, "grad_norm": 1.1181135177612305, "learning_rate": 0.0001, "loss": 0.0112, "step": 130700 }, { "epoch": 859.9342105263158, "grad_norm": 1.242897391319275, "learning_rate": 0.0001, "loss": 0.0087, "step": 130710 }, { "epoch": 860.0, "grad_norm": 1.0586951971054077, "learning_rate": 0.0001, "loss": 0.0102, "step": 130720 }, { "epoch": 860.0657894736842, "grad_norm": 1.2389774322509766, "learning_rate": 0.0001, "loss": 0.0086, "step": 130730 }, { "epoch": 860.1315789473684, "grad_norm": 1.0785973072052002, "learning_rate": 0.0001, "loss": 0.0105, "step": 130740 }, { "epoch": 860.1973684210526, "grad_norm": 1.104970932006836, "learning_rate": 0.0001, "loss": 0.0117, "step": 130750 }, { "epoch": 860.2631578947369, "grad_norm": 0.8328081965446472, "learning_rate": 0.0001, "loss": 0.0094, "step": 130760 }, { "epoch": 860.328947368421, "grad_norm": 0.955934464931488, "learning_rate": 0.0001, "loss": 0.0088, "step": 130770 }, { "epoch": 860.3947368421053, "grad_norm": 1.1552305221557617, "learning_rate": 0.0001, "loss": 0.0111, "step": 130780 }, { "epoch": 860.4605263157895, "grad_norm": 1.000320553779602, "learning_rate": 0.0001, "loss": 0.0105, "step": 130790 }, { "epoch": 860.5263157894736, "grad_norm": 0.9015398025512695, "learning_rate": 0.0001, "loss": 0.0106, "step": 130800 }, { "epoch": 860.5921052631579, "grad_norm": 0.6215592622756958, "learning_rate": 0.0001, "loss": 0.0084, "step": 130810 }, { "epoch": 860.6578947368421, "grad_norm": 0.9939150810241699, "learning_rate": 0.0001, "loss": 0.0102, "step": 130820 }, { "epoch": 860.7236842105264, "grad_norm": 1.01790452003479, "learning_rate": 0.0001, "loss": 0.0087, "step": 130830 }, { "epoch": 860.7894736842105, "grad_norm": 1.468485951423645, "learning_rate": 0.0001, "loss": 0.0099, "step": 130840 }, { "epoch": 860.8552631578947, "grad_norm": 1.1106871366500854, "learning_rate": 0.0001, "loss": 0.0109, "step": 130850 }, { "epoch": 860.921052631579, "grad_norm": 0.834820568561554, "learning_rate": 0.0001, "loss": 0.0093, "step": 130860 }, { "epoch": 860.9868421052631, "grad_norm": 0.9590187072753906, "learning_rate": 0.0001, "loss": 0.0114, "step": 130870 }, { "epoch": 861.0526315789474, "grad_norm": 1.1876466274261475, "learning_rate": 0.0001, "loss": 0.009, "step": 130880 }, { "epoch": 861.1184210526316, "grad_norm": 0.968468427658081, "learning_rate": 0.0001, "loss": 0.0115, "step": 130890 }, { "epoch": 861.1842105263158, "grad_norm": 0.8767639398574829, "learning_rate": 0.0001, "loss": 0.01, "step": 130900 }, { "epoch": 861.25, "grad_norm": 0.9714983105659485, "learning_rate": 0.0001, "loss": 0.0094, "step": 130910 }, { "epoch": 861.3157894736842, "grad_norm": 1.2602828741073608, "learning_rate": 0.0001, "loss": 0.0089, "step": 130920 }, { "epoch": 861.3815789473684, "grad_norm": 1.2543740272521973, "learning_rate": 0.0001, "loss": 0.0105, "step": 130930 }, { "epoch": 861.4473684210526, "grad_norm": 1.088036060333252, "learning_rate": 0.0001, "loss": 0.009, "step": 130940 }, { "epoch": 861.5131578947369, "grad_norm": 1.3729690313339233, "learning_rate": 0.0001, "loss": 0.0103, "step": 130950 }, { "epoch": 861.578947368421, "grad_norm": 1.1072582006454468, "learning_rate": 0.0001, "loss": 0.0093, "step": 130960 }, { "epoch": 861.6447368421053, "grad_norm": 1.1380093097686768, "learning_rate": 0.0001, "loss": 0.0097, "step": 130970 }, { "epoch": 861.7105263157895, "grad_norm": 1.0508384704589844, "learning_rate": 0.0001, "loss": 0.0092, "step": 130980 }, { "epoch": 861.7763157894736, "grad_norm": 1.1020636558532715, "learning_rate": 0.0001, "loss": 0.0104, "step": 130990 }, { "epoch": 861.8421052631579, "grad_norm": 1.138003945350647, "learning_rate": 0.0001, "loss": 0.0112, "step": 131000 }, { "epoch": 861.9078947368421, "grad_norm": 1.2972642183303833, "learning_rate": 0.0001, "loss": 0.0103, "step": 131010 }, { "epoch": 861.9736842105264, "grad_norm": 0.911027729511261, "learning_rate": 0.0001, "loss": 0.0105, "step": 131020 }, { "epoch": 862.0394736842105, "grad_norm": 0.9824381470680237, "learning_rate": 0.0001, "loss": 0.0096, "step": 131030 }, { "epoch": 862.1052631578947, "grad_norm": 1.227348804473877, "learning_rate": 0.0001, "loss": 0.0112, "step": 131040 }, { "epoch": 862.171052631579, "grad_norm": 0.9096339344978333, "learning_rate": 0.0001, "loss": 0.0095, "step": 131050 }, { "epoch": 862.2368421052631, "grad_norm": 1.2303411960601807, "learning_rate": 0.0001, "loss": 0.0096, "step": 131060 }, { "epoch": 862.3026315789474, "grad_norm": 0.8208591938018799, "learning_rate": 0.0001, "loss": 0.0113, "step": 131070 }, { "epoch": 862.3684210526316, "grad_norm": 1.1471190452575684, "learning_rate": 0.0001, "loss": 0.0095, "step": 131080 }, { "epoch": 862.4342105263158, "grad_norm": 0.8580765128135681, "learning_rate": 0.0001, "loss": 0.0107, "step": 131090 }, { "epoch": 862.5, "grad_norm": 0.9216636419296265, "learning_rate": 0.0001, "loss": 0.0092, "step": 131100 }, { "epoch": 862.5657894736842, "grad_norm": 1.3407820463180542, "learning_rate": 0.0001, "loss": 0.0094, "step": 131110 }, { "epoch": 862.6315789473684, "grad_norm": 1.359041452407837, "learning_rate": 0.0001, "loss": 0.0104, "step": 131120 }, { "epoch": 862.6973684210526, "grad_norm": 1.231490969657898, "learning_rate": 0.0001, "loss": 0.0085, "step": 131130 }, { "epoch": 862.7631578947369, "grad_norm": 1.0086109638214111, "learning_rate": 0.0001, "loss": 0.0099, "step": 131140 }, { "epoch": 862.828947368421, "grad_norm": 0.9800273180007935, "learning_rate": 0.0001, "loss": 0.0097, "step": 131150 }, { "epoch": 862.8947368421053, "grad_norm": 1.3657548427581787, "learning_rate": 0.0001, "loss": 0.0094, "step": 131160 }, { "epoch": 862.9605263157895, "grad_norm": 1.1648259162902832, "learning_rate": 0.0001, "loss": 0.0104, "step": 131170 }, { "epoch": 863.0263157894736, "grad_norm": 1.3625643253326416, "learning_rate": 0.0001, "loss": 0.0111, "step": 131180 }, { "epoch": 863.0921052631579, "grad_norm": 0.833634614944458, "learning_rate": 0.0001, "loss": 0.0106, "step": 131190 }, { "epoch": 863.1578947368421, "grad_norm": 0.7656792402267456, "learning_rate": 0.0001, "loss": 0.0094, "step": 131200 }, { "epoch": 863.2236842105264, "grad_norm": 1.2618483304977417, "learning_rate": 0.0001, "loss": 0.0102, "step": 131210 }, { "epoch": 863.2894736842105, "grad_norm": 1.133224606513977, "learning_rate": 0.0001, "loss": 0.0096, "step": 131220 }, { "epoch": 863.3552631578947, "grad_norm": 1.1126824617385864, "learning_rate": 0.0001, "loss": 0.0086, "step": 131230 }, { "epoch": 863.421052631579, "grad_norm": 1.0469779968261719, "learning_rate": 0.0001, "loss": 0.009, "step": 131240 }, { "epoch": 863.4868421052631, "grad_norm": 1.0890519618988037, "learning_rate": 0.0001, "loss": 0.0109, "step": 131250 }, { "epoch": 863.5526315789474, "grad_norm": 1.1236612796783447, "learning_rate": 0.0001, "loss": 0.0127, "step": 131260 }, { "epoch": 863.6184210526316, "grad_norm": 1.1163815259933472, "learning_rate": 0.0001, "loss": 0.0092, "step": 131270 }, { "epoch": 863.6842105263158, "grad_norm": 0.9706016182899475, "learning_rate": 0.0001, "loss": 0.0087, "step": 131280 }, { "epoch": 863.75, "grad_norm": 0.7366560697555542, "learning_rate": 0.0001, "loss": 0.0098, "step": 131290 }, { "epoch": 863.8157894736842, "grad_norm": 1.2588547468185425, "learning_rate": 0.0001, "loss": 0.0118, "step": 131300 }, { "epoch": 863.8815789473684, "grad_norm": 0.7898572087287903, "learning_rate": 0.0001, "loss": 0.0088, "step": 131310 }, { "epoch": 863.9473684210526, "grad_norm": 1.2470197677612305, "learning_rate": 0.0001, "loss": 0.0088, "step": 131320 }, { "epoch": 864.0131578947369, "grad_norm": 1.060998797416687, "learning_rate": 0.0001, "loss": 0.0101, "step": 131330 }, { "epoch": 864.078947368421, "grad_norm": 0.6772517561912537, "learning_rate": 0.0001, "loss": 0.0104, "step": 131340 }, { "epoch": 864.1447368421053, "grad_norm": 1.205532193183899, "learning_rate": 0.0001, "loss": 0.0108, "step": 131350 }, { "epoch": 864.2105263157895, "grad_norm": 1.150336742401123, "learning_rate": 0.0001, "loss": 0.0099, "step": 131360 }, { "epoch": 864.2763157894736, "grad_norm": 0.8620902895927429, "learning_rate": 0.0001, "loss": 0.0074, "step": 131370 }, { "epoch": 864.3421052631579, "grad_norm": 1.0216280221939087, "learning_rate": 0.0001, "loss": 0.0096, "step": 131380 }, { "epoch": 864.4078947368421, "grad_norm": 1.0433382987976074, "learning_rate": 0.0001, "loss": 0.0114, "step": 131390 }, { "epoch": 864.4736842105264, "grad_norm": 1.3569798469543457, "learning_rate": 0.0001, "loss": 0.0112, "step": 131400 }, { "epoch": 864.5394736842105, "grad_norm": 1.1768606901168823, "learning_rate": 0.0001, "loss": 0.0105, "step": 131410 }, { "epoch": 864.6052631578947, "grad_norm": 1.2419917583465576, "learning_rate": 0.0001, "loss": 0.0126, "step": 131420 }, { "epoch": 864.671052631579, "grad_norm": 1.128338098526001, "learning_rate": 0.0001, "loss": 0.01, "step": 131430 }, { "epoch": 864.7368421052631, "grad_norm": 1.1112442016601562, "learning_rate": 0.0001, "loss": 0.0086, "step": 131440 }, { "epoch": 864.8026315789474, "grad_norm": 1.0152097940444946, "learning_rate": 0.0001, "loss": 0.0116, "step": 131450 }, { "epoch": 864.8684210526316, "grad_norm": 0.9926842451095581, "learning_rate": 0.0001, "loss": 0.0099, "step": 131460 }, { "epoch": 864.9342105263158, "grad_norm": 0.9706729054450989, "learning_rate": 0.0001, "loss": 0.0098, "step": 131470 }, { "epoch": 865.0, "grad_norm": 0.9864161610603333, "learning_rate": 0.0001, "loss": 0.01, "step": 131480 }, { "epoch": 865.0657894736842, "grad_norm": 1.234376072883606, "learning_rate": 0.0001, "loss": 0.011, "step": 131490 }, { "epoch": 865.1315789473684, "grad_norm": 0.8708391189575195, "learning_rate": 0.0001, "loss": 0.0109, "step": 131500 }, { "epoch": 865.1973684210526, "grad_norm": 1.2367292642593384, "learning_rate": 0.0001, "loss": 0.0103, "step": 131510 }, { "epoch": 865.2631578947369, "grad_norm": 1.1945315599441528, "learning_rate": 0.0001, "loss": 0.0114, "step": 131520 }, { "epoch": 865.328947368421, "grad_norm": 1.3687530755996704, "learning_rate": 0.0001, "loss": 0.0114, "step": 131530 }, { "epoch": 865.3947368421053, "grad_norm": 1.2335397005081177, "learning_rate": 0.0001, "loss": 0.0088, "step": 131540 }, { "epoch": 865.4605263157895, "grad_norm": 0.9434360265731812, "learning_rate": 0.0001, "loss": 0.0096, "step": 131550 }, { "epoch": 865.5263157894736, "grad_norm": 1.020836591720581, "learning_rate": 0.0001, "loss": 0.0084, "step": 131560 }, { "epoch": 865.5921052631579, "grad_norm": 1.0214914083480835, "learning_rate": 0.0001, "loss": 0.0085, "step": 131570 }, { "epoch": 865.6578947368421, "grad_norm": 1.2193523645401, "learning_rate": 0.0001, "loss": 0.0103, "step": 131580 }, { "epoch": 865.7236842105264, "grad_norm": 1.0998847484588623, "learning_rate": 0.0001, "loss": 0.0102, "step": 131590 }, { "epoch": 865.7894736842105, "grad_norm": 1.1614649295806885, "learning_rate": 0.0001, "loss": 0.0116, "step": 131600 }, { "epoch": 865.8552631578947, "grad_norm": 1.3147051334381104, "learning_rate": 0.0001, "loss": 0.0083, "step": 131610 }, { "epoch": 865.921052631579, "grad_norm": 0.9823493957519531, "learning_rate": 0.0001, "loss": 0.0109, "step": 131620 }, { "epoch": 865.9868421052631, "grad_norm": 0.8709498643875122, "learning_rate": 0.0001, "loss": 0.0103, "step": 131630 }, { "epoch": 866.0526315789474, "grad_norm": 1.4504932165145874, "learning_rate": 0.0001, "loss": 0.0101, "step": 131640 }, { "epoch": 866.1184210526316, "grad_norm": 0.9600393772125244, "learning_rate": 0.0001, "loss": 0.0102, "step": 131650 }, { "epoch": 866.1842105263158, "grad_norm": 0.9422047734260559, "learning_rate": 0.0001, "loss": 0.0105, "step": 131660 }, { "epoch": 866.25, "grad_norm": 0.9015654921531677, "learning_rate": 0.0001, "loss": 0.0113, "step": 131670 }, { "epoch": 866.3157894736842, "grad_norm": 1.2097015380859375, "learning_rate": 0.0001, "loss": 0.0095, "step": 131680 }, { "epoch": 866.3815789473684, "grad_norm": 1.0797349214553833, "learning_rate": 0.0001, "loss": 0.0103, "step": 131690 }, { "epoch": 866.4473684210526, "grad_norm": 1.1002148389816284, "learning_rate": 0.0001, "loss": 0.0099, "step": 131700 }, { "epoch": 866.5131578947369, "grad_norm": 0.8693884611129761, "learning_rate": 0.0001, "loss": 0.0095, "step": 131710 }, { "epoch": 866.578947368421, "grad_norm": 1.2665678262710571, "learning_rate": 0.0001, "loss": 0.01, "step": 131720 }, { "epoch": 866.6447368421053, "grad_norm": 1.0744692087173462, "learning_rate": 0.0001, "loss": 0.0098, "step": 131730 }, { "epoch": 866.7105263157895, "grad_norm": 1.1713916063308716, "learning_rate": 0.0001, "loss": 0.0089, "step": 131740 }, { "epoch": 866.7763157894736, "grad_norm": 1.3538668155670166, "learning_rate": 0.0001, "loss": 0.0093, "step": 131750 }, { "epoch": 866.8421052631579, "grad_norm": 0.8203177452087402, "learning_rate": 0.0001, "loss": 0.0097, "step": 131760 }, { "epoch": 866.9078947368421, "grad_norm": 1.1548283100128174, "learning_rate": 0.0001, "loss": 0.0099, "step": 131770 }, { "epoch": 866.9736842105264, "grad_norm": 1.0071430206298828, "learning_rate": 0.0001, "loss": 0.01, "step": 131780 }, { "epoch": 867.0394736842105, "grad_norm": 1.1559392213821411, "learning_rate": 0.0001, "loss": 0.0107, "step": 131790 }, { "epoch": 867.1052631578947, "grad_norm": 1.0900834798812866, "learning_rate": 0.0001, "loss": 0.0103, "step": 131800 }, { "epoch": 867.171052631579, "grad_norm": 1.1441543102264404, "learning_rate": 0.0001, "loss": 0.0079, "step": 131810 }, { "epoch": 867.2368421052631, "grad_norm": 1.2302360534667969, "learning_rate": 0.0001, "loss": 0.0093, "step": 131820 }, { "epoch": 867.3026315789474, "grad_norm": 1.0534881353378296, "learning_rate": 0.0001, "loss": 0.0087, "step": 131830 }, { "epoch": 867.3684210526316, "grad_norm": 0.8932411074638367, "learning_rate": 0.0001, "loss": 0.0125, "step": 131840 }, { "epoch": 867.4342105263158, "grad_norm": 0.6699873805046082, "learning_rate": 0.0001, "loss": 0.0097, "step": 131850 }, { "epoch": 867.5, "grad_norm": 0.7454432845115662, "learning_rate": 0.0001, "loss": 0.0084, "step": 131860 }, { "epoch": 867.5657894736842, "grad_norm": 1.1074482202529907, "learning_rate": 0.0001, "loss": 0.0109, "step": 131870 }, { "epoch": 867.6315789473684, "grad_norm": 1.2334349155426025, "learning_rate": 0.0001, "loss": 0.0091, "step": 131880 }, { "epoch": 867.6973684210526, "grad_norm": 0.9547383189201355, "learning_rate": 0.0001, "loss": 0.0096, "step": 131890 }, { "epoch": 867.7631578947369, "grad_norm": 1.069530963897705, "learning_rate": 0.0001, "loss": 0.0107, "step": 131900 }, { "epoch": 867.828947368421, "grad_norm": 1.2506078481674194, "learning_rate": 0.0001, "loss": 0.0089, "step": 131910 }, { "epoch": 867.8947368421053, "grad_norm": 0.9932133555412292, "learning_rate": 0.0001, "loss": 0.0103, "step": 131920 }, { "epoch": 867.9605263157895, "grad_norm": 1.2245303392410278, "learning_rate": 0.0001, "loss": 0.0099, "step": 131930 }, { "epoch": 868.0263157894736, "grad_norm": 1.5292211771011353, "learning_rate": 0.0001, "loss": 0.009, "step": 131940 }, { "epoch": 868.0921052631579, "grad_norm": 1.3680943250656128, "learning_rate": 0.0001, "loss": 0.0091, "step": 131950 }, { "epoch": 868.1578947368421, "grad_norm": 1.1934568881988525, "learning_rate": 0.0001, "loss": 0.01, "step": 131960 }, { "epoch": 868.2236842105264, "grad_norm": 0.9152090549468994, "learning_rate": 0.0001, "loss": 0.0103, "step": 131970 }, { "epoch": 868.2894736842105, "grad_norm": 0.8579458594322205, "learning_rate": 0.0001, "loss": 0.0092, "step": 131980 }, { "epoch": 868.3552631578947, "grad_norm": 0.9107769131660461, "learning_rate": 0.0001, "loss": 0.0099, "step": 131990 }, { "epoch": 868.421052631579, "grad_norm": 1.1969023942947388, "learning_rate": 0.0001, "loss": 0.0106, "step": 132000 }, { "epoch": 868.4868421052631, "grad_norm": 0.9776577949523926, "learning_rate": 0.0001, "loss": 0.0107, "step": 132010 }, { "epoch": 868.5526315789474, "grad_norm": 1.3878238201141357, "learning_rate": 0.0001, "loss": 0.0082, "step": 132020 }, { "epoch": 868.6184210526316, "grad_norm": 0.997664213180542, "learning_rate": 0.0001, "loss": 0.0091, "step": 132030 }, { "epoch": 868.6842105263158, "grad_norm": 1.237343430519104, "learning_rate": 0.0001, "loss": 0.0104, "step": 132040 }, { "epoch": 868.75, "grad_norm": 0.9770588874816895, "learning_rate": 0.0001, "loss": 0.0119, "step": 132050 }, { "epoch": 868.8157894736842, "grad_norm": 1.209973692893982, "learning_rate": 0.0001, "loss": 0.0103, "step": 132060 }, { "epoch": 868.8815789473684, "grad_norm": 1.109294056892395, "learning_rate": 0.0001, "loss": 0.0094, "step": 132070 }, { "epoch": 868.9473684210526, "grad_norm": 0.8828414082527161, "learning_rate": 0.0001, "loss": 0.0103, "step": 132080 }, { "epoch": 869.0131578947369, "grad_norm": 0.8478983044624329, "learning_rate": 0.0001, "loss": 0.0096, "step": 132090 }, { "epoch": 869.078947368421, "grad_norm": 1.0058600902557373, "learning_rate": 0.0001, "loss": 0.0095, "step": 132100 }, { "epoch": 869.1447368421053, "grad_norm": 1.5055391788482666, "learning_rate": 0.0001, "loss": 0.0102, "step": 132110 }, { "epoch": 869.2105263157895, "grad_norm": 1.3304319381713867, "learning_rate": 0.0001, "loss": 0.011, "step": 132120 }, { "epoch": 869.2763157894736, "grad_norm": 1.3461639881134033, "learning_rate": 0.0001, "loss": 0.0094, "step": 132130 }, { "epoch": 869.3421052631579, "grad_norm": 1.318153977394104, "learning_rate": 0.0001, "loss": 0.0096, "step": 132140 }, { "epoch": 869.4078947368421, "grad_norm": 1.3171056509017944, "learning_rate": 0.0001, "loss": 0.0116, "step": 132150 }, { "epoch": 869.4736842105264, "grad_norm": 1.2487008571624756, "learning_rate": 0.0001, "loss": 0.0101, "step": 132160 }, { "epoch": 869.5394736842105, "grad_norm": 0.9375091195106506, "learning_rate": 0.0001, "loss": 0.009, "step": 132170 }, { "epoch": 869.6052631578947, "grad_norm": 1.3363336324691772, "learning_rate": 0.0001, "loss": 0.0098, "step": 132180 }, { "epoch": 869.671052631579, "grad_norm": 1.0731987953186035, "learning_rate": 0.0001, "loss": 0.0082, "step": 132190 }, { "epoch": 869.7368421052631, "grad_norm": 1.6114320755004883, "learning_rate": 0.0001, "loss": 0.0085, "step": 132200 }, { "epoch": 869.8026315789474, "grad_norm": 1.3398220539093018, "learning_rate": 0.0001, "loss": 0.0106, "step": 132210 }, { "epoch": 869.8684210526316, "grad_norm": 0.9430725574493408, "learning_rate": 0.0001, "loss": 0.008, "step": 132220 }, { "epoch": 869.9342105263158, "grad_norm": 0.9981265068054199, "learning_rate": 0.0001, "loss": 0.0111, "step": 132230 }, { "epoch": 870.0, "grad_norm": 1.1120591163635254, "learning_rate": 0.0001, "loss": 0.0103, "step": 132240 }, { "epoch": 870.0657894736842, "grad_norm": 1.0576080083847046, "learning_rate": 0.0001, "loss": 0.0086, "step": 132250 }, { "epoch": 870.1315789473684, "grad_norm": 1.1692156791687012, "learning_rate": 0.0001, "loss": 0.0093, "step": 132260 }, { "epoch": 870.1973684210526, "grad_norm": 1.4510517120361328, "learning_rate": 0.0001, "loss": 0.0096, "step": 132270 }, { "epoch": 870.2631578947369, "grad_norm": 0.8253922462463379, "learning_rate": 0.0001, "loss": 0.0097, "step": 132280 }, { "epoch": 870.328947368421, "grad_norm": 1.1040948629379272, "learning_rate": 0.0001, "loss": 0.0106, "step": 132290 }, { "epoch": 870.3947368421053, "grad_norm": 1.4819238185882568, "learning_rate": 0.0001, "loss": 0.01, "step": 132300 }, { "epoch": 870.4605263157895, "grad_norm": 0.8338831663131714, "learning_rate": 0.0001, "loss": 0.0101, "step": 132310 }, { "epoch": 870.5263157894736, "grad_norm": 1.5496101379394531, "learning_rate": 0.0001, "loss": 0.0115, "step": 132320 }, { "epoch": 870.5921052631579, "grad_norm": 1.3076415061950684, "learning_rate": 0.0001, "loss": 0.0094, "step": 132330 }, { "epoch": 870.6578947368421, "grad_norm": 1.3276525735855103, "learning_rate": 0.0001, "loss": 0.0079, "step": 132340 }, { "epoch": 870.7236842105264, "grad_norm": 1.18726646900177, "learning_rate": 0.0001, "loss": 0.0099, "step": 132350 }, { "epoch": 870.7894736842105, "grad_norm": 1.3741918802261353, "learning_rate": 0.0001, "loss": 0.0093, "step": 132360 }, { "epoch": 870.8552631578947, "grad_norm": 0.9850346446037292, "learning_rate": 0.0001, "loss": 0.0108, "step": 132370 }, { "epoch": 870.921052631579, "grad_norm": 0.5921350717544556, "learning_rate": 0.0001, "loss": 0.0105, "step": 132380 }, { "epoch": 870.9868421052631, "grad_norm": 1.010513424873352, "learning_rate": 0.0001, "loss": 0.0103, "step": 132390 }, { "epoch": 871.0526315789474, "grad_norm": 1.3716336488723755, "learning_rate": 0.0001, "loss": 0.0105, "step": 132400 }, { "epoch": 871.1184210526316, "grad_norm": 0.9653910994529724, "learning_rate": 0.0001, "loss": 0.0096, "step": 132410 }, { "epoch": 871.1842105263158, "grad_norm": 1.3690600395202637, "learning_rate": 0.0001, "loss": 0.0098, "step": 132420 }, { "epoch": 871.25, "grad_norm": 1.072267770767212, "learning_rate": 0.0001, "loss": 0.0102, "step": 132430 }, { "epoch": 871.3157894736842, "grad_norm": 1.4082567691802979, "learning_rate": 0.0001, "loss": 0.0094, "step": 132440 }, { "epoch": 871.3815789473684, "grad_norm": 1.0181151628494263, "learning_rate": 0.0001, "loss": 0.0082, "step": 132450 }, { "epoch": 871.4473684210526, "grad_norm": 0.8527328968048096, "learning_rate": 0.0001, "loss": 0.0075, "step": 132460 }, { "epoch": 871.5131578947369, "grad_norm": 1.0901014804840088, "learning_rate": 0.0001, "loss": 0.0098, "step": 132470 }, { "epoch": 871.578947368421, "grad_norm": 1.1375715732574463, "learning_rate": 0.0001, "loss": 0.0101, "step": 132480 }, { "epoch": 871.6447368421053, "grad_norm": 1.0507901906967163, "learning_rate": 0.0001, "loss": 0.0094, "step": 132490 }, { "epoch": 871.7105263157895, "grad_norm": 0.819428563117981, "learning_rate": 0.0001, "loss": 0.0107, "step": 132500 }, { "epoch": 871.7763157894736, "grad_norm": 0.7174791693687439, "learning_rate": 0.0001, "loss": 0.0115, "step": 132510 }, { "epoch": 871.8421052631579, "grad_norm": 1.1212362051010132, "learning_rate": 0.0001, "loss": 0.0092, "step": 132520 }, { "epoch": 871.9078947368421, "grad_norm": 1.310887098312378, "learning_rate": 0.0001, "loss": 0.0087, "step": 132530 }, { "epoch": 871.9736842105264, "grad_norm": 0.9797291159629822, "learning_rate": 0.0001, "loss": 0.0092, "step": 132540 }, { "epoch": 872.0394736842105, "grad_norm": 1.042771816253662, "learning_rate": 0.0001, "loss": 0.0119, "step": 132550 }, { "epoch": 872.1052631578947, "grad_norm": 1.1746186017990112, "learning_rate": 0.0001, "loss": 0.008, "step": 132560 }, { "epoch": 872.171052631579, "grad_norm": 1.1553187370300293, "learning_rate": 0.0001, "loss": 0.0086, "step": 132570 }, { "epoch": 872.2368421052631, "grad_norm": 0.9846141338348389, "learning_rate": 0.0001, "loss": 0.0103, "step": 132580 }, { "epoch": 872.3026315789474, "grad_norm": 1.0329734086990356, "learning_rate": 0.0001, "loss": 0.0101, "step": 132590 }, { "epoch": 872.3684210526316, "grad_norm": 1.0318963527679443, "learning_rate": 0.0001, "loss": 0.0097, "step": 132600 }, { "epoch": 872.4342105263158, "grad_norm": 0.97999507188797, "learning_rate": 0.0001, "loss": 0.0098, "step": 132610 }, { "epoch": 872.5, "grad_norm": 0.8947683572769165, "learning_rate": 0.0001, "loss": 0.0077, "step": 132620 }, { "epoch": 872.5657894736842, "grad_norm": 0.7579332590103149, "learning_rate": 0.0001, "loss": 0.0107, "step": 132630 }, { "epoch": 872.6315789473684, "grad_norm": 0.8904950022697449, "learning_rate": 0.0001, "loss": 0.0099, "step": 132640 }, { "epoch": 872.6973684210526, "grad_norm": 1.2979966402053833, "learning_rate": 0.0001, "loss": 0.0107, "step": 132650 }, { "epoch": 872.7631578947369, "grad_norm": 0.7980130314826965, "learning_rate": 0.0001, "loss": 0.009, "step": 132660 }, { "epoch": 872.828947368421, "grad_norm": 0.6705465912818909, "learning_rate": 0.0001, "loss": 0.0114, "step": 132670 }, { "epoch": 872.8947368421053, "grad_norm": 0.5814407467842102, "learning_rate": 0.0001, "loss": 0.0098, "step": 132680 }, { "epoch": 872.9605263157895, "grad_norm": 0.9677609205245972, "learning_rate": 0.0001, "loss": 0.0107, "step": 132690 }, { "epoch": 873.0263157894736, "grad_norm": 0.7871490716934204, "learning_rate": 0.0001, "loss": 0.0105, "step": 132700 }, { "epoch": 873.0921052631579, "grad_norm": 1.0843896865844727, "learning_rate": 0.0001, "loss": 0.0102, "step": 132710 }, { "epoch": 873.1578947368421, "grad_norm": 1.4221380949020386, "learning_rate": 0.0001, "loss": 0.0105, "step": 132720 }, { "epoch": 873.2236842105264, "grad_norm": 0.7746054530143738, "learning_rate": 0.0001, "loss": 0.0084, "step": 132730 }, { "epoch": 873.2894736842105, "grad_norm": 1.0406986474990845, "learning_rate": 0.0001, "loss": 0.0095, "step": 132740 }, { "epoch": 873.3552631578947, "grad_norm": 1.3779292106628418, "learning_rate": 0.0001, "loss": 0.0094, "step": 132750 }, { "epoch": 873.421052631579, "grad_norm": 1.179019808769226, "learning_rate": 0.0001, "loss": 0.0093, "step": 132760 }, { "epoch": 873.4868421052631, "grad_norm": 0.9589442014694214, "learning_rate": 0.0001, "loss": 0.0087, "step": 132770 }, { "epoch": 873.5526315789474, "grad_norm": 0.8380019664764404, "learning_rate": 0.0001, "loss": 0.0087, "step": 132780 }, { "epoch": 873.6184210526316, "grad_norm": 1.0861836671829224, "learning_rate": 0.0001, "loss": 0.011, "step": 132790 }, { "epoch": 873.6842105263158, "grad_norm": 0.7795262932777405, "learning_rate": 0.0001, "loss": 0.011, "step": 132800 }, { "epoch": 873.75, "grad_norm": 1.4798517227172852, "learning_rate": 0.0001, "loss": 0.0125, "step": 132810 }, { "epoch": 873.8157894736842, "grad_norm": 0.8960368037223816, "learning_rate": 0.0001, "loss": 0.0079, "step": 132820 }, { "epoch": 873.8815789473684, "grad_norm": 1.1715494394302368, "learning_rate": 0.0001, "loss": 0.0121, "step": 132830 }, { "epoch": 873.9473684210526, "grad_norm": 1.0302726030349731, "learning_rate": 0.0001, "loss": 0.0085, "step": 132840 }, { "epoch": 874.0131578947369, "grad_norm": 0.8323149085044861, "learning_rate": 0.0001, "loss": 0.0088, "step": 132850 }, { "epoch": 874.078947368421, "grad_norm": 1.0214720964431763, "learning_rate": 0.0001, "loss": 0.0094, "step": 132860 }, { "epoch": 874.1447368421053, "grad_norm": 1.2060658931732178, "learning_rate": 0.0001, "loss": 0.0097, "step": 132870 }, { "epoch": 874.2105263157895, "grad_norm": 1.1269394159317017, "learning_rate": 0.0001, "loss": 0.0107, "step": 132880 }, { "epoch": 874.2763157894736, "grad_norm": 1.2563400268554688, "learning_rate": 0.0001, "loss": 0.0081, "step": 132890 }, { "epoch": 874.3421052631579, "grad_norm": 0.6155217885971069, "learning_rate": 0.0001, "loss": 0.0093, "step": 132900 }, { "epoch": 874.4078947368421, "grad_norm": 1.0993677377700806, "learning_rate": 0.0001, "loss": 0.0083, "step": 132910 }, { "epoch": 874.4736842105264, "grad_norm": 0.8634936809539795, "learning_rate": 0.0001, "loss": 0.0099, "step": 132920 }, { "epoch": 874.5394736842105, "grad_norm": 1.18904709815979, "learning_rate": 0.0001, "loss": 0.0112, "step": 132930 }, { "epoch": 874.6052631578947, "grad_norm": 0.8078858852386475, "learning_rate": 0.0001, "loss": 0.009, "step": 132940 }, { "epoch": 874.671052631579, "grad_norm": 0.8726778626441956, "learning_rate": 0.0001, "loss": 0.01, "step": 132950 }, { "epoch": 874.7368421052631, "grad_norm": 0.8983378410339355, "learning_rate": 0.0001, "loss": 0.0086, "step": 132960 }, { "epoch": 874.8026315789474, "grad_norm": 0.6301218271255493, "learning_rate": 0.0001, "loss": 0.009, "step": 132970 }, { "epoch": 874.8684210526316, "grad_norm": 1.143539547920227, "learning_rate": 0.0001, "loss": 0.0115, "step": 132980 }, { "epoch": 874.9342105263158, "grad_norm": 0.7233585119247437, "learning_rate": 0.0001, "loss": 0.0101, "step": 132990 }, { "epoch": 875.0, "grad_norm": 0.7568589448928833, "learning_rate": 0.0001, "loss": 0.0111, "step": 133000 }, { "epoch": 875.0657894736842, "grad_norm": 1.0015631914138794, "learning_rate": 0.0001, "loss": 0.011, "step": 133010 }, { "epoch": 875.1315789473684, "grad_norm": 1.3138691186904907, "learning_rate": 0.0001, "loss": 0.0094, "step": 133020 }, { "epoch": 875.1973684210526, "grad_norm": 1.1672554016113281, "learning_rate": 0.0001, "loss": 0.0094, "step": 133030 }, { "epoch": 875.2631578947369, "grad_norm": 1.2994216680526733, "learning_rate": 0.0001, "loss": 0.0104, "step": 133040 }, { "epoch": 875.328947368421, "grad_norm": 1.1386960744857788, "learning_rate": 0.0001, "loss": 0.0101, "step": 133050 }, { "epoch": 875.3947368421053, "grad_norm": 0.8103448152542114, "learning_rate": 0.0001, "loss": 0.0104, "step": 133060 }, { "epoch": 875.4605263157895, "grad_norm": 1.0586600303649902, "learning_rate": 0.0001, "loss": 0.0095, "step": 133070 }, { "epoch": 875.5263157894736, "grad_norm": 1.086573839187622, "learning_rate": 0.0001, "loss": 0.0107, "step": 133080 }, { "epoch": 875.5921052631579, "grad_norm": 0.9082050323486328, "learning_rate": 0.0001, "loss": 0.0091, "step": 133090 }, { "epoch": 875.6578947368421, "grad_norm": 1.2547775506973267, "learning_rate": 0.0001, "loss": 0.009, "step": 133100 }, { "epoch": 875.7236842105264, "grad_norm": 1.1525391340255737, "learning_rate": 0.0001, "loss": 0.0082, "step": 133110 }, { "epoch": 875.7894736842105, "grad_norm": 1.0197055339813232, "learning_rate": 0.0001, "loss": 0.0092, "step": 133120 }, { "epoch": 875.8552631578947, "grad_norm": 1.1045457124710083, "learning_rate": 0.0001, "loss": 0.0112, "step": 133130 }, { "epoch": 875.921052631579, "grad_norm": 1.19781494140625, "learning_rate": 0.0001, "loss": 0.0094, "step": 133140 }, { "epoch": 875.9868421052631, "grad_norm": 1.0859558582305908, "learning_rate": 0.0001, "loss": 0.011, "step": 133150 }, { "epoch": 876.0526315789474, "grad_norm": 1.278917670249939, "learning_rate": 0.0001, "loss": 0.0087, "step": 133160 }, { "epoch": 876.1184210526316, "grad_norm": 0.9689110517501831, "learning_rate": 0.0001, "loss": 0.0087, "step": 133170 }, { "epoch": 876.1842105263158, "grad_norm": 1.1780256032943726, "learning_rate": 0.0001, "loss": 0.0086, "step": 133180 }, { "epoch": 876.25, "grad_norm": 1.1999872922897339, "learning_rate": 0.0001, "loss": 0.0084, "step": 133190 }, { "epoch": 876.3157894736842, "grad_norm": 1.1459710597991943, "learning_rate": 0.0001, "loss": 0.0091, "step": 133200 }, { "epoch": 876.3815789473684, "grad_norm": 1.1914170980453491, "learning_rate": 0.0001, "loss": 0.0105, "step": 133210 }, { "epoch": 876.4473684210526, "grad_norm": 1.8968595266342163, "learning_rate": 0.0001, "loss": 0.01, "step": 133220 }, { "epoch": 876.5131578947369, "grad_norm": 1.641607642173767, "learning_rate": 0.0001, "loss": 0.0084, "step": 133230 }, { "epoch": 876.578947368421, "grad_norm": 1.1843949556350708, "learning_rate": 0.0001, "loss": 0.0115, "step": 133240 }, { "epoch": 876.6447368421053, "grad_norm": 1.2296756505966187, "learning_rate": 0.0001, "loss": 0.0096, "step": 133250 }, { "epoch": 876.7105263157895, "grad_norm": 1.4620548486709595, "learning_rate": 0.0001, "loss": 0.0101, "step": 133260 }, { "epoch": 876.7763157894736, "grad_norm": 1.4632091522216797, "learning_rate": 0.0001, "loss": 0.011, "step": 133270 }, { "epoch": 876.8421052631579, "grad_norm": 1.3262276649475098, "learning_rate": 0.0001, "loss": 0.0134, "step": 133280 }, { "epoch": 876.9078947368421, "grad_norm": 1.4855178594589233, "learning_rate": 0.0001, "loss": 0.0094, "step": 133290 }, { "epoch": 876.9736842105264, "grad_norm": 1.28376042842865, "learning_rate": 0.0001, "loss": 0.0087, "step": 133300 }, { "epoch": 877.0394736842105, "grad_norm": 1.2845184803009033, "learning_rate": 0.0001, "loss": 0.0101, "step": 133310 }, { "epoch": 877.1052631578947, "grad_norm": 1.154748797416687, "learning_rate": 0.0001, "loss": 0.0083, "step": 133320 }, { "epoch": 877.171052631579, "grad_norm": 1.3325797319412231, "learning_rate": 0.0001, "loss": 0.0099, "step": 133330 }, { "epoch": 877.2368421052631, "grad_norm": 1.2600542306900024, "learning_rate": 0.0001, "loss": 0.0102, "step": 133340 }, { "epoch": 877.3026315789474, "grad_norm": 1.4029486179351807, "learning_rate": 0.0001, "loss": 0.0119, "step": 133350 }, { "epoch": 877.3684210526316, "grad_norm": 1.3744686841964722, "learning_rate": 0.0001, "loss": 0.0093, "step": 133360 }, { "epoch": 877.4342105263158, "grad_norm": 1.225288987159729, "learning_rate": 0.0001, "loss": 0.0097, "step": 133370 }, { "epoch": 877.5, "grad_norm": 1.006150245666504, "learning_rate": 0.0001, "loss": 0.0119, "step": 133380 }, { "epoch": 877.5657894736842, "grad_norm": 0.9903148412704468, "learning_rate": 0.0001, "loss": 0.0116, "step": 133390 }, { "epoch": 877.6315789473684, "grad_norm": 1.0531034469604492, "learning_rate": 0.0001, "loss": 0.0104, "step": 133400 }, { "epoch": 877.6973684210526, "grad_norm": 1.0496103763580322, "learning_rate": 0.0001, "loss": 0.0114, "step": 133410 }, { "epoch": 877.7631578947369, "grad_norm": 1.3180263042449951, "learning_rate": 0.0001, "loss": 0.0098, "step": 133420 }, { "epoch": 877.828947368421, "grad_norm": 1.455904245376587, "learning_rate": 0.0001, "loss": 0.0096, "step": 133430 }, { "epoch": 877.8947368421053, "grad_norm": 0.9219629168510437, "learning_rate": 0.0001, "loss": 0.0102, "step": 133440 }, { "epoch": 877.9605263157895, "grad_norm": 1.1099201440811157, "learning_rate": 0.0001, "loss": 0.0105, "step": 133450 }, { "epoch": 878.0263157894736, "grad_norm": 1.0845720767974854, "learning_rate": 0.0001, "loss": 0.0108, "step": 133460 }, { "epoch": 878.0921052631579, "grad_norm": 1.3005256652832031, "learning_rate": 0.0001, "loss": 0.0112, "step": 133470 }, { "epoch": 878.1578947368421, "grad_norm": 1.2297483682632446, "learning_rate": 0.0001, "loss": 0.0102, "step": 133480 }, { "epoch": 878.2236842105264, "grad_norm": 1.2901902198791504, "learning_rate": 0.0001, "loss": 0.0112, "step": 133490 }, { "epoch": 878.2894736842105, "grad_norm": 1.3723936080932617, "learning_rate": 0.0001, "loss": 0.0122, "step": 133500 }, { "epoch": 878.3552631578947, "grad_norm": 1.08962082862854, "learning_rate": 0.0001, "loss": 0.0126, "step": 133510 }, { "epoch": 878.421052631579, "grad_norm": 1.2790974378585815, "learning_rate": 0.0001, "loss": 0.0103, "step": 133520 }, { "epoch": 878.4868421052631, "grad_norm": 1.027013897895813, "learning_rate": 0.0001, "loss": 0.0084, "step": 133530 }, { "epoch": 878.5526315789474, "grad_norm": 1.4130576848983765, "learning_rate": 0.0001, "loss": 0.009, "step": 133540 }, { "epoch": 878.6184210526316, "grad_norm": 1.0892243385314941, "learning_rate": 0.0001, "loss": 0.0086, "step": 133550 }, { "epoch": 878.6842105263158, "grad_norm": 1.1556276082992554, "learning_rate": 0.0001, "loss": 0.0103, "step": 133560 }, { "epoch": 878.75, "grad_norm": 1.008480191230774, "learning_rate": 0.0001, "loss": 0.0102, "step": 133570 }, { "epoch": 878.8157894736842, "grad_norm": 0.7436794638633728, "learning_rate": 0.0001, "loss": 0.0099, "step": 133580 }, { "epoch": 878.8815789473684, "grad_norm": 1.1126219034194946, "learning_rate": 0.0001, "loss": 0.0117, "step": 133590 }, { "epoch": 878.9473684210526, "grad_norm": 1.165494441986084, "learning_rate": 0.0001, "loss": 0.0094, "step": 133600 }, { "epoch": 879.0131578947369, "grad_norm": 1.1165974140167236, "learning_rate": 0.0001, "loss": 0.013, "step": 133610 }, { "epoch": 879.078947368421, "grad_norm": 1.1761646270751953, "learning_rate": 0.0001, "loss": 0.009, "step": 133620 }, { "epoch": 879.1447368421053, "grad_norm": 1.1715446710586548, "learning_rate": 0.0001, "loss": 0.0107, "step": 133630 }, { "epoch": 879.2105263157895, "grad_norm": 1.2596334218978882, "learning_rate": 0.0001, "loss": 0.011, "step": 133640 }, { "epoch": 879.2763157894736, "grad_norm": 0.70237797498703, "learning_rate": 0.0001, "loss": 0.0107, "step": 133650 }, { "epoch": 879.3421052631579, "grad_norm": 0.857401967048645, "learning_rate": 0.0001, "loss": 0.0092, "step": 133660 }, { "epoch": 879.4078947368421, "grad_norm": 0.7992585301399231, "learning_rate": 0.0001, "loss": 0.0085, "step": 133670 }, { "epoch": 879.4736842105264, "grad_norm": 0.7546584606170654, "learning_rate": 0.0001, "loss": 0.009, "step": 133680 }, { "epoch": 879.5394736842105, "grad_norm": 1.1993327140808105, "learning_rate": 0.0001, "loss": 0.0097, "step": 133690 }, { "epoch": 879.6052631578947, "grad_norm": 1.0572268962860107, "learning_rate": 0.0001, "loss": 0.0088, "step": 133700 }, { "epoch": 879.671052631579, "grad_norm": 1.2813950777053833, "learning_rate": 0.0001, "loss": 0.0104, "step": 133710 }, { "epoch": 879.7368421052631, "grad_norm": 0.7247679233551025, "learning_rate": 0.0001, "loss": 0.0103, "step": 133720 }, { "epoch": 879.8026315789474, "grad_norm": 0.8819660544395447, "learning_rate": 0.0001, "loss": 0.0121, "step": 133730 }, { "epoch": 879.8684210526316, "grad_norm": 0.9946263432502747, "learning_rate": 0.0001, "loss": 0.011, "step": 133740 }, { "epoch": 879.9342105263158, "grad_norm": 0.8821104168891907, "learning_rate": 0.0001, "loss": 0.0108, "step": 133750 }, { "epoch": 880.0, "grad_norm": 0.6893178820610046, "learning_rate": 0.0001, "loss": 0.0108, "step": 133760 }, { "epoch": 880.0657894736842, "grad_norm": 1.0600444078445435, "learning_rate": 0.0001, "loss": 0.0105, "step": 133770 }, { "epoch": 880.1315789473684, "grad_norm": 1.0986602306365967, "learning_rate": 0.0001, "loss": 0.0116, "step": 133780 }, { "epoch": 880.1973684210526, "grad_norm": 0.8387561440467834, "learning_rate": 0.0001, "loss": 0.01, "step": 133790 }, { "epoch": 880.2631578947369, "grad_norm": 0.8584687113761902, "learning_rate": 0.0001, "loss": 0.0103, "step": 133800 }, { "epoch": 880.328947368421, "grad_norm": 0.9088717699050903, "learning_rate": 0.0001, "loss": 0.0095, "step": 133810 }, { "epoch": 880.3947368421053, "grad_norm": 0.9743188619613647, "learning_rate": 0.0001, "loss": 0.0095, "step": 133820 }, { "epoch": 880.4605263157895, "grad_norm": 0.6925018429756165, "learning_rate": 0.0001, "loss": 0.0104, "step": 133830 }, { "epoch": 880.5263157894736, "grad_norm": 0.9171350002288818, "learning_rate": 0.0001, "loss": 0.0105, "step": 133840 }, { "epoch": 880.5921052631579, "grad_norm": 0.8944483399391174, "learning_rate": 0.0001, "loss": 0.0106, "step": 133850 }, { "epoch": 880.6578947368421, "grad_norm": 0.859816312789917, "learning_rate": 0.0001, "loss": 0.0121, "step": 133860 }, { "epoch": 880.7236842105264, "grad_norm": 0.8540825247764587, "learning_rate": 0.0001, "loss": 0.0102, "step": 133870 }, { "epoch": 880.7894736842105, "grad_norm": 0.8680028319358826, "learning_rate": 0.0001, "loss": 0.0098, "step": 133880 }, { "epoch": 880.8552631578947, "grad_norm": 0.6797283291816711, "learning_rate": 0.0001, "loss": 0.0086, "step": 133890 }, { "epoch": 880.921052631579, "grad_norm": 1.095505952835083, "learning_rate": 0.0001, "loss": 0.0086, "step": 133900 }, { "epoch": 880.9868421052631, "grad_norm": 0.8483147025108337, "learning_rate": 0.0001, "loss": 0.0107, "step": 133910 }, { "epoch": 881.0526315789474, "grad_norm": 1.0879533290863037, "learning_rate": 0.0001, "loss": 0.01, "step": 133920 }, { "epoch": 881.1184210526316, "grad_norm": 1.1705760955810547, "learning_rate": 0.0001, "loss": 0.01, "step": 133930 }, { "epoch": 881.1842105263158, "grad_norm": 1.3736196756362915, "learning_rate": 0.0001, "loss": 0.0106, "step": 133940 }, { "epoch": 881.25, "grad_norm": 1.5451818704605103, "learning_rate": 0.0001, "loss": 0.011, "step": 133950 }, { "epoch": 881.3157894736842, "grad_norm": 1.1037559509277344, "learning_rate": 0.0001, "loss": 0.0083, "step": 133960 }, { "epoch": 881.3815789473684, "grad_norm": 1.054043173789978, "learning_rate": 0.0001, "loss": 0.0105, "step": 133970 }, { "epoch": 881.4473684210526, "grad_norm": 1.1496354341506958, "learning_rate": 0.0001, "loss": 0.0109, "step": 133980 }, { "epoch": 881.5131578947369, "grad_norm": 0.895317792892456, "learning_rate": 0.0001, "loss": 0.0098, "step": 133990 }, { "epoch": 881.578947368421, "grad_norm": 0.9729178547859192, "learning_rate": 0.0001, "loss": 0.0091, "step": 134000 }, { "epoch": 881.6447368421053, "grad_norm": 0.8765199184417725, "learning_rate": 0.0001, "loss": 0.0105, "step": 134010 }, { "epoch": 881.7105263157895, "grad_norm": 1.216593861579895, "learning_rate": 0.0001, "loss": 0.0085, "step": 134020 }, { "epoch": 881.7763157894736, "grad_norm": 0.9411874413490295, "learning_rate": 0.0001, "loss": 0.01, "step": 134030 }, { "epoch": 881.8421052631579, "grad_norm": 1.250806450843811, "learning_rate": 0.0001, "loss": 0.0093, "step": 134040 }, { "epoch": 881.9078947368421, "grad_norm": 1.06573486328125, "learning_rate": 0.0001, "loss": 0.0101, "step": 134050 }, { "epoch": 881.9736842105264, "grad_norm": 1.1607677936553955, "learning_rate": 0.0001, "loss": 0.0122, "step": 134060 }, { "epoch": 882.0394736842105, "grad_norm": 1.0911805629730225, "learning_rate": 0.0001, "loss": 0.0099, "step": 134070 }, { "epoch": 882.1052631578947, "grad_norm": 0.9352615475654602, "learning_rate": 0.0001, "loss": 0.013, "step": 134080 }, { "epoch": 882.171052631579, "grad_norm": 1.1933131217956543, "learning_rate": 0.0001, "loss": 0.0092, "step": 134090 }, { "epoch": 882.2368421052631, "grad_norm": 1.0852186679840088, "learning_rate": 0.0001, "loss": 0.0103, "step": 134100 }, { "epoch": 882.3026315789474, "grad_norm": 1.5493214130401611, "learning_rate": 0.0001, "loss": 0.0108, "step": 134110 }, { "epoch": 882.3684210526316, "grad_norm": 1.0032248497009277, "learning_rate": 0.0001, "loss": 0.0102, "step": 134120 }, { "epoch": 882.4342105263158, "grad_norm": 0.861934244632721, "learning_rate": 0.0001, "loss": 0.0114, "step": 134130 }, { "epoch": 882.5, "grad_norm": 1.3053967952728271, "learning_rate": 0.0001, "loss": 0.011, "step": 134140 }, { "epoch": 882.5657894736842, "grad_norm": 1.0918253660202026, "learning_rate": 0.0001, "loss": 0.0119, "step": 134150 }, { "epoch": 882.6315789473684, "grad_norm": 1.0111234188079834, "learning_rate": 0.0001, "loss": 0.0092, "step": 134160 }, { "epoch": 882.6973684210526, "grad_norm": 1.3519738912582397, "learning_rate": 0.0001, "loss": 0.0087, "step": 134170 }, { "epoch": 882.7631578947369, "grad_norm": 1.0138260126113892, "learning_rate": 0.0001, "loss": 0.0101, "step": 134180 }, { "epoch": 882.828947368421, "grad_norm": 1.0171897411346436, "learning_rate": 0.0001, "loss": 0.0098, "step": 134190 }, { "epoch": 882.8947368421053, "grad_norm": 0.7954549789428711, "learning_rate": 0.0001, "loss": 0.0129, "step": 134200 }, { "epoch": 882.9605263157895, "grad_norm": 0.9768205285072327, "learning_rate": 0.0001, "loss": 0.011, "step": 134210 }, { "epoch": 883.0263157894736, "grad_norm": 1.317543625831604, "learning_rate": 0.0001, "loss": 0.0107, "step": 134220 }, { "epoch": 883.0921052631579, "grad_norm": 1.5799720287322998, "learning_rate": 0.0001, "loss": 0.0108, "step": 134230 }, { "epoch": 883.1578947368421, "grad_norm": 1.1066539287567139, "learning_rate": 0.0001, "loss": 0.01, "step": 134240 }, { "epoch": 883.2236842105264, "grad_norm": 0.8365485668182373, "learning_rate": 0.0001, "loss": 0.0121, "step": 134250 }, { "epoch": 883.2894736842105, "grad_norm": 1.3858592510223389, "learning_rate": 0.0001, "loss": 0.0097, "step": 134260 }, { "epoch": 883.3552631578947, "grad_norm": 0.8898154497146606, "learning_rate": 0.0001, "loss": 0.0112, "step": 134270 }, { "epoch": 883.421052631579, "grad_norm": 0.8104990124702454, "learning_rate": 0.0001, "loss": 0.0103, "step": 134280 }, { "epoch": 883.4868421052631, "grad_norm": 1.3037996292114258, "learning_rate": 0.0001, "loss": 0.0109, "step": 134290 }, { "epoch": 883.5526315789474, "grad_norm": 1.1328245401382446, "learning_rate": 0.0001, "loss": 0.0113, "step": 134300 }, { "epoch": 883.6184210526316, "grad_norm": 1.0135562419891357, "learning_rate": 0.0001, "loss": 0.0117, "step": 134310 }, { "epoch": 883.6842105263158, "grad_norm": 0.8955759406089783, "learning_rate": 0.0001, "loss": 0.0105, "step": 134320 }, { "epoch": 883.75, "grad_norm": 0.7221687436103821, "learning_rate": 0.0001, "loss": 0.0104, "step": 134330 }, { "epoch": 883.8157894736842, "grad_norm": 0.8941370844841003, "learning_rate": 0.0001, "loss": 0.0132, "step": 134340 }, { "epoch": 883.8815789473684, "grad_norm": 1.185426115989685, "learning_rate": 0.0001, "loss": 0.0102, "step": 134350 }, { "epoch": 883.9473684210526, "grad_norm": 1.2640587091445923, "learning_rate": 0.0001, "loss": 0.0085, "step": 134360 }, { "epoch": 884.0131578947369, "grad_norm": 1.3449172973632812, "learning_rate": 0.0001, "loss": 0.0081, "step": 134370 }, { "epoch": 884.078947368421, "grad_norm": 1.0787019729614258, "learning_rate": 0.0001, "loss": 0.0118, "step": 134380 }, { "epoch": 884.1447368421053, "grad_norm": 1.3704967498779297, "learning_rate": 0.0001, "loss": 0.011, "step": 134390 }, { "epoch": 884.2105263157895, "grad_norm": 1.1371135711669922, "learning_rate": 0.0001, "loss": 0.0094, "step": 134400 }, { "epoch": 884.2763157894736, "grad_norm": 1.1591137647628784, "learning_rate": 0.0001, "loss": 0.0097, "step": 134410 }, { "epoch": 884.3421052631579, "grad_norm": 1.0808539390563965, "learning_rate": 0.0001, "loss": 0.0108, "step": 134420 }, { "epoch": 884.4078947368421, "grad_norm": 0.9412270784378052, "learning_rate": 0.0001, "loss": 0.0096, "step": 134430 }, { "epoch": 884.4736842105264, "grad_norm": 0.8797613978385925, "learning_rate": 0.0001, "loss": 0.0114, "step": 134440 }, { "epoch": 884.5394736842105, "grad_norm": 1.1469194889068604, "learning_rate": 0.0001, "loss": 0.0101, "step": 134450 }, { "epoch": 884.6052631578947, "grad_norm": 1.2293258905410767, "learning_rate": 0.0001, "loss": 0.0109, "step": 134460 }, { "epoch": 884.671052631579, "grad_norm": 0.9875232577323914, "learning_rate": 0.0001, "loss": 0.0107, "step": 134470 }, { "epoch": 884.7368421052631, "grad_norm": 0.9115191698074341, "learning_rate": 0.0001, "loss": 0.0089, "step": 134480 }, { "epoch": 884.8026315789474, "grad_norm": 1.4155162572860718, "learning_rate": 0.0001, "loss": 0.0083, "step": 134490 }, { "epoch": 884.8684210526316, "grad_norm": 1.0972641706466675, "learning_rate": 0.0001, "loss": 0.0086, "step": 134500 }, { "epoch": 884.9342105263158, "grad_norm": 1.0389680862426758, "learning_rate": 0.0001, "loss": 0.012, "step": 134510 }, { "epoch": 885.0, "grad_norm": 1.0352895259857178, "learning_rate": 0.0001, "loss": 0.0092, "step": 134520 }, { "epoch": 885.0657894736842, "grad_norm": 0.6757728457450867, "learning_rate": 0.0001, "loss": 0.0097, "step": 134530 }, { "epoch": 885.1315789473684, "grad_norm": 1.3461501598358154, "learning_rate": 0.0001, "loss": 0.0109, "step": 134540 }, { "epoch": 885.1973684210526, "grad_norm": 0.9551567435264587, "learning_rate": 0.0001, "loss": 0.0102, "step": 134550 }, { "epoch": 885.2631578947369, "grad_norm": 1.348388910293579, "learning_rate": 0.0001, "loss": 0.0112, "step": 134560 }, { "epoch": 885.328947368421, "grad_norm": 1.1484230756759644, "learning_rate": 0.0001, "loss": 0.0083, "step": 134570 }, { "epoch": 885.3947368421053, "grad_norm": 1.0391778945922852, "learning_rate": 0.0001, "loss": 0.0122, "step": 134580 }, { "epoch": 885.4605263157895, "grad_norm": 0.9618095755577087, "learning_rate": 0.0001, "loss": 0.0124, "step": 134590 }, { "epoch": 885.5263157894736, "grad_norm": 0.9640491604804993, "learning_rate": 0.0001, "loss": 0.011, "step": 134600 }, { "epoch": 885.5921052631579, "grad_norm": 1.0603336095809937, "learning_rate": 0.0001, "loss": 0.0111, "step": 134610 }, { "epoch": 885.6578947368421, "grad_norm": 0.8449630737304688, "learning_rate": 0.0001, "loss": 0.0112, "step": 134620 }, { "epoch": 885.7236842105264, "grad_norm": 0.8843861222267151, "learning_rate": 0.0001, "loss": 0.0118, "step": 134630 }, { "epoch": 885.7894736842105, "grad_norm": 1.1363091468811035, "learning_rate": 0.0001, "loss": 0.0126, "step": 134640 }, { "epoch": 885.8552631578947, "grad_norm": 1.238927960395813, "learning_rate": 0.0001, "loss": 0.0124, "step": 134650 }, { "epoch": 885.921052631579, "grad_norm": 1.1133671998977661, "learning_rate": 0.0001, "loss": 0.013, "step": 134660 }, { "epoch": 885.9868421052631, "grad_norm": 1.23615300655365, "learning_rate": 0.0001, "loss": 0.0117, "step": 134670 }, { "epoch": 886.0526315789474, "grad_norm": 1.0656203031539917, "learning_rate": 0.0001, "loss": 0.0128, "step": 134680 }, { "epoch": 886.1184210526316, "grad_norm": 1.0231038331985474, "learning_rate": 0.0001, "loss": 0.011, "step": 134690 }, { "epoch": 886.1842105263158, "grad_norm": 1.6583019495010376, "learning_rate": 0.0001, "loss": 0.0113, "step": 134700 }, { "epoch": 886.25, "grad_norm": 1.5105453729629517, "learning_rate": 0.0001, "loss": 0.0104, "step": 134710 }, { "epoch": 886.3157894736842, "grad_norm": 1.3457199335098267, "learning_rate": 0.0001, "loss": 0.0116, "step": 134720 }, { "epoch": 886.3815789473684, "grad_norm": 1.0339312553405762, "learning_rate": 0.0001, "loss": 0.0113, "step": 134730 }, { "epoch": 886.4473684210526, "grad_norm": 0.9141952395439148, "learning_rate": 0.0001, "loss": 0.0125, "step": 134740 }, { "epoch": 886.5131578947369, "grad_norm": 1.3014699220657349, "learning_rate": 0.0001, "loss": 0.0098, "step": 134750 }, { "epoch": 886.578947368421, "grad_norm": 0.8726618885993958, "learning_rate": 0.0001, "loss": 0.0123, "step": 134760 }, { "epoch": 886.6447368421053, "grad_norm": 1.2617624998092651, "learning_rate": 0.0001, "loss": 0.0092, "step": 134770 }, { "epoch": 886.7105263157895, "grad_norm": 0.9956019520759583, "learning_rate": 0.0001, "loss": 0.0122, "step": 134780 }, { "epoch": 886.7763157894736, "grad_norm": 1.3004852533340454, "learning_rate": 0.0001, "loss": 0.0111, "step": 134790 }, { "epoch": 886.8421052631579, "grad_norm": 1.200190782546997, "learning_rate": 0.0001, "loss": 0.0115, "step": 134800 }, { "epoch": 886.9078947368421, "grad_norm": 1.1957974433898926, "learning_rate": 0.0001, "loss": 0.0103, "step": 134810 }, { "epoch": 886.9736842105264, "grad_norm": 1.3454686403274536, "learning_rate": 0.0001, "loss": 0.0084, "step": 134820 }, { "epoch": 887.0394736842105, "grad_norm": 0.9715621471405029, "learning_rate": 0.0001, "loss": 0.0106, "step": 134830 }, { "epoch": 887.1052631578947, "grad_norm": 1.2138034105300903, "learning_rate": 0.0001, "loss": 0.0103, "step": 134840 }, { "epoch": 887.171052631579, "grad_norm": 1.0837821960449219, "learning_rate": 0.0001, "loss": 0.0093, "step": 134850 }, { "epoch": 887.2368421052631, "grad_norm": 0.9954550266265869, "learning_rate": 0.0001, "loss": 0.0104, "step": 134860 }, { "epoch": 887.3026315789474, "grad_norm": 1.319032907485962, "learning_rate": 0.0001, "loss": 0.0115, "step": 134870 }, { "epoch": 887.3684210526316, "grad_norm": 1.0620183944702148, "learning_rate": 0.0001, "loss": 0.0106, "step": 134880 }, { "epoch": 887.4342105263158, "grad_norm": 1.06571626663208, "learning_rate": 0.0001, "loss": 0.009, "step": 134890 }, { "epoch": 887.5, "grad_norm": 1.1749192476272583, "learning_rate": 0.0001, "loss": 0.0085, "step": 134900 }, { "epoch": 887.5657894736842, "grad_norm": 1.475423812866211, "learning_rate": 0.0001, "loss": 0.0099, "step": 134910 }, { "epoch": 887.6315789473684, "grad_norm": 1.3862122297286987, "learning_rate": 0.0001, "loss": 0.0112, "step": 134920 }, { "epoch": 887.6973684210526, "grad_norm": 1.3569873571395874, "learning_rate": 0.0001, "loss": 0.0104, "step": 134930 }, { "epoch": 887.7631578947369, "grad_norm": 1.0890201330184937, "learning_rate": 0.0001, "loss": 0.01, "step": 134940 }, { "epoch": 887.828947368421, "grad_norm": 1.0563850402832031, "learning_rate": 0.0001, "loss": 0.009, "step": 134950 }, { "epoch": 887.8947368421053, "grad_norm": 1.4732695817947388, "learning_rate": 0.0001, "loss": 0.0111, "step": 134960 }, { "epoch": 887.9605263157895, "grad_norm": 0.7517303824424744, "learning_rate": 0.0001, "loss": 0.0108, "step": 134970 }, { "epoch": 888.0263157894736, "grad_norm": 0.9856480956077576, "learning_rate": 0.0001, "loss": 0.0099, "step": 134980 }, { "epoch": 888.0921052631579, "grad_norm": 0.9564664363861084, "learning_rate": 0.0001, "loss": 0.01, "step": 134990 }, { "epoch": 888.1578947368421, "grad_norm": 0.8272193670272827, "learning_rate": 0.0001, "loss": 0.0107, "step": 135000 }, { "epoch": 888.2236842105264, "grad_norm": 1.3324556350708008, "learning_rate": 0.0001, "loss": 0.0108, "step": 135010 }, { "epoch": 888.2894736842105, "grad_norm": 1.2476513385772705, "learning_rate": 0.0001, "loss": 0.0101, "step": 135020 }, { "epoch": 888.3552631578947, "grad_norm": 1.0319244861602783, "learning_rate": 0.0001, "loss": 0.0099, "step": 135030 }, { "epoch": 888.421052631579, "grad_norm": 0.6704224944114685, "learning_rate": 0.0001, "loss": 0.0098, "step": 135040 }, { "epoch": 888.4868421052631, "grad_norm": 0.8636611700057983, "learning_rate": 0.0001, "loss": 0.0091, "step": 135050 }, { "epoch": 888.5526315789474, "grad_norm": 1.0018370151519775, "learning_rate": 0.0001, "loss": 0.0099, "step": 135060 }, { "epoch": 888.6184210526316, "grad_norm": 1.3011027574539185, "learning_rate": 0.0001, "loss": 0.0087, "step": 135070 }, { "epoch": 888.6842105263158, "grad_norm": 0.8524222373962402, "learning_rate": 0.0001, "loss": 0.0107, "step": 135080 }, { "epoch": 888.75, "grad_norm": 1.3557988405227661, "learning_rate": 0.0001, "loss": 0.0091, "step": 135090 }, { "epoch": 888.8157894736842, "grad_norm": 0.79920494556427, "learning_rate": 0.0001, "loss": 0.0113, "step": 135100 }, { "epoch": 888.8815789473684, "grad_norm": 0.8106809258460999, "learning_rate": 0.0001, "loss": 0.0109, "step": 135110 }, { "epoch": 888.9473684210526, "grad_norm": 0.9653734564781189, "learning_rate": 0.0001, "loss": 0.0095, "step": 135120 }, { "epoch": 889.0131578947369, "grad_norm": 1.4216874837875366, "learning_rate": 0.0001, "loss": 0.0114, "step": 135130 }, { "epoch": 889.078947368421, "grad_norm": 1.148635983467102, "learning_rate": 0.0001, "loss": 0.0103, "step": 135140 }, { "epoch": 889.1447368421053, "grad_norm": 1.2149169445037842, "learning_rate": 0.0001, "loss": 0.0112, "step": 135150 }, { "epoch": 889.2105263157895, "grad_norm": 1.0104612112045288, "learning_rate": 0.0001, "loss": 0.0106, "step": 135160 }, { "epoch": 889.2763157894736, "grad_norm": 0.984620988368988, "learning_rate": 0.0001, "loss": 0.0097, "step": 135170 }, { "epoch": 889.3421052631579, "grad_norm": 1.2821751832962036, "learning_rate": 0.0001, "loss": 0.0129, "step": 135180 }, { "epoch": 889.4078947368421, "grad_norm": 1.0690802335739136, "learning_rate": 0.0001, "loss": 0.0086, "step": 135190 }, { "epoch": 889.4736842105264, "grad_norm": 1.02140474319458, "learning_rate": 0.0001, "loss": 0.0103, "step": 135200 }, { "epoch": 889.5394736842105, "grad_norm": 1.0375328063964844, "learning_rate": 0.0001, "loss": 0.0089, "step": 135210 }, { "epoch": 889.6052631578947, "grad_norm": 0.9176158308982849, "learning_rate": 0.0001, "loss": 0.0087, "step": 135220 }, { "epoch": 889.671052631579, "grad_norm": 0.9485794305801392, "learning_rate": 0.0001, "loss": 0.0104, "step": 135230 }, { "epoch": 889.7368421052631, "grad_norm": 0.9150336980819702, "learning_rate": 0.0001, "loss": 0.0108, "step": 135240 }, { "epoch": 889.8026315789474, "grad_norm": 0.9094645380973816, "learning_rate": 0.0001, "loss": 0.0111, "step": 135250 }, { "epoch": 889.8684210526316, "grad_norm": 0.6861184239387512, "learning_rate": 0.0001, "loss": 0.0092, "step": 135260 }, { "epoch": 889.9342105263158, "grad_norm": 0.9334322810173035, "learning_rate": 0.0001, "loss": 0.0112, "step": 135270 }, { "epoch": 890.0, "grad_norm": 1.377838134765625, "learning_rate": 0.0001, "loss": 0.0085, "step": 135280 }, { "epoch": 890.0657894736842, "grad_norm": 1.133252739906311, "learning_rate": 0.0001, "loss": 0.0081, "step": 135290 }, { "epoch": 890.1315789473684, "grad_norm": 1.1310118436813354, "learning_rate": 0.0001, "loss": 0.0075, "step": 135300 }, { "epoch": 890.1973684210526, "grad_norm": 0.7168204188346863, "learning_rate": 0.0001, "loss": 0.0078, "step": 135310 }, { "epoch": 890.2631578947369, "grad_norm": 0.8758770227432251, "learning_rate": 0.0001, "loss": 0.0115, "step": 135320 }, { "epoch": 890.328947368421, "grad_norm": 1.1734673976898193, "learning_rate": 0.0001, "loss": 0.0106, "step": 135330 }, { "epoch": 890.3947368421053, "grad_norm": 1.19082772731781, "learning_rate": 0.0001, "loss": 0.0099, "step": 135340 }, { "epoch": 890.4605263157895, "grad_norm": 0.8569977283477783, "learning_rate": 0.0001, "loss": 0.0096, "step": 135350 }, { "epoch": 890.5263157894736, "grad_norm": 1.1632040739059448, "learning_rate": 0.0001, "loss": 0.0104, "step": 135360 }, { "epoch": 890.5921052631579, "grad_norm": 1.2661454677581787, "learning_rate": 0.0001, "loss": 0.0115, "step": 135370 }, { "epoch": 890.6578947368421, "grad_norm": 0.9945191144943237, "learning_rate": 0.0001, "loss": 0.009, "step": 135380 }, { "epoch": 890.7236842105264, "grad_norm": 1.382722020149231, "learning_rate": 0.0001, "loss": 0.0107, "step": 135390 }, { "epoch": 890.7894736842105, "grad_norm": 1.317020297050476, "learning_rate": 0.0001, "loss": 0.0118, "step": 135400 }, { "epoch": 890.8552631578947, "grad_norm": 1.1985100507736206, "learning_rate": 0.0001, "loss": 0.0107, "step": 135410 }, { "epoch": 890.921052631579, "grad_norm": 0.7880237698554993, "learning_rate": 0.0001, "loss": 0.0118, "step": 135420 }, { "epoch": 890.9868421052631, "grad_norm": 1.2420897483825684, "learning_rate": 0.0001, "loss": 0.0094, "step": 135430 }, { "epoch": 891.0526315789474, "grad_norm": 1.693311333656311, "learning_rate": 0.0001, "loss": 0.0096, "step": 135440 }, { "epoch": 891.1184210526316, "grad_norm": 1.6864817142486572, "learning_rate": 0.0001, "loss": 0.0107, "step": 135450 }, { "epoch": 891.1842105263158, "grad_norm": 1.1330416202545166, "learning_rate": 0.0001, "loss": 0.0101, "step": 135460 }, { "epoch": 891.25, "grad_norm": 1.0997774600982666, "learning_rate": 0.0001, "loss": 0.011, "step": 135470 }, { "epoch": 891.3157894736842, "grad_norm": 0.924976646900177, "learning_rate": 0.0001, "loss": 0.0093, "step": 135480 }, { "epoch": 891.3815789473684, "grad_norm": 1.182472825050354, "learning_rate": 0.0001, "loss": 0.0105, "step": 135490 }, { "epoch": 891.4473684210526, "grad_norm": 1.1451120376586914, "learning_rate": 0.0001, "loss": 0.0092, "step": 135500 }, { "epoch": 891.5131578947369, "grad_norm": 0.876240611076355, "learning_rate": 0.0001, "loss": 0.0107, "step": 135510 }, { "epoch": 891.578947368421, "grad_norm": 1.0297558307647705, "learning_rate": 0.0001, "loss": 0.0094, "step": 135520 }, { "epoch": 891.6447368421053, "grad_norm": 0.6283572912216187, "learning_rate": 0.0001, "loss": 0.0102, "step": 135530 }, { "epoch": 891.7105263157895, "grad_norm": 1.2140889167785645, "learning_rate": 0.0001, "loss": 0.0084, "step": 135540 }, { "epoch": 891.7763157894736, "grad_norm": 1.0670727491378784, "learning_rate": 0.0001, "loss": 0.0093, "step": 135550 }, { "epoch": 891.8421052631579, "grad_norm": 1.0269482135772705, "learning_rate": 0.0001, "loss": 0.0112, "step": 135560 }, { "epoch": 891.9078947368421, "grad_norm": 1.1649383306503296, "learning_rate": 0.0001, "loss": 0.0087, "step": 135570 }, { "epoch": 891.9736842105264, "grad_norm": 1.085970163345337, "learning_rate": 0.0001, "loss": 0.0093, "step": 135580 }, { "epoch": 892.0394736842105, "grad_norm": 1.1776764392852783, "learning_rate": 0.0001, "loss": 0.0111, "step": 135590 }, { "epoch": 892.1052631578947, "grad_norm": 1.2964081764221191, "learning_rate": 0.0001, "loss": 0.0095, "step": 135600 }, { "epoch": 892.171052631579, "grad_norm": 1.1058318614959717, "learning_rate": 0.0001, "loss": 0.0095, "step": 135610 }, { "epoch": 892.2368421052631, "grad_norm": 0.9314119815826416, "learning_rate": 0.0001, "loss": 0.0098, "step": 135620 }, { "epoch": 892.3026315789474, "grad_norm": 1.4460657835006714, "learning_rate": 0.0001, "loss": 0.0114, "step": 135630 }, { "epoch": 892.3684210526316, "grad_norm": 0.9084354043006897, "learning_rate": 0.0001, "loss": 0.0089, "step": 135640 }, { "epoch": 892.4342105263158, "grad_norm": 1.1502026319503784, "learning_rate": 0.0001, "loss": 0.0103, "step": 135650 }, { "epoch": 892.5, "grad_norm": 1.2194117307662964, "learning_rate": 0.0001, "loss": 0.0087, "step": 135660 }, { "epoch": 892.5657894736842, "grad_norm": 0.9721207618713379, "learning_rate": 0.0001, "loss": 0.0126, "step": 135670 }, { "epoch": 892.6315789473684, "grad_norm": 0.7545431852340698, "learning_rate": 0.0001, "loss": 0.0096, "step": 135680 }, { "epoch": 892.6973684210526, "grad_norm": 0.8242561221122742, "learning_rate": 0.0001, "loss": 0.0101, "step": 135690 }, { "epoch": 892.7631578947369, "grad_norm": 0.8406139612197876, "learning_rate": 0.0001, "loss": 0.0082, "step": 135700 }, { "epoch": 892.828947368421, "grad_norm": 1.0956982374191284, "learning_rate": 0.0001, "loss": 0.0104, "step": 135710 }, { "epoch": 892.8947368421053, "grad_norm": 0.8513756990432739, "learning_rate": 0.0001, "loss": 0.0097, "step": 135720 }, { "epoch": 892.9605263157895, "grad_norm": 1.1040880680084229, "learning_rate": 0.0001, "loss": 0.0093, "step": 135730 }, { "epoch": 893.0263157894736, "grad_norm": 1.076196551322937, "learning_rate": 0.0001, "loss": 0.0093, "step": 135740 }, { "epoch": 893.0921052631579, "grad_norm": 1.1001917123794556, "learning_rate": 0.0001, "loss": 0.0107, "step": 135750 }, { "epoch": 893.1578947368421, "grad_norm": 1.5378665924072266, "learning_rate": 0.0001, "loss": 0.0092, "step": 135760 }, { "epoch": 893.2236842105264, "grad_norm": 1.166962742805481, "learning_rate": 0.0001, "loss": 0.0111, "step": 135770 }, { "epoch": 893.2894736842105, "grad_norm": 0.90530925989151, "learning_rate": 0.0001, "loss": 0.0096, "step": 135780 }, { "epoch": 893.3552631578947, "grad_norm": 1.342469334602356, "learning_rate": 0.0001, "loss": 0.01, "step": 135790 }, { "epoch": 893.421052631579, "grad_norm": 1.1305315494537354, "learning_rate": 0.0001, "loss": 0.0085, "step": 135800 }, { "epoch": 893.4868421052631, "grad_norm": 0.8744671940803528, "learning_rate": 0.0001, "loss": 0.0077, "step": 135810 }, { "epoch": 893.5526315789474, "grad_norm": 1.1060692071914673, "learning_rate": 0.0001, "loss": 0.0101, "step": 135820 }, { "epoch": 893.6184210526316, "grad_norm": 1.1510794162750244, "learning_rate": 0.0001, "loss": 0.0094, "step": 135830 }, { "epoch": 893.6842105263158, "grad_norm": 1.0159302949905396, "learning_rate": 0.0001, "loss": 0.0113, "step": 135840 }, { "epoch": 893.75, "grad_norm": 1.0674599409103394, "learning_rate": 0.0001, "loss": 0.0094, "step": 135850 }, { "epoch": 893.8157894736842, "grad_norm": 0.7770407199859619, "learning_rate": 0.0001, "loss": 0.0087, "step": 135860 }, { "epoch": 893.8815789473684, "grad_norm": 1.1732828617095947, "learning_rate": 0.0001, "loss": 0.0087, "step": 135870 }, { "epoch": 893.9473684210526, "grad_norm": 0.732951283454895, "learning_rate": 0.0001, "loss": 0.0118, "step": 135880 }, { "epoch": 894.0131578947369, "grad_norm": 0.8434010148048401, "learning_rate": 0.0001, "loss": 0.0105, "step": 135890 }, { "epoch": 894.078947368421, "grad_norm": 0.9561564922332764, "learning_rate": 0.0001, "loss": 0.01, "step": 135900 }, { "epoch": 894.1447368421053, "grad_norm": 1.377158761024475, "learning_rate": 0.0001, "loss": 0.013, "step": 135910 }, { "epoch": 894.2105263157895, "grad_norm": 1.1105989217758179, "learning_rate": 0.0001, "loss": 0.0096, "step": 135920 }, { "epoch": 894.2763157894736, "grad_norm": 1.2619684934616089, "learning_rate": 0.0001, "loss": 0.0108, "step": 135930 }, { "epoch": 894.3421052631579, "grad_norm": 1.026212453842163, "learning_rate": 0.0001, "loss": 0.0083, "step": 135940 }, { "epoch": 894.4078947368421, "grad_norm": 0.7990017533302307, "learning_rate": 0.0001, "loss": 0.0093, "step": 135950 }, { "epoch": 894.4736842105264, "grad_norm": 0.9422027468681335, "learning_rate": 0.0001, "loss": 0.009, "step": 135960 }, { "epoch": 894.5394736842105, "grad_norm": 0.796430230140686, "learning_rate": 0.0001, "loss": 0.0097, "step": 135970 }, { "epoch": 894.6052631578947, "grad_norm": 1.2357088327407837, "learning_rate": 0.0001, "loss": 0.0099, "step": 135980 }, { "epoch": 894.671052631579, "grad_norm": 1.2409306764602661, "learning_rate": 0.0001, "loss": 0.0096, "step": 135990 }, { "epoch": 894.7368421052631, "grad_norm": 1.2816016674041748, "learning_rate": 0.0001, "loss": 0.0106, "step": 136000 }, { "epoch": 894.8026315789474, "grad_norm": 0.6768025755882263, "learning_rate": 0.0001, "loss": 0.0089, "step": 136010 }, { "epoch": 894.8684210526316, "grad_norm": 0.9706000685691833, "learning_rate": 0.0001, "loss": 0.0096, "step": 136020 }, { "epoch": 894.9342105263158, "grad_norm": 0.9608473777770996, "learning_rate": 0.0001, "loss": 0.0084, "step": 136030 }, { "epoch": 895.0, "grad_norm": 1.143330693244934, "learning_rate": 0.0001, "loss": 0.0095, "step": 136040 }, { "epoch": 895.0657894736842, "grad_norm": 0.8923169374465942, "learning_rate": 0.0001, "loss": 0.0109, "step": 136050 }, { "epoch": 895.1315789473684, "grad_norm": 0.7669290900230408, "learning_rate": 0.0001, "loss": 0.0107, "step": 136060 }, { "epoch": 895.1973684210526, "grad_norm": 0.9958640933036804, "learning_rate": 0.0001, "loss": 0.0101, "step": 136070 }, { "epoch": 895.2631578947369, "grad_norm": 1.1290862560272217, "learning_rate": 0.0001, "loss": 0.0101, "step": 136080 }, { "epoch": 895.328947368421, "grad_norm": 0.9855304956436157, "learning_rate": 0.0001, "loss": 0.0106, "step": 136090 }, { "epoch": 895.3947368421053, "grad_norm": 0.9755940437316895, "learning_rate": 0.0001, "loss": 0.0091, "step": 136100 }, { "epoch": 895.4605263157895, "grad_norm": 0.8567885160446167, "learning_rate": 0.0001, "loss": 0.0091, "step": 136110 }, { "epoch": 895.5263157894736, "grad_norm": 1.0460125207901, "learning_rate": 0.0001, "loss": 0.0083, "step": 136120 }, { "epoch": 895.5921052631579, "grad_norm": 1.2655810117721558, "learning_rate": 0.0001, "loss": 0.0102, "step": 136130 }, { "epoch": 895.6578947368421, "grad_norm": 0.951598048210144, "learning_rate": 0.0001, "loss": 0.0093, "step": 136140 }, { "epoch": 895.7236842105264, "grad_norm": 1.1518741846084595, "learning_rate": 0.0001, "loss": 0.0107, "step": 136150 }, { "epoch": 895.7894736842105, "grad_norm": 0.9149439334869385, "learning_rate": 0.0001, "loss": 0.0102, "step": 136160 }, { "epoch": 895.8552631578947, "grad_norm": 1.0804107189178467, "learning_rate": 0.0001, "loss": 0.0091, "step": 136170 }, { "epoch": 895.921052631579, "grad_norm": 1.2547401189804077, "learning_rate": 0.0001, "loss": 0.0087, "step": 136180 }, { "epoch": 895.9868421052631, "grad_norm": 1.0589017868041992, "learning_rate": 0.0001, "loss": 0.0112, "step": 136190 }, { "epoch": 896.0526315789474, "grad_norm": 1.040658950805664, "learning_rate": 0.0001, "loss": 0.011, "step": 136200 }, { "epoch": 896.1184210526316, "grad_norm": 1.1037824153900146, "learning_rate": 0.0001, "loss": 0.0086, "step": 136210 }, { "epoch": 896.1842105263158, "grad_norm": 1.4946725368499756, "learning_rate": 0.0001, "loss": 0.0104, "step": 136220 }, { "epoch": 896.25, "grad_norm": 1.519361972808838, "learning_rate": 0.0001, "loss": 0.0089, "step": 136230 }, { "epoch": 896.3157894736842, "grad_norm": 1.1634271144866943, "learning_rate": 0.0001, "loss": 0.0084, "step": 136240 }, { "epoch": 896.3815789473684, "grad_norm": 1.3984472751617432, "learning_rate": 0.0001, "loss": 0.0089, "step": 136250 }, { "epoch": 896.4473684210526, "grad_norm": 1.7315332889556885, "learning_rate": 0.0001, "loss": 0.0116, "step": 136260 }, { "epoch": 896.5131578947369, "grad_norm": 1.5922656059265137, "learning_rate": 0.0001, "loss": 0.0085, "step": 136270 }, { "epoch": 896.578947368421, "grad_norm": 1.4069749116897583, "learning_rate": 0.0001, "loss": 0.0118, "step": 136280 }, { "epoch": 896.6447368421053, "grad_norm": 1.244490146636963, "learning_rate": 0.0001, "loss": 0.0098, "step": 136290 }, { "epoch": 896.7105263157895, "grad_norm": 1.1110283136367798, "learning_rate": 0.0001, "loss": 0.008, "step": 136300 }, { "epoch": 896.7763157894736, "grad_norm": 1.0167648792266846, "learning_rate": 0.0001, "loss": 0.0096, "step": 136310 }, { "epoch": 896.8421052631579, "grad_norm": 1.137952208518982, "learning_rate": 0.0001, "loss": 0.0112, "step": 136320 }, { "epoch": 896.9078947368421, "grad_norm": 1.0673314332962036, "learning_rate": 0.0001, "loss": 0.0086, "step": 136330 }, { "epoch": 896.9736842105264, "grad_norm": 1.012425184249878, "learning_rate": 0.0001, "loss": 0.0081, "step": 136340 }, { "epoch": 897.0394736842105, "grad_norm": 0.7594325542449951, "learning_rate": 0.0001, "loss": 0.0085, "step": 136350 }, { "epoch": 897.1052631578947, "grad_norm": 1.1178792715072632, "learning_rate": 0.0001, "loss": 0.0077, "step": 136360 }, { "epoch": 897.171052631579, "grad_norm": 1.0943617820739746, "learning_rate": 0.0001, "loss": 0.0092, "step": 136370 }, { "epoch": 897.2368421052631, "grad_norm": 0.9920550584793091, "learning_rate": 0.0001, "loss": 0.0086, "step": 136380 }, { "epoch": 897.3026315789474, "grad_norm": 1.0962131023406982, "learning_rate": 0.0001, "loss": 0.0101, "step": 136390 }, { "epoch": 897.3684210526316, "grad_norm": 0.8344330787658691, "learning_rate": 0.0001, "loss": 0.0091, "step": 136400 }, { "epoch": 897.4342105263158, "grad_norm": 1.1164144277572632, "learning_rate": 0.0001, "loss": 0.0115, "step": 136410 }, { "epoch": 897.5, "grad_norm": 1.07270348072052, "learning_rate": 0.0001, "loss": 0.0095, "step": 136420 }, { "epoch": 897.5657894736842, "grad_norm": 0.843373715877533, "learning_rate": 0.0001, "loss": 0.0099, "step": 136430 }, { "epoch": 897.6315789473684, "grad_norm": 0.8556021451950073, "learning_rate": 0.0001, "loss": 0.0089, "step": 136440 }, { "epoch": 897.6973684210526, "grad_norm": 0.8964880704879761, "learning_rate": 0.0001, "loss": 0.0112, "step": 136450 }, { "epoch": 897.7631578947369, "grad_norm": 1.153839111328125, "learning_rate": 0.0001, "loss": 0.0106, "step": 136460 }, { "epoch": 897.828947368421, "grad_norm": 1.0658332109451294, "learning_rate": 0.0001, "loss": 0.0095, "step": 136470 }, { "epoch": 897.8947368421053, "grad_norm": 1.2250584363937378, "learning_rate": 0.0001, "loss": 0.0091, "step": 136480 }, { "epoch": 897.9605263157895, "grad_norm": 1.1773885488510132, "learning_rate": 0.0001, "loss": 0.0088, "step": 136490 }, { "epoch": 898.0263157894736, "grad_norm": 1.2498626708984375, "learning_rate": 0.0001, "loss": 0.0085, "step": 136500 }, { "epoch": 898.0921052631579, "grad_norm": 0.980217695236206, "learning_rate": 0.0001, "loss": 0.0093, "step": 136510 }, { "epoch": 898.1578947368421, "grad_norm": 0.8119854927062988, "learning_rate": 0.0001, "loss": 0.0092, "step": 136520 }, { "epoch": 898.2236842105264, "grad_norm": 0.8097248077392578, "learning_rate": 0.0001, "loss": 0.0095, "step": 136530 }, { "epoch": 898.2894736842105, "grad_norm": 1.080000638961792, "learning_rate": 0.0001, "loss": 0.0108, "step": 136540 }, { "epoch": 898.3552631578947, "grad_norm": 1.0661876201629639, "learning_rate": 0.0001, "loss": 0.0095, "step": 136550 }, { "epoch": 898.421052631579, "grad_norm": 1.1550641059875488, "learning_rate": 0.0001, "loss": 0.0107, "step": 136560 }, { "epoch": 898.4868421052631, "grad_norm": 1.1951998472213745, "learning_rate": 0.0001, "loss": 0.0081, "step": 136570 }, { "epoch": 898.5526315789474, "grad_norm": 0.6485775709152222, "learning_rate": 0.0001, "loss": 0.0107, "step": 136580 }, { "epoch": 898.6184210526316, "grad_norm": 0.8973692059516907, "learning_rate": 0.0001, "loss": 0.0089, "step": 136590 }, { "epoch": 898.6842105263158, "grad_norm": 0.7221608757972717, "learning_rate": 0.0001, "loss": 0.0097, "step": 136600 }, { "epoch": 898.75, "grad_norm": 0.7160447835922241, "learning_rate": 0.0001, "loss": 0.0108, "step": 136610 }, { "epoch": 898.8157894736842, "grad_norm": 0.9838132262229919, "learning_rate": 0.0001, "loss": 0.0096, "step": 136620 }, { "epoch": 898.8815789473684, "grad_norm": 0.8849008679389954, "learning_rate": 0.0001, "loss": 0.0115, "step": 136630 }, { "epoch": 898.9473684210526, "grad_norm": 1.1569770574569702, "learning_rate": 0.0001, "loss": 0.0077, "step": 136640 }, { "epoch": 899.0131578947369, "grad_norm": 0.594029426574707, "learning_rate": 0.0001, "loss": 0.0102, "step": 136650 }, { "epoch": 899.078947368421, "grad_norm": 0.8497085571289062, "learning_rate": 0.0001, "loss": 0.0105, "step": 136660 }, { "epoch": 899.1447368421053, "grad_norm": 0.8578495383262634, "learning_rate": 0.0001, "loss": 0.0096, "step": 136670 }, { "epoch": 899.2105263157895, "grad_norm": 1.1420329809188843, "learning_rate": 0.0001, "loss": 0.0088, "step": 136680 }, { "epoch": 899.2763157894736, "grad_norm": 1.0156878232955933, "learning_rate": 0.0001, "loss": 0.0097, "step": 136690 }, { "epoch": 899.3421052631579, "grad_norm": 0.8283445239067078, "learning_rate": 0.0001, "loss": 0.0087, "step": 136700 }, { "epoch": 899.4078947368421, "grad_norm": 0.8237717747688293, "learning_rate": 0.0001, "loss": 0.0109, "step": 136710 }, { "epoch": 899.4736842105264, "grad_norm": 0.9158399105072021, "learning_rate": 0.0001, "loss": 0.0107, "step": 136720 }, { "epoch": 899.5394736842105, "grad_norm": 0.850222110748291, "learning_rate": 0.0001, "loss": 0.009, "step": 136730 }, { "epoch": 899.6052631578947, "grad_norm": 1.1023249626159668, "learning_rate": 0.0001, "loss": 0.0094, "step": 136740 }, { "epoch": 899.671052631579, "grad_norm": 0.9188584685325623, "learning_rate": 0.0001, "loss": 0.0094, "step": 136750 }, { "epoch": 899.7368421052631, "grad_norm": 1.0491944551467896, "learning_rate": 0.0001, "loss": 0.0105, "step": 136760 }, { "epoch": 899.8026315789474, "grad_norm": 0.7473717927932739, "learning_rate": 0.0001, "loss": 0.0095, "step": 136770 }, { "epoch": 899.8684210526316, "grad_norm": 0.8892393112182617, "learning_rate": 0.0001, "loss": 0.0091, "step": 136780 }, { "epoch": 899.9342105263158, "grad_norm": 1.0245060920715332, "learning_rate": 0.0001, "loss": 0.0093, "step": 136790 }, { "epoch": 900.0, "grad_norm": 1.3008759021759033, "learning_rate": 0.0001, "loss": 0.0096, "step": 136800 }, { "epoch": 900.0657894736842, "grad_norm": 1.4540356397628784, "learning_rate": 0.0001, "loss": 0.0095, "step": 136810 }, { "epoch": 900.1315789473684, "grad_norm": 1.342063069343567, "learning_rate": 0.0001, "loss": 0.0083, "step": 136820 }, { "epoch": 900.1973684210526, "grad_norm": 1.1454083919525146, "learning_rate": 0.0001, "loss": 0.009, "step": 136830 }, { "epoch": 900.2631578947369, "grad_norm": 1.6022448539733887, "learning_rate": 0.0001, "loss": 0.0098, "step": 136840 }, { "epoch": 900.328947368421, "grad_norm": 0.9221236109733582, "learning_rate": 0.0001, "loss": 0.01, "step": 136850 }, { "epoch": 900.3947368421053, "grad_norm": 1.3476216793060303, "learning_rate": 0.0001, "loss": 0.0078, "step": 136860 }, { "epoch": 900.4605263157895, "grad_norm": 0.8298095464706421, "learning_rate": 0.0001, "loss": 0.0102, "step": 136870 }, { "epoch": 900.5263157894736, "grad_norm": 0.7791982293128967, "learning_rate": 0.0001, "loss": 0.0104, "step": 136880 }, { "epoch": 900.5921052631579, "grad_norm": 1.068069577217102, "learning_rate": 0.0001, "loss": 0.0087, "step": 136890 }, { "epoch": 900.6578947368421, "grad_norm": 0.7247030138969421, "learning_rate": 0.0001, "loss": 0.0094, "step": 136900 }, { "epoch": 900.7236842105264, "grad_norm": 0.8441376686096191, "learning_rate": 0.0001, "loss": 0.0115, "step": 136910 }, { "epoch": 900.7894736842105, "grad_norm": 1.0622183084487915, "learning_rate": 0.0001, "loss": 0.0108, "step": 136920 }, { "epoch": 900.8552631578947, "grad_norm": 0.9633458256721497, "learning_rate": 0.0001, "loss": 0.0081, "step": 136930 }, { "epoch": 900.921052631579, "grad_norm": 1.0135974884033203, "learning_rate": 0.0001, "loss": 0.0109, "step": 136940 }, { "epoch": 900.9868421052631, "grad_norm": 0.9505342841148376, "learning_rate": 0.0001, "loss": 0.0101, "step": 136950 }, { "epoch": 901.0526315789474, "grad_norm": 0.7464510202407837, "learning_rate": 0.0001, "loss": 0.0093, "step": 136960 }, { "epoch": 901.1184210526316, "grad_norm": 1.1256731748580933, "learning_rate": 0.0001, "loss": 0.0087, "step": 136970 }, { "epoch": 901.1842105263158, "grad_norm": 1.1023762226104736, "learning_rate": 0.0001, "loss": 0.0081, "step": 136980 }, { "epoch": 901.25, "grad_norm": 1.3419615030288696, "learning_rate": 0.0001, "loss": 0.0101, "step": 136990 }, { "epoch": 901.3157894736842, "grad_norm": 1.1724894046783447, "learning_rate": 0.0001, "loss": 0.0091, "step": 137000 }, { "epoch": 901.3815789473684, "grad_norm": 1.1471381187438965, "learning_rate": 0.0001, "loss": 0.0114, "step": 137010 }, { "epoch": 901.4473684210526, "grad_norm": 1.5692983865737915, "learning_rate": 0.0001, "loss": 0.0129, "step": 137020 }, { "epoch": 901.5131578947369, "grad_norm": 1.1495898962020874, "learning_rate": 0.0001, "loss": 0.0092, "step": 137030 }, { "epoch": 901.578947368421, "grad_norm": 0.7561367154121399, "learning_rate": 0.0001, "loss": 0.0105, "step": 137040 }, { "epoch": 901.6447368421053, "grad_norm": 0.8060752749443054, "learning_rate": 0.0001, "loss": 0.0092, "step": 137050 }, { "epoch": 901.7105263157895, "grad_norm": 1.0309655666351318, "learning_rate": 0.0001, "loss": 0.009, "step": 137060 }, { "epoch": 901.7763157894736, "grad_norm": 1.3897480964660645, "learning_rate": 0.0001, "loss": 0.0093, "step": 137070 }, { "epoch": 901.8421052631579, "grad_norm": 1.1561673879623413, "learning_rate": 0.0001, "loss": 0.0121, "step": 137080 }, { "epoch": 901.9078947368421, "grad_norm": 1.1159383058547974, "learning_rate": 0.0001, "loss": 0.0087, "step": 137090 }, { "epoch": 901.9736842105264, "grad_norm": 1.2140202522277832, "learning_rate": 0.0001, "loss": 0.0078, "step": 137100 }, { "epoch": 902.0394736842105, "grad_norm": 1.0120538473129272, "learning_rate": 0.0001, "loss": 0.0092, "step": 137110 }, { "epoch": 902.1052631578947, "grad_norm": 1.3522586822509766, "learning_rate": 0.0001, "loss": 0.009, "step": 137120 }, { "epoch": 902.171052631579, "grad_norm": 1.1707468032836914, "learning_rate": 0.0001, "loss": 0.0086, "step": 137130 }, { "epoch": 902.2368421052631, "grad_norm": 1.209934949874878, "learning_rate": 0.0001, "loss": 0.0089, "step": 137140 }, { "epoch": 902.3026315789474, "grad_norm": 0.8782662153244019, "learning_rate": 0.0001, "loss": 0.0083, "step": 137150 }, { "epoch": 902.3684210526316, "grad_norm": 0.9009442329406738, "learning_rate": 0.0001, "loss": 0.0099, "step": 137160 }, { "epoch": 902.4342105263158, "grad_norm": 1.024381399154663, "learning_rate": 0.0001, "loss": 0.0085, "step": 137170 }, { "epoch": 902.5, "grad_norm": 1.0990251302719116, "learning_rate": 0.0001, "loss": 0.0096, "step": 137180 }, { "epoch": 902.5657894736842, "grad_norm": 1.1666136980056763, "learning_rate": 0.0001, "loss": 0.0115, "step": 137190 }, { "epoch": 902.6315789473684, "grad_norm": 1.1644891500473022, "learning_rate": 0.0001, "loss": 0.0103, "step": 137200 }, { "epoch": 902.6973684210526, "grad_norm": 1.3089399337768555, "learning_rate": 0.0001, "loss": 0.0089, "step": 137210 }, { "epoch": 902.7631578947369, "grad_norm": 1.2203880548477173, "learning_rate": 0.0001, "loss": 0.0091, "step": 137220 }, { "epoch": 902.828947368421, "grad_norm": 1.0612380504608154, "learning_rate": 0.0001, "loss": 0.0121, "step": 137230 }, { "epoch": 902.8947368421053, "grad_norm": 1.1838760375976562, "learning_rate": 0.0001, "loss": 0.0099, "step": 137240 }, { "epoch": 902.9605263157895, "grad_norm": 1.0632514953613281, "learning_rate": 0.0001, "loss": 0.0097, "step": 137250 }, { "epoch": 903.0263157894736, "grad_norm": 0.9610679745674133, "learning_rate": 0.0001, "loss": 0.0103, "step": 137260 }, { "epoch": 903.0921052631579, "grad_norm": 0.670998215675354, "learning_rate": 0.0001, "loss": 0.0103, "step": 137270 }, { "epoch": 903.1578947368421, "grad_norm": 0.9140186905860901, "learning_rate": 0.0001, "loss": 0.0093, "step": 137280 }, { "epoch": 903.2236842105264, "grad_norm": 1.182522177696228, "learning_rate": 0.0001, "loss": 0.0117, "step": 137290 }, { "epoch": 903.2894736842105, "grad_norm": 0.9858769774436951, "learning_rate": 0.0001, "loss": 0.0086, "step": 137300 }, { "epoch": 903.3552631578947, "grad_norm": 1.1477785110473633, "learning_rate": 0.0001, "loss": 0.0088, "step": 137310 }, { "epoch": 903.421052631579, "grad_norm": 1.2154635190963745, "learning_rate": 0.0001, "loss": 0.0107, "step": 137320 }, { "epoch": 903.4868421052631, "grad_norm": 0.9853901863098145, "learning_rate": 0.0001, "loss": 0.0083, "step": 137330 }, { "epoch": 903.5526315789474, "grad_norm": 1.2945221662521362, "learning_rate": 0.0001, "loss": 0.0092, "step": 137340 }, { "epoch": 903.6184210526316, "grad_norm": 1.0276622772216797, "learning_rate": 0.0001, "loss": 0.0093, "step": 137350 }, { "epoch": 903.6842105263158, "grad_norm": 0.8371639251708984, "learning_rate": 0.0001, "loss": 0.0086, "step": 137360 }, { "epoch": 903.75, "grad_norm": 1.3738861083984375, "learning_rate": 0.0001, "loss": 0.0109, "step": 137370 }, { "epoch": 903.8157894736842, "grad_norm": 0.9806889295578003, "learning_rate": 0.0001, "loss": 0.0097, "step": 137380 }, { "epoch": 903.8815789473684, "grad_norm": 1.3452116250991821, "learning_rate": 0.0001, "loss": 0.0101, "step": 137390 }, { "epoch": 903.9473684210526, "grad_norm": 1.2465362548828125, "learning_rate": 0.0001, "loss": 0.009, "step": 137400 }, { "epoch": 904.0131578947369, "grad_norm": 0.8776252269744873, "learning_rate": 0.0001, "loss": 0.0082, "step": 137410 }, { "epoch": 904.078947368421, "grad_norm": 0.8679521083831787, "learning_rate": 0.0001, "loss": 0.0093, "step": 137420 }, { "epoch": 904.1447368421053, "grad_norm": 1.0201027393341064, "learning_rate": 0.0001, "loss": 0.01, "step": 137430 }, { "epoch": 904.2105263157895, "grad_norm": 1.2104623317718506, "learning_rate": 0.0001, "loss": 0.0078, "step": 137440 }, { "epoch": 904.2763157894736, "grad_norm": 0.9057734608650208, "learning_rate": 0.0001, "loss": 0.0078, "step": 137450 }, { "epoch": 904.3421052631579, "grad_norm": 1.0167741775512695, "learning_rate": 0.0001, "loss": 0.0094, "step": 137460 }, { "epoch": 904.4078947368421, "grad_norm": 0.5943556427955627, "learning_rate": 0.0001, "loss": 0.01, "step": 137470 }, { "epoch": 904.4736842105264, "grad_norm": 0.8920695781707764, "learning_rate": 0.0001, "loss": 0.0113, "step": 137480 }, { "epoch": 904.5394736842105, "grad_norm": 0.9045788645744324, "learning_rate": 0.0001, "loss": 0.0117, "step": 137490 }, { "epoch": 904.6052631578947, "grad_norm": 0.9521625638008118, "learning_rate": 0.0001, "loss": 0.0107, "step": 137500 }, { "epoch": 904.671052631579, "grad_norm": 0.8317375779151917, "learning_rate": 0.0001, "loss": 0.0103, "step": 137510 }, { "epoch": 904.7368421052631, "grad_norm": 0.6917269229888916, "learning_rate": 0.0001, "loss": 0.0099, "step": 137520 }, { "epoch": 904.8026315789474, "grad_norm": 1.1361898183822632, "learning_rate": 0.0001, "loss": 0.0084, "step": 137530 }, { "epoch": 904.8684210526316, "grad_norm": 1.2201398611068726, "learning_rate": 0.0001, "loss": 0.011, "step": 137540 }, { "epoch": 904.9342105263158, "grad_norm": 0.8375370502471924, "learning_rate": 0.0001, "loss": 0.0108, "step": 137550 }, { "epoch": 905.0, "grad_norm": 1.1140497922897339, "learning_rate": 0.0001, "loss": 0.0087, "step": 137560 }, { "epoch": 905.0657894736842, "grad_norm": 1.0850721597671509, "learning_rate": 0.0001, "loss": 0.0107, "step": 137570 }, { "epoch": 905.1315789473684, "grad_norm": 0.9781196117401123, "learning_rate": 0.0001, "loss": 0.01, "step": 137580 }, { "epoch": 905.1973684210526, "grad_norm": 1.1050325632095337, "learning_rate": 0.0001, "loss": 0.0083, "step": 137590 }, { "epoch": 905.2631578947369, "grad_norm": 0.8729236721992493, "learning_rate": 0.0001, "loss": 0.0099, "step": 137600 }, { "epoch": 905.328947368421, "grad_norm": 1.088930368423462, "learning_rate": 0.0001, "loss": 0.0097, "step": 137610 }, { "epoch": 905.3947368421053, "grad_norm": 1.292374849319458, "learning_rate": 0.0001, "loss": 0.0104, "step": 137620 }, { "epoch": 905.4605263157895, "grad_norm": 0.9945433735847473, "learning_rate": 0.0001, "loss": 0.0108, "step": 137630 }, { "epoch": 905.5263157894736, "grad_norm": 1.1098923683166504, "learning_rate": 0.0001, "loss": 0.0111, "step": 137640 }, { "epoch": 905.5921052631579, "grad_norm": 1.15111243724823, "learning_rate": 0.0001, "loss": 0.0084, "step": 137650 }, { "epoch": 905.6578947368421, "grad_norm": 0.8712033629417419, "learning_rate": 0.0001, "loss": 0.0093, "step": 137660 }, { "epoch": 905.7236842105264, "grad_norm": 1.0952752828598022, "learning_rate": 0.0001, "loss": 0.009, "step": 137670 }, { "epoch": 905.7894736842105, "grad_norm": 1.2315001487731934, "learning_rate": 0.0001, "loss": 0.01, "step": 137680 }, { "epoch": 905.8552631578947, "grad_norm": 1.5495014190673828, "learning_rate": 0.0001, "loss": 0.0101, "step": 137690 }, { "epoch": 905.921052631579, "grad_norm": 1.7285585403442383, "learning_rate": 0.0001, "loss": 0.0094, "step": 137700 }, { "epoch": 905.9868421052631, "grad_norm": 1.7616963386535645, "learning_rate": 0.0001, "loss": 0.0109, "step": 137710 }, { "epoch": 906.0526315789474, "grad_norm": 1.4196006059646606, "learning_rate": 0.0001, "loss": 0.011, "step": 137720 }, { "epoch": 906.1184210526316, "grad_norm": 1.3745293617248535, "learning_rate": 0.0001, "loss": 0.0113, "step": 137730 }, { "epoch": 906.1842105263158, "grad_norm": 0.8986107707023621, "learning_rate": 0.0001, "loss": 0.0091, "step": 137740 }, { "epoch": 906.25, "grad_norm": 0.9261276721954346, "learning_rate": 0.0001, "loss": 0.01, "step": 137750 }, { "epoch": 906.3157894736842, "grad_norm": 1.2320727109909058, "learning_rate": 0.0001, "loss": 0.011, "step": 137760 }, { "epoch": 906.3815789473684, "grad_norm": 1.3749173879623413, "learning_rate": 0.0001, "loss": 0.0083, "step": 137770 }, { "epoch": 906.4473684210526, "grad_norm": 1.2523115873336792, "learning_rate": 0.0001, "loss": 0.0101, "step": 137780 }, { "epoch": 906.5131578947369, "grad_norm": 0.9561618566513062, "learning_rate": 0.0001, "loss": 0.0092, "step": 137790 }, { "epoch": 906.578947368421, "grad_norm": 1.0065858364105225, "learning_rate": 0.0001, "loss": 0.0084, "step": 137800 }, { "epoch": 906.6447368421053, "grad_norm": 1.0665801763534546, "learning_rate": 0.0001, "loss": 0.0092, "step": 137810 }, { "epoch": 906.7105263157895, "grad_norm": 1.0746973752975464, "learning_rate": 0.0001, "loss": 0.0109, "step": 137820 }, { "epoch": 906.7763157894736, "grad_norm": 0.6593524217605591, "learning_rate": 0.0001, "loss": 0.0086, "step": 137830 }, { "epoch": 906.8421052631579, "grad_norm": 0.9232041239738464, "learning_rate": 0.0001, "loss": 0.0089, "step": 137840 }, { "epoch": 906.9078947368421, "grad_norm": 0.9710672497749329, "learning_rate": 0.0001, "loss": 0.0091, "step": 137850 }, { "epoch": 906.9736842105264, "grad_norm": 1.034181833267212, "learning_rate": 0.0001, "loss": 0.0094, "step": 137860 }, { "epoch": 907.0394736842105, "grad_norm": 0.5571140646934509, "learning_rate": 0.0001, "loss": 0.0092, "step": 137870 }, { "epoch": 907.1052631578947, "grad_norm": 1.2709097862243652, "learning_rate": 0.0001, "loss": 0.0085, "step": 137880 }, { "epoch": 907.171052631579, "grad_norm": 1.4115272760391235, "learning_rate": 0.0001, "loss": 0.0111, "step": 137890 }, { "epoch": 907.2368421052631, "grad_norm": 1.275320291519165, "learning_rate": 0.0001, "loss": 0.01, "step": 137900 }, { "epoch": 907.3026315789474, "grad_norm": 0.980765163898468, "learning_rate": 0.0001, "loss": 0.0082, "step": 137910 }, { "epoch": 907.3684210526316, "grad_norm": 0.9128990173339844, "learning_rate": 0.0001, "loss": 0.0082, "step": 137920 }, { "epoch": 907.4342105263158, "grad_norm": 0.8334782719612122, "learning_rate": 0.0001, "loss": 0.0085, "step": 137930 }, { "epoch": 907.5, "grad_norm": 0.7317301630973816, "learning_rate": 0.0001, "loss": 0.0084, "step": 137940 }, { "epoch": 907.5657894736842, "grad_norm": 1.2653545141220093, "learning_rate": 0.0001, "loss": 0.0085, "step": 137950 }, { "epoch": 907.6315789473684, "grad_norm": 1.2031755447387695, "learning_rate": 0.0001, "loss": 0.0109, "step": 137960 }, { "epoch": 907.6973684210526, "grad_norm": 0.6481014490127563, "learning_rate": 0.0001, "loss": 0.0096, "step": 137970 }, { "epoch": 907.7631578947369, "grad_norm": 1.1213470697402954, "learning_rate": 0.0001, "loss": 0.0107, "step": 137980 }, { "epoch": 907.828947368421, "grad_norm": 1.4939583539962769, "learning_rate": 0.0001, "loss": 0.0121, "step": 137990 }, { "epoch": 907.8947368421053, "grad_norm": 1.7831441164016724, "learning_rate": 0.0001, "loss": 0.011, "step": 138000 }, { "epoch": 907.9605263157895, "grad_norm": 1.7211501598358154, "learning_rate": 0.0001, "loss": 0.0082, "step": 138010 }, { "epoch": 908.0263157894736, "grad_norm": 1.5099759101867676, "learning_rate": 0.0001, "loss": 0.0137, "step": 138020 }, { "epoch": 908.0921052631579, "grad_norm": 1.137043833732605, "learning_rate": 0.0001, "loss": 0.0091, "step": 138030 }, { "epoch": 908.1578947368421, "grad_norm": 1.2676162719726562, "learning_rate": 0.0001, "loss": 0.0105, "step": 138040 }, { "epoch": 908.2236842105264, "grad_norm": 0.994552731513977, "learning_rate": 0.0001, "loss": 0.0093, "step": 138050 }, { "epoch": 908.2894736842105, "grad_norm": 1.0138901472091675, "learning_rate": 0.0001, "loss": 0.009, "step": 138060 }, { "epoch": 908.3552631578947, "grad_norm": 0.9943720698356628, "learning_rate": 0.0001, "loss": 0.01, "step": 138070 }, { "epoch": 908.421052631579, "grad_norm": 1.1211268901824951, "learning_rate": 0.0001, "loss": 0.009, "step": 138080 }, { "epoch": 908.4868421052631, "grad_norm": 1.2332216501235962, "learning_rate": 0.0001, "loss": 0.0103, "step": 138090 }, { "epoch": 908.5526315789474, "grad_norm": 1.1603976488113403, "learning_rate": 0.0001, "loss": 0.008, "step": 138100 }, { "epoch": 908.6184210526316, "grad_norm": 0.789871871471405, "learning_rate": 0.0001, "loss": 0.0098, "step": 138110 }, { "epoch": 908.6842105263158, "grad_norm": 1.0970319509506226, "learning_rate": 0.0001, "loss": 0.0095, "step": 138120 }, { "epoch": 908.75, "grad_norm": 1.0479586124420166, "learning_rate": 0.0001, "loss": 0.0083, "step": 138130 }, { "epoch": 908.8157894736842, "grad_norm": 0.932201623916626, "learning_rate": 0.0001, "loss": 0.0079, "step": 138140 }, { "epoch": 908.8815789473684, "grad_norm": 0.9041379690170288, "learning_rate": 0.0001, "loss": 0.0091, "step": 138150 }, { "epoch": 908.9473684210526, "grad_norm": 0.9784705638885498, "learning_rate": 0.0001, "loss": 0.0104, "step": 138160 }, { "epoch": 909.0131578947369, "grad_norm": 1.0539556741714478, "learning_rate": 0.0001, "loss": 0.0111, "step": 138170 }, { "epoch": 909.078947368421, "grad_norm": 1.054840087890625, "learning_rate": 0.0001, "loss": 0.0092, "step": 138180 }, { "epoch": 909.1447368421053, "grad_norm": 0.817751944065094, "learning_rate": 0.0001, "loss": 0.0097, "step": 138190 }, { "epoch": 909.2105263157895, "grad_norm": 1.2767053842544556, "learning_rate": 0.0001, "loss": 0.0102, "step": 138200 }, { "epoch": 909.2763157894736, "grad_norm": 0.9269699454307556, "learning_rate": 0.0001, "loss": 0.0086, "step": 138210 }, { "epoch": 909.3421052631579, "grad_norm": 0.7192962765693665, "learning_rate": 0.0001, "loss": 0.01, "step": 138220 }, { "epoch": 909.4078947368421, "grad_norm": 1.0182446241378784, "learning_rate": 0.0001, "loss": 0.0129, "step": 138230 }, { "epoch": 909.4736842105264, "grad_norm": 0.8939855694770813, "learning_rate": 0.0001, "loss": 0.0079, "step": 138240 }, { "epoch": 909.5394736842105, "grad_norm": 1.3759862184524536, "learning_rate": 0.0001, "loss": 0.0097, "step": 138250 }, { "epoch": 909.6052631578947, "grad_norm": 1.0938113927841187, "learning_rate": 0.0001, "loss": 0.0103, "step": 138260 }, { "epoch": 909.671052631579, "grad_norm": 1.2169560194015503, "learning_rate": 0.0001, "loss": 0.0114, "step": 138270 }, { "epoch": 909.7368421052631, "grad_norm": 0.8270349502563477, "learning_rate": 0.0001, "loss": 0.0089, "step": 138280 }, { "epoch": 909.8026315789474, "grad_norm": 0.9773039221763611, "learning_rate": 0.0001, "loss": 0.0093, "step": 138290 }, { "epoch": 909.8684210526316, "grad_norm": 0.9139556884765625, "learning_rate": 0.0001, "loss": 0.0095, "step": 138300 }, { "epoch": 909.9342105263158, "grad_norm": 1.0602258443832397, "learning_rate": 0.0001, "loss": 0.0088, "step": 138310 }, { "epoch": 910.0, "grad_norm": 1.1161372661590576, "learning_rate": 0.0001, "loss": 0.0108, "step": 138320 }, { "epoch": 910.0657894736842, "grad_norm": 1.1189671754837036, "learning_rate": 0.0001, "loss": 0.0103, "step": 138330 }, { "epoch": 910.1315789473684, "grad_norm": 0.8698909282684326, "learning_rate": 0.0001, "loss": 0.0104, "step": 138340 }, { "epoch": 910.1973684210526, "grad_norm": 0.9428373575210571, "learning_rate": 0.0001, "loss": 0.0098, "step": 138350 }, { "epoch": 910.2631578947369, "grad_norm": 0.6504638195037842, "learning_rate": 0.0001, "loss": 0.0115, "step": 138360 }, { "epoch": 910.328947368421, "grad_norm": 0.8180758357048035, "learning_rate": 0.0001, "loss": 0.0111, "step": 138370 }, { "epoch": 910.3947368421053, "grad_norm": 1.1417499780654907, "learning_rate": 0.0001, "loss": 0.0112, "step": 138380 }, { "epoch": 910.4605263157895, "grad_norm": 0.8425560593605042, "learning_rate": 0.0001, "loss": 0.0109, "step": 138390 }, { "epoch": 910.5263157894736, "grad_norm": 1.2811640501022339, "learning_rate": 0.0001, "loss": 0.0096, "step": 138400 }, { "epoch": 910.5921052631579, "grad_norm": 1.0059995651245117, "learning_rate": 0.0001, "loss": 0.0107, "step": 138410 }, { "epoch": 910.6578947368421, "grad_norm": 1.4118963479995728, "learning_rate": 0.0001, "loss": 0.01, "step": 138420 }, { "epoch": 910.7236842105264, "grad_norm": 1.055229663848877, "learning_rate": 0.0001, "loss": 0.0112, "step": 138430 }, { "epoch": 910.7894736842105, "grad_norm": 1.1390751600265503, "learning_rate": 0.0001, "loss": 0.0101, "step": 138440 }, { "epoch": 910.8552631578947, "grad_norm": 1.3545476198196411, "learning_rate": 0.0001, "loss": 0.0099, "step": 138450 }, { "epoch": 910.921052631579, "grad_norm": 1.0266139507293701, "learning_rate": 0.0001, "loss": 0.0105, "step": 138460 }, { "epoch": 910.9868421052631, "grad_norm": 1.0077507495880127, "learning_rate": 0.0001, "loss": 0.0089, "step": 138470 }, { "epoch": 911.0526315789474, "grad_norm": 1.1615369319915771, "learning_rate": 0.0001, "loss": 0.0086, "step": 138480 }, { "epoch": 911.1184210526316, "grad_norm": 1.2945265769958496, "learning_rate": 0.0001, "loss": 0.0103, "step": 138490 }, { "epoch": 911.1842105263158, "grad_norm": 0.7563729286193848, "learning_rate": 0.0001, "loss": 0.0092, "step": 138500 }, { "epoch": 911.25, "grad_norm": 0.941920280456543, "learning_rate": 0.0001, "loss": 0.0104, "step": 138510 }, { "epoch": 911.3157894736842, "grad_norm": 0.8620559573173523, "learning_rate": 0.0001, "loss": 0.0103, "step": 138520 }, { "epoch": 911.3815789473684, "grad_norm": 1.0454643964767456, "learning_rate": 0.0001, "loss": 0.0104, "step": 138530 }, { "epoch": 911.4473684210526, "grad_norm": 1.2060420513153076, "learning_rate": 0.0001, "loss": 0.0109, "step": 138540 }, { "epoch": 911.5131578947369, "grad_norm": 1.0942254066467285, "learning_rate": 0.0001, "loss": 0.01, "step": 138550 }, { "epoch": 911.578947368421, "grad_norm": 0.7879404425621033, "learning_rate": 0.0001, "loss": 0.0111, "step": 138560 }, { "epoch": 911.6447368421053, "grad_norm": 1.4239997863769531, "learning_rate": 0.0001, "loss": 0.01, "step": 138570 }, { "epoch": 911.7105263157895, "grad_norm": 1.384261965751648, "learning_rate": 0.0001, "loss": 0.0087, "step": 138580 }, { "epoch": 911.7763157894736, "grad_norm": 1.2264975309371948, "learning_rate": 0.0001, "loss": 0.0102, "step": 138590 }, { "epoch": 911.8421052631579, "grad_norm": 1.1380722522735596, "learning_rate": 0.0001, "loss": 0.0107, "step": 138600 }, { "epoch": 911.9078947368421, "grad_norm": 1.4093822240829468, "learning_rate": 0.0001, "loss": 0.0098, "step": 138610 }, { "epoch": 911.9736842105264, "grad_norm": 0.9658661484718323, "learning_rate": 0.0001, "loss": 0.0087, "step": 138620 }, { "epoch": 912.0394736842105, "grad_norm": 1.6494758129119873, "learning_rate": 0.0001, "loss": 0.0118, "step": 138630 }, { "epoch": 912.1052631578947, "grad_norm": 1.223433494567871, "learning_rate": 0.0001, "loss": 0.0095, "step": 138640 }, { "epoch": 912.171052631579, "grad_norm": 1.1293435096740723, "learning_rate": 0.0001, "loss": 0.0078, "step": 138650 }, { "epoch": 912.2368421052631, "grad_norm": 1.3169997930526733, "learning_rate": 0.0001, "loss": 0.0095, "step": 138660 }, { "epoch": 912.3026315789474, "grad_norm": 1.1258878707885742, "learning_rate": 0.0001, "loss": 0.0093, "step": 138670 }, { "epoch": 912.3684210526316, "grad_norm": 1.0870180130004883, "learning_rate": 0.0001, "loss": 0.0089, "step": 138680 }, { "epoch": 912.4342105263158, "grad_norm": 1.2303227186203003, "learning_rate": 0.0001, "loss": 0.0103, "step": 138690 }, { "epoch": 912.5, "grad_norm": 1.3241380453109741, "learning_rate": 0.0001, "loss": 0.0092, "step": 138700 }, { "epoch": 912.5657894736842, "grad_norm": 0.9058986306190491, "learning_rate": 0.0001, "loss": 0.0086, "step": 138710 }, { "epoch": 912.6315789473684, "grad_norm": 1.2740260362625122, "learning_rate": 0.0001, "loss": 0.011, "step": 138720 }, { "epoch": 912.6973684210526, "grad_norm": 1.073053240776062, "learning_rate": 0.0001, "loss": 0.01, "step": 138730 }, { "epoch": 912.7631578947369, "grad_norm": 0.9886819124221802, "learning_rate": 0.0001, "loss": 0.0099, "step": 138740 }, { "epoch": 912.828947368421, "grad_norm": 1.464430332183838, "learning_rate": 0.0001, "loss": 0.0098, "step": 138750 }, { "epoch": 912.8947368421053, "grad_norm": 1.4598714113235474, "learning_rate": 0.0001, "loss": 0.0091, "step": 138760 }, { "epoch": 912.9605263157895, "grad_norm": 1.233940839767456, "learning_rate": 0.0001, "loss": 0.0094, "step": 138770 }, { "epoch": 913.0263157894736, "grad_norm": 0.9200678467750549, "learning_rate": 0.0001, "loss": 0.0079, "step": 138780 }, { "epoch": 913.0921052631579, "grad_norm": 0.763102650642395, "learning_rate": 0.0001, "loss": 0.0104, "step": 138790 }, { "epoch": 913.1578947368421, "grad_norm": 0.944960355758667, "learning_rate": 0.0001, "loss": 0.0115, "step": 138800 }, { "epoch": 913.2236842105264, "grad_norm": 0.9596688747406006, "learning_rate": 0.0001, "loss": 0.0097, "step": 138810 }, { "epoch": 913.2894736842105, "grad_norm": 0.8753271102905273, "learning_rate": 0.0001, "loss": 0.0074, "step": 138820 }, { "epoch": 913.3552631578947, "grad_norm": 0.8596283197402954, "learning_rate": 0.0001, "loss": 0.0093, "step": 138830 }, { "epoch": 913.421052631579, "grad_norm": 0.9290410280227661, "learning_rate": 0.0001, "loss": 0.0083, "step": 138840 }, { "epoch": 913.4868421052631, "grad_norm": 0.703997015953064, "learning_rate": 0.0001, "loss": 0.0096, "step": 138850 }, { "epoch": 913.5526315789474, "grad_norm": 0.7406630516052246, "learning_rate": 0.0001, "loss": 0.01, "step": 138860 }, { "epoch": 913.6184210526316, "grad_norm": 0.5769844055175781, "learning_rate": 0.0001, "loss": 0.011, "step": 138870 }, { "epoch": 913.6842105263158, "grad_norm": 0.8807429075241089, "learning_rate": 0.0001, "loss": 0.0094, "step": 138880 }, { "epoch": 913.75, "grad_norm": 1.0065295696258545, "learning_rate": 0.0001, "loss": 0.0091, "step": 138890 }, { "epoch": 913.8157894736842, "grad_norm": 0.9491748213768005, "learning_rate": 0.0001, "loss": 0.0095, "step": 138900 }, { "epoch": 913.8815789473684, "grad_norm": 0.8654152750968933, "learning_rate": 0.0001, "loss": 0.01, "step": 138910 }, { "epoch": 913.9473684210526, "grad_norm": 0.9335588216781616, "learning_rate": 0.0001, "loss": 0.0093, "step": 138920 }, { "epoch": 914.0131578947369, "grad_norm": 1.165829062461853, "learning_rate": 0.0001, "loss": 0.0115, "step": 138930 }, { "epoch": 914.078947368421, "grad_norm": 0.8912755250930786, "learning_rate": 0.0001, "loss": 0.0098, "step": 138940 }, { "epoch": 914.1447368421053, "grad_norm": 1.1820372343063354, "learning_rate": 0.0001, "loss": 0.0115, "step": 138950 }, { "epoch": 914.2105263157895, "grad_norm": 1.3358891010284424, "learning_rate": 0.0001, "loss": 0.0097, "step": 138960 }, { "epoch": 914.2763157894736, "grad_norm": 0.878984272480011, "learning_rate": 0.0001, "loss": 0.0108, "step": 138970 }, { "epoch": 914.3421052631579, "grad_norm": 1.2799499034881592, "learning_rate": 0.0001, "loss": 0.0084, "step": 138980 }, { "epoch": 914.4078947368421, "grad_norm": 0.9388821125030518, "learning_rate": 0.0001, "loss": 0.0093, "step": 138990 }, { "epoch": 914.4736842105264, "grad_norm": 1.0505149364471436, "learning_rate": 0.0001, "loss": 0.0089, "step": 139000 }, { "epoch": 914.5394736842105, "grad_norm": 0.869010329246521, "learning_rate": 0.0001, "loss": 0.0108, "step": 139010 }, { "epoch": 914.6052631578947, "grad_norm": 0.8790243864059448, "learning_rate": 0.0001, "loss": 0.0101, "step": 139020 }, { "epoch": 914.671052631579, "grad_norm": 0.7046595811843872, "learning_rate": 0.0001, "loss": 0.0096, "step": 139030 }, { "epoch": 914.7368421052631, "grad_norm": 0.9645804166793823, "learning_rate": 0.0001, "loss": 0.01, "step": 139040 }, { "epoch": 914.8026315789474, "grad_norm": 1.3589966297149658, "learning_rate": 0.0001, "loss": 0.0084, "step": 139050 }, { "epoch": 914.8684210526316, "grad_norm": 1.1540025472640991, "learning_rate": 0.0001, "loss": 0.0102, "step": 139060 }, { "epoch": 914.9342105263158, "grad_norm": 0.794073224067688, "learning_rate": 0.0001, "loss": 0.01, "step": 139070 }, { "epoch": 915.0, "grad_norm": 0.9464465975761414, "learning_rate": 0.0001, "loss": 0.0106, "step": 139080 }, { "epoch": 915.0657894736842, "grad_norm": 1.0088104009628296, "learning_rate": 0.0001, "loss": 0.0097, "step": 139090 }, { "epoch": 915.1315789473684, "grad_norm": 1.040697693824768, "learning_rate": 0.0001, "loss": 0.0095, "step": 139100 }, { "epoch": 915.1973684210526, "grad_norm": 1.2374768257141113, "learning_rate": 0.0001, "loss": 0.0093, "step": 139110 }, { "epoch": 915.2631578947369, "grad_norm": 1.1254475116729736, "learning_rate": 0.0001, "loss": 0.0113, "step": 139120 }, { "epoch": 915.328947368421, "grad_norm": 1.0068753957748413, "learning_rate": 0.0001, "loss": 0.0093, "step": 139130 }, { "epoch": 915.3947368421053, "grad_norm": 1.0009926557540894, "learning_rate": 0.0001, "loss": 0.0116, "step": 139140 }, { "epoch": 915.4605263157895, "grad_norm": 1.104837417602539, "learning_rate": 0.0001, "loss": 0.0094, "step": 139150 }, { "epoch": 915.5263157894736, "grad_norm": 0.9055717587471008, "learning_rate": 0.0001, "loss": 0.013, "step": 139160 }, { "epoch": 915.5921052631579, "grad_norm": 1.2613590955734253, "learning_rate": 0.0001, "loss": 0.0096, "step": 139170 }, { "epoch": 915.6578947368421, "grad_norm": 0.9277036786079407, "learning_rate": 0.0001, "loss": 0.0094, "step": 139180 }, { "epoch": 915.7236842105264, "grad_norm": 0.8551871180534363, "learning_rate": 0.0001, "loss": 0.0107, "step": 139190 }, { "epoch": 915.7894736842105, "grad_norm": 1.1076236963272095, "learning_rate": 0.0001, "loss": 0.0085, "step": 139200 }, { "epoch": 915.8552631578947, "grad_norm": 1.120894432067871, "learning_rate": 0.0001, "loss": 0.0084, "step": 139210 }, { "epoch": 915.921052631579, "grad_norm": 0.8614113926887512, "learning_rate": 0.0001, "loss": 0.0093, "step": 139220 }, { "epoch": 915.9868421052631, "grad_norm": 0.8355217576026917, "learning_rate": 0.0001, "loss": 0.0097, "step": 139230 }, { "epoch": 916.0526315789474, "grad_norm": 0.9857189655303955, "learning_rate": 0.0001, "loss": 0.0098, "step": 139240 }, { "epoch": 916.1184210526316, "grad_norm": 0.8549261689186096, "learning_rate": 0.0001, "loss": 0.0106, "step": 139250 }, { "epoch": 916.1842105263158, "grad_norm": 1.0027446746826172, "learning_rate": 0.0001, "loss": 0.0093, "step": 139260 }, { "epoch": 916.25, "grad_norm": 0.9751135110855103, "learning_rate": 0.0001, "loss": 0.0095, "step": 139270 }, { "epoch": 916.3157894736842, "grad_norm": 1.3326411247253418, "learning_rate": 0.0001, "loss": 0.01, "step": 139280 }, { "epoch": 916.3815789473684, "grad_norm": 0.8798542618751526, "learning_rate": 0.0001, "loss": 0.0097, "step": 139290 }, { "epoch": 916.4473684210526, "grad_norm": 1.432790994644165, "learning_rate": 0.0001, "loss": 0.0085, "step": 139300 }, { "epoch": 916.5131578947369, "grad_norm": 1.3682509660720825, "learning_rate": 0.0001, "loss": 0.0107, "step": 139310 }, { "epoch": 916.578947368421, "grad_norm": 0.9973912835121155, "learning_rate": 0.0001, "loss": 0.0102, "step": 139320 }, { "epoch": 916.6447368421053, "grad_norm": 1.2774707078933716, "learning_rate": 0.0001, "loss": 0.0101, "step": 139330 }, { "epoch": 916.7105263157895, "grad_norm": 1.1853746175765991, "learning_rate": 0.0001, "loss": 0.0089, "step": 139340 }, { "epoch": 916.7763157894736, "grad_norm": 1.0939549207687378, "learning_rate": 0.0001, "loss": 0.0101, "step": 139350 }, { "epoch": 916.8421052631579, "grad_norm": 1.6797022819519043, "learning_rate": 0.0001, "loss": 0.0094, "step": 139360 }, { "epoch": 916.9078947368421, "grad_norm": 0.9860899448394775, "learning_rate": 0.0001, "loss": 0.0108, "step": 139370 }, { "epoch": 916.9736842105264, "grad_norm": 1.0874377489089966, "learning_rate": 0.0001, "loss": 0.0097, "step": 139380 }, { "epoch": 917.0394736842105, "grad_norm": 0.8851874470710754, "learning_rate": 0.0001, "loss": 0.0074, "step": 139390 }, { "epoch": 917.1052631578947, "grad_norm": 1.039806604385376, "learning_rate": 0.0001, "loss": 0.0087, "step": 139400 }, { "epoch": 917.171052631579, "grad_norm": 1.1718597412109375, "learning_rate": 0.0001, "loss": 0.0105, "step": 139410 }, { "epoch": 917.2368421052631, "grad_norm": 0.9298036098480225, "learning_rate": 0.0001, "loss": 0.0095, "step": 139420 }, { "epoch": 917.3026315789474, "grad_norm": 1.1916438341140747, "learning_rate": 0.0001, "loss": 0.0097, "step": 139430 }, { "epoch": 917.3684210526316, "grad_norm": 1.2922661304473877, "learning_rate": 0.0001, "loss": 0.0106, "step": 139440 }, { "epoch": 917.4342105263158, "grad_norm": 1.0661784410476685, "learning_rate": 0.0001, "loss": 0.0106, "step": 139450 }, { "epoch": 917.5, "grad_norm": 1.1906532049179077, "learning_rate": 0.0001, "loss": 0.0083, "step": 139460 }, { "epoch": 917.5657894736842, "grad_norm": 1.1400047540664673, "learning_rate": 0.0001, "loss": 0.01, "step": 139470 }, { "epoch": 917.6315789473684, "grad_norm": 1.0732362270355225, "learning_rate": 0.0001, "loss": 0.0099, "step": 139480 }, { "epoch": 917.6973684210526, "grad_norm": 0.8187721371650696, "learning_rate": 0.0001, "loss": 0.0115, "step": 139490 }, { "epoch": 917.7631578947369, "grad_norm": 0.8629974722862244, "learning_rate": 0.0001, "loss": 0.009, "step": 139500 }, { "epoch": 917.828947368421, "grad_norm": 1.0016518831253052, "learning_rate": 0.0001, "loss": 0.0099, "step": 139510 }, { "epoch": 917.8947368421053, "grad_norm": 0.9016565680503845, "learning_rate": 0.0001, "loss": 0.0103, "step": 139520 }, { "epoch": 917.9605263157895, "grad_norm": 0.7531505227088928, "learning_rate": 0.0001, "loss": 0.0103, "step": 139530 }, { "epoch": 918.0263157894736, "grad_norm": 0.846524178981781, "learning_rate": 0.0001, "loss": 0.0088, "step": 139540 }, { "epoch": 918.0921052631579, "grad_norm": 1.01736581325531, "learning_rate": 0.0001, "loss": 0.0098, "step": 139550 }, { "epoch": 918.1578947368421, "grad_norm": 0.9176644086837769, "learning_rate": 0.0001, "loss": 0.0075, "step": 139560 }, { "epoch": 918.2236842105264, "grad_norm": 1.1797696352005005, "learning_rate": 0.0001, "loss": 0.0122, "step": 139570 }, { "epoch": 918.2894736842105, "grad_norm": 0.9965087175369263, "learning_rate": 0.0001, "loss": 0.0093, "step": 139580 }, { "epoch": 918.3552631578947, "grad_norm": 1.053809642791748, "learning_rate": 0.0001, "loss": 0.0111, "step": 139590 }, { "epoch": 918.421052631579, "grad_norm": 0.8142527341842651, "learning_rate": 0.0001, "loss": 0.0094, "step": 139600 }, { "epoch": 918.4868421052631, "grad_norm": 1.2485212087631226, "learning_rate": 0.0001, "loss": 0.0104, "step": 139610 }, { "epoch": 918.5526315789474, "grad_norm": 0.9339953660964966, "learning_rate": 0.0001, "loss": 0.0097, "step": 139620 }, { "epoch": 918.6184210526316, "grad_norm": 1.036667823791504, "learning_rate": 0.0001, "loss": 0.008, "step": 139630 }, { "epoch": 918.6842105263158, "grad_norm": 0.8027102947235107, "learning_rate": 0.0001, "loss": 0.0097, "step": 139640 }, { "epoch": 918.75, "grad_norm": 0.7558383345603943, "learning_rate": 0.0001, "loss": 0.0098, "step": 139650 }, { "epoch": 918.8157894736842, "grad_norm": 0.6426530480384827, "learning_rate": 0.0001, "loss": 0.0104, "step": 139660 }, { "epoch": 918.8815789473684, "grad_norm": 0.6960042715072632, "learning_rate": 0.0001, "loss": 0.0094, "step": 139670 }, { "epoch": 918.9473684210526, "grad_norm": 0.6835951209068298, "learning_rate": 0.0001, "loss": 0.0111, "step": 139680 }, { "epoch": 919.0131578947369, "grad_norm": 1.270308017730713, "learning_rate": 0.0001, "loss": 0.0089, "step": 139690 }, { "epoch": 919.078947368421, "grad_norm": 1.1782221794128418, "learning_rate": 0.0001, "loss": 0.0088, "step": 139700 }, { "epoch": 919.1447368421053, "grad_norm": 1.0767455101013184, "learning_rate": 0.0001, "loss": 0.0106, "step": 139710 }, { "epoch": 919.2105263157895, "grad_norm": 0.8092596530914307, "learning_rate": 0.0001, "loss": 0.0085, "step": 139720 }, { "epoch": 919.2763157894736, "grad_norm": 0.9447590112686157, "learning_rate": 0.0001, "loss": 0.0109, "step": 139730 }, { "epoch": 919.3421052631579, "grad_norm": 1.1488889455795288, "learning_rate": 0.0001, "loss": 0.0093, "step": 139740 }, { "epoch": 919.4078947368421, "grad_norm": 0.6854469180107117, "learning_rate": 0.0001, "loss": 0.0099, "step": 139750 }, { "epoch": 919.4736842105264, "grad_norm": 0.9892016649246216, "learning_rate": 0.0001, "loss": 0.0094, "step": 139760 }, { "epoch": 919.5394736842105, "grad_norm": 1.0678046941757202, "learning_rate": 0.0001, "loss": 0.0088, "step": 139770 }, { "epoch": 919.6052631578947, "grad_norm": 1.0261939764022827, "learning_rate": 0.0001, "loss": 0.0095, "step": 139780 }, { "epoch": 919.671052631579, "grad_norm": 0.7115074396133423, "learning_rate": 0.0001, "loss": 0.0108, "step": 139790 }, { "epoch": 919.7368421052631, "grad_norm": 0.8169745802879333, "learning_rate": 0.0001, "loss": 0.0104, "step": 139800 }, { "epoch": 919.8026315789474, "grad_norm": 0.951320469379425, "learning_rate": 0.0001, "loss": 0.0087, "step": 139810 }, { "epoch": 919.8684210526316, "grad_norm": 1.1289328336715698, "learning_rate": 0.0001, "loss": 0.0104, "step": 139820 }, { "epoch": 919.9342105263158, "grad_norm": 0.8910213112831116, "learning_rate": 0.0001, "loss": 0.0101, "step": 139830 }, { "epoch": 920.0, "grad_norm": 1.091223120689392, "learning_rate": 0.0001, "loss": 0.011, "step": 139840 }, { "epoch": 920.0657894736842, "grad_norm": 1.1098347902297974, "learning_rate": 0.0001, "loss": 0.0094, "step": 139850 }, { "epoch": 920.1315789473684, "grad_norm": 0.8560648560523987, "learning_rate": 0.0001, "loss": 0.0083, "step": 139860 }, { "epoch": 920.1973684210526, "grad_norm": 0.8434080481529236, "learning_rate": 0.0001, "loss": 0.0091, "step": 139870 }, { "epoch": 920.2631578947369, "grad_norm": 1.10075044631958, "learning_rate": 0.0001, "loss": 0.0113, "step": 139880 }, { "epoch": 920.328947368421, "grad_norm": 1.0007864236831665, "learning_rate": 0.0001, "loss": 0.0109, "step": 139890 }, { "epoch": 920.3947368421053, "grad_norm": 0.9732034802436829, "learning_rate": 0.0001, "loss": 0.0106, "step": 139900 }, { "epoch": 920.4605263157895, "grad_norm": 0.9646409749984741, "learning_rate": 0.0001, "loss": 0.008, "step": 139910 }, { "epoch": 920.5263157894736, "grad_norm": 0.9488770961761475, "learning_rate": 0.0001, "loss": 0.0088, "step": 139920 }, { "epoch": 920.5921052631579, "grad_norm": 1.1824666261672974, "learning_rate": 0.0001, "loss": 0.0094, "step": 139930 }, { "epoch": 920.6578947368421, "grad_norm": 0.9511621594429016, "learning_rate": 0.0001, "loss": 0.009, "step": 139940 }, { "epoch": 920.7236842105264, "grad_norm": 1.0742391347885132, "learning_rate": 0.0001, "loss": 0.0088, "step": 139950 }, { "epoch": 920.7894736842105, "grad_norm": 1.4730793237686157, "learning_rate": 0.0001, "loss": 0.0107, "step": 139960 }, { "epoch": 920.8552631578947, "grad_norm": 1.3852665424346924, "learning_rate": 0.0001, "loss": 0.0084, "step": 139970 }, { "epoch": 920.921052631579, "grad_norm": 1.0065243244171143, "learning_rate": 0.0001, "loss": 0.0109, "step": 139980 }, { "epoch": 920.9868421052631, "grad_norm": 1.2180964946746826, "learning_rate": 0.0001, "loss": 0.0104, "step": 139990 }, { "epoch": 921.0526315789474, "grad_norm": 1.0379401445388794, "learning_rate": 0.0001, "loss": 0.0088, "step": 140000 }, { "epoch": 921.1184210526316, "grad_norm": 1.255040168762207, "learning_rate": 0.0001, "loss": 0.0097, "step": 140010 }, { "epoch": 921.1842105263158, "grad_norm": 1.2452787160873413, "learning_rate": 0.0001, "loss": 0.0093, "step": 140020 }, { "epoch": 921.25, "grad_norm": 1.25413978099823, "learning_rate": 0.0001, "loss": 0.0076, "step": 140030 }, { "epoch": 921.3157894736842, "grad_norm": 0.6751289963722229, "learning_rate": 0.0001, "loss": 0.0094, "step": 140040 }, { "epoch": 921.3815789473684, "grad_norm": 1.1533355712890625, "learning_rate": 0.0001, "loss": 0.0088, "step": 140050 }, { "epoch": 921.4473684210526, "grad_norm": 0.6121186017990112, "learning_rate": 0.0001, "loss": 0.0094, "step": 140060 }, { "epoch": 921.5131578947369, "grad_norm": 1.2446112632751465, "learning_rate": 0.0001, "loss": 0.0104, "step": 140070 }, { "epoch": 921.578947368421, "grad_norm": 1.057027816772461, "learning_rate": 0.0001, "loss": 0.0098, "step": 140080 }, { "epoch": 921.6447368421053, "grad_norm": 0.7790706753730774, "learning_rate": 0.0001, "loss": 0.011, "step": 140090 }, { "epoch": 921.7105263157895, "grad_norm": 0.9158756732940674, "learning_rate": 0.0001, "loss": 0.009, "step": 140100 }, { "epoch": 921.7763157894736, "grad_norm": 0.7222694754600525, "learning_rate": 0.0001, "loss": 0.0091, "step": 140110 }, { "epoch": 921.8421052631579, "grad_norm": 1.073899269104004, "learning_rate": 0.0001, "loss": 0.0109, "step": 140120 }, { "epoch": 921.9078947368421, "grad_norm": 1.1622812747955322, "learning_rate": 0.0001, "loss": 0.0104, "step": 140130 }, { "epoch": 921.9736842105264, "grad_norm": 1.201113224029541, "learning_rate": 0.0001, "loss": 0.0111, "step": 140140 }, { "epoch": 922.0394736842105, "grad_norm": 1.0527993440628052, "learning_rate": 0.0001, "loss": 0.0084, "step": 140150 }, { "epoch": 922.1052631578947, "grad_norm": 0.7997422814369202, "learning_rate": 0.0001, "loss": 0.0102, "step": 140160 }, { "epoch": 922.171052631579, "grad_norm": 0.9327260851860046, "learning_rate": 0.0001, "loss": 0.011, "step": 140170 }, { "epoch": 922.2368421052631, "grad_norm": 0.844416618347168, "learning_rate": 0.0001, "loss": 0.0109, "step": 140180 }, { "epoch": 922.3026315789474, "grad_norm": 0.9128046035766602, "learning_rate": 0.0001, "loss": 0.0085, "step": 140190 }, { "epoch": 922.3684210526316, "grad_norm": 0.7824735045433044, "learning_rate": 0.0001, "loss": 0.0085, "step": 140200 }, { "epoch": 922.4342105263158, "grad_norm": 0.961676836013794, "learning_rate": 0.0001, "loss": 0.0111, "step": 140210 }, { "epoch": 922.5, "grad_norm": 0.7900063991546631, "learning_rate": 0.0001, "loss": 0.009, "step": 140220 }, { "epoch": 922.5657894736842, "grad_norm": 1.3658584356307983, "learning_rate": 0.0001, "loss": 0.0086, "step": 140230 }, { "epoch": 922.6315789473684, "grad_norm": 1.0145326852798462, "learning_rate": 0.0001, "loss": 0.0093, "step": 140240 }, { "epoch": 922.6973684210526, "grad_norm": 1.2029974460601807, "learning_rate": 0.0001, "loss": 0.0116, "step": 140250 }, { "epoch": 922.7631578947369, "grad_norm": 1.2081828117370605, "learning_rate": 0.0001, "loss": 0.01, "step": 140260 }, { "epoch": 922.828947368421, "grad_norm": 1.177101492881775, "learning_rate": 0.0001, "loss": 0.0089, "step": 140270 }, { "epoch": 922.8947368421053, "grad_norm": 1.2548049688339233, "learning_rate": 0.0001, "loss": 0.0094, "step": 140280 }, { "epoch": 922.9605263157895, "grad_norm": 1.279402732849121, "learning_rate": 0.0001, "loss": 0.01, "step": 140290 }, { "epoch": 923.0263157894736, "grad_norm": 0.8556655645370483, "learning_rate": 0.0001, "loss": 0.0115, "step": 140300 }, { "epoch": 923.0921052631579, "grad_norm": 0.7919434905052185, "learning_rate": 0.0001, "loss": 0.011, "step": 140310 }, { "epoch": 923.1578947368421, "grad_norm": 1.0582438707351685, "learning_rate": 0.0001, "loss": 0.0093, "step": 140320 }, { "epoch": 923.2236842105264, "grad_norm": 0.8901892304420471, "learning_rate": 0.0001, "loss": 0.0087, "step": 140330 }, { "epoch": 923.2894736842105, "grad_norm": 0.9587327837944031, "learning_rate": 0.0001, "loss": 0.0106, "step": 140340 }, { "epoch": 923.3552631578947, "grad_norm": 0.9833970665931702, "learning_rate": 0.0001, "loss": 0.0091, "step": 140350 }, { "epoch": 923.421052631579, "grad_norm": 0.9788154363632202, "learning_rate": 0.0001, "loss": 0.008, "step": 140360 }, { "epoch": 923.4868421052631, "grad_norm": 0.7878212332725525, "learning_rate": 0.0001, "loss": 0.0102, "step": 140370 }, { "epoch": 923.5526315789474, "grad_norm": 0.7902630567550659, "learning_rate": 0.0001, "loss": 0.0086, "step": 140380 }, { "epoch": 923.6184210526316, "grad_norm": 1.0521742105484009, "learning_rate": 0.0001, "loss": 0.0088, "step": 140390 }, { "epoch": 923.6842105263158, "grad_norm": 1.1913330554962158, "learning_rate": 0.0001, "loss": 0.0093, "step": 140400 }, { "epoch": 923.75, "grad_norm": 0.9974690079689026, "learning_rate": 0.0001, "loss": 0.0089, "step": 140410 }, { "epoch": 923.8157894736842, "grad_norm": 0.7740110158920288, "learning_rate": 0.0001, "loss": 0.0103, "step": 140420 }, { "epoch": 923.8815789473684, "grad_norm": 0.651350736618042, "learning_rate": 0.0001, "loss": 0.0097, "step": 140430 }, { "epoch": 923.9473684210526, "grad_norm": 0.8533127903938293, "learning_rate": 0.0001, "loss": 0.0102, "step": 140440 }, { "epoch": 924.0131578947369, "grad_norm": 1.2042335271835327, "learning_rate": 0.0001, "loss": 0.0112, "step": 140450 }, { "epoch": 924.078947368421, "grad_norm": 1.0779392719268799, "learning_rate": 0.0001, "loss": 0.0102, "step": 140460 }, { "epoch": 924.1447368421053, "grad_norm": 1.2157610654830933, "learning_rate": 0.0001, "loss": 0.0112, "step": 140470 }, { "epoch": 924.2105263157895, "grad_norm": 1.7160738706588745, "learning_rate": 0.0001, "loss": 0.0116, "step": 140480 }, { "epoch": 924.2763157894736, "grad_norm": 0.9967711567878723, "learning_rate": 0.0001, "loss": 0.0093, "step": 140490 }, { "epoch": 924.3421052631579, "grad_norm": 0.9991487264633179, "learning_rate": 0.0001, "loss": 0.0104, "step": 140500 }, { "epoch": 924.4078947368421, "grad_norm": 1.028993844985962, "learning_rate": 0.0001, "loss": 0.0082, "step": 140510 }, { "epoch": 924.4736842105264, "grad_norm": 1.1722681522369385, "learning_rate": 0.0001, "loss": 0.01, "step": 140520 }, { "epoch": 924.5394736842105, "grad_norm": 0.9538052678108215, "learning_rate": 0.0001, "loss": 0.0086, "step": 140530 }, { "epoch": 924.6052631578947, "grad_norm": 0.9765692353248596, "learning_rate": 0.0001, "loss": 0.0097, "step": 140540 }, { "epoch": 924.671052631579, "grad_norm": 1.1148061752319336, "learning_rate": 0.0001, "loss": 0.0099, "step": 140550 }, { "epoch": 924.7368421052631, "grad_norm": 1.2510484457015991, "learning_rate": 0.0001, "loss": 0.0108, "step": 140560 }, { "epoch": 924.8026315789474, "grad_norm": 0.6285231709480286, "learning_rate": 0.0001, "loss": 0.0091, "step": 140570 }, { "epoch": 924.8684210526316, "grad_norm": 0.8659400939941406, "learning_rate": 0.0001, "loss": 0.0101, "step": 140580 }, { "epoch": 924.9342105263158, "grad_norm": 1.054602861404419, "learning_rate": 0.0001, "loss": 0.0106, "step": 140590 }, { "epoch": 925.0, "grad_norm": 1.0485122203826904, "learning_rate": 0.0001, "loss": 0.009, "step": 140600 }, { "epoch": 925.0657894736842, "grad_norm": 1.1340656280517578, "learning_rate": 0.0001, "loss": 0.0109, "step": 140610 }, { "epoch": 925.1315789473684, "grad_norm": 0.945837140083313, "learning_rate": 0.0001, "loss": 0.0102, "step": 140620 }, { "epoch": 925.1973684210526, "grad_norm": 0.9775026440620422, "learning_rate": 0.0001, "loss": 0.0093, "step": 140630 }, { "epoch": 925.2631578947369, "grad_norm": 0.979673445224762, "learning_rate": 0.0001, "loss": 0.0095, "step": 140640 }, { "epoch": 925.328947368421, "grad_norm": 1.1637605428695679, "learning_rate": 0.0001, "loss": 0.0104, "step": 140650 }, { "epoch": 925.3947368421053, "grad_norm": 1.587935209274292, "learning_rate": 0.0001, "loss": 0.0094, "step": 140660 }, { "epoch": 925.4605263157895, "grad_norm": 1.3600460290908813, "learning_rate": 0.0001, "loss": 0.0111, "step": 140670 }, { "epoch": 925.5263157894736, "grad_norm": 1.253381609916687, "learning_rate": 0.0001, "loss": 0.0087, "step": 140680 }, { "epoch": 925.5921052631579, "grad_norm": 1.4726488590240479, "learning_rate": 0.0001, "loss": 0.0101, "step": 140690 }, { "epoch": 925.6578947368421, "grad_norm": 1.187400221824646, "learning_rate": 0.0001, "loss": 0.01, "step": 140700 }, { "epoch": 925.7236842105264, "grad_norm": 1.0697247982025146, "learning_rate": 0.0001, "loss": 0.0099, "step": 140710 }, { "epoch": 925.7894736842105, "grad_norm": 1.059090256690979, "learning_rate": 0.0001, "loss": 0.0103, "step": 140720 }, { "epoch": 925.8552631578947, "grad_norm": 1.2741175889968872, "learning_rate": 0.0001, "loss": 0.0086, "step": 140730 }, { "epoch": 925.921052631579, "grad_norm": 1.1128478050231934, "learning_rate": 0.0001, "loss": 0.0092, "step": 140740 }, { "epoch": 925.9868421052631, "grad_norm": 0.7608397006988525, "learning_rate": 0.0001, "loss": 0.0083, "step": 140750 }, { "epoch": 926.0526315789474, "grad_norm": 1.069777250289917, "learning_rate": 0.0001, "loss": 0.009, "step": 140760 }, { "epoch": 926.1184210526316, "grad_norm": 1.0431427955627441, "learning_rate": 0.0001, "loss": 0.009, "step": 140770 }, { "epoch": 926.1842105263158, "grad_norm": 1.0773409605026245, "learning_rate": 0.0001, "loss": 0.008, "step": 140780 }, { "epoch": 926.25, "grad_norm": 1.208251714706421, "learning_rate": 0.0001, "loss": 0.0122, "step": 140790 }, { "epoch": 926.3157894736842, "grad_norm": 1.0173733234405518, "learning_rate": 0.0001, "loss": 0.0101, "step": 140800 }, { "epoch": 926.3815789473684, "grad_norm": 0.7675154209136963, "learning_rate": 0.0001, "loss": 0.0088, "step": 140810 }, { "epoch": 926.4473684210526, "grad_norm": 1.237313985824585, "learning_rate": 0.0001, "loss": 0.0114, "step": 140820 }, { "epoch": 926.5131578947369, "grad_norm": 0.9644690752029419, "learning_rate": 0.0001, "loss": 0.0095, "step": 140830 }, { "epoch": 926.578947368421, "grad_norm": 1.1485686302185059, "learning_rate": 0.0001, "loss": 0.0095, "step": 140840 }, { "epoch": 926.6447368421053, "grad_norm": 1.82488214969635, "learning_rate": 0.0001, "loss": 0.0089, "step": 140850 }, { "epoch": 926.7105263157895, "grad_norm": 1.2388635873794556, "learning_rate": 0.0001, "loss": 0.011, "step": 140860 }, { "epoch": 926.7763157894736, "grad_norm": 1.3352988958358765, "learning_rate": 0.0001, "loss": 0.0102, "step": 140870 }, { "epoch": 926.8421052631579, "grad_norm": 1.1297030448913574, "learning_rate": 0.0001, "loss": 0.0092, "step": 140880 }, { "epoch": 926.9078947368421, "grad_norm": 1.162842869758606, "learning_rate": 0.0001, "loss": 0.0091, "step": 140890 }, { "epoch": 926.9736842105264, "grad_norm": 1.2064037322998047, "learning_rate": 0.0001, "loss": 0.0095, "step": 140900 }, { "epoch": 927.0394736842105, "grad_norm": 1.08427894115448, "learning_rate": 0.0001, "loss": 0.0098, "step": 140910 }, { "epoch": 927.1052631578947, "grad_norm": 1.1430708169937134, "learning_rate": 0.0001, "loss": 0.0107, "step": 140920 }, { "epoch": 927.171052631579, "grad_norm": 1.0991872549057007, "learning_rate": 0.0001, "loss": 0.009, "step": 140930 }, { "epoch": 927.2368421052631, "grad_norm": 1.4145089387893677, "learning_rate": 0.0001, "loss": 0.0095, "step": 140940 }, { "epoch": 927.3026315789474, "grad_norm": 0.8903805613517761, "learning_rate": 0.0001, "loss": 0.0087, "step": 140950 }, { "epoch": 927.3684210526316, "grad_norm": 1.02116858959198, "learning_rate": 0.0001, "loss": 0.0105, "step": 140960 }, { "epoch": 927.4342105263158, "grad_norm": 1.196993350982666, "learning_rate": 0.0001, "loss": 0.0095, "step": 140970 }, { "epoch": 927.5, "grad_norm": 1.419411063194275, "learning_rate": 0.0001, "loss": 0.0091, "step": 140980 }, { "epoch": 927.5657894736842, "grad_norm": 1.4672049283981323, "learning_rate": 0.0001, "loss": 0.0085, "step": 140990 }, { "epoch": 927.6315789473684, "grad_norm": 0.8084446787834167, "learning_rate": 0.0001, "loss": 0.0104, "step": 141000 }, { "epoch": 927.6973684210526, "grad_norm": 1.2789831161499023, "learning_rate": 0.0001, "loss": 0.0093, "step": 141010 }, { "epoch": 927.7631578947369, "grad_norm": 0.6588245630264282, "learning_rate": 0.0001, "loss": 0.0109, "step": 141020 }, { "epoch": 927.828947368421, "grad_norm": 0.9887574911117554, "learning_rate": 0.0001, "loss": 0.0103, "step": 141030 }, { "epoch": 927.8947368421053, "grad_norm": 0.8936231136322021, "learning_rate": 0.0001, "loss": 0.0105, "step": 141040 }, { "epoch": 927.9605263157895, "grad_norm": 0.9490195512771606, "learning_rate": 0.0001, "loss": 0.0094, "step": 141050 }, { "epoch": 928.0263157894736, "grad_norm": 1.2485547065734863, "learning_rate": 0.0001, "loss": 0.0092, "step": 141060 }, { "epoch": 928.0921052631579, "grad_norm": 1.1075419187545776, "learning_rate": 0.0001, "loss": 0.011, "step": 141070 }, { "epoch": 928.1578947368421, "grad_norm": 1.1165497303009033, "learning_rate": 0.0001, "loss": 0.009, "step": 141080 }, { "epoch": 928.2236842105264, "grad_norm": 1.352881669998169, "learning_rate": 0.0001, "loss": 0.0093, "step": 141090 }, { "epoch": 928.2894736842105, "grad_norm": 1.4939100742340088, "learning_rate": 0.0001, "loss": 0.0084, "step": 141100 }, { "epoch": 928.3552631578947, "grad_norm": 1.2259972095489502, "learning_rate": 0.0001, "loss": 0.0096, "step": 141110 }, { "epoch": 928.421052631579, "grad_norm": 0.6991260647773743, "learning_rate": 0.0001, "loss": 0.0096, "step": 141120 }, { "epoch": 928.4868421052631, "grad_norm": 0.9250067472457886, "learning_rate": 0.0001, "loss": 0.0094, "step": 141130 }, { "epoch": 928.5526315789474, "grad_norm": 1.0126291513442993, "learning_rate": 0.0001, "loss": 0.0107, "step": 141140 }, { "epoch": 928.6184210526316, "grad_norm": 1.092654824256897, "learning_rate": 0.0001, "loss": 0.0126, "step": 141150 }, { "epoch": 928.6842105263158, "grad_norm": 1.143467903137207, "learning_rate": 0.0001, "loss": 0.0097, "step": 141160 }, { "epoch": 928.75, "grad_norm": 1.1244598627090454, "learning_rate": 0.0001, "loss": 0.0094, "step": 141170 }, { "epoch": 928.8157894736842, "grad_norm": 0.8869340419769287, "learning_rate": 0.0001, "loss": 0.011, "step": 141180 }, { "epoch": 928.8815789473684, "grad_norm": 1.0493642091751099, "learning_rate": 0.0001, "loss": 0.0081, "step": 141190 }, { "epoch": 928.9473684210526, "grad_norm": 1.048553228378296, "learning_rate": 0.0001, "loss": 0.0099, "step": 141200 }, { "epoch": 929.0131578947369, "grad_norm": 1.0063235759735107, "learning_rate": 0.0001, "loss": 0.0082, "step": 141210 }, { "epoch": 929.078947368421, "grad_norm": 0.803489625453949, "learning_rate": 0.0001, "loss": 0.0102, "step": 141220 }, { "epoch": 929.1447368421053, "grad_norm": 0.7754166126251221, "learning_rate": 0.0001, "loss": 0.0119, "step": 141230 }, { "epoch": 929.2105263157895, "grad_norm": 1.123893141746521, "learning_rate": 0.0001, "loss": 0.0093, "step": 141240 }, { "epoch": 929.2763157894736, "grad_norm": 0.8346624970436096, "learning_rate": 0.0001, "loss": 0.0089, "step": 141250 }, { "epoch": 929.3421052631579, "grad_norm": 1.2501662969589233, "learning_rate": 0.0001, "loss": 0.0082, "step": 141260 }, { "epoch": 929.4078947368421, "grad_norm": 0.9028252959251404, "learning_rate": 0.0001, "loss": 0.0109, "step": 141270 }, { "epoch": 929.4736842105264, "grad_norm": 1.1409492492675781, "learning_rate": 0.0001, "loss": 0.0098, "step": 141280 }, { "epoch": 929.5394736842105, "grad_norm": 0.795866847038269, "learning_rate": 0.0001, "loss": 0.0089, "step": 141290 }, { "epoch": 929.6052631578947, "grad_norm": 1.0980581045150757, "learning_rate": 0.0001, "loss": 0.0121, "step": 141300 }, { "epoch": 929.671052631579, "grad_norm": 0.9573304057121277, "learning_rate": 0.0001, "loss": 0.009, "step": 141310 }, { "epoch": 929.7368421052631, "grad_norm": 0.9308114051818848, "learning_rate": 0.0001, "loss": 0.0108, "step": 141320 }, { "epoch": 929.8026315789474, "grad_norm": 1.13672935962677, "learning_rate": 0.0001, "loss": 0.0092, "step": 141330 }, { "epoch": 929.8684210526316, "grad_norm": 1.0716716051101685, "learning_rate": 0.0001, "loss": 0.0107, "step": 141340 }, { "epoch": 929.9342105263158, "grad_norm": 0.8969205617904663, "learning_rate": 0.0001, "loss": 0.0081, "step": 141350 }, { "epoch": 930.0, "grad_norm": 0.7731922268867493, "learning_rate": 0.0001, "loss": 0.0097, "step": 141360 }, { "epoch": 930.0657894736842, "grad_norm": 1.054961085319519, "learning_rate": 0.0001, "loss": 0.0113, "step": 141370 }, { "epoch": 930.1315789473684, "grad_norm": 1.0062198638916016, "learning_rate": 0.0001, "loss": 0.0108, "step": 141380 }, { "epoch": 930.1973684210526, "grad_norm": 0.8831532597541809, "learning_rate": 0.0001, "loss": 0.0125, "step": 141390 }, { "epoch": 930.2631578947369, "grad_norm": 0.94657963514328, "learning_rate": 0.0001, "loss": 0.0094, "step": 141400 }, { "epoch": 930.328947368421, "grad_norm": 0.7149097919464111, "learning_rate": 0.0001, "loss": 0.0101, "step": 141410 }, { "epoch": 930.3947368421053, "grad_norm": 1.1722291707992554, "learning_rate": 0.0001, "loss": 0.0087, "step": 141420 }, { "epoch": 930.4605263157895, "grad_norm": 1.2141605615615845, "learning_rate": 0.0001, "loss": 0.0096, "step": 141430 }, { "epoch": 930.5263157894736, "grad_norm": 1.0134645700454712, "learning_rate": 0.0001, "loss": 0.0102, "step": 141440 }, { "epoch": 930.5921052631579, "grad_norm": 1.4195690155029297, "learning_rate": 0.0001, "loss": 0.0091, "step": 141450 }, { "epoch": 930.6578947368421, "grad_norm": 1.0043601989746094, "learning_rate": 0.0001, "loss": 0.009, "step": 141460 }, { "epoch": 930.7236842105264, "grad_norm": 0.9379559755325317, "learning_rate": 0.0001, "loss": 0.0101, "step": 141470 }, { "epoch": 930.7894736842105, "grad_norm": 1.1322745084762573, "learning_rate": 0.0001, "loss": 0.0094, "step": 141480 }, { "epoch": 930.8552631578947, "grad_norm": 1.1862339973449707, "learning_rate": 0.0001, "loss": 0.0081, "step": 141490 }, { "epoch": 930.921052631579, "grad_norm": 1.0361219644546509, "learning_rate": 0.0001, "loss": 0.0087, "step": 141500 }, { "epoch": 930.9868421052631, "grad_norm": 1.4550143480300903, "learning_rate": 0.0001, "loss": 0.0113, "step": 141510 }, { "epoch": 931.0526315789474, "grad_norm": 1.0843679904937744, "learning_rate": 0.0001, "loss": 0.0116, "step": 141520 }, { "epoch": 931.1184210526316, "grad_norm": 1.4889967441558838, "learning_rate": 0.0001, "loss": 0.013, "step": 141530 }, { "epoch": 931.1842105263158, "grad_norm": 1.130838394165039, "learning_rate": 0.0001, "loss": 0.009, "step": 141540 }, { "epoch": 931.25, "grad_norm": 1.1887781620025635, "learning_rate": 0.0001, "loss": 0.0105, "step": 141550 }, { "epoch": 931.3157894736842, "grad_norm": 0.8891682028770447, "learning_rate": 0.0001, "loss": 0.0077, "step": 141560 }, { "epoch": 931.3815789473684, "grad_norm": 1.069966197013855, "learning_rate": 0.0001, "loss": 0.0081, "step": 141570 }, { "epoch": 931.4473684210526, "grad_norm": 1.2888097763061523, "learning_rate": 0.0001, "loss": 0.0102, "step": 141580 }, { "epoch": 931.5131578947369, "grad_norm": 0.7364023327827454, "learning_rate": 0.0001, "loss": 0.0083, "step": 141590 }, { "epoch": 931.578947368421, "grad_norm": 1.0230684280395508, "learning_rate": 0.0001, "loss": 0.0113, "step": 141600 }, { "epoch": 931.6447368421053, "grad_norm": 1.1566507816314697, "learning_rate": 0.0001, "loss": 0.0109, "step": 141610 }, { "epoch": 931.7105263157895, "grad_norm": 1.0186455249786377, "learning_rate": 0.0001, "loss": 0.0087, "step": 141620 }, { "epoch": 931.7763157894736, "grad_norm": 1.209118127822876, "learning_rate": 0.0001, "loss": 0.0105, "step": 141630 }, { "epoch": 931.8421052631579, "grad_norm": 0.766610324382782, "learning_rate": 0.0001, "loss": 0.0096, "step": 141640 }, { "epoch": 931.9078947368421, "grad_norm": 1.0066529512405396, "learning_rate": 0.0001, "loss": 0.0083, "step": 141650 }, { "epoch": 931.9736842105264, "grad_norm": 0.9950891137123108, "learning_rate": 0.0001, "loss": 0.0099, "step": 141660 }, { "epoch": 932.0394736842105, "grad_norm": 1.1890411376953125, "learning_rate": 0.0001, "loss": 0.0106, "step": 141670 }, { "epoch": 932.1052631578947, "grad_norm": 0.9757727384567261, "learning_rate": 0.0001, "loss": 0.0112, "step": 141680 }, { "epoch": 932.171052631579, "grad_norm": 0.7399367690086365, "learning_rate": 0.0001, "loss": 0.01, "step": 141690 }, { "epoch": 932.2368421052631, "grad_norm": 0.8191750645637512, "learning_rate": 0.0001, "loss": 0.0083, "step": 141700 }, { "epoch": 932.3026315789474, "grad_norm": 1.0755620002746582, "learning_rate": 0.0001, "loss": 0.0095, "step": 141710 }, { "epoch": 932.3684210526316, "grad_norm": 1.099184513092041, "learning_rate": 0.0001, "loss": 0.0085, "step": 141720 }, { "epoch": 932.4342105263158, "grad_norm": 0.948433518409729, "learning_rate": 0.0001, "loss": 0.0101, "step": 141730 }, { "epoch": 932.5, "grad_norm": 1.2848048210144043, "learning_rate": 0.0001, "loss": 0.0103, "step": 141740 }, { "epoch": 932.5657894736842, "grad_norm": 1.0524227619171143, "learning_rate": 0.0001, "loss": 0.0094, "step": 141750 }, { "epoch": 932.6315789473684, "grad_norm": 1.1864573955535889, "learning_rate": 0.0001, "loss": 0.0104, "step": 141760 }, { "epoch": 932.6973684210526, "grad_norm": 1.1344093084335327, "learning_rate": 0.0001, "loss": 0.011, "step": 141770 }, { "epoch": 932.7631578947369, "grad_norm": 1.0128337144851685, "learning_rate": 0.0001, "loss": 0.0089, "step": 141780 }, { "epoch": 932.828947368421, "grad_norm": 0.9922648668289185, "learning_rate": 0.0001, "loss": 0.0086, "step": 141790 }, { "epoch": 932.8947368421053, "grad_norm": 1.1825958490371704, "learning_rate": 0.0001, "loss": 0.0094, "step": 141800 }, { "epoch": 932.9605263157895, "grad_norm": 1.0141911506652832, "learning_rate": 0.0001, "loss": 0.0112, "step": 141810 }, { "epoch": 933.0263157894736, "grad_norm": 1.0173567533493042, "learning_rate": 0.0001, "loss": 0.0098, "step": 141820 }, { "epoch": 933.0921052631579, "grad_norm": 1.0617711544036865, "learning_rate": 0.0001, "loss": 0.0087, "step": 141830 }, { "epoch": 933.1578947368421, "grad_norm": 1.0690877437591553, "learning_rate": 0.0001, "loss": 0.0091, "step": 141840 }, { "epoch": 933.2236842105264, "grad_norm": 0.7836157083511353, "learning_rate": 0.0001, "loss": 0.0104, "step": 141850 }, { "epoch": 933.2894736842105, "grad_norm": 0.8588806986808777, "learning_rate": 0.0001, "loss": 0.0083, "step": 141860 }, { "epoch": 933.3552631578947, "grad_norm": 0.8288969993591309, "learning_rate": 0.0001, "loss": 0.009, "step": 141870 }, { "epoch": 933.421052631579, "grad_norm": 0.8952768445014954, "learning_rate": 0.0001, "loss": 0.0099, "step": 141880 }, { "epoch": 933.4868421052631, "grad_norm": 0.7412847280502319, "learning_rate": 0.0001, "loss": 0.0106, "step": 141890 }, { "epoch": 933.5526315789474, "grad_norm": 0.5983102321624756, "learning_rate": 0.0001, "loss": 0.01, "step": 141900 }, { "epoch": 933.6184210526316, "grad_norm": 0.8310821652412415, "learning_rate": 0.0001, "loss": 0.0111, "step": 141910 }, { "epoch": 933.6842105263158, "grad_norm": 1.0020363330841064, "learning_rate": 0.0001, "loss": 0.0112, "step": 141920 }, { "epoch": 933.75, "grad_norm": 0.9288676381111145, "learning_rate": 0.0001, "loss": 0.0096, "step": 141930 }, { "epoch": 933.8157894736842, "grad_norm": 0.8493418097496033, "learning_rate": 0.0001, "loss": 0.009, "step": 141940 }, { "epoch": 933.8815789473684, "grad_norm": 1.2082266807556152, "learning_rate": 0.0001, "loss": 0.0083, "step": 141950 }, { "epoch": 933.9473684210526, "grad_norm": 1.1100561618804932, "learning_rate": 0.0001, "loss": 0.0096, "step": 141960 }, { "epoch": 934.0131578947369, "grad_norm": 1.3064969778060913, "learning_rate": 0.0001, "loss": 0.0092, "step": 141970 }, { "epoch": 934.078947368421, "grad_norm": 1.045897126197815, "learning_rate": 0.0001, "loss": 0.0101, "step": 141980 }, { "epoch": 934.1447368421053, "grad_norm": 1.0338056087493896, "learning_rate": 0.0001, "loss": 0.0093, "step": 141990 }, { "epoch": 934.2105263157895, "grad_norm": 0.882927656173706, "learning_rate": 0.0001, "loss": 0.0101, "step": 142000 }, { "epoch": 934.2763157894736, "grad_norm": 0.8924429416656494, "learning_rate": 0.0001, "loss": 0.0106, "step": 142010 }, { "epoch": 934.3421052631579, "grad_norm": 1.2137278318405151, "learning_rate": 0.0001, "loss": 0.0093, "step": 142020 }, { "epoch": 934.4078947368421, "grad_norm": 1.2545146942138672, "learning_rate": 0.0001, "loss": 0.0113, "step": 142030 }, { "epoch": 934.4736842105264, "grad_norm": 1.1060088872909546, "learning_rate": 0.0001, "loss": 0.0094, "step": 142040 }, { "epoch": 934.5394736842105, "grad_norm": 0.8980741500854492, "learning_rate": 0.0001, "loss": 0.01, "step": 142050 }, { "epoch": 934.6052631578947, "grad_norm": 1.3434455394744873, "learning_rate": 0.0001, "loss": 0.0093, "step": 142060 }, { "epoch": 934.671052631579, "grad_norm": 1.541412591934204, "learning_rate": 0.0001, "loss": 0.0085, "step": 142070 }, { "epoch": 934.7368421052631, "grad_norm": 0.7517480850219727, "learning_rate": 0.0001, "loss": 0.0111, "step": 142080 }, { "epoch": 934.8026315789474, "grad_norm": 0.9728532433509827, "learning_rate": 0.0001, "loss": 0.0104, "step": 142090 }, { "epoch": 934.8684210526316, "grad_norm": 0.956839382648468, "learning_rate": 0.0001, "loss": 0.0089, "step": 142100 }, { "epoch": 934.9342105263158, "grad_norm": 0.6664096117019653, "learning_rate": 0.0001, "loss": 0.0085, "step": 142110 }, { "epoch": 935.0, "grad_norm": 0.6346357464790344, "learning_rate": 0.0001, "loss": 0.0084, "step": 142120 }, { "epoch": 935.0657894736842, "grad_norm": 0.8186964392662048, "learning_rate": 0.0001, "loss": 0.0087, "step": 142130 }, { "epoch": 935.1315789473684, "grad_norm": 1.007627248764038, "learning_rate": 0.0001, "loss": 0.0091, "step": 142140 }, { "epoch": 935.1973684210526, "grad_norm": 0.8972962498664856, "learning_rate": 0.0001, "loss": 0.0102, "step": 142150 }, { "epoch": 935.2631578947369, "grad_norm": 0.863339364528656, "learning_rate": 0.0001, "loss": 0.0089, "step": 142160 }, { "epoch": 935.328947368421, "grad_norm": 1.014875054359436, "learning_rate": 0.0001, "loss": 0.0088, "step": 142170 }, { "epoch": 935.3947368421053, "grad_norm": 1.0720738172531128, "learning_rate": 0.0001, "loss": 0.0098, "step": 142180 }, { "epoch": 935.4605263157895, "grad_norm": 0.8214248418807983, "learning_rate": 0.0001, "loss": 0.0113, "step": 142190 }, { "epoch": 935.5263157894736, "grad_norm": 0.9831026792526245, "learning_rate": 0.0001, "loss": 0.0102, "step": 142200 }, { "epoch": 935.5921052631579, "grad_norm": 1.0691466331481934, "learning_rate": 0.0001, "loss": 0.0111, "step": 142210 }, { "epoch": 935.6578947368421, "grad_norm": 1.3758347034454346, "learning_rate": 0.0001, "loss": 0.0088, "step": 142220 }, { "epoch": 935.7236842105264, "grad_norm": 1.7172006368637085, "learning_rate": 0.0001, "loss": 0.0107, "step": 142230 }, { "epoch": 935.7894736842105, "grad_norm": 1.1777650117874146, "learning_rate": 0.0001, "loss": 0.0108, "step": 142240 }, { "epoch": 935.8552631578947, "grad_norm": 1.176995038986206, "learning_rate": 0.0001, "loss": 0.0099, "step": 142250 }, { "epoch": 935.921052631579, "grad_norm": 1.0358529090881348, "learning_rate": 0.0001, "loss": 0.0109, "step": 142260 }, { "epoch": 935.9868421052631, "grad_norm": 1.0105960369110107, "learning_rate": 0.0001, "loss": 0.0109, "step": 142270 }, { "epoch": 936.0526315789474, "grad_norm": 1.2849767208099365, "learning_rate": 0.0001, "loss": 0.0103, "step": 142280 }, { "epoch": 936.1184210526316, "grad_norm": 1.315524935722351, "learning_rate": 0.0001, "loss": 0.0119, "step": 142290 }, { "epoch": 936.1842105263158, "grad_norm": 1.1446727514266968, "learning_rate": 0.0001, "loss": 0.0091, "step": 142300 }, { "epoch": 936.25, "grad_norm": 1.1680877208709717, "learning_rate": 0.0001, "loss": 0.0095, "step": 142310 }, { "epoch": 936.3157894736842, "grad_norm": 1.1997969150543213, "learning_rate": 0.0001, "loss": 0.0107, "step": 142320 }, { "epoch": 936.3815789473684, "grad_norm": 1.043163537979126, "learning_rate": 0.0001, "loss": 0.0109, "step": 142330 }, { "epoch": 936.4473684210526, "grad_norm": 1.2314527034759521, "learning_rate": 0.0001, "loss": 0.0114, "step": 142340 }, { "epoch": 936.5131578947369, "grad_norm": 0.8051645755767822, "learning_rate": 0.0001, "loss": 0.0111, "step": 142350 }, { "epoch": 936.578947368421, "grad_norm": 0.8518595099449158, "learning_rate": 0.0001, "loss": 0.0087, "step": 142360 }, { "epoch": 936.6447368421053, "grad_norm": 1.2750550508499146, "learning_rate": 0.0001, "loss": 0.0119, "step": 142370 }, { "epoch": 936.7105263157895, "grad_norm": 0.7354443073272705, "learning_rate": 0.0001, "loss": 0.0093, "step": 142380 }, { "epoch": 936.7763157894736, "grad_norm": 0.8145555853843689, "learning_rate": 0.0001, "loss": 0.0104, "step": 142390 }, { "epoch": 936.8421052631579, "grad_norm": 1.1294350624084473, "learning_rate": 0.0001, "loss": 0.0095, "step": 142400 }, { "epoch": 936.9078947368421, "grad_norm": 0.8376908302307129, "learning_rate": 0.0001, "loss": 0.0104, "step": 142410 }, { "epoch": 936.9736842105264, "grad_norm": 0.9719381332397461, "learning_rate": 0.0001, "loss": 0.0098, "step": 142420 }, { "epoch": 937.0394736842105, "grad_norm": 0.9003921747207642, "learning_rate": 0.0001, "loss": 0.01, "step": 142430 }, { "epoch": 937.1052631578947, "grad_norm": 1.2315677404403687, "learning_rate": 0.0001, "loss": 0.0094, "step": 142440 }, { "epoch": 937.171052631579, "grad_norm": 1.2337232828140259, "learning_rate": 0.0001, "loss": 0.0113, "step": 142450 }, { "epoch": 937.2368421052631, "grad_norm": 0.866338312625885, "learning_rate": 0.0001, "loss": 0.0116, "step": 142460 }, { "epoch": 937.3026315789474, "grad_norm": 1.071451187133789, "learning_rate": 0.0001, "loss": 0.01, "step": 142470 }, { "epoch": 937.3684210526316, "grad_norm": 1.0966578722000122, "learning_rate": 0.0001, "loss": 0.0086, "step": 142480 }, { "epoch": 937.4342105263158, "grad_norm": 0.882290780544281, "learning_rate": 0.0001, "loss": 0.0105, "step": 142490 }, { "epoch": 937.5, "grad_norm": 0.9930370450019836, "learning_rate": 0.0001, "loss": 0.0102, "step": 142500 }, { "epoch": 937.5657894736842, "grad_norm": 0.729529857635498, "learning_rate": 0.0001, "loss": 0.0098, "step": 142510 }, { "epoch": 937.6315789473684, "grad_norm": 0.8702946305274963, "learning_rate": 0.0001, "loss": 0.0104, "step": 142520 }, { "epoch": 937.6973684210526, "grad_norm": 0.8152609467506409, "learning_rate": 0.0001, "loss": 0.0094, "step": 142530 }, { "epoch": 937.7631578947369, "grad_norm": 1.1005128622055054, "learning_rate": 0.0001, "loss": 0.0118, "step": 142540 }, { "epoch": 937.828947368421, "grad_norm": 1.302595615386963, "learning_rate": 0.0001, "loss": 0.0094, "step": 142550 }, { "epoch": 937.8947368421053, "grad_norm": 1.0444657802581787, "learning_rate": 0.0001, "loss": 0.0097, "step": 142560 }, { "epoch": 937.9605263157895, "grad_norm": 0.947675883769989, "learning_rate": 0.0001, "loss": 0.0093, "step": 142570 }, { "epoch": 938.0263157894736, "grad_norm": 0.942642867565155, "learning_rate": 0.0001, "loss": 0.0107, "step": 142580 }, { "epoch": 938.0921052631579, "grad_norm": 1.1774204969406128, "learning_rate": 0.0001, "loss": 0.0088, "step": 142590 }, { "epoch": 938.1578947368421, "grad_norm": 1.1025643348693848, "learning_rate": 0.0001, "loss": 0.0087, "step": 142600 }, { "epoch": 938.2236842105264, "grad_norm": 1.1018234491348267, "learning_rate": 0.0001, "loss": 0.0102, "step": 142610 }, { "epoch": 938.2894736842105, "grad_norm": 1.5480759143829346, "learning_rate": 0.0001, "loss": 0.0099, "step": 142620 }, { "epoch": 938.3552631578947, "grad_norm": 1.1027711629867554, "learning_rate": 0.0001, "loss": 0.011, "step": 142630 }, { "epoch": 938.421052631579, "grad_norm": 0.9052314162254333, "learning_rate": 0.0001, "loss": 0.0097, "step": 142640 }, { "epoch": 938.4868421052631, "grad_norm": 1.1163532733917236, "learning_rate": 0.0001, "loss": 0.0106, "step": 142650 }, { "epoch": 938.5526315789474, "grad_norm": 1.5282114744186401, "learning_rate": 0.0001, "loss": 0.0102, "step": 142660 }, { "epoch": 938.6184210526316, "grad_norm": 1.0808197259902954, "learning_rate": 0.0001, "loss": 0.0109, "step": 142670 }, { "epoch": 938.6842105263158, "grad_norm": 0.9166187644004822, "learning_rate": 0.0001, "loss": 0.0084, "step": 142680 }, { "epoch": 938.75, "grad_norm": 1.2333557605743408, "learning_rate": 0.0001, "loss": 0.0096, "step": 142690 }, { "epoch": 938.8157894736842, "grad_norm": 1.2331442832946777, "learning_rate": 0.0001, "loss": 0.0111, "step": 142700 }, { "epoch": 938.8815789473684, "grad_norm": 0.7584700584411621, "learning_rate": 0.0001, "loss": 0.0096, "step": 142710 }, { "epoch": 938.9473684210526, "grad_norm": 0.8076496720314026, "learning_rate": 0.0001, "loss": 0.009, "step": 142720 }, { "epoch": 939.0131578947369, "grad_norm": 1.1252182722091675, "learning_rate": 0.0001, "loss": 0.0092, "step": 142730 }, { "epoch": 939.078947368421, "grad_norm": 0.7238128185272217, "learning_rate": 0.0001, "loss": 0.0108, "step": 142740 }, { "epoch": 939.1447368421053, "grad_norm": 0.9610776305198669, "learning_rate": 0.0001, "loss": 0.0091, "step": 142750 }, { "epoch": 939.2105263157895, "grad_norm": 1.0641119480133057, "learning_rate": 0.0001, "loss": 0.0113, "step": 142760 }, { "epoch": 939.2763157894736, "grad_norm": 1.4816539287567139, "learning_rate": 0.0001, "loss": 0.0103, "step": 142770 }, { "epoch": 939.3421052631579, "grad_norm": 1.748676061630249, "learning_rate": 0.0001, "loss": 0.0113, "step": 142780 }, { "epoch": 939.4078947368421, "grad_norm": 1.8980330228805542, "learning_rate": 0.0001, "loss": 0.0146, "step": 142790 }, { "epoch": 939.4736842105264, "grad_norm": 1.2307404279708862, "learning_rate": 0.0001, "loss": 0.0101, "step": 142800 }, { "epoch": 939.5394736842105, "grad_norm": 1.277494192123413, "learning_rate": 0.0001, "loss": 0.0094, "step": 142810 }, { "epoch": 939.6052631578947, "grad_norm": 1.3489617109298706, "learning_rate": 0.0001, "loss": 0.01, "step": 142820 }, { "epoch": 939.671052631579, "grad_norm": 1.0544668436050415, "learning_rate": 0.0001, "loss": 0.0084, "step": 142830 }, { "epoch": 939.7368421052631, "grad_norm": 1.1810656785964966, "learning_rate": 0.0001, "loss": 0.0079, "step": 142840 }, { "epoch": 939.8026315789474, "grad_norm": 0.905704140663147, "learning_rate": 0.0001, "loss": 0.0097, "step": 142850 }, { "epoch": 939.8684210526316, "grad_norm": 1.0323855876922607, "learning_rate": 0.0001, "loss": 0.0089, "step": 142860 }, { "epoch": 939.9342105263158, "grad_norm": 1.2508949041366577, "learning_rate": 0.0001, "loss": 0.0096, "step": 142870 }, { "epoch": 940.0, "grad_norm": 1.1149414777755737, "learning_rate": 0.0001, "loss": 0.0089, "step": 142880 }, { "epoch": 940.0657894736842, "grad_norm": 1.280163288116455, "learning_rate": 0.0001, "loss": 0.0087, "step": 142890 }, { "epoch": 940.1315789473684, "grad_norm": 1.270648717880249, "learning_rate": 0.0001, "loss": 0.0094, "step": 142900 }, { "epoch": 940.1973684210526, "grad_norm": 0.8688375949859619, "learning_rate": 0.0001, "loss": 0.0087, "step": 142910 }, { "epoch": 940.2631578947369, "grad_norm": 1.2751940488815308, "learning_rate": 0.0001, "loss": 0.0094, "step": 142920 }, { "epoch": 940.328947368421, "grad_norm": 1.1398617029190063, "learning_rate": 0.0001, "loss": 0.01, "step": 142930 }, { "epoch": 940.3947368421053, "grad_norm": 0.7770438194274902, "learning_rate": 0.0001, "loss": 0.0101, "step": 142940 }, { "epoch": 940.4605263157895, "grad_norm": 1.470767855644226, "learning_rate": 0.0001, "loss": 0.0079, "step": 142950 }, { "epoch": 940.5263157894736, "grad_norm": 1.3184136152267456, "learning_rate": 0.0001, "loss": 0.0093, "step": 142960 }, { "epoch": 940.5921052631579, "grad_norm": 0.9164167642593384, "learning_rate": 0.0001, "loss": 0.0083, "step": 142970 }, { "epoch": 940.6578947368421, "grad_norm": 1.149084210395813, "learning_rate": 0.0001, "loss": 0.0098, "step": 142980 }, { "epoch": 940.7236842105264, "grad_norm": 1.2743107080459595, "learning_rate": 0.0001, "loss": 0.0079, "step": 142990 }, { "epoch": 940.7894736842105, "grad_norm": 0.6566851139068604, "learning_rate": 0.0001, "loss": 0.0097, "step": 143000 }, { "epoch": 940.8552631578947, "grad_norm": 0.6921648979187012, "learning_rate": 0.0001, "loss": 0.0114, "step": 143010 }, { "epoch": 940.921052631579, "grad_norm": 0.9658145904541016, "learning_rate": 0.0001, "loss": 0.0115, "step": 143020 }, { "epoch": 940.9868421052631, "grad_norm": 1.0340406894683838, "learning_rate": 0.0001, "loss": 0.0109, "step": 143030 }, { "epoch": 941.0526315789474, "grad_norm": 1.061692237854004, "learning_rate": 0.0001, "loss": 0.0109, "step": 143040 }, { "epoch": 941.1184210526316, "grad_norm": 0.8725217580795288, "learning_rate": 0.0001, "loss": 0.0102, "step": 143050 }, { "epoch": 941.1842105263158, "grad_norm": 1.1090641021728516, "learning_rate": 0.0001, "loss": 0.009, "step": 143060 }, { "epoch": 941.25, "grad_norm": 1.313765287399292, "learning_rate": 0.0001, "loss": 0.01, "step": 143070 }, { "epoch": 941.3157894736842, "grad_norm": 0.8687812089920044, "learning_rate": 0.0001, "loss": 0.0084, "step": 143080 }, { "epoch": 941.3815789473684, "grad_norm": 1.066633701324463, "learning_rate": 0.0001, "loss": 0.0101, "step": 143090 }, { "epoch": 941.4473684210526, "grad_norm": 1.1810579299926758, "learning_rate": 0.0001, "loss": 0.0095, "step": 143100 }, { "epoch": 941.5131578947369, "grad_norm": 1.057855486869812, "learning_rate": 0.0001, "loss": 0.0081, "step": 143110 }, { "epoch": 941.578947368421, "grad_norm": 0.7218169569969177, "learning_rate": 0.0001, "loss": 0.01, "step": 143120 }, { "epoch": 941.6447368421053, "grad_norm": 1.1620811223983765, "learning_rate": 0.0001, "loss": 0.0094, "step": 143130 }, { "epoch": 941.7105263157895, "grad_norm": 1.2973016500473022, "learning_rate": 0.0001, "loss": 0.009, "step": 143140 }, { "epoch": 941.7763157894736, "grad_norm": 1.3135859966278076, "learning_rate": 0.0001, "loss": 0.0093, "step": 143150 }, { "epoch": 941.8421052631579, "grad_norm": 0.8703665733337402, "learning_rate": 0.0001, "loss": 0.0106, "step": 143160 }, { "epoch": 941.9078947368421, "grad_norm": 0.9815681576728821, "learning_rate": 0.0001, "loss": 0.01, "step": 143170 }, { "epoch": 941.9736842105264, "grad_norm": 1.3823065757751465, "learning_rate": 0.0001, "loss": 0.009, "step": 143180 }, { "epoch": 942.0394736842105, "grad_norm": 0.974730372428894, "learning_rate": 0.0001, "loss": 0.0079, "step": 143190 }, { "epoch": 942.1052631578947, "grad_norm": 0.7570364475250244, "learning_rate": 0.0001, "loss": 0.0079, "step": 143200 }, { "epoch": 942.171052631579, "grad_norm": 1.0894103050231934, "learning_rate": 0.0001, "loss": 0.0096, "step": 143210 }, { "epoch": 942.2368421052631, "grad_norm": 1.1041042804718018, "learning_rate": 0.0001, "loss": 0.0081, "step": 143220 }, { "epoch": 942.3026315789474, "grad_norm": 1.2231684923171997, "learning_rate": 0.0001, "loss": 0.0112, "step": 143230 }, { "epoch": 942.3684210526316, "grad_norm": 1.2694424390792847, "learning_rate": 0.0001, "loss": 0.0092, "step": 143240 }, { "epoch": 942.4342105263158, "grad_norm": 0.8678814172744751, "learning_rate": 0.0001, "loss": 0.0093, "step": 143250 }, { "epoch": 942.5, "grad_norm": 0.9810147285461426, "learning_rate": 0.0001, "loss": 0.0095, "step": 143260 }, { "epoch": 942.5657894736842, "grad_norm": 0.6947391629219055, "learning_rate": 0.0001, "loss": 0.01, "step": 143270 }, { "epoch": 942.6315789473684, "grad_norm": 1.0292843580245972, "learning_rate": 0.0001, "loss": 0.01, "step": 143280 }, { "epoch": 942.6973684210526, "grad_norm": 0.93792724609375, "learning_rate": 0.0001, "loss": 0.0133, "step": 143290 }, { "epoch": 942.7631578947369, "grad_norm": 0.827596127986908, "learning_rate": 0.0001, "loss": 0.0113, "step": 143300 }, { "epoch": 942.828947368421, "grad_norm": 1.1035057306289673, "learning_rate": 0.0001, "loss": 0.0127, "step": 143310 }, { "epoch": 942.8947368421053, "grad_norm": 1.2476816177368164, "learning_rate": 0.0001, "loss": 0.0117, "step": 143320 }, { "epoch": 942.9605263157895, "grad_norm": 1.1506506204605103, "learning_rate": 0.0001, "loss": 0.0107, "step": 143330 }, { "epoch": 943.0263157894736, "grad_norm": 0.9531042575836182, "learning_rate": 0.0001, "loss": 0.0127, "step": 143340 }, { "epoch": 943.0921052631579, "grad_norm": 1.2359414100646973, "learning_rate": 0.0001, "loss": 0.0116, "step": 143350 }, { "epoch": 943.1578947368421, "grad_norm": 1.264983057975769, "learning_rate": 0.0001, "loss": 0.0094, "step": 143360 }, { "epoch": 943.2236842105264, "grad_norm": 1.0347520112991333, "learning_rate": 0.0001, "loss": 0.0097, "step": 143370 }, { "epoch": 943.2894736842105, "grad_norm": 1.0723800659179688, "learning_rate": 0.0001, "loss": 0.011, "step": 143380 }, { "epoch": 943.3552631578947, "grad_norm": 1.2043391466140747, "learning_rate": 0.0001, "loss": 0.0107, "step": 143390 }, { "epoch": 943.421052631579, "grad_norm": 0.9921755790710449, "learning_rate": 0.0001, "loss": 0.0118, "step": 143400 }, { "epoch": 943.4868421052631, "grad_norm": 1.0856060981750488, "learning_rate": 0.0001, "loss": 0.0112, "step": 143410 }, { "epoch": 943.5526315789474, "grad_norm": 1.3713430166244507, "learning_rate": 0.0001, "loss": 0.0116, "step": 143420 }, { "epoch": 943.6184210526316, "grad_norm": 1.2866997718811035, "learning_rate": 0.0001, "loss": 0.0105, "step": 143430 }, { "epoch": 943.6842105263158, "grad_norm": 1.1391348838806152, "learning_rate": 0.0001, "loss": 0.0109, "step": 143440 }, { "epoch": 943.75, "grad_norm": 0.8373425006866455, "learning_rate": 0.0001, "loss": 0.0111, "step": 143450 }, { "epoch": 943.8157894736842, "grad_norm": 0.9597638845443726, "learning_rate": 0.0001, "loss": 0.0115, "step": 143460 }, { "epoch": 943.8815789473684, "grad_norm": 0.904534637928009, "learning_rate": 0.0001, "loss": 0.0096, "step": 143470 }, { "epoch": 943.9473684210526, "grad_norm": 0.8183276057243347, "learning_rate": 0.0001, "loss": 0.0101, "step": 143480 }, { "epoch": 944.0131578947369, "grad_norm": 0.7242514491081238, "learning_rate": 0.0001, "loss": 0.0098, "step": 143490 }, { "epoch": 944.078947368421, "grad_norm": 0.8846292495727539, "learning_rate": 0.0001, "loss": 0.0117, "step": 143500 }, { "epoch": 944.1447368421053, "grad_norm": 0.9731491804122925, "learning_rate": 0.0001, "loss": 0.0094, "step": 143510 }, { "epoch": 944.2105263157895, "grad_norm": 0.6665592193603516, "learning_rate": 0.0001, "loss": 0.01, "step": 143520 }, { "epoch": 944.2763157894736, "grad_norm": 0.75457763671875, "learning_rate": 0.0001, "loss": 0.0091, "step": 143530 }, { "epoch": 944.3421052631579, "grad_norm": 0.9339364767074585, "learning_rate": 0.0001, "loss": 0.0102, "step": 143540 }, { "epoch": 944.4078947368421, "grad_norm": 0.8604110479354858, "learning_rate": 0.0001, "loss": 0.0128, "step": 143550 }, { "epoch": 944.4736842105264, "grad_norm": 1.0775866508483887, "learning_rate": 0.0001, "loss": 0.0098, "step": 143560 }, { "epoch": 944.5394736842105, "grad_norm": 1.1625889539718628, "learning_rate": 0.0001, "loss": 0.0098, "step": 143570 }, { "epoch": 944.6052631578947, "grad_norm": 1.0786641836166382, "learning_rate": 0.0001, "loss": 0.0122, "step": 143580 }, { "epoch": 944.671052631579, "grad_norm": 0.6444934010505676, "learning_rate": 0.0001, "loss": 0.0097, "step": 143590 }, { "epoch": 944.7368421052631, "grad_norm": 0.9392868876457214, "learning_rate": 0.0001, "loss": 0.0116, "step": 143600 }, { "epoch": 944.8026315789474, "grad_norm": 1.0685558319091797, "learning_rate": 0.0001, "loss": 0.0105, "step": 143610 }, { "epoch": 944.8684210526316, "grad_norm": 1.4054182767868042, "learning_rate": 0.0001, "loss": 0.0096, "step": 143620 }, { "epoch": 944.9342105263158, "grad_norm": 1.2101002931594849, "learning_rate": 0.0001, "loss": 0.011, "step": 143630 }, { "epoch": 945.0, "grad_norm": 1.5603113174438477, "learning_rate": 0.0001, "loss": 0.0097, "step": 143640 }, { "epoch": 945.0657894736842, "grad_norm": 1.0184569358825684, "learning_rate": 0.0001, "loss": 0.01, "step": 143650 }, { "epoch": 945.1315789473684, "grad_norm": 0.7555046081542969, "learning_rate": 0.0001, "loss": 0.0106, "step": 143660 }, { "epoch": 945.1973684210526, "grad_norm": 0.8718606233596802, "learning_rate": 0.0001, "loss": 0.0121, "step": 143670 }, { "epoch": 945.2631578947369, "grad_norm": 1.0357540845870972, "learning_rate": 0.0001, "loss": 0.0099, "step": 143680 }, { "epoch": 945.328947368421, "grad_norm": 0.9740762710571289, "learning_rate": 0.0001, "loss": 0.0107, "step": 143690 }, { "epoch": 945.3947368421053, "grad_norm": 0.9810712337493896, "learning_rate": 0.0001, "loss": 0.009, "step": 143700 }, { "epoch": 945.4605263157895, "grad_norm": 0.9435747265815735, "learning_rate": 0.0001, "loss": 0.0085, "step": 143710 }, { "epoch": 945.5263157894736, "grad_norm": 1.1019375324249268, "learning_rate": 0.0001, "loss": 0.0096, "step": 143720 }, { "epoch": 945.5921052631579, "grad_norm": 0.8351660370826721, "learning_rate": 0.0001, "loss": 0.0095, "step": 143730 }, { "epoch": 945.6578947368421, "grad_norm": 0.8502940535545349, "learning_rate": 0.0001, "loss": 0.0099, "step": 143740 }, { "epoch": 945.7236842105264, "grad_norm": 0.7618118524551392, "learning_rate": 0.0001, "loss": 0.0119, "step": 143750 }, { "epoch": 945.7894736842105, "grad_norm": 1.3105641603469849, "learning_rate": 0.0001, "loss": 0.0077, "step": 143760 }, { "epoch": 945.8552631578947, "grad_norm": 0.9641187787055969, "learning_rate": 0.0001, "loss": 0.0103, "step": 143770 }, { "epoch": 945.921052631579, "grad_norm": 1.303466796875, "learning_rate": 0.0001, "loss": 0.011, "step": 143780 }, { "epoch": 945.9868421052631, "grad_norm": 1.175897479057312, "learning_rate": 0.0001, "loss": 0.0112, "step": 143790 }, { "epoch": 946.0526315789474, "grad_norm": 1.1815438270568848, "learning_rate": 0.0001, "loss": 0.0108, "step": 143800 }, { "epoch": 946.1184210526316, "grad_norm": 1.078926920890808, "learning_rate": 0.0001, "loss": 0.0112, "step": 143810 }, { "epoch": 946.1842105263158, "grad_norm": 0.9007603526115417, "learning_rate": 0.0001, "loss": 0.0118, "step": 143820 }, { "epoch": 946.25, "grad_norm": 1.0532934665679932, "learning_rate": 0.0001, "loss": 0.0086, "step": 143830 }, { "epoch": 946.3157894736842, "grad_norm": 1.0850658416748047, "learning_rate": 0.0001, "loss": 0.0112, "step": 143840 }, { "epoch": 946.3815789473684, "grad_norm": 0.9188476800918579, "learning_rate": 0.0001, "loss": 0.0097, "step": 143850 }, { "epoch": 946.4473684210526, "grad_norm": 0.854782223701477, "learning_rate": 0.0001, "loss": 0.01, "step": 143860 }, { "epoch": 946.5131578947369, "grad_norm": 0.5295913815498352, "learning_rate": 0.0001, "loss": 0.0107, "step": 143870 }, { "epoch": 946.578947368421, "grad_norm": 0.8378633856773376, "learning_rate": 0.0001, "loss": 0.0084, "step": 143880 }, { "epoch": 946.6447368421053, "grad_norm": 1.1583908796310425, "learning_rate": 0.0001, "loss": 0.0099, "step": 143890 }, { "epoch": 946.7105263157895, "grad_norm": 0.8956261873245239, "learning_rate": 0.0001, "loss": 0.0113, "step": 143900 }, { "epoch": 946.7763157894736, "grad_norm": 1.1853934526443481, "learning_rate": 0.0001, "loss": 0.0097, "step": 143910 }, { "epoch": 946.8421052631579, "grad_norm": 0.9609845876693726, "learning_rate": 0.0001, "loss": 0.0113, "step": 143920 }, { "epoch": 946.9078947368421, "grad_norm": 0.7970736622810364, "learning_rate": 0.0001, "loss": 0.0107, "step": 143930 }, { "epoch": 946.9736842105264, "grad_norm": 1.046696424484253, "learning_rate": 0.0001, "loss": 0.0091, "step": 143940 }, { "epoch": 947.0394736842105, "grad_norm": 1.5037174224853516, "learning_rate": 0.0001, "loss": 0.0094, "step": 143950 }, { "epoch": 947.1052631578947, "grad_norm": 1.2524503469467163, "learning_rate": 0.0001, "loss": 0.0096, "step": 143960 }, { "epoch": 947.171052631579, "grad_norm": 0.7371581792831421, "learning_rate": 0.0001, "loss": 0.0092, "step": 143970 }, { "epoch": 947.2368421052631, "grad_norm": 0.883450984954834, "learning_rate": 0.0001, "loss": 0.0096, "step": 143980 }, { "epoch": 947.3026315789474, "grad_norm": 1.1274125576019287, "learning_rate": 0.0001, "loss": 0.01, "step": 143990 }, { "epoch": 947.3684210526316, "grad_norm": 1.2218645811080933, "learning_rate": 0.0001, "loss": 0.0089, "step": 144000 }, { "epoch": 947.4342105263158, "grad_norm": 0.9843568205833435, "learning_rate": 0.0001, "loss": 0.0105, "step": 144010 }, { "epoch": 947.5, "grad_norm": 0.8719574213027954, "learning_rate": 0.0001, "loss": 0.0091, "step": 144020 }, { "epoch": 947.5657894736842, "grad_norm": 0.8319658637046814, "learning_rate": 0.0001, "loss": 0.0101, "step": 144030 }, { "epoch": 947.6315789473684, "grad_norm": 0.8840770721435547, "learning_rate": 0.0001, "loss": 0.0085, "step": 144040 }, { "epoch": 947.6973684210526, "grad_norm": 1.1723517179489136, "learning_rate": 0.0001, "loss": 0.0108, "step": 144050 }, { "epoch": 947.7631578947369, "grad_norm": 0.925780177116394, "learning_rate": 0.0001, "loss": 0.0115, "step": 144060 }, { "epoch": 947.828947368421, "grad_norm": 0.7539234161376953, "learning_rate": 0.0001, "loss": 0.0094, "step": 144070 }, { "epoch": 947.8947368421053, "grad_norm": 1.0115832090377808, "learning_rate": 0.0001, "loss": 0.01, "step": 144080 }, { "epoch": 947.9605263157895, "grad_norm": 1.076154112815857, "learning_rate": 0.0001, "loss": 0.0103, "step": 144090 }, { "epoch": 948.0263157894736, "grad_norm": 1.1594144105911255, "learning_rate": 0.0001, "loss": 0.0081, "step": 144100 }, { "epoch": 948.0921052631579, "grad_norm": 0.8752143383026123, "learning_rate": 0.0001, "loss": 0.0081, "step": 144110 }, { "epoch": 948.1578947368421, "grad_norm": 0.816047191619873, "learning_rate": 0.0001, "loss": 0.0098, "step": 144120 }, { "epoch": 948.2236842105264, "grad_norm": 0.9532915949821472, "learning_rate": 0.0001, "loss": 0.0107, "step": 144130 }, { "epoch": 948.2894736842105, "grad_norm": 1.082932710647583, "learning_rate": 0.0001, "loss": 0.0105, "step": 144140 }, { "epoch": 948.3552631578947, "grad_norm": 0.8170779347419739, "learning_rate": 0.0001, "loss": 0.0098, "step": 144150 }, { "epoch": 948.421052631579, "grad_norm": 1.189034104347229, "learning_rate": 0.0001, "loss": 0.0107, "step": 144160 }, { "epoch": 948.4868421052631, "grad_norm": 1.0919978618621826, "learning_rate": 0.0001, "loss": 0.009, "step": 144170 }, { "epoch": 948.5526315789474, "grad_norm": 1.05934476852417, "learning_rate": 0.0001, "loss": 0.0098, "step": 144180 }, { "epoch": 948.6184210526316, "grad_norm": 0.8420997858047485, "learning_rate": 0.0001, "loss": 0.0086, "step": 144190 }, { "epoch": 948.6842105263158, "grad_norm": 1.1011571884155273, "learning_rate": 0.0001, "loss": 0.0114, "step": 144200 }, { "epoch": 948.75, "grad_norm": 0.7747015953063965, "learning_rate": 0.0001, "loss": 0.0109, "step": 144210 }, { "epoch": 948.8157894736842, "grad_norm": 1.1915805339813232, "learning_rate": 0.0001, "loss": 0.0103, "step": 144220 }, { "epoch": 948.8815789473684, "grad_norm": 1.0432209968566895, "learning_rate": 0.0001, "loss": 0.0093, "step": 144230 }, { "epoch": 948.9473684210526, "grad_norm": 0.9377793669700623, "learning_rate": 0.0001, "loss": 0.0115, "step": 144240 }, { "epoch": 949.0131578947369, "grad_norm": 0.8904802203178406, "learning_rate": 0.0001, "loss": 0.0108, "step": 144250 }, { "epoch": 949.078947368421, "grad_norm": 1.0895966291427612, "learning_rate": 0.0001, "loss": 0.0088, "step": 144260 }, { "epoch": 949.1447368421053, "grad_norm": 1.2289841175079346, "learning_rate": 0.0001, "loss": 0.011, "step": 144270 }, { "epoch": 949.2105263157895, "grad_norm": 0.9211072325706482, "learning_rate": 0.0001, "loss": 0.0091, "step": 144280 }, { "epoch": 949.2763157894736, "grad_norm": 1.0456809997558594, "learning_rate": 0.0001, "loss": 0.0083, "step": 144290 }, { "epoch": 949.3421052631579, "grad_norm": 0.9694221019744873, "learning_rate": 0.0001, "loss": 0.0091, "step": 144300 }, { "epoch": 949.4078947368421, "grad_norm": 0.9555386304855347, "learning_rate": 0.0001, "loss": 0.0091, "step": 144310 }, { "epoch": 949.4736842105264, "grad_norm": 0.9614490866661072, "learning_rate": 0.0001, "loss": 0.0092, "step": 144320 }, { "epoch": 949.5394736842105, "grad_norm": 1.557354211807251, "learning_rate": 0.0001, "loss": 0.0107, "step": 144330 }, { "epoch": 949.6052631578947, "grad_norm": 1.0724836587905884, "learning_rate": 0.0001, "loss": 0.0104, "step": 144340 }, { "epoch": 949.671052631579, "grad_norm": 1.0988471508026123, "learning_rate": 0.0001, "loss": 0.0097, "step": 144350 }, { "epoch": 949.7368421052631, "grad_norm": 1.2191790342330933, "learning_rate": 0.0001, "loss": 0.0098, "step": 144360 }, { "epoch": 949.8026315789474, "grad_norm": 1.3018871545791626, "learning_rate": 0.0001, "loss": 0.01, "step": 144370 }, { "epoch": 949.8684210526316, "grad_norm": 0.7598888874053955, "learning_rate": 0.0001, "loss": 0.012, "step": 144380 }, { "epoch": 949.9342105263158, "grad_norm": 0.9216148257255554, "learning_rate": 0.0001, "loss": 0.0096, "step": 144390 }, { "epoch": 950.0, "grad_norm": 1.0104155540466309, "learning_rate": 0.0001, "loss": 0.0116, "step": 144400 }, { "epoch": 950.0657894736842, "grad_norm": 1.0783321857452393, "learning_rate": 0.0001, "loss": 0.0104, "step": 144410 }, { "epoch": 950.1315789473684, "grad_norm": 1.3584924936294556, "learning_rate": 0.0001, "loss": 0.012, "step": 144420 }, { "epoch": 950.1973684210526, "grad_norm": 1.443320393562317, "learning_rate": 0.0001, "loss": 0.0116, "step": 144430 }, { "epoch": 950.2631578947369, "grad_norm": 1.016124963760376, "learning_rate": 0.0001, "loss": 0.0105, "step": 144440 }, { "epoch": 950.328947368421, "grad_norm": 1.2364863157272339, "learning_rate": 0.0001, "loss": 0.0084, "step": 144450 }, { "epoch": 950.3947368421053, "grad_norm": 1.195246696472168, "learning_rate": 0.0001, "loss": 0.0107, "step": 144460 }, { "epoch": 950.4605263157895, "grad_norm": 1.5904250144958496, "learning_rate": 0.0001, "loss": 0.0112, "step": 144470 }, { "epoch": 950.5263157894736, "grad_norm": 1.104093074798584, "learning_rate": 0.0001, "loss": 0.0097, "step": 144480 }, { "epoch": 950.5921052631579, "grad_norm": 1.0603477954864502, "learning_rate": 0.0001, "loss": 0.0095, "step": 144490 }, { "epoch": 950.6578947368421, "grad_norm": 1.1990110874176025, "learning_rate": 0.0001, "loss": 0.0085, "step": 144500 }, { "epoch": 950.7236842105264, "grad_norm": 1.1858569383621216, "learning_rate": 0.0001, "loss": 0.0084, "step": 144510 }, { "epoch": 950.7894736842105, "grad_norm": 0.9741637706756592, "learning_rate": 0.0001, "loss": 0.0093, "step": 144520 }, { "epoch": 950.8552631578947, "grad_norm": 0.9094181656837463, "learning_rate": 0.0001, "loss": 0.0086, "step": 144530 }, { "epoch": 950.921052631579, "grad_norm": 1.022029995918274, "learning_rate": 0.0001, "loss": 0.011, "step": 144540 }, { "epoch": 950.9868421052631, "grad_norm": 0.9624213576316833, "learning_rate": 0.0001, "loss": 0.0105, "step": 144550 }, { "epoch": 951.0526315789474, "grad_norm": 1.2031329870224, "learning_rate": 0.0001, "loss": 0.0104, "step": 144560 }, { "epoch": 951.1184210526316, "grad_norm": 1.1659698486328125, "learning_rate": 0.0001, "loss": 0.0119, "step": 144570 }, { "epoch": 951.1842105263158, "grad_norm": 1.0349775552749634, "learning_rate": 0.0001, "loss": 0.0101, "step": 144580 }, { "epoch": 951.25, "grad_norm": 1.165041208267212, "learning_rate": 0.0001, "loss": 0.01, "step": 144590 }, { "epoch": 951.3157894736842, "grad_norm": 1.2488925457000732, "learning_rate": 0.0001, "loss": 0.0094, "step": 144600 }, { "epoch": 951.3815789473684, "grad_norm": 1.0464668273925781, "learning_rate": 0.0001, "loss": 0.0094, "step": 144610 }, { "epoch": 951.4473684210526, "grad_norm": 0.7799762487411499, "learning_rate": 0.0001, "loss": 0.0089, "step": 144620 }, { "epoch": 951.5131578947369, "grad_norm": 0.6983115077018738, "learning_rate": 0.0001, "loss": 0.0121, "step": 144630 }, { "epoch": 951.578947368421, "grad_norm": 1.1071887016296387, "learning_rate": 0.0001, "loss": 0.0111, "step": 144640 }, { "epoch": 951.6447368421053, "grad_norm": 0.8498964309692383, "learning_rate": 0.0001, "loss": 0.011, "step": 144650 }, { "epoch": 951.7105263157895, "grad_norm": 0.8821257948875427, "learning_rate": 0.0001, "loss": 0.0106, "step": 144660 }, { "epoch": 951.7763157894736, "grad_norm": 0.9334793090820312, "learning_rate": 0.0001, "loss": 0.0115, "step": 144670 }, { "epoch": 951.8421052631579, "grad_norm": 1.2155643701553345, "learning_rate": 0.0001, "loss": 0.0076, "step": 144680 }, { "epoch": 951.9078947368421, "grad_norm": 0.8632481694221497, "learning_rate": 0.0001, "loss": 0.0091, "step": 144690 }, { "epoch": 951.9736842105264, "grad_norm": 1.1241223812103271, "learning_rate": 0.0001, "loss": 0.0102, "step": 144700 }, { "epoch": 952.0394736842105, "grad_norm": 1.0715571641921997, "learning_rate": 0.0001, "loss": 0.0082, "step": 144710 }, { "epoch": 952.1052631578947, "grad_norm": 1.1770998239517212, "learning_rate": 0.0001, "loss": 0.0089, "step": 144720 }, { "epoch": 952.171052631579, "grad_norm": 1.2517147064208984, "learning_rate": 0.0001, "loss": 0.0099, "step": 144730 }, { "epoch": 952.2368421052631, "grad_norm": 1.0367016792297363, "learning_rate": 0.0001, "loss": 0.0096, "step": 144740 }, { "epoch": 952.3026315789474, "grad_norm": 0.8607826232910156, "learning_rate": 0.0001, "loss": 0.0127, "step": 144750 }, { "epoch": 952.3684210526316, "grad_norm": 1.0406628847122192, "learning_rate": 0.0001, "loss": 0.0099, "step": 144760 }, { "epoch": 952.4342105263158, "grad_norm": 0.97654128074646, "learning_rate": 0.0001, "loss": 0.0096, "step": 144770 }, { "epoch": 952.5, "grad_norm": 0.7601302266120911, "learning_rate": 0.0001, "loss": 0.0101, "step": 144780 }, { "epoch": 952.5657894736842, "grad_norm": 0.7820169925689697, "learning_rate": 0.0001, "loss": 0.0099, "step": 144790 }, { "epoch": 952.6315789473684, "grad_norm": 0.8376019597053528, "learning_rate": 0.0001, "loss": 0.0105, "step": 144800 }, { "epoch": 952.6973684210526, "grad_norm": 1.023985505104065, "learning_rate": 0.0001, "loss": 0.01, "step": 144810 }, { "epoch": 952.7631578947369, "grad_norm": 0.743279218673706, "learning_rate": 0.0001, "loss": 0.0096, "step": 144820 }, { "epoch": 952.828947368421, "grad_norm": 1.3411636352539062, "learning_rate": 0.0001, "loss": 0.0102, "step": 144830 }, { "epoch": 952.8947368421053, "grad_norm": 1.0974597930908203, "learning_rate": 0.0001, "loss": 0.0095, "step": 144840 }, { "epoch": 952.9605263157895, "grad_norm": 1.0889559984207153, "learning_rate": 0.0001, "loss": 0.0109, "step": 144850 }, { "epoch": 953.0263157894736, "grad_norm": 1.1542203426361084, "learning_rate": 0.0001, "loss": 0.009, "step": 144860 }, { "epoch": 953.0921052631579, "grad_norm": 1.394636631011963, "learning_rate": 0.0001, "loss": 0.0084, "step": 144870 }, { "epoch": 953.1578947368421, "grad_norm": 1.16937255859375, "learning_rate": 0.0001, "loss": 0.0099, "step": 144880 }, { "epoch": 953.2236842105264, "grad_norm": 1.2651804685592651, "learning_rate": 0.0001, "loss": 0.0124, "step": 144890 }, { "epoch": 953.2894736842105, "grad_norm": 1.2077585458755493, "learning_rate": 0.0001, "loss": 0.0103, "step": 144900 }, { "epoch": 953.3552631578947, "grad_norm": 1.1569517850875854, "learning_rate": 0.0001, "loss": 0.0095, "step": 144910 }, { "epoch": 953.421052631579, "grad_norm": 1.34952712059021, "learning_rate": 0.0001, "loss": 0.0095, "step": 144920 }, { "epoch": 953.4868421052631, "grad_norm": 1.0990208387374878, "learning_rate": 0.0001, "loss": 0.0104, "step": 144930 }, { "epoch": 953.5526315789474, "grad_norm": 0.9614241719245911, "learning_rate": 0.0001, "loss": 0.0085, "step": 144940 }, { "epoch": 953.6184210526316, "grad_norm": 1.349439024925232, "learning_rate": 0.0001, "loss": 0.0086, "step": 144950 }, { "epoch": 953.6842105263158, "grad_norm": 1.501696228981018, "learning_rate": 0.0001, "loss": 0.0097, "step": 144960 }, { "epoch": 953.75, "grad_norm": 1.1203855276107788, "learning_rate": 0.0001, "loss": 0.0078, "step": 144970 }, { "epoch": 953.8157894736842, "grad_norm": 1.2413620948791504, "learning_rate": 0.0001, "loss": 0.0101, "step": 144980 }, { "epoch": 953.8815789473684, "grad_norm": 1.3755046129226685, "learning_rate": 0.0001, "loss": 0.0107, "step": 144990 }, { "epoch": 953.9473684210526, "grad_norm": 1.0295451879501343, "learning_rate": 0.0001, "loss": 0.0097, "step": 145000 }, { "epoch": 954.0131578947369, "grad_norm": 1.3562036752700806, "learning_rate": 0.0001, "loss": 0.0096, "step": 145010 }, { "epoch": 954.078947368421, "grad_norm": 1.4135730266571045, "learning_rate": 0.0001, "loss": 0.0144, "step": 145020 }, { "epoch": 954.1447368421053, "grad_norm": 1.433528184890747, "learning_rate": 0.0001, "loss": 0.0096, "step": 145030 }, { "epoch": 954.2105263157895, "grad_norm": 1.3277394771575928, "learning_rate": 0.0001, "loss": 0.0106, "step": 145040 }, { "epoch": 954.2763157894736, "grad_norm": 0.9119917154312134, "learning_rate": 0.0001, "loss": 0.0089, "step": 145050 }, { "epoch": 954.3421052631579, "grad_norm": 1.1164143085479736, "learning_rate": 0.0001, "loss": 0.0092, "step": 145060 }, { "epoch": 954.4078947368421, "grad_norm": 1.1159082651138306, "learning_rate": 0.0001, "loss": 0.0095, "step": 145070 }, { "epoch": 954.4736842105264, "grad_norm": 1.0839544534683228, "learning_rate": 0.0001, "loss": 0.0092, "step": 145080 }, { "epoch": 954.5394736842105, "grad_norm": 0.9123933911323547, "learning_rate": 0.0001, "loss": 0.0095, "step": 145090 }, { "epoch": 954.6052631578947, "grad_norm": 0.9606594443321228, "learning_rate": 0.0001, "loss": 0.0092, "step": 145100 }, { "epoch": 954.671052631579, "grad_norm": 0.8391530513763428, "learning_rate": 0.0001, "loss": 0.0083, "step": 145110 }, { "epoch": 954.7368421052631, "grad_norm": 0.8009461164474487, "learning_rate": 0.0001, "loss": 0.0086, "step": 145120 }, { "epoch": 954.8026315789474, "grad_norm": 1.1401244401931763, "learning_rate": 0.0001, "loss": 0.0111, "step": 145130 }, { "epoch": 954.8684210526316, "grad_norm": 1.1111904382705688, "learning_rate": 0.0001, "loss": 0.0087, "step": 145140 }, { "epoch": 954.9342105263158, "grad_norm": 0.8661221265792847, "learning_rate": 0.0001, "loss": 0.01, "step": 145150 }, { "epoch": 955.0, "grad_norm": 1.0058561563491821, "learning_rate": 0.0001, "loss": 0.0094, "step": 145160 }, { "epoch": 955.0657894736842, "grad_norm": 1.0411880016326904, "learning_rate": 0.0001, "loss": 0.0092, "step": 145170 }, { "epoch": 955.1315789473684, "grad_norm": 1.146001935005188, "learning_rate": 0.0001, "loss": 0.0104, "step": 145180 }, { "epoch": 955.1973684210526, "grad_norm": 1.115849256515503, "learning_rate": 0.0001, "loss": 0.0093, "step": 145190 }, { "epoch": 955.2631578947369, "grad_norm": 0.991651713848114, "learning_rate": 0.0001, "loss": 0.0101, "step": 145200 }, { "epoch": 955.328947368421, "grad_norm": 1.1527934074401855, "learning_rate": 0.0001, "loss": 0.0098, "step": 145210 }, { "epoch": 955.3947368421053, "grad_norm": 1.2311185598373413, "learning_rate": 0.0001, "loss": 0.0094, "step": 145220 }, { "epoch": 955.4605263157895, "grad_norm": 1.1631335020065308, "learning_rate": 0.0001, "loss": 0.0093, "step": 145230 }, { "epoch": 955.5263157894736, "grad_norm": 0.657287061214447, "learning_rate": 0.0001, "loss": 0.0104, "step": 145240 }, { "epoch": 955.5921052631579, "grad_norm": 0.9889928698539734, "learning_rate": 0.0001, "loss": 0.0108, "step": 145250 }, { "epoch": 955.6578947368421, "grad_norm": 0.9157257676124573, "learning_rate": 0.0001, "loss": 0.0081, "step": 145260 }, { "epoch": 955.7236842105264, "grad_norm": 0.983443558216095, "learning_rate": 0.0001, "loss": 0.011, "step": 145270 }, { "epoch": 955.7894736842105, "grad_norm": 1.1161870956420898, "learning_rate": 0.0001, "loss": 0.0086, "step": 145280 }, { "epoch": 955.8552631578947, "grad_norm": 0.9558767080307007, "learning_rate": 0.0001, "loss": 0.0097, "step": 145290 }, { "epoch": 955.921052631579, "grad_norm": 1.0487935543060303, "learning_rate": 0.0001, "loss": 0.0095, "step": 145300 }, { "epoch": 955.9868421052631, "grad_norm": 0.7724936604499817, "learning_rate": 0.0001, "loss": 0.0088, "step": 145310 }, { "epoch": 956.0526315789474, "grad_norm": 1.0620529651641846, "learning_rate": 0.0001, "loss": 0.0087, "step": 145320 }, { "epoch": 956.1184210526316, "grad_norm": 1.1085588932037354, "learning_rate": 0.0001, "loss": 0.0121, "step": 145330 }, { "epoch": 956.1842105263158, "grad_norm": 1.2441157102584839, "learning_rate": 0.0001, "loss": 0.0095, "step": 145340 }, { "epoch": 956.25, "grad_norm": 1.0353556871414185, "learning_rate": 0.0001, "loss": 0.0087, "step": 145350 }, { "epoch": 956.3157894736842, "grad_norm": 1.4679001569747925, "learning_rate": 0.0001, "loss": 0.0092, "step": 145360 }, { "epoch": 956.3815789473684, "grad_norm": 1.2972333431243896, "learning_rate": 0.0001, "loss": 0.0103, "step": 145370 }, { "epoch": 956.4473684210526, "grad_norm": 0.9384067058563232, "learning_rate": 0.0001, "loss": 0.0099, "step": 145380 }, { "epoch": 956.5131578947369, "grad_norm": 1.0650990009307861, "learning_rate": 0.0001, "loss": 0.0091, "step": 145390 }, { "epoch": 956.578947368421, "grad_norm": 1.5110204219818115, "learning_rate": 0.0001, "loss": 0.0116, "step": 145400 }, { "epoch": 956.6447368421053, "grad_norm": 0.7420325875282288, "learning_rate": 0.0001, "loss": 0.0083, "step": 145410 }, { "epoch": 956.7105263157895, "grad_norm": 1.3069480657577515, "learning_rate": 0.0001, "loss": 0.0097, "step": 145420 }, { "epoch": 956.7763157894736, "grad_norm": 1.2609871625900269, "learning_rate": 0.0001, "loss": 0.0093, "step": 145430 }, { "epoch": 956.8421052631579, "grad_norm": 1.0048247575759888, "learning_rate": 0.0001, "loss": 0.0097, "step": 145440 }, { "epoch": 956.9078947368421, "grad_norm": 1.0545076131820679, "learning_rate": 0.0001, "loss": 0.0101, "step": 145450 }, { "epoch": 956.9736842105264, "grad_norm": 0.8621100187301636, "learning_rate": 0.0001, "loss": 0.009, "step": 145460 }, { "epoch": 957.0394736842105, "grad_norm": 1.0694085359573364, "learning_rate": 0.0001, "loss": 0.0088, "step": 145470 }, { "epoch": 957.1052631578947, "grad_norm": 1.2032498121261597, "learning_rate": 0.0001, "loss": 0.0088, "step": 145480 }, { "epoch": 957.171052631579, "grad_norm": 0.7613986134529114, "learning_rate": 0.0001, "loss": 0.0111, "step": 145490 }, { "epoch": 957.2368421052631, "grad_norm": 0.8016976714134216, "learning_rate": 0.0001, "loss": 0.0089, "step": 145500 }, { "epoch": 957.3026315789474, "grad_norm": 1.1267141103744507, "learning_rate": 0.0001, "loss": 0.0086, "step": 145510 }, { "epoch": 957.3684210526316, "grad_norm": 1.0526936054229736, "learning_rate": 0.0001, "loss": 0.0085, "step": 145520 }, { "epoch": 957.4342105263158, "grad_norm": 1.1312087774276733, "learning_rate": 0.0001, "loss": 0.0084, "step": 145530 }, { "epoch": 957.5, "grad_norm": 0.8225216865539551, "learning_rate": 0.0001, "loss": 0.0079, "step": 145540 }, { "epoch": 957.5657894736842, "grad_norm": 1.3235195875167847, "learning_rate": 0.0001, "loss": 0.0094, "step": 145550 }, { "epoch": 957.6315789473684, "grad_norm": 0.9246519804000854, "learning_rate": 0.0001, "loss": 0.0103, "step": 145560 }, { "epoch": 957.6973684210526, "grad_norm": 1.227251648902893, "learning_rate": 0.0001, "loss": 0.0106, "step": 145570 }, { "epoch": 957.7631578947369, "grad_norm": 1.1541143655776978, "learning_rate": 0.0001, "loss": 0.0088, "step": 145580 }, { "epoch": 957.828947368421, "grad_norm": 1.001869797706604, "learning_rate": 0.0001, "loss": 0.011, "step": 145590 }, { "epoch": 957.8947368421053, "grad_norm": 1.1103819608688354, "learning_rate": 0.0001, "loss": 0.0101, "step": 145600 }, { "epoch": 957.9605263157895, "grad_norm": 1.1512665748596191, "learning_rate": 0.0001, "loss": 0.0088, "step": 145610 }, { "epoch": 958.0263157894736, "grad_norm": 1.135569453239441, "learning_rate": 0.0001, "loss": 0.0106, "step": 145620 }, { "epoch": 958.0921052631579, "grad_norm": 0.9894860982894897, "learning_rate": 0.0001, "loss": 0.0084, "step": 145630 }, { "epoch": 958.1578947368421, "grad_norm": 1.2264050245285034, "learning_rate": 0.0001, "loss": 0.0096, "step": 145640 }, { "epoch": 958.2236842105264, "grad_norm": 0.9081289768218994, "learning_rate": 0.0001, "loss": 0.0111, "step": 145650 }, { "epoch": 958.2894736842105, "grad_norm": 1.1225621700286865, "learning_rate": 0.0001, "loss": 0.0076, "step": 145660 }, { "epoch": 958.3552631578947, "grad_norm": 1.4110243320465088, "learning_rate": 0.0001, "loss": 0.0086, "step": 145670 }, { "epoch": 958.421052631579, "grad_norm": 1.2238539457321167, "learning_rate": 0.0001, "loss": 0.0086, "step": 145680 }, { "epoch": 958.4868421052631, "grad_norm": 0.7691843509674072, "learning_rate": 0.0001, "loss": 0.0087, "step": 145690 }, { "epoch": 958.5526315789474, "grad_norm": 1.0191400051116943, "learning_rate": 0.0001, "loss": 0.0097, "step": 145700 }, { "epoch": 958.6184210526316, "grad_norm": 1.1331918239593506, "learning_rate": 0.0001, "loss": 0.0098, "step": 145710 }, { "epoch": 958.6842105263158, "grad_norm": 1.3987983465194702, "learning_rate": 0.0001, "loss": 0.0103, "step": 145720 }, { "epoch": 958.75, "grad_norm": 1.1458479166030884, "learning_rate": 0.0001, "loss": 0.0092, "step": 145730 }, { "epoch": 958.8157894736842, "grad_norm": 0.9019910097122192, "learning_rate": 0.0001, "loss": 0.0104, "step": 145740 }, { "epoch": 958.8815789473684, "grad_norm": 1.2355693578720093, "learning_rate": 0.0001, "loss": 0.0116, "step": 145750 }, { "epoch": 958.9473684210526, "grad_norm": 1.0789332389831543, "learning_rate": 0.0001, "loss": 0.0098, "step": 145760 }, { "epoch": 959.0131578947369, "grad_norm": 1.0482940673828125, "learning_rate": 0.0001, "loss": 0.0073, "step": 145770 }, { "epoch": 959.078947368421, "grad_norm": 1.026578426361084, "learning_rate": 0.0001, "loss": 0.0087, "step": 145780 }, { "epoch": 959.1447368421053, "grad_norm": 1.144579529762268, "learning_rate": 0.0001, "loss": 0.0093, "step": 145790 }, { "epoch": 959.2105263157895, "grad_norm": 1.0373647212982178, "learning_rate": 0.0001, "loss": 0.0102, "step": 145800 }, { "epoch": 959.2763157894736, "grad_norm": 1.0401736497879028, "learning_rate": 0.0001, "loss": 0.0104, "step": 145810 }, { "epoch": 959.3421052631579, "grad_norm": 1.3450522422790527, "learning_rate": 0.0001, "loss": 0.012, "step": 145820 }, { "epoch": 959.4078947368421, "grad_norm": 0.786882221698761, "learning_rate": 0.0001, "loss": 0.0085, "step": 145830 }, { "epoch": 959.4736842105264, "grad_norm": 1.122706413269043, "learning_rate": 0.0001, "loss": 0.0091, "step": 145840 }, { "epoch": 959.5394736842105, "grad_norm": 1.224389672279358, "learning_rate": 0.0001, "loss": 0.0099, "step": 145850 }, { "epoch": 959.6052631578947, "grad_norm": 0.8540865778923035, "learning_rate": 0.0001, "loss": 0.0102, "step": 145860 }, { "epoch": 959.671052631579, "grad_norm": 1.0747767686843872, "learning_rate": 0.0001, "loss": 0.0094, "step": 145870 }, { "epoch": 959.7368421052631, "grad_norm": 0.7939700484275818, "learning_rate": 0.0001, "loss": 0.0091, "step": 145880 }, { "epoch": 959.8026315789474, "grad_norm": 0.817993700504303, "learning_rate": 0.0001, "loss": 0.0092, "step": 145890 }, { "epoch": 959.8684210526316, "grad_norm": 0.7488849759101868, "learning_rate": 0.0001, "loss": 0.0081, "step": 145900 }, { "epoch": 959.9342105263158, "grad_norm": 1.1702375411987305, "learning_rate": 0.0001, "loss": 0.0093, "step": 145910 }, { "epoch": 960.0, "grad_norm": 1.0733966827392578, "learning_rate": 0.0001, "loss": 0.0089, "step": 145920 }, { "epoch": 960.0657894736842, "grad_norm": 0.836497962474823, "learning_rate": 0.0001, "loss": 0.0099, "step": 145930 }, { "epoch": 960.1315789473684, "grad_norm": 1.1901671886444092, "learning_rate": 0.0001, "loss": 0.0103, "step": 145940 }, { "epoch": 960.1973684210526, "grad_norm": 1.657729148864746, "learning_rate": 0.0001, "loss": 0.0094, "step": 145950 }, { "epoch": 960.2631578947369, "grad_norm": 1.0714272260665894, "learning_rate": 0.0001, "loss": 0.01, "step": 145960 }, { "epoch": 960.328947368421, "grad_norm": 1.1627241373062134, "learning_rate": 0.0001, "loss": 0.0102, "step": 145970 }, { "epoch": 960.3947368421053, "grad_norm": 0.7160118222236633, "learning_rate": 0.0001, "loss": 0.0099, "step": 145980 }, { "epoch": 960.4605263157895, "grad_norm": 1.2324270009994507, "learning_rate": 0.0001, "loss": 0.0085, "step": 145990 }, { "epoch": 960.5263157894736, "grad_norm": 0.9793133735656738, "learning_rate": 0.0001, "loss": 0.0106, "step": 146000 }, { "epoch": 960.5921052631579, "grad_norm": 1.2766015529632568, "learning_rate": 0.0001, "loss": 0.009, "step": 146010 }, { "epoch": 960.6578947368421, "grad_norm": 0.7441828846931458, "learning_rate": 0.0001, "loss": 0.0088, "step": 146020 }, { "epoch": 960.7236842105264, "grad_norm": 1.006022572517395, "learning_rate": 0.0001, "loss": 0.0096, "step": 146030 }, { "epoch": 960.7894736842105, "grad_norm": 0.781061053276062, "learning_rate": 0.0001, "loss": 0.009, "step": 146040 }, { "epoch": 960.8552631578947, "grad_norm": 1.0156453847885132, "learning_rate": 0.0001, "loss": 0.0081, "step": 146050 }, { "epoch": 960.921052631579, "grad_norm": 1.5238510370254517, "learning_rate": 0.0001, "loss": 0.0102, "step": 146060 }, { "epoch": 960.9868421052631, "grad_norm": 1.1294466257095337, "learning_rate": 0.0001, "loss": 0.0087, "step": 146070 }, { "epoch": 961.0526315789474, "grad_norm": 1.103088140487671, "learning_rate": 0.0001, "loss": 0.0095, "step": 146080 }, { "epoch": 961.1184210526316, "grad_norm": 1.1292765140533447, "learning_rate": 0.0001, "loss": 0.0102, "step": 146090 }, { "epoch": 961.1842105263158, "grad_norm": 1.060682773590088, "learning_rate": 0.0001, "loss": 0.0116, "step": 146100 }, { "epoch": 961.25, "grad_norm": 0.584579348564148, "learning_rate": 0.0001, "loss": 0.0108, "step": 146110 }, { "epoch": 961.3157894736842, "grad_norm": 0.8006397485733032, "learning_rate": 0.0001, "loss": 0.0094, "step": 146120 }, { "epoch": 961.3815789473684, "grad_norm": 0.8636497855186462, "learning_rate": 0.0001, "loss": 0.0088, "step": 146130 }, { "epoch": 961.4473684210526, "grad_norm": 0.9201158881187439, "learning_rate": 0.0001, "loss": 0.0098, "step": 146140 }, { "epoch": 961.5131578947369, "grad_norm": 1.3234913349151611, "learning_rate": 0.0001, "loss": 0.0106, "step": 146150 }, { "epoch": 961.578947368421, "grad_norm": 0.9483774900436401, "learning_rate": 0.0001, "loss": 0.0084, "step": 146160 }, { "epoch": 961.6447368421053, "grad_norm": 0.9402949213981628, "learning_rate": 0.0001, "loss": 0.0102, "step": 146170 }, { "epoch": 961.7105263157895, "grad_norm": 0.809597909450531, "learning_rate": 0.0001, "loss": 0.0087, "step": 146180 }, { "epoch": 961.7763157894736, "grad_norm": 1.229201078414917, "learning_rate": 0.0001, "loss": 0.0109, "step": 146190 }, { "epoch": 961.8421052631579, "grad_norm": 1.1061270236968994, "learning_rate": 0.0001, "loss": 0.0082, "step": 146200 }, { "epoch": 961.9078947368421, "grad_norm": 0.8046864867210388, "learning_rate": 0.0001, "loss": 0.0089, "step": 146210 }, { "epoch": 961.9736842105264, "grad_norm": 0.7022920846939087, "learning_rate": 0.0001, "loss": 0.0078, "step": 146220 }, { "epoch": 962.0394736842105, "grad_norm": 1.0460927486419678, "learning_rate": 0.0001, "loss": 0.0112, "step": 146230 }, { "epoch": 962.1052631578947, "grad_norm": 1.061414361000061, "learning_rate": 0.0001, "loss": 0.0118, "step": 146240 }, { "epoch": 962.171052631579, "grad_norm": 0.8802205324172974, "learning_rate": 0.0001, "loss": 0.0105, "step": 146250 }, { "epoch": 962.2368421052631, "grad_norm": 1.0647673606872559, "learning_rate": 0.0001, "loss": 0.009, "step": 146260 }, { "epoch": 962.3026315789474, "grad_norm": 1.1186468601226807, "learning_rate": 0.0001, "loss": 0.0083, "step": 146270 }, { "epoch": 962.3684210526316, "grad_norm": 1.0472915172576904, "learning_rate": 0.0001, "loss": 0.0095, "step": 146280 }, { "epoch": 962.4342105263158, "grad_norm": 0.8644057512283325, "learning_rate": 0.0001, "loss": 0.0098, "step": 146290 }, { "epoch": 962.5, "grad_norm": 0.8172399997711182, "learning_rate": 0.0001, "loss": 0.0093, "step": 146300 }, { "epoch": 962.5657894736842, "grad_norm": 0.9596365094184875, "learning_rate": 0.0001, "loss": 0.009, "step": 146310 }, { "epoch": 962.6315789473684, "grad_norm": 0.8653599619865417, "learning_rate": 0.0001, "loss": 0.01, "step": 146320 }, { "epoch": 962.6973684210526, "grad_norm": 0.9772462844848633, "learning_rate": 0.0001, "loss": 0.0091, "step": 146330 }, { "epoch": 962.7631578947369, "grad_norm": 1.0098869800567627, "learning_rate": 0.0001, "loss": 0.0115, "step": 146340 }, { "epoch": 962.828947368421, "grad_norm": 1.2209604978561401, "learning_rate": 0.0001, "loss": 0.0098, "step": 146350 }, { "epoch": 962.8947368421053, "grad_norm": 0.9694502949714661, "learning_rate": 0.0001, "loss": 0.0073, "step": 146360 }, { "epoch": 962.9605263157895, "grad_norm": 0.981524646282196, "learning_rate": 0.0001, "loss": 0.0083, "step": 146370 }, { "epoch": 963.0263157894736, "grad_norm": 1.0773963928222656, "learning_rate": 0.0001, "loss": 0.0085, "step": 146380 }, { "epoch": 963.0921052631579, "grad_norm": 1.1981137990951538, "learning_rate": 0.0001, "loss": 0.0084, "step": 146390 }, { "epoch": 963.1578947368421, "grad_norm": 1.1366533041000366, "learning_rate": 0.0001, "loss": 0.0109, "step": 146400 }, { "epoch": 963.2236842105264, "grad_norm": 1.1924341917037964, "learning_rate": 0.0001, "loss": 0.0085, "step": 146410 }, { "epoch": 963.2894736842105, "grad_norm": 0.9857580065727234, "learning_rate": 0.0001, "loss": 0.0085, "step": 146420 }, { "epoch": 963.3552631578947, "grad_norm": 1.2033591270446777, "learning_rate": 0.0001, "loss": 0.0104, "step": 146430 }, { "epoch": 963.421052631579, "grad_norm": 1.2146192789077759, "learning_rate": 0.0001, "loss": 0.0088, "step": 146440 }, { "epoch": 963.4868421052631, "grad_norm": 1.1512936353683472, "learning_rate": 0.0001, "loss": 0.0087, "step": 146450 }, { "epoch": 963.5526315789474, "grad_norm": 0.6414240598678589, "learning_rate": 0.0001, "loss": 0.0101, "step": 146460 }, { "epoch": 963.6184210526316, "grad_norm": 0.7209314107894897, "learning_rate": 0.0001, "loss": 0.0101, "step": 146470 }, { "epoch": 963.6842105263158, "grad_norm": 0.8891003727912903, "learning_rate": 0.0001, "loss": 0.0087, "step": 146480 }, { "epoch": 963.75, "grad_norm": 0.7356894016265869, "learning_rate": 0.0001, "loss": 0.01, "step": 146490 }, { "epoch": 963.8157894736842, "grad_norm": 0.9845222234725952, "learning_rate": 0.0001, "loss": 0.0083, "step": 146500 }, { "epoch": 963.8815789473684, "grad_norm": 1.4602724313735962, "learning_rate": 0.0001, "loss": 0.0112, "step": 146510 }, { "epoch": 963.9473684210526, "grad_norm": 0.9599493741989136, "learning_rate": 0.0001, "loss": 0.0086, "step": 146520 }, { "epoch": 964.0131578947369, "grad_norm": 1.1864409446716309, "learning_rate": 0.0001, "loss": 0.0104, "step": 146530 }, { "epoch": 964.078947368421, "grad_norm": 1.6686468124389648, "learning_rate": 0.0001, "loss": 0.0092, "step": 146540 }, { "epoch": 964.1447368421053, "grad_norm": 1.6375855207443237, "learning_rate": 0.0001, "loss": 0.0089, "step": 146550 }, { "epoch": 964.2105263157895, "grad_norm": 1.250057339668274, "learning_rate": 0.0001, "loss": 0.0079, "step": 146560 }, { "epoch": 964.2763157894736, "grad_norm": 1.3468494415283203, "learning_rate": 0.0001, "loss": 0.0084, "step": 146570 }, { "epoch": 964.3421052631579, "grad_norm": 1.369868516921997, "learning_rate": 0.0001, "loss": 0.0095, "step": 146580 }, { "epoch": 964.4078947368421, "grad_norm": 0.8721538186073303, "learning_rate": 0.0001, "loss": 0.01, "step": 146590 }, { "epoch": 964.4736842105264, "grad_norm": 1.0958600044250488, "learning_rate": 0.0001, "loss": 0.0084, "step": 146600 }, { "epoch": 964.5394736842105, "grad_norm": 1.1461358070373535, "learning_rate": 0.0001, "loss": 0.0098, "step": 146610 }, { "epoch": 964.6052631578947, "grad_norm": 1.3515912294387817, "learning_rate": 0.0001, "loss": 0.0093, "step": 146620 }, { "epoch": 964.671052631579, "grad_norm": 1.3492871522903442, "learning_rate": 0.0001, "loss": 0.0092, "step": 146630 }, { "epoch": 964.7368421052631, "grad_norm": 0.7146100997924805, "learning_rate": 0.0001, "loss": 0.0079, "step": 146640 }, { "epoch": 964.8026315789474, "grad_norm": 1.150172233581543, "learning_rate": 0.0001, "loss": 0.0093, "step": 146650 }, { "epoch": 964.8684210526316, "grad_norm": 1.4243590831756592, "learning_rate": 0.0001, "loss": 0.011, "step": 146660 }, { "epoch": 964.9342105263158, "grad_norm": 1.0917390584945679, "learning_rate": 0.0001, "loss": 0.0095, "step": 146670 }, { "epoch": 965.0, "grad_norm": 0.888741135597229, "learning_rate": 0.0001, "loss": 0.011, "step": 146680 }, { "epoch": 965.0657894736842, "grad_norm": 0.8921936750411987, "learning_rate": 0.0001, "loss": 0.0091, "step": 146690 }, { "epoch": 965.1315789473684, "grad_norm": 1.1336225271224976, "learning_rate": 0.0001, "loss": 0.0086, "step": 146700 }, { "epoch": 965.1973684210526, "grad_norm": 1.2168223857879639, "learning_rate": 0.0001, "loss": 0.0094, "step": 146710 }, { "epoch": 965.2631578947369, "grad_norm": 0.9628434181213379, "learning_rate": 0.0001, "loss": 0.0104, "step": 146720 }, { "epoch": 965.328947368421, "grad_norm": 0.9987488389015198, "learning_rate": 0.0001, "loss": 0.0092, "step": 146730 }, { "epoch": 965.3947368421053, "grad_norm": 1.300894856452942, "learning_rate": 0.0001, "loss": 0.0113, "step": 146740 }, { "epoch": 965.4605263157895, "grad_norm": 3.1058502197265625, "learning_rate": 0.0001, "loss": 0.0086, "step": 146750 }, { "epoch": 965.5263157894736, "grad_norm": 1.6994332075119019, "learning_rate": 0.0001, "loss": 0.0104, "step": 146760 }, { "epoch": 965.5921052631579, "grad_norm": 0.7340602874755859, "learning_rate": 0.0001, "loss": 0.0087, "step": 146770 }, { "epoch": 965.6578947368421, "grad_norm": 0.9732682108879089, "learning_rate": 0.0001, "loss": 0.0084, "step": 146780 }, { "epoch": 965.7236842105264, "grad_norm": 1.0283761024475098, "learning_rate": 0.0001, "loss": 0.0079, "step": 146790 }, { "epoch": 965.7894736842105, "grad_norm": 0.8493642807006836, "learning_rate": 0.0001, "loss": 0.0097, "step": 146800 }, { "epoch": 965.8552631578947, "grad_norm": 0.8938899636268616, "learning_rate": 0.0001, "loss": 0.0102, "step": 146810 }, { "epoch": 965.921052631579, "grad_norm": 0.8142083287239075, "learning_rate": 0.0001, "loss": 0.0098, "step": 146820 }, { "epoch": 965.9868421052631, "grad_norm": 0.9816213250160217, "learning_rate": 0.0001, "loss": 0.0088, "step": 146830 }, { "epoch": 966.0526315789474, "grad_norm": 0.7372590899467468, "learning_rate": 0.0001, "loss": 0.0084, "step": 146840 }, { "epoch": 966.1184210526316, "grad_norm": 1.1608000993728638, "learning_rate": 0.0001, "loss": 0.0111, "step": 146850 }, { "epoch": 966.1842105263158, "grad_norm": 1.0333927869796753, "learning_rate": 0.0001, "loss": 0.0095, "step": 146860 }, { "epoch": 966.25, "grad_norm": 1.392578125, "learning_rate": 0.0001, "loss": 0.0081, "step": 146870 }, { "epoch": 966.3157894736842, "grad_norm": 1.003764271736145, "learning_rate": 0.0001, "loss": 0.0091, "step": 146880 }, { "epoch": 966.3815789473684, "grad_norm": 1.026443362236023, "learning_rate": 0.0001, "loss": 0.0119, "step": 146890 }, { "epoch": 966.4473684210526, "grad_norm": 0.8039220571517944, "learning_rate": 0.0001, "loss": 0.009, "step": 146900 }, { "epoch": 966.5131578947369, "grad_norm": 1.3538212776184082, "learning_rate": 0.0001, "loss": 0.009, "step": 146910 }, { "epoch": 966.578947368421, "grad_norm": 1.2053329944610596, "learning_rate": 0.0001, "loss": 0.008, "step": 146920 }, { "epoch": 966.6447368421053, "grad_norm": 1.1715484857559204, "learning_rate": 0.0001, "loss": 0.0079, "step": 146930 }, { "epoch": 966.7105263157895, "grad_norm": 1.387887716293335, "learning_rate": 0.0001, "loss": 0.0082, "step": 146940 }, { "epoch": 966.7763157894736, "grad_norm": 0.7918421626091003, "learning_rate": 0.0001, "loss": 0.0098, "step": 146950 }, { "epoch": 966.8421052631579, "grad_norm": 1.122507929801941, "learning_rate": 0.0001, "loss": 0.0117, "step": 146960 }, { "epoch": 966.9078947368421, "grad_norm": 1.20033597946167, "learning_rate": 0.0001, "loss": 0.01, "step": 146970 }, { "epoch": 966.9736842105264, "grad_norm": 1.156383991241455, "learning_rate": 0.0001, "loss": 0.0104, "step": 146980 }, { "epoch": 967.0394736842105, "grad_norm": 0.760665774345398, "learning_rate": 0.0001, "loss": 0.011, "step": 146990 }, { "epoch": 967.1052631578947, "grad_norm": 0.9186726808547974, "learning_rate": 0.0001, "loss": 0.0101, "step": 147000 }, { "epoch": 967.171052631579, "grad_norm": 1.0184344053268433, "learning_rate": 0.0001, "loss": 0.011, "step": 147010 }, { "epoch": 967.2368421052631, "grad_norm": 1.3089451789855957, "learning_rate": 0.0001, "loss": 0.011, "step": 147020 }, { "epoch": 967.3026315789474, "grad_norm": 0.8527283072471619, "learning_rate": 0.0001, "loss": 0.0101, "step": 147030 }, { "epoch": 967.3684210526316, "grad_norm": 1.0763221979141235, "learning_rate": 0.0001, "loss": 0.0085, "step": 147040 }, { "epoch": 967.4342105263158, "grad_norm": 1.0858137607574463, "learning_rate": 0.0001, "loss": 0.0095, "step": 147050 }, { "epoch": 967.5, "grad_norm": 0.9243344068527222, "learning_rate": 0.0001, "loss": 0.0093, "step": 147060 }, { "epoch": 967.5657894736842, "grad_norm": 0.9228748083114624, "learning_rate": 0.0001, "loss": 0.007, "step": 147070 }, { "epoch": 967.6315789473684, "grad_norm": 1.0944608449935913, "learning_rate": 0.0001, "loss": 0.0086, "step": 147080 }, { "epoch": 967.6973684210526, "grad_norm": 0.9257031679153442, "learning_rate": 0.0001, "loss": 0.0109, "step": 147090 }, { "epoch": 967.7631578947369, "grad_norm": 1.181267261505127, "learning_rate": 0.0001, "loss": 0.0113, "step": 147100 }, { "epoch": 967.828947368421, "grad_norm": 1.0577452182769775, "learning_rate": 0.0001, "loss": 0.0096, "step": 147110 }, { "epoch": 967.8947368421053, "grad_norm": 0.8401322960853577, "learning_rate": 0.0001, "loss": 0.0093, "step": 147120 }, { "epoch": 967.9605263157895, "grad_norm": 0.9169796705245972, "learning_rate": 0.0001, "loss": 0.0085, "step": 147130 }, { "epoch": 968.0263157894736, "grad_norm": 0.8156391978263855, "learning_rate": 0.0001, "loss": 0.011, "step": 147140 }, { "epoch": 968.0921052631579, "grad_norm": 1.2525750398635864, "learning_rate": 0.0001, "loss": 0.0125, "step": 147150 }, { "epoch": 968.1578947368421, "grad_norm": 1.0658366680145264, "learning_rate": 0.0001, "loss": 0.0099, "step": 147160 }, { "epoch": 968.2236842105264, "grad_norm": 0.9549158215522766, "learning_rate": 0.0001, "loss": 0.0102, "step": 147170 }, { "epoch": 968.2894736842105, "grad_norm": 1.1841901540756226, "learning_rate": 0.0001, "loss": 0.0081, "step": 147180 }, { "epoch": 968.3552631578947, "grad_norm": 0.855679452419281, "learning_rate": 0.0001, "loss": 0.0086, "step": 147190 }, { "epoch": 968.421052631579, "grad_norm": 1.03905189037323, "learning_rate": 0.0001, "loss": 0.0095, "step": 147200 }, { "epoch": 968.4868421052631, "grad_norm": 0.7143714427947998, "learning_rate": 0.0001, "loss": 0.0105, "step": 147210 }, { "epoch": 968.5526315789474, "grad_norm": 0.8746243715286255, "learning_rate": 0.0001, "loss": 0.0093, "step": 147220 }, { "epoch": 968.6184210526316, "grad_norm": 0.8814296126365662, "learning_rate": 0.0001, "loss": 0.0079, "step": 147230 }, { "epoch": 968.6842105263158, "grad_norm": 0.7824357151985168, "learning_rate": 0.0001, "loss": 0.0087, "step": 147240 }, { "epoch": 968.75, "grad_norm": 0.8747170567512512, "learning_rate": 0.0001, "loss": 0.0087, "step": 147250 }, { "epoch": 968.8157894736842, "grad_norm": 1.2082691192626953, "learning_rate": 0.0001, "loss": 0.0089, "step": 147260 }, { "epoch": 968.8815789473684, "grad_norm": 1.2725180387496948, "learning_rate": 0.0001, "loss": 0.0086, "step": 147270 }, { "epoch": 968.9473684210526, "grad_norm": 1.4602564573287964, "learning_rate": 0.0001, "loss": 0.0093, "step": 147280 }, { "epoch": 969.0131578947369, "grad_norm": 0.9811068773269653, "learning_rate": 0.0001, "loss": 0.0118, "step": 147290 }, { "epoch": 969.078947368421, "grad_norm": 1.359781265258789, "learning_rate": 0.0001, "loss": 0.0085, "step": 147300 }, { "epoch": 969.1447368421053, "grad_norm": 1.2391654253005981, "learning_rate": 0.0001, "loss": 0.0099, "step": 147310 }, { "epoch": 969.2105263157895, "grad_norm": 1.193811297416687, "learning_rate": 0.0001, "loss": 0.0096, "step": 147320 }, { "epoch": 969.2763157894736, "grad_norm": 0.9369627237319946, "learning_rate": 0.0001, "loss": 0.0099, "step": 147330 }, { "epoch": 969.3421052631579, "grad_norm": 1.2277706861495972, "learning_rate": 0.0001, "loss": 0.0094, "step": 147340 }, { "epoch": 969.4078947368421, "grad_norm": 0.6797551512718201, "learning_rate": 0.0001, "loss": 0.0094, "step": 147350 }, { "epoch": 969.4736842105264, "grad_norm": 0.8890407085418701, "learning_rate": 0.0001, "loss": 0.0111, "step": 147360 }, { "epoch": 969.5394736842105, "grad_norm": 0.6497656106948853, "learning_rate": 0.0001, "loss": 0.0096, "step": 147370 }, { "epoch": 969.6052631578947, "grad_norm": 1.0025545358657837, "learning_rate": 0.0001, "loss": 0.0084, "step": 147380 }, { "epoch": 969.671052631579, "grad_norm": 1.0839303731918335, "learning_rate": 0.0001, "loss": 0.009, "step": 147390 }, { "epoch": 969.7368421052631, "grad_norm": 0.9364657402038574, "learning_rate": 0.0001, "loss": 0.0094, "step": 147400 }, { "epoch": 969.8026315789474, "grad_norm": 0.725238561630249, "learning_rate": 0.0001, "loss": 0.0093, "step": 147410 }, { "epoch": 969.8684210526316, "grad_norm": 0.7093734741210938, "learning_rate": 0.0001, "loss": 0.01, "step": 147420 }, { "epoch": 969.9342105263158, "grad_norm": 1.0567338466644287, "learning_rate": 0.0001, "loss": 0.0091, "step": 147430 }, { "epoch": 970.0, "grad_norm": 1.1926231384277344, "learning_rate": 0.0001, "loss": 0.0118, "step": 147440 }, { "epoch": 970.0657894736842, "grad_norm": 0.7869142889976501, "learning_rate": 0.0001, "loss": 0.0119, "step": 147450 }, { "epoch": 970.1315789473684, "grad_norm": 1.23822820186615, "learning_rate": 0.0001, "loss": 0.0091, "step": 147460 }, { "epoch": 970.1973684210526, "grad_norm": 1.007323145866394, "learning_rate": 0.0001, "loss": 0.0106, "step": 147470 }, { "epoch": 970.2631578947369, "grad_norm": 1.408695101737976, "learning_rate": 0.0001, "loss": 0.0079, "step": 147480 }, { "epoch": 970.328947368421, "grad_norm": 1.1631524562835693, "learning_rate": 0.0001, "loss": 0.0095, "step": 147490 }, { "epoch": 970.3947368421053, "grad_norm": 0.9714431166648865, "learning_rate": 0.0001, "loss": 0.0096, "step": 147500 }, { "epoch": 970.4605263157895, "grad_norm": 0.8139010071754456, "learning_rate": 0.0001, "loss": 0.009, "step": 147510 }, { "epoch": 970.5263157894736, "grad_norm": 1.2274115085601807, "learning_rate": 0.0001, "loss": 0.0085, "step": 147520 }, { "epoch": 970.5921052631579, "grad_norm": 1.0801491737365723, "learning_rate": 0.0001, "loss": 0.0085, "step": 147530 }, { "epoch": 970.6578947368421, "grad_norm": 0.7565146088600159, "learning_rate": 0.0001, "loss": 0.0083, "step": 147540 }, { "epoch": 970.7236842105264, "grad_norm": 0.9171380400657654, "learning_rate": 0.0001, "loss": 0.0111, "step": 147550 }, { "epoch": 970.7894736842105, "grad_norm": 0.9315175414085388, "learning_rate": 0.0001, "loss": 0.0102, "step": 147560 }, { "epoch": 970.8552631578947, "grad_norm": 0.9675357341766357, "learning_rate": 0.0001, "loss": 0.0087, "step": 147570 }, { "epoch": 970.921052631579, "grad_norm": 0.8446849584579468, "learning_rate": 0.0001, "loss": 0.0111, "step": 147580 }, { "epoch": 970.9868421052631, "grad_norm": 1.2151329517364502, "learning_rate": 0.0001, "loss": 0.0101, "step": 147590 }, { "epoch": 971.0526315789474, "grad_norm": 0.8625166416168213, "learning_rate": 0.0001, "loss": 0.0094, "step": 147600 }, { "epoch": 971.1184210526316, "grad_norm": 0.904151439666748, "learning_rate": 0.0001, "loss": 0.0091, "step": 147610 }, { "epoch": 971.1842105263158, "grad_norm": 1.3609187602996826, "learning_rate": 0.0001, "loss": 0.0103, "step": 147620 }, { "epoch": 971.25, "grad_norm": 2.187743663787842, "learning_rate": 0.0001, "loss": 0.0086, "step": 147630 }, { "epoch": 971.3157894736842, "grad_norm": 2.0440964698791504, "learning_rate": 0.0001, "loss": 0.0119, "step": 147640 }, { "epoch": 971.3815789473684, "grad_norm": 1.9944370985031128, "learning_rate": 0.0001, "loss": 0.0099, "step": 147650 }, { "epoch": 971.4473684210526, "grad_norm": 1.8147251605987549, "learning_rate": 0.0001, "loss": 0.0091, "step": 147660 }, { "epoch": 971.5131578947369, "grad_norm": 1.0225766897201538, "learning_rate": 0.0001, "loss": 0.0087, "step": 147670 }, { "epoch": 971.578947368421, "grad_norm": 1.2411375045776367, "learning_rate": 0.0001, "loss": 0.0088, "step": 147680 }, { "epoch": 971.6447368421053, "grad_norm": 1.3855613470077515, "learning_rate": 0.0001, "loss": 0.0088, "step": 147690 }, { "epoch": 971.7105263157895, "grad_norm": 1.0319336652755737, "learning_rate": 0.0001, "loss": 0.0071, "step": 147700 }, { "epoch": 971.7763157894736, "grad_norm": 1.0877528190612793, "learning_rate": 0.0001, "loss": 0.0118, "step": 147710 }, { "epoch": 971.8421052631579, "grad_norm": 1.5878164768218994, "learning_rate": 0.0001, "loss": 0.0098, "step": 147720 }, { "epoch": 971.9078947368421, "grad_norm": 1.735569715499878, "learning_rate": 0.0001, "loss": 0.0099, "step": 147730 }, { "epoch": 971.9736842105264, "grad_norm": 1.2337061166763306, "learning_rate": 0.0001, "loss": 0.0095, "step": 147740 }, { "epoch": 972.0394736842105, "grad_norm": 0.926618754863739, "learning_rate": 0.0001, "loss": 0.0104, "step": 147750 }, { "epoch": 972.1052631578947, "grad_norm": 0.951611340045929, "learning_rate": 0.0001, "loss": 0.01, "step": 147760 }, { "epoch": 972.171052631579, "grad_norm": 1.1412699222564697, "learning_rate": 0.0001, "loss": 0.0088, "step": 147770 }, { "epoch": 972.2368421052631, "grad_norm": 1.0385075807571411, "learning_rate": 0.0001, "loss": 0.009, "step": 147780 }, { "epoch": 972.3026315789474, "grad_norm": 0.8071399331092834, "learning_rate": 0.0001, "loss": 0.0088, "step": 147790 }, { "epoch": 972.3684210526316, "grad_norm": 1.131365418434143, "learning_rate": 0.0001, "loss": 0.011, "step": 147800 }, { "epoch": 972.4342105263158, "grad_norm": 1.10077702999115, "learning_rate": 0.0001, "loss": 0.0081, "step": 147810 }, { "epoch": 972.5, "grad_norm": 1.1625159978866577, "learning_rate": 0.0001, "loss": 0.0108, "step": 147820 }, { "epoch": 972.5657894736842, "grad_norm": 0.8516972064971924, "learning_rate": 0.0001, "loss": 0.0094, "step": 147830 }, { "epoch": 972.6315789473684, "grad_norm": 0.9543007612228394, "learning_rate": 0.0001, "loss": 0.0093, "step": 147840 }, { "epoch": 972.6973684210526, "grad_norm": 0.7978515028953552, "learning_rate": 0.0001, "loss": 0.009, "step": 147850 }, { "epoch": 972.7631578947369, "grad_norm": 1.0908616781234741, "learning_rate": 0.0001, "loss": 0.0124, "step": 147860 }, { "epoch": 972.828947368421, "grad_norm": 1.0295151472091675, "learning_rate": 0.0001, "loss": 0.0111, "step": 147870 }, { "epoch": 972.8947368421053, "grad_norm": 0.67076176404953, "learning_rate": 0.0001, "loss": 0.0119, "step": 147880 }, { "epoch": 972.9605263157895, "grad_norm": 1.1055384874343872, "learning_rate": 0.0001, "loss": 0.0125, "step": 147890 }, { "epoch": 973.0263157894736, "grad_norm": 0.9399993419647217, "learning_rate": 0.0001, "loss": 0.0101, "step": 147900 }, { "epoch": 973.0921052631579, "grad_norm": 0.9942353367805481, "learning_rate": 0.0001, "loss": 0.01, "step": 147910 }, { "epoch": 973.1578947368421, "grad_norm": 0.7977069020271301, "learning_rate": 0.0001, "loss": 0.0106, "step": 147920 }, { "epoch": 973.2236842105264, "grad_norm": 0.940716028213501, "learning_rate": 0.0001, "loss": 0.0094, "step": 147930 }, { "epoch": 973.2894736842105, "grad_norm": 0.5818641185760498, "learning_rate": 0.0001, "loss": 0.009, "step": 147940 }, { "epoch": 973.3552631578947, "grad_norm": 0.9071598052978516, "learning_rate": 0.0001, "loss": 0.0112, "step": 147950 }, { "epoch": 973.421052631579, "grad_norm": 0.9769167900085449, "learning_rate": 0.0001, "loss": 0.0097, "step": 147960 }, { "epoch": 973.4868421052631, "grad_norm": 0.7918068766593933, "learning_rate": 0.0001, "loss": 0.0114, "step": 147970 }, { "epoch": 973.5526315789474, "grad_norm": 1.44162917137146, "learning_rate": 0.0001, "loss": 0.0129, "step": 147980 }, { "epoch": 973.6184210526316, "grad_norm": 1.3190975189208984, "learning_rate": 0.0001, "loss": 0.0112, "step": 147990 }, { "epoch": 973.6842105263158, "grad_norm": 0.8876311182975769, "learning_rate": 0.0001, "loss": 0.0101, "step": 148000 }, { "epoch": 973.75, "grad_norm": 0.6711481213569641, "learning_rate": 0.0001, "loss": 0.0107, "step": 148010 }, { "epoch": 973.8157894736842, "grad_norm": 0.9575647115707397, "learning_rate": 0.0001, "loss": 0.0108, "step": 148020 }, { "epoch": 973.8815789473684, "grad_norm": 1.1577619314193726, "learning_rate": 0.0001, "loss": 0.0109, "step": 148030 }, { "epoch": 973.9473684210526, "grad_norm": 1.1602065563201904, "learning_rate": 0.0001, "loss": 0.011, "step": 148040 }, { "epoch": 974.0131578947369, "grad_norm": 1.1503347158432007, "learning_rate": 0.0001, "loss": 0.0104, "step": 148050 }, { "epoch": 974.078947368421, "grad_norm": 1.1186825037002563, "learning_rate": 0.0001, "loss": 0.0092, "step": 148060 }, { "epoch": 974.1447368421053, "grad_norm": 1.2692493200302124, "learning_rate": 0.0001, "loss": 0.0103, "step": 148070 }, { "epoch": 974.2105263157895, "grad_norm": 1.3095154762268066, "learning_rate": 0.0001, "loss": 0.0099, "step": 148080 }, { "epoch": 974.2763157894736, "grad_norm": 1.1788098812103271, "learning_rate": 0.0001, "loss": 0.0095, "step": 148090 }, { "epoch": 974.3421052631579, "grad_norm": 0.8337370753288269, "learning_rate": 0.0001, "loss": 0.0106, "step": 148100 }, { "epoch": 974.4078947368421, "grad_norm": 0.8174479007720947, "learning_rate": 0.0001, "loss": 0.0099, "step": 148110 }, { "epoch": 974.4736842105264, "grad_norm": 1.0070465803146362, "learning_rate": 0.0001, "loss": 0.0115, "step": 148120 }, { "epoch": 974.5394736842105, "grad_norm": 1.2746952772140503, "learning_rate": 0.0001, "loss": 0.0091, "step": 148130 }, { "epoch": 974.6052631578947, "grad_norm": 1.2314714193344116, "learning_rate": 0.0001, "loss": 0.0098, "step": 148140 }, { "epoch": 974.671052631579, "grad_norm": 0.9068085551261902, "learning_rate": 0.0001, "loss": 0.0113, "step": 148150 }, { "epoch": 974.7368421052631, "grad_norm": 1.3437057733535767, "learning_rate": 0.0001, "loss": 0.0115, "step": 148160 }, { "epoch": 974.8026315789474, "grad_norm": 1.2839545011520386, "learning_rate": 0.0001, "loss": 0.0122, "step": 148170 }, { "epoch": 974.8684210526316, "grad_norm": 1.1615701913833618, "learning_rate": 0.0001, "loss": 0.011, "step": 148180 }, { "epoch": 974.9342105263158, "grad_norm": 1.104874849319458, "learning_rate": 0.0001, "loss": 0.0112, "step": 148190 }, { "epoch": 975.0, "grad_norm": 1.0236458778381348, "learning_rate": 0.0001, "loss": 0.0125, "step": 148200 }, { "epoch": 975.0657894736842, "grad_norm": 0.8276492953300476, "learning_rate": 0.0001, "loss": 0.0117, "step": 148210 }, { "epoch": 975.1315789473684, "grad_norm": 0.7698920369148254, "learning_rate": 0.0001, "loss": 0.0099, "step": 148220 }, { "epoch": 975.1973684210526, "grad_norm": 1.0697041749954224, "learning_rate": 0.0001, "loss": 0.0123, "step": 148230 }, { "epoch": 975.2631578947369, "grad_norm": 0.6203532218933105, "learning_rate": 0.0001, "loss": 0.0114, "step": 148240 }, { "epoch": 975.328947368421, "grad_norm": 1.2870906591415405, "learning_rate": 0.0001, "loss": 0.0099, "step": 148250 }, { "epoch": 975.3947368421053, "grad_norm": 1.1638849973678589, "learning_rate": 0.0001, "loss": 0.0086, "step": 148260 }, { "epoch": 975.4605263157895, "grad_norm": 0.8770232200622559, "learning_rate": 0.0001, "loss": 0.0105, "step": 148270 }, { "epoch": 975.5263157894736, "grad_norm": 0.8042388558387756, "learning_rate": 0.0001, "loss": 0.009, "step": 148280 }, { "epoch": 975.5921052631579, "grad_norm": 1.1777013540267944, "learning_rate": 0.0001, "loss": 0.0083, "step": 148290 }, { "epoch": 975.6578947368421, "grad_norm": 1.2170054912567139, "learning_rate": 0.0001, "loss": 0.0082, "step": 148300 }, { "epoch": 975.7236842105264, "grad_norm": 1.1343997716903687, "learning_rate": 0.0001, "loss": 0.0101, "step": 148310 }, { "epoch": 975.7894736842105, "grad_norm": 0.9868319034576416, "learning_rate": 0.0001, "loss": 0.0096, "step": 148320 }, { "epoch": 975.8552631578947, "grad_norm": 1.1468241214752197, "learning_rate": 0.0001, "loss": 0.0107, "step": 148330 }, { "epoch": 975.921052631579, "grad_norm": 0.9679098129272461, "learning_rate": 0.0001, "loss": 0.0113, "step": 148340 }, { "epoch": 975.9868421052631, "grad_norm": 1.3126945495605469, "learning_rate": 0.0001, "loss": 0.0103, "step": 148350 }, { "epoch": 976.0526315789474, "grad_norm": 1.1734507083892822, "learning_rate": 0.0001, "loss": 0.0092, "step": 148360 }, { "epoch": 976.1184210526316, "grad_norm": 0.9262369871139526, "learning_rate": 0.0001, "loss": 0.01, "step": 148370 }, { "epoch": 976.1842105263158, "grad_norm": 1.1285349130630493, "learning_rate": 0.0001, "loss": 0.0095, "step": 148380 }, { "epoch": 976.25, "grad_norm": 0.9451377987861633, "learning_rate": 0.0001, "loss": 0.0108, "step": 148390 }, { "epoch": 976.3157894736842, "grad_norm": 1.0291858911514282, "learning_rate": 0.0001, "loss": 0.0098, "step": 148400 }, { "epoch": 976.3815789473684, "grad_norm": 1.141808271408081, "learning_rate": 0.0001, "loss": 0.0086, "step": 148410 }, { "epoch": 976.4473684210526, "grad_norm": 0.9214851260185242, "learning_rate": 0.0001, "loss": 0.0096, "step": 148420 }, { "epoch": 976.5131578947369, "grad_norm": 1.1481177806854248, "learning_rate": 0.0001, "loss": 0.0076, "step": 148430 }, { "epoch": 976.578947368421, "grad_norm": 1.2398234605789185, "learning_rate": 0.0001, "loss": 0.0101, "step": 148440 }, { "epoch": 976.6447368421053, "grad_norm": 0.9766506552696228, "learning_rate": 0.0001, "loss": 0.0109, "step": 148450 }, { "epoch": 976.7105263157895, "grad_norm": 1.284214735031128, "learning_rate": 0.0001, "loss": 0.0114, "step": 148460 }, { "epoch": 976.7763157894736, "grad_norm": 1.0955685377120972, "learning_rate": 0.0001, "loss": 0.009, "step": 148470 }, { "epoch": 976.8421052631579, "grad_norm": 1.0362792015075684, "learning_rate": 0.0001, "loss": 0.009, "step": 148480 }, { "epoch": 976.9078947368421, "grad_norm": 1.067434310913086, "learning_rate": 0.0001, "loss": 0.0105, "step": 148490 }, { "epoch": 976.9736842105264, "grad_norm": 0.7627929449081421, "learning_rate": 0.0001, "loss": 0.01, "step": 148500 }, { "epoch": 977.0394736842105, "grad_norm": 0.9590473175048828, "learning_rate": 0.0001, "loss": 0.0089, "step": 148510 }, { "epoch": 977.1052631578947, "grad_norm": 0.8360632061958313, "learning_rate": 0.0001, "loss": 0.0089, "step": 148520 }, { "epoch": 977.171052631579, "grad_norm": 0.7430837154388428, "learning_rate": 0.0001, "loss": 0.0109, "step": 148530 }, { "epoch": 977.2368421052631, "grad_norm": 0.9567493796348572, "learning_rate": 0.0001, "loss": 0.0095, "step": 148540 }, { "epoch": 977.3026315789474, "grad_norm": 1.1349761486053467, "learning_rate": 0.0001, "loss": 0.0098, "step": 148550 }, { "epoch": 977.3684210526316, "grad_norm": 1.2582765817642212, "learning_rate": 0.0001, "loss": 0.009, "step": 148560 }, { "epoch": 977.4342105263158, "grad_norm": 1.0692235231399536, "learning_rate": 0.0001, "loss": 0.0091, "step": 148570 }, { "epoch": 977.5, "grad_norm": 1.2210439443588257, "learning_rate": 0.0001, "loss": 0.008, "step": 148580 }, { "epoch": 977.5657894736842, "grad_norm": 0.9458500742912292, "learning_rate": 0.0001, "loss": 0.0087, "step": 148590 }, { "epoch": 977.6315789473684, "grad_norm": 1.099303960800171, "learning_rate": 0.0001, "loss": 0.01, "step": 148600 }, { "epoch": 977.6973684210526, "grad_norm": 0.9207699298858643, "learning_rate": 0.0001, "loss": 0.0094, "step": 148610 }, { "epoch": 977.7631578947369, "grad_norm": 1.0685609579086304, "learning_rate": 0.0001, "loss": 0.0118, "step": 148620 }, { "epoch": 977.828947368421, "grad_norm": 1.1274516582489014, "learning_rate": 0.0001, "loss": 0.0104, "step": 148630 }, { "epoch": 977.8947368421053, "grad_norm": 1.022341012954712, "learning_rate": 0.0001, "loss": 0.011, "step": 148640 }, { "epoch": 977.9605263157895, "grad_norm": 1.0944082736968994, "learning_rate": 0.0001, "loss": 0.0121, "step": 148650 }, { "epoch": 978.0263157894736, "grad_norm": 1.1251676082611084, "learning_rate": 0.0001, "loss": 0.0074, "step": 148660 }, { "epoch": 978.0921052631579, "grad_norm": 0.7965459823608398, "learning_rate": 0.0001, "loss": 0.0088, "step": 148670 }, { "epoch": 978.1578947368421, "grad_norm": 0.8056169152259827, "learning_rate": 0.0001, "loss": 0.0092, "step": 148680 }, { "epoch": 978.2236842105264, "grad_norm": 0.9947016835212708, "learning_rate": 0.0001, "loss": 0.0105, "step": 148690 }, { "epoch": 978.2894736842105, "grad_norm": 1.521198034286499, "learning_rate": 0.0001, "loss": 0.0093, "step": 148700 }, { "epoch": 978.3552631578947, "grad_norm": 1.4726166725158691, "learning_rate": 0.0001, "loss": 0.011, "step": 148710 }, { "epoch": 978.421052631579, "grad_norm": 1.2374329566955566, "learning_rate": 0.0001, "loss": 0.0083, "step": 148720 }, { "epoch": 978.4868421052631, "grad_norm": 1.090682029724121, "learning_rate": 0.0001, "loss": 0.0106, "step": 148730 }, { "epoch": 978.5526315789474, "grad_norm": 1.163394570350647, "learning_rate": 0.0001, "loss": 0.0097, "step": 148740 }, { "epoch": 978.6184210526316, "grad_norm": 1.0703188180923462, "learning_rate": 0.0001, "loss": 0.0089, "step": 148750 }, { "epoch": 978.6842105263158, "grad_norm": 1.2367560863494873, "learning_rate": 0.0001, "loss": 0.0117, "step": 148760 }, { "epoch": 978.75, "grad_norm": 1.329300880432129, "learning_rate": 0.0001, "loss": 0.0095, "step": 148770 }, { "epoch": 978.8157894736842, "grad_norm": 1.083126425743103, "learning_rate": 0.0001, "loss": 0.0092, "step": 148780 }, { "epoch": 978.8815789473684, "grad_norm": 0.7448536157608032, "learning_rate": 0.0001, "loss": 0.0091, "step": 148790 }, { "epoch": 978.9473684210526, "grad_norm": 1.0757248401641846, "learning_rate": 0.0001, "loss": 0.0096, "step": 148800 }, { "epoch": 979.0131578947369, "grad_norm": 0.8935508131980896, "learning_rate": 0.0001, "loss": 0.0082, "step": 148810 }, { "epoch": 979.078947368421, "grad_norm": 1.3141487836837769, "learning_rate": 0.0001, "loss": 0.0093, "step": 148820 }, { "epoch": 979.1447368421053, "grad_norm": 0.9629881978034973, "learning_rate": 0.0001, "loss": 0.0086, "step": 148830 }, { "epoch": 979.2105263157895, "grad_norm": 0.9605873823165894, "learning_rate": 0.0001, "loss": 0.0085, "step": 148840 }, { "epoch": 979.2763157894736, "grad_norm": 1.030029058456421, "learning_rate": 0.0001, "loss": 0.0104, "step": 148850 }, { "epoch": 979.3421052631579, "grad_norm": 1.113380789756775, "learning_rate": 0.0001, "loss": 0.0083, "step": 148860 }, { "epoch": 979.4078947368421, "grad_norm": 1.1046963930130005, "learning_rate": 0.0001, "loss": 0.0119, "step": 148870 }, { "epoch": 979.4736842105264, "grad_norm": 1.0526591539382935, "learning_rate": 0.0001, "loss": 0.0093, "step": 148880 }, { "epoch": 979.5394736842105, "grad_norm": 1.6122370958328247, "learning_rate": 0.0001, "loss": 0.0106, "step": 148890 }, { "epoch": 979.6052631578947, "grad_norm": 1.042912244796753, "learning_rate": 0.0001, "loss": 0.0101, "step": 148900 }, { "epoch": 979.671052631579, "grad_norm": 1.0555074214935303, "learning_rate": 0.0001, "loss": 0.0092, "step": 148910 }, { "epoch": 979.7368421052631, "grad_norm": 0.8293302655220032, "learning_rate": 0.0001, "loss": 0.0088, "step": 148920 }, { "epoch": 979.8026315789474, "grad_norm": 0.7137147188186646, "learning_rate": 0.0001, "loss": 0.0082, "step": 148930 }, { "epoch": 979.8684210526316, "grad_norm": 1.0557599067687988, "learning_rate": 0.0001, "loss": 0.0099, "step": 148940 }, { "epoch": 979.9342105263158, "grad_norm": 1.147873044013977, "learning_rate": 0.0001, "loss": 0.0092, "step": 148950 }, { "epoch": 980.0, "grad_norm": 0.8311463594436646, "learning_rate": 0.0001, "loss": 0.0097, "step": 148960 }, { "epoch": 980.0657894736842, "grad_norm": 1.0311176776885986, "learning_rate": 0.0001, "loss": 0.0087, "step": 148970 }, { "epoch": 980.1315789473684, "grad_norm": 0.8780924677848816, "learning_rate": 0.0001, "loss": 0.0091, "step": 148980 }, { "epoch": 980.1973684210526, "grad_norm": 1.235633373260498, "learning_rate": 0.0001, "loss": 0.0089, "step": 148990 }, { "epoch": 980.2631578947369, "grad_norm": 1.298965573310852, "learning_rate": 0.0001, "loss": 0.0088, "step": 149000 }, { "epoch": 980.328947368421, "grad_norm": 0.9419639706611633, "learning_rate": 0.0001, "loss": 0.0093, "step": 149010 }, { "epoch": 980.3947368421053, "grad_norm": 1.0413622856140137, "learning_rate": 0.0001, "loss": 0.0091, "step": 149020 }, { "epoch": 980.4605263157895, "grad_norm": 0.7547554969787598, "learning_rate": 0.0001, "loss": 0.0096, "step": 149030 }, { "epoch": 980.5263157894736, "grad_norm": 0.7510955333709717, "learning_rate": 0.0001, "loss": 0.0086, "step": 149040 }, { "epoch": 980.5921052631579, "grad_norm": 0.9087187647819519, "learning_rate": 0.0001, "loss": 0.0105, "step": 149050 }, { "epoch": 980.6578947368421, "grad_norm": 1.227208137512207, "learning_rate": 0.0001, "loss": 0.0099, "step": 149060 }, { "epoch": 980.7236842105264, "grad_norm": 0.9901775121688843, "learning_rate": 0.0001, "loss": 0.0101, "step": 149070 }, { "epoch": 980.7894736842105, "grad_norm": 1.1645238399505615, "learning_rate": 0.0001, "loss": 0.0109, "step": 149080 }, { "epoch": 980.8552631578947, "grad_norm": 0.9713094234466553, "learning_rate": 0.0001, "loss": 0.0098, "step": 149090 }, { "epoch": 980.921052631579, "grad_norm": 1.0313467979431152, "learning_rate": 0.0001, "loss": 0.0108, "step": 149100 }, { "epoch": 980.9868421052631, "grad_norm": 0.6963071227073669, "learning_rate": 0.0001, "loss": 0.0092, "step": 149110 }, { "epoch": 981.0526315789474, "grad_norm": 1.0932525396347046, "learning_rate": 0.0001, "loss": 0.0084, "step": 149120 }, { "epoch": 981.1184210526316, "grad_norm": 1.1071008443832397, "learning_rate": 0.0001, "loss": 0.0092, "step": 149130 }, { "epoch": 981.1842105263158, "grad_norm": 1.3636506795883179, "learning_rate": 0.0001, "loss": 0.0089, "step": 149140 }, { "epoch": 981.25, "grad_norm": 0.9255510568618774, "learning_rate": 0.0001, "loss": 0.0104, "step": 149150 }, { "epoch": 981.3157894736842, "grad_norm": 0.9791567921638489, "learning_rate": 0.0001, "loss": 0.009, "step": 149160 }, { "epoch": 981.3815789473684, "grad_norm": 0.8821341395378113, "learning_rate": 0.0001, "loss": 0.0103, "step": 149170 }, { "epoch": 981.4473684210526, "grad_norm": 0.9280576705932617, "learning_rate": 0.0001, "loss": 0.0092, "step": 149180 }, { "epoch": 981.5131578947369, "grad_norm": 0.7593532800674438, "learning_rate": 0.0001, "loss": 0.0105, "step": 149190 }, { "epoch": 981.578947368421, "grad_norm": 1.2766715288162231, "learning_rate": 0.0001, "loss": 0.0114, "step": 149200 }, { "epoch": 981.6447368421053, "grad_norm": 0.7514734864234924, "learning_rate": 0.0001, "loss": 0.0084, "step": 149210 }, { "epoch": 981.7105263157895, "grad_norm": 0.6935755014419556, "learning_rate": 0.0001, "loss": 0.0088, "step": 149220 }, { "epoch": 981.7763157894736, "grad_norm": 1.0693104267120361, "learning_rate": 0.0001, "loss": 0.009, "step": 149230 }, { "epoch": 981.8421052631579, "grad_norm": 1.1052137613296509, "learning_rate": 0.0001, "loss": 0.0101, "step": 149240 }, { "epoch": 981.9078947368421, "grad_norm": 1.1656912565231323, "learning_rate": 0.0001, "loss": 0.0085, "step": 149250 }, { "epoch": 981.9736842105264, "grad_norm": 1.1336760520935059, "learning_rate": 0.0001, "loss": 0.0102, "step": 149260 }, { "epoch": 982.0394736842105, "grad_norm": 1.118245244026184, "learning_rate": 0.0001, "loss": 0.0095, "step": 149270 }, { "epoch": 982.1052631578947, "grad_norm": 1.0137847661972046, "learning_rate": 0.0001, "loss": 0.0089, "step": 149280 }, { "epoch": 982.171052631579, "grad_norm": 1.1577128171920776, "learning_rate": 0.0001, "loss": 0.0095, "step": 149290 }, { "epoch": 982.2368421052631, "grad_norm": 0.8299029469490051, "learning_rate": 0.0001, "loss": 0.0091, "step": 149300 }, { "epoch": 982.3026315789474, "grad_norm": 1.0582658052444458, "learning_rate": 0.0001, "loss": 0.0105, "step": 149310 }, { "epoch": 982.3684210526316, "grad_norm": 1.0629546642303467, "learning_rate": 0.0001, "loss": 0.0106, "step": 149320 }, { "epoch": 982.4342105263158, "grad_norm": 1.2285642623901367, "learning_rate": 0.0001, "loss": 0.009, "step": 149330 }, { "epoch": 982.5, "grad_norm": 1.1044747829437256, "learning_rate": 0.0001, "loss": 0.0073, "step": 149340 }, { "epoch": 982.5657894736842, "grad_norm": 1.2919867038726807, "learning_rate": 0.0001, "loss": 0.0104, "step": 149350 }, { "epoch": 982.6315789473684, "grad_norm": 0.8287044763565063, "learning_rate": 0.0001, "loss": 0.0087, "step": 149360 }, { "epoch": 982.6973684210526, "grad_norm": 0.9002633094787598, "learning_rate": 0.0001, "loss": 0.0076, "step": 149370 }, { "epoch": 982.7631578947369, "grad_norm": 1.065321683883667, "learning_rate": 0.0001, "loss": 0.0084, "step": 149380 }, { "epoch": 982.828947368421, "grad_norm": 1.0330926179885864, "learning_rate": 0.0001, "loss": 0.0108, "step": 149390 }, { "epoch": 982.8947368421053, "grad_norm": 0.989842414855957, "learning_rate": 0.0001, "loss": 0.0091, "step": 149400 }, { "epoch": 982.9605263157895, "grad_norm": 0.905006468296051, "learning_rate": 0.0001, "loss": 0.0108, "step": 149410 }, { "epoch": 983.0263157894736, "grad_norm": 1.150158405303955, "learning_rate": 0.0001, "loss": 0.0088, "step": 149420 }, { "epoch": 983.0921052631579, "grad_norm": 0.8461920619010925, "learning_rate": 0.0001, "loss": 0.009, "step": 149430 }, { "epoch": 983.1578947368421, "grad_norm": 0.8330326676368713, "learning_rate": 0.0001, "loss": 0.0105, "step": 149440 }, { "epoch": 983.2236842105264, "grad_norm": 1.2453408241271973, "learning_rate": 0.0001, "loss": 0.0092, "step": 149450 }, { "epoch": 983.2894736842105, "grad_norm": 1.0621312856674194, "learning_rate": 0.0001, "loss": 0.0093, "step": 149460 }, { "epoch": 983.3552631578947, "grad_norm": 1.0289655923843384, "learning_rate": 0.0001, "loss": 0.009, "step": 149470 }, { "epoch": 983.421052631579, "grad_norm": 0.8648130297660828, "learning_rate": 0.0001, "loss": 0.0093, "step": 149480 }, { "epoch": 983.4868421052631, "grad_norm": 1.0741007328033447, "learning_rate": 0.0001, "loss": 0.0108, "step": 149490 }, { "epoch": 983.5526315789474, "grad_norm": 0.8858330845832825, "learning_rate": 0.0001, "loss": 0.009, "step": 149500 }, { "epoch": 983.6184210526316, "grad_norm": 1.25303053855896, "learning_rate": 0.0001, "loss": 0.0085, "step": 149510 }, { "epoch": 983.6842105263158, "grad_norm": 1.2206391096115112, "learning_rate": 0.0001, "loss": 0.0087, "step": 149520 }, { "epoch": 983.75, "grad_norm": 0.9137042164802551, "learning_rate": 0.0001, "loss": 0.0101, "step": 149530 }, { "epoch": 983.8157894736842, "grad_norm": 1.025648593902588, "learning_rate": 0.0001, "loss": 0.0098, "step": 149540 }, { "epoch": 983.8815789473684, "grad_norm": 1.0423038005828857, "learning_rate": 0.0001, "loss": 0.0112, "step": 149550 }, { "epoch": 983.9473684210526, "grad_norm": 1.0618716478347778, "learning_rate": 0.0001, "loss": 0.0112, "step": 149560 }, { "epoch": 984.0131578947369, "grad_norm": 1.030285120010376, "learning_rate": 0.0001, "loss": 0.0099, "step": 149570 }, { "epoch": 984.078947368421, "grad_norm": 0.8739547729492188, "learning_rate": 0.0001, "loss": 0.0088, "step": 149580 }, { "epoch": 984.1447368421053, "grad_norm": 1.1206870079040527, "learning_rate": 0.0001, "loss": 0.0108, "step": 149590 }, { "epoch": 984.2105263157895, "grad_norm": 1.278941035270691, "learning_rate": 0.0001, "loss": 0.011, "step": 149600 }, { "epoch": 984.2763157894736, "grad_norm": 1.7599936723709106, "learning_rate": 0.0001, "loss": 0.0086, "step": 149610 }, { "epoch": 984.3421052631579, "grad_norm": 1.6677428483963013, "learning_rate": 0.0001, "loss": 0.0105, "step": 149620 }, { "epoch": 984.4078947368421, "grad_norm": 3.871687412261963, "learning_rate": 0.0001, "loss": 0.0118, "step": 149630 }, { "epoch": 984.4736842105264, "grad_norm": 2.421388864517212, "learning_rate": 0.0001, "loss": 0.0116, "step": 149640 }, { "epoch": 984.5394736842105, "grad_norm": 1.2500579357147217, "learning_rate": 0.0001, "loss": 0.0101, "step": 149650 }, { "epoch": 984.6052631578947, "grad_norm": 1.5379196405410767, "learning_rate": 0.0001, "loss": 0.0119, "step": 149660 }, { "epoch": 984.671052631579, "grad_norm": 1.7158979177474976, "learning_rate": 0.0001, "loss": 0.0092, "step": 149670 }, { "epoch": 984.7368421052631, "grad_norm": 1.4845489263534546, "learning_rate": 0.0001, "loss": 0.0118, "step": 149680 }, { "epoch": 984.8026315789474, "grad_norm": 1.1629608869552612, "learning_rate": 0.0001, "loss": 0.0102, "step": 149690 }, { "epoch": 984.8684210526316, "grad_norm": 1.415179967880249, "learning_rate": 0.0001, "loss": 0.0117, "step": 149700 }, { "epoch": 984.9342105263158, "grad_norm": 1.1638578176498413, "learning_rate": 0.0001, "loss": 0.0109, "step": 149710 }, { "epoch": 985.0, "grad_norm": 0.9588046669960022, "learning_rate": 0.0001, "loss": 0.0093, "step": 149720 }, { "epoch": 985.0657894736842, "grad_norm": 1.2830522060394287, "learning_rate": 0.0001, "loss": 0.0097, "step": 149730 }, { "epoch": 985.1315789473684, "grad_norm": 0.9208570122718811, "learning_rate": 0.0001, "loss": 0.009, "step": 149740 }, { "epoch": 985.1973684210526, "grad_norm": 0.5856902003288269, "learning_rate": 0.0001, "loss": 0.0093, "step": 149750 }, { "epoch": 985.2631578947369, "grad_norm": 0.9705900549888611, "learning_rate": 0.0001, "loss": 0.0104, "step": 149760 }, { "epoch": 985.328947368421, "grad_norm": 1.1862601041793823, "learning_rate": 0.0001, "loss": 0.01, "step": 149770 }, { "epoch": 985.3947368421053, "grad_norm": 0.9023541808128357, "learning_rate": 0.0001, "loss": 0.0107, "step": 149780 }, { "epoch": 985.4605263157895, "grad_norm": 1.044999361038208, "learning_rate": 0.0001, "loss": 0.0126, "step": 149790 }, { "epoch": 985.5263157894736, "grad_norm": 0.9709221124649048, "learning_rate": 0.0001, "loss": 0.0108, "step": 149800 }, { "epoch": 985.5921052631579, "grad_norm": 1.0954318046569824, "learning_rate": 0.0001, "loss": 0.0111, "step": 149810 }, { "epoch": 985.6578947368421, "grad_norm": 1.156111478805542, "learning_rate": 0.0001, "loss": 0.0107, "step": 149820 }, { "epoch": 985.7236842105264, "grad_norm": 0.8454775810241699, "learning_rate": 0.0001, "loss": 0.0095, "step": 149830 }, { "epoch": 985.7894736842105, "grad_norm": 0.880204439163208, "learning_rate": 0.0001, "loss": 0.0108, "step": 149840 }, { "epoch": 985.8552631578947, "grad_norm": 1.2712900638580322, "learning_rate": 0.0001, "loss": 0.0098, "step": 149850 }, { "epoch": 985.921052631579, "grad_norm": 1.0623518228530884, "learning_rate": 0.0001, "loss": 0.0091, "step": 149860 }, { "epoch": 985.9868421052631, "grad_norm": 0.7809160947799683, "learning_rate": 0.0001, "loss": 0.0078, "step": 149870 }, { "epoch": 986.0526315789474, "grad_norm": 0.9675153493881226, "learning_rate": 0.0001, "loss": 0.0091, "step": 149880 }, { "epoch": 986.1184210526316, "grad_norm": 0.8535943031311035, "learning_rate": 0.0001, "loss": 0.0113, "step": 149890 }, { "epoch": 986.1842105263158, "grad_norm": 0.9090612530708313, "learning_rate": 0.0001, "loss": 0.0087, "step": 149900 }, { "epoch": 986.25, "grad_norm": 0.7848944067955017, "learning_rate": 0.0001, "loss": 0.0105, "step": 149910 }, { "epoch": 986.3157894736842, "grad_norm": 1.056888461112976, "learning_rate": 0.0001, "loss": 0.01, "step": 149920 }, { "epoch": 986.3815789473684, "grad_norm": 0.9026320576667786, "learning_rate": 0.0001, "loss": 0.0093, "step": 149930 }, { "epoch": 986.4473684210526, "grad_norm": 1.167649745941162, "learning_rate": 0.0001, "loss": 0.0078, "step": 149940 }, { "epoch": 986.5131578947369, "grad_norm": 1.0454814434051514, "learning_rate": 0.0001, "loss": 0.0091, "step": 149950 }, { "epoch": 986.578947368421, "grad_norm": 1.3439925909042358, "learning_rate": 0.0001, "loss": 0.0089, "step": 149960 }, { "epoch": 986.6447368421053, "grad_norm": 1.020798921585083, "learning_rate": 0.0001, "loss": 0.0093, "step": 149970 }, { "epoch": 986.7105263157895, "grad_norm": 1.1719021797180176, "learning_rate": 0.0001, "loss": 0.0129, "step": 149980 }, { "epoch": 986.7763157894736, "grad_norm": 1.0493474006652832, "learning_rate": 0.0001, "loss": 0.0109, "step": 149990 }, { "epoch": 986.8421052631579, "grad_norm": 0.8416689038276672, "learning_rate": 0.0001, "loss": 0.0135, "step": 150000 }, { "epoch": 986.9078947368421, "grad_norm": 0.9856203198432922, "learning_rate": 0.0001, "loss": 0.0102, "step": 150010 }, { "epoch": 986.9736842105264, "grad_norm": 0.9143205881118774, "learning_rate": 0.0001, "loss": 0.0094, "step": 150020 }, { "epoch": 987.0394736842105, "grad_norm": 0.9702136516571045, "learning_rate": 0.0001, "loss": 0.0087, "step": 150030 }, { "epoch": 987.1052631578947, "grad_norm": 0.7779248356819153, "learning_rate": 0.0001, "loss": 0.0103, "step": 150040 }, { "epoch": 987.171052631579, "grad_norm": 1.1739939451217651, "learning_rate": 0.0001, "loss": 0.01, "step": 150050 }, { "epoch": 987.2368421052631, "grad_norm": 1.0114635229110718, "learning_rate": 0.0001, "loss": 0.0103, "step": 150060 }, { "epoch": 987.3026315789474, "grad_norm": 1.0713098049163818, "learning_rate": 0.0001, "loss": 0.0098, "step": 150070 }, { "epoch": 987.3684210526316, "grad_norm": 0.9097891449928284, "learning_rate": 0.0001, "loss": 0.0101, "step": 150080 }, { "epoch": 987.4342105263158, "grad_norm": 1.1985975503921509, "learning_rate": 0.0001, "loss": 0.0124, "step": 150090 }, { "epoch": 987.5, "grad_norm": 1.0965287685394287, "learning_rate": 0.0001, "loss": 0.0105, "step": 150100 }, { "epoch": 987.5657894736842, "grad_norm": 0.873046338558197, "learning_rate": 0.0001, "loss": 0.0086, "step": 150110 }, { "epoch": 987.6315789473684, "grad_norm": 1.016603708267212, "learning_rate": 0.0001, "loss": 0.0092, "step": 150120 }, { "epoch": 987.6973684210526, "grad_norm": 0.9413048028945923, "learning_rate": 0.0001, "loss": 0.0105, "step": 150130 }, { "epoch": 987.7631578947369, "grad_norm": 1.0264689922332764, "learning_rate": 0.0001, "loss": 0.0097, "step": 150140 }, { "epoch": 987.828947368421, "grad_norm": 0.916080117225647, "learning_rate": 0.0001, "loss": 0.0114, "step": 150150 }, { "epoch": 987.8947368421053, "grad_norm": 1.110188364982605, "learning_rate": 0.0001, "loss": 0.0092, "step": 150160 }, { "epoch": 987.9605263157895, "grad_norm": 1.0848978757858276, "learning_rate": 0.0001, "loss": 0.0105, "step": 150170 }, { "epoch": 988.0263157894736, "grad_norm": 0.9753615856170654, "learning_rate": 0.0001, "loss": 0.012, "step": 150180 }, { "epoch": 988.0921052631579, "grad_norm": 1.2816380262374878, "learning_rate": 0.0001, "loss": 0.0089, "step": 150190 }, { "epoch": 988.1578947368421, "grad_norm": 1.2264199256896973, "learning_rate": 0.0001, "loss": 0.0117, "step": 150200 }, { "epoch": 988.2236842105264, "grad_norm": 1.3432106971740723, "learning_rate": 0.0001, "loss": 0.0104, "step": 150210 }, { "epoch": 988.2894736842105, "grad_norm": 1.5210658311843872, "learning_rate": 0.0001, "loss": 0.0088, "step": 150220 }, { "epoch": 988.3552631578947, "grad_norm": 1.3455909490585327, "learning_rate": 0.0001, "loss": 0.0091, "step": 150230 }, { "epoch": 988.421052631579, "grad_norm": 1.2642890214920044, "learning_rate": 0.0001, "loss": 0.0115, "step": 150240 }, { "epoch": 988.4868421052631, "grad_norm": 1.0704741477966309, "learning_rate": 0.0001, "loss": 0.0102, "step": 150250 }, { "epoch": 988.5526315789474, "grad_norm": 1.0484412908554077, "learning_rate": 0.0001, "loss": 0.0097, "step": 150260 }, { "epoch": 988.6184210526316, "grad_norm": 0.8334908485412598, "learning_rate": 0.0001, "loss": 0.0089, "step": 150270 }, { "epoch": 988.6842105263158, "grad_norm": 0.8684769868850708, "learning_rate": 0.0001, "loss": 0.0077, "step": 150280 }, { "epoch": 988.75, "grad_norm": 0.8539243936538696, "learning_rate": 0.0001, "loss": 0.0114, "step": 150290 }, { "epoch": 988.8157894736842, "grad_norm": 0.8212605118751526, "learning_rate": 0.0001, "loss": 0.0099, "step": 150300 }, { "epoch": 988.8815789473684, "grad_norm": 1.1426969766616821, "learning_rate": 0.0001, "loss": 0.0091, "step": 150310 }, { "epoch": 988.9473684210526, "grad_norm": 1.2686213254928589, "learning_rate": 0.0001, "loss": 0.0077, "step": 150320 }, { "epoch": 989.0131578947369, "grad_norm": 1.0295464992523193, "learning_rate": 0.0001, "loss": 0.0089, "step": 150330 }, { "epoch": 989.078947368421, "grad_norm": 0.9937509298324585, "learning_rate": 0.0001, "loss": 0.0087, "step": 150340 }, { "epoch": 989.1447368421053, "grad_norm": 0.8517789840698242, "learning_rate": 0.0001, "loss": 0.0108, "step": 150350 }, { "epoch": 989.2105263157895, "grad_norm": 1.2996900081634521, "learning_rate": 0.0001, "loss": 0.0087, "step": 150360 }, { "epoch": 989.2763157894736, "grad_norm": 0.7057400345802307, "learning_rate": 0.0001, "loss": 0.0104, "step": 150370 }, { "epoch": 989.3421052631579, "grad_norm": 0.8088485598564148, "learning_rate": 0.0001, "loss": 0.0092, "step": 150380 }, { "epoch": 989.4078947368421, "grad_norm": 0.9495823979377747, "learning_rate": 0.0001, "loss": 0.0095, "step": 150390 }, { "epoch": 989.4736842105264, "grad_norm": 1.176836609840393, "learning_rate": 0.0001, "loss": 0.0085, "step": 150400 }, { "epoch": 989.5394736842105, "grad_norm": 1.438713788986206, "learning_rate": 0.0001, "loss": 0.0097, "step": 150410 }, { "epoch": 989.6052631578947, "grad_norm": 1.0466101169586182, "learning_rate": 0.0001, "loss": 0.0096, "step": 150420 }, { "epoch": 989.671052631579, "grad_norm": 0.9159618020057678, "learning_rate": 0.0001, "loss": 0.0112, "step": 150430 }, { "epoch": 989.7368421052631, "grad_norm": 1.128126859664917, "learning_rate": 0.0001, "loss": 0.0103, "step": 150440 }, { "epoch": 989.8026315789474, "grad_norm": 0.6916261911392212, "learning_rate": 0.0001, "loss": 0.0088, "step": 150450 }, { "epoch": 989.8684210526316, "grad_norm": 1.2188067436218262, "learning_rate": 0.0001, "loss": 0.0101, "step": 150460 }, { "epoch": 989.9342105263158, "grad_norm": 1.330909013748169, "learning_rate": 0.0001, "loss": 0.008, "step": 150470 }, { "epoch": 990.0, "grad_norm": 1.1757625341415405, "learning_rate": 0.0001, "loss": 0.0102, "step": 150480 }, { "epoch": 990.0657894736842, "grad_norm": 1.4033007621765137, "learning_rate": 0.0001, "loss": 0.0085, "step": 150490 }, { "epoch": 990.1315789473684, "grad_norm": 1.0067665576934814, "learning_rate": 0.0001, "loss": 0.0104, "step": 150500 }, { "epoch": 990.1973684210526, "grad_norm": 1.1867026090621948, "learning_rate": 0.0001, "loss": 0.0109, "step": 150510 }, { "epoch": 990.2631578947369, "grad_norm": 1.306778073310852, "learning_rate": 0.0001, "loss": 0.0094, "step": 150520 }, { "epoch": 990.328947368421, "grad_norm": 1.033635139465332, "learning_rate": 0.0001, "loss": 0.0098, "step": 150530 }, { "epoch": 990.3947368421053, "grad_norm": 1.0581187009811401, "learning_rate": 0.0001, "loss": 0.0095, "step": 150540 }, { "epoch": 990.4605263157895, "grad_norm": 1.396301507949829, "learning_rate": 0.0001, "loss": 0.0098, "step": 150550 }, { "epoch": 990.5263157894736, "grad_norm": 1.2779043912887573, "learning_rate": 0.0001, "loss": 0.0097, "step": 150560 }, { "epoch": 990.5921052631579, "grad_norm": 1.0077518224716187, "learning_rate": 0.0001, "loss": 0.0086, "step": 150570 }, { "epoch": 990.6578947368421, "grad_norm": 1.1723346710205078, "learning_rate": 0.0001, "loss": 0.0088, "step": 150580 }, { "epoch": 990.7236842105264, "grad_norm": 1.0024058818817139, "learning_rate": 0.0001, "loss": 0.0101, "step": 150590 }, { "epoch": 990.7894736842105, "grad_norm": 1.120664358139038, "learning_rate": 0.0001, "loss": 0.0105, "step": 150600 }, { "epoch": 990.8552631578947, "grad_norm": 1.2388149499893188, "learning_rate": 0.0001, "loss": 0.0086, "step": 150610 }, { "epoch": 990.921052631579, "grad_norm": 1.3285748958587646, "learning_rate": 0.0001, "loss": 0.0081, "step": 150620 }, { "epoch": 990.9868421052631, "grad_norm": 0.933190643787384, "learning_rate": 0.0001, "loss": 0.0091, "step": 150630 }, { "epoch": 991.0526315789474, "grad_norm": 1.2380552291870117, "learning_rate": 0.0001, "loss": 0.0093, "step": 150640 }, { "epoch": 991.1184210526316, "grad_norm": 1.042554497718811, "learning_rate": 0.0001, "loss": 0.0083, "step": 150650 }, { "epoch": 991.1842105263158, "grad_norm": 1.0768581628799438, "learning_rate": 0.0001, "loss": 0.0082, "step": 150660 }, { "epoch": 991.25, "grad_norm": 0.9357948899269104, "learning_rate": 0.0001, "loss": 0.0084, "step": 150670 }, { "epoch": 991.3157894736842, "grad_norm": 1.2259186506271362, "learning_rate": 0.0001, "loss": 0.0113, "step": 150680 }, { "epoch": 991.3815789473684, "grad_norm": 0.9076092839241028, "learning_rate": 0.0001, "loss": 0.0083, "step": 150690 }, { "epoch": 991.4473684210526, "grad_norm": 1.0756639242172241, "learning_rate": 0.0001, "loss": 0.0093, "step": 150700 }, { "epoch": 991.5131578947369, "grad_norm": 0.8654159307479858, "learning_rate": 0.0001, "loss": 0.0085, "step": 150710 }, { "epoch": 991.578947368421, "grad_norm": 0.5353500843048096, "learning_rate": 0.0001, "loss": 0.0107, "step": 150720 }, { "epoch": 991.6447368421053, "grad_norm": 1.0881078243255615, "learning_rate": 0.0001, "loss": 0.0085, "step": 150730 }, { "epoch": 991.7105263157895, "grad_norm": 0.8417906165122986, "learning_rate": 0.0001, "loss": 0.0084, "step": 150740 }, { "epoch": 991.7763157894736, "grad_norm": 1.3755824565887451, "learning_rate": 0.0001, "loss": 0.0082, "step": 150750 }, { "epoch": 991.8421052631579, "grad_norm": 1.0867851972579956, "learning_rate": 0.0001, "loss": 0.0109, "step": 150760 }, { "epoch": 991.9078947368421, "grad_norm": 1.0686115026474, "learning_rate": 0.0001, "loss": 0.0109, "step": 150770 }, { "epoch": 991.9736842105264, "grad_norm": 0.9699664115905762, "learning_rate": 0.0001, "loss": 0.0094, "step": 150780 }, { "epoch": 992.0394736842105, "grad_norm": 1.1987810134887695, "learning_rate": 0.0001, "loss": 0.0098, "step": 150790 }, { "epoch": 992.1052631578947, "grad_norm": 0.8176727294921875, "learning_rate": 0.0001, "loss": 0.0106, "step": 150800 }, { "epoch": 992.171052631579, "grad_norm": 0.8529284596443176, "learning_rate": 0.0001, "loss": 0.0104, "step": 150810 }, { "epoch": 992.2368421052631, "grad_norm": 1.179165005683899, "learning_rate": 0.0001, "loss": 0.0114, "step": 150820 }, { "epoch": 992.3026315789474, "grad_norm": 0.9331127405166626, "learning_rate": 0.0001, "loss": 0.0097, "step": 150830 }, { "epoch": 992.3684210526316, "grad_norm": 1.0699684619903564, "learning_rate": 0.0001, "loss": 0.0094, "step": 150840 }, { "epoch": 992.4342105263158, "grad_norm": 1.0798276662826538, "learning_rate": 0.0001, "loss": 0.0101, "step": 150850 }, { "epoch": 992.5, "grad_norm": 1.126177430152893, "learning_rate": 0.0001, "loss": 0.0098, "step": 150860 }, { "epoch": 992.5657894736842, "grad_norm": 1.269126057624817, "learning_rate": 0.0001, "loss": 0.0084, "step": 150870 }, { "epoch": 992.6315789473684, "grad_norm": 1.2540189027786255, "learning_rate": 0.0001, "loss": 0.0106, "step": 150880 }, { "epoch": 992.6973684210526, "grad_norm": 0.9607136845588684, "learning_rate": 0.0001, "loss": 0.0092, "step": 150890 }, { "epoch": 992.7631578947369, "grad_norm": 1.192214846611023, "learning_rate": 0.0001, "loss": 0.0084, "step": 150900 }, { "epoch": 992.828947368421, "grad_norm": 0.9914960265159607, "learning_rate": 0.0001, "loss": 0.009, "step": 150910 }, { "epoch": 992.8947368421053, "grad_norm": 0.728797435760498, "learning_rate": 0.0001, "loss": 0.0074, "step": 150920 }, { "epoch": 992.9605263157895, "grad_norm": 0.8233456611633301, "learning_rate": 0.0001, "loss": 0.0084, "step": 150930 }, { "epoch": 993.0263157894736, "grad_norm": 0.8725035190582275, "learning_rate": 0.0001, "loss": 0.0085, "step": 150940 }, { "epoch": 993.0921052631579, "grad_norm": 1.1844197511672974, "learning_rate": 0.0001, "loss": 0.0082, "step": 150950 }, { "epoch": 993.1578947368421, "grad_norm": 0.7963541150093079, "learning_rate": 0.0001, "loss": 0.0086, "step": 150960 }, { "epoch": 993.2236842105264, "grad_norm": 1.0642098188400269, "learning_rate": 0.0001, "loss": 0.0087, "step": 150970 }, { "epoch": 993.2894736842105, "grad_norm": 0.7806707620620728, "learning_rate": 0.0001, "loss": 0.0086, "step": 150980 }, { "epoch": 993.3552631578947, "grad_norm": 0.9475646615028381, "learning_rate": 0.0001, "loss": 0.0085, "step": 150990 }, { "epoch": 993.421052631579, "grad_norm": 0.715223491191864, "learning_rate": 0.0001, "loss": 0.0071, "step": 151000 }, { "epoch": 993.4868421052631, "grad_norm": 1.004929542541504, "learning_rate": 0.0001, "loss": 0.0105, "step": 151010 }, { "epoch": 993.5526315789474, "grad_norm": 0.9862182140350342, "learning_rate": 0.0001, "loss": 0.0098, "step": 151020 }, { "epoch": 993.6184210526316, "grad_norm": 0.9281184673309326, "learning_rate": 0.0001, "loss": 0.0091, "step": 151030 }, { "epoch": 993.6842105263158, "grad_norm": 1.0093213319778442, "learning_rate": 0.0001, "loss": 0.0106, "step": 151040 }, { "epoch": 993.75, "grad_norm": 1.3052515983581543, "learning_rate": 0.0001, "loss": 0.0091, "step": 151050 }, { "epoch": 993.8157894736842, "grad_norm": 1.137257695198059, "learning_rate": 0.0001, "loss": 0.0097, "step": 151060 }, { "epoch": 993.8815789473684, "grad_norm": 1.3269919157028198, "learning_rate": 0.0001, "loss": 0.0103, "step": 151070 }, { "epoch": 993.9473684210526, "grad_norm": 1.2152782678604126, "learning_rate": 0.0001, "loss": 0.0096, "step": 151080 }, { "epoch": 994.0131578947369, "grad_norm": 1.2013344764709473, "learning_rate": 0.0001, "loss": 0.0111, "step": 151090 }, { "epoch": 994.078947368421, "grad_norm": 1.0185185670852661, "learning_rate": 0.0001, "loss": 0.0098, "step": 151100 }, { "epoch": 994.1447368421053, "grad_norm": 1.1128599643707275, "learning_rate": 0.0001, "loss": 0.0077, "step": 151110 }, { "epoch": 994.2105263157895, "grad_norm": 1.1425211429595947, "learning_rate": 0.0001, "loss": 0.0112, "step": 151120 }, { "epoch": 994.2763157894736, "grad_norm": 1.0568586587905884, "learning_rate": 0.0001, "loss": 0.0077, "step": 151130 }, { "epoch": 994.3421052631579, "grad_norm": 1.080623984336853, "learning_rate": 0.0001, "loss": 0.0109, "step": 151140 }, { "epoch": 994.4078947368421, "grad_norm": 1.4741694927215576, "learning_rate": 0.0001, "loss": 0.0091, "step": 151150 }, { "epoch": 994.4736842105264, "grad_norm": 2.169677972793579, "learning_rate": 0.0001, "loss": 0.0095, "step": 151160 }, { "epoch": 994.5394736842105, "grad_norm": 2.1970841884613037, "learning_rate": 0.0001, "loss": 0.0103, "step": 151170 }, { "epoch": 994.6052631578947, "grad_norm": 1.8922151327133179, "learning_rate": 0.0001, "loss": 0.0094, "step": 151180 }, { "epoch": 994.671052631579, "grad_norm": 1.2851279973983765, "learning_rate": 0.0001, "loss": 0.0085, "step": 151190 }, { "epoch": 994.7368421052631, "grad_norm": 1.3797351121902466, "learning_rate": 0.0001, "loss": 0.0097, "step": 151200 }, { "epoch": 994.8026315789474, "grad_norm": 1.318306565284729, "learning_rate": 0.0001, "loss": 0.0081, "step": 151210 }, { "epoch": 994.8684210526316, "grad_norm": 1.115478754043579, "learning_rate": 0.0001, "loss": 0.0101, "step": 151220 }, { "epoch": 994.9342105263158, "grad_norm": 1.0066384077072144, "learning_rate": 0.0001, "loss": 0.0083, "step": 151230 }, { "epoch": 995.0, "grad_norm": 1.1105729341506958, "learning_rate": 0.0001, "loss": 0.0081, "step": 151240 }, { "epoch": 995.0657894736842, "grad_norm": 1.399383544921875, "learning_rate": 0.0001, "loss": 0.0078, "step": 151250 }, { "epoch": 995.1315789473684, "grad_norm": 1.160544991493225, "learning_rate": 0.0001, "loss": 0.0089, "step": 151260 }, { "epoch": 995.1973684210526, "grad_norm": 0.9492695331573486, "learning_rate": 0.0001, "loss": 0.0072, "step": 151270 }, { "epoch": 995.2631578947369, "grad_norm": 1.0131347179412842, "learning_rate": 0.0001, "loss": 0.0094, "step": 151280 }, { "epoch": 995.328947368421, "grad_norm": 0.9944673776626587, "learning_rate": 0.0001, "loss": 0.0105, "step": 151290 }, { "epoch": 995.3947368421053, "grad_norm": 1.1447815895080566, "learning_rate": 0.0001, "loss": 0.008, "step": 151300 }, { "epoch": 995.4605263157895, "grad_norm": 1.2639286518096924, "learning_rate": 0.0001, "loss": 0.0092, "step": 151310 }, { "epoch": 995.5263157894736, "grad_norm": 0.9355853199958801, "learning_rate": 0.0001, "loss": 0.0102, "step": 151320 }, { "epoch": 995.5921052631579, "grad_norm": 1.3493982553482056, "learning_rate": 0.0001, "loss": 0.0087, "step": 151330 }, { "epoch": 995.6578947368421, "grad_norm": 1.0639145374298096, "learning_rate": 0.0001, "loss": 0.0096, "step": 151340 }, { "epoch": 995.7236842105264, "grad_norm": 0.9887959957122803, "learning_rate": 0.0001, "loss": 0.0089, "step": 151350 }, { "epoch": 995.7894736842105, "grad_norm": 1.3102302551269531, "learning_rate": 0.0001, "loss": 0.0098, "step": 151360 }, { "epoch": 995.8552631578947, "grad_norm": 1.0294629335403442, "learning_rate": 0.0001, "loss": 0.0112, "step": 151370 }, { "epoch": 995.921052631579, "grad_norm": 1.3485372066497803, "learning_rate": 0.0001, "loss": 0.0103, "step": 151380 }, { "epoch": 995.9868421052631, "grad_norm": 1.2144447565078735, "learning_rate": 0.0001, "loss": 0.008, "step": 151390 }, { "epoch": 996.0526315789474, "grad_norm": 0.7425600290298462, "learning_rate": 0.0001, "loss": 0.009, "step": 151400 }, { "epoch": 996.1184210526316, "grad_norm": 1.1900805234909058, "learning_rate": 0.0001, "loss": 0.009, "step": 151410 }, { "epoch": 996.1842105263158, "grad_norm": 0.8845036029815674, "learning_rate": 0.0001, "loss": 0.0091, "step": 151420 }, { "epoch": 996.25, "grad_norm": 0.965152382850647, "learning_rate": 0.0001, "loss": 0.0102, "step": 151430 }, { "epoch": 996.3157894736842, "grad_norm": 1.1030590534210205, "learning_rate": 0.0001, "loss": 0.0075, "step": 151440 }, { "epoch": 996.3815789473684, "grad_norm": 0.915751576423645, "learning_rate": 0.0001, "loss": 0.0087, "step": 151450 }, { "epoch": 996.4473684210526, "grad_norm": 0.9805639982223511, "learning_rate": 0.0001, "loss": 0.0088, "step": 151460 }, { "epoch": 996.5131578947369, "grad_norm": 0.8894463777542114, "learning_rate": 0.0001, "loss": 0.0103, "step": 151470 }, { "epoch": 996.578947368421, "grad_norm": 1.0241661071777344, "learning_rate": 0.0001, "loss": 0.0093, "step": 151480 }, { "epoch": 996.6447368421053, "grad_norm": 1.265334129333496, "learning_rate": 0.0001, "loss": 0.009, "step": 151490 }, { "epoch": 996.7105263157895, "grad_norm": 0.9865074753761292, "learning_rate": 0.0001, "loss": 0.0082, "step": 151500 }, { "epoch": 996.7763157894736, "grad_norm": 0.9851489067077637, "learning_rate": 0.0001, "loss": 0.0103, "step": 151510 }, { "epoch": 996.8421052631579, "grad_norm": 0.9616159200668335, "learning_rate": 0.0001, "loss": 0.0094, "step": 151520 }, { "epoch": 996.9078947368421, "grad_norm": 1.051690936088562, "learning_rate": 0.0001, "loss": 0.0107, "step": 151530 }, { "epoch": 996.9736842105264, "grad_norm": 0.7457857131958008, "learning_rate": 0.0001, "loss": 0.0098, "step": 151540 }, { "epoch": 997.0394736842105, "grad_norm": 1.3598742485046387, "learning_rate": 0.0001, "loss": 0.0098, "step": 151550 }, { "epoch": 997.1052631578947, "grad_norm": 1.0818504095077515, "learning_rate": 0.0001, "loss": 0.009, "step": 151560 }, { "epoch": 997.171052631579, "grad_norm": 1.1341614723205566, "learning_rate": 0.0001, "loss": 0.0089, "step": 151570 }, { "epoch": 997.2368421052631, "grad_norm": 1.3437490463256836, "learning_rate": 0.0001, "loss": 0.0101, "step": 151580 }, { "epoch": 997.3026315789474, "grad_norm": 0.998374879360199, "learning_rate": 0.0001, "loss": 0.0081, "step": 151590 }, { "epoch": 997.3684210526316, "grad_norm": 1.2805155515670776, "learning_rate": 0.0001, "loss": 0.009, "step": 151600 }, { "epoch": 997.4342105263158, "grad_norm": 1.1499323844909668, "learning_rate": 0.0001, "loss": 0.0103, "step": 151610 }, { "epoch": 997.5, "grad_norm": 1.3379521369934082, "learning_rate": 0.0001, "loss": 0.0083, "step": 151620 }, { "epoch": 997.5657894736842, "grad_norm": 1.0792102813720703, "learning_rate": 0.0001, "loss": 0.0098, "step": 151630 }, { "epoch": 997.6315789473684, "grad_norm": 1.2518432140350342, "learning_rate": 0.0001, "loss": 0.0095, "step": 151640 }, { "epoch": 997.6973684210526, "grad_norm": 1.0179204940795898, "learning_rate": 0.0001, "loss": 0.012, "step": 151650 }, { "epoch": 997.7631578947369, "grad_norm": 0.97569340467453, "learning_rate": 0.0001, "loss": 0.0077, "step": 151660 }, { "epoch": 997.828947368421, "grad_norm": 1.1537328958511353, "learning_rate": 0.0001, "loss": 0.0094, "step": 151670 }, { "epoch": 997.8947368421053, "grad_norm": 1.0750672817230225, "learning_rate": 0.0001, "loss": 0.0083, "step": 151680 }, { "epoch": 997.9605263157895, "grad_norm": 0.9960630536079407, "learning_rate": 0.0001, "loss": 0.0108, "step": 151690 }, { "epoch": 998.0263157894736, "grad_norm": 0.7261005640029907, "learning_rate": 0.0001, "loss": 0.0102, "step": 151700 }, { "epoch": 998.0921052631579, "grad_norm": 1.1404473781585693, "learning_rate": 0.0001, "loss": 0.0088, "step": 151710 }, { "epoch": 998.1578947368421, "grad_norm": 1.0211684703826904, "learning_rate": 0.0001, "loss": 0.01, "step": 151720 }, { "epoch": 998.2236842105264, "grad_norm": 0.8978713750839233, "learning_rate": 0.0001, "loss": 0.0095, "step": 151730 }, { "epoch": 998.2894736842105, "grad_norm": 1.1412054300308228, "learning_rate": 0.0001, "loss": 0.0098, "step": 151740 }, { "epoch": 998.3552631578947, "grad_norm": 0.9569369554519653, "learning_rate": 0.0001, "loss": 0.0082, "step": 151750 }, { "epoch": 998.421052631579, "grad_norm": 1.211241602897644, "learning_rate": 0.0001, "loss": 0.0105, "step": 151760 }, { "epoch": 998.4868421052631, "grad_norm": 1.1356269121170044, "learning_rate": 0.0001, "loss": 0.0102, "step": 151770 }, { "epoch": 998.5526315789474, "grad_norm": 1.2875263690948486, "learning_rate": 0.0001, "loss": 0.01, "step": 151780 }, { "epoch": 998.6184210526316, "grad_norm": 0.9279645085334778, "learning_rate": 0.0001, "loss": 0.0088, "step": 151790 }, { "epoch": 998.6842105263158, "grad_norm": 1.1666724681854248, "learning_rate": 0.0001, "loss": 0.0108, "step": 151800 }, { "epoch": 998.75, "grad_norm": 1.2689951658248901, "learning_rate": 0.0001, "loss": 0.0086, "step": 151810 }, { "epoch": 998.8157894736842, "grad_norm": 0.8863130807876587, "learning_rate": 0.0001, "loss": 0.0094, "step": 151820 }, { "epoch": 998.8815789473684, "grad_norm": 1.002847671508789, "learning_rate": 0.0001, "loss": 0.0087, "step": 151830 }, { "epoch": 998.9473684210526, "grad_norm": 1.0156797170639038, "learning_rate": 0.0001, "loss": 0.0086, "step": 151840 }, { "epoch": 999.0131578947369, "grad_norm": 1.1730620861053467, "learning_rate": 0.0001, "loss": 0.0109, "step": 151850 }, { "epoch": 999.078947368421, "grad_norm": 1.2348865270614624, "learning_rate": 0.0001, "loss": 0.0108, "step": 151860 }, { "epoch": 999.1447368421053, "grad_norm": 1.2273298501968384, "learning_rate": 0.0001, "loss": 0.0096, "step": 151870 }, { "epoch": 999.2105263157895, "grad_norm": 1.1343059539794922, "learning_rate": 0.0001, "loss": 0.011, "step": 151880 }, { "epoch": 999.2763157894736, "grad_norm": 1.2365325689315796, "learning_rate": 0.0001, "loss": 0.0089, "step": 151890 }, { "epoch": 999.3421052631579, "grad_norm": 0.8025848269462585, "learning_rate": 0.0001, "loss": 0.008, "step": 151900 }, { "epoch": 999.4078947368421, "grad_norm": 1.318703055381775, "learning_rate": 0.0001, "loss": 0.0123, "step": 151910 }, { "epoch": 999.4736842105264, "grad_norm": 0.9021982550621033, "learning_rate": 0.0001, "loss": 0.0112, "step": 151920 }, { "epoch": 999.5394736842105, "grad_norm": 1.2123674154281616, "learning_rate": 0.0001, "loss": 0.0076, "step": 151930 }, { "epoch": 999.6052631578947, "grad_norm": 1.0211516618728638, "learning_rate": 0.0001, "loss": 0.0123, "step": 151940 }, { "epoch": 999.671052631579, "grad_norm": 0.8161410689353943, "learning_rate": 0.0001, "loss": 0.0079, "step": 151950 }, { "epoch": 999.7368421052631, "grad_norm": 0.7967602610588074, "learning_rate": 0.0001, "loss": 0.0088, "step": 151960 }, { "epoch": 999.8026315789474, "grad_norm": 1.032511591911316, "learning_rate": 0.0001, "loss": 0.0077, "step": 151970 }, { "epoch": 999.8684210526316, "grad_norm": 0.9636939764022827, "learning_rate": 0.0001, "loss": 0.0101, "step": 151980 }, { "epoch": 999.9342105263158, "grad_norm": 0.7413888573646545, "learning_rate": 0.0001, "loss": 0.0091, "step": 151990 }, { "epoch": 1000.0, "grad_norm": 1.27151358127594, "learning_rate": 0.0001, "loss": 0.0078, "step": 152000 }, { "epoch": 1000.0657894736842, "grad_norm": 1.314128041267395, "learning_rate": 0.0001, "loss": 0.0103, "step": 152010 }, { "epoch": 1000.1315789473684, "grad_norm": 1.3646230697631836, "learning_rate": 0.0001, "loss": 0.0108, "step": 152020 }, { "epoch": 1000.1973684210526, "grad_norm": 1.1015452146530151, "learning_rate": 0.0001, "loss": 0.0087, "step": 152030 }, { "epoch": 1000.2631578947369, "grad_norm": 1.1657509803771973, "learning_rate": 0.0001, "loss": 0.0088, "step": 152040 }, { "epoch": 1000.328947368421, "grad_norm": 1.5875916481018066, "learning_rate": 0.0001, "loss": 0.0081, "step": 152050 }, { "epoch": 1000.3947368421053, "grad_norm": 0.9831330180168152, "learning_rate": 0.0001, "loss": 0.0088, "step": 152060 }, { "epoch": 1000.4605263157895, "grad_norm": 1.056758999824524, "learning_rate": 0.0001, "loss": 0.0099, "step": 152070 }, { "epoch": 1000.5263157894736, "grad_norm": 0.9852983355522156, "learning_rate": 0.0001, "loss": 0.0089, "step": 152080 }, { "epoch": 1000.5921052631579, "grad_norm": 1.0023329257965088, "learning_rate": 0.0001, "loss": 0.0092, "step": 152090 }, { "epoch": 1000.6578947368421, "grad_norm": 0.8812468647956848, "learning_rate": 0.0001, "loss": 0.0108, "step": 152100 }, { "epoch": 1000.7236842105264, "grad_norm": 1.064321756362915, "learning_rate": 0.0001, "loss": 0.009, "step": 152110 }, { "epoch": 1000.7894736842105, "grad_norm": 0.9033085107803345, "learning_rate": 0.0001, "loss": 0.0089, "step": 152120 }, { "epoch": 1000.8552631578947, "grad_norm": 1.2103757858276367, "learning_rate": 0.0001, "loss": 0.0112, "step": 152130 }, { "epoch": 1000.921052631579, "grad_norm": 0.9150286316871643, "learning_rate": 0.0001, "loss": 0.0091, "step": 152140 }, { "epoch": 1000.9868421052631, "grad_norm": 0.9393565058708191, "learning_rate": 0.0001, "loss": 0.0098, "step": 152150 }, { "epoch": 1001.0526315789474, "grad_norm": 1.163619875907898, "learning_rate": 0.0001, "loss": 0.0076, "step": 152160 }, { "epoch": 1001.1184210526316, "grad_norm": 0.695565938949585, "learning_rate": 0.0001, "loss": 0.011, "step": 152170 }, { "epoch": 1001.1842105263158, "grad_norm": 0.837020754814148, "learning_rate": 0.0001, "loss": 0.0108, "step": 152180 }, { "epoch": 1001.25, "grad_norm": 0.9212221503257751, "learning_rate": 0.0001, "loss": 0.0094, "step": 152190 }, { "epoch": 1001.3157894736842, "grad_norm": 1.2509287595748901, "learning_rate": 0.0001, "loss": 0.0093, "step": 152200 }, { "epoch": 1001.3815789473684, "grad_norm": 1.2080440521240234, "learning_rate": 0.0001, "loss": 0.0107, "step": 152210 }, { "epoch": 1001.4473684210526, "grad_norm": 0.846920907497406, "learning_rate": 0.0001, "loss": 0.0102, "step": 152220 }, { "epoch": 1001.5131578947369, "grad_norm": 1.3782720565795898, "learning_rate": 0.0001, "loss": 0.0102, "step": 152230 }, { "epoch": 1001.578947368421, "grad_norm": 1.222617506980896, "learning_rate": 0.0001, "loss": 0.0083, "step": 152240 }, { "epoch": 1001.6447368421053, "grad_norm": 0.953065812587738, "learning_rate": 0.0001, "loss": 0.0082, "step": 152250 }, { "epoch": 1001.7105263157895, "grad_norm": 0.9601914882659912, "learning_rate": 0.0001, "loss": 0.0083, "step": 152260 }, { "epoch": 1001.7763157894736, "grad_norm": 0.7010998725891113, "learning_rate": 0.0001, "loss": 0.0107, "step": 152270 }, { "epoch": 1001.8421052631579, "grad_norm": 1.2162070274353027, "learning_rate": 0.0001, "loss": 0.0081, "step": 152280 }, { "epoch": 1001.9078947368421, "grad_norm": 0.6597650647163391, "learning_rate": 0.0001, "loss": 0.0107, "step": 152290 }, { "epoch": 1001.9736842105264, "grad_norm": 1.0291619300842285, "learning_rate": 0.0001, "loss": 0.0083, "step": 152300 }, { "epoch": 1002.0394736842105, "grad_norm": 1.1354762315750122, "learning_rate": 0.0001, "loss": 0.0097, "step": 152310 }, { "epoch": 1002.1052631578947, "grad_norm": 0.8112242221832275, "learning_rate": 0.0001, "loss": 0.0078, "step": 152320 }, { "epoch": 1002.171052631579, "grad_norm": 1.0882664918899536, "learning_rate": 0.0001, "loss": 0.0095, "step": 152330 }, { "epoch": 1002.2368421052631, "grad_norm": 0.919173538684845, "learning_rate": 0.0001, "loss": 0.01, "step": 152340 }, { "epoch": 1002.3026315789474, "grad_norm": 1.0165963172912598, "learning_rate": 0.0001, "loss": 0.0095, "step": 152350 }, { "epoch": 1002.3684210526316, "grad_norm": 1.0748934745788574, "learning_rate": 0.0001, "loss": 0.0121, "step": 152360 }, { "epoch": 1002.4342105263158, "grad_norm": 1.1250693798065186, "learning_rate": 0.0001, "loss": 0.0079, "step": 152370 }, { "epoch": 1002.5, "grad_norm": 1.0457911491394043, "learning_rate": 0.0001, "loss": 0.0089, "step": 152380 }, { "epoch": 1002.5657894736842, "grad_norm": 1.0000548362731934, "learning_rate": 0.0001, "loss": 0.0085, "step": 152390 }, { "epoch": 1002.6315789473684, "grad_norm": 1.0795695781707764, "learning_rate": 0.0001, "loss": 0.0096, "step": 152400 }, { "epoch": 1002.6973684210526, "grad_norm": 0.7151395082473755, "learning_rate": 0.0001, "loss": 0.0099, "step": 152410 }, { "epoch": 1002.7631578947369, "grad_norm": 0.9494915008544922, "learning_rate": 0.0001, "loss": 0.0099, "step": 152420 }, { "epoch": 1002.828947368421, "grad_norm": 0.8552742004394531, "learning_rate": 0.0001, "loss": 0.0097, "step": 152430 }, { "epoch": 1002.8947368421053, "grad_norm": 0.8879970908164978, "learning_rate": 0.0001, "loss": 0.0085, "step": 152440 }, { "epoch": 1002.9605263157895, "grad_norm": 1.2099488973617554, "learning_rate": 0.0001, "loss": 0.0096, "step": 152450 }, { "epoch": 1003.0263157894736, "grad_norm": 0.8229483366012573, "learning_rate": 0.0001, "loss": 0.011, "step": 152460 }, { "epoch": 1003.0921052631579, "grad_norm": 0.604310154914856, "learning_rate": 0.0001, "loss": 0.0087, "step": 152470 }, { "epoch": 1003.1578947368421, "grad_norm": 1.2384097576141357, "learning_rate": 0.0001, "loss": 0.0094, "step": 152480 }, { "epoch": 1003.2236842105264, "grad_norm": 0.9572075009346008, "learning_rate": 0.0001, "loss": 0.0096, "step": 152490 }, { "epoch": 1003.2894736842105, "grad_norm": 1.0197408199310303, "learning_rate": 0.0001, "loss": 0.0084, "step": 152500 }, { "epoch": 1003.3552631578947, "grad_norm": 0.9657206535339355, "learning_rate": 0.0001, "loss": 0.0108, "step": 152510 }, { "epoch": 1003.421052631579, "grad_norm": 0.9685333967208862, "learning_rate": 0.0001, "loss": 0.0108, "step": 152520 }, { "epoch": 1003.4868421052631, "grad_norm": 1.1198996305465698, "learning_rate": 0.0001, "loss": 0.0087, "step": 152530 }, { "epoch": 1003.5526315789474, "grad_norm": 1.1644282341003418, "learning_rate": 0.0001, "loss": 0.0123, "step": 152540 }, { "epoch": 1003.6184210526316, "grad_norm": 0.9588817954063416, "learning_rate": 0.0001, "loss": 0.0089, "step": 152550 }, { "epoch": 1003.6842105263158, "grad_norm": 0.7863638997077942, "learning_rate": 0.0001, "loss": 0.0095, "step": 152560 }, { "epoch": 1003.75, "grad_norm": 1.0634634494781494, "learning_rate": 0.0001, "loss": 0.0097, "step": 152570 }, { "epoch": 1003.8157894736842, "grad_norm": 1.312638759613037, "learning_rate": 0.0001, "loss": 0.0098, "step": 152580 }, { "epoch": 1003.8815789473684, "grad_norm": 1.0929923057556152, "learning_rate": 0.0001, "loss": 0.0083, "step": 152590 }, { "epoch": 1003.9473684210526, "grad_norm": 1.4399696588516235, "learning_rate": 0.0001, "loss": 0.0088, "step": 152600 }, { "epoch": 1004.0131578947369, "grad_norm": 1.2716460227966309, "learning_rate": 0.0001, "loss": 0.0101, "step": 152610 }, { "epoch": 1004.078947368421, "grad_norm": 1.0641130208969116, "learning_rate": 0.0001, "loss": 0.0091, "step": 152620 }, { "epoch": 1004.1447368421053, "grad_norm": 0.9772552847862244, "learning_rate": 0.0001, "loss": 0.0089, "step": 152630 }, { "epoch": 1004.2105263157895, "grad_norm": 0.9149281978607178, "learning_rate": 0.0001, "loss": 0.0093, "step": 152640 }, { "epoch": 1004.2763157894736, "grad_norm": 1.0702228546142578, "learning_rate": 0.0001, "loss": 0.0092, "step": 152650 }, { "epoch": 1004.3421052631579, "grad_norm": 1.0155905485153198, "learning_rate": 0.0001, "loss": 0.0093, "step": 152660 }, { "epoch": 1004.4078947368421, "grad_norm": 1.0387847423553467, "learning_rate": 0.0001, "loss": 0.0093, "step": 152670 }, { "epoch": 1004.4736842105264, "grad_norm": 1.1399638652801514, "learning_rate": 0.0001, "loss": 0.0078, "step": 152680 }, { "epoch": 1004.5394736842105, "grad_norm": 1.349084734916687, "learning_rate": 0.0001, "loss": 0.0092, "step": 152690 }, { "epoch": 1004.6052631578947, "grad_norm": 1.1907349824905396, "learning_rate": 0.0001, "loss": 0.0092, "step": 152700 }, { "epoch": 1004.671052631579, "grad_norm": 1.4231032133102417, "learning_rate": 0.0001, "loss": 0.0104, "step": 152710 }, { "epoch": 1004.7368421052631, "grad_norm": 1.0504357814788818, "learning_rate": 0.0001, "loss": 0.0109, "step": 152720 }, { "epoch": 1004.8026315789474, "grad_norm": 0.8143443465232849, "learning_rate": 0.0001, "loss": 0.0081, "step": 152730 }, { "epoch": 1004.8684210526316, "grad_norm": 1.1146156787872314, "learning_rate": 0.0001, "loss": 0.0099, "step": 152740 }, { "epoch": 1004.9342105263158, "grad_norm": 1.0395196676254272, "learning_rate": 0.0001, "loss": 0.0104, "step": 152750 }, { "epoch": 1005.0, "grad_norm": 1.0455608367919922, "learning_rate": 0.0001, "loss": 0.009, "step": 152760 }, { "epoch": 1005.0657894736842, "grad_norm": 0.8449133038520813, "learning_rate": 0.0001, "loss": 0.0103, "step": 152770 }, { "epoch": 1005.1315789473684, "grad_norm": 0.9601693749427795, "learning_rate": 0.0001, "loss": 0.0075, "step": 152780 }, { "epoch": 1005.1973684210526, "grad_norm": 0.9671313762664795, "learning_rate": 0.0001, "loss": 0.0106, "step": 152790 }, { "epoch": 1005.2631578947369, "grad_norm": 0.8322471976280212, "learning_rate": 0.0001, "loss": 0.0105, "step": 152800 }, { "epoch": 1005.328947368421, "grad_norm": 0.9465487003326416, "learning_rate": 0.0001, "loss": 0.0111, "step": 152810 }, { "epoch": 1005.3947368421053, "grad_norm": 0.8888710141181946, "learning_rate": 0.0001, "loss": 0.0099, "step": 152820 }, { "epoch": 1005.4605263157895, "grad_norm": 1.081684947013855, "learning_rate": 0.0001, "loss": 0.0104, "step": 152830 }, { "epoch": 1005.5263157894736, "grad_norm": 0.9783938527107239, "learning_rate": 0.0001, "loss": 0.0072, "step": 152840 }, { "epoch": 1005.5921052631579, "grad_norm": 1.3762357234954834, "learning_rate": 0.0001, "loss": 0.0109, "step": 152850 }, { "epoch": 1005.6578947368421, "grad_norm": 1.3803174495697021, "learning_rate": 0.0001, "loss": 0.0087, "step": 152860 }, { "epoch": 1005.7236842105264, "grad_norm": 1.0648322105407715, "learning_rate": 0.0001, "loss": 0.0085, "step": 152870 }, { "epoch": 1005.7894736842105, "grad_norm": 1.019877314567566, "learning_rate": 0.0001, "loss": 0.0079, "step": 152880 }, { "epoch": 1005.8552631578947, "grad_norm": 1.0689724683761597, "learning_rate": 0.0001, "loss": 0.0098, "step": 152890 }, { "epoch": 1005.921052631579, "grad_norm": 1.0110927820205688, "learning_rate": 0.0001, "loss": 0.009, "step": 152900 }, { "epoch": 1005.9868421052631, "grad_norm": 0.8026441335678101, "learning_rate": 0.0001, "loss": 0.0081, "step": 152910 }, { "epoch": 1006.0526315789474, "grad_norm": 0.9327909350395203, "learning_rate": 0.0001, "loss": 0.0089, "step": 152920 }, { "epoch": 1006.1184210526316, "grad_norm": 0.9593319296836853, "learning_rate": 0.0001, "loss": 0.0093, "step": 152930 }, { "epoch": 1006.1842105263158, "grad_norm": 0.9734617471694946, "learning_rate": 0.0001, "loss": 0.0088, "step": 152940 }, { "epoch": 1006.25, "grad_norm": 0.8981104493141174, "learning_rate": 0.0001, "loss": 0.0085, "step": 152950 }, { "epoch": 1006.3157894736842, "grad_norm": 0.6825907826423645, "learning_rate": 0.0001, "loss": 0.0096, "step": 152960 }, { "epoch": 1006.3815789473684, "grad_norm": 1.2038465738296509, "learning_rate": 0.0001, "loss": 0.0096, "step": 152970 }, { "epoch": 1006.4473684210526, "grad_norm": 1.119794487953186, "learning_rate": 0.0001, "loss": 0.01, "step": 152980 }, { "epoch": 1006.5131578947369, "grad_norm": 1.064179539680481, "learning_rate": 0.0001, "loss": 0.0102, "step": 152990 }, { "epoch": 1006.578947368421, "grad_norm": 1.1156647205352783, "learning_rate": 0.0001, "loss": 0.0095, "step": 153000 }, { "epoch": 1006.6447368421053, "grad_norm": 1.3254365921020508, "learning_rate": 0.0001, "loss": 0.0093, "step": 153010 }, { "epoch": 1006.7105263157895, "grad_norm": 1.4035636186599731, "learning_rate": 0.0001, "loss": 0.0104, "step": 153020 }, { "epoch": 1006.7763157894736, "grad_norm": 1.1695518493652344, "learning_rate": 0.0001, "loss": 0.0091, "step": 153030 }, { "epoch": 1006.8421052631579, "grad_norm": 1.270020842552185, "learning_rate": 0.0001, "loss": 0.0103, "step": 153040 }, { "epoch": 1006.9078947368421, "grad_norm": 1.3155039548873901, "learning_rate": 0.0001, "loss": 0.0081, "step": 153050 }, { "epoch": 1006.9736842105264, "grad_norm": 0.8515135645866394, "learning_rate": 0.0001, "loss": 0.0096, "step": 153060 }, { "epoch": 1007.0394736842105, "grad_norm": 0.9089174866676331, "learning_rate": 0.0001, "loss": 0.0106, "step": 153070 }, { "epoch": 1007.1052631578947, "grad_norm": 1.1836307048797607, "learning_rate": 0.0001, "loss": 0.0092, "step": 153080 }, { "epoch": 1007.171052631579, "grad_norm": 1.1035048961639404, "learning_rate": 0.0001, "loss": 0.0103, "step": 153090 }, { "epoch": 1007.2368421052631, "grad_norm": 1.2062311172485352, "learning_rate": 0.0001, "loss": 0.0107, "step": 153100 }, { "epoch": 1007.3026315789474, "grad_norm": 1.3278850317001343, "learning_rate": 0.0001, "loss": 0.0092, "step": 153110 }, { "epoch": 1007.3684210526316, "grad_norm": 1.200394630432129, "learning_rate": 0.0001, "loss": 0.0091, "step": 153120 }, { "epoch": 1007.4342105263158, "grad_norm": 1.2503161430358887, "learning_rate": 0.0001, "loss": 0.009, "step": 153130 }, { "epoch": 1007.5, "grad_norm": 1.327488899230957, "learning_rate": 0.0001, "loss": 0.0094, "step": 153140 }, { "epoch": 1007.5657894736842, "grad_norm": 0.6946982145309448, "learning_rate": 0.0001, "loss": 0.0084, "step": 153150 }, { "epoch": 1007.6315789473684, "grad_norm": 0.8254473805427551, "learning_rate": 0.0001, "loss": 0.0102, "step": 153160 }, { "epoch": 1007.6973684210526, "grad_norm": 0.8691716194152832, "learning_rate": 0.0001, "loss": 0.0095, "step": 153170 }, { "epoch": 1007.7631578947369, "grad_norm": 1.0709201097488403, "learning_rate": 0.0001, "loss": 0.0081, "step": 153180 }, { "epoch": 1007.828947368421, "grad_norm": 1.361719012260437, "learning_rate": 0.0001, "loss": 0.0107, "step": 153190 }, { "epoch": 1007.8947368421053, "grad_norm": 1.2241108417510986, "learning_rate": 0.0001, "loss": 0.0076, "step": 153200 }, { "epoch": 1007.9605263157895, "grad_norm": 1.4689909219741821, "learning_rate": 0.0001, "loss": 0.0093, "step": 153210 }, { "epoch": 1008.0263157894736, "grad_norm": 1.4543124437332153, "learning_rate": 0.0001, "loss": 0.0085, "step": 153220 }, { "epoch": 1008.0921052631579, "grad_norm": 1.4135468006134033, "learning_rate": 0.0001, "loss": 0.0104, "step": 153230 }, { "epoch": 1008.1578947368421, "grad_norm": 1.1939821243286133, "learning_rate": 0.0001, "loss": 0.0087, "step": 153240 }, { "epoch": 1008.2236842105264, "grad_norm": 1.4653980731964111, "learning_rate": 0.0001, "loss": 0.0094, "step": 153250 }, { "epoch": 1008.2894736842105, "grad_norm": 1.4187058210372925, "learning_rate": 0.0001, "loss": 0.0091, "step": 153260 }, { "epoch": 1008.3552631578947, "grad_norm": 1.3027122020721436, "learning_rate": 0.0001, "loss": 0.0093, "step": 153270 }, { "epoch": 1008.421052631579, "grad_norm": 1.2403491735458374, "learning_rate": 0.0001, "loss": 0.0091, "step": 153280 }, { "epoch": 1008.4868421052631, "grad_norm": 0.8869355916976929, "learning_rate": 0.0001, "loss": 0.0091, "step": 153290 }, { "epoch": 1008.5526315789474, "grad_norm": 1.038567066192627, "learning_rate": 0.0001, "loss": 0.0081, "step": 153300 }, { "epoch": 1008.6184210526316, "grad_norm": 1.1018482446670532, "learning_rate": 0.0001, "loss": 0.0081, "step": 153310 }, { "epoch": 1008.6842105263158, "grad_norm": 0.9891707301139832, "learning_rate": 0.0001, "loss": 0.0075, "step": 153320 }, { "epoch": 1008.75, "grad_norm": 1.3126126527786255, "learning_rate": 0.0001, "loss": 0.0097, "step": 153330 }, { "epoch": 1008.8157894736842, "grad_norm": 1.0261906385421753, "learning_rate": 0.0001, "loss": 0.0095, "step": 153340 }, { "epoch": 1008.8815789473684, "grad_norm": 1.0585100650787354, "learning_rate": 0.0001, "loss": 0.0111, "step": 153350 }, { "epoch": 1008.9473684210526, "grad_norm": 0.7990329265594482, "learning_rate": 0.0001, "loss": 0.011, "step": 153360 }, { "epoch": 1009.0131578947369, "grad_norm": 0.703535795211792, "learning_rate": 0.0001, "loss": 0.0109, "step": 153370 }, { "epoch": 1009.078947368421, "grad_norm": 0.7562394142150879, "learning_rate": 0.0001, "loss": 0.0085, "step": 153380 }, { "epoch": 1009.1447368421053, "grad_norm": 0.6611744165420532, "learning_rate": 0.0001, "loss": 0.0092, "step": 153390 }, { "epoch": 1009.2105263157895, "grad_norm": 0.9061070084571838, "learning_rate": 0.0001, "loss": 0.0117, "step": 153400 }, { "epoch": 1009.2763157894736, "grad_norm": 1.1760482788085938, "learning_rate": 0.0001, "loss": 0.0123, "step": 153410 }, { "epoch": 1009.3421052631579, "grad_norm": 1.6927746534347534, "learning_rate": 0.0001, "loss": 0.0084, "step": 153420 }, { "epoch": 1009.4078947368421, "grad_norm": 1.375290870666504, "learning_rate": 0.0001, "loss": 0.0088, "step": 153430 }, { "epoch": 1009.4736842105264, "grad_norm": 1.3933255672454834, "learning_rate": 0.0001, "loss": 0.0091, "step": 153440 }, { "epoch": 1009.5394736842105, "grad_norm": 1.4885764122009277, "learning_rate": 0.0001, "loss": 0.0091, "step": 153450 }, { "epoch": 1009.6052631578947, "grad_norm": 0.8054714798927307, "learning_rate": 0.0001, "loss": 0.0085, "step": 153460 }, { "epoch": 1009.671052631579, "grad_norm": 0.9713701605796814, "learning_rate": 0.0001, "loss": 0.0083, "step": 153470 }, { "epoch": 1009.7368421052631, "grad_norm": 1.0270944833755493, "learning_rate": 0.0001, "loss": 0.0104, "step": 153480 }, { "epoch": 1009.8026315789474, "grad_norm": 1.0508967638015747, "learning_rate": 0.0001, "loss": 0.0101, "step": 153490 }, { "epoch": 1009.8684210526316, "grad_norm": 1.0318694114685059, "learning_rate": 0.0001, "loss": 0.0092, "step": 153500 }, { "epoch": 1009.9342105263158, "grad_norm": 1.081775426864624, "learning_rate": 0.0001, "loss": 0.0081, "step": 153510 }, { "epoch": 1010.0, "grad_norm": 0.6069177389144897, "learning_rate": 0.0001, "loss": 0.0096, "step": 153520 }, { "epoch": 1010.0657894736842, "grad_norm": 0.6230463981628418, "learning_rate": 0.0001, "loss": 0.0096, "step": 153530 }, { "epoch": 1010.1315789473684, "grad_norm": 0.8272234201431274, "learning_rate": 0.0001, "loss": 0.0096, "step": 153540 }, { "epoch": 1010.1973684210526, "grad_norm": 1.0551385879516602, "learning_rate": 0.0001, "loss": 0.0096, "step": 153550 }, { "epoch": 1010.2631578947369, "grad_norm": 1.039683222770691, "learning_rate": 0.0001, "loss": 0.0118, "step": 153560 }, { "epoch": 1010.328947368421, "grad_norm": 0.7899230122566223, "learning_rate": 0.0001, "loss": 0.0086, "step": 153570 }, { "epoch": 1010.3947368421053, "grad_norm": 0.8216849565505981, "learning_rate": 0.0001, "loss": 0.0091, "step": 153580 }, { "epoch": 1010.4605263157895, "grad_norm": 0.9571535587310791, "learning_rate": 0.0001, "loss": 0.0097, "step": 153590 }, { "epoch": 1010.5263157894736, "grad_norm": 1.1962392330169678, "learning_rate": 0.0001, "loss": 0.0093, "step": 153600 }, { "epoch": 1010.5921052631579, "grad_norm": 0.8668336868286133, "learning_rate": 0.0001, "loss": 0.0094, "step": 153610 }, { "epoch": 1010.6578947368421, "grad_norm": 1.1946544647216797, "learning_rate": 0.0001, "loss": 0.0087, "step": 153620 }, { "epoch": 1010.7236842105264, "grad_norm": 0.8684710264205933, "learning_rate": 0.0001, "loss": 0.0096, "step": 153630 }, { "epoch": 1010.7894736842105, "grad_norm": 1.0615448951721191, "learning_rate": 0.0001, "loss": 0.0095, "step": 153640 }, { "epoch": 1010.8552631578947, "grad_norm": 1.3180712461471558, "learning_rate": 0.0001, "loss": 0.0095, "step": 153650 }, { "epoch": 1010.921052631579, "grad_norm": 1.4111500978469849, "learning_rate": 0.0001, "loss": 0.0092, "step": 153660 }, { "epoch": 1010.9868421052631, "grad_norm": 1.3890069723129272, "learning_rate": 0.0001, "loss": 0.0097, "step": 153670 }, { "epoch": 1011.0526315789474, "grad_norm": 0.9825431108474731, "learning_rate": 0.0001, "loss": 0.0101, "step": 153680 }, { "epoch": 1011.1184210526316, "grad_norm": 0.8479264974594116, "learning_rate": 0.0001, "loss": 0.01, "step": 153690 }, { "epoch": 1011.1842105263158, "grad_norm": 1.2472234964370728, "learning_rate": 0.0001, "loss": 0.0092, "step": 153700 }, { "epoch": 1011.25, "grad_norm": 1.0906670093536377, "learning_rate": 0.0001, "loss": 0.0118, "step": 153710 }, { "epoch": 1011.3157894736842, "grad_norm": 1.144405722618103, "learning_rate": 0.0001, "loss": 0.0102, "step": 153720 }, { "epoch": 1011.3815789473684, "grad_norm": 0.8326566219329834, "learning_rate": 0.0001, "loss": 0.0077, "step": 153730 }, { "epoch": 1011.4473684210526, "grad_norm": 1.1683098077774048, "learning_rate": 0.0001, "loss": 0.0092, "step": 153740 }, { "epoch": 1011.5131578947369, "grad_norm": 0.7336865663528442, "learning_rate": 0.0001, "loss": 0.0099, "step": 153750 }, { "epoch": 1011.578947368421, "grad_norm": 1.1023454666137695, "learning_rate": 0.0001, "loss": 0.0079, "step": 153760 }, { "epoch": 1011.6447368421053, "grad_norm": 1.045922040939331, "learning_rate": 0.0001, "loss": 0.0089, "step": 153770 }, { "epoch": 1011.7105263157895, "grad_norm": 0.9625352621078491, "learning_rate": 0.0001, "loss": 0.0104, "step": 153780 }, { "epoch": 1011.7763157894736, "grad_norm": 1.1642197370529175, "learning_rate": 0.0001, "loss": 0.0097, "step": 153790 }, { "epoch": 1011.8421052631579, "grad_norm": 1.1206856966018677, "learning_rate": 0.0001, "loss": 0.0087, "step": 153800 }, { "epoch": 1011.9078947368421, "grad_norm": 1.251189947128296, "learning_rate": 0.0001, "loss": 0.0091, "step": 153810 }, { "epoch": 1011.9736842105264, "grad_norm": 0.9257858991622925, "learning_rate": 0.0001, "loss": 0.0096, "step": 153820 }, { "epoch": 1012.0394736842105, "grad_norm": 0.9416394233703613, "learning_rate": 0.0001, "loss": 0.0103, "step": 153830 }, { "epoch": 1012.1052631578947, "grad_norm": 0.7011066675186157, "learning_rate": 0.0001, "loss": 0.0091, "step": 153840 }, { "epoch": 1012.171052631579, "grad_norm": 0.9953075051307678, "learning_rate": 0.0001, "loss": 0.0086, "step": 153850 }, { "epoch": 1012.2368421052631, "grad_norm": 1.0580404996871948, "learning_rate": 0.0001, "loss": 0.0094, "step": 153860 }, { "epoch": 1012.3026315789474, "grad_norm": 0.9159753918647766, "learning_rate": 0.0001, "loss": 0.0075, "step": 153870 }, { "epoch": 1012.3684210526316, "grad_norm": 1.2467912435531616, "learning_rate": 0.0001, "loss": 0.0107, "step": 153880 }, { "epoch": 1012.4342105263158, "grad_norm": 0.8988513350486755, "learning_rate": 0.0001, "loss": 0.0119, "step": 153890 }, { "epoch": 1012.5, "grad_norm": 0.8975652456283569, "learning_rate": 0.0001, "loss": 0.0089, "step": 153900 }, { "epoch": 1012.5657894736842, "grad_norm": 0.9276600480079651, "learning_rate": 0.0001, "loss": 0.0105, "step": 153910 }, { "epoch": 1012.6315789473684, "grad_norm": 1.1802819967269897, "learning_rate": 0.0001, "loss": 0.0097, "step": 153920 }, { "epoch": 1012.6973684210526, "grad_norm": 1.5919638872146606, "learning_rate": 0.0001, "loss": 0.0103, "step": 153930 }, { "epoch": 1012.7631578947369, "grad_norm": 0.8894348740577698, "learning_rate": 0.0001, "loss": 0.0086, "step": 153940 }, { "epoch": 1012.828947368421, "grad_norm": 0.9059845209121704, "learning_rate": 0.0001, "loss": 0.0102, "step": 153950 }, { "epoch": 1012.8947368421053, "grad_norm": 0.7619372010231018, "learning_rate": 0.0001, "loss": 0.009, "step": 153960 }, { "epoch": 1012.9605263157895, "grad_norm": 1.105398416519165, "learning_rate": 0.0001, "loss": 0.0089, "step": 153970 }, { "epoch": 1013.0263157894736, "grad_norm": 0.6441566944122314, "learning_rate": 0.0001, "loss": 0.0091, "step": 153980 }, { "epoch": 1013.0921052631579, "grad_norm": 1.1921231746673584, "learning_rate": 0.0001, "loss": 0.0085, "step": 153990 }, { "epoch": 1013.1578947368421, "grad_norm": 1.3228693008422852, "learning_rate": 0.0001, "loss": 0.0083, "step": 154000 }, { "epoch": 1013.2236842105264, "grad_norm": 1.2285857200622559, "learning_rate": 0.0001, "loss": 0.009, "step": 154010 }, { "epoch": 1013.2894736842105, "grad_norm": 0.5511453747749329, "learning_rate": 0.0001, "loss": 0.0097, "step": 154020 }, { "epoch": 1013.3552631578947, "grad_norm": 0.8343952894210815, "learning_rate": 0.0001, "loss": 0.0089, "step": 154030 }, { "epoch": 1013.421052631579, "grad_norm": 0.9927038550376892, "learning_rate": 0.0001, "loss": 0.0098, "step": 154040 }, { "epoch": 1013.4868421052631, "grad_norm": 0.9461319446563721, "learning_rate": 0.0001, "loss": 0.0083, "step": 154050 }, { "epoch": 1013.5526315789474, "grad_norm": 1.170270562171936, "learning_rate": 0.0001, "loss": 0.011, "step": 154060 }, { "epoch": 1013.6184210526316, "grad_norm": 1.1801016330718994, "learning_rate": 0.0001, "loss": 0.011, "step": 154070 }, { "epoch": 1013.6842105263158, "grad_norm": 1.064570665359497, "learning_rate": 0.0001, "loss": 0.0091, "step": 154080 }, { "epoch": 1013.75, "grad_norm": 0.9943475127220154, "learning_rate": 0.0001, "loss": 0.0086, "step": 154090 }, { "epoch": 1013.8157894736842, "grad_norm": 1.1326581239700317, "learning_rate": 0.0001, "loss": 0.0095, "step": 154100 }, { "epoch": 1013.8815789473684, "grad_norm": 1.2644318342208862, "learning_rate": 0.0001, "loss": 0.0122, "step": 154110 }, { "epoch": 1013.9473684210526, "grad_norm": 0.9471270442008972, "learning_rate": 0.0001, "loss": 0.0091, "step": 154120 }, { "epoch": 1014.0131578947369, "grad_norm": 1.1460727453231812, "learning_rate": 0.0001, "loss": 0.0103, "step": 154130 }, { "epoch": 1014.078947368421, "grad_norm": 1.2603262662887573, "learning_rate": 0.0001, "loss": 0.0083, "step": 154140 }, { "epoch": 1014.1447368421053, "grad_norm": 0.8152709007263184, "learning_rate": 0.0001, "loss": 0.0108, "step": 154150 }, { "epoch": 1014.2105263157895, "grad_norm": 0.7039467692375183, "learning_rate": 0.0001, "loss": 0.0085, "step": 154160 }, { "epoch": 1014.2763157894736, "grad_norm": 1.0728949308395386, "learning_rate": 0.0001, "loss": 0.0089, "step": 154170 }, { "epoch": 1014.3421052631579, "grad_norm": 0.7830701470375061, "learning_rate": 0.0001, "loss": 0.0086, "step": 154180 }, { "epoch": 1014.4078947368421, "grad_norm": 0.8367193937301636, "learning_rate": 0.0001, "loss": 0.0105, "step": 154190 }, { "epoch": 1014.4736842105264, "grad_norm": 1.973212480545044, "learning_rate": 0.0001, "loss": 0.0078, "step": 154200 }, { "epoch": 1014.5394736842105, "grad_norm": 1.815258502960205, "learning_rate": 0.0001, "loss": 0.0107, "step": 154210 }, { "epoch": 1014.6052631578947, "grad_norm": 2.393010377883911, "learning_rate": 0.0001, "loss": 0.0117, "step": 154220 }, { "epoch": 1014.671052631579, "grad_norm": 1.425652027130127, "learning_rate": 0.0001, "loss": 0.0106, "step": 154230 }, { "epoch": 1014.7368421052631, "grad_norm": 1.2232511043548584, "learning_rate": 0.0001, "loss": 0.0093, "step": 154240 }, { "epoch": 1014.8026315789474, "grad_norm": 1.2349510192871094, "learning_rate": 0.0001, "loss": 0.0086, "step": 154250 }, { "epoch": 1014.8684210526316, "grad_norm": 1.3472286462783813, "learning_rate": 0.0001, "loss": 0.0094, "step": 154260 }, { "epoch": 1014.9342105263158, "grad_norm": 1.1970043182373047, "learning_rate": 0.0001, "loss": 0.0095, "step": 154270 }, { "epoch": 1015.0, "grad_norm": 0.9629507660865784, "learning_rate": 0.0001, "loss": 0.0108, "step": 154280 }, { "epoch": 1015.0657894736842, "grad_norm": 0.9816302061080933, "learning_rate": 0.0001, "loss": 0.0092, "step": 154290 }, { "epoch": 1015.1315789473684, "grad_norm": 1.125002384185791, "learning_rate": 0.0001, "loss": 0.0086, "step": 154300 }, { "epoch": 1015.1973684210526, "grad_norm": 1.284298062324524, "learning_rate": 0.0001, "loss": 0.01, "step": 154310 }, { "epoch": 1015.2631578947369, "grad_norm": 1.4155656099319458, "learning_rate": 0.0001, "loss": 0.0099, "step": 154320 }, { "epoch": 1015.328947368421, "grad_norm": 1.0492305755615234, "learning_rate": 0.0001, "loss": 0.0089, "step": 154330 }, { "epoch": 1015.3947368421053, "grad_norm": 1.006813406944275, "learning_rate": 0.0001, "loss": 0.0096, "step": 154340 }, { "epoch": 1015.4605263157895, "grad_norm": 1.409071683883667, "learning_rate": 0.0001, "loss": 0.0083, "step": 154350 }, { "epoch": 1015.5263157894736, "grad_norm": 1.6593658924102783, "learning_rate": 0.0001, "loss": 0.0086, "step": 154360 }, { "epoch": 1015.5921052631579, "grad_norm": 1.4697237014770508, "learning_rate": 0.0001, "loss": 0.0111, "step": 154370 }, { "epoch": 1015.6578947368421, "grad_norm": 1.9840725660324097, "learning_rate": 0.0001, "loss": 0.0084, "step": 154380 }, { "epoch": 1015.7236842105264, "grad_norm": 1.4490352869033813, "learning_rate": 0.0001, "loss": 0.009, "step": 154390 }, { "epoch": 1015.7894736842105, "grad_norm": 1.1679290533065796, "learning_rate": 0.0001, "loss": 0.0089, "step": 154400 }, { "epoch": 1015.8552631578947, "grad_norm": 1.219805121421814, "learning_rate": 0.0001, "loss": 0.0111, "step": 154410 }, { "epoch": 1015.921052631579, "grad_norm": 1.1907403469085693, "learning_rate": 0.0001, "loss": 0.0092, "step": 154420 }, { "epoch": 1015.9868421052631, "grad_norm": 1.4506357908248901, "learning_rate": 0.0001, "loss": 0.0095, "step": 154430 }, { "epoch": 1016.0526315789474, "grad_norm": 1.450147032737732, "learning_rate": 0.0001, "loss": 0.0113, "step": 154440 }, { "epoch": 1016.1184210526316, "grad_norm": 1.0505656003952026, "learning_rate": 0.0001, "loss": 0.0085, "step": 154450 }, { "epoch": 1016.1842105263158, "grad_norm": 1.4850653409957886, "learning_rate": 0.0001, "loss": 0.0094, "step": 154460 }, { "epoch": 1016.25, "grad_norm": 1.165228247642517, "learning_rate": 0.0001, "loss": 0.0094, "step": 154470 }, { "epoch": 1016.3157894736842, "grad_norm": 0.9636247754096985, "learning_rate": 0.0001, "loss": 0.0092, "step": 154480 }, { "epoch": 1016.3815789473684, "grad_norm": 1.0024343729019165, "learning_rate": 0.0001, "loss": 0.0091, "step": 154490 }, { "epoch": 1016.4473684210526, "grad_norm": 0.8500187397003174, "learning_rate": 0.0001, "loss": 0.0105, "step": 154500 }, { "epoch": 1016.5131578947369, "grad_norm": 1.01841139793396, "learning_rate": 0.0001, "loss": 0.0092, "step": 154510 }, { "epoch": 1016.578947368421, "grad_norm": 0.774018406867981, "learning_rate": 0.0001, "loss": 0.0081, "step": 154520 }, { "epoch": 1016.6447368421053, "grad_norm": 1.0345419645309448, "learning_rate": 0.0001, "loss": 0.0094, "step": 154530 }, { "epoch": 1016.7105263157895, "grad_norm": 1.3381010293960571, "learning_rate": 0.0001, "loss": 0.011, "step": 154540 }, { "epoch": 1016.7763157894736, "grad_norm": 1.0903173685073853, "learning_rate": 0.0001, "loss": 0.0111, "step": 154550 }, { "epoch": 1016.8421052631579, "grad_norm": 0.7370033860206604, "learning_rate": 0.0001, "loss": 0.0086, "step": 154560 }, { "epoch": 1016.9078947368421, "grad_norm": 0.9964326620101929, "learning_rate": 0.0001, "loss": 0.0095, "step": 154570 }, { "epoch": 1016.9736842105264, "grad_norm": 1.0719693899154663, "learning_rate": 0.0001, "loss": 0.0097, "step": 154580 }, { "epoch": 1017.0394736842105, "grad_norm": 1.0311167240142822, "learning_rate": 0.0001, "loss": 0.0084, "step": 154590 }, { "epoch": 1017.1052631578947, "grad_norm": 1.1289986371994019, "learning_rate": 0.0001, "loss": 0.0097, "step": 154600 }, { "epoch": 1017.171052631579, "grad_norm": 0.8267912864685059, "learning_rate": 0.0001, "loss": 0.0091, "step": 154610 }, { "epoch": 1017.2368421052631, "grad_norm": 1.1450597047805786, "learning_rate": 0.0001, "loss": 0.0089, "step": 154620 }, { "epoch": 1017.3026315789474, "grad_norm": 0.8567523956298828, "learning_rate": 0.0001, "loss": 0.0092, "step": 154630 }, { "epoch": 1017.3684210526316, "grad_norm": 0.6963806748390198, "learning_rate": 0.0001, "loss": 0.0103, "step": 154640 }, { "epoch": 1017.4342105263158, "grad_norm": 0.7073983550071716, "learning_rate": 0.0001, "loss": 0.0108, "step": 154650 }, { "epoch": 1017.5, "grad_norm": 0.6711755394935608, "learning_rate": 0.0001, "loss": 0.01, "step": 154660 }, { "epoch": 1017.5657894736842, "grad_norm": 0.7169049382209778, "learning_rate": 0.0001, "loss": 0.011, "step": 154670 }, { "epoch": 1017.6315789473684, "grad_norm": 0.6535640954971313, "learning_rate": 0.0001, "loss": 0.0089, "step": 154680 }, { "epoch": 1017.6973684210526, "grad_norm": 0.5502938032150269, "learning_rate": 0.0001, "loss": 0.0097, "step": 154690 }, { "epoch": 1017.7631578947369, "grad_norm": 0.9529855847358704, "learning_rate": 0.0001, "loss": 0.0091, "step": 154700 }, { "epoch": 1017.828947368421, "grad_norm": 1.1591817140579224, "learning_rate": 0.0001, "loss": 0.0095, "step": 154710 }, { "epoch": 1017.8947368421053, "grad_norm": 1.1533323526382446, "learning_rate": 0.0001, "loss": 0.011, "step": 154720 }, { "epoch": 1017.9605263157895, "grad_norm": 1.2087000608444214, "learning_rate": 0.0001, "loss": 0.0112, "step": 154730 }, { "epoch": 1018.0263157894736, "grad_norm": 0.936263918876648, "learning_rate": 0.0001, "loss": 0.01, "step": 154740 }, { "epoch": 1018.0921052631579, "grad_norm": 1.1925992965698242, "learning_rate": 0.0001, "loss": 0.0105, "step": 154750 }, { "epoch": 1018.1578947368421, "grad_norm": 1.5353156328201294, "learning_rate": 0.0001, "loss": 0.0102, "step": 154760 }, { "epoch": 1018.2236842105264, "grad_norm": 1.3622137308120728, "learning_rate": 0.0001, "loss": 0.0114, "step": 154770 }, { "epoch": 1018.2894736842105, "grad_norm": 1.236350417137146, "learning_rate": 0.0001, "loss": 0.0124, "step": 154780 }, { "epoch": 1018.3552631578947, "grad_norm": 1.0077983140945435, "learning_rate": 0.0001, "loss": 0.0086, "step": 154790 }, { "epoch": 1018.421052631579, "grad_norm": 1.119370460510254, "learning_rate": 0.0001, "loss": 0.0084, "step": 154800 }, { "epoch": 1018.4868421052631, "grad_norm": 1.009660005569458, "learning_rate": 0.0001, "loss": 0.0087, "step": 154810 }, { "epoch": 1018.5526315789474, "grad_norm": 1.031388759613037, "learning_rate": 0.0001, "loss": 0.0097, "step": 154820 }, { "epoch": 1018.6184210526316, "grad_norm": 0.6950995326042175, "learning_rate": 0.0001, "loss": 0.0086, "step": 154830 }, { "epoch": 1018.6842105263158, "grad_norm": 0.9287242293357849, "learning_rate": 0.0001, "loss": 0.0109, "step": 154840 }, { "epoch": 1018.75, "grad_norm": 0.8696134090423584, "learning_rate": 0.0001, "loss": 0.0095, "step": 154850 }, { "epoch": 1018.8157894736842, "grad_norm": 0.8959367275238037, "learning_rate": 0.0001, "loss": 0.0104, "step": 154860 }, { "epoch": 1018.8815789473684, "grad_norm": 1.239129900932312, "learning_rate": 0.0001, "loss": 0.0083, "step": 154870 }, { "epoch": 1018.9473684210526, "grad_norm": 0.8386613726615906, "learning_rate": 0.0001, "loss": 0.0093, "step": 154880 }, { "epoch": 1019.0131578947369, "grad_norm": 1.1517994403839111, "learning_rate": 0.0001, "loss": 0.0112, "step": 154890 }, { "epoch": 1019.078947368421, "grad_norm": 1.2050758600234985, "learning_rate": 0.0001, "loss": 0.0101, "step": 154900 }, { "epoch": 1019.1447368421053, "grad_norm": 1.15315842628479, "learning_rate": 0.0001, "loss": 0.0095, "step": 154910 }, { "epoch": 1019.2105263157895, "grad_norm": 0.8363189697265625, "learning_rate": 0.0001, "loss": 0.0095, "step": 154920 }, { "epoch": 1019.2763157894736, "grad_norm": 0.8743520379066467, "learning_rate": 0.0001, "loss": 0.0091, "step": 154930 }, { "epoch": 1019.3421052631579, "grad_norm": 1.1668791770935059, "learning_rate": 0.0001, "loss": 0.0102, "step": 154940 }, { "epoch": 1019.4078947368421, "grad_norm": 1.0123094320297241, "learning_rate": 0.0001, "loss": 0.0111, "step": 154950 }, { "epoch": 1019.4736842105264, "grad_norm": 0.5643197894096375, "learning_rate": 0.0001, "loss": 0.0081, "step": 154960 }, { "epoch": 1019.5394736842105, "grad_norm": 0.9855898022651672, "learning_rate": 0.0001, "loss": 0.0114, "step": 154970 }, { "epoch": 1019.6052631578947, "grad_norm": 0.9356517791748047, "learning_rate": 0.0001, "loss": 0.0085, "step": 154980 }, { "epoch": 1019.671052631579, "grad_norm": 0.853830099105835, "learning_rate": 0.0001, "loss": 0.0107, "step": 154990 }, { "epoch": 1019.7368421052631, "grad_norm": 1.0371325016021729, "learning_rate": 0.0001, "loss": 0.0094, "step": 155000 }, { "epoch": 1019.8026315789474, "grad_norm": 0.9705318212509155, "learning_rate": 0.0001, "loss": 0.0109, "step": 155010 }, { "epoch": 1019.8684210526316, "grad_norm": 0.9544471502304077, "learning_rate": 0.0001, "loss": 0.0108, "step": 155020 }, { "epoch": 1019.9342105263158, "grad_norm": 1.049113392829895, "learning_rate": 0.0001, "loss": 0.009, "step": 155030 }, { "epoch": 1020.0, "grad_norm": 1.123701572418213, "learning_rate": 0.0001, "loss": 0.0081, "step": 155040 }, { "epoch": 1020.0657894736842, "grad_norm": 1.035402536392212, "learning_rate": 0.0001, "loss": 0.01, "step": 155050 }, { "epoch": 1020.1315789473684, "grad_norm": 1.1114774942398071, "learning_rate": 0.0001, "loss": 0.0095, "step": 155060 }, { "epoch": 1020.1973684210526, "grad_norm": 0.7469269037246704, "learning_rate": 0.0001, "loss": 0.0112, "step": 155070 }, { "epoch": 1020.2631578947369, "grad_norm": 1.2120532989501953, "learning_rate": 0.0001, "loss": 0.0103, "step": 155080 }, { "epoch": 1020.328947368421, "grad_norm": 0.9209999442100525, "learning_rate": 0.0001, "loss": 0.0089, "step": 155090 }, { "epoch": 1020.3947368421053, "grad_norm": 1.0287814140319824, "learning_rate": 0.0001, "loss": 0.0078, "step": 155100 }, { "epoch": 1020.4605263157895, "grad_norm": 1.212156057357788, "learning_rate": 0.0001, "loss": 0.0104, "step": 155110 }, { "epoch": 1020.5263157894736, "grad_norm": 1.101295828819275, "learning_rate": 0.0001, "loss": 0.0106, "step": 155120 }, { "epoch": 1020.5921052631579, "grad_norm": 1.2472134828567505, "learning_rate": 0.0001, "loss": 0.0107, "step": 155130 }, { "epoch": 1020.6578947368421, "grad_norm": 1.4082173109054565, "learning_rate": 0.0001, "loss": 0.0095, "step": 155140 }, { "epoch": 1020.7236842105264, "grad_norm": 1.0719764232635498, "learning_rate": 0.0001, "loss": 0.0097, "step": 155150 }, { "epoch": 1020.7894736842105, "grad_norm": 1.4822707176208496, "learning_rate": 0.0001, "loss": 0.0095, "step": 155160 }, { "epoch": 1020.8552631578947, "grad_norm": 1.1437565088272095, "learning_rate": 0.0001, "loss": 0.0092, "step": 155170 }, { "epoch": 1020.921052631579, "grad_norm": 1.258529543876648, "learning_rate": 0.0001, "loss": 0.0102, "step": 155180 }, { "epoch": 1020.9868421052631, "grad_norm": 1.0920777320861816, "learning_rate": 0.0001, "loss": 0.0084, "step": 155190 }, { "epoch": 1021.0526315789474, "grad_norm": 1.4850847721099854, "learning_rate": 0.0001, "loss": 0.0097, "step": 155200 }, { "epoch": 1021.1184210526316, "grad_norm": 0.7385463118553162, "learning_rate": 0.0001, "loss": 0.0088, "step": 155210 }, { "epoch": 1021.1842105263158, "grad_norm": 1.168807864189148, "learning_rate": 0.0001, "loss": 0.0095, "step": 155220 }, { "epoch": 1021.25, "grad_norm": 1.1659116744995117, "learning_rate": 0.0001, "loss": 0.0097, "step": 155230 }, { "epoch": 1021.3157894736842, "grad_norm": 0.993746817111969, "learning_rate": 0.0001, "loss": 0.0089, "step": 155240 }, { "epoch": 1021.3815789473684, "grad_norm": 1.1375447511672974, "learning_rate": 0.0001, "loss": 0.0095, "step": 155250 }, { "epoch": 1021.4473684210526, "grad_norm": 0.8850059509277344, "learning_rate": 0.0001, "loss": 0.0081, "step": 155260 }, { "epoch": 1021.5131578947369, "grad_norm": 0.8182586431503296, "learning_rate": 0.0001, "loss": 0.011, "step": 155270 }, { "epoch": 1021.578947368421, "grad_norm": 0.7995529770851135, "learning_rate": 0.0001, "loss": 0.0123, "step": 155280 }, { "epoch": 1021.6447368421053, "grad_norm": 1.0963636636734009, "learning_rate": 0.0001, "loss": 0.0104, "step": 155290 }, { "epoch": 1021.7105263157895, "grad_norm": 1.1766555309295654, "learning_rate": 0.0001, "loss": 0.0109, "step": 155300 }, { "epoch": 1021.7763157894736, "grad_norm": 1.4533417224884033, "learning_rate": 0.0001, "loss": 0.0099, "step": 155310 }, { "epoch": 1021.8421052631579, "grad_norm": 0.9066959023475647, "learning_rate": 0.0001, "loss": 0.009, "step": 155320 }, { "epoch": 1021.9078947368421, "grad_norm": 0.9514365196228027, "learning_rate": 0.0001, "loss": 0.0099, "step": 155330 }, { "epoch": 1021.9736842105264, "grad_norm": 0.9749160408973694, "learning_rate": 0.0001, "loss": 0.008, "step": 155340 }, { "epoch": 1022.0394736842105, "grad_norm": 1.3155367374420166, "learning_rate": 0.0001, "loss": 0.0105, "step": 155350 }, { "epoch": 1022.1052631578947, "grad_norm": 0.8676218390464783, "learning_rate": 0.0001, "loss": 0.009, "step": 155360 }, { "epoch": 1022.171052631579, "grad_norm": 1.047354817390442, "learning_rate": 0.0001, "loss": 0.0078, "step": 155370 }, { "epoch": 1022.2368421052631, "grad_norm": 1.1073765754699707, "learning_rate": 0.0001, "loss": 0.01, "step": 155380 }, { "epoch": 1022.3026315789474, "grad_norm": 1.0393955707550049, "learning_rate": 0.0001, "loss": 0.0107, "step": 155390 }, { "epoch": 1022.3684210526316, "grad_norm": 0.886330783367157, "learning_rate": 0.0001, "loss": 0.0099, "step": 155400 }, { "epoch": 1022.4342105263158, "grad_norm": 0.9492865800857544, "learning_rate": 0.0001, "loss": 0.009, "step": 155410 }, { "epoch": 1022.5, "grad_norm": 1.4241212606430054, "learning_rate": 0.0001, "loss": 0.0084, "step": 155420 }, { "epoch": 1022.5657894736842, "grad_norm": 1.7502048015594482, "learning_rate": 0.0001, "loss": 0.0099, "step": 155430 }, { "epoch": 1022.6315789473684, "grad_norm": 1.311353087425232, "learning_rate": 0.0001, "loss": 0.0094, "step": 155440 }, { "epoch": 1022.6973684210526, "grad_norm": 1.1434524059295654, "learning_rate": 0.0001, "loss": 0.01, "step": 155450 }, { "epoch": 1022.7631578947369, "grad_norm": 1.5268412828445435, "learning_rate": 0.0001, "loss": 0.0106, "step": 155460 }, { "epoch": 1022.828947368421, "grad_norm": 1.0521647930145264, "learning_rate": 0.0001, "loss": 0.0083, "step": 155470 }, { "epoch": 1022.8947368421053, "grad_norm": 1.2657438516616821, "learning_rate": 0.0001, "loss": 0.0089, "step": 155480 }, { "epoch": 1022.9605263157895, "grad_norm": 1.5762451887130737, "learning_rate": 0.0001, "loss": 0.0096, "step": 155490 }, { "epoch": 1023.0263157894736, "grad_norm": 0.9546888470649719, "learning_rate": 0.0001, "loss": 0.0095, "step": 155500 }, { "epoch": 1023.0921052631579, "grad_norm": 0.966810941696167, "learning_rate": 0.0001, "loss": 0.0101, "step": 155510 }, { "epoch": 1023.1578947368421, "grad_norm": 1.2217552661895752, "learning_rate": 0.0001, "loss": 0.0097, "step": 155520 }, { "epoch": 1023.2236842105264, "grad_norm": 1.2993433475494385, "learning_rate": 0.0001, "loss": 0.0087, "step": 155530 }, { "epoch": 1023.2894736842105, "grad_norm": 1.1217420101165771, "learning_rate": 0.0001, "loss": 0.0078, "step": 155540 }, { "epoch": 1023.3552631578947, "grad_norm": 1.1864551305770874, "learning_rate": 0.0001, "loss": 0.0078, "step": 155550 }, { "epoch": 1023.421052631579, "grad_norm": 1.1899709701538086, "learning_rate": 0.0001, "loss": 0.0094, "step": 155560 }, { "epoch": 1023.4868421052631, "grad_norm": 1.223408818244934, "learning_rate": 0.0001, "loss": 0.0085, "step": 155570 }, { "epoch": 1023.5526315789474, "grad_norm": 1.22601318359375, "learning_rate": 0.0001, "loss": 0.0097, "step": 155580 }, { "epoch": 1023.6184210526316, "grad_norm": 0.761501133441925, "learning_rate": 0.0001, "loss": 0.0098, "step": 155590 }, { "epoch": 1023.6842105263158, "grad_norm": 1.0578773021697998, "learning_rate": 0.0001, "loss": 0.01, "step": 155600 }, { "epoch": 1023.75, "grad_norm": 1.3183938264846802, "learning_rate": 0.0001, "loss": 0.0101, "step": 155610 }, { "epoch": 1023.8157894736842, "grad_norm": 1.4918771982192993, "learning_rate": 0.0001, "loss": 0.0083, "step": 155620 }, { "epoch": 1023.8815789473684, "grad_norm": 1.0870895385742188, "learning_rate": 0.0001, "loss": 0.0091, "step": 155630 }, { "epoch": 1023.9473684210526, "grad_norm": 1.3157042264938354, "learning_rate": 0.0001, "loss": 0.0104, "step": 155640 }, { "epoch": 1024.0131578947369, "grad_norm": 1.3045361042022705, "learning_rate": 0.0001, "loss": 0.0107, "step": 155650 }, { "epoch": 1024.078947368421, "grad_norm": 1.0080130100250244, "learning_rate": 0.0001, "loss": 0.0122, "step": 155660 }, { "epoch": 1024.1447368421052, "grad_norm": 1.2834631204605103, "learning_rate": 0.0001, "loss": 0.009, "step": 155670 }, { "epoch": 1024.2105263157894, "grad_norm": 1.2468620538711548, "learning_rate": 0.0001, "loss": 0.0088, "step": 155680 }, { "epoch": 1024.2763157894738, "grad_norm": 0.9517583250999451, "learning_rate": 0.0001, "loss": 0.0101, "step": 155690 }, { "epoch": 1024.342105263158, "grad_norm": 1.0547385215759277, "learning_rate": 0.0001, "loss": 0.0093, "step": 155700 }, { "epoch": 1024.407894736842, "grad_norm": 1.4880155324935913, "learning_rate": 0.0001, "loss": 0.0096, "step": 155710 }, { "epoch": 1024.4736842105262, "grad_norm": 1.133327603340149, "learning_rate": 0.0001, "loss": 0.0096, "step": 155720 }, { "epoch": 1024.5394736842106, "grad_norm": 1.1422955989837646, "learning_rate": 0.0001, "loss": 0.0098, "step": 155730 }, { "epoch": 1024.6052631578948, "grad_norm": 1.3525456190109253, "learning_rate": 0.0001, "loss": 0.0092, "step": 155740 }, { "epoch": 1024.671052631579, "grad_norm": 1.063536524772644, "learning_rate": 0.0001, "loss": 0.011, "step": 155750 }, { "epoch": 1024.7368421052631, "grad_norm": 1.0734652280807495, "learning_rate": 0.0001, "loss": 0.0087, "step": 155760 }, { "epoch": 1024.8026315789473, "grad_norm": 1.1811069250106812, "learning_rate": 0.0001, "loss": 0.0074, "step": 155770 }, { "epoch": 1024.8684210526317, "grad_norm": 0.923505425453186, "learning_rate": 0.0001, "loss": 0.0103, "step": 155780 }, { "epoch": 1024.9342105263158, "grad_norm": 0.7430877685546875, "learning_rate": 0.0001, "loss": 0.0086, "step": 155790 }, { "epoch": 1025.0, "grad_norm": 0.5086251497268677, "learning_rate": 0.0001, "loss": 0.0101, "step": 155800 }, { "epoch": 1025.0657894736842, "grad_norm": 0.8926265239715576, "learning_rate": 0.0001, "loss": 0.0114, "step": 155810 }, { "epoch": 1025.1315789473683, "grad_norm": 1.6458979845046997, "learning_rate": 0.0001, "loss": 0.0084, "step": 155820 }, { "epoch": 1025.1973684210527, "grad_norm": 0.9550696611404419, "learning_rate": 0.0001, "loss": 0.0093, "step": 155830 }, { "epoch": 1025.2631578947369, "grad_norm": 1.3163942098617554, "learning_rate": 0.0001, "loss": 0.0091, "step": 155840 }, { "epoch": 1025.328947368421, "grad_norm": 1.4571993350982666, "learning_rate": 0.0001, "loss": 0.0093, "step": 155850 }, { "epoch": 1025.3947368421052, "grad_norm": 1.3019025325775146, "learning_rate": 0.0001, "loss": 0.0087, "step": 155860 }, { "epoch": 1025.4605263157894, "grad_norm": 1.154705286026001, "learning_rate": 0.0001, "loss": 0.0097, "step": 155870 }, { "epoch": 1025.5263157894738, "grad_norm": 1.1474753618240356, "learning_rate": 0.0001, "loss": 0.0086, "step": 155880 }, { "epoch": 1025.592105263158, "grad_norm": 0.7636915445327759, "learning_rate": 0.0001, "loss": 0.01, "step": 155890 }, { "epoch": 1025.657894736842, "grad_norm": 1.04159414768219, "learning_rate": 0.0001, "loss": 0.0094, "step": 155900 }, { "epoch": 1025.7236842105262, "grad_norm": 1.030527114868164, "learning_rate": 0.0001, "loss": 0.0087, "step": 155910 }, { "epoch": 1025.7894736842106, "grad_norm": 1.1383177042007446, "learning_rate": 0.0001, "loss": 0.0105, "step": 155920 }, { "epoch": 1025.8552631578948, "grad_norm": 1.3946573734283447, "learning_rate": 0.0001, "loss": 0.0111, "step": 155930 }, { "epoch": 1025.921052631579, "grad_norm": 1.2071908712387085, "learning_rate": 0.0001, "loss": 0.0091, "step": 155940 }, { "epoch": 1025.9868421052631, "grad_norm": 1.3407089710235596, "learning_rate": 0.0001, "loss": 0.0093, "step": 155950 }, { "epoch": 1026.0526315789473, "grad_norm": 0.9338118433952332, "learning_rate": 0.0001, "loss": 0.0091, "step": 155960 }, { "epoch": 1026.1184210526317, "grad_norm": 1.0861817598342896, "learning_rate": 0.0001, "loss": 0.0085, "step": 155970 }, { "epoch": 1026.1842105263158, "grad_norm": 1.192749261856079, "learning_rate": 0.0001, "loss": 0.0086, "step": 155980 }, { "epoch": 1026.25, "grad_norm": 1.2024480104446411, "learning_rate": 0.0001, "loss": 0.0096, "step": 155990 }, { "epoch": 1026.3157894736842, "grad_norm": 1.0778899192810059, "learning_rate": 0.0001, "loss": 0.0108, "step": 156000 }, { "epoch": 1026.3815789473683, "grad_norm": 1.0766499042510986, "learning_rate": 0.0001, "loss": 0.0097, "step": 156010 }, { "epoch": 1026.4473684210527, "grad_norm": 1.1861462593078613, "learning_rate": 0.0001, "loss": 0.0106, "step": 156020 }, { "epoch": 1026.5131578947369, "grad_norm": 1.0940054655075073, "learning_rate": 0.0001, "loss": 0.0085, "step": 156030 }, { "epoch": 1026.578947368421, "grad_norm": 1.3079572916030884, "learning_rate": 0.0001, "loss": 0.0105, "step": 156040 }, { "epoch": 1026.6447368421052, "grad_norm": 1.1062140464782715, "learning_rate": 0.0001, "loss": 0.0099, "step": 156050 }, { "epoch": 1026.7105263157894, "grad_norm": 1.2502998113632202, "learning_rate": 0.0001, "loss": 0.0111, "step": 156060 }, { "epoch": 1026.7763157894738, "grad_norm": 0.586124837398529, "learning_rate": 0.0001, "loss": 0.009, "step": 156070 }, { "epoch": 1026.842105263158, "grad_norm": 0.6738919019699097, "learning_rate": 0.0001, "loss": 0.0099, "step": 156080 }, { "epoch": 1026.907894736842, "grad_norm": 0.9908142685890198, "learning_rate": 0.0001, "loss": 0.0106, "step": 156090 }, { "epoch": 1026.9736842105262, "grad_norm": 1.2288180589675903, "learning_rate": 0.0001, "loss": 0.0081, "step": 156100 }, { "epoch": 1027.0394736842106, "grad_norm": 1.325110673904419, "learning_rate": 0.0001, "loss": 0.0094, "step": 156110 }, { "epoch": 1027.1052631578948, "grad_norm": 1.1032160520553589, "learning_rate": 0.0001, "loss": 0.0099, "step": 156120 }, { "epoch": 1027.171052631579, "grad_norm": 1.2170844078063965, "learning_rate": 0.0001, "loss": 0.0112, "step": 156130 }, { "epoch": 1027.2368421052631, "grad_norm": 0.6858425140380859, "learning_rate": 0.0001, "loss": 0.0098, "step": 156140 }, { "epoch": 1027.3026315789473, "grad_norm": 1.3081910610198975, "learning_rate": 0.0001, "loss": 0.0086, "step": 156150 }, { "epoch": 1027.3684210526317, "grad_norm": 1.1171272993087769, "learning_rate": 0.0001, "loss": 0.0082, "step": 156160 }, { "epoch": 1027.4342105263158, "grad_norm": 1.35430109500885, "learning_rate": 0.0001, "loss": 0.0098, "step": 156170 }, { "epoch": 1027.5, "grad_norm": 1.3601711988449097, "learning_rate": 0.0001, "loss": 0.0114, "step": 156180 }, { "epoch": 1027.5657894736842, "grad_norm": 1.4882049560546875, "learning_rate": 0.0001, "loss": 0.0083, "step": 156190 }, { "epoch": 1027.6315789473683, "grad_norm": 0.9452913999557495, "learning_rate": 0.0001, "loss": 0.0087, "step": 156200 }, { "epoch": 1027.6973684210527, "grad_norm": 1.0488688945770264, "learning_rate": 0.0001, "loss": 0.0095, "step": 156210 }, { "epoch": 1027.7631578947369, "grad_norm": 1.0508980751037598, "learning_rate": 0.0001, "loss": 0.0087, "step": 156220 }, { "epoch": 1027.828947368421, "grad_norm": 1.4618175029754639, "learning_rate": 0.0001, "loss": 0.0096, "step": 156230 }, { "epoch": 1027.8947368421052, "grad_norm": 0.8647827506065369, "learning_rate": 0.0001, "loss": 0.0091, "step": 156240 }, { "epoch": 1027.9605263157894, "grad_norm": 1.1985787153244019, "learning_rate": 0.0001, "loss": 0.0108, "step": 156250 }, { "epoch": 1028.0263157894738, "grad_norm": 1.6828774213790894, "learning_rate": 0.0001, "loss": 0.0098, "step": 156260 }, { "epoch": 1028.092105263158, "grad_norm": 1.015224814414978, "learning_rate": 0.0001, "loss": 0.0101, "step": 156270 }, { "epoch": 1028.157894736842, "grad_norm": 1.148722767829895, "learning_rate": 0.0001, "loss": 0.0131, "step": 156280 }, { "epoch": 1028.2236842105262, "grad_norm": 0.7410078048706055, "learning_rate": 0.0001, "loss": 0.0119, "step": 156290 }, { "epoch": 1028.2894736842106, "grad_norm": 0.7929697036743164, "learning_rate": 0.0001, "loss": 0.0113, "step": 156300 }, { "epoch": 1028.3552631578948, "grad_norm": 1.1712801456451416, "learning_rate": 0.0001, "loss": 0.0124, "step": 156310 }, { "epoch": 1028.421052631579, "grad_norm": 0.8382496237754822, "learning_rate": 0.0001, "loss": 0.0107, "step": 156320 }, { "epoch": 1028.4868421052631, "grad_norm": 0.9845030307769775, "learning_rate": 0.0001, "loss": 0.0122, "step": 156330 }, { "epoch": 1028.5526315789473, "grad_norm": 0.8377711176872253, "learning_rate": 0.0001, "loss": 0.0115, "step": 156340 }, { "epoch": 1028.6184210526317, "grad_norm": 0.874224066734314, "learning_rate": 0.0001, "loss": 0.0147, "step": 156350 }, { "epoch": 1028.6842105263158, "grad_norm": 1.0614688396453857, "learning_rate": 0.0001, "loss": 0.0133, "step": 156360 }, { "epoch": 1028.75, "grad_norm": 0.947669267654419, "learning_rate": 0.0001, "loss": 0.0108, "step": 156370 }, { "epoch": 1028.8157894736842, "grad_norm": 0.8915820121765137, "learning_rate": 0.0001, "loss": 0.0095, "step": 156380 }, { "epoch": 1028.8815789473683, "grad_norm": 1.1027073860168457, "learning_rate": 0.0001, "loss": 0.0121, "step": 156390 }, { "epoch": 1028.9473684210527, "grad_norm": 0.9126038551330566, "learning_rate": 0.0001, "loss": 0.0127, "step": 156400 }, { "epoch": 1029.0131578947369, "grad_norm": 1.0201152563095093, "learning_rate": 0.0001, "loss": 0.0096, "step": 156410 }, { "epoch": 1029.078947368421, "grad_norm": 0.7695257067680359, "learning_rate": 0.0001, "loss": 0.0126, "step": 156420 }, { "epoch": 1029.1447368421052, "grad_norm": 1.1396857500076294, "learning_rate": 0.0001, "loss": 0.0109, "step": 156430 }, { "epoch": 1029.2105263157894, "grad_norm": 1.5026952028274536, "learning_rate": 0.0001, "loss": 0.0115, "step": 156440 }, { "epoch": 1029.2763157894738, "grad_norm": 1.048147201538086, "learning_rate": 0.0001, "loss": 0.0111, "step": 156450 }, { "epoch": 1029.342105263158, "grad_norm": 1.0914058685302734, "learning_rate": 0.0001, "loss": 0.0098, "step": 156460 }, { "epoch": 1029.407894736842, "grad_norm": 0.9121741056442261, "learning_rate": 0.0001, "loss": 0.0114, "step": 156470 }, { "epoch": 1029.4736842105262, "grad_norm": 1.072133183479309, "learning_rate": 0.0001, "loss": 0.0096, "step": 156480 }, { "epoch": 1029.5394736842106, "grad_norm": 1.3796448707580566, "learning_rate": 0.0001, "loss": 0.0119, "step": 156490 }, { "epoch": 1029.6052631578948, "grad_norm": 1.3151845932006836, "learning_rate": 0.0001, "loss": 0.0111, "step": 156500 }, { "epoch": 1029.671052631579, "grad_norm": 0.7077886462211609, "learning_rate": 0.0001, "loss": 0.0116, "step": 156510 }, { "epoch": 1029.7368421052631, "grad_norm": 0.8222764134407043, "learning_rate": 0.0001, "loss": 0.0098, "step": 156520 }, { "epoch": 1029.8026315789473, "grad_norm": 1.199191689491272, "learning_rate": 0.0001, "loss": 0.014, "step": 156530 }, { "epoch": 1029.8684210526317, "grad_norm": 1.1930209398269653, "learning_rate": 0.0001, "loss": 0.0107, "step": 156540 }, { "epoch": 1029.9342105263158, "grad_norm": 1.1827071905136108, "learning_rate": 0.0001, "loss": 0.0106, "step": 156550 }, { "epoch": 1030.0, "grad_norm": 1.1457871198654175, "learning_rate": 0.0001, "loss": 0.0109, "step": 156560 }, { "epoch": 1030.0657894736842, "grad_norm": 1.4931516647338867, "learning_rate": 0.0001, "loss": 0.0092, "step": 156570 }, { "epoch": 1030.1315789473683, "grad_norm": 1.4106398820877075, "learning_rate": 0.0001, "loss": 0.0093, "step": 156580 }, { "epoch": 1030.1973684210527, "grad_norm": 1.3719513416290283, "learning_rate": 0.0001, "loss": 0.0111, "step": 156590 }, { "epoch": 1030.2631578947369, "grad_norm": 0.9716209173202515, "learning_rate": 0.0001, "loss": 0.011, "step": 156600 }, { "epoch": 1030.328947368421, "grad_norm": 1.2926265001296997, "learning_rate": 0.0001, "loss": 0.0101, "step": 156610 }, { "epoch": 1030.3947368421052, "grad_norm": 0.9236207604408264, "learning_rate": 0.0001, "loss": 0.0096, "step": 156620 }, { "epoch": 1030.4605263157894, "grad_norm": 0.8633676767349243, "learning_rate": 0.0001, "loss": 0.0105, "step": 156630 }, { "epoch": 1030.5263157894738, "grad_norm": 0.5557491183280945, "learning_rate": 0.0001, "loss": 0.0125, "step": 156640 }, { "epoch": 1030.592105263158, "grad_norm": 1.302513837814331, "learning_rate": 0.0001, "loss": 0.0148, "step": 156650 }, { "epoch": 1030.657894736842, "grad_norm": 0.9874489307403564, "learning_rate": 0.0001, "loss": 0.0102, "step": 156660 }, { "epoch": 1030.7236842105262, "grad_norm": 1.4245355129241943, "learning_rate": 0.0001, "loss": 0.0107, "step": 156670 }, { "epoch": 1030.7894736842106, "grad_norm": 1.2239059209823608, "learning_rate": 0.0001, "loss": 0.0121, "step": 156680 }, { "epoch": 1030.8552631578948, "grad_norm": 1.1711642742156982, "learning_rate": 0.0001, "loss": 0.0112, "step": 156690 }, { "epoch": 1030.921052631579, "grad_norm": 1.1151902675628662, "learning_rate": 0.0001, "loss": 0.01, "step": 156700 }, { "epoch": 1030.9868421052631, "grad_norm": 1.4098039865493774, "learning_rate": 0.0001, "loss": 0.0153, "step": 156710 }, { "epoch": 1031.0526315789473, "grad_norm": 1.140479326248169, "learning_rate": 0.0001, "loss": 0.0126, "step": 156720 }, { "epoch": 1031.1184210526317, "grad_norm": 1.3065927028656006, "learning_rate": 0.0001, "loss": 0.0103, "step": 156730 }, { "epoch": 1031.1842105263158, "grad_norm": 0.9672998785972595, "learning_rate": 0.0001, "loss": 0.0109, "step": 156740 }, { "epoch": 1031.25, "grad_norm": 1.2625794410705566, "learning_rate": 0.0001, "loss": 0.0104, "step": 156750 }, { "epoch": 1031.3157894736842, "grad_norm": 1.0192890167236328, "learning_rate": 0.0001, "loss": 0.01, "step": 156760 }, { "epoch": 1031.3815789473683, "grad_norm": 1.1616865396499634, "learning_rate": 0.0001, "loss": 0.0086, "step": 156770 }, { "epoch": 1031.4473684210527, "grad_norm": 0.9945463538169861, "learning_rate": 0.0001, "loss": 0.0095, "step": 156780 }, { "epoch": 1031.5131578947369, "grad_norm": 0.9444164037704468, "learning_rate": 0.0001, "loss": 0.0094, "step": 156790 }, { "epoch": 1031.578947368421, "grad_norm": 0.7794418931007385, "learning_rate": 0.0001, "loss": 0.0091, "step": 156800 }, { "epoch": 1031.6447368421052, "grad_norm": 0.908078670501709, "learning_rate": 0.0001, "loss": 0.0102, "step": 156810 }, { "epoch": 1031.7105263157894, "grad_norm": 0.7587025165557861, "learning_rate": 0.0001, "loss": 0.0111, "step": 156820 }, { "epoch": 1031.7763157894738, "grad_norm": 1.2462350130081177, "learning_rate": 0.0001, "loss": 0.0097, "step": 156830 }, { "epoch": 1031.842105263158, "grad_norm": 1.1004960536956787, "learning_rate": 0.0001, "loss": 0.0123, "step": 156840 }, { "epoch": 1031.907894736842, "grad_norm": 0.6818149089813232, "learning_rate": 0.0001, "loss": 0.0118, "step": 156850 }, { "epoch": 1031.9736842105262, "grad_norm": 1.0181989669799805, "learning_rate": 0.0001, "loss": 0.0113, "step": 156860 }, { "epoch": 1032.0394736842106, "grad_norm": 1.3769375085830688, "learning_rate": 0.0001, "loss": 0.0108, "step": 156870 }, { "epoch": 1032.1052631578948, "grad_norm": 1.301367998123169, "learning_rate": 0.0001, "loss": 0.011, "step": 156880 }, { "epoch": 1032.171052631579, "grad_norm": 1.4614466428756714, "learning_rate": 0.0001, "loss": 0.0104, "step": 156890 }, { "epoch": 1032.2368421052631, "grad_norm": 1.141822099685669, "learning_rate": 0.0001, "loss": 0.0107, "step": 156900 }, { "epoch": 1032.3026315789473, "grad_norm": 0.8680024147033691, "learning_rate": 0.0001, "loss": 0.0083, "step": 156910 }, { "epoch": 1032.3684210526317, "grad_norm": 1.0369664430618286, "learning_rate": 0.0001, "loss": 0.0094, "step": 156920 }, { "epoch": 1032.4342105263158, "grad_norm": 1.025201439857483, "learning_rate": 0.0001, "loss": 0.0078, "step": 156930 }, { "epoch": 1032.5, "grad_norm": 0.8427085876464844, "learning_rate": 0.0001, "loss": 0.0089, "step": 156940 }, { "epoch": 1032.5657894736842, "grad_norm": 1.090246558189392, "learning_rate": 0.0001, "loss": 0.0112, "step": 156950 }, { "epoch": 1032.6315789473683, "grad_norm": 0.9469527006149292, "learning_rate": 0.0001, "loss": 0.0083, "step": 156960 }, { "epoch": 1032.6973684210527, "grad_norm": 0.9149708151817322, "learning_rate": 0.0001, "loss": 0.0095, "step": 156970 }, { "epoch": 1032.7631578947369, "grad_norm": 0.9780547618865967, "learning_rate": 0.0001, "loss": 0.0095, "step": 156980 }, { "epoch": 1032.828947368421, "grad_norm": 1.3159875869750977, "learning_rate": 0.0001, "loss": 0.0101, "step": 156990 }, { "epoch": 1032.8947368421052, "grad_norm": 0.9465406537055969, "learning_rate": 0.0001, "loss": 0.0079, "step": 157000 }, { "epoch": 1032.9605263157894, "grad_norm": 1.0383718013763428, "learning_rate": 0.0001, "loss": 0.0097, "step": 157010 }, { "epoch": 1033.0263157894738, "grad_norm": 0.9563059210777283, "learning_rate": 0.0001, "loss": 0.0112, "step": 157020 }, { "epoch": 1033.092105263158, "grad_norm": 0.8377731442451477, "learning_rate": 0.0001, "loss": 0.0098, "step": 157030 }, { "epoch": 1033.157894736842, "grad_norm": 0.7005869150161743, "learning_rate": 0.0001, "loss": 0.0097, "step": 157040 }, { "epoch": 1033.2236842105262, "grad_norm": 0.8357473611831665, "learning_rate": 0.0001, "loss": 0.0097, "step": 157050 }, { "epoch": 1033.2894736842106, "grad_norm": 0.877869188785553, "learning_rate": 0.0001, "loss": 0.0114, "step": 157060 }, { "epoch": 1033.3552631578948, "grad_norm": 1.0630418062210083, "learning_rate": 0.0001, "loss": 0.0086, "step": 157070 }, { "epoch": 1033.421052631579, "grad_norm": 1.6160914897918701, "learning_rate": 0.0001, "loss": 0.0079, "step": 157080 }, { "epoch": 1033.4868421052631, "grad_norm": 1.2976188659667969, "learning_rate": 0.0001, "loss": 0.0101, "step": 157090 }, { "epoch": 1033.5526315789473, "grad_norm": 1.338534951210022, "learning_rate": 0.0001, "loss": 0.0097, "step": 157100 }, { "epoch": 1033.6184210526317, "grad_norm": 1.0486924648284912, "learning_rate": 0.0001, "loss": 0.0073, "step": 157110 }, { "epoch": 1033.6842105263158, "grad_norm": 0.957435667514801, "learning_rate": 0.0001, "loss": 0.0091, "step": 157120 }, { "epoch": 1033.75, "grad_norm": 1.0611435174942017, "learning_rate": 0.0001, "loss": 0.0085, "step": 157130 }, { "epoch": 1033.8157894736842, "grad_norm": 1.471934199333191, "learning_rate": 0.0001, "loss": 0.009, "step": 157140 }, { "epoch": 1033.8815789473683, "grad_norm": 1.0346896648406982, "learning_rate": 0.0001, "loss": 0.0098, "step": 157150 }, { "epoch": 1033.9473684210527, "grad_norm": 1.5586978197097778, "learning_rate": 0.0001, "loss": 0.0083, "step": 157160 }, { "epoch": 1034.0131578947369, "grad_norm": 1.5314991474151611, "learning_rate": 0.0001, "loss": 0.0085, "step": 157170 }, { "epoch": 1034.078947368421, "grad_norm": 1.3219138383865356, "learning_rate": 0.0001, "loss": 0.0088, "step": 157180 }, { "epoch": 1034.1447368421052, "grad_norm": 0.7580617070198059, "learning_rate": 0.0001, "loss": 0.0081, "step": 157190 }, { "epoch": 1034.2105263157894, "grad_norm": 1.2841469049453735, "learning_rate": 0.0001, "loss": 0.0101, "step": 157200 }, { "epoch": 1034.2763157894738, "grad_norm": 0.8952348828315735, "learning_rate": 0.0001, "loss": 0.0096, "step": 157210 }, { "epoch": 1034.342105263158, "grad_norm": 1.0828073024749756, "learning_rate": 0.0001, "loss": 0.0089, "step": 157220 }, { "epoch": 1034.407894736842, "grad_norm": 1.3251006603240967, "learning_rate": 0.0001, "loss": 0.0092, "step": 157230 }, { "epoch": 1034.4736842105262, "grad_norm": 0.8319105505943298, "learning_rate": 0.0001, "loss": 0.009, "step": 157240 }, { "epoch": 1034.5394736842106, "grad_norm": 0.6340340971946716, "learning_rate": 0.0001, "loss": 0.0099, "step": 157250 }, { "epoch": 1034.6052631578948, "grad_norm": 1.0439341068267822, "learning_rate": 0.0001, "loss": 0.009, "step": 157260 }, { "epoch": 1034.671052631579, "grad_norm": 0.9129518270492554, "learning_rate": 0.0001, "loss": 0.0102, "step": 157270 }, { "epoch": 1034.7368421052631, "grad_norm": 1.1103804111480713, "learning_rate": 0.0001, "loss": 0.0079, "step": 157280 }, { "epoch": 1034.8026315789473, "grad_norm": 1.1742290258407593, "learning_rate": 0.0001, "loss": 0.0091, "step": 157290 }, { "epoch": 1034.8684210526317, "grad_norm": 1.2057838439941406, "learning_rate": 0.0001, "loss": 0.0096, "step": 157300 }, { "epoch": 1034.9342105263158, "grad_norm": 0.9219057559967041, "learning_rate": 0.0001, "loss": 0.0075, "step": 157310 }, { "epoch": 1035.0, "grad_norm": 0.6480765342712402, "learning_rate": 0.0001, "loss": 0.01, "step": 157320 }, { "epoch": 1035.0657894736842, "grad_norm": 0.9799567461013794, "learning_rate": 0.0001, "loss": 0.009, "step": 157330 }, { "epoch": 1035.1315789473683, "grad_norm": 1.2882983684539795, "learning_rate": 0.0001, "loss": 0.0082, "step": 157340 }, { "epoch": 1035.1973684210527, "grad_norm": 0.887322187423706, "learning_rate": 0.0001, "loss": 0.008, "step": 157350 }, { "epoch": 1035.2631578947369, "grad_norm": 0.8471446633338928, "learning_rate": 0.0001, "loss": 0.0111, "step": 157360 }, { "epoch": 1035.328947368421, "grad_norm": 0.8873788118362427, "learning_rate": 0.0001, "loss": 0.009, "step": 157370 }, { "epoch": 1035.3947368421052, "grad_norm": 1.0279638767242432, "learning_rate": 0.0001, "loss": 0.0096, "step": 157380 }, { "epoch": 1035.4605263157894, "grad_norm": 0.9138223528862, "learning_rate": 0.0001, "loss": 0.0112, "step": 157390 }, { "epoch": 1035.5263157894738, "grad_norm": 1.492469072341919, "learning_rate": 0.0001, "loss": 0.0093, "step": 157400 }, { "epoch": 1035.592105263158, "grad_norm": 1.4782291650772095, "learning_rate": 0.0001, "loss": 0.0099, "step": 157410 }, { "epoch": 1035.657894736842, "grad_norm": 1.0031368732452393, "learning_rate": 0.0001, "loss": 0.0092, "step": 157420 }, { "epoch": 1035.7236842105262, "grad_norm": 1.0063573122024536, "learning_rate": 0.0001, "loss": 0.0079, "step": 157430 }, { "epoch": 1035.7894736842106, "grad_norm": 1.1823172569274902, "learning_rate": 0.0001, "loss": 0.0093, "step": 157440 }, { "epoch": 1035.8552631578948, "grad_norm": 0.9905596971511841, "learning_rate": 0.0001, "loss": 0.009, "step": 157450 }, { "epoch": 1035.921052631579, "grad_norm": 0.9706788063049316, "learning_rate": 0.0001, "loss": 0.0096, "step": 157460 }, { "epoch": 1035.9868421052631, "grad_norm": 1.1120086908340454, "learning_rate": 0.0001, "loss": 0.0077, "step": 157470 }, { "epoch": 1036.0526315789473, "grad_norm": 1.1422266960144043, "learning_rate": 0.0001, "loss": 0.0082, "step": 157480 }, { "epoch": 1036.1184210526317, "grad_norm": 1.3374935388565063, "learning_rate": 0.0001, "loss": 0.0095, "step": 157490 }, { "epoch": 1036.1842105263158, "grad_norm": 1.3589179515838623, "learning_rate": 0.0001, "loss": 0.0103, "step": 157500 }, { "epoch": 1036.25, "grad_norm": 1.1102532148361206, "learning_rate": 0.0001, "loss": 0.0088, "step": 157510 }, { "epoch": 1036.3157894736842, "grad_norm": 1.357604742050171, "learning_rate": 0.0001, "loss": 0.0111, "step": 157520 }, { "epoch": 1036.3815789473683, "grad_norm": 1.054775595664978, "learning_rate": 0.0001, "loss": 0.0076, "step": 157530 }, { "epoch": 1036.4473684210527, "grad_norm": 1.0131275653839111, "learning_rate": 0.0001, "loss": 0.0111, "step": 157540 }, { "epoch": 1036.5131578947369, "grad_norm": 0.73055100440979, "learning_rate": 0.0001, "loss": 0.0079, "step": 157550 }, { "epoch": 1036.578947368421, "grad_norm": 0.8774827122688293, "learning_rate": 0.0001, "loss": 0.01, "step": 157560 }, { "epoch": 1036.6447368421052, "grad_norm": 1.4485974311828613, "learning_rate": 0.0001, "loss": 0.0085, "step": 157570 }, { "epoch": 1036.7105263157894, "grad_norm": 1.3411765098571777, "learning_rate": 0.0001, "loss": 0.0075, "step": 157580 }, { "epoch": 1036.7763157894738, "grad_norm": 1.4272361993789673, "learning_rate": 0.0001, "loss": 0.0075, "step": 157590 }, { "epoch": 1036.842105263158, "grad_norm": 1.3204584121704102, "learning_rate": 0.0001, "loss": 0.0105, "step": 157600 }, { "epoch": 1036.907894736842, "grad_norm": 1.5034716129302979, "learning_rate": 0.0001, "loss": 0.0094, "step": 157610 }, { "epoch": 1036.9736842105262, "grad_norm": 1.6326338052749634, "learning_rate": 0.0001, "loss": 0.0087, "step": 157620 }, { "epoch": 1037.0394736842106, "grad_norm": 0.9135128259658813, "learning_rate": 0.0001, "loss": 0.0095, "step": 157630 }, { "epoch": 1037.1052631578948, "grad_norm": 1.148511290550232, "learning_rate": 0.0001, "loss": 0.0112, "step": 157640 }, { "epoch": 1037.171052631579, "grad_norm": 1.2435686588287354, "learning_rate": 0.0001, "loss": 0.0097, "step": 157650 }, { "epoch": 1037.2368421052631, "grad_norm": 0.9658685326576233, "learning_rate": 0.0001, "loss": 0.0085, "step": 157660 }, { "epoch": 1037.3026315789473, "grad_norm": 1.3001043796539307, "learning_rate": 0.0001, "loss": 0.0085, "step": 157670 }, { "epoch": 1037.3684210526317, "grad_norm": 1.0400381088256836, "learning_rate": 0.0001, "loss": 0.0096, "step": 157680 }, { "epoch": 1037.4342105263158, "grad_norm": 0.8855817914009094, "learning_rate": 0.0001, "loss": 0.0075, "step": 157690 }, { "epoch": 1037.5, "grad_norm": 1.0237966775894165, "learning_rate": 0.0001, "loss": 0.0069, "step": 157700 }, { "epoch": 1037.5657894736842, "grad_norm": 1.0766881704330444, "learning_rate": 0.0001, "loss": 0.0098, "step": 157710 }, { "epoch": 1037.6315789473683, "grad_norm": 1.1923819780349731, "learning_rate": 0.0001, "loss": 0.0087, "step": 157720 }, { "epoch": 1037.6973684210527, "grad_norm": 1.0676945447921753, "learning_rate": 0.0001, "loss": 0.0088, "step": 157730 }, { "epoch": 1037.7631578947369, "grad_norm": 0.5870857238769531, "learning_rate": 0.0001, "loss": 0.0088, "step": 157740 }, { "epoch": 1037.828947368421, "grad_norm": 0.9011507034301758, "learning_rate": 0.0001, "loss": 0.0092, "step": 157750 }, { "epoch": 1037.8947368421052, "grad_norm": 1.1840976476669312, "learning_rate": 0.0001, "loss": 0.0082, "step": 157760 }, { "epoch": 1037.9605263157894, "grad_norm": 1.0965074300765991, "learning_rate": 0.0001, "loss": 0.0086, "step": 157770 }, { "epoch": 1038.0263157894738, "grad_norm": 0.8631574511528015, "learning_rate": 0.0001, "loss": 0.0111, "step": 157780 }, { "epoch": 1038.092105263158, "grad_norm": 1.3120415210723877, "learning_rate": 0.0001, "loss": 0.0072, "step": 157790 }, { "epoch": 1038.157894736842, "grad_norm": 1.1779505014419556, "learning_rate": 0.0001, "loss": 0.0091, "step": 157800 }, { "epoch": 1038.2236842105262, "grad_norm": 1.042224645614624, "learning_rate": 0.0001, "loss": 0.0078, "step": 157810 }, { "epoch": 1038.2894736842106, "grad_norm": 0.9169209599494934, "learning_rate": 0.0001, "loss": 0.0082, "step": 157820 }, { "epoch": 1038.3552631578948, "grad_norm": 1.129440188407898, "learning_rate": 0.0001, "loss": 0.0085, "step": 157830 }, { "epoch": 1038.421052631579, "grad_norm": 0.9302949905395508, "learning_rate": 0.0001, "loss": 0.0093, "step": 157840 }, { "epoch": 1038.4868421052631, "grad_norm": 1.3047235012054443, "learning_rate": 0.0001, "loss": 0.0093, "step": 157850 }, { "epoch": 1038.5526315789473, "grad_norm": 1.4257992506027222, "learning_rate": 0.0001, "loss": 0.0097, "step": 157860 }, { "epoch": 1038.6184210526317, "grad_norm": 1.1425153017044067, "learning_rate": 0.0001, "loss": 0.0095, "step": 157870 }, { "epoch": 1038.6842105263158, "grad_norm": 1.1358925104141235, "learning_rate": 0.0001, "loss": 0.01, "step": 157880 }, { "epoch": 1038.75, "grad_norm": 0.9900014400482178, "learning_rate": 0.0001, "loss": 0.0083, "step": 157890 }, { "epoch": 1038.8157894736842, "grad_norm": 0.9139213562011719, "learning_rate": 0.0001, "loss": 0.0092, "step": 157900 }, { "epoch": 1038.8815789473683, "grad_norm": 1.2955433130264282, "learning_rate": 0.0001, "loss": 0.0112, "step": 157910 }, { "epoch": 1038.9473684210527, "grad_norm": 1.2196770906448364, "learning_rate": 0.0001, "loss": 0.0088, "step": 157920 }, { "epoch": 1039.0131578947369, "grad_norm": 1.0511000156402588, "learning_rate": 0.0001, "loss": 0.0103, "step": 157930 }, { "epoch": 1039.078947368421, "grad_norm": 1.1892273426055908, "learning_rate": 0.0001, "loss": 0.0084, "step": 157940 }, { "epoch": 1039.1447368421052, "grad_norm": 0.8552981019020081, "learning_rate": 0.0001, "loss": 0.0096, "step": 157950 }, { "epoch": 1039.2105263157894, "grad_norm": 0.9567652940750122, "learning_rate": 0.0001, "loss": 0.0087, "step": 157960 }, { "epoch": 1039.2763157894738, "grad_norm": 0.9324297904968262, "learning_rate": 0.0001, "loss": 0.0078, "step": 157970 }, { "epoch": 1039.342105263158, "grad_norm": 0.8563692569732666, "learning_rate": 0.0001, "loss": 0.0099, "step": 157980 }, { "epoch": 1039.407894736842, "grad_norm": 1.0775697231292725, "learning_rate": 0.0001, "loss": 0.0088, "step": 157990 }, { "epoch": 1039.4736842105262, "grad_norm": 1.0669533014297485, "learning_rate": 0.0001, "loss": 0.0095, "step": 158000 }, { "epoch": 1039.5394736842106, "grad_norm": 1.2241381406784058, "learning_rate": 0.0001, "loss": 0.0085, "step": 158010 }, { "epoch": 1039.6052631578948, "grad_norm": 1.0516544580459595, "learning_rate": 0.0001, "loss": 0.0085, "step": 158020 }, { "epoch": 1039.671052631579, "grad_norm": 1.1746127605438232, "learning_rate": 0.0001, "loss": 0.0084, "step": 158030 }, { "epoch": 1039.7368421052631, "grad_norm": 0.8839475512504578, "learning_rate": 0.0001, "loss": 0.0097, "step": 158040 }, { "epoch": 1039.8026315789473, "grad_norm": 0.8313326835632324, "learning_rate": 0.0001, "loss": 0.0107, "step": 158050 }, { "epoch": 1039.8684210526317, "grad_norm": 1.1891549825668335, "learning_rate": 0.0001, "loss": 0.0092, "step": 158060 }, { "epoch": 1039.9342105263158, "grad_norm": 0.9349226355552673, "learning_rate": 0.0001, "loss": 0.0105, "step": 158070 }, { "epoch": 1040.0, "grad_norm": 1.3668376207351685, "learning_rate": 0.0001, "loss": 0.0086, "step": 158080 }, { "epoch": 1040.0657894736842, "grad_norm": 1.0891430377960205, "learning_rate": 0.0001, "loss": 0.0096, "step": 158090 }, { "epoch": 1040.1315789473683, "grad_norm": 1.2689098119735718, "learning_rate": 0.0001, "loss": 0.0093, "step": 158100 }, { "epoch": 1040.1973684210527, "grad_norm": 1.3867899179458618, "learning_rate": 0.0001, "loss": 0.0102, "step": 158110 }, { "epoch": 1040.2631578947369, "grad_norm": 1.1549094915390015, "learning_rate": 0.0001, "loss": 0.0089, "step": 158120 }, { "epoch": 1040.328947368421, "grad_norm": 1.0771297216415405, "learning_rate": 0.0001, "loss": 0.0085, "step": 158130 }, { "epoch": 1040.3947368421052, "grad_norm": 0.6854061484336853, "learning_rate": 0.0001, "loss": 0.0091, "step": 158140 }, { "epoch": 1040.4605263157894, "grad_norm": 0.7592263221740723, "learning_rate": 0.0001, "loss": 0.0099, "step": 158150 }, { "epoch": 1040.5263157894738, "grad_norm": 0.7394665479660034, "learning_rate": 0.0001, "loss": 0.0099, "step": 158160 }, { "epoch": 1040.592105263158, "grad_norm": 1.2182101011276245, "learning_rate": 0.0001, "loss": 0.0093, "step": 158170 }, { "epoch": 1040.657894736842, "grad_norm": 1.0940667390823364, "learning_rate": 0.0001, "loss": 0.0092, "step": 158180 }, { "epoch": 1040.7236842105262, "grad_norm": 1.154238224029541, "learning_rate": 0.0001, "loss": 0.0086, "step": 158190 }, { "epoch": 1040.7894736842106, "grad_norm": 1.0772144794464111, "learning_rate": 0.0001, "loss": 0.0091, "step": 158200 }, { "epoch": 1040.8552631578948, "grad_norm": 0.7135392427444458, "learning_rate": 0.0001, "loss": 0.0083, "step": 158210 }, { "epoch": 1040.921052631579, "grad_norm": 0.6920236945152283, "learning_rate": 0.0001, "loss": 0.0089, "step": 158220 }, { "epoch": 1040.9868421052631, "grad_norm": 0.9873380661010742, "learning_rate": 0.0001, "loss": 0.01, "step": 158230 }, { "epoch": 1041.0526315789473, "grad_norm": 1.1470561027526855, "learning_rate": 0.0001, "loss": 0.0091, "step": 158240 }, { "epoch": 1041.1184210526317, "grad_norm": 1.1026843786239624, "learning_rate": 0.0001, "loss": 0.0105, "step": 158250 }, { "epoch": 1041.1842105263158, "grad_norm": 0.9850237369537354, "learning_rate": 0.0001, "loss": 0.0077, "step": 158260 }, { "epoch": 1041.25, "grad_norm": 0.8078471422195435, "learning_rate": 0.0001, "loss": 0.0082, "step": 158270 }, { "epoch": 1041.3157894736842, "grad_norm": 0.9304453134536743, "learning_rate": 0.0001, "loss": 0.0095, "step": 158280 }, { "epoch": 1041.3815789473683, "grad_norm": 1.4547866582870483, "learning_rate": 0.0001, "loss": 0.0096, "step": 158290 }, { "epoch": 1041.4473684210527, "grad_norm": 1.2319010496139526, "learning_rate": 0.0001, "loss": 0.0087, "step": 158300 }, { "epoch": 1041.5131578947369, "grad_norm": 0.9949256181716919, "learning_rate": 0.0001, "loss": 0.0086, "step": 158310 }, { "epoch": 1041.578947368421, "grad_norm": 0.8701847195625305, "learning_rate": 0.0001, "loss": 0.0106, "step": 158320 }, { "epoch": 1041.6447368421052, "grad_norm": 1.0713963508605957, "learning_rate": 0.0001, "loss": 0.0075, "step": 158330 }, { "epoch": 1041.7105263157894, "grad_norm": 1.043212652206421, "learning_rate": 0.0001, "loss": 0.0094, "step": 158340 }, { "epoch": 1041.7763157894738, "grad_norm": 1.2368879318237305, "learning_rate": 0.0001, "loss": 0.0091, "step": 158350 }, { "epoch": 1041.842105263158, "grad_norm": 1.1877309083938599, "learning_rate": 0.0001, "loss": 0.0092, "step": 158360 }, { "epoch": 1041.907894736842, "grad_norm": 1.265149712562561, "learning_rate": 0.0001, "loss": 0.0101, "step": 158370 }, { "epoch": 1041.9736842105262, "grad_norm": 1.0216879844665527, "learning_rate": 0.0001, "loss": 0.009, "step": 158380 }, { "epoch": 1042.0394736842106, "grad_norm": 0.9168482422828674, "learning_rate": 0.0001, "loss": 0.0105, "step": 158390 }, { "epoch": 1042.1052631578948, "grad_norm": 1.1165847778320312, "learning_rate": 0.0001, "loss": 0.0083, "step": 158400 }, { "epoch": 1042.171052631579, "grad_norm": 1.5731300115585327, "learning_rate": 0.0001, "loss": 0.009, "step": 158410 }, { "epoch": 1042.2368421052631, "grad_norm": 1.1611987352371216, "learning_rate": 0.0001, "loss": 0.0111, "step": 158420 }, { "epoch": 1042.3026315789473, "grad_norm": 1.191115140914917, "learning_rate": 0.0001, "loss": 0.0079, "step": 158430 }, { "epoch": 1042.3684210526317, "grad_norm": 0.9672328233718872, "learning_rate": 0.0001, "loss": 0.0108, "step": 158440 }, { "epoch": 1042.4342105263158, "grad_norm": 1.1341195106506348, "learning_rate": 0.0001, "loss": 0.0086, "step": 158450 }, { "epoch": 1042.5, "grad_norm": 1.0045264959335327, "learning_rate": 0.0001, "loss": 0.0088, "step": 158460 }, { "epoch": 1042.5657894736842, "grad_norm": 1.13510000705719, "learning_rate": 0.0001, "loss": 0.0086, "step": 158470 }, { "epoch": 1042.6315789473683, "grad_norm": 0.9069583415985107, "learning_rate": 0.0001, "loss": 0.0086, "step": 158480 }, { "epoch": 1042.6973684210527, "grad_norm": 1.0426357984542847, "learning_rate": 0.0001, "loss": 0.0096, "step": 158490 }, { "epoch": 1042.7631578947369, "grad_norm": 1.1042143106460571, "learning_rate": 0.0001, "loss": 0.009, "step": 158500 }, { "epoch": 1042.828947368421, "grad_norm": 1.1533740758895874, "learning_rate": 0.0001, "loss": 0.0098, "step": 158510 }, { "epoch": 1042.8947368421052, "grad_norm": 1.0955190658569336, "learning_rate": 0.0001, "loss": 0.0092, "step": 158520 }, { "epoch": 1042.9605263157894, "grad_norm": 1.552657961845398, "learning_rate": 0.0001, "loss": 0.01, "step": 158530 }, { "epoch": 1043.0263157894738, "grad_norm": 0.9286198019981384, "learning_rate": 0.0001, "loss": 0.0099, "step": 158540 }, { "epoch": 1043.092105263158, "grad_norm": 0.8410740494728088, "learning_rate": 0.0001, "loss": 0.0076, "step": 158550 }, { "epoch": 1043.157894736842, "grad_norm": 1.1539233922958374, "learning_rate": 0.0001, "loss": 0.0114, "step": 158560 }, { "epoch": 1043.2236842105262, "grad_norm": 1.0042409896850586, "learning_rate": 0.0001, "loss": 0.0083, "step": 158570 }, { "epoch": 1043.2894736842106, "grad_norm": 1.2449952363967896, "learning_rate": 0.0001, "loss": 0.0078, "step": 158580 }, { "epoch": 1043.3552631578948, "grad_norm": 1.4503757953643799, "learning_rate": 0.0001, "loss": 0.0089, "step": 158590 }, { "epoch": 1043.421052631579, "grad_norm": 1.995700478553772, "learning_rate": 0.0001, "loss": 0.0084, "step": 158600 }, { "epoch": 1043.4868421052631, "grad_norm": 1.310492992401123, "learning_rate": 0.0001, "loss": 0.0097, "step": 158610 }, { "epoch": 1043.5526315789473, "grad_norm": 1.1206649541854858, "learning_rate": 0.0001, "loss": 0.009, "step": 158620 }, { "epoch": 1043.6184210526317, "grad_norm": 1.135241985321045, "learning_rate": 0.0001, "loss": 0.0078, "step": 158630 }, { "epoch": 1043.6842105263158, "grad_norm": 1.2304860353469849, "learning_rate": 0.0001, "loss": 0.0095, "step": 158640 }, { "epoch": 1043.75, "grad_norm": 1.1069141626358032, "learning_rate": 0.0001, "loss": 0.0082, "step": 158650 }, { "epoch": 1043.8157894736842, "grad_norm": 1.013854742050171, "learning_rate": 0.0001, "loss": 0.0102, "step": 158660 }, { "epoch": 1043.8815789473683, "grad_norm": 0.8795697093009949, "learning_rate": 0.0001, "loss": 0.0097, "step": 158670 }, { "epoch": 1043.9473684210527, "grad_norm": 1.4216660261154175, "learning_rate": 0.0001, "loss": 0.0098, "step": 158680 }, { "epoch": 1044.0131578947369, "grad_norm": 1.174999475479126, "learning_rate": 0.0001, "loss": 0.0092, "step": 158690 }, { "epoch": 1044.078947368421, "grad_norm": 1.3855254650115967, "learning_rate": 0.0001, "loss": 0.0107, "step": 158700 }, { "epoch": 1044.1447368421052, "grad_norm": 0.9125514626502991, "learning_rate": 0.0001, "loss": 0.0089, "step": 158710 }, { "epoch": 1044.2105263157894, "grad_norm": 0.9567851424217224, "learning_rate": 0.0001, "loss": 0.0084, "step": 158720 }, { "epoch": 1044.2763157894738, "grad_norm": 0.9960494041442871, "learning_rate": 0.0001, "loss": 0.0082, "step": 158730 }, { "epoch": 1044.342105263158, "grad_norm": 0.8722960948944092, "learning_rate": 0.0001, "loss": 0.0092, "step": 158740 }, { "epoch": 1044.407894736842, "grad_norm": 0.9810813665390015, "learning_rate": 0.0001, "loss": 0.0087, "step": 158750 }, { "epoch": 1044.4736842105262, "grad_norm": 1.2386258840560913, "learning_rate": 0.0001, "loss": 0.0103, "step": 158760 }, { "epoch": 1044.5394736842106, "grad_norm": 1.000181794166565, "learning_rate": 0.0001, "loss": 0.0103, "step": 158770 }, { "epoch": 1044.6052631578948, "grad_norm": 0.927920937538147, "learning_rate": 0.0001, "loss": 0.0073, "step": 158780 }, { "epoch": 1044.671052631579, "grad_norm": 0.8654837608337402, "learning_rate": 0.0001, "loss": 0.0079, "step": 158790 }, { "epoch": 1044.7368421052631, "grad_norm": 0.9293181896209717, "learning_rate": 0.0001, "loss": 0.0097, "step": 158800 }, { "epoch": 1044.8026315789473, "grad_norm": 1.0467267036437988, "learning_rate": 0.0001, "loss": 0.0087, "step": 158810 }, { "epoch": 1044.8684210526317, "grad_norm": 0.9053267240524292, "learning_rate": 0.0001, "loss": 0.0075, "step": 158820 }, { "epoch": 1044.9342105263158, "grad_norm": 1.1089938879013062, "learning_rate": 0.0001, "loss": 0.0096, "step": 158830 }, { "epoch": 1045.0, "grad_norm": 1.0741143226623535, "learning_rate": 0.0001, "loss": 0.0111, "step": 158840 }, { "epoch": 1045.0657894736842, "grad_norm": 0.8008183836936951, "learning_rate": 0.0001, "loss": 0.0092, "step": 158850 }, { "epoch": 1045.1315789473683, "grad_norm": 0.9834315776824951, "learning_rate": 0.0001, "loss": 0.0077, "step": 158860 }, { "epoch": 1045.1973684210527, "grad_norm": 0.8978926539421082, "learning_rate": 0.0001, "loss": 0.0093, "step": 158870 }, { "epoch": 1045.2631578947369, "grad_norm": 0.7383962273597717, "learning_rate": 0.0001, "loss": 0.0108, "step": 158880 }, { "epoch": 1045.328947368421, "grad_norm": 0.7250087261199951, "learning_rate": 0.0001, "loss": 0.0087, "step": 158890 }, { "epoch": 1045.3947368421052, "grad_norm": 1.158569097518921, "learning_rate": 0.0001, "loss": 0.0108, "step": 158900 }, { "epoch": 1045.4605263157894, "grad_norm": 1.0793335437774658, "learning_rate": 0.0001, "loss": 0.0092, "step": 158910 }, { "epoch": 1045.5263157894738, "grad_norm": 1.2114217281341553, "learning_rate": 0.0001, "loss": 0.0093, "step": 158920 }, { "epoch": 1045.592105263158, "grad_norm": 1.464314579963684, "learning_rate": 0.0001, "loss": 0.009, "step": 158930 }, { "epoch": 1045.657894736842, "grad_norm": 1.0885869264602661, "learning_rate": 0.0001, "loss": 0.0094, "step": 158940 }, { "epoch": 1045.7236842105262, "grad_norm": 1.1773313283920288, "learning_rate": 0.0001, "loss": 0.0095, "step": 158950 }, { "epoch": 1045.7894736842106, "grad_norm": 1.345479965209961, "learning_rate": 0.0001, "loss": 0.0103, "step": 158960 }, { "epoch": 1045.8552631578948, "grad_norm": 1.1355724334716797, "learning_rate": 0.0001, "loss": 0.0093, "step": 158970 }, { "epoch": 1045.921052631579, "grad_norm": 1.21616792678833, "learning_rate": 0.0001, "loss": 0.0094, "step": 158980 }, { "epoch": 1045.9868421052631, "grad_norm": 0.895064651966095, "learning_rate": 0.0001, "loss": 0.0082, "step": 158990 }, { "epoch": 1046.0526315789473, "grad_norm": 1.1562937498092651, "learning_rate": 0.0001, "loss": 0.0083, "step": 159000 }, { "epoch": 1046.1184210526317, "grad_norm": 1.1668152809143066, "learning_rate": 0.0001, "loss": 0.0105, "step": 159010 }, { "epoch": 1046.1842105263158, "grad_norm": 1.03926682472229, "learning_rate": 0.0001, "loss": 0.0116, "step": 159020 }, { "epoch": 1046.25, "grad_norm": 1.0261049270629883, "learning_rate": 0.0001, "loss": 0.0073, "step": 159030 }, { "epoch": 1046.3157894736842, "grad_norm": 1.0766314268112183, "learning_rate": 0.0001, "loss": 0.0079, "step": 159040 }, { "epoch": 1046.3815789473683, "grad_norm": 0.6256817579269409, "learning_rate": 0.0001, "loss": 0.0106, "step": 159050 }, { "epoch": 1046.4473684210527, "grad_norm": 0.874019980430603, "learning_rate": 0.0001, "loss": 0.0071, "step": 159060 }, { "epoch": 1046.5131578947369, "grad_norm": 1.0828053951263428, "learning_rate": 0.0001, "loss": 0.0078, "step": 159070 }, { "epoch": 1046.578947368421, "grad_norm": 1.11966872215271, "learning_rate": 0.0001, "loss": 0.0104, "step": 159080 }, { "epoch": 1046.6447368421052, "grad_norm": 0.9557555913925171, "learning_rate": 0.0001, "loss": 0.0098, "step": 159090 }, { "epoch": 1046.7105263157894, "grad_norm": 1.2064489126205444, "learning_rate": 0.0001, "loss": 0.0108, "step": 159100 }, { "epoch": 1046.7763157894738, "grad_norm": 1.3529369831085205, "learning_rate": 0.0001, "loss": 0.0102, "step": 159110 }, { "epoch": 1046.842105263158, "grad_norm": 0.934829592704773, "learning_rate": 0.0001, "loss": 0.0091, "step": 159120 }, { "epoch": 1046.907894736842, "grad_norm": 1.274330735206604, "learning_rate": 0.0001, "loss": 0.0081, "step": 159130 }, { "epoch": 1046.9736842105262, "grad_norm": 1.0497575998306274, "learning_rate": 0.0001, "loss": 0.0087, "step": 159140 }, { "epoch": 1047.0394736842106, "grad_norm": 1.1160510778427124, "learning_rate": 0.0001, "loss": 0.0073, "step": 159150 }, { "epoch": 1047.1052631578948, "grad_norm": 1.325922966003418, "learning_rate": 0.0001, "loss": 0.0092, "step": 159160 }, { "epoch": 1047.171052631579, "grad_norm": 1.4824130535125732, "learning_rate": 0.0001, "loss": 0.0109, "step": 159170 }, { "epoch": 1047.2368421052631, "grad_norm": 1.2637231349945068, "learning_rate": 0.0001, "loss": 0.0082, "step": 159180 }, { "epoch": 1047.3026315789473, "grad_norm": 1.0419176816940308, "learning_rate": 0.0001, "loss": 0.0102, "step": 159190 }, { "epoch": 1047.3684210526317, "grad_norm": 1.0075671672821045, "learning_rate": 0.0001, "loss": 0.0077, "step": 159200 }, { "epoch": 1047.4342105263158, "grad_norm": 0.9810393452644348, "learning_rate": 0.0001, "loss": 0.0081, "step": 159210 }, { "epoch": 1047.5, "grad_norm": 0.9505169987678528, "learning_rate": 0.0001, "loss": 0.011, "step": 159220 }, { "epoch": 1047.5657894736842, "grad_norm": 0.9999101758003235, "learning_rate": 0.0001, "loss": 0.0073, "step": 159230 }, { "epoch": 1047.6315789473683, "grad_norm": 0.7792953848838806, "learning_rate": 0.0001, "loss": 0.0094, "step": 159240 }, { "epoch": 1047.6973684210527, "grad_norm": 1.1231037378311157, "learning_rate": 0.0001, "loss": 0.0085, "step": 159250 }, { "epoch": 1047.7631578947369, "grad_norm": 1.1759856939315796, "learning_rate": 0.0001, "loss": 0.0085, "step": 159260 }, { "epoch": 1047.828947368421, "grad_norm": 0.9894112348556519, "learning_rate": 0.0001, "loss": 0.0115, "step": 159270 }, { "epoch": 1047.8947368421052, "grad_norm": 0.9776474833488464, "learning_rate": 0.0001, "loss": 0.01, "step": 159280 }, { "epoch": 1047.9605263157894, "grad_norm": 1.2519046068191528, "learning_rate": 0.0001, "loss": 0.0091, "step": 159290 }, { "epoch": 1048.0263157894738, "grad_norm": 1.459942102432251, "learning_rate": 0.0001, "loss": 0.01, "step": 159300 }, { "epoch": 1048.092105263158, "grad_norm": 1.0710073709487915, "learning_rate": 0.0001, "loss": 0.0103, "step": 159310 }, { "epoch": 1048.157894736842, "grad_norm": 0.962834358215332, "learning_rate": 0.0001, "loss": 0.0088, "step": 159320 }, { "epoch": 1048.2236842105262, "grad_norm": 0.8130425810813904, "learning_rate": 0.0001, "loss": 0.0104, "step": 159330 }, { "epoch": 1048.2894736842106, "grad_norm": 0.9603180289268494, "learning_rate": 0.0001, "loss": 0.0084, "step": 159340 }, { "epoch": 1048.3552631578948, "grad_norm": 0.8210400342941284, "learning_rate": 0.0001, "loss": 0.0091, "step": 159350 }, { "epoch": 1048.421052631579, "grad_norm": 1.1121681928634644, "learning_rate": 0.0001, "loss": 0.0091, "step": 159360 }, { "epoch": 1048.4868421052631, "grad_norm": 1.0714201927185059, "learning_rate": 0.0001, "loss": 0.0094, "step": 159370 }, { "epoch": 1048.5526315789473, "grad_norm": 0.9843019247055054, "learning_rate": 0.0001, "loss": 0.0093, "step": 159380 }, { "epoch": 1048.6184210526317, "grad_norm": 1.1511754989624023, "learning_rate": 0.0001, "loss": 0.0099, "step": 159390 }, { "epoch": 1048.6842105263158, "grad_norm": 0.6616480946540833, "learning_rate": 0.0001, "loss": 0.0098, "step": 159400 }, { "epoch": 1048.75, "grad_norm": 1.1609398126602173, "learning_rate": 0.0001, "loss": 0.0106, "step": 159410 }, { "epoch": 1048.8157894736842, "grad_norm": 0.9391231536865234, "learning_rate": 0.0001, "loss": 0.0082, "step": 159420 }, { "epoch": 1048.8815789473683, "grad_norm": 0.9525076150894165, "learning_rate": 0.0001, "loss": 0.0081, "step": 159430 }, { "epoch": 1048.9473684210527, "grad_norm": 0.9699547290802002, "learning_rate": 0.0001, "loss": 0.0088, "step": 159440 }, { "epoch": 1049.0131578947369, "grad_norm": 0.7067571878433228, "learning_rate": 0.0001, "loss": 0.0102, "step": 159450 }, { "epoch": 1049.078947368421, "grad_norm": 0.7832255959510803, "learning_rate": 0.0001, "loss": 0.0096, "step": 159460 }, { "epoch": 1049.1447368421052, "grad_norm": 0.945345938205719, "learning_rate": 0.0001, "loss": 0.0098, "step": 159470 }, { "epoch": 1049.2105263157894, "grad_norm": 1.0841176509857178, "learning_rate": 0.0001, "loss": 0.0087, "step": 159480 }, { "epoch": 1049.2763157894738, "grad_norm": 1.3195167779922485, "learning_rate": 0.0001, "loss": 0.0093, "step": 159490 }, { "epoch": 1049.342105263158, "grad_norm": 1.5180623531341553, "learning_rate": 0.0001, "loss": 0.0099, "step": 159500 }, { "epoch": 1049.407894736842, "grad_norm": 0.7986946105957031, "learning_rate": 0.0001, "loss": 0.0106, "step": 159510 }, { "epoch": 1049.4736842105262, "grad_norm": 1.2574127912521362, "learning_rate": 0.0001, "loss": 0.0089, "step": 159520 }, { "epoch": 1049.5394736842106, "grad_norm": 1.320098876953125, "learning_rate": 0.0001, "loss": 0.009, "step": 159530 }, { "epoch": 1049.6052631578948, "grad_norm": 0.890644371509552, "learning_rate": 0.0001, "loss": 0.0084, "step": 159540 }, { "epoch": 1049.671052631579, "grad_norm": 1.4320951700210571, "learning_rate": 0.0001, "loss": 0.0093, "step": 159550 }, { "epoch": 1049.7368421052631, "grad_norm": 1.1257306337356567, "learning_rate": 0.0001, "loss": 0.0089, "step": 159560 }, { "epoch": 1049.8026315789473, "grad_norm": 1.2194310426712036, "learning_rate": 0.0001, "loss": 0.0083, "step": 159570 }, { "epoch": 1049.8684210526317, "grad_norm": 0.7975109219551086, "learning_rate": 0.0001, "loss": 0.0087, "step": 159580 }, { "epoch": 1049.9342105263158, "grad_norm": 1.1322660446166992, "learning_rate": 0.0001, "loss": 0.0104, "step": 159590 }, { "epoch": 1050.0, "grad_norm": 1.1089693307876587, "learning_rate": 0.0001, "loss": 0.0076, "step": 159600 }, { "epoch": 1050.0657894736842, "grad_norm": 1.135398507118225, "learning_rate": 0.0001, "loss": 0.0087, "step": 159610 }, { "epoch": 1050.1315789473683, "grad_norm": 1.2235279083251953, "learning_rate": 0.0001, "loss": 0.0087, "step": 159620 }, { "epoch": 1050.1973684210527, "grad_norm": 1.2901211977005005, "learning_rate": 0.0001, "loss": 0.0081, "step": 159630 }, { "epoch": 1050.2631578947369, "grad_norm": 1.0014219284057617, "learning_rate": 0.0001, "loss": 0.0089, "step": 159640 }, { "epoch": 1050.328947368421, "grad_norm": 1.5262455940246582, "learning_rate": 0.0001, "loss": 0.0075, "step": 159650 }, { "epoch": 1050.3947368421052, "grad_norm": 0.9812429547309875, "learning_rate": 0.0001, "loss": 0.0097, "step": 159660 }, { "epoch": 1050.4605263157894, "grad_norm": 0.8760225176811218, "learning_rate": 0.0001, "loss": 0.0109, "step": 159670 }, { "epoch": 1050.5263157894738, "grad_norm": 0.8203020691871643, "learning_rate": 0.0001, "loss": 0.0092, "step": 159680 }, { "epoch": 1050.592105263158, "grad_norm": 1.3625808954238892, "learning_rate": 0.0001, "loss": 0.0095, "step": 159690 }, { "epoch": 1050.657894736842, "grad_norm": 0.927275538444519, "learning_rate": 0.0001, "loss": 0.0085, "step": 159700 }, { "epoch": 1050.7236842105262, "grad_norm": 1.043666124343872, "learning_rate": 0.0001, "loss": 0.0093, "step": 159710 }, { "epoch": 1050.7894736842106, "grad_norm": 0.8062131404876709, "learning_rate": 0.0001, "loss": 0.0112, "step": 159720 }, { "epoch": 1050.8552631578948, "grad_norm": 0.9691109657287598, "learning_rate": 0.0001, "loss": 0.0087, "step": 159730 }, { "epoch": 1050.921052631579, "grad_norm": 1.3453434705734253, "learning_rate": 0.0001, "loss": 0.0099, "step": 159740 }, { "epoch": 1050.9868421052631, "grad_norm": 1.1522691249847412, "learning_rate": 0.0001, "loss": 0.0098, "step": 159750 }, { "epoch": 1051.0526315789473, "grad_norm": 1.2776633501052856, "learning_rate": 0.0001, "loss": 0.0081, "step": 159760 }, { "epoch": 1051.1184210526317, "grad_norm": 1.5653990507125854, "learning_rate": 0.0001, "loss": 0.0095, "step": 159770 }, { "epoch": 1051.1842105263158, "grad_norm": 1.3178924322128296, "learning_rate": 0.0001, "loss": 0.0082, "step": 159780 }, { "epoch": 1051.25, "grad_norm": 1.3187295198440552, "learning_rate": 0.0001, "loss": 0.0091, "step": 159790 }, { "epoch": 1051.3157894736842, "grad_norm": 0.9678728580474854, "learning_rate": 0.0001, "loss": 0.0096, "step": 159800 }, { "epoch": 1051.3815789473683, "grad_norm": 0.9071379899978638, "learning_rate": 0.0001, "loss": 0.0098, "step": 159810 }, { "epoch": 1051.4473684210527, "grad_norm": 1.175668478012085, "learning_rate": 0.0001, "loss": 0.0112, "step": 159820 }, { "epoch": 1051.5131578947369, "grad_norm": 1.0487818717956543, "learning_rate": 0.0001, "loss": 0.0084, "step": 159830 }, { "epoch": 1051.578947368421, "grad_norm": 0.8464121222496033, "learning_rate": 0.0001, "loss": 0.0075, "step": 159840 }, { "epoch": 1051.6447368421052, "grad_norm": 1.1520172357559204, "learning_rate": 0.0001, "loss": 0.0099, "step": 159850 }, { "epoch": 1051.7105263157894, "grad_norm": 0.6816383600234985, "learning_rate": 0.0001, "loss": 0.0075, "step": 159860 }, { "epoch": 1051.7763157894738, "grad_norm": 1.1880346536636353, "learning_rate": 0.0001, "loss": 0.0102, "step": 159870 }, { "epoch": 1051.842105263158, "grad_norm": 1.3333251476287842, "learning_rate": 0.0001, "loss": 0.0099, "step": 159880 }, { "epoch": 1051.907894736842, "grad_norm": 1.1161757707595825, "learning_rate": 0.0001, "loss": 0.0107, "step": 159890 }, { "epoch": 1051.9736842105262, "grad_norm": 1.5317634344100952, "learning_rate": 0.0001, "loss": 0.0108, "step": 159900 }, { "epoch": 1052.0394736842106, "grad_norm": 1.2690035104751587, "learning_rate": 0.0001, "loss": 0.0099, "step": 159910 }, { "epoch": 1052.1052631578948, "grad_norm": 1.0578007698059082, "learning_rate": 0.0001, "loss": 0.0082, "step": 159920 }, { "epoch": 1052.171052631579, "grad_norm": 1.1168135404586792, "learning_rate": 0.0001, "loss": 0.0091, "step": 159930 }, { "epoch": 1052.2368421052631, "grad_norm": 1.1406240463256836, "learning_rate": 0.0001, "loss": 0.0086, "step": 159940 }, { "epoch": 1052.3026315789473, "grad_norm": 0.972987949848175, "learning_rate": 0.0001, "loss": 0.0097, "step": 159950 }, { "epoch": 1052.3684210526317, "grad_norm": 0.7099289894104004, "learning_rate": 0.0001, "loss": 0.0083, "step": 159960 }, { "epoch": 1052.4342105263158, "grad_norm": 0.7676605582237244, "learning_rate": 0.0001, "loss": 0.0092, "step": 159970 }, { "epoch": 1052.5, "grad_norm": 0.9532294273376465, "learning_rate": 0.0001, "loss": 0.0089, "step": 159980 }, { "epoch": 1052.5657894736842, "grad_norm": 0.8012232780456543, "learning_rate": 0.0001, "loss": 0.009, "step": 159990 }, { "epoch": 1052.6315789473683, "grad_norm": 1.0869078636169434, "learning_rate": 0.0001, "loss": 0.0106, "step": 160000 }, { "epoch": 1052.6973684210527, "grad_norm": 0.9576671123504639, "learning_rate": 0.0001, "loss": 0.0087, "step": 160010 }, { "epoch": 1052.7631578947369, "grad_norm": 0.9968484044075012, "learning_rate": 0.0001, "loss": 0.0101, "step": 160020 }, { "epoch": 1052.828947368421, "grad_norm": 1.0012370347976685, "learning_rate": 0.0001, "loss": 0.0093, "step": 160030 }, { "epoch": 1052.8947368421052, "grad_norm": 0.6246411800384521, "learning_rate": 0.0001, "loss": 0.0121, "step": 160040 }, { "epoch": 1052.9605263157894, "grad_norm": 0.8564648032188416, "learning_rate": 0.0001, "loss": 0.0087, "step": 160050 }, { "epoch": 1053.0263157894738, "grad_norm": 1.1350836753845215, "learning_rate": 0.0001, "loss": 0.0098, "step": 160060 }, { "epoch": 1053.092105263158, "grad_norm": 0.8760911822319031, "learning_rate": 0.0001, "loss": 0.0099, "step": 160070 }, { "epoch": 1053.157894736842, "grad_norm": 1.1660544872283936, "learning_rate": 0.0001, "loss": 0.009, "step": 160080 }, { "epoch": 1053.2236842105262, "grad_norm": 0.8360348343849182, "learning_rate": 0.0001, "loss": 0.0092, "step": 160090 }, { "epoch": 1053.2894736842106, "grad_norm": 0.754542887210846, "learning_rate": 0.0001, "loss": 0.009, "step": 160100 }, { "epoch": 1053.3552631578948, "grad_norm": 1.2186027765274048, "learning_rate": 0.0001, "loss": 0.008, "step": 160110 }, { "epoch": 1053.421052631579, "grad_norm": 0.9988258481025696, "learning_rate": 0.0001, "loss": 0.0137, "step": 160120 }, { "epoch": 1053.4868421052631, "grad_norm": 1.007232904434204, "learning_rate": 0.0001, "loss": 0.0082, "step": 160130 }, { "epoch": 1053.5526315789473, "grad_norm": 1.2770189046859741, "learning_rate": 0.0001, "loss": 0.0085, "step": 160140 }, { "epoch": 1053.6184210526317, "grad_norm": 1.163527488708496, "learning_rate": 0.0001, "loss": 0.0093, "step": 160150 }, { "epoch": 1053.6842105263158, "grad_norm": 0.8490967154502869, "learning_rate": 0.0001, "loss": 0.0108, "step": 160160 }, { "epoch": 1053.75, "grad_norm": 1.0161746740341187, "learning_rate": 0.0001, "loss": 0.0091, "step": 160170 }, { "epoch": 1053.8157894736842, "grad_norm": 1.1093944311141968, "learning_rate": 0.0001, "loss": 0.0116, "step": 160180 }, { "epoch": 1053.8815789473683, "grad_norm": 0.9495304822921753, "learning_rate": 0.0001, "loss": 0.0103, "step": 160190 }, { "epoch": 1053.9473684210527, "grad_norm": 0.9384229779243469, "learning_rate": 0.0001, "loss": 0.009, "step": 160200 }, { "epoch": 1054.0131578947369, "grad_norm": 1.0374761819839478, "learning_rate": 0.0001, "loss": 0.0099, "step": 160210 }, { "epoch": 1054.078947368421, "grad_norm": 1.3228940963745117, "learning_rate": 0.0001, "loss": 0.0088, "step": 160220 }, { "epoch": 1054.1447368421052, "grad_norm": 1.1004372835159302, "learning_rate": 0.0001, "loss": 0.0088, "step": 160230 }, { "epoch": 1054.2105263157894, "grad_norm": 1.346169352531433, "learning_rate": 0.0001, "loss": 0.0095, "step": 160240 }, { "epoch": 1054.2763157894738, "grad_norm": 1.0477455854415894, "learning_rate": 0.0001, "loss": 0.0091, "step": 160250 }, { "epoch": 1054.342105263158, "grad_norm": 1.119246244430542, "learning_rate": 0.0001, "loss": 0.012, "step": 160260 }, { "epoch": 1054.407894736842, "grad_norm": 1.2886263132095337, "learning_rate": 0.0001, "loss": 0.0097, "step": 160270 }, { "epoch": 1054.4736842105262, "grad_norm": 0.7763897180557251, "learning_rate": 0.0001, "loss": 0.0102, "step": 160280 }, { "epoch": 1054.5394736842106, "grad_norm": 0.8887063264846802, "learning_rate": 0.0001, "loss": 0.0083, "step": 160290 }, { "epoch": 1054.6052631578948, "grad_norm": 1.2421995401382446, "learning_rate": 0.0001, "loss": 0.0101, "step": 160300 }, { "epoch": 1054.671052631579, "grad_norm": 1.0714391469955444, "learning_rate": 0.0001, "loss": 0.0099, "step": 160310 }, { "epoch": 1054.7368421052631, "grad_norm": 1.002956509590149, "learning_rate": 0.0001, "loss": 0.0089, "step": 160320 }, { "epoch": 1054.8026315789473, "grad_norm": 1.0658334493637085, "learning_rate": 0.0001, "loss": 0.0095, "step": 160330 }, { "epoch": 1054.8684210526317, "grad_norm": 0.7606706619262695, "learning_rate": 0.0001, "loss": 0.0095, "step": 160340 }, { "epoch": 1054.9342105263158, "grad_norm": 0.995364785194397, "learning_rate": 0.0001, "loss": 0.0096, "step": 160350 }, { "epoch": 1055.0, "grad_norm": 0.8916927576065063, "learning_rate": 0.0001, "loss": 0.0102, "step": 160360 }, { "epoch": 1055.0657894736842, "grad_norm": 1.03330659866333, "learning_rate": 0.0001, "loss": 0.0082, "step": 160370 }, { "epoch": 1055.1315789473683, "grad_norm": 0.8269760012626648, "learning_rate": 0.0001, "loss": 0.0093, "step": 160380 }, { "epoch": 1055.1973684210527, "grad_norm": 0.9928635358810425, "learning_rate": 0.0001, "loss": 0.0105, "step": 160390 }, { "epoch": 1055.2631578947369, "grad_norm": 1.2529430389404297, "learning_rate": 0.0001, "loss": 0.0108, "step": 160400 }, { "epoch": 1055.328947368421, "grad_norm": 0.6669118404388428, "learning_rate": 0.0001, "loss": 0.0091, "step": 160410 }, { "epoch": 1055.3947368421052, "grad_norm": 1.3909178972244263, "learning_rate": 0.0001, "loss": 0.0084, "step": 160420 }, { "epoch": 1055.4605263157894, "grad_norm": 1.6014965772628784, "learning_rate": 0.0001, "loss": 0.0104, "step": 160430 }, { "epoch": 1055.5263157894738, "grad_norm": 1.5059226751327515, "learning_rate": 0.0001, "loss": 0.0098, "step": 160440 }, { "epoch": 1055.592105263158, "grad_norm": 0.8995516896247864, "learning_rate": 0.0001, "loss": 0.0088, "step": 160450 }, { "epoch": 1055.657894736842, "grad_norm": 0.8544006943702698, "learning_rate": 0.0001, "loss": 0.0094, "step": 160460 }, { "epoch": 1055.7236842105262, "grad_norm": 1.2715330123901367, "learning_rate": 0.0001, "loss": 0.0095, "step": 160470 }, { "epoch": 1055.7894736842106, "grad_norm": 0.8547405004501343, "learning_rate": 0.0001, "loss": 0.0106, "step": 160480 }, { "epoch": 1055.8552631578948, "grad_norm": 0.8670430183410645, "learning_rate": 0.0001, "loss": 0.0089, "step": 160490 }, { "epoch": 1055.921052631579, "grad_norm": 0.853753387928009, "learning_rate": 0.0001, "loss": 0.0114, "step": 160500 }, { "epoch": 1055.9868421052631, "grad_norm": 0.8636345267295837, "learning_rate": 0.0001, "loss": 0.0105, "step": 160510 }, { "epoch": 1056.0526315789473, "grad_norm": 0.9810386896133423, "learning_rate": 0.0001, "loss": 0.0097, "step": 160520 }, { "epoch": 1056.1184210526317, "grad_norm": 1.062951683998108, "learning_rate": 0.0001, "loss": 0.009, "step": 160530 }, { "epoch": 1056.1842105263158, "grad_norm": 1.1501662731170654, "learning_rate": 0.0001, "loss": 0.0119, "step": 160540 }, { "epoch": 1056.25, "grad_norm": 1.5023175477981567, "learning_rate": 0.0001, "loss": 0.0098, "step": 160550 }, { "epoch": 1056.3157894736842, "grad_norm": 1.048349380493164, "learning_rate": 0.0001, "loss": 0.0107, "step": 160560 }, { "epoch": 1056.3815789473683, "grad_norm": 1.1809251308441162, "learning_rate": 0.0001, "loss": 0.0106, "step": 160570 }, { "epoch": 1056.4473684210527, "grad_norm": 1.038750171661377, "learning_rate": 0.0001, "loss": 0.0092, "step": 160580 }, { "epoch": 1056.5131578947369, "grad_norm": 1.030869483947754, "learning_rate": 0.0001, "loss": 0.0098, "step": 160590 }, { "epoch": 1056.578947368421, "grad_norm": 1.0279252529144287, "learning_rate": 0.0001, "loss": 0.01, "step": 160600 }, { "epoch": 1056.6447368421052, "grad_norm": 0.8072715997695923, "learning_rate": 0.0001, "loss": 0.0094, "step": 160610 }, { "epoch": 1056.7105263157894, "grad_norm": 1.0479940176010132, "learning_rate": 0.0001, "loss": 0.0105, "step": 160620 }, { "epoch": 1056.7763157894738, "grad_norm": 0.9599220156669617, "learning_rate": 0.0001, "loss": 0.0104, "step": 160630 }, { "epoch": 1056.842105263158, "grad_norm": 0.6841326355934143, "learning_rate": 0.0001, "loss": 0.0099, "step": 160640 }, { "epoch": 1056.907894736842, "grad_norm": 1.1303530931472778, "learning_rate": 0.0001, "loss": 0.0079, "step": 160650 }, { "epoch": 1056.9736842105262, "grad_norm": 1.0210925340652466, "learning_rate": 0.0001, "loss": 0.0093, "step": 160660 }, { "epoch": 1057.0394736842106, "grad_norm": 0.7038403153419495, "learning_rate": 0.0001, "loss": 0.0097, "step": 160670 }, { "epoch": 1057.1052631578948, "grad_norm": 1.3282930850982666, "learning_rate": 0.0001, "loss": 0.0108, "step": 160680 }, { "epoch": 1057.171052631579, "grad_norm": 1.007394790649414, "learning_rate": 0.0001, "loss": 0.009, "step": 160690 }, { "epoch": 1057.2368421052631, "grad_norm": 0.7837812304496765, "learning_rate": 0.0001, "loss": 0.0108, "step": 160700 }, { "epoch": 1057.3026315789473, "grad_norm": 0.9380816221237183, "learning_rate": 0.0001, "loss": 0.0113, "step": 160710 }, { "epoch": 1057.3684210526317, "grad_norm": 1.3225820064544678, "learning_rate": 0.0001, "loss": 0.0108, "step": 160720 }, { "epoch": 1057.4342105263158, "grad_norm": 0.6032253503799438, "learning_rate": 0.0001, "loss": 0.0121, "step": 160730 }, { "epoch": 1057.5, "grad_norm": 1.0029069185256958, "learning_rate": 0.0001, "loss": 0.0122, "step": 160740 }, { "epoch": 1057.5657894736842, "grad_norm": 0.8361908197402954, "learning_rate": 0.0001, "loss": 0.0109, "step": 160750 }, { "epoch": 1057.6315789473683, "grad_norm": 1.2243342399597168, "learning_rate": 0.0001, "loss": 0.0115, "step": 160760 }, { "epoch": 1057.6973684210527, "grad_norm": 0.9138869643211365, "learning_rate": 0.0001, "loss": 0.0109, "step": 160770 }, { "epoch": 1057.7631578947369, "grad_norm": 1.0361911058425903, "learning_rate": 0.0001, "loss": 0.0128, "step": 160780 }, { "epoch": 1057.828947368421, "grad_norm": 0.9030035138130188, "learning_rate": 0.0001, "loss": 0.01, "step": 160790 }, { "epoch": 1057.8947368421052, "grad_norm": 0.996807873249054, "learning_rate": 0.0001, "loss": 0.0093, "step": 160800 }, { "epoch": 1057.9605263157894, "grad_norm": 0.8536729216575623, "learning_rate": 0.0001, "loss": 0.0088, "step": 160810 }, { "epoch": 1058.0263157894738, "grad_norm": 1.0060828924179077, "learning_rate": 0.0001, "loss": 0.0115, "step": 160820 }, { "epoch": 1058.092105263158, "grad_norm": 0.9179419875144958, "learning_rate": 0.0001, "loss": 0.0107, "step": 160830 }, { "epoch": 1058.157894736842, "grad_norm": 0.8911098837852478, "learning_rate": 0.0001, "loss": 0.0108, "step": 160840 }, { "epoch": 1058.2236842105262, "grad_norm": 1.0588878393173218, "learning_rate": 0.0001, "loss": 0.0097, "step": 160850 }, { "epoch": 1058.2894736842106, "grad_norm": 0.6525148153305054, "learning_rate": 0.0001, "loss": 0.0104, "step": 160860 }, { "epoch": 1058.3552631578948, "grad_norm": 0.6334079504013062, "learning_rate": 0.0001, "loss": 0.01, "step": 160870 }, { "epoch": 1058.421052631579, "grad_norm": 0.8859434127807617, "learning_rate": 0.0001, "loss": 0.0098, "step": 160880 }, { "epoch": 1058.4868421052631, "grad_norm": 0.7293907999992371, "learning_rate": 0.0001, "loss": 0.0101, "step": 160890 }, { "epoch": 1058.5526315789473, "grad_norm": 0.8205122351646423, "learning_rate": 0.0001, "loss": 0.0099, "step": 160900 }, { "epoch": 1058.6184210526317, "grad_norm": 0.8700542449951172, "learning_rate": 0.0001, "loss": 0.0095, "step": 160910 }, { "epoch": 1058.6842105263158, "grad_norm": 1.1697430610656738, "learning_rate": 0.0001, "loss": 0.0098, "step": 160920 }, { "epoch": 1058.75, "grad_norm": 1.5094969272613525, "learning_rate": 0.0001, "loss": 0.0097, "step": 160930 }, { "epoch": 1058.8157894736842, "grad_norm": 1.085866928100586, "learning_rate": 0.0001, "loss": 0.011, "step": 160940 }, { "epoch": 1058.8815789473683, "grad_norm": 0.935643196105957, "learning_rate": 0.0001, "loss": 0.0125, "step": 160950 }, { "epoch": 1058.9473684210527, "grad_norm": 0.8698868751525879, "learning_rate": 0.0001, "loss": 0.0104, "step": 160960 }, { "epoch": 1059.0131578947369, "grad_norm": 1.2459019422531128, "learning_rate": 0.0001, "loss": 0.0091, "step": 160970 }, { "epoch": 1059.078947368421, "grad_norm": 1.4872862100601196, "learning_rate": 0.0001, "loss": 0.0087, "step": 160980 }, { "epoch": 1059.1447368421052, "grad_norm": 1.7374746799468994, "learning_rate": 0.0001, "loss": 0.0078, "step": 160990 }, { "epoch": 1059.2105263157894, "grad_norm": 1.4759396314620972, "learning_rate": 0.0001, "loss": 0.0103, "step": 161000 }, { "epoch": 1059.2763157894738, "grad_norm": 1.3807487487792969, "learning_rate": 0.0001, "loss": 0.0111, "step": 161010 }, { "epoch": 1059.342105263158, "grad_norm": 1.0829836130142212, "learning_rate": 0.0001, "loss": 0.008, "step": 161020 }, { "epoch": 1059.407894736842, "grad_norm": 1.3213567733764648, "learning_rate": 0.0001, "loss": 0.0128, "step": 161030 }, { "epoch": 1059.4736842105262, "grad_norm": 1.1159719228744507, "learning_rate": 0.0001, "loss": 0.01, "step": 161040 }, { "epoch": 1059.5394736842106, "grad_norm": 1.0388392210006714, "learning_rate": 0.0001, "loss": 0.009, "step": 161050 }, { "epoch": 1059.6052631578948, "grad_norm": 1.4326999187469482, "learning_rate": 0.0001, "loss": 0.0098, "step": 161060 }, { "epoch": 1059.671052631579, "grad_norm": 0.9729658365249634, "learning_rate": 0.0001, "loss": 0.0087, "step": 161070 }, { "epoch": 1059.7368421052631, "grad_norm": 0.9425380229949951, "learning_rate": 0.0001, "loss": 0.0094, "step": 161080 }, { "epoch": 1059.8026315789473, "grad_norm": 0.8282036185264587, "learning_rate": 0.0001, "loss": 0.0092, "step": 161090 }, { "epoch": 1059.8684210526317, "grad_norm": 0.782004714012146, "learning_rate": 0.0001, "loss": 0.0095, "step": 161100 }, { "epoch": 1059.9342105263158, "grad_norm": 0.8745496273040771, "learning_rate": 0.0001, "loss": 0.0095, "step": 161110 }, { "epoch": 1060.0, "grad_norm": 1.0883170366287231, "learning_rate": 0.0001, "loss": 0.011, "step": 161120 }, { "epoch": 1060.0657894736842, "grad_norm": 0.7825089693069458, "learning_rate": 0.0001, "loss": 0.0108, "step": 161130 }, { "epoch": 1060.1315789473683, "grad_norm": 0.7291836738586426, "learning_rate": 0.0001, "loss": 0.0093, "step": 161140 }, { "epoch": 1060.1973684210527, "grad_norm": 0.9234983325004578, "learning_rate": 0.0001, "loss": 0.0102, "step": 161150 }, { "epoch": 1060.2631578947369, "grad_norm": 1.0557537078857422, "learning_rate": 0.0001, "loss": 0.0102, "step": 161160 }, { "epoch": 1060.328947368421, "grad_norm": 1.190638542175293, "learning_rate": 0.0001, "loss": 0.0085, "step": 161170 }, { "epoch": 1060.3947368421052, "grad_norm": 1.2052501440048218, "learning_rate": 0.0001, "loss": 0.0093, "step": 161180 }, { "epoch": 1060.4605263157894, "grad_norm": 0.8857777714729309, "learning_rate": 0.0001, "loss": 0.0072, "step": 161190 }, { "epoch": 1060.5263157894738, "grad_norm": 0.9795964956283569, "learning_rate": 0.0001, "loss": 0.0097, "step": 161200 }, { "epoch": 1060.592105263158, "grad_norm": 1.1422735452651978, "learning_rate": 0.0001, "loss": 0.0088, "step": 161210 }, { "epoch": 1060.657894736842, "grad_norm": 0.8671726584434509, "learning_rate": 0.0001, "loss": 0.0095, "step": 161220 }, { "epoch": 1060.7236842105262, "grad_norm": 1.1022329330444336, "learning_rate": 0.0001, "loss": 0.0097, "step": 161230 }, { "epoch": 1060.7894736842106, "grad_norm": 1.2287969589233398, "learning_rate": 0.0001, "loss": 0.0086, "step": 161240 }, { "epoch": 1060.8552631578948, "grad_norm": 1.1893408298492432, "learning_rate": 0.0001, "loss": 0.0076, "step": 161250 }, { "epoch": 1060.921052631579, "grad_norm": 1.085235595703125, "learning_rate": 0.0001, "loss": 0.0122, "step": 161260 }, { "epoch": 1060.9868421052631, "grad_norm": 1.2641836404800415, "learning_rate": 0.0001, "loss": 0.01, "step": 161270 }, { "epoch": 1061.0526315789473, "grad_norm": 0.9672636985778809, "learning_rate": 0.0001, "loss": 0.0092, "step": 161280 }, { "epoch": 1061.1184210526317, "grad_norm": 0.8965618014335632, "learning_rate": 0.0001, "loss": 0.0102, "step": 161290 }, { "epoch": 1061.1842105263158, "grad_norm": 0.9255189299583435, "learning_rate": 0.0001, "loss": 0.0083, "step": 161300 }, { "epoch": 1061.25, "grad_norm": 0.9481072425842285, "learning_rate": 0.0001, "loss": 0.0118, "step": 161310 }, { "epoch": 1061.3157894736842, "grad_norm": 1.216856837272644, "learning_rate": 0.0001, "loss": 0.0097, "step": 161320 }, { "epoch": 1061.3815789473683, "grad_norm": 1.3531123399734497, "learning_rate": 0.0001, "loss": 0.0082, "step": 161330 }, { "epoch": 1061.4473684210527, "grad_norm": 0.8785354495048523, "learning_rate": 0.0001, "loss": 0.0094, "step": 161340 }, { "epoch": 1061.5131578947369, "grad_norm": 1.1641191244125366, "learning_rate": 0.0001, "loss": 0.0093, "step": 161350 }, { "epoch": 1061.578947368421, "grad_norm": 1.3598567247390747, "learning_rate": 0.0001, "loss": 0.0094, "step": 161360 }, { "epoch": 1061.6447368421052, "grad_norm": 0.9496684670448303, "learning_rate": 0.0001, "loss": 0.0077, "step": 161370 }, { "epoch": 1061.7105263157894, "grad_norm": 1.231241226196289, "learning_rate": 0.0001, "loss": 0.0089, "step": 161380 }, { "epoch": 1061.7763157894738, "grad_norm": 1.052296757698059, "learning_rate": 0.0001, "loss": 0.0081, "step": 161390 }, { "epoch": 1061.842105263158, "grad_norm": 0.9695555567741394, "learning_rate": 0.0001, "loss": 0.0081, "step": 161400 }, { "epoch": 1061.907894736842, "grad_norm": 1.0984798669815063, "learning_rate": 0.0001, "loss": 0.01, "step": 161410 }, { "epoch": 1061.9736842105262, "grad_norm": 0.8881370425224304, "learning_rate": 0.0001, "loss": 0.01, "step": 161420 }, { "epoch": 1062.0394736842106, "grad_norm": 0.9678500294685364, "learning_rate": 0.0001, "loss": 0.0078, "step": 161430 }, { "epoch": 1062.1052631578948, "grad_norm": 1.1650303602218628, "learning_rate": 0.0001, "loss": 0.0097, "step": 161440 }, { "epoch": 1062.171052631579, "grad_norm": 1.0884886980056763, "learning_rate": 0.0001, "loss": 0.0106, "step": 161450 }, { "epoch": 1062.2368421052631, "grad_norm": 0.9503740668296814, "learning_rate": 0.0001, "loss": 0.0091, "step": 161460 }, { "epoch": 1062.3026315789473, "grad_norm": 0.8754977583885193, "learning_rate": 0.0001, "loss": 0.0112, "step": 161470 }, { "epoch": 1062.3684210526317, "grad_norm": 1.0203204154968262, "learning_rate": 0.0001, "loss": 0.0094, "step": 161480 }, { "epoch": 1062.4342105263158, "grad_norm": 0.9040229320526123, "learning_rate": 0.0001, "loss": 0.0093, "step": 161490 }, { "epoch": 1062.5, "grad_norm": 1.0845030546188354, "learning_rate": 0.0001, "loss": 0.009, "step": 161500 }, { "epoch": 1062.5657894736842, "grad_norm": 0.950481116771698, "learning_rate": 0.0001, "loss": 0.0105, "step": 161510 }, { "epoch": 1062.6315789473683, "grad_norm": 1.1162766218185425, "learning_rate": 0.0001, "loss": 0.0083, "step": 161520 }, { "epoch": 1062.6973684210527, "grad_norm": 1.0046415328979492, "learning_rate": 0.0001, "loss": 0.0087, "step": 161530 }, { "epoch": 1062.7631578947369, "grad_norm": 0.8391357064247131, "learning_rate": 0.0001, "loss": 0.0083, "step": 161540 }, { "epoch": 1062.828947368421, "grad_norm": 0.9628133773803711, "learning_rate": 0.0001, "loss": 0.0089, "step": 161550 }, { "epoch": 1062.8947368421052, "grad_norm": 0.9832002520561218, "learning_rate": 0.0001, "loss": 0.0105, "step": 161560 }, { "epoch": 1062.9605263157894, "grad_norm": 1.2245110273361206, "learning_rate": 0.0001, "loss": 0.0077, "step": 161570 }, { "epoch": 1063.0263157894738, "grad_norm": 1.3383867740631104, "learning_rate": 0.0001, "loss": 0.0079, "step": 161580 }, { "epoch": 1063.092105263158, "grad_norm": 1.1894196271896362, "learning_rate": 0.0001, "loss": 0.0096, "step": 161590 }, { "epoch": 1063.157894736842, "grad_norm": 1.3893300294876099, "learning_rate": 0.0001, "loss": 0.0097, "step": 161600 }, { "epoch": 1063.2236842105262, "grad_norm": 0.8097324371337891, "learning_rate": 0.0001, "loss": 0.0098, "step": 161610 }, { "epoch": 1063.2894736842106, "grad_norm": 0.9299864172935486, "learning_rate": 0.0001, "loss": 0.0095, "step": 161620 }, { "epoch": 1063.3552631578948, "grad_norm": 1.031350016593933, "learning_rate": 0.0001, "loss": 0.0096, "step": 161630 }, { "epoch": 1063.421052631579, "grad_norm": 1.079654574394226, "learning_rate": 0.0001, "loss": 0.0096, "step": 161640 }, { "epoch": 1063.4868421052631, "grad_norm": 1.1577768325805664, "learning_rate": 0.0001, "loss": 0.0088, "step": 161650 }, { "epoch": 1063.5526315789473, "grad_norm": 1.0904746055603027, "learning_rate": 0.0001, "loss": 0.0088, "step": 161660 }, { "epoch": 1063.6184210526317, "grad_norm": 1.2395423650741577, "learning_rate": 0.0001, "loss": 0.0091, "step": 161670 }, { "epoch": 1063.6842105263158, "grad_norm": 1.0951871871948242, "learning_rate": 0.0001, "loss": 0.0093, "step": 161680 }, { "epoch": 1063.75, "grad_norm": 1.186826467514038, "learning_rate": 0.0001, "loss": 0.0088, "step": 161690 }, { "epoch": 1063.8157894736842, "grad_norm": 0.7495224475860596, "learning_rate": 0.0001, "loss": 0.008, "step": 161700 }, { "epoch": 1063.8815789473683, "grad_norm": 1.0695480108261108, "learning_rate": 0.0001, "loss": 0.0108, "step": 161710 }, { "epoch": 1063.9473684210527, "grad_norm": 0.743630051612854, "learning_rate": 0.0001, "loss": 0.0101, "step": 161720 }, { "epoch": 1064.0131578947369, "grad_norm": 0.8561877608299255, "learning_rate": 0.0001, "loss": 0.0096, "step": 161730 }, { "epoch": 1064.078947368421, "grad_norm": 0.8216092586517334, "learning_rate": 0.0001, "loss": 0.009, "step": 161740 }, { "epoch": 1064.1447368421052, "grad_norm": 0.8354560136795044, "learning_rate": 0.0001, "loss": 0.0074, "step": 161750 }, { "epoch": 1064.2105263157894, "grad_norm": 1.2202868461608887, "learning_rate": 0.0001, "loss": 0.0108, "step": 161760 }, { "epoch": 1064.2763157894738, "grad_norm": 0.7838713526725769, "learning_rate": 0.0001, "loss": 0.0094, "step": 161770 }, { "epoch": 1064.342105263158, "grad_norm": 1.285569190979004, "learning_rate": 0.0001, "loss": 0.0092, "step": 161780 }, { "epoch": 1064.407894736842, "grad_norm": 0.9280838370323181, "learning_rate": 0.0001, "loss": 0.0081, "step": 161790 }, { "epoch": 1064.4736842105262, "grad_norm": 0.8232817649841309, "learning_rate": 0.0001, "loss": 0.0097, "step": 161800 }, { "epoch": 1064.5394736842106, "grad_norm": 1.015890121459961, "learning_rate": 0.0001, "loss": 0.0108, "step": 161810 }, { "epoch": 1064.6052631578948, "grad_norm": 0.8009679317474365, "learning_rate": 0.0001, "loss": 0.0081, "step": 161820 }, { "epoch": 1064.671052631579, "grad_norm": 0.6812798976898193, "learning_rate": 0.0001, "loss": 0.0102, "step": 161830 }, { "epoch": 1064.7368421052631, "grad_norm": 1.3212003707885742, "learning_rate": 0.0001, "loss": 0.0088, "step": 161840 }, { "epoch": 1064.8026315789473, "grad_norm": 0.8784815669059753, "learning_rate": 0.0001, "loss": 0.0097, "step": 161850 }, { "epoch": 1064.8684210526317, "grad_norm": 1.0952194929122925, "learning_rate": 0.0001, "loss": 0.0107, "step": 161860 }, { "epoch": 1064.9342105263158, "grad_norm": 1.038833498954773, "learning_rate": 0.0001, "loss": 0.0093, "step": 161870 }, { "epoch": 1065.0, "grad_norm": 1.1288541555404663, "learning_rate": 0.0001, "loss": 0.009, "step": 161880 }, { "epoch": 1065.0657894736842, "grad_norm": 1.0623427629470825, "learning_rate": 0.0001, "loss": 0.0106, "step": 161890 }, { "epoch": 1065.1315789473683, "grad_norm": 1.2442352771759033, "learning_rate": 0.0001, "loss": 0.0092, "step": 161900 }, { "epoch": 1065.1973684210527, "grad_norm": 1.1538206338882446, "learning_rate": 0.0001, "loss": 0.0082, "step": 161910 }, { "epoch": 1065.2631578947369, "grad_norm": 1.1074141263961792, "learning_rate": 0.0001, "loss": 0.0112, "step": 161920 }, { "epoch": 1065.328947368421, "grad_norm": 1.0948708057403564, "learning_rate": 0.0001, "loss": 0.0101, "step": 161930 }, { "epoch": 1065.3947368421052, "grad_norm": 0.8019455671310425, "learning_rate": 0.0001, "loss": 0.0113, "step": 161940 }, { "epoch": 1065.4605263157894, "grad_norm": 0.6959496736526489, "learning_rate": 0.0001, "loss": 0.0105, "step": 161950 }, { "epoch": 1065.5263157894738, "grad_norm": 0.9164180755615234, "learning_rate": 0.0001, "loss": 0.0115, "step": 161960 }, { "epoch": 1065.592105263158, "grad_norm": 1.390481948852539, "learning_rate": 0.0001, "loss": 0.0119, "step": 161970 }, { "epoch": 1065.657894736842, "grad_norm": 1.1339566707611084, "learning_rate": 0.0001, "loss": 0.0085, "step": 161980 }, { "epoch": 1065.7236842105262, "grad_norm": 0.8999402523040771, "learning_rate": 0.0001, "loss": 0.0094, "step": 161990 }, { "epoch": 1065.7894736842106, "grad_norm": 1.1724854707717896, "learning_rate": 0.0001, "loss": 0.0102, "step": 162000 }, { "epoch": 1065.8552631578948, "grad_norm": 1.1292765140533447, "learning_rate": 0.0001, "loss": 0.0084, "step": 162010 }, { "epoch": 1065.921052631579, "grad_norm": 1.3481417894363403, "learning_rate": 0.0001, "loss": 0.0094, "step": 162020 }, { "epoch": 1065.9868421052631, "grad_norm": 1.1352733373641968, "learning_rate": 0.0001, "loss": 0.0092, "step": 162030 }, { "epoch": 1066.0526315789473, "grad_norm": 1.0196239948272705, "learning_rate": 0.0001, "loss": 0.0082, "step": 162040 }, { "epoch": 1066.1184210526317, "grad_norm": 1.2338160276412964, "learning_rate": 0.0001, "loss": 0.0087, "step": 162050 }, { "epoch": 1066.1842105263158, "grad_norm": 0.9089396595954895, "learning_rate": 0.0001, "loss": 0.0084, "step": 162060 }, { "epoch": 1066.25, "grad_norm": 0.8388129472732544, "learning_rate": 0.0001, "loss": 0.0102, "step": 162070 }, { "epoch": 1066.3157894736842, "grad_norm": 1.036230206489563, "learning_rate": 0.0001, "loss": 0.0105, "step": 162080 }, { "epoch": 1066.3815789473683, "grad_norm": 0.9070473909378052, "learning_rate": 0.0001, "loss": 0.0111, "step": 162090 }, { "epoch": 1066.4473684210527, "grad_norm": 1.3986698389053345, "learning_rate": 0.0001, "loss": 0.009, "step": 162100 }, { "epoch": 1066.5131578947369, "grad_norm": 1.2524380683898926, "learning_rate": 0.0001, "loss": 0.009, "step": 162110 }, { "epoch": 1066.578947368421, "grad_norm": 1.3897756338119507, "learning_rate": 0.0001, "loss": 0.0088, "step": 162120 }, { "epoch": 1066.6447368421052, "grad_norm": 1.2259730100631714, "learning_rate": 0.0001, "loss": 0.0092, "step": 162130 }, { "epoch": 1066.7105263157894, "grad_norm": 1.0453299283981323, "learning_rate": 0.0001, "loss": 0.0085, "step": 162140 }, { "epoch": 1066.7763157894738, "grad_norm": 1.3548429012298584, "learning_rate": 0.0001, "loss": 0.0098, "step": 162150 }, { "epoch": 1066.842105263158, "grad_norm": 0.7889332175254822, "learning_rate": 0.0001, "loss": 0.0095, "step": 162160 }, { "epoch": 1066.907894736842, "grad_norm": 0.773144006729126, "learning_rate": 0.0001, "loss": 0.0093, "step": 162170 }, { "epoch": 1066.9736842105262, "grad_norm": 0.8035649657249451, "learning_rate": 0.0001, "loss": 0.0097, "step": 162180 }, { "epoch": 1067.0394736842106, "grad_norm": 0.7761321067810059, "learning_rate": 0.0001, "loss": 0.009, "step": 162190 }, { "epoch": 1067.1052631578948, "grad_norm": 0.8864235877990723, "learning_rate": 0.0001, "loss": 0.0083, "step": 162200 }, { "epoch": 1067.171052631579, "grad_norm": 1.0513126850128174, "learning_rate": 0.0001, "loss": 0.0089, "step": 162210 }, { "epoch": 1067.2368421052631, "grad_norm": 0.9010508060455322, "learning_rate": 0.0001, "loss": 0.0097, "step": 162220 }, { "epoch": 1067.3026315789473, "grad_norm": 1.1405491828918457, "learning_rate": 0.0001, "loss": 0.011, "step": 162230 }, { "epoch": 1067.3684210526317, "grad_norm": 0.7161151170730591, "learning_rate": 0.0001, "loss": 0.0096, "step": 162240 }, { "epoch": 1067.4342105263158, "grad_norm": 0.7078927755355835, "learning_rate": 0.0001, "loss": 0.0085, "step": 162250 }, { "epoch": 1067.5, "grad_norm": 0.9383971095085144, "learning_rate": 0.0001, "loss": 0.0087, "step": 162260 }, { "epoch": 1067.5657894736842, "grad_norm": 0.7436659336090088, "learning_rate": 0.0001, "loss": 0.0095, "step": 162270 }, { "epoch": 1067.6315789473683, "grad_norm": 0.8315485715866089, "learning_rate": 0.0001, "loss": 0.0108, "step": 162280 }, { "epoch": 1067.6973684210527, "grad_norm": 1.013370156288147, "learning_rate": 0.0001, "loss": 0.009, "step": 162290 }, { "epoch": 1067.7631578947369, "grad_norm": 1.2187751531600952, "learning_rate": 0.0001, "loss": 0.0097, "step": 162300 }, { "epoch": 1067.828947368421, "grad_norm": 1.2132238149642944, "learning_rate": 0.0001, "loss": 0.0102, "step": 162310 }, { "epoch": 1067.8947368421052, "grad_norm": 1.1550824642181396, "learning_rate": 0.0001, "loss": 0.0087, "step": 162320 }, { "epoch": 1067.9605263157894, "grad_norm": 1.193505883216858, "learning_rate": 0.0001, "loss": 0.0096, "step": 162330 }, { "epoch": 1068.0263157894738, "grad_norm": 0.9410684108734131, "learning_rate": 0.0001, "loss": 0.0084, "step": 162340 }, { "epoch": 1068.092105263158, "grad_norm": 1.1005580425262451, "learning_rate": 0.0001, "loss": 0.0114, "step": 162350 }, { "epoch": 1068.157894736842, "grad_norm": 1.2267907857894897, "learning_rate": 0.0001, "loss": 0.0088, "step": 162360 }, { "epoch": 1068.2236842105262, "grad_norm": 1.1919841766357422, "learning_rate": 0.0001, "loss": 0.0097, "step": 162370 }, { "epoch": 1068.2894736842106, "grad_norm": 1.14139986038208, "learning_rate": 0.0001, "loss": 0.009, "step": 162380 }, { "epoch": 1068.3552631578948, "grad_norm": 0.8994879722595215, "learning_rate": 0.0001, "loss": 0.0089, "step": 162390 }, { "epoch": 1068.421052631579, "grad_norm": 1.3189268112182617, "learning_rate": 0.0001, "loss": 0.0099, "step": 162400 }, { "epoch": 1068.4868421052631, "grad_norm": 1.0653045177459717, "learning_rate": 0.0001, "loss": 0.0098, "step": 162410 }, { "epoch": 1068.5526315789473, "grad_norm": 1.248468041419983, "learning_rate": 0.0001, "loss": 0.0092, "step": 162420 }, { "epoch": 1068.6184210526317, "grad_norm": 1.4759578704833984, "learning_rate": 0.0001, "loss": 0.0097, "step": 162430 }, { "epoch": 1068.6842105263158, "grad_norm": 1.3343039751052856, "learning_rate": 0.0001, "loss": 0.0106, "step": 162440 }, { "epoch": 1068.75, "grad_norm": 0.91098952293396, "learning_rate": 0.0001, "loss": 0.0096, "step": 162450 }, { "epoch": 1068.8157894736842, "grad_norm": 1.612722635269165, "learning_rate": 0.0001, "loss": 0.0089, "step": 162460 }, { "epoch": 1068.8815789473683, "grad_norm": 1.5344117879867554, "learning_rate": 0.0001, "loss": 0.0094, "step": 162470 }, { "epoch": 1068.9473684210527, "grad_norm": 1.2199701070785522, "learning_rate": 0.0001, "loss": 0.0073, "step": 162480 }, { "epoch": 1069.0131578947369, "grad_norm": 1.03998601436615, "learning_rate": 0.0001, "loss": 0.0093, "step": 162490 }, { "epoch": 1069.078947368421, "grad_norm": 1.0132770538330078, "learning_rate": 0.0001, "loss": 0.0091, "step": 162500 }, { "epoch": 1069.1447368421052, "grad_norm": 1.1793491840362549, "learning_rate": 0.0001, "loss": 0.0085, "step": 162510 }, { "epoch": 1069.2105263157894, "grad_norm": 1.0435994863510132, "learning_rate": 0.0001, "loss": 0.0097, "step": 162520 }, { "epoch": 1069.2763157894738, "grad_norm": 0.7933632135391235, "learning_rate": 0.0001, "loss": 0.0114, "step": 162530 }, { "epoch": 1069.342105263158, "grad_norm": 1.094799280166626, "learning_rate": 0.0001, "loss": 0.0086, "step": 162540 }, { "epoch": 1069.407894736842, "grad_norm": 1.0293395519256592, "learning_rate": 0.0001, "loss": 0.0095, "step": 162550 }, { "epoch": 1069.4736842105262, "grad_norm": 0.8514150381088257, "learning_rate": 0.0001, "loss": 0.0101, "step": 162560 }, { "epoch": 1069.5394736842106, "grad_norm": 1.075880765914917, "learning_rate": 0.0001, "loss": 0.0087, "step": 162570 }, { "epoch": 1069.6052631578948, "grad_norm": 0.8023967742919922, "learning_rate": 0.0001, "loss": 0.0097, "step": 162580 }, { "epoch": 1069.671052631579, "grad_norm": 0.9331603050231934, "learning_rate": 0.0001, "loss": 0.009, "step": 162590 }, { "epoch": 1069.7368421052631, "grad_norm": 0.8573759198188782, "learning_rate": 0.0001, "loss": 0.0075, "step": 162600 }, { "epoch": 1069.8026315789473, "grad_norm": 1.105576515197754, "learning_rate": 0.0001, "loss": 0.0098, "step": 162610 }, { "epoch": 1069.8684210526317, "grad_norm": 1.3647798299789429, "learning_rate": 0.0001, "loss": 0.0096, "step": 162620 }, { "epoch": 1069.9342105263158, "grad_norm": 0.9643642902374268, "learning_rate": 0.0001, "loss": 0.0081, "step": 162630 }, { "epoch": 1070.0, "grad_norm": 1.3293102979660034, "learning_rate": 0.0001, "loss": 0.0096, "step": 162640 }, { "epoch": 1070.0657894736842, "grad_norm": 1.421432375907898, "learning_rate": 0.0001, "loss": 0.0086, "step": 162650 }, { "epoch": 1070.1315789473683, "grad_norm": 1.0988101959228516, "learning_rate": 0.0001, "loss": 0.0088, "step": 162660 }, { "epoch": 1070.1973684210527, "grad_norm": 1.185173511505127, "learning_rate": 0.0001, "loss": 0.0088, "step": 162670 }, { "epoch": 1070.2631578947369, "grad_norm": 0.96378093957901, "learning_rate": 0.0001, "loss": 0.0086, "step": 162680 }, { "epoch": 1070.328947368421, "grad_norm": 1.0372625589370728, "learning_rate": 0.0001, "loss": 0.0093, "step": 162690 }, { "epoch": 1070.3947368421052, "grad_norm": 0.8378893136978149, "learning_rate": 0.0001, "loss": 0.0103, "step": 162700 }, { "epoch": 1070.4605263157894, "grad_norm": 1.0191593170166016, "learning_rate": 0.0001, "loss": 0.0071, "step": 162710 }, { "epoch": 1070.5263157894738, "grad_norm": 1.2982977628707886, "learning_rate": 0.0001, "loss": 0.0091, "step": 162720 }, { "epoch": 1070.592105263158, "grad_norm": 1.0704700946807861, "learning_rate": 0.0001, "loss": 0.0096, "step": 162730 }, { "epoch": 1070.657894736842, "grad_norm": 1.0267466306686401, "learning_rate": 0.0001, "loss": 0.0078, "step": 162740 }, { "epoch": 1070.7236842105262, "grad_norm": 1.131195068359375, "learning_rate": 0.0001, "loss": 0.0116, "step": 162750 }, { "epoch": 1070.7894736842106, "grad_norm": 0.9964534044265747, "learning_rate": 0.0001, "loss": 0.0082, "step": 162760 }, { "epoch": 1070.8552631578948, "grad_norm": 1.135125994682312, "learning_rate": 0.0001, "loss": 0.0084, "step": 162770 }, { "epoch": 1070.921052631579, "grad_norm": 0.5625234842300415, "learning_rate": 0.0001, "loss": 0.0109, "step": 162780 }, { "epoch": 1070.9868421052631, "grad_norm": 1.0314369201660156, "learning_rate": 0.0001, "loss": 0.0109, "step": 162790 }, { "epoch": 1071.0526315789473, "grad_norm": 1.1041908264160156, "learning_rate": 0.0001, "loss": 0.0075, "step": 162800 }, { "epoch": 1071.1184210526317, "grad_norm": 1.4432227611541748, "learning_rate": 0.0001, "loss": 0.0103, "step": 162810 }, { "epoch": 1071.1842105263158, "grad_norm": 1.1122026443481445, "learning_rate": 0.0001, "loss": 0.0077, "step": 162820 }, { "epoch": 1071.25, "grad_norm": 0.8845628499984741, "learning_rate": 0.0001, "loss": 0.009, "step": 162830 }, { "epoch": 1071.3157894736842, "grad_norm": 0.9934724569320679, "learning_rate": 0.0001, "loss": 0.0107, "step": 162840 }, { "epoch": 1071.3815789473683, "grad_norm": 1.2053556442260742, "learning_rate": 0.0001, "loss": 0.01, "step": 162850 }, { "epoch": 1071.4473684210527, "grad_norm": 1.0750372409820557, "learning_rate": 0.0001, "loss": 0.0104, "step": 162860 }, { "epoch": 1071.5131578947369, "grad_norm": 1.1821308135986328, "learning_rate": 0.0001, "loss": 0.0096, "step": 162870 }, { "epoch": 1071.578947368421, "grad_norm": 0.8854652047157288, "learning_rate": 0.0001, "loss": 0.0087, "step": 162880 }, { "epoch": 1071.6447368421052, "grad_norm": 0.9452785849571228, "learning_rate": 0.0001, "loss": 0.0097, "step": 162890 }, { "epoch": 1071.7105263157894, "grad_norm": 0.4714410901069641, "learning_rate": 0.0001, "loss": 0.0095, "step": 162900 }, { "epoch": 1071.7763157894738, "grad_norm": 0.9950037598609924, "learning_rate": 0.0001, "loss": 0.0103, "step": 162910 }, { "epoch": 1071.842105263158, "grad_norm": 1.0008114576339722, "learning_rate": 0.0001, "loss": 0.0075, "step": 162920 }, { "epoch": 1071.907894736842, "grad_norm": 0.9882568120956421, "learning_rate": 0.0001, "loss": 0.0086, "step": 162930 }, { "epoch": 1071.9736842105262, "grad_norm": 0.9975512623786926, "learning_rate": 0.0001, "loss": 0.0097, "step": 162940 }, { "epoch": 1072.0394736842106, "grad_norm": 0.9865131974220276, "learning_rate": 0.0001, "loss": 0.0087, "step": 162950 }, { "epoch": 1072.1052631578948, "grad_norm": 0.8073837757110596, "learning_rate": 0.0001, "loss": 0.0092, "step": 162960 }, { "epoch": 1072.171052631579, "grad_norm": 1.311487078666687, "learning_rate": 0.0001, "loss": 0.0085, "step": 162970 }, { "epoch": 1072.2368421052631, "grad_norm": 1.1827518939971924, "learning_rate": 0.0001, "loss": 0.0084, "step": 162980 }, { "epoch": 1072.3026315789473, "grad_norm": 1.183228611946106, "learning_rate": 0.0001, "loss": 0.0093, "step": 162990 }, { "epoch": 1072.3684210526317, "grad_norm": 1.4253993034362793, "learning_rate": 0.0001, "loss": 0.0096, "step": 163000 }, { "epoch": 1072.4342105263158, "grad_norm": 1.0491241216659546, "learning_rate": 0.0001, "loss": 0.0095, "step": 163010 }, { "epoch": 1072.5, "grad_norm": 1.536728858947754, "learning_rate": 0.0001, "loss": 0.009, "step": 163020 }, { "epoch": 1072.5657894736842, "grad_norm": 0.9010704159736633, "learning_rate": 0.0001, "loss": 0.0081, "step": 163030 }, { "epoch": 1072.6315789473683, "grad_norm": 0.9637726545333862, "learning_rate": 0.0001, "loss": 0.011, "step": 163040 }, { "epoch": 1072.6973684210527, "grad_norm": 1.410753846168518, "learning_rate": 0.0001, "loss": 0.0097, "step": 163050 }, { "epoch": 1072.7631578947369, "grad_norm": 1.4014109373092651, "learning_rate": 0.0001, "loss": 0.0085, "step": 163060 }, { "epoch": 1072.828947368421, "grad_norm": 1.3508261442184448, "learning_rate": 0.0001, "loss": 0.0083, "step": 163070 }, { "epoch": 1072.8947368421052, "grad_norm": 1.2983468770980835, "learning_rate": 0.0001, "loss": 0.0099, "step": 163080 }, { "epoch": 1072.9605263157894, "grad_norm": 1.4878889322280884, "learning_rate": 0.0001, "loss": 0.0101, "step": 163090 }, { "epoch": 1073.0263157894738, "grad_norm": 1.155154824256897, "learning_rate": 0.0001, "loss": 0.0082, "step": 163100 }, { "epoch": 1073.092105263158, "grad_norm": 1.2900110483169556, "learning_rate": 0.0001, "loss": 0.0086, "step": 163110 }, { "epoch": 1073.157894736842, "grad_norm": 0.711246907711029, "learning_rate": 0.0001, "loss": 0.011, "step": 163120 }, { "epoch": 1073.2236842105262, "grad_norm": 1.4569929838180542, "learning_rate": 0.0001, "loss": 0.0088, "step": 163130 }, { "epoch": 1073.2894736842106, "grad_norm": 1.12445068359375, "learning_rate": 0.0001, "loss": 0.012, "step": 163140 }, { "epoch": 1073.3552631578948, "grad_norm": 0.9855101704597473, "learning_rate": 0.0001, "loss": 0.0102, "step": 163150 }, { "epoch": 1073.421052631579, "grad_norm": 0.9957090020179749, "learning_rate": 0.0001, "loss": 0.0084, "step": 163160 }, { "epoch": 1073.4868421052631, "grad_norm": 0.8900773525238037, "learning_rate": 0.0001, "loss": 0.0091, "step": 163170 }, { "epoch": 1073.5526315789473, "grad_norm": 0.8382232785224915, "learning_rate": 0.0001, "loss": 0.011, "step": 163180 }, { "epoch": 1073.6184210526317, "grad_norm": 1.113542914390564, "learning_rate": 0.0001, "loss": 0.0098, "step": 163190 }, { "epoch": 1073.6842105263158, "grad_norm": 1.017556071281433, "learning_rate": 0.0001, "loss": 0.0116, "step": 163200 }, { "epoch": 1073.75, "grad_norm": 1.1955180168151855, "learning_rate": 0.0001, "loss": 0.0114, "step": 163210 }, { "epoch": 1073.8157894736842, "grad_norm": 1.223476529121399, "learning_rate": 0.0001, "loss": 0.0107, "step": 163220 }, { "epoch": 1073.8815789473683, "grad_norm": 0.9942741394042969, "learning_rate": 0.0001, "loss": 0.0109, "step": 163230 }, { "epoch": 1073.9473684210527, "grad_norm": 1.0802150964736938, "learning_rate": 0.0001, "loss": 0.0102, "step": 163240 }, { "epoch": 1074.0131578947369, "grad_norm": 0.9715554714202881, "learning_rate": 0.0001, "loss": 0.0098, "step": 163250 }, { "epoch": 1074.078947368421, "grad_norm": 1.1486918926239014, "learning_rate": 0.0001, "loss": 0.0118, "step": 163260 }, { "epoch": 1074.1447368421052, "grad_norm": 1.1075643301010132, "learning_rate": 0.0001, "loss": 0.0097, "step": 163270 }, { "epoch": 1074.2105263157894, "grad_norm": 1.1682664155960083, "learning_rate": 0.0001, "loss": 0.011, "step": 163280 }, { "epoch": 1074.2763157894738, "grad_norm": 1.3140562772750854, "learning_rate": 0.0001, "loss": 0.0109, "step": 163290 }, { "epoch": 1074.342105263158, "grad_norm": 1.2166996002197266, "learning_rate": 0.0001, "loss": 0.0094, "step": 163300 }, { "epoch": 1074.407894736842, "grad_norm": 1.1994695663452148, "learning_rate": 0.0001, "loss": 0.01, "step": 163310 }, { "epoch": 1074.4736842105262, "grad_norm": 0.7714230418205261, "learning_rate": 0.0001, "loss": 0.0109, "step": 163320 }, { "epoch": 1074.5394736842106, "grad_norm": 0.611962080001831, "learning_rate": 0.0001, "loss": 0.0114, "step": 163330 }, { "epoch": 1074.6052631578948, "grad_norm": 0.8996819853782654, "learning_rate": 0.0001, "loss": 0.0092, "step": 163340 }, { "epoch": 1074.671052631579, "grad_norm": 0.9892165064811707, "learning_rate": 0.0001, "loss": 0.0083, "step": 163350 }, { "epoch": 1074.7368421052631, "grad_norm": 1.245927333831787, "learning_rate": 0.0001, "loss": 0.0084, "step": 163360 }, { "epoch": 1074.8026315789473, "grad_norm": 1.5583741664886475, "learning_rate": 0.0001, "loss": 0.0091, "step": 163370 }, { "epoch": 1074.8684210526317, "grad_norm": 1.4896721839904785, "learning_rate": 0.0001, "loss": 0.0103, "step": 163380 }, { "epoch": 1074.9342105263158, "grad_norm": 1.5544887781143188, "learning_rate": 0.0001, "loss": 0.0106, "step": 163390 }, { "epoch": 1075.0, "grad_norm": 1.562259554862976, "learning_rate": 0.0001, "loss": 0.0106, "step": 163400 }, { "epoch": 1075.0657894736842, "grad_norm": 1.1588703393936157, "learning_rate": 0.0001, "loss": 0.0102, "step": 163410 }, { "epoch": 1075.1315789473683, "grad_norm": 1.3577895164489746, "learning_rate": 0.0001, "loss": 0.0097, "step": 163420 }, { "epoch": 1075.1973684210527, "grad_norm": 1.0716686248779297, "learning_rate": 0.0001, "loss": 0.01, "step": 163430 }, { "epoch": 1075.2631578947369, "grad_norm": 1.1159995794296265, "learning_rate": 0.0001, "loss": 0.0096, "step": 163440 }, { "epoch": 1075.328947368421, "grad_norm": 1.1047301292419434, "learning_rate": 0.0001, "loss": 0.0114, "step": 163450 }, { "epoch": 1075.3947368421052, "grad_norm": 1.001532793045044, "learning_rate": 0.0001, "loss": 0.0106, "step": 163460 }, { "epoch": 1075.4605263157894, "grad_norm": 1.4024605751037598, "learning_rate": 0.0001, "loss": 0.0087, "step": 163470 }, { "epoch": 1075.5263157894738, "grad_norm": 1.3084982633590698, "learning_rate": 0.0001, "loss": 0.0085, "step": 163480 }, { "epoch": 1075.592105263158, "grad_norm": 1.0299196243286133, "learning_rate": 0.0001, "loss": 0.0093, "step": 163490 }, { "epoch": 1075.657894736842, "grad_norm": 0.941424548625946, "learning_rate": 0.0001, "loss": 0.0089, "step": 163500 }, { "epoch": 1075.7236842105262, "grad_norm": 1.2184175252914429, "learning_rate": 0.0001, "loss": 0.0089, "step": 163510 }, { "epoch": 1075.7894736842106, "grad_norm": 1.1736630201339722, "learning_rate": 0.0001, "loss": 0.0091, "step": 163520 }, { "epoch": 1075.8552631578948, "grad_norm": 0.897025465965271, "learning_rate": 0.0001, "loss": 0.0103, "step": 163530 }, { "epoch": 1075.921052631579, "grad_norm": 0.8720511198043823, "learning_rate": 0.0001, "loss": 0.0092, "step": 163540 }, { "epoch": 1075.9868421052631, "grad_norm": 1.0315616130828857, "learning_rate": 0.0001, "loss": 0.01, "step": 163550 }, { "epoch": 1076.0526315789473, "grad_norm": 1.194000005722046, "learning_rate": 0.0001, "loss": 0.0077, "step": 163560 }, { "epoch": 1076.1184210526317, "grad_norm": 1.108793020248413, "learning_rate": 0.0001, "loss": 0.0119, "step": 163570 }, { "epoch": 1076.1842105263158, "grad_norm": 1.059388518333435, "learning_rate": 0.0001, "loss": 0.0097, "step": 163580 }, { "epoch": 1076.25, "grad_norm": 1.1141692399978638, "learning_rate": 0.0001, "loss": 0.0084, "step": 163590 }, { "epoch": 1076.3157894736842, "grad_norm": 0.8344478011131287, "learning_rate": 0.0001, "loss": 0.0103, "step": 163600 }, { "epoch": 1076.3815789473683, "grad_norm": 1.0367026329040527, "learning_rate": 0.0001, "loss": 0.0102, "step": 163610 }, { "epoch": 1076.4473684210527, "grad_norm": 1.339621663093567, "learning_rate": 0.0001, "loss": 0.0082, "step": 163620 }, { "epoch": 1076.5131578947369, "grad_norm": 1.3752906322479248, "learning_rate": 0.0001, "loss": 0.0101, "step": 163630 }, { "epoch": 1076.578947368421, "grad_norm": 0.8494938015937805, "learning_rate": 0.0001, "loss": 0.0096, "step": 163640 }, { "epoch": 1076.6447368421052, "grad_norm": 0.6599777936935425, "learning_rate": 0.0001, "loss": 0.0105, "step": 163650 }, { "epoch": 1076.7105263157894, "grad_norm": 1.3162485361099243, "learning_rate": 0.0001, "loss": 0.0102, "step": 163660 }, { "epoch": 1076.7763157894738, "grad_norm": 0.9044468998908997, "learning_rate": 0.0001, "loss": 0.0093, "step": 163670 }, { "epoch": 1076.842105263158, "grad_norm": 0.8075153231620789, "learning_rate": 0.0001, "loss": 0.0093, "step": 163680 }, { "epoch": 1076.907894736842, "grad_norm": 1.406198501586914, "learning_rate": 0.0001, "loss": 0.0103, "step": 163690 }, { "epoch": 1076.9736842105262, "grad_norm": 1.080063819885254, "learning_rate": 0.0001, "loss": 0.0103, "step": 163700 }, { "epoch": 1077.0394736842106, "grad_norm": 1.1820083856582642, "learning_rate": 0.0001, "loss": 0.0089, "step": 163710 }, { "epoch": 1077.1052631578948, "grad_norm": 1.0260475873947144, "learning_rate": 0.0001, "loss": 0.009, "step": 163720 }, { "epoch": 1077.171052631579, "grad_norm": 1.063698410987854, "learning_rate": 0.0001, "loss": 0.0102, "step": 163730 }, { "epoch": 1077.2368421052631, "grad_norm": 0.7983181476593018, "learning_rate": 0.0001, "loss": 0.0113, "step": 163740 }, { "epoch": 1077.3026315789473, "grad_norm": 0.8934413194656372, "learning_rate": 0.0001, "loss": 0.0102, "step": 163750 }, { "epoch": 1077.3684210526317, "grad_norm": 0.701677680015564, "learning_rate": 0.0001, "loss": 0.0087, "step": 163760 }, { "epoch": 1077.4342105263158, "grad_norm": 0.9841487407684326, "learning_rate": 0.0001, "loss": 0.0102, "step": 163770 }, { "epoch": 1077.5, "grad_norm": 0.8910136222839355, "learning_rate": 0.0001, "loss": 0.0091, "step": 163780 }, { "epoch": 1077.5657894736842, "grad_norm": 1.0197700262069702, "learning_rate": 0.0001, "loss": 0.0097, "step": 163790 }, { "epoch": 1077.6315789473683, "grad_norm": 1.3116474151611328, "learning_rate": 0.0001, "loss": 0.0081, "step": 163800 }, { "epoch": 1077.6973684210527, "grad_norm": 0.8362675905227661, "learning_rate": 0.0001, "loss": 0.0109, "step": 163810 }, { "epoch": 1077.7631578947369, "grad_norm": 0.6043428182601929, "learning_rate": 0.0001, "loss": 0.009, "step": 163820 }, { "epoch": 1077.828947368421, "grad_norm": 1.1942695379257202, "learning_rate": 0.0001, "loss": 0.0089, "step": 163830 }, { "epoch": 1077.8947368421052, "grad_norm": 0.9171984195709229, "learning_rate": 0.0001, "loss": 0.0085, "step": 163840 }, { "epoch": 1077.9605263157894, "grad_norm": 1.100325584411621, "learning_rate": 0.0001, "loss": 0.0102, "step": 163850 }, { "epoch": 1078.0263157894738, "grad_norm": 1.150246024131775, "learning_rate": 0.0001, "loss": 0.0082, "step": 163860 }, { "epoch": 1078.092105263158, "grad_norm": 0.9796252250671387, "learning_rate": 0.0001, "loss": 0.0124, "step": 163870 }, { "epoch": 1078.157894736842, "grad_norm": 0.7340827584266663, "learning_rate": 0.0001, "loss": 0.0082, "step": 163880 }, { "epoch": 1078.2236842105262, "grad_norm": 0.9303230047225952, "learning_rate": 0.0001, "loss": 0.0099, "step": 163890 }, { "epoch": 1078.2894736842106, "grad_norm": 0.9361478090286255, "learning_rate": 0.0001, "loss": 0.008, "step": 163900 }, { "epoch": 1078.3552631578948, "grad_norm": 1.0189319849014282, "learning_rate": 0.0001, "loss": 0.0083, "step": 163910 }, { "epoch": 1078.421052631579, "grad_norm": 1.059217929840088, "learning_rate": 0.0001, "loss": 0.0091, "step": 163920 }, { "epoch": 1078.4868421052631, "grad_norm": 1.3725210428237915, "learning_rate": 0.0001, "loss": 0.0104, "step": 163930 }, { "epoch": 1078.5526315789473, "grad_norm": 1.200869083404541, "learning_rate": 0.0001, "loss": 0.0097, "step": 163940 }, { "epoch": 1078.6184210526317, "grad_norm": 1.561780571937561, "learning_rate": 0.0001, "loss": 0.0107, "step": 163950 }, { "epoch": 1078.6842105263158, "grad_norm": 1.174270510673523, "learning_rate": 0.0001, "loss": 0.0118, "step": 163960 }, { "epoch": 1078.75, "grad_norm": 1.1414722204208374, "learning_rate": 0.0001, "loss": 0.0089, "step": 163970 }, { "epoch": 1078.8157894736842, "grad_norm": 1.0247485637664795, "learning_rate": 0.0001, "loss": 0.0073, "step": 163980 }, { "epoch": 1078.8815789473683, "grad_norm": 0.9565194249153137, "learning_rate": 0.0001, "loss": 0.0114, "step": 163990 }, { "epoch": 1078.9473684210527, "grad_norm": 0.9359107613563538, "learning_rate": 0.0001, "loss": 0.0084, "step": 164000 }, { "epoch": 1079.0131578947369, "grad_norm": 0.8053498268127441, "learning_rate": 0.0001, "loss": 0.0072, "step": 164010 }, { "epoch": 1079.078947368421, "grad_norm": 1.111828088760376, "learning_rate": 0.0001, "loss": 0.0087, "step": 164020 }, { "epoch": 1079.1447368421052, "grad_norm": 0.9666885137557983, "learning_rate": 0.0001, "loss": 0.0102, "step": 164030 }, { "epoch": 1079.2105263157894, "grad_norm": 1.089706301689148, "learning_rate": 0.0001, "loss": 0.0098, "step": 164040 }, { "epoch": 1079.2763157894738, "grad_norm": 1.2634003162384033, "learning_rate": 0.0001, "loss": 0.0098, "step": 164050 }, { "epoch": 1079.342105263158, "grad_norm": 1.2256748676300049, "learning_rate": 0.0001, "loss": 0.0095, "step": 164060 }, { "epoch": 1079.407894736842, "grad_norm": 1.0512341260910034, "learning_rate": 0.0001, "loss": 0.0095, "step": 164070 }, { "epoch": 1079.4736842105262, "grad_norm": 1.1967103481292725, "learning_rate": 0.0001, "loss": 0.0103, "step": 164080 }, { "epoch": 1079.5394736842106, "grad_norm": 0.9088690876960754, "learning_rate": 0.0001, "loss": 0.0096, "step": 164090 }, { "epoch": 1079.6052631578948, "grad_norm": 0.8924160599708557, "learning_rate": 0.0001, "loss": 0.0085, "step": 164100 }, { "epoch": 1079.671052631579, "grad_norm": 0.8244693875312805, "learning_rate": 0.0001, "loss": 0.0091, "step": 164110 }, { "epoch": 1079.7368421052631, "grad_norm": 1.0724085569381714, "learning_rate": 0.0001, "loss": 0.0102, "step": 164120 }, { "epoch": 1079.8026315789473, "grad_norm": 0.8203881978988647, "learning_rate": 0.0001, "loss": 0.0104, "step": 164130 }, { "epoch": 1079.8684210526317, "grad_norm": 0.8975365161895752, "learning_rate": 0.0001, "loss": 0.0091, "step": 164140 }, { "epoch": 1079.9342105263158, "grad_norm": 1.132312297821045, "learning_rate": 0.0001, "loss": 0.0088, "step": 164150 }, { "epoch": 1080.0, "grad_norm": 1.0990959405899048, "learning_rate": 0.0001, "loss": 0.0098, "step": 164160 }, { "epoch": 1080.0657894736842, "grad_norm": 0.9807849526405334, "learning_rate": 0.0001, "loss": 0.0085, "step": 164170 }, { "epoch": 1080.1315789473683, "grad_norm": 1.4156996011734009, "learning_rate": 0.0001, "loss": 0.0099, "step": 164180 }, { "epoch": 1080.1973684210527, "grad_norm": 1.217441439628601, "learning_rate": 0.0001, "loss": 0.011, "step": 164190 }, { "epoch": 1080.2631578947369, "grad_norm": 1.1044880151748657, "learning_rate": 0.0001, "loss": 0.0093, "step": 164200 }, { "epoch": 1080.328947368421, "grad_norm": 1.1214570999145508, "learning_rate": 0.0001, "loss": 0.0099, "step": 164210 }, { "epoch": 1080.3947368421052, "grad_norm": 0.7764980792999268, "learning_rate": 0.0001, "loss": 0.0083, "step": 164220 }, { "epoch": 1080.4605263157894, "grad_norm": 1.1083272695541382, "learning_rate": 0.0001, "loss": 0.0095, "step": 164230 }, { "epoch": 1080.5263157894738, "grad_norm": 0.908808171749115, "learning_rate": 0.0001, "loss": 0.0089, "step": 164240 }, { "epoch": 1080.592105263158, "grad_norm": 0.9142073392868042, "learning_rate": 0.0001, "loss": 0.01, "step": 164250 }, { "epoch": 1080.657894736842, "grad_norm": 0.9220647215843201, "learning_rate": 0.0001, "loss": 0.0081, "step": 164260 }, { "epoch": 1080.7236842105262, "grad_norm": 0.9497620463371277, "learning_rate": 0.0001, "loss": 0.0096, "step": 164270 }, { "epoch": 1080.7894736842106, "grad_norm": 0.9928162693977356, "learning_rate": 0.0001, "loss": 0.0104, "step": 164280 }, { "epoch": 1080.8552631578948, "grad_norm": 1.177316427230835, "learning_rate": 0.0001, "loss": 0.0099, "step": 164290 }, { "epoch": 1080.921052631579, "grad_norm": 1.1457185745239258, "learning_rate": 0.0001, "loss": 0.0105, "step": 164300 }, { "epoch": 1080.9868421052631, "grad_norm": 0.5666669011116028, "learning_rate": 0.0001, "loss": 0.0103, "step": 164310 }, { "epoch": 1081.0526315789473, "grad_norm": 0.9142729640007019, "learning_rate": 0.0001, "loss": 0.0076, "step": 164320 }, { "epoch": 1081.1184210526317, "grad_norm": 0.9657456874847412, "learning_rate": 0.0001, "loss": 0.0096, "step": 164330 }, { "epoch": 1081.1842105263158, "grad_norm": 0.9326075315475464, "learning_rate": 0.0001, "loss": 0.0095, "step": 164340 }, { "epoch": 1081.25, "grad_norm": 0.8688045740127563, "learning_rate": 0.0001, "loss": 0.01, "step": 164350 }, { "epoch": 1081.3157894736842, "grad_norm": 1.195733666419983, "learning_rate": 0.0001, "loss": 0.0119, "step": 164360 }, { "epoch": 1081.3815789473683, "grad_norm": 1.1410993337631226, "learning_rate": 0.0001, "loss": 0.0107, "step": 164370 }, { "epoch": 1081.4473684210527, "grad_norm": 0.9133111238479614, "learning_rate": 0.0001, "loss": 0.0082, "step": 164380 }, { "epoch": 1081.5131578947369, "grad_norm": 0.9332903027534485, "learning_rate": 0.0001, "loss": 0.008, "step": 164390 }, { "epoch": 1081.578947368421, "grad_norm": 1.2633785009384155, "learning_rate": 0.0001, "loss": 0.0107, "step": 164400 }, { "epoch": 1081.6447368421052, "grad_norm": 1.1889288425445557, "learning_rate": 0.0001, "loss": 0.0079, "step": 164410 }, { "epoch": 1081.7105263157894, "grad_norm": 0.9122311472892761, "learning_rate": 0.0001, "loss": 0.01, "step": 164420 }, { "epoch": 1081.7763157894738, "grad_norm": 0.9126380085945129, "learning_rate": 0.0001, "loss": 0.0094, "step": 164430 }, { "epoch": 1081.842105263158, "grad_norm": 0.9555377960205078, "learning_rate": 0.0001, "loss": 0.0095, "step": 164440 }, { "epoch": 1081.907894736842, "grad_norm": 1.0733895301818848, "learning_rate": 0.0001, "loss": 0.0093, "step": 164450 }, { "epoch": 1081.9736842105262, "grad_norm": 1.0887807607650757, "learning_rate": 0.0001, "loss": 0.0111, "step": 164460 }, { "epoch": 1082.0394736842106, "grad_norm": 0.8500633835792542, "learning_rate": 0.0001, "loss": 0.0092, "step": 164470 }, { "epoch": 1082.1052631578948, "grad_norm": 0.6595375537872314, "learning_rate": 0.0001, "loss": 0.0089, "step": 164480 }, { "epoch": 1082.171052631579, "grad_norm": 1.3263654708862305, "learning_rate": 0.0001, "loss": 0.0088, "step": 164490 }, { "epoch": 1082.2368421052631, "grad_norm": 0.9079999327659607, "learning_rate": 0.0001, "loss": 0.0086, "step": 164500 }, { "epoch": 1082.3026315789473, "grad_norm": 0.9589836001396179, "learning_rate": 0.0001, "loss": 0.0108, "step": 164510 }, { "epoch": 1082.3684210526317, "grad_norm": 1.1960152387619019, "learning_rate": 0.0001, "loss": 0.0105, "step": 164520 }, { "epoch": 1082.4342105263158, "grad_norm": 0.9973219037055969, "learning_rate": 0.0001, "loss": 0.0106, "step": 164530 }, { "epoch": 1082.5, "grad_norm": 1.1780554056167603, "learning_rate": 0.0001, "loss": 0.0098, "step": 164540 }, { "epoch": 1082.5657894736842, "grad_norm": 1.2360888719558716, "learning_rate": 0.0001, "loss": 0.0091, "step": 164550 }, { "epoch": 1082.6315789473683, "grad_norm": 1.0589853525161743, "learning_rate": 0.0001, "loss": 0.0089, "step": 164560 }, { "epoch": 1082.6973684210527, "grad_norm": 1.2129298448562622, "learning_rate": 0.0001, "loss": 0.0086, "step": 164570 }, { "epoch": 1082.7631578947369, "grad_norm": 0.8589812517166138, "learning_rate": 0.0001, "loss": 0.0095, "step": 164580 }, { "epoch": 1082.828947368421, "grad_norm": 1.226012110710144, "learning_rate": 0.0001, "loss": 0.0086, "step": 164590 }, { "epoch": 1082.8947368421052, "grad_norm": 0.921205461025238, "learning_rate": 0.0001, "loss": 0.0093, "step": 164600 }, { "epoch": 1082.9605263157894, "grad_norm": 0.9451828598976135, "learning_rate": 0.0001, "loss": 0.0078, "step": 164610 }, { "epoch": 1083.0263157894738, "grad_norm": 0.8518921732902527, "learning_rate": 0.0001, "loss": 0.0086, "step": 164620 }, { "epoch": 1083.092105263158, "grad_norm": 0.7957374453544617, "learning_rate": 0.0001, "loss": 0.0086, "step": 164630 }, { "epoch": 1083.157894736842, "grad_norm": 0.6601923108100891, "learning_rate": 0.0001, "loss": 0.009, "step": 164640 }, { "epoch": 1083.2236842105262, "grad_norm": 0.8923590779304504, "learning_rate": 0.0001, "loss": 0.0086, "step": 164650 }, { "epoch": 1083.2894736842106, "grad_norm": 1.051577091217041, "learning_rate": 0.0001, "loss": 0.0101, "step": 164660 }, { "epoch": 1083.3552631578948, "grad_norm": 1.3525593280792236, "learning_rate": 0.0001, "loss": 0.0093, "step": 164670 }, { "epoch": 1083.421052631579, "grad_norm": 1.278694987297058, "learning_rate": 0.0001, "loss": 0.0092, "step": 164680 }, { "epoch": 1083.4868421052631, "grad_norm": 1.5232925415039062, "learning_rate": 0.0001, "loss": 0.0077, "step": 164690 }, { "epoch": 1083.5526315789473, "grad_norm": 1.3283989429473877, "learning_rate": 0.0001, "loss": 0.0116, "step": 164700 }, { "epoch": 1083.6184210526317, "grad_norm": 1.3318917751312256, "learning_rate": 0.0001, "loss": 0.009, "step": 164710 }, { "epoch": 1083.6842105263158, "grad_norm": 1.2124128341674805, "learning_rate": 0.0001, "loss": 0.0093, "step": 164720 }, { "epoch": 1083.75, "grad_norm": 1.336235761642456, "learning_rate": 0.0001, "loss": 0.0106, "step": 164730 }, { "epoch": 1083.8157894736842, "grad_norm": 0.9546015858650208, "learning_rate": 0.0001, "loss": 0.0076, "step": 164740 }, { "epoch": 1083.8815789473683, "grad_norm": 0.8885062336921692, "learning_rate": 0.0001, "loss": 0.0093, "step": 164750 }, { "epoch": 1083.9473684210527, "grad_norm": 1.1417690515518188, "learning_rate": 0.0001, "loss": 0.0086, "step": 164760 }, { "epoch": 1084.0131578947369, "grad_norm": 0.9051141738891602, "learning_rate": 0.0001, "loss": 0.0083, "step": 164770 }, { "epoch": 1084.078947368421, "grad_norm": 1.195184588432312, "learning_rate": 0.0001, "loss": 0.0093, "step": 164780 }, { "epoch": 1084.1447368421052, "grad_norm": 1.2856130599975586, "learning_rate": 0.0001, "loss": 0.009, "step": 164790 }, { "epoch": 1084.2105263157894, "grad_norm": 0.9908321499824524, "learning_rate": 0.0001, "loss": 0.0083, "step": 164800 }, { "epoch": 1084.2763157894738, "grad_norm": 1.1323515176773071, "learning_rate": 0.0001, "loss": 0.0095, "step": 164810 }, { "epoch": 1084.342105263158, "grad_norm": 1.0925931930541992, "learning_rate": 0.0001, "loss": 0.0075, "step": 164820 }, { "epoch": 1084.407894736842, "grad_norm": 0.8730666637420654, "learning_rate": 0.0001, "loss": 0.0119, "step": 164830 }, { "epoch": 1084.4736842105262, "grad_norm": 1.1487864255905151, "learning_rate": 0.0001, "loss": 0.0102, "step": 164840 }, { "epoch": 1084.5394736842106, "grad_norm": 0.6327576041221619, "learning_rate": 0.0001, "loss": 0.0079, "step": 164850 }, { "epoch": 1084.6052631578948, "grad_norm": 1.2673507928848267, "learning_rate": 0.0001, "loss": 0.0089, "step": 164860 }, { "epoch": 1084.671052631579, "grad_norm": 1.0878818035125732, "learning_rate": 0.0001, "loss": 0.008, "step": 164870 }, { "epoch": 1084.7368421052631, "grad_norm": 1.1536747217178345, "learning_rate": 0.0001, "loss": 0.0094, "step": 164880 }, { "epoch": 1084.8026315789473, "grad_norm": 1.274584174156189, "learning_rate": 0.0001, "loss": 0.0096, "step": 164890 }, { "epoch": 1084.8684210526317, "grad_norm": 1.102174162864685, "learning_rate": 0.0001, "loss": 0.0078, "step": 164900 }, { "epoch": 1084.9342105263158, "grad_norm": 0.8976937532424927, "learning_rate": 0.0001, "loss": 0.0105, "step": 164910 }, { "epoch": 1085.0, "grad_norm": 1.2239928245544434, "learning_rate": 0.0001, "loss": 0.0097, "step": 164920 }, { "epoch": 1085.0657894736842, "grad_norm": 0.8770923614501953, "learning_rate": 0.0001, "loss": 0.0084, "step": 164930 }, { "epoch": 1085.1315789473683, "grad_norm": 0.9570636749267578, "learning_rate": 0.0001, "loss": 0.008, "step": 164940 }, { "epoch": 1085.1973684210527, "grad_norm": 0.9466798901557922, "learning_rate": 0.0001, "loss": 0.0092, "step": 164950 }, { "epoch": 1085.2631578947369, "grad_norm": 0.897685170173645, "learning_rate": 0.0001, "loss": 0.0086, "step": 164960 }, { "epoch": 1085.328947368421, "grad_norm": 0.710432767868042, "learning_rate": 0.0001, "loss": 0.0085, "step": 164970 }, { "epoch": 1085.3947368421052, "grad_norm": 0.8062248826026917, "learning_rate": 0.0001, "loss": 0.0111, "step": 164980 }, { "epoch": 1085.4605263157894, "grad_norm": 0.7032548785209656, "learning_rate": 0.0001, "loss": 0.0078, "step": 164990 }, { "epoch": 1085.5263157894738, "grad_norm": 1.0174980163574219, "learning_rate": 0.0001, "loss": 0.0101, "step": 165000 }, { "epoch": 1085.592105263158, "grad_norm": 0.7526353597640991, "learning_rate": 0.0001, "loss": 0.0099, "step": 165010 }, { "epoch": 1085.657894736842, "grad_norm": 0.815987229347229, "learning_rate": 0.0001, "loss": 0.0095, "step": 165020 }, { "epoch": 1085.7236842105262, "grad_norm": 0.6938906908035278, "learning_rate": 0.0001, "loss": 0.0092, "step": 165030 }, { "epoch": 1085.7894736842106, "grad_norm": 1.025226354598999, "learning_rate": 0.0001, "loss": 0.0083, "step": 165040 }, { "epoch": 1085.8552631578948, "grad_norm": 1.1357896327972412, "learning_rate": 0.0001, "loss": 0.0089, "step": 165050 }, { "epoch": 1085.921052631579, "grad_norm": 1.0124887228012085, "learning_rate": 0.0001, "loss": 0.0097, "step": 165060 }, { "epoch": 1085.9868421052631, "grad_norm": 0.9484216570854187, "learning_rate": 0.0001, "loss": 0.0085, "step": 165070 }, { "epoch": 1086.0526315789473, "grad_norm": 1.2238960266113281, "learning_rate": 0.0001, "loss": 0.0098, "step": 165080 }, { "epoch": 1086.1184210526317, "grad_norm": 1.4172849655151367, "learning_rate": 0.0001, "loss": 0.0074, "step": 165090 }, { "epoch": 1086.1842105263158, "grad_norm": 1.292891263961792, "learning_rate": 0.0001, "loss": 0.0087, "step": 165100 }, { "epoch": 1086.25, "grad_norm": 1.20212984085083, "learning_rate": 0.0001, "loss": 0.008, "step": 165110 }, { "epoch": 1086.3157894736842, "grad_norm": 1.3747832775115967, "learning_rate": 0.0001, "loss": 0.0101, "step": 165120 }, { "epoch": 1086.3815789473683, "grad_norm": 1.1830034255981445, "learning_rate": 0.0001, "loss": 0.0088, "step": 165130 }, { "epoch": 1086.4473684210527, "grad_norm": 1.2273931503295898, "learning_rate": 0.0001, "loss": 0.0095, "step": 165140 }, { "epoch": 1086.5131578947369, "grad_norm": 0.9580655097961426, "learning_rate": 0.0001, "loss": 0.0103, "step": 165150 }, { "epoch": 1086.578947368421, "grad_norm": 1.0733976364135742, "learning_rate": 0.0001, "loss": 0.0078, "step": 165160 }, { "epoch": 1086.6447368421052, "grad_norm": 1.256765604019165, "learning_rate": 0.0001, "loss": 0.0115, "step": 165170 }, { "epoch": 1086.7105263157894, "grad_norm": 0.8285326361656189, "learning_rate": 0.0001, "loss": 0.0087, "step": 165180 }, { "epoch": 1086.7763157894738, "grad_norm": 0.8502132296562195, "learning_rate": 0.0001, "loss": 0.0092, "step": 165190 }, { "epoch": 1086.842105263158, "grad_norm": 0.5578171610832214, "learning_rate": 0.0001, "loss": 0.0093, "step": 165200 }, { "epoch": 1086.907894736842, "grad_norm": 0.8829189538955688, "learning_rate": 0.0001, "loss": 0.0093, "step": 165210 }, { "epoch": 1086.9736842105262, "grad_norm": 0.8555329442024231, "learning_rate": 0.0001, "loss": 0.0103, "step": 165220 }, { "epoch": 1087.0394736842106, "grad_norm": 1.1692776679992676, "learning_rate": 0.0001, "loss": 0.0081, "step": 165230 }, { "epoch": 1087.1052631578948, "grad_norm": 0.903022825717926, "learning_rate": 0.0001, "loss": 0.0102, "step": 165240 }, { "epoch": 1087.171052631579, "grad_norm": 0.9635505676269531, "learning_rate": 0.0001, "loss": 0.0076, "step": 165250 }, { "epoch": 1087.2368421052631, "grad_norm": 1.0092943906784058, "learning_rate": 0.0001, "loss": 0.0086, "step": 165260 }, { "epoch": 1087.3026315789473, "grad_norm": 0.9053812026977539, "learning_rate": 0.0001, "loss": 0.0096, "step": 165270 }, { "epoch": 1087.3684210526317, "grad_norm": 1.0822906494140625, "learning_rate": 0.0001, "loss": 0.0091, "step": 165280 }, { "epoch": 1087.4342105263158, "grad_norm": 0.9517032504081726, "learning_rate": 0.0001, "loss": 0.0103, "step": 165290 }, { "epoch": 1087.5, "grad_norm": 0.995897114276886, "learning_rate": 0.0001, "loss": 0.0104, "step": 165300 }, { "epoch": 1087.5657894736842, "grad_norm": 1.121558427810669, "learning_rate": 0.0001, "loss": 0.0087, "step": 165310 }, { "epoch": 1087.6315789473683, "grad_norm": 0.905768632888794, "learning_rate": 0.0001, "loss": 0.0085, "step": 165320 }, { "epoch": 1087.6973684210527, "grad_norm": 0.9619491100311279, "learning_rate": 0.0001, "loss": 0.0109, "step": 165330 }, { "epoch": 1087.7631578947369, "grad_norm": 0.804125964641571, "learning_rate": 0.0001, "loss": 0.0099, "step": 165340 }, { "epoch": 1087.828947368421, "grad_norm": 0.9737436175346375, "learning_rate": 0.0001, "loss": 0.0096, "step": 165350 }, { "epoch": 1087.8947368421052, "grad_norm": 1.1739447116851807, "learning_rate": 0.0001, "loss": 0.0087, "step": 165360 }, { "epoch": 1087.9605263157894, "grad_norm": 1.081647515296936, "learning_rate": 0.0001, "loss": 0.0095, "step": 165370 }, { "epoch": 1088.0263157894738, "grad_norm": 1.38816499710083, "learning_rate": 0.0001, "loss": 0.0089, "step": 165380 }, { "epoch": 1088.092105263158, "grad_norm": 1.0949757099151611, "learning_rate": 0.0001, "loss": 0.0079, "step": 165390 }, { "epoch": 1088.157894736842, "grad_norm": 1.0412479639053345, "learning_rate": 0.0001, "loss": 0.0096, "step": 165400 }, { "epoch": 1088.2236842105262, "grad_norm": 1.2765264511108398, "learning_rate": 0.0001, "loss": 0.007, "step": 165410 }, { "epoch": 1088.2894736842106, "grad_norm": 1.1357839107513428, "learning_rate": 0.0001, "loss": 0.0088, "step": 165420 }, { "epoch": 1088.3552631578948, "grad_norm": 1.2164040803909302, "learning_rate": 0.0001, "loss": 0.0104, "step": 165430 }, { "epoch": 1088.421052631579, "grad_norm": 1.2040680646896362, "learning_rate": 0.0001, "loss": 0.0076, "step": 165440 }, { "epoch": 1088.4868421052631, "grad_norm": 1.342111349105835, "learning_rate": 0.0001, "loss": 0.0117, "step": 165450 }, { "epoch": 1088.5526315789473, "grad_norm": 1.203928828239441, "learning_rate": 0.0001, "loss": 0.0092, "step": 165460 }, { "epoch": 1088.6184210526317, "grad_norm": 1.0916163921356201, "learning_rate": 0.0001, "loss": 0.0117, "step": 165470 }, { "epoch": 1088.6842105263158, "grad_norm": 0.970431387424469, "learning_rate": 0.0001, "loss": 0.0098, "step": 165480 }, { "epoch": 1088.75, "grad_norm": 1.1007429361343384, "learning_rate": 0.0001, "loss": 0.0094, "step": 165490 }, { "epoch": 1088.8157894736842, "grad_norm": 1.2328646183013916, "learning_rate": 0.0001, "loss": 0.0089, "step": 165500 }, { "epoch": 1088.8815789473683, "grad_norm": 1.3475733995437622, "learning_rate": 0.0001, "loss": 0.0081, "step": 165510 }, { "epoch": 1088.9473684210527, "grad_norm": 1.0184584856033325, "learning_rate": 0.0001, "loss": 0.0095, "step": 165520 }, { "epoch": 1089.0131578947369, "grad_norm": 0.9720950722694397, "learning_rate": 0.0001, "loss": 0.011, "step": 165530 }, { "epoch": 1089.078947368421, "grad_norm": 1.327864646911621, "learning_rate": 0.0001, "loss": 0.0081, "step": 165540 }, { "epoch": 1089.1447368421052, "grad_norm": 0.9376356601715088, "learning_rate": 0.0001, "loss": 0.0097, "step": 165550 }, { "epoch": 1089.2105263157894, "grad_norm": 1.0245535373687744, "learning_rate": 0.0001, "loss": 0.0094, "step": 165560 }, { "epoch": 1089.2763157894738, "grad_norm": 1.088114857673645, "learning_rate": 0.0001, "loss": 0.0089, "step": 165570 }, { "epoch": 1089.342105263158, "grad_norm": 0.981323778629303, "learning_rate": 0.0001, "loss": 0.0094, "step": 165580 }, { "epoch": 1089.407894736842, "grad_norm": 1.1533432006835938, "learning_rate": 0.0001, "loss": 0.01, "step": 165590 }, { "epoch": 1089.4736842105262, "grad_norm": 0.8093254566192627, "learning_rate": 0.0001, "loss": 0.009, "step": 165600 }, { "epoch": 1089.5394736842106, "grad_norm": 0.9100672006607056, "learning_rate": 0.0001, "loss": 0.0097, "step": 165610 }, { "epoch": 1089.6052631578948, "grad_norm": 0.8746789693832397, "learning_rate": 0.0001, "loss": 0.008, "step": 165620 }, { "epoch": 1089.671052631579, "grad_norm": 1.4913291931152344, "learning_rate": 0.0001, "loss": 0.0106, "step": 165630 }, { "epoch": 1089.7368421052631, "grad_norm": 1.1756936311721802, "learning_rate": 0.0001, "loss": 0.0095, "step": 165640 }, { "epoch": 1089.8026315789473, "grad_norm": 1.144705891609192, "learning_rate": 0.0001, "loss": 0.0106, "step": 165650 }, { "epoch": 1089.8684210526317, "grad_norm": 1.0096395015716553, "learning_rate": 0.0001, "loss": 0.0095, "step": 165660 }, { "epoch": 1089.9342105263158, "grad_norm": 1.4962252378463745, "learning_rate": 0.0001, "loss": 0.01, "step": 165670 }, { "epoch": 1090.0, "grad_norm": 1.0682179927825928, "learning_rate": 0.0001, "loss": 0.0085, "step": 165680 }, { "epoch": 1090.0657894736842, "grad_norm": 1.1246052980422974, "learning_rate": 0.0001, "loss": 0.0082, "step": 165690 }, { "epoch": 1090.1315789473683, "grad_norm": 1.1388136148452759, "learning_rate": 0.0001, "loss": 0.0092, "step": 165700 }, { "epoch": 1090.1973684210527, "grad_norm": 1.017364740371704, "learning_rate": 0.0001, "loss": 0.01, "step": 165710 }, { "epoch": 1090.2631578947369, "grad_norm": 1.1747958660125732, "learning_rate": 0.0001, "loss": 0.0102, "step": 165720 }, { "epoch": 1090.328947368421, "grad_norm": 1.1064643859863281, "learning_rate": 0.0001, "loss": 0.0081, "step": 165730 }, { "epoch": 1090.3947368421052, "grad_norm": 0.9353105425834656, "learning_rate": 0.0001, "loss": 0.0086, "step": 165740 }, { "epoch": 1090.4605263157894, "grad_norm": 1.1124236583709717, "learning_rate": 0.0001, "loss": 0.0083, "step": 165750 }, { "epoch": 1090.5263157894738, "grad_norm": 1.1237322092056274, "learning_rate": 0.0001, "loss": 0.0095, "step": 165760 }, { "epoch": 1090.592105263158, "grad_norm": 0.8977600932121277, "learning_rate": 0.0001, "loss": 0.008, "step": 165770 }, { "epoch": 1090.657894736842, "grad_norm": 1.2883377075195312, "learning_rate": 0.0001, "loss": 0.0118, "step": 165780 }, { "epoch": 1090.7236842105262, "grad_norm": 1.2734546661376953, "learning_rate": 0.0001, "loss": 0.0096, "step": 165790 }, { "epoch": 1090.7894736842106, "grad_norm": 1.1414625644683838, "learning_rate": 0.0001, "loss": 0.0077, "step": 165800 }, { "epoch": 1090.8552631578948, "grad_norm": 1.0614356994628906, "learning_rate": 0.0001, "loss": 0.0099, "step": 165810 }, { "epoch": 1090.921052631579, "grad_norm": 1.0727033615112305, "learning_rate": 0.0001, "loss": 0.0102, "step": 165820 }, { "epoch": 1090.9868421052631, "grad_norm": 1.1432024240493774, "learning_rate": 0.0001, "loss": 0.0094, "step": 165830 }, { "epoch": 1091.0526315789473, "grad_norm": 1.158418893814087, "learning_rate": 0.0001, "loss": 0.0077, "step": 165840 }, { "epoch": 1091.1184210526317, "grad_norm": 1.033921718597412, "learning_rate": 0.0001, "loss": 0.0086, "step": 165850 }, { "epoch": 1091.1842105263158, "grad_norm": 1.0645828247070312, "learning_rate": 0.0001, "loss": 0.0083, "step": 165860 }, { "epoch": 1091.25, "grad_norm": 0.5794889330863953, "learning_rate": 0.0001, "loss": 0.0119, "step": 165870 }, { "epoch": 1091.3157894736842, "grad_norm": 1.1075804233551025, "learning_rate": 0.0001, "loss": 0.0089, "step": 165880 }, { "epoch": 1091.3815789473683, "grad_norm": 0.9454206228256226, "learning_rate": 0.0001, "loss": 0.0087, "step": 165890 }, { "epoch": 1091.4473684210527, "grad_norm": 1.1715730428695679, "learning_rate": 0.0001, "loss": 0.0106, "step": 165900 }, { "epoch": 1091.5131578947369, "grad_norm": 1.0313235521316528, "learning_rate": 0.0001, "loss": 0.0089, "step": 165910 }, { "epoch": 1091.578947368421, "grad_norm": 1.3936482667922974, "learning_rate": 0.0001, "loss": 0.009, "step": 165920 }, { "epoch": 1091.6447368421052, "grad_norm": 0.8240237832069397, "learning_rate": 0.0001, "loss": 0.0083, "step": 165930 }, { "epoch": 1091.7105263157894, "grad_norm": 1.0866410732269287, "learning_rate": 0.0001, "loss": 0.0089, "step": 165940 }, { "epoch": 1091.7763157894738, "grad_norm": 1.3597463369369507, "learning_rate": 0.0001, "loss": 0.0094, "step": 165950 }, { "epoch": 1091.842105263158, "grad_norm": 1.3477791547775269, "learning_rate": 0.0001, "loss": 0.0099, "step": 165960 }, { "epoch": 1091.907894736842, "grad_norm": 1.2000268697738647, "learning_rate": 0.0001, "loss": 0.0088, "step": 165970 }, { "epoch": 1091.9736842105262, "grad_norm": 0.7252658009529114, "learning_rate": 0.0001, "loss": 0.009, "step": 165980 }, { "epoch": 1092.0394736842106, "grad_norm": 1.189745545387268, "learning_rate": 0.0001, "loss": 0.0082, "step": 165990 }, { "epoch": 1092.1052631578948, "grad_norm": 0.7382572889328003, "learning_rate": 0.0001, "loss": 0.0094, "step": 166000 }, { "epoch": 1092.171052631579, "grad_norm": 1.0772287845611572, "learning_rate": 0.0001, "loss": 0.0105, "step": 166010 }, { "epoch": 1092.2368421052631, "grad_norm": 0.7473660111427307, "learning_rate": 0.0001, "loss": 0.0103, "step": 166020 }, { "epoch": 1092.3026315789473, "grad_norm": 0.8912314176559448, "learning_rate": 0.0001, "loss": 0.0095, "step": 166030 }, { "epoch": 1092.3684210526317, "grad_norm": 1.1493737697601318, "learning_rate": 0.0001, "loss": 0.009, "step": 166040 }, { "epoch": 1092.4342105263158, "grad_norm": 1.3039054870605469, "learning_rate": 0.0001, "loss": 0.0103, "step": 166050 }, { "epoch": 1092.5, "grad_norm": 0.9073991775512695, "learning_rate": 0.0001, "loss": 0.0081, "step": 166060 }, { "epoch": 1092.5657894736842, "grad_norm": 0.8854196667671204, "learning_rate": 0.0001, "loss": 0.0088, "step": 166070 }, { "epoch": 1092.6315789473683, "grad_norm": 1.2598876953125, "learning_rate": 0.0001, "loss": 0.0078, "step": 166080 }, { "epoch": 1092.6973684210527, "grad_norm": 1.2058098316192627, "learning_rate": 0.0001, "loss": 0.0089, "step": 166090 }, { "epoch": 1092.7631578947369, "grad_norm": 1.188262701034546, "learning_rate": 0.0001, "loss": 0.0095, "step": 166100 }, { "epoch": 1092.828947368421, "grad_norm": 1.5130947828292847, "learning_rate": 0.0001, "loss": 0.0104, "step": 166110 }, { "epoch": 1092.8947368421052, "grad_norm": 1.2979497909545898, "learning_rate": 0.0001, "loss": 0.0074, "step": 166120 }, { "epoch": 1092.9605263157894, "grad_norm": 1.0811450481414795, "learning_rate": 0.0001, "loss": 0.0086, "step": 166130 }, { "epoch": 1093.0263157894738, "grad_norm": 1.0868312120437622, "learning_rate": 0.0001, "loss": 0.011, "step": 166140 }, { "epoch": 1093.092105263158, "grad_norm": 0.8259110450744629, "learning_rate": 0.0001, "loss": 0.0086, "step": 166150 }, { "epoch": 1093.157894736842, "grad_norm": 1.2230782508850098, "learning_rate": 0.0001, "loss": 0.0093, "step": 166160 }, { "epoch": 1093.2236842105262, "grad_norm": 1.296341896057129, "learning_rate": 0.0001, "loss": 0.0074, "step": 166170 }, { "epoch": 1093.2894736842106, "grad_norm": 1.0726011991500854, "learning_rate": 0.0001, "loss": 0.0086, "step": 166180 }, { "epoch": 1093.3552631578948, "grad_norm": 0.7867605686187744, "learning_rate": 0.0001, "loss": 0.0101, "step": 166190 }, { "epoch": 1093.421052631579, "grad_norm": 0.8794106245040894, "learning_rate": 0.0001, "loss": 0.0097, "step": 166200 }, { "epoch": 1093.4868421052631, "grad_norm": 1.1672301292419434, "learning_rate": 0.0001, "loss": 0.0091, "step": 166210 }, { "epoch": 1093.5526315789473, "grad_norm": 0.9970499277114868, "learning_rate": 0.0001, "loss": 0.0086, "step": 166220 }, { "epoch": 1093.6184210526317, "grad_norm": 1.1926120519638062, "learning_rate": 0.0001, "loss": 0.0088, "step": 166230 }, { "epoch": 1093.6842105263158, "grad_norm": 0.935606837272644, "learning_rate": 0.0001, "loss": 0.0088, "step": 166240 }, { "epoch": 1093.75, "grad_norm": 1.3509941101074219, "learning_rate": 0.0001, "loss": 0.0099, "step": 166250 }, { "epoch": 1093.8157894736842, "grad_norm": 1.2598907947540283, "learning_rate": 0.0001, "loss": 0.0098, "step": 166260 }, { "epoch": 1093.8815789473683, "grad_norm": 1.3617310523986816, "learning_rate": 0.0001, "loss": 0.0098, "step": 166270 }, { "epoch": 1093.9473684210527, "grad_norm": 1.6481096744537354, "learning_rate": 0.0001, "loss": 0.0103, "step": 166280 }, { "epoch": 1094.0131578947369, "grad_norm": 1.0241049528121948, "learning_rate": 0.0001, "loss": 0.0095, "step": 166290 }, { "epoch": 1094.078947368421, "grad_norm": 1.2445513010025024, "learning_rate": 0.0001, "loss": 0.0096, "step": 166300 }, { "epoch": 1094.1447368421052, "grad_norm": 1.102005124092102, "learning_rate": 0.0001, "loss": 0.009, "step": 166310 }, { "epoch": 1094.2105263157894, "grad_norm": 1.0038352012634277, "learning_rate": 0.0001, "loss": 0.0081, "step": 166320 }, { "epoch": 1094.2763157894738, "grad_norm": 1.2613288164138794, "learning_rate": 0.0001, "loss": 0.0093, "step": 166330 }, { "epoch": 1094.342105263158, "grad_norm": 0.9229909181594849, "learning_rate": 0.0001, "loss": 0.0091, "step": 166340 }, { "epoch": 1094.407894736842, "grad_norm": 0.9548853635787964, "learning_rate": 0.0001, "loss": 0.008, "step": 166350 }, { "epoch": 1094.4736842105262, "grad_norm": 0.8751519322395325, "learning_rate": 0.0001, "loss": 0.0089, "step": 166360 }, { "epoch": 1094.5394736842106, "grad_norm": 0.5805840492248535, "learning_rate": 0.0001, "loss": 0.0092, "step": 166370 }, { "epoch": 1094.6052631578948, "grad_norm": 1.1700973510742188, "learning_rate": 0.0001, "loss": 0.0089, "step": 166380 }, { "epoch": 1094.671052631579, "grad_norm": 0.8679378032684326, "learning_rate": 0.0001, "loss": 0.0074, "step": 166390 }, { "epoch": 1094.7368421052631, "grad_norm": 0.843585193157196, "learning_rate": 0.0001, "loss": 0.0116, "step": 166400 }, { "epoch": 1094.8026315789473, "grad_norm": 1.0492405891418457, "learning_rate": 0.0001, "loss": 0.0103, "step": 166410 }, { "epoch": 1094.8684210526317, "grad_norm": 1.0488659143447876, "learning_rate": 0.0001, "loss": 0.0101, "step": 166420 }, { "epoch": 1094.9342105263158, "grad_norm": 1.3835662603378296, "learning_rate": 0.0001, "loss": 0.0114, "step": 166430 }, { "epoch": 1095.0, "grad_norm": 0.7628530859947205, "learning_rate": 0.0001, "loss": 0.01, "step": 166440 }, { "epoch": 1095.0657894736842, "grad_norm": 1.0557432174682617, "learning_rate": 0.0001, "loss": 0.0113, "step": 166450 }, { "epoch": 1095.1315789473683, "grad_norm": 1.1266205310821533, "learning_rate": 0.0001, "loss": 0.0105, "step": 166460 }, { "epoch": 1095.1973684210527, "grad_norm": 1.0164369344711304, "learning_rate": 0.0001, "loss": 0.0091, "step": 166470 }, { "epoch": 1095.2631578947369, "grad_norm": 1.1279717683792114, "learning_rate": 0.0001, "loss": 0.0089, "step": 166480 }, { "epoch": 1095.328947368421, "grad_norm": 1.44341242313385, "learning_rate": 0.0001, "loss": 0.0093, "step": 166490 }, { "epoch": 1095.3947368421052, "grad_norm": 1.4984021186828613, "learning_rate": 0.0001, "loss": 0.0112, "step": 166500 }, { "epoch": 1095.4605263157894, "grad_norm": 0.9742573499679565, "learning_rate": 0.0001, "loss": 0.0104, "step": 166510 }, { "epoch": 1095.5263157894738, "grad_norm": 1.1598995923995972, "learning_rate": 0.0001, "loss": 0.0096, "step": 166520 }, { "epoch": 1095.592105263158, "grad_norm": 1.0693007707595825, "learning_rate": 0.0001, "loss": 0.0107, "step": 166530 }, { "epoch": 1095.657894736842, "grad_norm": 1.192436695098877, "learning_rate": 0.0001, "loss": 0.0108, "step": 166540 }, { "epoch": 1095.7236842105262, "grad_norm": 1.156217098236084, "learning_rate": 0.0001, "loss": 0.0103, "step": 166550 }, { "epoch": 1095.7894736842106, "grad_norm": 1.08530592918396, "learning_rate": 0.0001, "loss": 0.0104, "step": 166560 }, { "epoch": 1095.8552631578948, "grad_norm": 1.1809860467910767, "learning_rate": 0.0001, "loss": 0.0115, "step": 166570 }, { "epoch": 1095.921052631579, "grad_norm": 0.9600285887718201, "learning_rate": 0.0001, "loss": 0.0101, "step": 166580 }, { "epoch": 1095.9868421052631, "grad_norm": 1.0501923561096191, "learning_rate": 0.0001, "loss": 0.0087, "step": 166590 }, { "epoch": 1096.0526315789473, "grad_norm": 1.4869877099990845, "learning_rate": 0.0001, "loss": 0.0099, "step": 166600 }, { "epoch": 1096.1184210526317, "grad_norm": 1.0109316110610962, "learning_rate": 0.0001, "loss": 0.0096, "step": 166610 }, { "epoch": 1096.1842105263158, "grad_norm": 1.2201900482177734, "learning_rate": 0.0001, "loss": 0.0111, "step": 166620 }, { "epoch": 1096.25, "grad_norm": 1.0076817274093628, "learning_rate": 0.0001, "loss": 0.0107, "step": 166630 }, { "epoch": 1096.3157894736842, "grad_norm": 0.9542809128761292, "learning_rate": 0.0001, "loss": 0.0086, "step": 166640 }, { "epoch": 1096.3815789473683, "grad_norm": 0.8175939321517944, "learning_rate": 0.0001, "loss": 0.0093, "step": 166650 }, { "epoch": 1096.4473684210527, "grad_norm": 1.0511505603790283, "learning_rate": 0.0001, "loss": 0.0089, "step": 166660 }, { "epoch": 1096.5131578947369, "grad_norm": 0.8790225982666016, "learning_rate": 0.0001, "loss": 0.0083, "step": 166670 }, { "epoch": 1096.578947368421, "grad_norm": 0.8520594835281372, "learning_rate": 0.0001, "loss": 0.0087, "step": 166680 }, { "epoch": 1096.6447368421052, "grad_norm": 0.8566333055496216, "learning_rate": 0.0001, "loss": 0.0088, "step": 166690 }, { "epoch": 1096.7105263157894, "grad_norm": 1.1941945552825928, "learning_rate": 0.0001, "loss": 0.0096, "step": 166700 }, { "epoch": 1096.7763157894738, "grad_norm": 0.8733620047569275, "learning_rate": 0.0001, "loss": 0.0106, "step": 166710 }, { "epoch": 1096.842105263158, "grad_norm": 1.1847484111785889, "learning_rate": 0.0001, "loss": 0.0095, "step": 166720 }, { "epoch": 1096.907894736842, "grad_norm": 0.9977523684501648, "learning_rate": 0.0001, "loss": 0.0127, "step": 166730 }, { "epoch": 1096.9736842105262, "grad_norm": 1.088883638381958, "learning_rate": 0.0001, "loss": 0.0117, "step": 166740 }, { "epoch": 1097.0394736842106, "grad_norm": 1.1181623935699463, "learning_rate": 0.0001, "loss": 0.0105, "step": 166750 }, { "epoch": 1097.1052631578948, "grad_norm": 3.2963168621063232, "learning_rate": 0.0001, "loss": 0.0103, "step": 166760 }, { "epoch": 1097.171052631579, "grad_norm": 1.2029335498809814, "learning_rate": 0.0001, "loss": 0.0089, "step": 166770 }, { "epoch": 1097.2368421052631, "grad_norm": 1.2045027017593384, "learning_rate": 0.0001, "loss": 0.0115, "step": 166780 }, { "epoch": 1097.3026315789473, "grad_norm": 1.1241041421890259, "learning_rate": 0.0001, "loss": 0.0122, "step": 166790 }, { "epoch": 1097.3684210526317, "grad_norm": 1.700153112411499, "learning_rate": 0.0001, "loss": 0.0093, "step": 166800 }, { "epoch": 1097.4342105263158, "grad_norm": 1.5076239109039307, "learning_rate": 0.0001, "loss": 0.0116, "step": 166810 }, { "epoch": 1097.5, "grad_norm": 1.4871948957443237, "learning_rate": 0.0001, "loss": 0.0116, "step": 166820 }, { "epoch": 1097.5657894736842, "grad_norm": 1.3231090307235718, "learning_rate": 0.0001, "loss": 0.0091, "step": 166830 }, { "epoch": 1097.6315789473683, "grad_norm": 1.1733216047286987, "learning_rate": 0.0001, "loss": 0.0097, "step": 166840 }, { "epoch": 1097.6973684210527, "grad_norm": 1.3139346837997437, "learning_rate": 0.0001, "loss": 0.0092, "step": 166850 }, { "epoch": 1097.7631578947369, "grad_norm": 1.0993858575820923, "learning_rate": 0.0001, "loss": 0.0103, "step": 166860 }, { "epoch": 1097.828947368421, "grad_norm": 1.0795350074768066, "learning_rate": 0.0001, "loss": 0.009, "step": 166870 }, { "epoch": 1097.8947368421052, "grad_norm": 1.0457212924957275, "learning_rate": 0.0001, "loss": 0.0108, "step": 166880 }, { "epoch": 1097.9605263157894, "grad_norm": 0.9968882203102112, "learning_rate": 0.0001, "loss": 0.0085, "step": 166890 }, { "epoch": 1098.0263157894738, "grad_norm": 1.2826480865478516, "learning_rate": 0.0001, "loss": 0.0085, "step": 166900 }, { "epoch": 1098.092105263158, "grad_norm": 1.4935877323150635, "learning_rate": 0.0001, "loss": 0.0101, "step": 166910 }, { "epoch": 1098.157894736842, "grad_norm": 1.1641865968704224, "learning_rate": 0.0001, "loss": 0.0094, "step": 166920 }, { "epoch": 1098.2236842105262, "grad_norm": 1.0109764337539673, "learning_rate": 0.0001, "loss": 0.0095, "step": 166930 }, { "epoch": 1098.2894736842106, "grad_norm": 1.3171255588531494, "learning_rate": 0.0001, "loss": 0.0107, "step": 166940 }, { "epoch": 1098.3552631578948, "grad_norm": 1.3384495973587036, "learning_rate": 0.0001, "loss": 0.0095, "step": 166950 }, { "epoch": 1098.421052631579, "grad_norm": 1.485098123550415, "learning_rate": 0.0001, "loss": 0.0091, "step": 166960 }, { "epoch": 1098.4868421052631, "grad_norm": 1.4696592092514038, "learning_rate": 0.0001, "loss": 0.0086, "step": 166970 }, { "epoch": 1098.5526315789473, "grad_norm": 0.9190577864646912, "learning_rate": 0.0001, "loss": 0.0083, "step": 166980 }, { "epoch": 1098.6184210526317, "grad_norm": 1.0120787620544434, "learning_rate": 0.0001, "loss": 0.0099, "step": 166990 }, { "epoch": 1098.6842105263158, "grad_norm": 1.3007423877716064, "learning_rate": 0.0001, "loss": 0.0108, "step": 167000 }, { "epoch": 1098.75, "grad_norm": 1.21133291721344, "learning_rate": 0.0001, "loss": 0.0107, "step": 167010 }, { "epoch": 1098.8157894736842, "grad_norm": 0.8098885416984558, "learning_rate": 0.0001, "loss": 0.0085, "step": 167020 }, { "epoch": 1098.8815789473683, "grad_norm": 1.0924862623214722, "learning_rate": 0.0001, "loss": 0.0097, "step": 167030 }, { "epoch": 1098.9473684210527, "grad_norm": 1.058971643447876, "learning_rate": 0.0001, "loss": 0.01, "step": 167040 }, { "epoch": 1099.0131578947369, "grad_norm": 1.1815778017044067, "learning_rate": 0.0001, "loss": 0.0105, "step": 167050 }, { "epoch": 1099.078947368421, "grad_norm": 1.1275389194488525, "learning_rate": 0.0001, "loss": 0.0101, "step": 167060 }, { "epoch": 1099.1447368421052, "grad_norm": 1.2118959426879883, "learning_rate": 0.0001, "loss": 0.0092, "step": 167070 }, { "epoch": 1099.2105263157894, "grad_norm": 1.3578826189041138, "learning_rate": 0.0001, "loss": 0.0099, "step": 167080 }, { "epoch": 1099.2763157894738, "grad_norm": 1.1233108043670654, "learning_rate": 0.0001, "loss": 0.0081, "step": 167090 }, { "epoch": 1099.342105263158, "grad_norm": 0.8814030885696411, "learning_rate": 0.0001, "loss": 0.0084, "step": 167100 }, { "epoch": 1099.407894736842, "grad_norm": 1.0305200815200806, "learning_rate": 0.0001, "loss": 0.0099, "step": 167110 }, { "epoch": 1099.4736842105262, "grad_norm": 1.1122325658798218, "learning_rate": 0.0001, "loss": 0.0104, "step": 167120 }, { "epoch": 1099.5394736842106, "grad_norm": 0.9248008728027344, "learning_rate": 0.0001, "loss": 0.0095, "step": 167130 }, { "epoch": 1099.6052631578948, "grad_norm": 0.889100968837738, "learning_rate": 0.0001, "loss": 0.0078, "step": 167140 }, { "epoch": 1099.671052631579, "grad_norm": 1.2280583381652832, "learning_rate": 0.0001, "loss": 0.0109, "step": 167150 }, { "epoch": 1099.7368421052631, "grad_norm": 1.2300212383270264, "learning_rate": 0.0001, "loss": 0.0101, "step": 167160 }, { "epoch": 1099.8026315789473, "grad_norm": 1.211428165435791, "learning_rate": 0.0001, "loss": 0.0108, "step": 167170 }, { "epoch": 1099.8684210526317, "grad_norm": 1.1295406818389893, "learning_rate": 0.0001, "loss": 0.0097, "step": 167180 }, { "epoch": 1099.9342105263158, "grad_norm": 1.1120741367340088, "learning_rate": 0.0001, "loss": 0.0086, "step": 167190 }, { "epoch": 1100.0, "grad_norm": 0.8799479603767395, "learning_rate": 0.0001, "loss": 0.0079, "step": 167200 }, { "epoch": 1100.0657894736842, "grad_norm": 1.0118703842163086, "learning_rate": 0.0001, "loss": 0.0092, "step": 167210 }, { "epoch": 1100.1315789473683, "grad_norm": 1.097957968711853, "learning_rate": 0.0001, "loss": 0.0091, "step": 167220 }, { "epoch": 1100.1973684210527, "grad_norm": 0.8075319528579712, "learning_rate": 0.0001, "loss": 0.0087, "step": 167230 }, { "epoch": 1100.2631578947369, "grad_norm": 1.2604179382324219, "learning_rate": 0.0001, "loss": 0.0097, "step": 167240 }, { "epoch": 1100.328947368421, "grad_norm": 1.090760350227356, "learning_rate": 0.0001, "loss": 0.0081, "step": 167250 }, { "epoch": 1100.3947368421052, "grad_norm": 0.9950432181358337, "learning_rate": 0.0001, "loss": 0.0076, "step": 167260 }, { "epoch": 1100.4605263157894, "grad_norm": 0.7708602547645569, "learning_rate": 0.0001, "loss": 0.0101, "step": 167270 }, { "epoch": 1100.5263157894738, "grad_norm": 0.8636929392814636, "learning_rate": 0.0001, "loss": 0.0089, "step": 167280 }, { "epoch": 1100.592105263158, "grad_norm": 0.7915971279144287, "learning_rate": 0.0001, "loss": 0.011, "step": 167290 }, { "epoch": 1100.657894736842, "grad_norm": 0.8626055121421814, "learning_rate": 0.0001, "loss": 0.0113, "step": 167300 }, { "epoch": 1100.7236842105262, "grad_norm": 0.6943460702896118, "learning_rate": 0.0001, "loss": 0.0093, "step": 167310 }, { "epoch": 1100.7894736842106, "grad_norm": 1.0812398195266724, "learning_rate": 0.0001, "loss": 0.0104, "step": 167320 }, { "epoch": 1100.8552631578948, "grad_norm": 0.906474232673645, "learning_rate": 0.0001, "loss": 0.0096, "step": 167330 }, { "epoch": 1100.921052631579, "grad_norm": 0.9703337550163269, "learning_rate": 0.0001, "loss": 0.0081, "step": 167340 }, { "epoch": 1100.9868421052631, "grad_norm": 0.8789888024330139, "learning_rate": 0.0001, "loss": 0.0097, "step": 167350 }, { "epoch": 1101.0526315789473, "grad_norm": 0.9876759648323059, "learning_rate": 0.0001, "loss": 0.0079, "step": 167360 }, { "epoch": 1101.1184210526317, "grad_norm": 1.0288928747177124, "learning_rate": 0.0001, "loss": 0.0115, "step": 167370 }, { "epoch": 1101.1842105263158, "grad_norm": 0.9884411692619324, "learning_rate": 0.0001, "loss": 0.01, "step": 167380 }, { "epoch": 1101.25, "grad_norm": 1.231924057006836, "learning_rate": 0.0001, "loss": 0.0103, "step": 167390 }, { "epoch": 1101.3157894736842, "grad_norm": 1.2032873630523682, "learning_rate": 0.0001, "loss": 0.0092, "step": 167400 }, { "epoch": 1101.3815789473683, "grad_norm": 1.1129034757614136, "learning_rate": 0.0001, "loss": 0.0084, "step": 167410 }, { "epoch": 1101.4473684210527, "grad_norm": 1.0674737691879272, "learning_rate": 0.0001, "loss": 0.0072, "step": 167420 }, { "epoch": 1101.5131578947369, "grad_norm": 0.9787297248840332, "learning_rate": 0.0001, "loss": 0.0112, "step": 167430 }, { "epoch": 1101.578947368421, "grad_norm": 0.8333871364593506, "learning_rate": 0.0001, "loss": 0.0096, "step": 167440 }, { "epoch": 1101.6447368421052, "grad_norm": 1.1701922416687012, "learning_rate": 0.0001, "loss": 0.0086, "step": 167450 }, { "epoch": 1101.7105263157894, "grad_norm": 1.1655021905899048, "learning_rate": 0.0001, "loss": 0.0092, "step": 167460 }, { "epoch": 1101.7763157894738, "grad_norm": 1.2170398235321045, "learning_rate": 0.0001, "loss": 0.0107, "step": 167470 }, { "epoch": 1101.842105263158, "grad_norm": 1.1292792558670044, "learning_rate": 0.0001, "loss": 0.0086, "step": 167480 }, { "epoch": 1101.907894736842, "grad_norm": 1.0343544483184814, "learning_rate": 0.0001, "loss": 0.0089, "step": 167490 }, { "epoch": 1101.9736842105262, "grad_norm": 1.292896032333374, "learning_rate": 0.0001, "loss": 0.0094, "step": 167500 }, { "epoch": 1102.0394736842106, "grad_norm": 1.3598896265029907, "learning_rate": 0.0001, "loss": 0.0087, "step": 167510 }, { "epoch": 1102.1052631578948, "grad_norm": 1.1134599447250366, "learning_rate": 0.0001, "loss": 0.0084, "step": 167520 }, { "epoch": 1102.171052631579, "grad_norm": 1.4782142639160156, "learning_rate": 0.0001, "loss": 0.0089, "step": 167530 }, { "epoch": 1102.2368421052631, "grad_norm": 1.0224602222442627, "learning_rate": 0.0001, "loss": 0.0099, "step": 167540 }, { "epoch": 1102.3026315789473, "grad_norm": 1.1568275690078735, "learning_rate": 0.0001, "loss": 0.0085, "step": 167550 }, { "epoch": 1102.3684210526317, "grad_norm": 0.9794890284538269, "learning_rate": 0.0001, "loss": 0.0089, "step": 167560 }, { "epoch": 1102.4342105263158, "grad_norm": 0.9508534073829651, "learning_rate": 0.0001, "loss": 0.0116, "step": 167570 }, { "epoch": 1102.5, "grad_norm": 1.202999234199524, "learning_rate": 0.0001, "loss": 0.0093, "step": 167580 }, { "epoch": 1102.5657894736842, "grad_norm": 1.2646609544754028, "learning_rate": 0.0001, "loss": 0.0075, "step": 167590 }, { "epoch": 1102.6315789473683, "grad_norm": 1.1292835474014282, "learning_rate": 0.0001, "loss": 0.0104, "step": 167600 }, { "epoch": 1102.6973684210527, "grad_norm": 0.8157707452774048, "learning_rate": 0.0001, "loss": 0.0098, "step": 167610 }, { "epoch": 1102.7631578947369, "grad_norm": 0.9448849558830261, "learning_rate": 0.0001, "loss": 0.0093, "step": 167620 }, { "epoch": 1102.828947368421, "grad_norm": 0.878303587436676, "learning_rate": 0.0001, "loss": 0.0079, "step": 167630 }, { "epoch": 1102.8947368421052, "grad_norm": 1.097684383392334, "learning_rate": 0.0001, "loss": 0.0098, "step": 167640 }, { "epoch": 1102.9605263157894, "grad_norm": 1.0110867023468018, "learning_rate": 0.0001, "loss": 0.0092, "step": 167650 }, { "epoch": 1103.0263157894738, "grad_norm": 1.1155486106872559, "learning_rate": 0.0001, "loss": 0.0071, "step": 167660 }, { "epoch": 1103.092105263158, "grad_norm": 1.0845309495925903, "learning_rate": 0.0001, "loss": 0.009, "step": 167670 }, { "epoch": 1103.157894736842, "grad_norm": 0.8324429392814636, "learning_rate": 0.0001, "loss": 0.0098, "step": 167680 }, { "epoch": 1103.2236842105262, "grad_norm": 0.8664148449897766, "learning_rate": 0.0001, "loss": 0.0102, "step": 167690 }, { "epoch": 1103.2894736842106, "grad_norm": 0.7451772689819336, "learning_rate": 0.0001, "loss": 0.0077, "step": 167700 }, { "epoch": 1103.3552631578948, "grad_norm": 0.9398016929626465, "learning_rate": 0.0001, "loss": 0.0095, "step": 167710 }, { "epoch": 1103.421052631579, "grad_norm": 1.0360288619995117, "learning_rate": 0.0001, "loss": 0.0092, "step": 167720 }, { "epoch": 1103.4868421052631, "grad_norm": 1.274431824684143, "learning_rate": 0.0001, "loss": 0.0094, "step": 167730 }, { "epoch": 1103.5526315789473, "grad_norm": 0.5587978363037109, "learning_rate": 0.0001, "loss": 0.0103, "step": 167740 }, { "epoch": 1103.6184210526317, "grad_norm": 0.7810954451560974, "learning_rate": 0.0001, "loss": 0.012, "step": 167750 }, { "epoch": 1103.6842105263158, "grad_norm": 0.8118604421615601, "learning_rate": 0.0001, "loss": 0.0075, "step": 167760 }, { "epoch": 1103.75, "grad_norm": 0.6753564476966858, "learning_rate": 0.0001, "loss": 0.0076, "step": 167770 }, { "epoch": 1103.8157894736842, "grad_norm": 0.609535276889801, "learning_rate": 0.0001, "loss": 0.0083, "step": 167780 }, { "epoch": 1103.8815789473683, "grad_norm": 0.8553572297096252, "learning_rate": 0.0001, "loss": 0.0082, "step": 167790 }, { "epoch": 1103.9473684210527, "grad_norm": 1.0933605432510376, "learning_rate": 0.0001, "loss": 0.0088, "step": 167800 }, { "epoch": 1104.0131578947369, "grad_norm": 1.4074715375900269, "learning_rate": 0.0001, "loss": 0.0102, "step": 167810 }, { "epoch": 1104.078947368421, "grad_norm": 0.9313386678695679, "learning_rate": 0.0001, "loss": 0.0088, "step": 167820 }, { "epoch": 1104.1447368421052, "grad_norm": 1.2255233526229858, "learning_rate": 0.0001, "loss": 0.0094, "step": 167830 }, { "epoch": 1104.2105263157894, "grad_norm": 0.958168089389801, "learning_rate": 0.0001, "loss": 0.0086, "step": 167840 }, { "epoch": 1104.2763157894738, "grad_norm": 0.7276482582092285, "learning_rate": 0.0001, "loss": 0.0091, "step": 167850 }, { "epoch": 1104.342105263158, "grad_norm": 1.2092912197113037, "learning_rate": 0.0001, "loss": 0.0095, "step": 167860 }, { "epoch": 1104.407894736842, "grad_norm": 0.9933399558067322, "learning_rate": 0.0001, "loss": 0.0092, "step": 167870 }, { "epoch": 1104.4736842105262, "grad_norm": 0.8450823426246643, "learning_rate": 0.0001, "loss": 0.0094, "step": 167880 }, { "epoch": 1104.5394736842106, "grad_norm": 1.043594479560852, "learning_rate": 0.0001, "loss": 0.0084, "step": 167890 }, { "epoch": 1104.6052631578948, "grad_norm": 1.107570767402649, "learning_rate": 0.0001, "loss": 0.0083, "step": 167900 }, { "epoch": 1104.671052631579, "grad_norm": 1.1680465936660767, "learning_rate": 0.0001, "loss": 0.0105, "step": 167910 }, { "epoch": 1104.7368421052631, "grad_norm": 1.4395219087600708, "learning_rate": 0.0001, "loss": 0.0101, "step": 167920 }, { "epoch": 1104.8026315789473, "grad_norm": 1.0182297229766846, "learning_rate": 0.0001, "loss": 0.0078, "step": 167930 }, { "epoch": 1104.8684210526317, "grad_norm": 0.7282902598381042, "learning_rate": 0.0001, "loss": 0.009, "step": 167940 }, { "epoch": 1104.9342105263158, "grad_norm": 1.1767057180404663, "learning_rate": 0.0001, "loss": 0.0101, "step": 167950 }, { "epoch": 1105.0, "grad_norm": 0.9892759323120117, "learning_rate": 0.0001, "loss": 0.0095, "step": 167960 }, { "epoch": 1105.0657894736842, "grad_norm": 0.8956315517425537, "learning_rate": 0.0001, "loss": 0.0091, "step": 167970 }, { "epoch": 1105.1315789473683, "grad_norm": 0.707263708114624, "learning_rate": 0.0001, "loss": 0.0092, "step": 167980 }, { "epoch": 1105.1973684210527, "grad_norm": 0.895297110080719, "learning_rate": 0.0001, "loss": 0.0094, "step": 167990 }, { "epoch": 1105.2631578947369, "grad_norm": 0.7965859174728394, "learning_rate": 0.0001, "loss": 0.0103, "step": 168000 }, { "epoch": 1105.328947368421, "grad_norm": 1.3010401725769043, "learning_rate": 0.0001, "loss": 0.0091, "step": 168010 }, { "epoch": 1105.3947368421052, "grad_norm": 1.0735414028167725, "learning_rate": 0.0001, "loss": 0.0095, "step": 168020 }, { "epoch": 1105.4605263157894, "grad_norm": 1.0381656885147095, "learning_rate": 0.0001, "loss": 0.0118, "step": 168030 }, { "epoch": 1105.5263157894738, "grad_norm": 0.8729901909828186, "learning_rate": 0.0001, "loss": 0.0089, "step": 168040 }, { "epoch": 1105.592105263158, "grad_norm": 0.9868913888931274, "learning_rate": 0.0001, "loss": 0.0088, "step": 168050 }, { "epoch": 1105.657894736842, "grad_norm": 1.1911859512329102, "learning_rate": 0.0001, "loss": 0.0094, "step": 168060 }, { "epoch": 1105.7236842105262, "grad_norm": 1.3197442293167114, "learning_rate": 0.0001, "loss": 0.0081, "step": 168070 }, { "epoch": 1105.7894736842106, "grad_norm": 1.240401029586792, "learning_rate": 0.0001, "loss": 0.0091, "step": 168080 }, { "epoch": 1105.8552631578948, "grad_norm": 0.9005335569381714, "learning_rate": 0.0001, "loss": 0.0085, "step": 168090 }, { "epoch": 1105.921052631579, "grad_norm": 0.9704708456993103, "learning_rate": 0.0001, "loss": 0.0091, "step": 168100 }, { "epoch": 1105.9868421052631, "grad_norm": 0.759002149105072, "learning_rate": 0.0001, "loss": 0.0078, "step": 168110 }, { "epoch": 1106.0526315789473, "grad_norm": 0.8771544694900513, "learning_rate": 0.0001, "loss": 0.0084, "step": 168120 }, { "epoch": 1106.1184210526317, "grad_norm": 1.3510820865631104, "learning_rate": 0.0001, "loss": 0.009, "step": 168130 }, { "epoch": 1106.1842105263158, "grad_norm": 1.1021738052368164, "learning_rate": 0.0001, "loss": 0.0092, "step": 168140 }, { "epoch": 1106.25, "grad_norm": 1.1089617013931274, "learning_rate": 0.0001, "loss": 0.0087, "step": 168150 }, { "epoch": 1106.3157894736842, "grad_norm": 1.5470319986343384, "learning_rate": 0.0001, "loss": 0.0095, "step": 168160 }, { "epoch": 1106.3815789473683, "grad_norm": 0.8695709109306335, "learning_rate": 0.0001, "loss": 0.0089, "step": 168170 }, { "epoch": 1106.4473684210527, "grad_norm": 1.176269769668579, "learning_rate": 0.0001, "loss": 0.01, "step": 168180 }, { "epoch": 1106.5131578947369, "grad_norm": 1.0304797887802124, "learning_rate": 0.0001, "loss": 0.0078, "step": 168190 }, { "epoch": 1106.578947368421, "grad_norm": 1.2818888425827026, "learning_rate": 0.0001, "loss": 0.0074, "step": 168200 }, { "epoch": 1106.6447368421052, "grad_norm": 1.1938977241516113, "learning_rate": 0.0001, "loss": 0.0105, "step": 168210 }, { "epoch": 1106.7105263157894, "grad_norm": 0.8075238466262817, "learning_rate": 0.0001, "loss": 0.0114, "step": 168220 }, { "epoch": 1106.7763157894738, "grad_norm": 1.112987756729126, "learning_rate": 0.0001, "loss": 0.009, "step": 168230 }, { "epoch": 1106.842105263158, "grad_norm": 1.1107630729675293, "learning_rate": 0.0001, "loss": 0.0089, "step": 168240 }, { "epoch": 1106.907894736842, "grad_norm": 0.9863311052322388, "learning_rate": 0.0001, "loss": 0.008, "step": 168250 }, { "epoch": 1106.9736842105262, "grad_norm": 1.1156419515609741, "learning_rate": 0.0001, "loss": 0.0095, "step": 168260 }, { "epoch": 1107.0394736842106, "grad_norm": 1.1970049142837524, "learning_rate": 0.0001, "loss": 0.0092, "step": 168270 }, { "epoch": 1107.1052631578948, "grad_norm": 1.1724064350128174, "learning_rate": 0.0001, "loss": 0.0097, "step": 168280 }, { "epoch": 1107.171052631579, "grad_norm": 1.0076320171356201, "learning_rate": 0.0001, "loss": 0.0097, "step": 168290 }, { "epoch": 1107.2368421052631, "grad_norm": 1.0176210403442383, "learning_rate": 0.0001, "loss": 0.0098, "step": 168300 }, { "epoch": 1107.3026315789473, "grad_norm": 1.0968610048294067, "learning_rate": 0.0001, "loss": 0.0082, "step": 168310 }, { "epoch": 1107.3684210526317, "grad_norm": 1.47584867477417, "learning_rate": 0.0001, "loss": 0.0099, "step": 168320 }, { "epoch": 1107.4342105263158, "grad_norm": 1.49242103099823, "learning_rate": 0.0001, "loss": 0.0099, "step": 168330 }, { "epoch": 1107.5, "grad_norm": 1.3437186479568481, "learning_rate": 0.0001, "loss": 0.0096, "step": 168340 }, { "epoch": 1107.5657894736842, "grad_norm": 1.1982983350753784, "learning_rate": 0.0001, "loss": 0.0107, "step": 168350 }, { "epoch": 1107.6315789473683, "grad_norm": 1.1239218711853027, "learning_rate": 0.0001, "loss": 0.0091, "step": 168360 }, { "epoch": 1107.6973684210527, "grad_norm": 0.9983989000320435, "learning_rate": 0.0001, "loss": 0.009, "step": 168370 }, { "epoch": 1107.7631578947369, "grad_norm": 1.2701680660247803, "learning_rate": 0.0001, "loss": 0.0092, "step": 168380 }, { "epoch": 1107.828947368421, "grad_norm": 1.07079017162323, "learning_rate": 0.0001, "loss": 0.0074, "step": 168390 }, { "epoch": 1107.8947368421052, "grad_norm": 1.092276930809021, "learning_rate": 0.0001, "loss": 0.0093, "step": 168400 }, { "epoch": 1107.9605263157894, "grad_norm": 0.7285662293434143, "learning_rate": 0.0001, "loss": 0.0087, "step": 168410 }, { "epoch": 1108.0263157894738, "grad_norm": 1.0730990171432495, "learning_rate": 0.0001, "loss": 0.0098, "step": 168420 }, { "epoch": 1108.092105263158, "grad_norm": 1.0767719745635986, "learning_rate": 0.0001, "loss": 0.0108, "step": 168430 }, { "epoch": 1108.157894736842, "grad_norm": 1.1680963039398193, "learning_rate": 0.0001, "loss": 0.0098, "step": 168440 }, { "epoch": 1108.2236842105262, "grad_norm": 1.048923373222351, "learning_rate": 0.0001, "loss": 0.0106, "step": 168450 }, { "epoch": 1108.2894736842106, "grad_norm": 0.9691452980041504, "learning_rate": 0.0001, "loss": 0.0099, "step": 168460 }, { "epoch": 1108.3552631578948, "grad_norm": 0.694308340549469, "learning_rate": 0.0001, "loss": 0.009, "step": 168470 }, { "epoch": 1108.421052631579, "grad_norm": 0.8766660094261169, "learning_rate": 0.0001, "loss": 0.0092, "step": 168480 }, { "epoch": 1108.4868421052631, "grad_norm": 0.8163343071937561, "learning_rate": 0.0001, "loss": 0.0087, "step": 168490 }, { "epoch": 1108.5526315789473, "grad_norm": 0.7243715524673462, "learning_rate": 0.0001, "loss": 0.0087, "step": 168500 }, { "epoch": 1108.6184210526317, "grad_norm": 1.2673771381378174, "learning_rate": 0.0001, "loss": 0.0087, "step": 168510 }, { "epoch": 1108.6842105263158, "grad_norm": 0.9088281989097595, "learning_rate": 0.0001, "loss": 0.0096, "step": 168520 }, { "epoch": 1108.75, "grad_norm": 1.3097550868988037, "learning_rate": 0.0001, "loss": 0.0096, "step": 168530 }, { "epoch": 1108.8157894736842, "grad_norm": 1.2778091430664062, "learning_rate": 0.0001, "loss": 0.0075, "step": 168540 }, { "epoch": 1108.8815789473683, "grad_norm": 1.2676957845687866, "learning_rate": 0.0001, "loss": 0.0077, "step": 168550 }, { "epoch": 1108.9473684210527, "grad_norm": 1.185399055480957, "learning_rate": 0.0001, "loss": 0.0088, "step": 168560 }, { "epoch": 1109.0131578947369, "grad_norm": 0.9912101626396179, "learning_rate": 0.0001, "loss": 0.0103, "step": 168570 }, { "epoch": 1109.078947368421, "grad_norm": 1.0790884494781494, "learning_rate": 0.0001, "loss": 0.0087, "step": 168580 }, { "epoch": 1109.1447368421052, "grad_norm": 0.9652689695358276, "learning_rate": 0.0001, "loss": 0.0081, "step": 168590 }, { "epoch": 1109.2105263157894, "grad_norm": 1.2606117725372314, "learning_rate": 0.0001, "loss": 0.0091, "step": 168600 }, { "epoch": 1109.2763157894738, "grad_norm": 1.2370003461837769, "learning_rate": 0.0001, "loss": 0.0093, "step": 168610 }, { "epoch": 1109.342105263158, "grad_norm": 0.9363791346549988, "learning_rate": 0.0001, "loss": 0.0103, "step": 168620 }, { "epoch": 1109.407894736842, "grad_norm": 1.1172534227371216, "learning_rate": 0.0001, "loss": 0.0087, "step": 168630 }, { "epoch": 1109.4736842105262, "grad_norm": 1.1383373737335205, "learning_rate": 0.0001, "loss": 0.0107, "step": 168640 }, { "epoch": 1109.5394736842106, "grad_norm": 1.16228449344635, "learning_rate": 0.0001, "loss": 0.008, "step": 168650 }, { "epoch": 1109.6052631578948, "grad_norm": 1.1701244115829468, "learning_rate": 0.0001, "loss": 0.0109, "step": 168660 }, { "epoch": 1109.671052631579, "grad_norm": 1.5518008470535278, "learning_rate": 0.0001, "loss": 0.0077, "step": 168670 }, { "epoch": 1109.7368421052631, "grad_norm": 1.4797471761703491, "learning_rate": 0.0001, "loss": 0.0119, "step": 168680 }, { "epoch": 1109.8026315789473, "grad_norm": 1.0961973667144775, "learning_rate": 0.0001, "loss": 0.009, "step": 168690 }, { "epoch": 1109.8684210526317, "grad_norm": 1.1600146293640137, "learning_rate": 0.0001, "loss": 0.0082, "step": 168700 }, { "epoch": 1109.9342105263158, "grad_norm": 1.3564647436141968, "learning_rate": 0.0001, "loss": 0.009, "step": 168710 }, { "epoch": 1110.0, "grad_norm": 1.7049609422683716, "learning_rate": 0.0001, "loss": 0.0098, "step": 168720 }, { "epoch": 1110.0657894736842, "grad_norm": 1.5665743350982666, "learning_rate": 0.0001, "loss": 0.0081, "step": 168730 }, { "epoch": 1110.1315789473683, "grad_norm": 1.5646759271621704, "learning_rate": 0.0001, "loss": 0.0097, "step": 168740 }, { "epoch": 1110.1973684210527, "grad_norm": 1.1867520809173584, "learning_rate": 0.0001, "loss": 0.0076, "step": 168750 }, { "epoch": 1110.2631578947369, "grad_norm": 1.6456133127212524, "learning_rate": 0.0001, "loss": 0.0107, "step": 168760 }, { "epoch": 1110.328947368421, "grad_norm": 1.2314730882644653, "learning_rate": 0.0001, "loss": 0.0096, "step": 168770 }, { "epoch": 1110.3947368421052, "grad_norm": 1.0836608409881592, "learning_rate": 0.0001, "loss": 0.0084, "step": 168780 }, { "epoch": 1110.4605263157894, "grad_norm": 0.9682186245918274, "learning_rate": 0.0001, "loss": 0.0087, "step": 168790 }, { "epoch": 1110.5263157894738, "grad_norm": 0.8563161492347717, "learning_rate": 0.0001, "loss": 0.0076, "step": 168800 }, { "epoch": 1110.592105263158, "grad_norm": 0.9814671277999878, "learning_rate": 0.0001, "loss": 0.0105, "step": 168810 }, { "epoch": 1110.657894736842, "grad_norm": 0.7647870182991028, "learning_rate": 0.0001, "loss": 0.0084, "step": 168820 }, { "epoch": 1110.7236842105262, "grad_norm": 1.3109147548675537, "learning_rate": 0.0001, "loss": 0.0098, "step": 168830 }, { "epoch": 1110.7894736842106, "grad_norm": 1.0601048469543457, "learning_rate": 0.0001, "loss": 0.009, "step": 168840 }, { "epoch": 1110.8552631578948, "grad_norm": 1.160309910774231, "learning_rate": 0.0001, "loss": 0.0102, "step": 168850 }, { "epoch": 1110.921052631579, "grad_norm": 1.0141721963882446, "learning_rate": 0.0001, "loss": 0.0092, "step": 168860 }, { "epoch": 1110.9868421052631, "grad_norm": 0.9558742046356201, "learning_rate": 0.0001, "loss": 0.0088, "step": 168870 }, { "epoch": 1111.0526315789473, "grad_norm": 1.0279165506362915, "learning_rate": 0.0001, "loss": 0.0083, "step": 168880 }, { "epoch": 1111.1184210526317, "grad_norm": 1.179625153541565, "learning_rate": 0.0001, "loss": 0.0099, "step": 168890 }, { "epoch": 1111.1842105263158, "grad_norm": 1.070167064666748, "learning_rate": 0.0001, "loss": 0.0079, "step": 168900 }, { "epoch": 1111.25, "grad_norm": 1.2725509405136108, "learning_rate": 0.0001, "loss": 0.0097, "step": 168910 }, { "epoch": 1111.3157894736842, "grad_norm": 0.9104771018028259, "learning_rate": 0.0001, "loss": 0.0091, "step": 168920 }, { "epoch": 1111.3815789473683, "grad_norm": 0.937410831451416, "learning_rate": 0.0001, "loss": 0.0078, "step": 168930 }, { "epoch": 1111.4473684210527, "grad_norm": 1.037760615348816, "learning_rate": 0.0001, "loss": 0.0085, "step": 168940 }, { "epoch": 1111.5131578947369, "grad_norm": 1.1063820123672485, "learning_rate": 0.0001, "loss": 0.0093, "step": 168950 }, { "epoch": 1111.578947368421, "grad_norm": 1.0770851373672485, "learning_rate": 0.0001, "loss": 0.0094, "step": 168960 }, { "epoch": 1111.6447368421052, "grad_norm": 1.1745645999908447, "learning_rate": 0.0001, "loss": 0.0089, "step": 168970 }, { "epoch": 1111.7105263157894, "grad_norm": 0.936163604259491, "learning_rate": 0.0001, "loss": 0.0091, "step": 168980 }, { "epoch": 1111.7763157894738, "grad_norm": 1.031320333480835, "learning_rate": 0.0001, "loss": 0.0097, "step": 168990 }, { "epoch": 1111.842105263158, "grad_norm": 0.6643326878547668, "learning_rate": 0.0001, "loss": 0.0096, "step": 169000 }, { "epoch": 1111.907894736842, "grad_norm": 0.8796061873435974, "learning_rate": 0.0001, "loss": 0.0107, "step": 169010 }, { "epoch": 1111.9736842105262, "grad_norm": 0.9710099697113037, "learning_rate": 0.0001, "loss": 0.0107, "step": 169020 }, { "epoch": 1112.0394736842106, "grad_norm": 1.134057879447937, "learning_rate": 0.0001, "loss": 0.0109, "step": 169030 }, { "epoch": 1112.1052631578948, "grad_norm": 0.7296460270881653, "learning_rate": 0.0001, "loss": 0.0097, "step": 169040 }, { "epoch": 1112.171052631579, "grad_norm": 0.7189831733703613, "learning_rate": 0.0001, "loss": 0.0094, "step": 169050 }, { "epoch": 1112.2368421052631, "grad_norm": 0.8894656896591187, "learning_rate": 0.0001, "loss": 0.0081, "step": 169060 }, { "epoch": 1112.3026315789473, "grad_norm": 1.1131048202514648, "learning_rate": 0.0001, "loss": 0.01, "step": 169070 }, { "epoch": 1112.3684210526317, "grad_norm": 1.5262320041656494, "learning_rate": 0.0001, "loss": 0.0097, "step": 169080 }, { "epoch": 1112.4342105263158, "grad_norm": 1.2792640924453735, "learning_rate": 0.0001, "loss": 0.0091, "step": 169090 }, { "epoch": 1112.5, "grad_norm": 1.091755747795105, "learning_rate": 0.0001, "loss": 0.0099, "step": 169100 }, { "epoch": 1112.5657894736842, "grad_norm": 1.1923725605010986, "learning_rate": 0.0001, "loss": 0.0087, "step": 169110 }, { "epoch": 1112.6315789473683, "grad_norm": 1.2157421112060547, "learning_rate": 0.0001, "loss": 0.0094, "step": 169120 }, { "epoch": 1112.6973684210527, "grad_norm": 1.0996853113174438, "learning_rate": 0.0001, "loss": 0.0124, "step": 169130 }, { "epoch": 1112.7631578947369, "grad_norm": 1.3527673482894897, "learning_rate": 0.0001, "loss": 0.0103, "step": 169140 }, { "epoch": 1112.828947368421, "grad_norm": 1.1760993003845215, "learning_rate": 0.0001, "loss": 0.0103, "step": 169150 }, { "epoch": 1112.8947368421052, "grad_norm": 1.1394774913787842, "learning_rate": 0.0001, "loss": 0.0093, "step": 169160 }, { "epoch": 1112.9605263157894, "grad_norm": 1.1786078214645386, "learning_rate": 0.0001, "loss": 0.0074, "step": 169170 }, { "epoch": 1113.0263157894738, "grad_norm": 2.485809326171875, "learning_rate": 0.0001, "loss": 0.0114, "step": 169180 }, { "epoch": 1113.092105263158, "grad_norm": 1.372261881828308, "learning_rate": 0.0001, "loss": 0.0094, "step": 169190 }, { "epoch": 1113.157894736842, "grad_norm": 1.125187873840332, "learning_rate": 0.0001, "loss": 0.01, "step": 169200 }, { "epoch": 1113.2236842105262, "grad_norm": 1.4014942646026611, "learning_rate": 0.0001, "loss": 0.0101, "step": 169210 }, { "epoch": 1113.2894736842106, "grad_norm": 1.0641945600509644, "learning_rate": 0.0001, "loss": 0.0099, "step": 169220 }, { "epoch": 1113.3552631578948, "grad_norm": 1.023398756980896, "learning_rate": 0.0001, "loss": 0.0105, "step": 169230 }, { "epoch": 1113.421052631579, "grad_norm": 0.8850655555725098, "learning_rate": 0.0001, "loss": 0.0105, "step": 169240 }, { "epoch": 1113.4868421052631, "grad_norm": 1.0332579612731934, "learning_rate": 0.0001, "loss": 0.0121, "step": 169250 }, { "epoch": 1113.5526315789473, "grad_norm": 1.3429882526397705, "learning_rate": 0.0001, "loss": 0.0104, "step": 169260 }, { "epoch": 1113.6184210526317, "grad_norm": 1.1904481649398804, "learning_rate": 0.0001, "loss": 0.0104, "step": 169270 }, { "epoch": 1113.6842105263158, "grad_norm": 0.8941307067871094, "learning_rate": 0.0001, "loss": 0.0089, "step": 169280 }, { "epoch": 1113.75, "grad_norm": 1.1564146280288696, "learning_rate": 0.0001, "loss": 0.0098, "step": 169290 }, { "epoch": 1113.8157894736842, "grad_norm": 0.770451545715332, "learning_rate": 0.0001, "loss": 0.0097, "step": 169300 }, { "epoch": 1113.8815789473683, "grad_norm": 0.9108574986457825, "learning_rate": 0.0001, "loss": 0.0107, "step": 169310 }, { "epoch": 1113.9473684210527, "grad_norm": 0.8220398426055908, "learning_rate": 0.0001, "loss": 0.0105, "step": 169320 }, { "epoch": 1114.0131578947369, "grad_norm": 1.039478063583374, "learning_rate": 0.0001, "loss": 0.009, "step": 169330 }, { "epoch": 1114.078947368421, "grad_norm": 1.078933835029602, "learning_rate": 0.0001, "loss": 0.0086, "step": 169340 }, { "epoch": 1114.1447368421052, "grad_norm": 1.0920974016189575, "learning_rate": 0.0001, "loss": 0.0109, "step": 169350 }, { "epoch": 1114.2105263157894, "grad_norm": 0.9281700253486633, "learning_rate": 0.0001, "loss": 0.0092, "step": 169360 }, { "epoch": 1114.2763157894738, "grad_norm": 1.1259106397628784, "learning_rate": 0.0001, "loss": 0.0113, "step": 169370 }, { "epoch": 1114.342105263158, "grad_norm": 0.9117035269737244, "learning_rate": 0.0001, "loss": 0.0093, "step": 169380 }, { "epoch": 1114.407894736842, "grad_norm": 1.178030252456665, "learning_rate": 0.0001, "loss": 0.0096, "step": 169390 }, { "epoch": 1114.4736842105262, "grad_norm": 1.0540664196014404, "learning_rate": 0.0001, "loss": 0.0098, "step": 169400 }, { "epoch": 1114.5394736842106, "grad_norm": 1.3689993619918823, "learning_rate": 0.0001, "loss": 0.0097, "step": 169410 }, { "epoch": 1114.6052631578948, "grad_norm": 1.2431306838989258, "learning_rate": 0.0001, "loss": 0.0082, "step": 169420 }, { "epoch": 1114.671052631579, "grad_norm": 1.0379416942596436, "learning_rate": 0.0001, "loss": 0.0095, "step": 169430 }, { "epoch": 1114.7368421052631, "grad_norm": 1.2257938385009766, "learning_rate": 0.0001, "loss": 0.0094, "step": 169440 }, { "epoch": 1114.8026315789473, "grad_norm": 0.9797086119651794, "learning_rate": 0.0001, "loss": 0.0098, "step": 169450 }, { "epoch": 1114.8684210526317, "grad_norm": 1.3930553197860718, "learning_rate": 0.0001, "loss": 0.0096, "step": 169460 }, { "epoch": 1114.9342105263158, "grad_norm": 1.1438504457473755, "learning_rate": 0.0001, "loss": 0.0083, "step": 169470 }, { "epoch": 1115.0, "grad_norm": 1.0164341926574707, "learning_rate": 0.0001, "loss": 0.0098, "step": 169480 }, { "epoch": 1115.0657894736842, "grad_norm": 0.706180989742279, "learning_rate": 0.0001, "loss": 0.0081, "step": 169490 }, { "epoch": 1115.1315789473683, "grad_norm": 1.0323935747146606, "learning_rate": 0.0001, "loss": 0.0102, "step": 169500 }, { "epoch": 1115.1973684210527, "grad_norm": 0.8739573359489441, "learning_rate": 0.0001, "loss": 0.0101, "step": 169510 }, { "epoch": 1115.2631578947369, "grad_norm": 1.4711079597473145, "learning_rate": 0.0001, "loss": 0.0093, "step": 169520 }, { "epoch": 1115.328947368421, "grad_norm": 1.3129510879516602, "learning_rate": 0.0001, "loss": 0.0096, "step": 169530 }, { "epoch": 1115.3947368421052, "grad_norm": 1.3260138034820557, "learning_rate": 0.0001, "loss": 0.0107, "step": 169540 }, { "epoch": 1115.4605263157894, "grad_norm": 1.2280200719833374, "learning_rate": 0.0001, "loss": 0.013, "step": 169550 }, { "epoch": 1115.5263157894738, "grad_norm": 1.0579249858856201, "learning_rate": 0.0001, "loss": 0.0094, "step": 169560 }, { "epoch": 1115.592105263158, "grad_norm": 1.5779436826705933, "learning_rate": 0.0001, "loss": 0.0089, "step": 169570 }, { "epoch": 1115.657894736842, "grad_norm": 1.2979265451431274, "learning_rate": 0.0001, "loss": 0.01, "step": 169580 }, { "epoch": 1115.7236842105262, "grad_norm": 1.2990245819091797, "learning_rate": 0.0001, "loss": 0.0096, "step": 169590 }, { "epoch": 1115.7894736842106, "grad_norm": 1.1901843547821045, "learning_rate": 0.0001, "loss": 0.0115, "step": 169600 }, { "epoch": 1115.8552631578948, "grad_norm": 1.2466164827346802, "learning_rate": 0.0001, "loss": 0.0082, "step": 169610 }, { "epoch": 1115.921052631579, "grad_norm": 0.8972548842430115, "learning_rate": 0.0001, "loss": 0.0086, "step": 169620 }, { "epoch": 1115.9868421052631, "grad_norm": 0.7799301743507385, "learning_rate": 0.0001, "loss": 0.0093, "step": 169630 }, { "epoch": 1116.0526315789473, "grad_norm": 0.7389102578163147, "learning_rate": 0.0001, "loss": 0.0112, "step": 169640 }, { "epoch": 1116.1184210526317, "grad_norm": 1.1879955530166626, "learning_rate": 0.0001, "loss": 0.0087, "step": 169650 }, { "epoch": 1116.1842105263158, "grad_norm": 1.0571030378341675, "learning_rate": 0.0001, "loss": 0.0101, "step": 169660 }, { "epoch": 1116.25, "grad_norm": 1.1208561658859253, "learning_rate": 0.0001, "loss": 0.0085, "step": 169670 }, { "epoch": 1116.3157894736842, "grad_norm": 1.211946964263916, "learning_rate": 0.0001, "loss": 0.0089, "step": 169680 }, { "epoch": 1116.3815789473683, "grad_norm": 1.0168685913085938, "learning_rate": 0.0001, "loss": 0.0103, "step": 169690 }, { "epoch": 1116.4473684210527, "grad_norm": 1.4225287437438965, "learning_rate": 0.0001, "loss": 0.0099, "step": 169700 }, { "epoch": 1116.5131578947369, "grad_norm": 0.7239480018615723, "learning_rate": 0.0001, "loss": 0.0106, "step": 169710 }, { "epoch": 1116.578947368421, "grad_norm": 1.2720476388931274, "learning_rate": 0.0001, "loss": 0.0101, "step": 169720 }, { "epoch": 1116.6447368421052, "grad_norm": 1.250244140625, "learning_rate": 0.0001, "loss": 0.0083, "step": 169730 }, { "epoch": 1116.7105263157894, "grad_norm": 1.0670952796936035, "learning_rate": 0.0001, "loss": 0.0108, "step": 169740 }, { "epoch": 1116.7763157894738, "grad_norm": 1.2624378204345703, "learning_rate": 0.0001, "loss": 0.0089, "step": 169750 }, { "epoch": 1116.842105263158, "grad_norm": 1.0680029392242432, "learning_rate": 0.0001, "loss": 0.0093, "step": 169760 }, { "epoch": 1116.907894736842, "grad_norm": 0.9182707071304321, "learning_rate": 0.0001, "loss": 0.0089, "step": 169770 }, { "epoch": 1116.9736842105262, "grad_norm": 1.0356745719909668, "learning_rate": 0.0001, "loss": 0.0081, "step": 169780 }, { "epoch": 1117.0394736842106, "grad_norm": 1.2450051307678223, "learning_rate": 0.0001, "loss": 0.0095, "step": 169790 }, { "epoch": 1117.1052631578948, "grad_norm": 1.1483206748962402, "learning_rate": 0.0001, "loss": 0.0099, "step": 169800 }, { "epoch": 1117.171052631579, "grad_norm": 1.0393849611282349, "learning_rate": 0.0001, "loss": 0.0097, "step": 169810 }, { "epoch": 1117.2368421052631, "grad_norm": 1.1354923248291016, "learning_rate": 0.0001, "loss": 0.011, "step": 169820 }, { "epoch": 1117.3026315789473, "grad_norm": 1.2320741415023804, "learning_rate": 0.0001, "loss": 0.0075, "step": 169830 }, { "epoch": 1117.3684210526317, "grad_norm": 1.2553433179855347, "learning_rate": 0.0001, "loss": 0.0091, "step": 169840 }, { "epoch": 1117.4342105263158, "grad_norm": 1.142922043800354, "learning_rate": 0.0001, "loss": 0.0071, "step": 169850 }, { "epoch": 1117.5, "grad_norm": 1.2326382398605347, "learning_rate": 0.0001, "loss": 0.008, "step": 169860 }, { "epoch": 1117.5657894736842, "grad_norm": 1.5461610555648804, "learning_rate": 0.0001, "loss": 0.0098, "step": 169870 }, { "epoch": 1117.6315789473683, "grad_norm": 0.9742788076400757, "learning_rate": 0.0001, "loss": 0.0099, "step": 169880 }, { "epoch": 1117.6973684210527, "grad_norm": 0.898607075214386, "learning_rate": 0.0001, "loss": 0.008, "step": 169890 }, { "epoch": 1117.7631578947369, "grad_norm": 0.6093039512634277, "learning_rate": 0.0001, "loss": 0.0088, "step": 169900 }, { "epoch": 1117.828947368421, "grad_norm": 1.1345704793930054, "learning_rate": 0.0001, "loss": 0.0073, "step": 169910 }, { "epoch": 1117.8947368421052, "grad_norm": 0.851069450378418, "learning_rate": 0.0001, "loss": 0.0084, "step": 169920 }, { "epoch": 1117.9605263157894, "grad_norm": 0.9209760427474976, "learning_rate": 0.0001, "loss": 0.0097, "step": 169930 }, { "epoch": 1118.0263157894738, "grad_norm": 0.86602783203125, "learning_rate": 0.0001, "loss": 0.0087, "step": 169940 }, { "epoch": 1118.092105263158, "grad_norm": 0.698943555355072, "learning_rate": 0.0001, "loss": 0.0101, "step": 169950 }, { "epoch": 1118.157894736842, "grad_norm": 0.8509097695350647, "learning_rate": 0.0001, "loss": 0.0094, "step": 169960 }, { "epoch": 1118.2236842105262, "grad_norm": 0.893696665763855, "learning_rate": 0.0001, "loss": 0.0102, "step": 169970 }, { "epoch": 1118.2894736842106, "grad_norm": 0.7406723499298096, "learning_rate": 0.0001, "loss": 0.0085, "step": 169980 }, { "epoch": 1118.3552631578948, "grad_norm": 1.147310495376587, "learning_rate": 0.0001, "loss": 0.0075, "step": 169990 }, { "epoch": 1118.421052631579, "grad_norm": 0.8037264347076416, "learning_rate": 0.0001, "loss": 0.0109, "step": 170000 }, { "epoch": 1118.4868421052631, "grad_norm": 0.5971798300743103, "learning_rate": 0.0001, "loss": 0.009, "step": 170010 }, { "epoch": 1118.5526315789473, "grad_norm": 0.7508800625801086, "learning_rate": 0.0001, "loss": 0.009, "step": 170020 }, { "epoch": 1118.6184210526317, "grad_norm": 1.1436516046524048, "learning_rate": 0.0001, "loss": 0.0091, "step": 170030 }, { "epoch": 1118.6842105263158, "grad_norm": 1.146233320236206, "learning_rate": 0.0001, "loss": 0.0084, "step": 170040 }, { "epoch": 1118.75, "grad_norm": 1.1808003187179565, "learning_rate": 0.0001, "loss": 0.0083, "step": 170050 }, { "epoch": 1118.8157894736842, "grad_norm": 0.8754172325134277, "learning_rate": 0.0001, "loss": 0.0093, "step": 170060 }, { "epoch": 1118.8815789473683, "grad_norm": 1.1422688961029053, "learning_rate": 0.0001, "loss": 0.0087, "step": 170070 }, { "epoch": 1118.9473684210527, "grad_norm": 0.8434144854545593, "learning_rate": 0.0001, "loss": 0.0099, "step": 170080 }, { "epoch": 1119.0131578947369, "grad_norm": 1.1790432929992676, "learning_rate": 0.0001, "loss": 0.0122, "step": 170090 }, { "epoch": 1119.078947368421, "grad_norm": 0.9969106316566467, "learning_rate": 0.0001, "loss": 0.0084, "step": 170100 }, { "epoch": 1119.1447368421052, "grad_norm": 0.6672012209892273, "learning_rate": 0.0001, "loss": 0.01, "step": 170110 }, { "epoch": 1119.2105263157894, "grad_norm": 1.309625267982483, "learning_rate": 0.0001, "loss": 0.0084, "step": 170120 }, { "epoch": 1119.2763157894738, "grad_norm": 1.0300084352493286, "learning_rate": 0.0001, "loss": 0.0094, "step": 170130 }, { "epoch": 1119.342105263158, "grad_norm": 0.9487712979316711, "learning_rate": 0.0001, "loss": 0.0088, "step": 170140 }, { "epoch": 1119.407894736842, "grad_norm": 1.0787352323532104, "learning_rate": 0.0001, "loss": 0.01, "step": 170150 }, { "epoch": 1119.4736842105262, "grad_norm": 1.1711540222167969, "learning_rate": 0.0001, "loss": 0.0092, "step": 170160 }, { "epoch": 1119.5394736842106, "grad_norm": 0.9721617102622986, "learning_rate": 0.0001, "loss": 0.0086, "step": 170170 }, { "epoch": 1119.6052631578948, "grad_norm": 1.088510513305664, "learning_rate": 0.0001, "loss": 0.0093, "step": 170180 }, { "epoch": 1119.671052631579, "grad_norm": 1.2319920063018799, "learning_rate": 0.0001, "loss": 0.0083, "step": 170190 }, { "epoch": 1119.7368421052631, "grad_norm": 0.7855985760688782, "learning_rate": 0.0001, "loss": 0.009, "step": 170200 }, { "epoch": 1119.8026315789473, "grad_norm": 0.8179022669792175, "learning_rate": 0.0001, "loss": 0.0115, "step": 170210 }, { "epoch": 1119.8684210526317, "grad_norm": 1.2478657960891724, "learning_rate": 0.0001, "loss": 0.01, "step": 170220 }, { "epoch": 1119.9342105263158, "grad_norm": 0.9301165342330933, "learning_rate": 0.0001, "loss": 0.0086, "step": 170230 }, { "epoch": 1120.0, "grad_norm": 1.096358299255371, "learning_rate": 0.0001, "loss": 0.0097, "step": 170240 }, { "epoch": 1120.0657894736842, "grad_norm": 1.134454369544983, "learning_rate": 0.0001, "loss": 0.0106, "step": 170250 }, { "epoch": 1120.1315789473683, "grad_norm": 1.2220538854599, "learning_rate": 0.0001, "loss": 0.0094, "step": 170260 }, { "epoch": 1120.1973684210527, "grad_norm": 1.1378227472305298, "learning_rate": 0.0001, "loss": 0.0098, "step": 170270 }, { "epoch": 1120.2631578947369, "grad_norm": 0.8785101771354675, "learning_rate": 0.0001, "loss": 0.0087, "step": 170280 }, { "epoch": 1120.328947368421, "grad_norm": 0.8152796626091003, "learning_rate": 0.0001, "loss": 0.01, "step": 170290 }, { "epoch": 1120.3947368421052, "grad_norm": 0.6250335574150085, "learning_rate": 0.0001, "loss": 0.0087, "step": 170300 }, { "epoch": 1120.4605263157894, "grad_norm": 1.0436937808990479, "learning_rate": 0.0001, "loss": 0.0089, "step": 170310 }, { "epoch": 1120.5263157894738, "grad_norm": 0.9515871405601501, "learning_rate": 0.0001, "loss": 0.0092, "step": 170320 }, { "epoch": 1120.592105263158, "grad_norm": 1.3222743272781372, "learning_rate": 0.0001, "loss": 0.0102, "step": 170330 }, { "epoch": 1120.657894736842, "grad_norm": 0.754511833190918, "learning_rate": 0.0001, "loss": 0.0103, "step": 170340 }, { "epoch": 1120.7236842105262, "grad_norm": 1.0005298852920532, "learning_rate": 0.0001, "loss": 0.0086, "step": 170350 }, { "epoch": 1120.7894736842106, "grad_norm": 1.1745997667312622, "learning_rate": 0.0001, "loss": 0.0089, "step": 170360 }, { "epoch": 1120.8552631578948, "grad_norm": 1.2459282875061035, "learning_rate": 0.0001, "loss": 0.0099, "step": 170370 }, { "epoch": 1120.921052631579, "grad_norm": 1.0530474185943604, "learning_rate": 0.0001, "loss": 0.0071, "step": 170380 }, { "epoch": 1120.9868421052631, "grad_norm": 1.2831578254699707, "learning_rate": 0.0001, "loss": 0.0085, "step": 170390 }, { "epoch": 1121.0526315789473, "grad_norm": 1.1489259004592896, "learning_rate": 0.0001, "loss": 0.0102, "step": 170400 }, { "epoch": 1121.1184210526317, "grad_norm": 0.929141104221344, "learning_rate": 0.0001, "loss": 0.0079, "step": 170410 }, { "epoch": 1121.1842105263158, "grad_norm": 1.1473424434661865, "learning_rate": 0.0001, "loss": 0.0081, "step": 170420 }, { "epoch": 1121.25, "grad_norm": 1.2042150497436523, "learning_rate": 0.0001, "loss": 0.009, "step": 170430 }, { "epoch": 1121.3157894736842, "grad_norm": 0.9099306464195251, "learning_rate": 0.0001, "loss": 0.0087, "step": 170440 }, { "epoch": 1121.3815789473683, "grad_norm": 1.141692876815796, "learning_rate": 0.0001, "loss": 0.0101, "step": 170450 }, { "epoch": 1121.4473684210527, "grad_norm": 1.392948865890503, "learning_rate": 0.0001, "loss": 0.0098, "step": 170460 }, { "epoch": 1121.5131578947369, "grad_norm": 0.8575266599655151, "learning_rate": 0.0001, "loss": 0.0075, "step": 170470 }, { "epoch": 1121.578947368421, "grad_norm": 0.9454750418663025, "learning_rate": 0.0001, "loss": 0.0103, "step": 170480 }, { "epoch": 1121.6447368421052, "grad_norm": 0.644149899482727, "learning_rate": 0.0001, "loss": 0.009, "step": 170490 }, { "epoch": 1121.7105263157894, "grad_norm": 1.050371527671814, "learning_rate": 0.0001, "loss": 0.0079, "step": 170500 }, { "epoch": 1121.7763157894738, "grad_norm": 1.0545912981033325, "learning_rate": 0.0001, "loss": 0.0104, "step": 170510 }, { "epoch": 1121.842105263158, "grad_norm": 0.8763865828514099, "learning_rate": 0.0001, "loss": 0.0093, "step": 170520 }, { "epoch": 1121.907894736842, "grad_norm": 1.6924924850463867, "learning_rate": 0.0001, "loss": 0.0087, "step": 170530 }, { "epoch": 1121.9736842105262, "grad_norm": 1.3894912004470825, "learning_rate": 0.0001, "loss": 0.0124, "step": 170540 }, { "epoch": 1122.0394736842106, "grad_norm": 1.1801551580429077, "learning_rate": 0.0001, "loss": 0.01, "step": 170550 }, { "epoch": 1122.1052631578948, "grad_norm": 1.1211940050125122, "learning_rate": 0.0001, "loss": 0.009, "step": 170560 }, { "epoch": 1122.171052631579, "grad_norm": 1.0931165218353271, "learning_rate": 0.0001, "loss": 0.0098, "step": 170570 }, { "epoch": 1122.2368421052631, "grad_norm": 1.0842170715332031, "learning_rate": 0.0001, "loss": 0.009, "step": 170580 }, { "epoch": 1122.3026315789473, "grad_norm": 1.0453462600708008, "learning_rate": 0.0001, "loss": 0.0097, "step": 170590 }, { "epoch": 1122.3684210526317, "grad_norm": 0.7612990736961365, "learning_rate": 0.0001, "loss": 0.0093, "step": 170600 }, { "epoch": 1122.4342105263158, "grad_norm": 1.1230723857879639, "learning_rate": 0.0001, "loss": 0.0102, "step": 170610 }, { "epoch": 1122.5, "grad_norm": 0.7165056467056274, "learning_rate": 0.0001, "loss": 0.0103, "step": 170620 }, { "epoch": 1122.5657894736842, "grad_norm": 1.0598911046981812, "learning_rate": 0.0001, "loss": 0.01, "step": 170630 }, { "epoch": 1122.6315789473683, "grad_norm": 1.3187886476516724, "learning_rate": 0.0001, "loss": 0.0088, "step": 170640 }, { "epoch": 1122.6973684210527, "grad_norm": 1.140576720237732, "learning_rate": 0.0001, "loss": 0.0105, "step": 170650 }, { "epoch": 1122.7631578947369, "grad_norm": 0.73148512840271, "learning_rate": 0.0001, "loss": 0.0094, "step": 170660 }, { "epoch": 1122.828947368421, "grad_norm": 1.5248452425003052, "learning_rate": 0.0001, "loss": 0.0096, "step": 170670 }, { "epoch": 1122.8947368421052, "grad_norm": 0.9757792949676514, "learning_rate": 0.0001, "loss": 0.0119, "step": 170680 }, { "epoch": 1122.9605263157894, "grad_norm": 0.7404382228851318, "learning_rate": 0.0001, "loss": 0.0116, "step": 170690 }, { "epoch": 1123.0263157894738, "grad_norm": 1.0916646718978882, "learning_rate": 0.0001, "loss": 0.0095, "step": 170700 }, { "epoch": 1123.092105263158, "grad_norm": 1.196270227432251, "learning_rate": 0.0001, "loss": 0.0098, "step": 170710 }, { "epoch": 1123.157894736842, "grad_norm": 1.4013103246688843, "learning_rate": 0.0001, "loss": 0.011, "step": 170720 }, { "epoch": 1123.2236842105262, "grad_norm": 1.210837960243225, "learning_rate": 0.0001, "loss": 0.0107, "step": 170730 }, { "epoch": 1123.2894736842106, "grad_norm": 1.1967648267745972, "learning_rate": 0.0001, "loss": 0.0087, "step": 170740 }, { "epoch": 1123.3552631578948, "grad_norm": 1.1867121458053589, "learning_rate": 0.0001, "loss": 0.0113, "step": 170750 }, { "epoch": 1123.421052631579, "grad_norm": 0.9490767121315002, "learning_rate": 0.0001, "loss": 0.0088, "step": 170760 }, { "epoch": 1123.4868421052631, "grad_norm": 0.8652997016906738, "learning_rate": 0.0001, "loss": 0.0076, "step": 170770 }, { "epoch": 1123.5526315789473, "grad_norm": 1.082627534866333, "learning_rate": 0.0001, "loss": 0.0105, "step": 170780 }, { "epoch": 1123.6184210526317, "grad_norm": 1.2542318105697632, "learning_rate": 0.0001, "loss": 0.0086, "step": 170790 }, { "epoch": 1123.6842105263158, "grad_norm": 1.3510973453521729, "learning_rate": 0.0001, "loss": 0.0108, "step": 170800 }, { "epoch": 1123.75, "grad_norm": 1.249051570892334, "learning_rate": 0.0001, "loss": 0.0093, "step": 170810 }, { "epoch": 1123.8157894736842, "grad_norm": 1.2348275184631348, "learning_rate": 0.0001, "loss": 0.008, "step": 170820 }, { "epoch": 1123.8815789473683, "grad_norm": 1.2740799188613892, "learning_rate": 0.0001, "loss": 0.0094, "step": 170830 }, { "epoch": 1123.9473684210527, "grad_norm": 1.133602499961853, "learning_rate": 0.0001, "loss": 0.0082, "step": 170840 }, { "epoch": 1124.0131578947369, "grad_norm": 0.8562770485877991, "learning_rate": 0.0001, "loss": 0.0083, "step": 170850 }, { "epoch": 1124.078947368421, "grad_norm": 1.2140693664550781, "learning_rate": 0.0001, "loss": 0.0096, "step": 170860 }, { "epoch": 1124.1447368421052, "grad_norm": 1.1629786491394043, "learning_rate": 0.0001, "loss": 0.01, "step": 170870 }, { "epoch": 1124.2105263157894, "grad_norm": 0.9350384473800659, "learning_rate": 0.0001, "loss": 0.0095, "step": 170880 }, { "epoch": 1124.2763157894738, "grad_norm": 0.7206969261169434, "learning_rate": 0.0001, "loss": 0.0113, "step": 170890 }, { "epoch": 1124.342105263158, "grad_norm": 1.0412704944610596, "learning_rate": 0.0001, "loss": 0.0089, "step": 170900 }, { "epoch": 1124.407894736842, "grad_norm": 1.0339332818984985, "learning_rate": 0.0001, "loss": 0.009, "step": 170910 }, { "epoch": 1124.4736842105262, "grad_norm": 0.9266976714134216, "learning_rate": 0.0001, "loss": 0.0076, "step": 170920 }, { "epoch": 1124.5394736842106, "grad_norm": 1.0310457944869995, "learning_rate": 0.0001, "loss": 0.0111, "step": 170930 }, { "epoch": 1124.6052631578948, "grad_norm": 1.2762702703475952, "learning_rate": 0.0001, "loss": 0.0102, "step": 170940 }, { "epoch": 1124.671052631579, "grad_norm": 1.1744694709777832, "learning_rate": 0.0001, "loss": 0.0085, "step": 170950 }, { "epoch": 1124.7368421052631, "grad_norm": 1.1720898151397705, "learning_rate": 0.0001, "loss": 0.0083, "step": 170960 }, { "epoch": 1124.8026315789473, "grad_norm": 0.9738285541534424, "learning_rate": 0.0001, "loss": 0.0095, "step": 170970 }, { "epoch": 1124.8684210526317, "grad_norm": 0.9768696427345276, "learning_rate": 0.0001, "loss": 0.0089, "step": 170980 }, { "epoch": 1124.9342105263158, "grad_norm": 1.0169459581375122, "learning_rate": 0.0001, "loss": 0.01, "step": 170990 }, { "epoch": 1125.0, "grad_norm": 0.9950452446937561, "learning_rate": 0.0001, "loss": 0.0094, "step": 171000 }, { "epoch": 1125.0657894736842, "grad_norm": 0.8835775256156921, "learning_rate": 0.0001, "loss": 0.0083, "step": 171010 }, { "epoch": 1125.1315789473683, "grad_norm": 0.8104636073112488, "learning_rate": 0.0001, "loss": 0.0093, "step": 171020 }, { "epoch": 1125.1973684210527, "grad_norm": 0.9511426091194153, "learning_rate": 0.0001, "loss": 0.0102, "step": 171030 }, { "epoch": 1125.2631578947369, "grad_norm": 0.873656153678894, "learning_rate": 0.0001, "loss": 0.0105, "step": 171040 }, { "epoch": 1125.328947368421, "grad_norm": 1.1368869543075562, "learning_rate": 0.0001, "loss": 0.0111, "step": 171050 }, { "epoch": 1125.3947368421052, "grad_norm": 0.568688154220581, "learning_rate": 0.0001, "loss": 0.009, "step": 171060 }, { "epoch": 1125.4605263157894, "grad_norm": 0.5979208946228027, "learning_rate": 0.0001, "loss": 0.0096, "step": 171070 }, { "epoch": 1125.5263157894738, "grad_norm": 0.9214772582054138, "learning_rate": 0.0001, "loss": 0.0099, "step": 171080 }, { "epoch": 1125.592105263158, "grad_norm": 0.7531283497810364, "learning_rate": 0.0001, "loss": 0.0082, "step": 171090 }, { "epoch": 1125.657894736842, "grad_norm": 1.2580556869506836, "learning_rate": 0.0001, "loss": 0.0095, "step": 171100 }, { "epoch": 1125.7236842105262, "grad_norm": 0.930986225605011, "learning_rate": 0.0001, "loss": 0.0094, "step": 171110 }, { "epoch": 1125.7894736842106, "grad_norm": 1.245627999305725, "learning_rate": 0.0001, "loss": 0.0091, "step": 171120 }, { "epoch": 1125.8552631578948, "grad_norm": 0.7773181796073914, "learning_rate": 0.0001, "loss": 0.0082, "step": 171130 }, { "epoch": 1125.921052631579, "grad_norm": 1.173557996749878, "learning_rate": 0.0001, "loss": 0.0082, "step": 171140 }, { "epoch": 1125.9868421052631, "grad_norm": 0.8408874869346619, "learning_rate": 0.0001, "loss": 0.0109, "step": 171150 }, { "epoch": 1126.0526315789473, "grad_norm": 0.6218984127044678, "learning_rate": 0.0001, "loss": 0.0089, "step": 171160 }, { "epoch": 1126.1184210526317, "grad_norm": 0.9372327923774719, "learning_rate": 0.0001, "loss": 0.0081, "step": 171170 }, { "epoch": 1126.1842105263158, "grad_norm": 0.9183964729309082, "learning_rate": 0.0001, "loss": 0.0084, "step": 171180 }, { "epoch": 1126.25, "grad_norm": 1.0843031406402588, "learning_rate": 0.0001, "loss": 0.0099, "step": 171190 }, { "epoch": 1126.3157894736842, "grad_norm": 1.221227765083313, "learning_rate": 0.0001, "loss": 0.0098, "step": 171200 }, { "epoch": 1126.3815789473683, "grad_norm": 1.0891140699386597, "learning_rate": 0.0001, "loss": 0.009, "step": 171210 }, { "epoch": 1126.4473684210527, "grad_norm": 1.0184756517410278, "learning_rate": 0.0001, "loss": 0.0088, "step": 171220 }, { "epoch": 1126.5131578947369, "grad_norm": 0.9050408601760864, "learning_rate": 0.0001, "loss": 0.0116, "step": 171230 }, { "epoch": 1126.578947368421, "grad_norm": 0.754024088382721, "learning_rate": 0.0001, "loss": 0.0111, "step": 171240 }, { "epoch": 1126.6447368421052, "grad_norm": 1.1629414558410645, "learning_rate": 0.0001, "loss": 0.0079, "step": 171250 }, { "epoch": 1126.7105263157894, "grad_norm": 1.1155112981796265, "learning_rate": 0.0001, "loss": 0.0093, "step": 171260 }, { "epoch": 1126.7763157894738, "grad_norm": 1.1457107067108154, "learning_rate": 0.0001, "loss": 0.0088, "step": 171270 }, { "epoch": 1126.842105263158, "grad_norm": 1.5924310684204102, "learning_rate": 0.0001, "loss": 0.0076, "step": 171280 }, { "epoch": 1126.907894736842, "grad_norm": 1.267576813697815, "learning_rate": 0.0001, "loss": 0.0082, "step": 171290 }, { "epoch": 1126.9736842105262, "grad_norm": 1.2480442523956299, "learning_rate": 0.0001, "loss": 0.0091, "step": 171300 }, { "epoch": 1127.0394736842106, "grad_norm": 0.923325777053833, "learning_rate": 0.0001, "loss": 0.0091, "step": 171310 }, { "epoch": 1127.1052631578948, "grad_norm": 1.2825509309768677, "learning_rate": 0.0001, "loss": 0.0089, "step": 171320 }, { "epoch": 1127.171052631579, "grad_norm": 1.1516602039337158, "learning_rate": 0.0001, "loss": 0.0083, "step": 171330 }, { "epoch": 1127.2368421052631, "grad_norm": 0.9105165004730225, "learning_rate": 0.0001, "loss": 0.0082, "step": 171340 }, { "epoch": 1127.3026315789473, "grad_norm": 1.116930603981018, "learning_rate": 0.0001, "loss": 0.011, "step": 171350 }, { "epoch": 1127.3684210526317, "grad_norm": 1.105677604675293, "learning_rate": 0.0001, "loss": 0.0088, "step": 171360 }, { "epoch": 1127.4342105263158, "grad_norm": 1.1379443407058716, "learning_rate": 0.0001, "loss": 0.0096, "step": 171370 }, { "epoch": 1127.5, "grad_norm": 0.8313212394714355, "learning_rate": 0.0001, "loss": 0.0095, "step": 171380 }, { "epoch": 1127.5657894736842, "grad_norm": 0.9692210555076599, "learning_rate": 0.0001, "loss": 0.0079, "step": 171390 }, { "epoch": 1127.6315789473683, "grad_norm": 0.9176498651504517, "learning_rate": 0.0001, "loss": 0.0098, "step": 171400 }, { "epoch": 1127.6973684210527, "grad_norm": 1.2443631887435913, "learning_rate": 0.0001, "loss": 0.0105, "step": 171410 }, { "epoch": 1127.7631578947369, "grad_norm": 1.4867162704467773, "learning_rate": 0.0001, "loss": 0.0083, "step": 171420 }, { "epoch": 1127.828947368421, "grad_norm": 1.2587157487869263, "learning_rate": 0.0001, "loss": 0.0088, "step": 171430 }, { "epoch": 1127.8947368421052, "grad_norm": 1.1008973121643066, "learning_rate": 0.0001, "loss": 0.0076, "step": 171440 }, { "epoch": 1127.9605263157894, "grad_norm": 1.035931944847107, "learning_rate": 0.0001, "loss": 0.009, "step": 171450 }, { "epoch": 1128.0263157894738, "grad_norm": 0.9197618961334229, "learning_rate": 0.0001, "loss": 0.0081, "step": 171460 }, { "epoch": 1128.092105263158, "grad_norm": 0.9170363545417786, "learning_rate": 0.0001, "loss": 0.0091, "step": 171470 }, { "epoch": 1128.157894736842, "grad_norm": 1.0054898262023926, "learning_rate": 0.0001, "loss": 0.0098, "step": 171480 }, { "epoch": 1128.2236842105262, "grad_norm": 0.6774921417236328, "learning_rate": 0.0001, "loss": 0.0087, "step": 171490 }, { "epoch": 1128.2894736842106, "grad_norm": 1.0647435188293457, "learning_rate": 0.0001, "loss": 0.0087, "step": 171500 }, { "epoch": 1128.3552631578948, "grad_norm": 0.9801605343818665, "learning_rate": 0.0001, "loss": 0.0087, "step": 171510 }, { "epoch": 1128.421052631579, "grad_norm": 0.9069273471832275, "learning_rate": 0.0001, "loss": 0.0099, "step": 171520 }, { "epoch": 1128.4868421052631, "grad_norm": 0.8292950987815857, "learning_rate": 0.0001, "loss": 0.0102, "step": 171530 }, { "epoch": 1128.5526315789473, "grad_norm": 1.0863277912139893, "learning_rate": 0.0001, "loss": 0.0092, "step": 171540 }, { "epoch": 1128.6184210526317, "grad_norm": 1.3724346160888672, "learning_rate": 0.0001, "loss": 0.0092, "step": 171550 }, { "epoch": 1128.6842105263158, "grad_norm": 0.905957043170929, "learning_rate": 0.0001, "loss": 0.0107, "step": 171560 }, { "epoch": 1128.75, "grad_norm": 0.5370518565177917, "learning_rate": 0.0001, "loss": 0.0081, "step": 171570 }, { "epoch": 1128.8157894736842, "grad_norm": 1.248266339302063, "learning_rate": 0.0001, "loss": 0.0069, "step": 171580 }, { "epoch": 1128.8815789473683, "grad_norm": 1.00150728225708, "learning_rate": 0.0001, "loss": 0.0105, "step": 171590 }, { "epoch": 1128.9473684210527, "grad_norm": 1.0399243831634521, "learning_rate": 0.0001, "loss": 0.0097, "step": 171600 }, { "epoch": 1129.0131578947369, "grad_norm": 1.1588037014007568, "learning_rate": 0.0001, "loss": 0.0098, "step": 171610 }, { "epoch": 1129.078947368421, "grad_norm": 1.0992803573608398, "learning_rate": 0.0001, "loss": 0.0085, "step": 171620 }, { "epoch": 1129.1447368421052, "grad_norm": 1.2114496231079102, "learning_rate": 0.0001, "loss": 0.0088, "step": 171630 }, { "epoch": 1129.2105263157894, "grad_norm": 1.3456045389175415, "learning_rate": 0.0001, "loss": 0.0111, "step": 171640 }, { "epoch": 1129.2763157894738, "grad_norm": 1.063812017440796, "learning_rate": 0.0001, "loss": 0.0118, "step": 171650 }, { "epoch": 1129.342105263158, "grad_norm": 0.7482918500900269, "learning_rate": 0.0001, "loss": 0.0096, "step": 171660 }, { "epoch": 1129.407894736842, "grad_norm": 0.9320112466812134, "learning_rate": 0.0001, "loss": 0.0098, "step": 171670 }, { "epoch": 1129.4736842105262, "grad_norm": 1.2601550817489624, "learning_rate": 0.0001, "loss": 0.0097, "step": 171680 }, { "epoch": 1129.5394736842106, "grad_norm": 0.9702651500701904, "learning_rate": 0.0001, "loss": 0.0094, "step": 171690 }, { "epoch": 1129.6052631578948, "grad_norm": 0.6806167960166931, "learning_rate": 0.0001, "loss": 0.0093, "step": 171700 }, { "epoch": 1129.671052631579, "grad_norm": 0.8016168475151062, "learning_rate": 0.0001, "loss": 0.0076, "step": 171710 }, { "epoch": 1129.7368421052631, "grad_norm": 1.0033786296844482, "learning_rate": 0.0001, "loss": 0.0079, "step": 171720 }, { "epoch": 1129.8026315789473, "grad_norm": 1.152343988418579, "learning_rate": 0.0001, "loss": 0.0093, "step": 171730 }, { "epoch": 1129.8684210526317, "grad_norm": 1.0898712873458862, "learning_rate": 0.0001, "loss": 0.0077, "step": 171740 }, { "epoch": 1129.9342105263158, "grad_norm": 1.0133626461029053, "learning_rate": 0.0001, "loss": 0.0097, "step": 171750 }, { "epoch": 1130.0, "grad_norm": 1.0823217630386353, "learning_rate": 0.0001, "loss": 0.0077, "step": 171760 }, { "epoch": 1130.0657894736842, "grad_norm": 0.853507936000824, "learning_rate": 0.0001, "loss": 0.0081, "step": 171770 }, { "epoch": 1130.1315789473683, "grad_norm": 1.2531830072402954, "learning_rate": 0.0001, "loss": 0.0118, "step": 171780 }, { "epoch": 1130.1973684210527, "grad_norm": 1.178091287612915, "learning_rate": 0.0001, "loss": 0.0114, "step": 171790 }, { "epoch": 1130.2631578947369, "grad_norm": 1.0549789667129517, "learning_rate": 0.0001, "loss": 0.0091, "step": 171800 }, { "epoch": 1130.328947368421, "grad_norm": 1.3365063667297363, "learning_rate": 0.0001, "loss": 0.0103, "step": 171810 }, { "epoch": 1130.3947368421052, "grad_norm": 1.022502064704895, "learning_rate": 0.0001, "loss": 0.0086, "step": 171820 }, { "epoch": 1130.4605263157894, "grad_norm": 0.9836164116859436, "learning_rate": 0.0001, "loss": 0.0082, "step": 171830 }, { "epoch": 1130.5263157894738, "grad_norm": 1.1745048761367798, "learning_rate": 0.0001, "loss": 0.0076, "step": 171840 }, { "epoch": 1130.592105263158, "grad_norm": 1.0340749025344849, "learning_rate": 0.0001, "loss": 0.0102, "step": 171850 }, { "epoch": 1130.657894736842, "grad_norm": 0.8751736283302307, "learning_rate": 0.0001, "loss": 0.0093, "step": 171860 }, { "epoch": 1130.7236842105262, "grad_norm": 1.1623742580413818, "learning_rate": 0.0001, "loss": 0.009, "step": 171870 }, { "epoch": 1130.7894736842106, "grad_norm": 0.9135785102844238, "learning_rate": 0.0001, "loss": 0.0071, "step": 171880 }, { "epoch": 1130.8552631578948, "grad_norm": 0.9940324425697327, "learning_rate": 0.0001, "loss": 0.009, "step": 171890 }, { "epoch": 1130.921052631579, "grad_norm": 0.944202721118927, "learning_rate": 0.0001, "loss": 0.0092, "step": 171900 }, { "epoch": 1130.9868421052631, "grad_norm": 1.2017103433609009, "learning_rate": 0.0001, "loss": 0.0098, "step": 171910 }, { "epoch": 1131.0526315789473, "grad_norm": 0.7654322385787964, "learning_rate": 0.0001, "loss": 0.0087, "step": 171920 }, { "epoch": 1131.1184210526317, "grad_norm": 0.7834600210189819, "learning_rate": 0.0001, "loss": 0.0087, "step": 171930 }, { "epoch": 1131.1842105263158, "grad_norm": 0.751923143863678, "learning_rate": 0.0001, "loss": 0.0075, "step": 171940 }, { "epoch": 1131.25, "grad_norm": 0.9888975024223328, "learning_rate": 0.0001, "loss": 0.0095, "step": 171950 }, { "epoch": 1131.3157894736842, "grad_norm": 1.2195971012115479, "learning_rate": 0.0001, "loss": 0.0096, "step": 171960 }, { "epoch": 1131.3815789473683, "grad_norm": 1.110573410987854, "learning_rate": 0.0001, "loss": 0.0088, "step": 171970 }, { "epoch": 1131.4473684210527, "grad_norm": 1.1125825643539429, "learning_rate": 0.0001, "loss": 0.0087, "step": 171980 }, { "epoch": 1131.5131578947369, "grad_norm": 0.6448672413825989, "learning_rate": 0.0001, "loss": 0.0104, "step": 171990 }, { "epoch": 1131.578947368421, "grad_norm": 1.2890968322753906, "learning_rate": 0.0001, "loss": 0.0079, "step": 172000 }, { "epoch": 1131.6447368421052, "grad_norm": 1.0350221395492554, "learning_rate": 0.0001, "loss": 0.0092, "step": 172010 }, { "epoch": 1131.7105263157894, "grad_norm": 0.9488757252693176, "learning_rate": 0.0001, "loss": 0.0086, "step": 172020 }, { "epoch": 1131.7763157894738, "grad_norm": 0.7833364009857178, "learning_rate": 0.0001, "loss": 0.0099, "step": 172030 }, { "epoch": 1131.842105263158, "grad_norm": 1.1103259325027466, "learning_rate": 0.0001, "loss": 0.0111, "step": 172040 }, { "epoch": 1131.907894736842, "grad_norm": 1.4510931968688965, "learning_rate": 0.0001, "loss": 0.0092, "step": 172050 }, { "epoch": 1131.9736842105262, "grad_norm": 1.293509840965271, "learning_rate": 0.0001, "loss": 0.0089, "step": 172060 }, { "epoch": 1132.0394736842106, "grad_norm": 1.0810400247573853, "learning_rate": 0.0001, "loss": 0.0113, "step": 172070 }, { "epoch": 1132.1052631578948, "grad_norm": 1.3170888423919678, "learning_rate": 0.0001, "loss": 0.0101, "step": 172080 }, { "epoch": 1132.171052631579, "grad_norm": 0.8184434771537781, "learning_rate": 0.0001, "loss": 0.0094, "step": 172090 }, { "epoch": 1132.2368421052631, "grad_norm": 1.073915958404541, "learning_rate": 0.0001, "loss": 0.0076, "step": 172100 }, { "epoch": 1132.3026315789473, "grad_norm": 1.019290566444397, "learning_rate": 0.0001, "loss": 0.009, "step": 172110 }, { "epoch": 1132.3684210526317, "grad_norm": 1.3928847312927246, "learning_rate": 0.0001, "loss": 0.009, "step": 172120 }, { "epoch": 1132.4342105263158, "grad_norm": 1.158503770828247, "learning_rate": 0.0001, "loss": 0.0087, "step": 172130 }, { "epoch": 1132.5, "grad_norm": 1.2527748346328735, "learning_rate": 0.0001, "loss": 0.0095, "step": 172140 }, { "epoch": 1132.5657894736842, "grad_norm": 1.0773019790649414, "learning_rate": 0.0001, "loss": 0.008, "step": 172150 }, { "epoch": 1132.6315789473683, "grad_norm": 1.3126847743988037, "learning_rate": 0.0001, "loss": 0.0084, "step": 172160 }, { "epoch": 1132.6973684210527, "grad_norm": 0.9994803071022034, "learning_rate": 0.0001, "loss": 0.009, "step": 172170 }, { "epoch": 1132.7631578947369, "grad_norm": 1.4703785181045532, "learning_rate": 0.0001, "loss": 0.0091, "step": 172180 }, { "epoch": 1132.828947368421, "grad_norm": 0.6720287799835205, "learning_rate": 0.0001, "loss": 0.0113, "step": 172190 }, { "epoch": 1132.8947368421052, "grad_norm": 0.6046428084373474, "learning_rate": 0.0001, "loss": 0.0092, "step": 172200 }, { "epoch": 1132.9605263157894, "grad_norm": 1.031443476676941, "learning_rate": 0.0001, "loss": 0.0083, "step": 172210 }, { "epoch": 1133.0263157894738, "grad_norm": 1.2193036079406738, "learning_rate": 0.0001, "loss": 0.0074, "step": 172220 }, { "epoch": 1133.092105263158, "grad_norm": 1.1105955839157104, "learning_rate": 0.0001, "loss": 0.0071, "step": 172230 }, { "epoch": 1133.157894736842, "grad_norm": 1.0140831470489502, "learning_rate": 0.0001, "loss": 0.0098, "step": 172240 }, { "epoch": 1133.2236842105262, "grad_norm": 0.8403400778770447, "learning_rate": 0.0001, "loss": 0.0106, "step": 172250 }, { "epoch": 1133.2894736842106, "grad_norm": 1.143967628479004, "learning_rate": 0.0001, "loss": 0.0093, "step": 172260 }, { "epoch": 1133.3552631578948, "grad_norm": 0.6676881909370422, "learning_rate": 0.0001, "loss": 0.0103, "step": 172270 }, { "epoch": 1133.421052631579, "grad_norm": 0.8999795913696289, "learning_rate": 0.0001, "loss": 0.0095, "step": 172280 }, { "epoch": 1133.4868421052631, "grad_norm": 0.9858129024505615, "learning_rate": 0.0001, "loss": 0.0078, "step": 172290 }, { "epoch": 1133.5526315789473, "grad_norm": 0.9326895475387573, "learning_rate": 0.0001, "loss": 0.0089, "step": 172300 }, { "epoch": 1133.6184210526317, "grad_norm": 1.069669246673584, "learning_rate": 0.0001, "loss": 0.0097, "step": 172310 }, { "epoch": 1133.6842105263158, "grad_norm": 0.8025325536727905, "learning_rate": 0.0001, "loss": 0.0093, "step": 172320 }, { "epoch": 1133.75, "grad_norm": 0.5994061231613159, "learning_rate": 0.0001, "loss": 0.0082, "step": 172330 }, { "epoch": 1133.8157894736842, "grad_norm": 0.6835926175117493, "learning_rate": 0.0001, "loss": 0.0079, "step": 172340 }, { "epoch": 1133.8815789473683, "grad_norm": 0.6774199604988098, "learning_rate": 0.0001, "loss": 0.0087, "step": 172350 }, { "epoch": 1133.9473684210527, "grad_norm": 1.1998493671417236, "learning_rate": 0.0001, "loss": 0.0096, "step": 172360 }, { "epoch": 1134.0131578947369, "grad_norm": 1.2091012001037598, "learning_rate": 0.0001, "loss": 0.011, "step": 172370 }, { "epoch": 1134.078947368421, "grad_norm": 0.9642314910888672, "learning_rate": 0.0001, "loss": 0.0082, "step": 172380 }, { "epoch": 1134.1447368421052, "grad_norm": 0.9943333864212036, "learning_rate": 0.0001, "loss": 0.0092, "step": 172390 }, { "epoch": 1134.2105263157894, "grad_norm": 1.449242115020752, "learning_rate": 0.0001, "loss": 0.0101, "step": 172400 }, { "epoch": 1134.2763157894738, "grad_norm": 1.4561964273452759, "learning_rate": 0.0001, "loss": 0.0088, "step": 172410 }, { "epoch": 1134.342105263158, "grad_norm": 1.191004991531372, "learning_rate": 0.0001, "loss": 0.0114, "step": 172420 }, { "epoch": 1134.407894736842, "grad_norm": 1.171547293663025, "learning_rate": 0.0001, "loss": 0.0092, "step": 172430 }, { "epoch": 1134.4736842105262, "grad_norm": 1.08799409866333, "learning_rate": 0.0001, "loss": 0.0094, "step": 172440 }, { "epoch": 1134.5394736842106, "grad_norm": 1.141973614692688, "learning_rate": 0.0001, "loss": 0.0082, "step": 172450 }, { "epoch": 1134.6052631578948, "grad_norm": 1.2193361520767212, "learning_rate": 0.0001, "loss": 0.009, "step": 172460 }, { "epoch": 1134.671052631579, "grad_norm": 1.011452078819275, "learning_rate": 0.0001, "loss": 0.0096, "step": 172470 }, { "epoch": 1134.7368421052631, "grad_norm": 1.046140432357788, "learning_rate": 0.0001, "loss": 0.0077, "step": 172480 }, { "epoch": 1134.8026315789473, "grad_norm": 0.9603179693222046, "learning_rate": 0.0001, "loss": 0.0092, "step": 172490 }, { "epoch": 1134.8684210526317, "grad_norm": 1.3100394010543823, "learning_rate": 0.0001, "loss": 0.0093, "step": 172500 }, { "epoch": 1134.9342105263158, "grad_norm": 0.8967265486717224, "learning_rate": 0.0001, "loss": 0.01, "step": 172510 }, { "epoch": 1135.0, "grad_norm": 0.750058650970459, "learning_rate": 0.0001, "loss": 0.0114, "step": 172520 }, { "epoch": 1135.0657894736842, "grad_norm": 0.9799665808677673, "learning_rate": 0.0001, "loss": 0.0105, "step": 172530 }, { "epoch": 1135.1315789473683, "grad_norm": 0.871820330619812, "learning_rate": 0.0001, "loss": 0.0082, "step": 172540 }, { "epoch": 1135.1973684210527, "grad_norm": 1.1122573614120483, "learning_rate": 0.0001, "loss": 0.0077, "step": 172550 }, { "epoch": 1135.2631578947369, "grad_norm": 0.9300641417503357, "learning_rate": 0.0001, "loss": 0.0105, "step": 172560 }, { "epoch": 1135.328947368421, "grad_norm": 1.2777584791183472, "learning_rate": 0.0001, "loss": 0.0111, "step": 172570 }, { "epoch": 1135.3947368421052, "grad_norm": 0.886059582233429, "learning_rate": 0.0001, "loss": 0.0077, "step": 172580 }, { "epoch": 1135.4605263157894, "grad_norm": 1.118418574333191, "learning_rate": 0.0001, "loss": 0.0112, "step": 172590 }, { "epoch": 1135.5263157894738, "grad_norm": 1.219286561012268, "learning_rate": 0.0001, "loss": 0.0093, "step": 172600 }, { "epoch": 1135.592105263158, "grad_norm": 1.4709956645965576, "learning_rate": 0.0001, "loss": 0.0111, "step": 172610 }, { "epoch": 1135.657894736842, "grad_norm": 1.124354600906372, "learning_rate": 0.0001, "loss": 0.0112, "step": 172620 }, { "epoch": 1135.7236842105262, "grad_norm": 0.929551362991333, "learning_rate": 0.0001, "loss": 0.0098, "step": 172630 }, { "epoch": 1135.7894736842106, "grad_norm": 1.3097996711730957, "learning_rate": 0.0001, "loss": 0.0073, "step": 172640 }, { "epoch": 1135.8552631578948, "grad_norm": 1.0899239778518677, "learning_rate": 0.0001, "loss": 0.0084, "step": 172650 }, { "epoch": 1135.921052631579, "grad_norm": 1.0806041955947876, "learning_rate": 0.0001, "loss": 0.0068, "step": 172660 }, { "epoch": 1135.9868421052631, "grad_norm": 1.1731756925582886, "learning_rate": 0.0001, "loss": 0.0094, "step": 172670 }, { "epoch": 1136.0526315789473, "grad_norm": 0.7187557816505432, "learning_rate": 0.0001, "loss": 0.0086, "step": 172680 }, { "epoch": 1136.1184210526317, "grad_norm": 1.014794945716858, "learning_rate": 0.0001, "loss": 0.0094, "step": 172690 }, { "epoch": 1136.1842105263158, "grad_norm": 0.9141129851341248, "learning_rate": 0.0001, "loss": 0.0096, "step": 172700 }, { "epoch": 1136.25, "grad_norm": 0.8806213736534119, "learning_rate": 0.0001, "loss": 0.0099, "step": 172710 }, { "epoch": 1136.3157894736842, "grad_norm": 1.0453318357467651, "learning_rate": 0.0001, "loss": 0.0083, "step": 172720 }, { "epoch": 1136.3815789473683, "grad_norm": 1.0713512897491455, "learning_rate": 0.0001, "loss": 0.0086, "step": 172730 }, { "epoch": 1136.4473684210527, "grad_norm": 0.8867670893669128, "learning_rate": 0.0001, "loss": 0.0108, "step": 172740 }, { "epoch": 1136.5131578947369, "grad_norm": 1.3366000652313232, "learning_rate": 0.0001, "loss": 0.0103, "step": 172750 }, { "epoch": 1136.578947368421, "grad_norm": 1.2236058712005615, "learning_rate": 0.0001, "loss": 0.0082, "step": 172760 }, { "epoch": 1136.6447368421052, "grad_norm": 1.2140995264053345, "learning_rate": 0.0001, "loss": 0.0103, "step": 172770 }, { "epoch": 1136.7105263157894, "grad_norm": 1.1668179035186768, "learning_rate": 0.0001, "loss": 0.008, "step": 172780 }, { "epoch": 1136.7763157894738, "grad_norm": 0.9051017761230469, "learning_rate": 0.0001, "loss": 0.0104, "step": 172790 }, { "epoch": 1136.842105263158, "grad_norm": 1.346951961517334, "learning_rate": 0.0001, "loss": 0.009, "step": 172800 }, { "epoch": 1136.907894736842, "grad_norm": 0.9546437859535217, "learning_rate": 0.0001, "loss": 0.0085, "step": 172810 }, { "epoch": 1136.9736842105262, "grad_norm": 1.1562156677246094, "learning_rate": 0.0001, "loss": 0.0091, "step": 172820 }, { "epoch": 1137.0394736842106, "grad_norm": 1.1238209009170532, "learning_rate": 0.0001, "loss": 0.0096, "step": 172830 }, { "epoch": 1137.1052631578948, "grad_norm": 1.0962235927581787, "learning_rate": 0.0001, "loss": 0.0082, "step": 172840 }, { "epoch": 1137.171052631579, "grad_norm": 1.1757200956344604, "learning_rate": 0.0001, "loss": 0.0092, "step": 172850 }, { "epoch": 1137.2368421052631, "grad_norm": 0.6541777849197388, "learning_rate": 0.0001, "loss": 0.009, "step": 172860 }, { "epoch": 1137.3026315789473, "grad_norm": 0.7562914490699768, "learning_rate": 0.0001, "loss": 0.0103, "step": 172870 }, { "epoch": 1137.3684210526317, "grad_norm": 0.8109030723571777, "learning_rate": 0.0001, "loss": 0.008, "step": 172880 }, { "epoch": 1137.4342105263158, "grad_norm": 0.924420952796936, "learning_rate": 0.0001, "loss": 0.011, "step": 172890 }, { "epoch": 1137.5, "grad_norm": 0.7896656394004822, "learning_rate": 0.0001, "loss": 0.009, "step": 172900 }, { "epoch": 1137.5657894736842, "grad_norm": 1.00142502784729, "learning_rate": 0.0001, "loss": 0.0093, "step": 172910 }, { "epoch": 1137.6315789473683, "grad_norm": 0.8568731546401978, "learning_rate": 0.0001, "loss": 0.0097, "step": 172920 }, { "epoch": 1137.6973684210527, "grad_norm": 1.0580912828445435, "learning_rate": 0.0001, "loss": 0.0093, "step": 172930 }, { "epoch": 1137.7631578947369, "grad_norm": 0.8699727058410645, "learning_rate": 0.0001, "loss": 0.0089, "step": 172940 }, { "epoch": 1137.828947368421, "grad_norm": 0.8225347399711609, "learning_rate": 0.0001, "loss": 0.0097, "step": 172950 }, { "epoch": 1137.8947368421052, "grad_norm": 1.0345213413238525, "learning_rate": 0.0001, "loss": 0.0101, "step": 172960 }, { "epoch": 1137.9605263157894, "grad_norm": 1.1980303525924683, "learning_rate": 0.0001, "loss": 0.0085, "step": 172970 }, { "epoch": 1138.0263157894738, "grad_norm": 1.3538132905960083, "learning_rate": 0.0001, "loss": 0.0096, "step": 172980 }, { "epoch": 1138.092105263158, "grad_norm": 1.0862656831741333, "learning_rate": 0.0001, "loss": 0.0084, "step": 172990 }, { "epoch": 1138.157894736842, "grad_norm": 1.2043201923370361, "learning_rate": 0.0001, "loss": 0.0093, "step": 173000 }, { "epoch": 1138.2236842105262, "grad_norm": 1.0616967678070068, "learning_rate": 0.0001, "loss": 0.0097, "step": 173010 }, { "epoch": 1138.2894736842106, "grad_norm": 1.2403620481491089, "learning_rate": 0.0001, "loss": 0.0091, "step": 173020 }, { "epoch": 1138.3552631578948, "grad_norm": 1.2264503240585327, "learning_rate": 0.0001, "loss": 0.0092, "step": 173030 }, { "epoch": 1138.421052631579, "grad_norm": 0.9958550333976746, "learning_rate": 0.0001, "loss": 0.0098, "step": 173040 }, { "epoch": 1138.4868421052631, "grad_norm": 0.9058167934417725, "learning_rate": 0.0001, "loss": 0.0092, "step": 173050 }, { "epoch": 1138.5526315789473, "grad_norm": 0.7711786031723022, "learning_rate": 0.0001, "loss": 0.0102, "step": 173060 }, { "epoch": 1138.6184210526317, "grad_norm": 1.0303704738616943, "learning_rate": 0.0001, "loss": 0.0081, "step": 173070 }, { "epoch": 1138.6842105263158, "grad_norm": 1.2294689416885376, "learning_rate": 0.0001, "loss": 0.0114, "step": 173080 }, { "epoch": 1138.75, "grad_norm": 0.888333797454834, "learning_rate": 0.0001, "loss": 0.0087, "step": 173090 }, { "epoch": 1138.8157894736842, "grad_norm": 0.8880026936531067, "learning_rate": 0.0001, "loss": 0.0096, "step": 173100 }, { "epoch": 1138.8815789473683, "grad_norm": 0.8499730229377747, "learning_rate": 0.0001, "loss": 0.0088, "step": 173110 }, { "epoch": 1138.9473684210527, "grad_norm": 1.0166504383087158, "learning_rate": 0.0001, "loss": 0.0093, "step": 173120 }, { "epoch": 1139.0131578947369, "grad_norm": 1.199263334274292, "learning_rate": 0.0001, "loss": 0.0095, "step": 173130 }, { "epoch": 1139.078947368421, "grad_norm": 0.9333541393280029, "learning_rate": 0.0001, "loss": 0.0086, "step": 173140 }, { "epoch": 1139.1447368421052, "grad_norm": 1.0832608938217163, "learning_rate": 0.0001, "loss": 0.0085, "step": 173150 }, { "epoch": 1139.2105263157894, "grad_norm": 1.0656371116638184, "learning_rate": 0.0001, "loss": 0.0103, "step": 173160 }, { "epoch": 1139.2763157894738, "grad_norm": 0.7567489147186279, "learning_rate": 0.0001, "loss": 0.0096, "step": 173170 }, { "epoch": 1139.342105263158, "grad_norm": 1.146492600440979, "learning_rate": 0.0001, "loss": 0.0092, "step": 173180 }, { "epoch": 1139.407894736842, "grad_norm": 1.0995131731033325, "learning_rate": 0.0001, "loss": 0.0094, "step": 173190 }, { "epoch": 1139.4736842105262, "grad_norm": 1.1685936450958252, "learning_rate": 0.0001, "loss": 0.0099, "step": 173200 }, { "epoch": 1139.5394736842106, "grad_norm": 0.8442726731300354, "learning_rate": 0.0001, "loss": 0.0107, "step": 173210 }, { "epoch": 1139.6052631578948, "grad_norm": 0.9669975638389587, "learning_rate": 0.0001, "loss": 0.0089, "step": 173220 }, { "epoch": 1139.671052631579, "grad_norm": 1.19166100025177, "learning_rate": 0.0001, "loss": 0.0118, "step": 173230 }, { "epoch": 1139.7368421052631, "grad_norm": 0.6347991228103638, "learning_rate": 0.0001, "loss": 0.0101, "step": 173240 }, { "epoch": 1139.8026315789473, "grad_norm": 0.877065122127533, "learning_rate": 0.0001, "loss": 0.0097, "step": 173250 }, { "epoch": 1139.8684210526317, "grad_norm": 0.9608317017555237, "learning_rate": 0.0001, "loss": 0.0111, "step": 173260 }, { "epoch": 1139.9342105263158, "grad_norm": 1.243171215057373, "learning_rate": 0.0001, "loss": 0.0102, "step": 173270 }, { "epoch": 1140.0, "grad_norm": 0.7330761551856995, "learning_rate": 0.0001, "loss": 0.0097, "step": 173280 }, { "epoch": 1140.0657894736842, "grad_norm": 1.0452905893325806, "learning_rate": 0.0001, "loss": 0.0097, "step": 173290 }, { "epoch": 1140.1315789473683, "grad_norm": 1.095406413078308, "learning_rate": 0.0001, "loss": 0.0111, "step": 173300 }, { "epoch": 1140.1973684210527, "grad_norm": 0.8945803642272949, "learning_rate": 0.0001, "loss": 0.0114, "step": 173310 }, { "epoch": 1140.2631578947369, "grad_norm": 1.0646826028823853, "learning_rate": 0.0001, "loss": 0.0092, "step": 173320 }, { "epoch": 1140.328947368421, "grad_norm": 0.9003171324729919, "learning_rate": 0.0001, "loss": 0.0107, "step": 173330 }, { "epoch": 1140.3947368421052, "grad_norm": 1.0605850219726562, "learning_rate": 0.0001, "loss": 0.0111, "step": 173340 }, { "epoch": 1140.4605263157894, "grad_norm": 1.0738884210586548, "learning_rate": 0.0001, "loss": 0.0102, "step": 173350 }, { "epoch": 1140.5263157894738, "grad_norm": 1.067591667175293, "learning_rate": 0.0001, "loss": 0.0108, "step": 173360 }, { "epoch": 1140.592105263158, "grad_norm": 1.4844350814819336, "learning_rate": 0.0001, "loss": 0.0102, "step": 173370 }, { "epoch": 1140.657894736842, "grad_norm": 1.0386178493499756, "learning_rate": 0.0001, "loss": 0.0098, "step": 173380 }, { "epoch": 1140.7236842105262, "grad_norm": 1.306445837020874, "learning_rate": 0.0001, "loss": 0.0111, "step": 173390 }, { "epoch": 1140.7894736842106, "grad_norm": 1.4519286155700684, "learning_rate": 0.0001, "loss": 0.0098, "step": 173400 }, { "epoch": 1140.8552631578948, "grad_norm": 1.0992698669433594, "learning_rate": 0.0001, "loss": 0.0105, "step": 173410 }, { "epoch": 1140.921052631579, "grad_norm": 1.0953896045684814, "learning_rate": 0.0001, "loss": 0.0091, "step": 173420 }, { "epoch": 1140.9868421052631, "grad_norm": 1.086705207824707, "learning_rate": 0.0001, "loss": 0.0089, "step": 173430 }, { "epoch": 1141.0526315789473, "grad_norm": 0.9892106056213379, "learning_rate": 0.0001, "loss": 0.0084, "step": 173440 }, { "epoch": 1141.1184210526317, "grad_norm": 0.8099537491798401, "learning_rate": 0.0001, "loss": 0.0093, "step": 173450 }, { "epoch": 1141.1842105263158, "grad_norm": 1.2578682899475098, "learning_rate": 0.0001, "loss": 0.0094, "step": 173460 }, { "epoch": 1141.25, "grad_norm": 1.0416673421859741, "learning_rate": 0.0001, "loss": 0.0088, "step": 173470 }, { "epoch": 1141.3157894736842, "grad_norm": 0.9099404215812683, "learning_rate": 0.0001, "loss": 0.0078, "step": 173480 }, { "epoch": 1141.3815789473683, "grad_norm": 1.2768899202346802, "learning_rate": 0.0001, "loss": 0.0108, "step": 173490 }, { "epoch": 1141.4473684210527, "grad_norm": 1.1993544101715088, "learning_rate": 0.0001, "loss": 0.0102, "step": 173500 }, { "epoch": 1141.5131578947369, "grad_norm": 1.094346046447754, "learning_rate": 0.0001, "loss": 0.0113, "step": 173510 }, { "epoch": 1141.578947368421, "grad_norm": 0.8274684548377991, "learning_rate": 0.0001, "loss": 0.0099, "step": 173520 }, { "epoch": 1141.6447368421052, "grad_norm": 1.1275930404663086, "learning_rate": 0.0001, "loss": 0.0097, "step": 173530 }, { "epoch": 1141.7105263157894, "grad_norm": 1.065172553062439, "learning_rate": 0.0001, "loss": 0.0105, "step": 173540 }, { "epoch": 1141.7763157894738, "grad_norm": 0.9865522980690002, "learning_rate": 0.0001, "loss": 0.0101, "step": 173550 }, { "epoch": 1141.842105263158, "grad_norm": 0.7311487793922424, "learning_rate": 0.0001, "loss": 0.0108, "step": 173560 }, { "epoch": 1141.907894736842, "grad_norm": 0.9053587317466736, "learning_rate": 0.0001, "loss": 0.01, "step": 173570 }, { "epoch": 1141.9736842105262, "grad_norm": 1.1042989492416382, "learning_rate": 0.0001, "loss": 0.0106, "step": 173580 }, { "epoch": 1142.0394736842106, "grad_norm": 1.2687427997589111, "learning_rate": 0.0001, "loss": 0.012, "step": 173590 }, { "epoch": 1142.1052631578948, "grad_norm": 0.8820356726646423, "learning_rate": 0.0001, "loss": 0.011, "step": 173600 }, { "epoch": 1142.171052631579, "grad_norm": 1.3813494443893433, "learning_rate": 0.0001, "loss": 0.0094, "step": 173610 }, { "epoch": 1142.2368421052631, "grad_norm": 0.895897626876831, "learning_rate": 0.0001, "loss": 0.0094, "step": 173620 }, { "epoch": 1142.3026315789473, "grad_norm": 1.020932674407959, "learning_rate": 0.0001, "loss": 0.0106, "step": 173630 }, { "epoch": 1142.3684210526317, "grad_norm": 1.1267672777175903, "learning_rate": 0.0001, "loss": 0.0099, "step": 173640 }, { "epoch": 1142.4342105263158, "grad_norm": 0.8028258085250854, "learning_rate": 0.0001, "loss": 0.0116, "step": 173650 }, { "epoch": 1142.5, "grad_norm": 1.3700445890426636, "learning_rate": 0.0001, "loss": 0.009, "step": 173660 }, { "epoch": 1142.5657894736842, "grad_norm": 0.9799003005027771, "learning_rate": 0.0001, "loss": 0.0104, "step": 173670 }, { "epoch": 1142.6315789473683, "grad_norm": 0.9328917264938354, "learning_rate": 0.0001, "loss": 0.0105, "step": 173680 }, { "epoch": 1142.6973684210527, "grad_norm": 0.8575257658958435, "learning_rate": 0.0001, "loss": 0.0105, "step": 173690 }, { "epoch": 1142.7631578947369, "grad_norm": 0.9841787219047546, "learning_rate": 0.0001, "loss": 0.0085, "step": 173700 }, { "epoch": 1142.828947368421, "grad_norm": 1.0814399719238281, "learning_rate": 0.0001, "loss": 0.014, "step": 173710 }, { "epoch": 1142.8947368421052, "grad_norm": 1.0300018787384033, "learning_rate": 0.0001, "loss": 0.0096, "step": 173720 }, { "epoch": 1142.9605263157894, "grad_norm": 0.9641240239143372, "learning_rate": 0.0001, "loss": 0.0092, "step": 173730 }, { "epoch": 1143.0263157894738, "grad_norm": 1.5655041933059692, "learning_rate": 0.0001, "loss": 0.0107, "step": 173740 }, { "epoch": 1143.092105263158, "grad_norm": 1.1116403341293335, "learning_rate": 0.0001, "loss": 0.0077, "step": 173750 }, { "epoch": 1143.157894736842, "grad_norm": 0.8238686919212341, "learning_rate": 0.0001, "loss": 0.0091, "step": 173760 }, { "epoch": 1143.2236842105262, "grad_norm": 1.036594033241272, "learning_rate": 0.0001, "loss": 0.008, "step": 173770 }, { "epoch": 1143.2894736842106, "grad_norm": 1.2605559825897217, "learning_rate": 0.0001, "loss": 0.011, "step": 173780 }, { "epoch": 1143.3552631578948, "grad_norm": 0.9970645904541016, "learning_rate": 0.0001, "loss": 0.0093, "step": 173790 }, { "epoch": 1143.421052631579, "grad_norm": 0.645720362663269, "learning_rate": 0.0001, "loss": 0.009, "step": 173800 }, { "epoch": 1143.4868421052631, "grad_norm": 1.0435887575149536, "learning_rate": 0.0001, "loss": 0.0097, "step": 173810 }, { "epoch": 1143.5526315789473, "grad_norm": 1.0935068130493164, "learning_rate": 0.0001, "loss": 0.0113, "step": 173820 }, { "epoch": 1143.6184210526317, "grad_norm": 0.9884122610092163, "learning_rate": 0.0001, "loss": 0.0092, "step": 173830 }, { "epoch": 1143.6842105263158, "grad_norm": 1.100342035293579, "learning_rate": 0.0001, "loss": 0.01, "step": 173840 }, { "epoch": 1143.75, "grad_norm": 0.95659339427948, "learning_rate": 0.0001, "loss": 0.0089, "step": 173850 }, { "epoch": 1143.8157894736842, "grad_norm": 0.9715131521224976, "learning_rate": 0.0001, "loss": 0.0101, "step": 173860 }, { "epoch": 1143.8815789473683, "grad_norm": 0.7474709153175354, "learning_rate": 0.0001, "loss": 0.0099, "step": 173870 }, { "epoch": 1143.9473684210527, "grad_norm": 0.9470142126083374, "learning_rate": 0.0001, "loss": 0.008, "step": 173880 }, { "epoch": 1144.0131578947369, "grad_norm": 0.9878813624382019, "learning_rate": 0.0001, "loss": 0.0106, "step": 173890 }, { "epoch": 1144.078947368421, "grad_norm": 0.6697216629981995, "learning_rate": 0.0001, "loss": 0.0101, "step": 173900 }, { "epoch": 1144.1447368421052, "grad_norm": 1.1067986488342285, "learning_rate": 0.0001, "loss": 0.0104, "step": 173910 }, { "epoch": 1144.2105263157894, "grad_norm": 1.1652238368988037, "learning_rate": 0.0001, "loss": 0.0095, "step": 173920 }, { "epoch": 1144.2763157894738, "grad_norm": 1.5187675952911377, "learning_rate": 0.0001, "loss": 0.008, "step": 173930 }, { "epoch": 1144.342105263158, "grad_norm": 0.8939927220344543, "learning_rate": 0.0001, "loss": 0.0086, "step": 173940 }, { "epoch": 1144.407894736842, "grad_norm": 1.0155541896820068, "learning_rate": 0.0001, "loss": 0.0084, "step": 173950 }, { "epoch": 1144.4736842105262, "grad_norm": 1.0913894176483154, "learning_rate": 0.0001, "loss": 0.0102, "step": 173960 }, { "epoch": 1144.5394736842106, "grad_norm": 0.9633883237838745, "learning_rate": 0.0001, "loss": 0.0087, "step": 173970 }, { "epoch": 1144.6052631578948, "grad_norm": 0.845365047454834, "learning_rate": 0.0001, "loss": 0.009, "step": 173980 }, { "epoch": 1144.671052631579, "grad_norm": 0.9477745890617371, "learning_rate": 0.0001, "loss": 0.008, "step": 173990 }, { "epoch": 1144.7368421052631, "grad_norm": 1.1216838359832764, "learning_rate": 0.0001, "loss": 0.0099, "step": 174000 }, { "epoch": 1144.8026315789473, "grad_norm": 1.5633032321929932, "learning_rate": 0.0001, "loss": 0.0085, "step": 174010 }, { "epoch": 1144.8684210526317, "grad_norm": 1.2574188709259033, "learning_rate": 0.0001, "loss": 0.0087, "step": 174020 }, { "epoch": 1144.9342105263158, "grad_norm": 1.3190844058990479, "learning_rate": 0.0001, "loss": 0.0107, "step": 174030 }, { "epoch": 1145.0, "grad_norm": 1.2496864795684814, "learning_rate": 0.0001, "loss": 0.0099, "step": 174040 }, { "epoch": 1145.0657894736842, "grad_norm": 1.188367486000061, "learning_rate": 0.0001, "loss": 0.0105, "step": 174050 }, { "epoch": 1145.1315789473683, "grad_norm": 1.2431753873825073, "learning_rate": 0.0001, "loss": 0.0081, "step": 174060 }, { "epoch": 1145.1973684210527, "grad_norm": 1.5690101385116577, "learning_rate": 0.0001, "loss": 0.0116, "step": 174070 }, { "epoch": 1145.2631578947369, "grad_norm": 1.4982401132583618, "learning_rate": 0.0001, "loss": 0.0093, "step": 174080 }, { "epoch": 1145.328947368421, "grad_norm": 1.3239631652832031, "learning_rate": 0.0001, "loss": 0.0075, "step": 174090 }, { "epoch": 1145.3947368421052, "grad_norm": 1.079105257987976, "learning_rate": 0.0001, "loss": 0.0077, "step": 174100 }, { "epoch": 1145.4605263157894, "grad_norm": 1.0493974685668945, "learning_rate": 0.0001, "loss": 0.0087, "step": 174110 }, { "epoch": 1145.5263157894738, "grad_norm": 1.0567365884780884, "learning_rate": 0.0001, "loss": 0.0085, "step": 174120 }, { "epoch": 1145.592105263158, "grad_norm": 1.0888392925262451, "learning_rate": 0.0001, "loss": 0.0091, "step": 174130 }, { "epoch": 1145.657894736842, "grad_norm": 1.156753420829773, "learning_rate": 0.0001, "loss": 0.0093, "step": 174140 }, { "epoch": 1145.7236842105262, "grad_norm": 0.7009889483451843, "learning_rate": 0.0001, "loss": 0.0093, "step": 174150 }, { "epoch": 1145.7894736842106, "grad_norm": 1.242331862449646, "learning_rate": 0.0001, "loss": 0.0091, "step": 174160 }, { "epoch": 1145.8552631578948, "grad_norm": 1.1424720287322998, "learning_rate": 0.0001, "loss": 0.01, "step": 174170 }, { "epoch": 1145.921052631579, "grad_norm": 1.1428437232971191, "learning_rate": 0.0001, "loss": 0.0088, "step": 174180 }, { "epoch": 1145.9868421052631, "grad_norm": 1.0217548608779907, "learning_rate": 0.0001, "loss": 0.0087, "step": 174190 }, { "epoch": 1146.0526315789473, "grad_norm": 0.9081407189369202, "learning_rate": 0.0001, "loss": 0.0091, "step": 174200 }, { "epoch": 1146.1184210526317, "grad_norm": 1.0314104557037354, "learning_rate": 0.0001, "loss": 0.0076, "step": 174210 }, { "epoch": 1146.1842105263158, "grad_norm": 0.9840436577796936, "learning_rate": 0.0001, "loss": 0.0087, "step": 174220 }, { "epoch": 1146.25, "grad_norm": 1.1434142589569092, "learning_rate": 0.0001, "loss": 0.009, "step": 174230 }, { "epoch": 1146.3157894736842, "grad_norm": 0.9102608561515808, "learning_rate": 0.0001, "loss": 0.0087, "step": 174240 }, { "epoch": 1146.3815789473683, "grad_norm": 1.0038232803344727, "learning_rate": 0.0001, "loss": 0.0091, "step": 174250 }, { "epoch": 1146.4473684210527, "grad_norm": 0.7302023768424988, "learning_rate": 0.0001, "loss": 0.0098, "step": 174260 }, { "epoch": 1146.5131578947369, "grad_norm": 1.0826613903045654, "learning_rate": 0.0001, "loss": 0.009, "step": 174270 }, { "epoch": 1146.578947368421, "grad_norm": 1.095320224761963, "learning_rate": 0.0001, "loss": 0.009, "step": 174280 }, { "epoch": 1146.6447368421052, "grad_norm": 1.0644302368164062, "learning_rate": 0.0001, "loss": 0.0095, "step": 174290 }, { "epoch": 1146.7105263157894, "grad_norm": 1.1615333557128906, "learning_rate": 0.0001, "loss": 0.0085, "step": 174300 }, { "epoch": 1146.7763157894738, "grad_norm": 1.3274446725845337, "learning_rate": 0.0001, "loss": 0.0089, "step": 174310 }, { "epoch": 1146.842105263158, "grad_norm": 1.1300023794174194, "learning_rate": 0.0001, "loss": 0.0087, "step": 174320 }, { "epoch": 1146.907894736842, "grad_norm": 1.0761096477508545, "learning_rate": 0.0001, "loss": 0.0095, "step": 174330 }, { "epoch": 1146.9736842105262, "grad_norm": 0.9165819883346558, "learning_rate": 0.0001, "loss": 0.0111, "step": 174340 }, { "epoch": 1147.0394736842106, "grad_norm": 0.8142982721328735, "learning_rate": 0.0001, "loss": 0.0097, "step": 174350 }, { "epoch": 1147.1052631578948, "grad_norm": 0.7436504364013672, "learning_rate": 0.0001, "loss": 0.0086, "step": 174360 }, { "epoch": 1147.171052631579, "grad_norm": 0.9768326878547668, "learning_rate": 0.0001, "loss": 0.009, "step": 174370 }, { "epoch": 1147.2368421052631, "grad_norm": 1.2096675634384155, "learning_rate": 0.0001, "loss": 0.0092, "step": 174380 }, { "epoch": 1147.3026315789473, "grad_norm": 1.1367160081863403, "learning_rate": 0.0001, "loss": 0.0093, "step": 174390 }, { "epoch": 1147.3684210526317, "grad_norm": 0.680374801158905, "learning_rate": 0.0001, "loss": 0.0086, "step": 174400 }, { "epoch": 1147.4342105263158, "grad_norm": 0.9637120962142944, "learning_rate": 0.0001, "loss": 0.0082, "step": 174410 }, { "epoch": 1147.5, "grad_norm": 0.9413122534751892, "learning_rate": 0.0001, "loss": 0.0099, "step": 174420 }, { "epoch": 1147.5657894736842, "grad_norm": 1.165696620941162, "learning_rate": 0.0001, "loss": 0.0096, "step": 174430 }, { "epoch": 1147.6315789473683, "grad_norm": 1.0719096660614014, "learning_rate": 0.0001, "loss": 0.009, "step": 174440 }, { "epoch": 1147.6973684210527, "grad_norm": 0.9276707768440247, "learning_rate": 0.0001, "loss": 0.009, "step": 174450 }, { "epoch": 1147.7631578947369, "grad_norm": 0.7522660493850708, "learning_rate": 0.0001, "loss": 0.008, "step": 174460 }, { "epoch": 1147.828947368421, "grad_norm": 0.9146892428398132, "learning_rate": 0.0001, "loss": 0.0086, "step": 174470 }, { "epoch": 1147.8947368421052, "grad_norm": 1.5214803218841553, "learning_rate": 0.0001, "loss": 0.0105, "step": 174480 }, { "epoch": 1147.9605263157894, "grad_norm": 1.152605652809143, "learning_rate": 0.0001, "loss": 0.0106, "step": 174490 }, { "epoch": 1148.0263157894738, "grad_norm": 1.000752568244934, "learning_rate": 0.0001, "loss": 0.0098, "step": 174500 }, { "epoch": 1148.092105263158, "grad_norm": 1.0302700996398926, "learning_rate": 0.0001, "loss": 0.0095, "step": 174510 }, { "epoch": 1148.157894736842, "grad_norm": 1.0237771272659302, "learning_rate": 0.0001, "loss": 0.0108, "step": 174520 }, { "epoch": 1148.2236842105262, "grad_norm": 1.1548134088516235, "learning_rate": 0.0001, "loss": 0.009, "step": 174530 }, { "epoch": 1148.2894736842106, "grad_norm": 1.4400047063827515, "learning_rate": 0.0001, "loss": 0.0087, "step": 174540 }, { "epoch": 1148.3552631578948, "grad_norm": 0.9746676087379456, "learning_rate": 0.0001, "loss": 0.0103, "step": 174550 }, { "epoch": 1148.421052631579, "grad_norm": 0.8200980424880981, "learning_rate": 0.0001, "loss": 0.0104, "step": 174560 }, { "epoch": 1148.4868421052631, "grad_norm": 1.2456996440887451, "learning_rate": 0.0001, "loss": 0.0079, "step": 174570 }, { "epoch": 1148.5526315789473, "grad_norm": 1.0017765760421753, "learning_rate": 0.0001, "loss": 0.0082, "step": 174580 }, { "epoch": 1148.6184210526317, "grad_norm": 0.7824232578277588, "learning_rate": 0.0001, "loss": 0.0087, "step": 174590 }, { "epoch": 1148.6842105263158, "grad_norm": 1.0675746202468872, "learning_rate": 0.0001, "loss": 0.0097, "step": 174600 }, { "epoch": 1148.75, "grad_norm": 0.8915340304374695, "learning_rate": 0.0001, "loss": 0.0098, "step": 174610 }, { "epoch": 1148.8157894736842, "grad_norm": 1.1041761636734009, "learning_rate": 0.0001, "loss": 0.0081, "step": 174620 }, { "epoch": 1148.8815789473683, "grad_norm": 0.9389930367469788, "learning_rate": 0.0001, "loss": 0.0094, "step": 174630 }, { "epoch": 1148.9473684210527, "grad_norm": 0.7159057259559631, "learning_rate": 0.0001, "loss": 0.0103, "step": 174640 }, { "epoch": 1149.0131578947369, "grad_norm": 0.8780013918876648, "learning_rate": 0.0001, "loss": 0.0087, "step": 174650 }, { "epoch": 1149.078947368421, "grad_norm": 0.7802185416221619, "learning_rate": 0.0001, "loss": 0.0103, "step": 174660 }, { "epoch": 1149.1447368421052, "grad_norm": 1.1969313621520996, "learning_rate": 0.0001, "loss": 0.0088, "step": 174670 }, { "epoch": 1149.2105263157894, "grad_norm": 1.0634304285049438, "learning_rate": 0.0001, "loss": 0.0079, "step": 174680 }, { "epoch": 1149.2763157894738, "grad_norm": 0.9525381326675415, "learning_rate": 0.0001, "loss": 0.0085, "step": 174690 }, { "epoch": 1149.342105263158, "grad_norm": 0.895878791809082, "learning_rate": 0.0001, "loss": 0.0102, "step": 174700 }, { "epoch": 1149.407894736842, "grad_norm": 0.641148030757904, "learning_rate": 0.0001, "loss": 0.0089, "step": 174710 }, { "epoch": 1149.4736842105262, "grad_norm": 1.0341429710388184, "learning_rate": 0.0001, "loss": 0.0089, "step": 174720 }, { "epoch": 1149.5394736842106, "grad_norm": 0.9922910928726196, "learning_rate": 0.0001, "loss": 0.0082, "step": 174730 }, { "epoch": 1149.6052631578948, "grad_norm": 1.1714662313461304, "learning_rate": 0.0001, "loss": 0.0106, "step": 174740 }, { "epoch": 1149.671052631579, "grad_norm": 2.198077440261841, "learning_rate": 0.0001, "loss": 0.0102, "step": 174750 }, { "epoch": 1149.7368421052631, "grad_norm": 1.9137755632400513, "learning_rate": 0.0001, "loss": 0.0115, "step": 174760 }, { "epoch": 1149.8026315789473, "grad_norm": 1.6982780694961548, "learning_rate": 0.0001, "loss": 0.0102, "step": 174770 }, { "epoch": 1149.8684210526317, "grad_norm": 1.3892414569854736, "learning_rate": 0.0001, "loss": 0.0095, "step": 174780 }, { "epoch": 1149.9342105263158, "grad_norm": 1.2968597412109375, "learning_rate": 0.0001, "loss": 0.0094, "step": 174790 }, { "epoch": 1150.0, "grad_norm": 1.1849431991577148, "learning_rate": 0.0001, "loss": 0.0114, "step": 174800 }, { "epoch": 1150.0657894736842, "grad_norm": 1.133933663368225, "learning_rate": 0.0001, "loss": 0.0093, "step": 174810 }, { "epoch": 1150.1315789473683, "grad_norm": 1.385817050933838, "learning_rate": 0.0001, "loss": 0.0093, "step": 174820 }, { "epoch": 1150.1973684210527, "grad_norm": 1.2556873559951782, "learning_rate": 0.0001, "loss": 0.0079, "step": 174830 }, { "epoch": 1150.2631578947369, "grad_norm": 1.142346978187561, "learning_rate": 0.0001, "loss": 0.0105, "step": 174840 }, { "epoch": 1150.328947368421, "grad_norm": 1.2064703702926636, "learning_rate": 0.0001, "loss": 0.0092, "step": 174850 }, { "epoch": 1150.3947368421052, "grad_norm": 1.0505454540252686, "learning_rate": 0.0001, "loss": 0.0094, "step": 174860 }, { "epoch": 1150.4605263157894, "grad_norm": 1.084917664527893, "learning_rate": 0.0001, "loss": 0.0097, "step": 174870 }, { "epoch": 1150.5263157894738, "grad_norm": 1.289945125579834, "learning_rate": 0.0001, "loss": 0.0094, "step": 174880 }, { "epoch": 1150.592105263158, "grad_norm": 0.8230373859405518, "learning_rate": 0.0001, "loss": 0.0095, "step": 174890 }, { "epoch": 1150.657894736842, "grad_norm": 1.1008986234664917, "learning_rate": 0.0001, "loss": 0.0088, "step": 174900 }, { "epoch": 1150.7236842105262, "grad_norm": 1.0355833768844604, "learning_rate": 0.0001, "loss": 0.01, "step": 174910 }, { "epoch": 1150.7894736842106, "grad_norm": 1.206417441368103, "learning_rate": 0.0001, "loss": 0.0095, "step": 174920 }, { "epoch": 1150.8552631578948, "grad_norm": 1.393118977546692, "learning_rate": 0.0001, "loss": 0.0097, "step": 174930 }, { "epoch": 1150.921052631579, "grad_norm": 1.0129419565200806, "learning_rate": 0.0001, "loss": 0.0084, "step": 174940 }, { "epoch": 1150.9868421052631, "grad_norm": 0.9136284589767456, "learning_rate": 0.0001, "loss": 0.0096, "step": 174950 }, { "epoch": 1151.0526315789473, "grad_norm": 0.9406630992889404, "learning_rate": 0.0001, "loss": 0.0093, "step": 174960 }, { "epoch": 1151.1184210526317, "grad_norm": 1.0768885612487793, "learning_rate": 0.0001, "loss": 0.0099, "step": 174970 }, { "epoch": 1151.1842105263158, "grad_norm": 0.9179930686950684, "learning_rate": 0.0001, "loss": 0.0109, "step": 174980 }, { "epoch": 1151.25, "grad_norm": 0.6786910891532898, "learning_rate": 0.0001, "loss": 0.0082, "step": 174990 }, { "epoch": 1151.3157894736842, "grad_norm": 1.1306889057159424, "learning_rate": 0.0001, "loss": 0.0089, "step": 175000 }, { "epoch": 1151.3815789473683, "grad_norm": 1.0117748975753784, "learning_rate": 0.0001, "loss": 0.0087, "step": 175010 }, { "epoch": 1151.4473684210527, "grad_norm": 0.661830484867096, "learning_rate": 0.0001, "loss": 0.0091, "step": 175020 }, { "epoch": 1151.5131578947369, "grad_norm": 1.0096147060394287, "learning_rate": 0.0001, "loss": 0.0102, "step": 175030 }, { "epoch": 1151.578947368421, "grad_norm": 1.1573773622512817, "learning_rate": 0.0001, "loss": 0.0088, "step": 175040 }, { "epoch": 1151.6447368421052, "grad_norm": 1.0948268175125122, "learning_rate": 0.0001, "loss": 0.0085, "step": 175050 }, { "epoch": 1151.7105263157894, "grad_norm": 0.9008634090423584, "learning_rate": 0.0001, "loss": 0.0093, "step": 175060 }, { "epoch": 1151.7763157894738, "grad_norm": 0.8874291181564331, "learning_rate": 0.0001, "loss": 0.01, "step": 175070 }, { "epoch": 1151.842105263158, "grad_norm": 1.33906090259552, "learning_rate": 0.0001, "loss": 0.0096, "step": 175080 }, { "epoch": 1151.907894736842, "grad_norm": 1.3272651433944702, "learning_rate": 0.0001, "loss": 0.0087, "step": 175090 }, { "epoch": 1151.9736842105262, "grad_norm": 1.1627230644226074, "learning_rate": 0.0001, "loss": 0.0086, "step": 175100 }, { "epoch": 1152.0394736842106, "grad_norm": 0.9578849077224731, "learning_rate": 0.0001, "loss": 0.0106, "step": 175110 }, { "epoch": 1152.1052631578948, "grad_norm": 1.016489863395691, "learning_rate": 0.0001, "loss": 0.0089, "step": 175120 }, { "epoch": 1152.171052631579, "grad_norm": 0.9531344771385193, "learning_rate": 0.0001, "loss": 0.0092, "step": 175130 }, { "epoch": 1152.2368421052631, "grad_norm": 0.9982447028160095, "learning_rate": 0.0001, "loss": 0.0096, "step": 175140 }, { "epoch": 1152.3026315789473, "grad_norm": 1.3460252285003662, "learning_rate": 0.0001, "loss": 0.0119, "step": 175150 }, { "epoch": 1152.3684210526317, "grad_norm": 1.0662846565246582, "learning_rate": 0.0001, "loss": 0.0088, "step": 175160 }, { "epoch": 1152.4342105263158, "grad_norm": 0.7010901570320129, "learning_rate": 0.0001, "loss": 0.0098, "step": 175170 }, { "epoch": 1152.5, "grad_norm": 0.8454627990722656, "learning_rate": 0.0001, "loss": 0.0082, "step": 175180 }, { "epoch": 1152.5657894736842, "grad_norm": 0.858896017074585, "learning_rate": 0.0001, "loss": 0.0089, "step": 175190 }, { "epoch": 1152.6315789473683, "grad_norm": 1.0066238641738892, "learning_rate": 0.0001, "loss": 0.0096, "step": 175200 }, { "epoch": 1152.6973684210527, "grad_norm": 1.1515685319900513, "learning_rate": 0.0001, "loss": 0.008, "step": 175210 }, { "epoch": 1152.7631578947369, "grad_norm": 0.8878205418586731, "learning_rate": 0.0001, "loss": 0.0082, "step": 175220 }, { "epoch": 1152.828947368421, "grad_norm": 1.0133531093597412, "learning_rate": 0.0001, "loss": 0.0092, "step": 175230 }, { "epoch": 1152.8947368421052, "grad_norm": 1.2004742622375488, "learning_rate": 0.0001, "loss": 0.008, "step": 175240 }, { "epoch": 1152.9605263157894, "grad_norm": 0.8571202754974365, "learning_rate": 0.0001, "loss": 0.011, "step": 175250 }, { "epoch": 1153.0263157894738, "grad_norm": 1.2629226446151733, "learning_rate": 0.0001, "loss": 0.0086, "step": 175260 }, { "epoch": 1153.092105263158, "grad_norm": 1.3697537183761597, "learning_rate": 0.0001, "loss": 0.0082, "step": 175270 }, { "epoch": 1153.157894736842, "grad_norm": 1.2100975513458252, "learning_rate": 0.0001, "loss": 0.0099, "step": 175280 }, { "epoch": 1153.2236842105262, "grad_norm": 1.1854854822158813, "learning_rate": 0.0001, "loss": 0.0082, "step": 175290 }, { "epoch": 1153.2894736842106, "grad_norm": 0.7720757126808167, "learning_rate": 0.0001, "loss": 0.0081, "step": 175300 }, { "epoch": 1153.3552631578948, "grad_norm": 1.1668815612792969, "learning_rate": 0.0001, "loss": 0.0098, "step": 175310 }, { "epoch": 1153.421052631579, "grad_norm": 1.0737518072128296, "learning_rate": 0.0001, "loss": 0.0099, "step": 175320 }, { "epoch": 1153.4868421052631, "grad_norm": 1.1186960935592651, "learning_rate": 0.0001, "loss": 0.0112, "step": 175330 }, { "epoch": 1153.5526315789473, "grad_norm": 0.9653819799423218, "learning_rate": 0.0001, "loss": 0.0081, "step": 175340 }, { "epoch": 1153.6184210526317, "grad_norm": 0.9481167793273926, "learning_rate": 0.0001, "loss": 0.0101, "step": 175350 }, { "epoch": 1153.6842105263158, "grad_norm": 1.0212024450302124, "learning_rate": 0.0001, "loss": 0.0107, "step": 175360 }, { "epoch": 1153.75, "grad_norm": 1.3355201482772827, "learning_rate": 0.0001, "loss": 0.0093, "step": 175370 }, { "epoch": 1153.8157894736842, "grad_norm": 0.727803111076355, "learning_rate": 0.0001, "loss": 0.0094, "step": 175380 }, { "epoch": 1153.8815789473683, "grad_norm": 0.894546627998352, "learning_rate": 0.0001, "loss": 0.0101, "step": 175390 }, { "epoch": 1153.9473684210527, "grad_norm": 1.0446175336837769, "learning_rate": 0.0001, "loss": 0.0086, "step": 175400 }, { "epoch": 1154.0131578947369, "grad_norm": 1.3837840557098389, "learning_rate": 0.0001, "loss": 0.0073, "step": 175410 }, { "epoch": 1154.078947368421, "grad_norm": 1.131395936012268, "learning_rate": 0.0001, "loss": 0.0092, "step": 175420 }, { "epoch": 1154.1447368421052, "grad_norm": 1.1270420551300049, "learning_rate": 0.0001, "loss": 0.0102, "step": 175430 }, { "epoch": 1154.2105263157894, "grad_norm": 0.8780666589736938, "learning_rate": 0.0001, "loss": 0.0079, "step": 175440 }, { "epoch": 1154.2763157894738, "grad_norm": 1.054305911064148, "learning_rate": 0.0001, "loss": 0.009, "step": 175450 }, { "epoch": 1154.342105263158, "grad_norm": 0.7585221529006958, "learning_rate": 0.0001, "loss": 0.0077, "step": 175460 }, { "epoch": 1154.407894736842, "grad_norm": 0.8007470965385437, "learning_rate": 0.0001, "loss": 0.0092, "step": 175470 }, { "epoch": 1154.4736842105262, "grad_norm": 1.0967533588409424, "learning_rate": 0.0001, "loss": 0.0087, "step": 175480 }, { "epoch": 1154.5394736842106, "grad_norm": 1.0877718925476074, "learning_rate": 0.0001, "loss": 0.0077, "step": 175490 }, { "epoch": 1154.6052631578948, "grad_norm": 0.965691864490509, "learning_rate": 0.0001, "loss": 0.0088, "step": 175500 }, { "epoch": 1154.671052631579, "grad_norm": 0.9504064917564392, "learning_rate": 0.0001, "loss": 0.01, "step": 175510 }, { "epoch": 1154.7368421052631, "grad_norm": 1.0463320016860962, "learning_rate": 0.0001, "loss": 0.0104, "step": 175520 }, { "epoch": 1154.8026315789473, "grad_norm": 1.1356477737426758, "learning_rate": 0.0001, "loss": 0.0102, "step": 175530 }, { "epoch": 1154.8684210526317, "grad_norm": 0.937968909740448, "learning_rate": 0.0001, "loss": 0.0097, "step": 175540 }, { "epoch": 1154.9342105263158, "grad_norm": 1.2012879848480225, "learning_rate": 0.0001, "loss": 0.0098, "step": 175550 }, { "epoch": 1155.0, "grad_norm": 1.171942949295044, "learning_rate": 0.0001, "loss": 0.0084, "step": 175560 }, { "epoch": 1155.0657894736842, "grad_norm": 0.92462557554245, "learning_rate": 0.0001, "loss": 0.01, "step": 175570 }, { "epoch": 1155.1315789473683, "grad_norm": 1.2336483001708984, "learning_rate": 0.0001, "loss": 0.0091, "step": 175580 }, { "epoch": 1155.1973684210527, "grad_norm": 1.019647240638733, "learning_rate": 0.0001, "loss": 0.0074, "step": 175590 }, { "epoch": 1155.2631578947369, "grad_norm": 1.0296791791915894, "learning_rate": 0.0001, "loss": 0.0085, "step": 175600 }, { "epoch": 1155.328947368421, "grad_norm": 1.081726312637329, "learning_rate": 0.0001, "loss": 0.0094, "step": 175610 }, { "epoch": 1155.3947368421052, "grad_norm": 0.6053563356399536, "learning_rate": 0.0001, "loss": 0.0076, "step": 175620 }, { "epoch": 1155.4605263157894, "grad_norm": 0.908021867275238, "learning_rate": 0.0001, "loss": 0.0086, "step": 175630 }, { "epoch": 1155.5263157894738, "grad_norm": 0.8451210856437683, "learning_rate": 0.0001, "loss": 0.0083, "step": 175640 }, { "epoch": 1155.592105263158, "grad_norm": 1.068265676498413, "learning_rate": 0.0001, "loss": 0.0113, "step": 175650 }, { "epoch": 1155.657894736842, "grad_norm": 1.213500738143921, "learning_rate": 0.0001, "loss": 0.012, "step": 175660 }, { "epoch": 1155.7236842105262, "grad_norm": 0.8982007503509521, "learning_rate": 0.0001, "loss": 0.0107, "step": 175670 }, { "epoch": 1155.7894736842106, "grad_norm": 1.0422526597976685, "learning_rate": 0.0001, "loss": 0.0106, "step": 175680 }, { "epoch": 1155.8552631578948, "grad_norm": 0.9385272264480591, "learning_rate": 0.0001, "loss": 0.0109, "step": 175690 }, { "epoch": 1155.921052631579, "grad_norm": 0.9605172872543335, "learning_rate": 0.0001, "loss": 0.0121, "step": 175700 }, { "epoch": 1155.9868421052631, "grad_norm": 1.1489660739898682, "learning_rate": 0.0001, "loss": 0.0089, "step": 175710 }, { "epoch": 1156.0526315789473, "grad_norm": 1.1852326393127441, "learning_rate": 0.0001, "loss": 0.0096, "step": 175720 }, { "epoch": 1156.1184210526317, "grad_norm": 1.5356533527374268, "learning_rate": 0.0001, "loss": 0.0113, "step": 175730 }, { "epoch": 1156.1842105263158, "grad_norm": 0.974061131477356, "learning_rate": 0.0001, "loss": 0.0079, "step": 175740 }, { "epoch": 1156.25, "grad_norm": 1.0487468242645264, "learning_rate": 0.0001, "loss": 0.0106, "step": 175750 }, { "epoch": 1156.3157894736842, "grad_norm": 1.5691155195236206, "learning_rate": 0.0001, "loss": 0.0121, "step": 175760 }, { "epoch": 1156.3815789473683, "grad_norm": 1.4372376203536987, "learning_rate": 0.0001, "loss": 0.009, "step": 175770 }, { "epoch": 1156.4473684210527, "grad_norm": 1.508100152015686, "learning_rate": 0.0001, "loss": 0.0083, "step": 175780 }, { "epoch": 1156.5131578947369, "grad_norm": 1.0872918367385864, "learning_rate": 0.0001, "loss": 0.0099, "step": 175790 }, { "epoch": 1156.578947368421, "grad_norm": 1.0140292644500732, "learning_rate": 0.0001, "loss": 0.0074, "step": 175800 }, { "epoch": 1156.6447368421052, "grad_norm": 0.9516561031341553, "learning_rate": 0.0001, "loss": 0.0104, "step": 175810 }, { "epoch": 1156.7105263157894, "grad_norm": 1.1463650465011597, "learning_rate": 0.0001, "loss": 0.0095, "step": 175820 }, { "epoch": 1156.7763157894738, "grad_norm": 1.2068666219711304, "learning_rate": 0.0001, "loss": 0.0088, "step": 175830 }, { "epoch": 1156.842105263158, "grad_norm": 0.8994778394699097, "learning_rate": 0.0001, "loss": 0.0089, "step": 175840 }, { "epoch": 1156.907894736842, "grad_norm": 1.115564227104187, "learning_rate": 0.0001, "loss": 0.0112, "step": 175850 }, { "epoch": 1156.9736842105262, "grad_norm": 1.5093863010406494, "learning_rate": 0.0001, "loss": 0.0091, "step": 175860 }, { "epoch": 1157.0394736842106, "grad_norm": 1.3060439825057983, "learning_rate": 0.0001, "loss": 0.0078, "step": 175870 }, { "epoch": 1157.1052631578948, "grad_norm": 1.0380960702896118, "learning_rate": 0.0001, "loss": 0.0079, "step": 175880 }, { "epoch": 1157.171052631579, "grad_norm": 1.381862998008728, "learning_rate": 0.0001, "loss": 0.0095, "step": 175890 }, { "epoch": 1157.2368421052631, "grad_norm": 0.9802920818328857, "learning_rate": 0.0001, "loss": 0.0101, "step": 175900 }, { "epoch": 1157.3026315789473, "grad_norm": 1.1407344341278076, "learning_rate": 0.0001, "loss": 0.0106, "step": 175910 }, { "epoch": 1157.3684210526317, "grad_norm": 1.3060024976730347, "learning_rate": 0.0001, "loss": 0.0087, "step": 175920 }, { "epoch": 1157.4342105263158, "grad_norm": 1.3562568426132202, "learning_rate": 0.0001, "loss": 0.0092, "step": 175930 }, { "epoch": 1157.5, "grad_norm": 0.958991527557373, "learning_rate": 0.0001, "loss": 0.0097, "step": 175940 }, { "epoch": 1157.5657894736842, "grad_norm": 1.1298139095306396, "learning_rate": 0.0001, "loss": 0.0092, "step": 175950 }, { "epoch": 1157.6315789473683, "grad_norm": 1.4933613538742065, "learning_rate": 0.0001, "loss": 0.0096, "step": 175960 }, { "epoch": 1157.6973684210527, "grad_norm": 1.4154495000839233, "learning_rate": 0.0001, "loss": 0.0094, "step": 175970 }, { "epoch": 1157.7631578947369, "grad_norm": 1.487290382385254, "learning_rate": 0.0001, "loss": 0.0084, "step": 175980 }, { "epoch": 1157.828947368421, "grad_norm": 1.0696330070495605, "learning_rate": 0.0001, "loss": 0.0097, "step": 175990 }, { "epoch": 1157.8947368421052, "grad_norm": 1.3961727619171143, "learning_rate": 0.0001, "loss": 0.0099, "step": 176000 }, { "epoch": 1157.9605263157894, "grad_norm": 0.8423775434494019, "learning_rate": 0.0001, "loss": 0.009, "step": 176010 }, { "epoch": 1158.0263157894738, "grad_norm": 1.1189703941345215, "learning_rate": 0.0001, "loss": 0.0074, "step": 176020 }, { "epoch": 1158.092105263158, "grad_norm": 1.04086434841156, "learning_rate": 0.0001, "loss": 0.0086, "step": 176030 }, { "epoch": 1158.157894736842, "grad_norm": 0.8030532002449036, "learning_rate": 0.0001, "loss": 0.0079, "step": 176040 }, { "epoch": 1158.2236842105262, "grad_norm": 1.1623908281326294, "learning_rate": 0.0001, "loss": 0.0093, "step": 176050 }, { "epoch": 1158.2894736842106, "grad_norm": 0.7452375292778015, "learning_rate": 0.0001, "loss": 0.0089, "step": 176060 }, { "epoch": 1158.3552631578948, "grad_norm": 0.625102162361145, "learning_rate": 0.0001, "loss": 0.0098, "step": 176070 }, { "epoch": 1158.421052631579, "grad_norm": 0.6674894690513611, "learning_rate": 0.0001, "loss": 0.009, "step": 176080 }, { "epoch": 1158.4868421052631, "grad_norm": 0.7138943076133728, "learning_rate": 0.0001, "loss": 0.0083, "step": 176090 }, { "epoch": 1158.5526315789473, "grad_norm": 0.9019205570220947, "learning_rate": 0.0001, "loss": 0.0094, "step": 176100 }, { "epoch": 1158.6184210526317, "grad_norm": 1.1200653314590454, "learning_rate": 0.0001, "loss": 0.0092, "step": 176110 }, { "epoch": 1158.6842105263158, "grad_norm": 1.2586793899536133, "learning_rate": 0.0001, "loss": 0.0081, "step": 176120 }, { "epoch": 1158.75, "grad_norm": 0.7260456085205078, "learning_rate": 0.0001, "loss": 0.0098, "step": 176130 }, { "epoch": 1158.8157894736842, "grad_norm": 1.0455162525177002, "learning_rate": 0.0001, "loss": 0.0072, "step": 176140 }, { "epoch": 1158.8815789473683, "grad_norm": 1.5511831045150757, "learning_rate": 0.0001, "loss": 0.0092, "step": 176150 }, { "epoch": 1158.9473684210527, "grad_norm": 0.8422067165374756, "learning_rate": 0.0001, "loss": 0.0109, "step": 176160 }, { "epoch": 1159.0131578947369, "grad_norm": 1.0608569383621216, "learning_rate": 0.0001, "loss": 0.0105, "step": 176170 }, { "epoch": 1159.078947368421, "grad_norm": 0.9599637985229492, "learning_rate": 0.0001, "loss": 0.0094, "step": 176180 }, { "epoch": 1159.1447368421052, "grad_norm": 1.0301839113235474, "learning_rate": 0.0001, "loss": 0.0092, "step": 176190 }, { "epoch": 1159.2105263157894, "grad_norm": 1.3413852453231812, "learning_rate": 0.0001, "loss": 0.0086, "step": 176200 }, { "epoch": 1159.2763157894738, "grad_norm": 1.2173924446105957, "learning_rate": 0.0001, "loss": 0.0086, "step": 176210 }, { "epoch": 1159.342105263158, "grad_norm": 1.487339735031128, "learning_rate": 0.0001, "loss": 0.0107, "step": 176220 }, { "epoch": 1159.407894736842, "grad_norm": 1.342084527015686, "learning_rate": 0.0001, "loss": 0.0105, "step": 176230 }, { "epoch": 1159.4736842105262, "grad_norm": 1.627140998840332, "learning_rate": 0.0001, "loss": 0.0089, "step": 176240 }, { "epoch": 1159.5394736842106, "grad_norm": 1.2196797132492065, "learning_rate": 0.0001, "loss": 0.0086, "step": 176250 }, { "epoch": 1159.6052631578948, "grad_norm": 1.0979526042938232, "learning_rate": 0.0001, "loss": 0.009, "step": 176260 }, { "epoch": 1159.671052631579, "grad_norm": 1.0794157981872559, "learning_rate": 0.0001, "loss": 0.0082, "step": 176270 }, { "epoch": 1159.7368421052631, "grad_norm": 1.344915747642517, "learning_rate": 0.0001, "loss": 0.0099, "step": 176280 }, { "epoch": 1159.8026315789473, "grad_norm": 0.9110754728317261, "learning_rate": 0.0001, "loss": 0.0081, "step": 176290 }, { "epoch": 1159.8684210526317, "grad_norm": 0.9595354795455933, "learning_rate": 0.0001, "loss": 0.0085, "step": 176300 }, { "epoch": 1159.9342105263158, "grad_norm": 1.3430472612380981, "learning_rate": 0.0001, "loss": 0.0095, "step": 176310 }, { "epoch": 1160.0, "grad_norm": 0.8704079985618591, "learning_rate": 0.0001, "loss": 0.0088, "step": 176320 }, { "epoch": 1160.0657894736842, "grad_norm": 1.069422721862793, "learning_rate": 0.0001, "loss": 0.0075, "step": 176330 }, { "epoch": 1160.1315789473683, "grad_norm": 1.1828012466430664, "learning_rate": 0.0001, "loss": 0.0097, "step": 176340 }, { "epoch": 1160.1973684210527, "grad_norm": 1.0887290239334106, "learning_rate": 0.0001, "loss": 0.0097, "step": 176350 }, { "epoch": 1160.2631578947369, "grad_norm": 0.9942824244499207, "learning_rate": 0.0001, "loss": 0.0083, "step": 176360 }, { "epoch": 1160.328947368421, "grad_norm": 0.9858255982398987, "learning_rate": 0.0001, "loss": 0.009, "step": 176370 }, { "epoch": 1160.3947368421052, "grad_norm": 0.7866831421852112, "learning_rate": 0.0001, "loss": 0.0111, "step": 176380 }, { "epoch": 1160.4605263157894, "grad_norm": 0.8747726082801819, "learning_rate": 0.0001, "loss": 0.0086, "step": 176390 }, { "epoch": 1160.5263157894738, "grad_norm": 1.004457712173462, "learning_rate": 0.0001, "loss": 0.0079, "step": 176400 }, { "epoch": 1160.592105263158, "grad_norm": 0.9716295599937439, "learning_rate": 0.0001, "loss": 0.0084, "step": 176410 }, { "epoch": 1160.657894736842, "grad_norm": 1.2135223150253296, "learning_rate": 0.0001, "loss": 0.0106, "step": 176420 }, { "epoch": 1160.7236842105262, "grad_norm": 1.085508942604065, "learning_rate": 0.0001, "loss": 0.0103, "step": 176430 }, { "epoch": 1160.7894736842106, "grad_norm": 1.4997029304504395, "learning_rate": 0.0001, "loss": 0.0097, "step": 176440 }, { "epoch": 1160.8552631578948, "grad_norm": 1.238146185874939, "learning_rate": 0.0001, "loss": 0.008, "step": 176450 }, { "epoch": 1160.921052631579, "grad_norm": 0.9297826290130615, "learning_rate": 0.0001, "loss": 0.0074, "step": 176460 }, { "epoch": 1160.9868421052631, "grad_norm": 0.8368382453918457, "learning_rate": 0.0001, "loss": 0.0087, "step": 176470 }, { "epoch": 1161.0526315789473, "grad_norm": 0.933539092540741, "learning_rate": 0.0001, "loss": 0.0095, "step": 176480 }, { "epoch": 1161.1184210526317, "grad_norm": 1.1759393215179443, "learning_rate": 0.0001, "loss": 0.0102, "step": 176490 }, { "epoch": 1161.1842105263158, "grad_norm": 1.2678519487380981, "learning_rate": 0.0001, "loss": 0.0098, "step": 176500 }, { "epoch": 1161.25, "grad_norm": 1.227339506149292, "learning_rate": 0.0001, "loss": 0.0098, "step": 176510 }, { "epoch": 1161.3157894736842, "grad_norm": 1.1603562831878662, "learning_rate": 0.0001, "loss": 0.008, "step": 176520 }, { "epoch": 1161.3815789473683, "grad_norm": 1.2368062734603882, "learning_rate": 0.0001, "loss": 0.009, "step": 176530 }, { "epoch": 1161.4473684210527, "grad_norm": 1.3081183433532715, "learning_rate": 0.0001, "loss": 0.0102, "step": 176540 }, { "epoch": 1161.5131578947369, "grad_norm": 1.4025623798370361, "learning_rate": 0.0001, "loss": 0.0091, "step": 176550 }, { "epoch": 1161.578947368421, "grad_norm": 1.1546919345855713, "learning_rate": 0.0001, "loss": 0.0068, "step": 176560 }, { "epoch": 1161.6447368421052, "grad_norm": 0.9056599736213684, "learning_rate": 0.0001, "loss": 0.0089, "step": 176570 }, { "epoch": 1161.7105263157894, "grad_norm": 1.22064208984375, "learning_rate": 0.0001, "loss": 0.0077, "step": 176580 }, { "epoch": 1161.7763157894738, "grad_norm": 0.9950551390647888, "learning_rate": 0.0001, "loss": 0.0082, "step": 176590 }, { "epoch": 1161.842105263158, "grad_norm": 1.0174490213394165, "learning_rate": 0.0001, "loss": 0.0093, "step": 176600 }, { "epoch": 1161.907894736842, "grad_norm": 1.21022629737854, "learning_rate": 0.0001, "loss": 0.009, "step": 176610 }, { "epoch": 1161.9736842105262, "grad_norm": 0.6943854093551636, "learning_rate": 0.0001, "loss": 0.0101, "step": 176620 }, { "epoch": 1162.0394736842106, "grad_norm": 1.3050177097320557, "learning_rate": 0.0001, "loss": 0.0086, "step": 176630 }, { "epoch": 1162.1052631578948, "grad_norm": 1.040520429611206, "learning_rate": 0.0001, "loss": 0.0111, "step": 176640 }, { "epoch": 1162.171052631579, "grad_norm": 1.3651999235153198, "learning_rate": 0.0001, "loss": 0.0085, "step": 176650 }, { "epoch": 1162.2368421052631, "grad_norm": 1.1482195854187012, "learning_rate": 0.0001, "loss": 0.0092, "step": 176660 }, { "epoch": 1162.3026315789473, "grad_norm": 1.137037754058838, "learning_rate": 0.0001, "loss": 0.0109, "step": 176670 }, { "epoch": 1162.3684210526317, "grad_norm": 1.381179690361023, "learning_rate": 0.0001, "loss": 0.0068, "step": 176680 }, { "epoch": 1162.4342105263158, "grad_norm": 1.0104210376739502, "learning_rate": 0.0001, "loss": 0.0087, "step": 176690 }, { "epoch": 1162.5, "grad_norm": 1.2539334297180176, "learning_rate": 0.0001, "loss": 0.0093, "step": 176700 }, { "epoch": 1162.5657894736842, "grad_norm": 1.0029774904251099, "learning_rate": 0.0001, "loss": 0.01, "step": 176710 }, { "epoch": 1162.6315789473683, "grad_norm": 0.8852845430374146, "learning_rate": 0.0001, "loss": 0.0097, "step": 176720 }, { "epoch": 1162.6973684210527, "grad_norm": 1.0239177942276, "learning_rate": 0.0001, "loss": 0.0075, "step": 176730 }, { "epoch": 1162.7631578947369, "grad_norm": 0.8226960897445679, "learning_rate": 0.0001, "loss": 0.0082, "step": 176740 }, { "epoch": 1162.828947368421, "grad_norm": 0.9960774779319763, "learning_rate": 0.0001, "loss": 0.0072, "step": 176750 }, { "epoch": 1162.8947368421052, "grad_norm": 1.1968601942062378, "learning_rate": 0.0001, "loss": 0.0094, "step": 176760 }, { "epoch": 1162.9605263157894, "grad_norm": 1.1123733520507812, "learning_rate": 0.0001, "loss": 0.0088, "step": 176770 }, { "epoch": 1163.0263157894738, "grad_norm": 1.0978655815124512, "learning_rate": 0.0001, "loss": 0.0094, "step": 176780 }, { "epoch": 1163.092105263158, "grad_norm": 0.8756230473518372, "learning_rate": 0.0001, "loss": 0.0093, "step": 176790 }, { "epoch": 1163.157894736842, "grad_norm": 1.0591200590133667, "learning_rate": 0.0001, "loss": 0.0107, "step": 176800 }, { "epoch": 1163.2236842105262, "grad_norm": 0.7175271511077881, "learning_rate": 0.0001, "loss": 0.0106, "step": 176810 }, { "epoch": 1163.2894736842106, "grad_norm": 1.0549403429031372, "learning_rate": 0.0001, "loss": 0.01, "step": 176820 }, { "epoch": 1163.3552631578948, "grad_norm": 1.0469660758972168, "learning_rate": 0.0001, "loss": 0.0096, "step": 176830 }, { "epoch": 1163.421052631579, "grad_norm": 0.8197566270828247, "learning_rate": 0.0001, "loss": 0.0094, "step": 176840 }, { "epoch": 1163.4868421052631, "grad_norm": 1.2558757066726685, "learning_rate": 0.0001, "loss": 0.0084, "step": 176850 }, { "epoch": 1163.5526315789473, "grad_norm": 1.1719180345535278, "learning_rate": 0.0001, "loss": 0.0075, "step": 176860 }, { "epoch": 1163.6184210526317, "grad_norm": 0.817057728767395, "learning_rate": 0.0001, "loss": 0.0075, "step": 176870 }, { "epoch": 1163.6842105263158, "grad_norm": 1.178382158279419, "learning_rate": 0.0001, "loss": 0.0097, "step": 176880 }, { "epoch": 1163.75, "grad_norm": 1.0112900733947754, "learning_rate": 0.0001, "loss": 0.0081, "step": 176890 }, { "epoch": 1163.8157894736842, "grad_norm": 0.7621281743049622, "learning_rate": 0.0001, "loss": 0.0083, "step": 176900 }, { "epoch": 1163.8815789473683, "grad_norm": 0.6787197589874268, "learning_rate": 0.0001, "loss": 0.0094, "step": 176910 }, { "epoch": 1163.9473684210527, "grad_norm": 1.0531107187271118, "learning_rate": 0.0001, "loss": 0.0094, "step": 176920 }, { "epoch": 1164.0131578947369, "grad_norm": 0.5585743188858032, "learning_rate": 0.0001, "loss": 0.0086, "step": 176930 }, { "epoch": 1164.078947368421, "grad_norm": 0.8682098388671875, "learning_rate": 0.0001, "loss": 0.0092, "step": 176940 }, { "epoch": 1164.1447368421052, "grad_norm": 0.7695916295051575, "learning_rate": 0.0001, "loss": 0.0089, "step": 176950 }, { "epoch": 1164.2105263157894, "grad_norm": 1.2778445482254028, "learning_rate": 0.0001, "loss": 0.0086, "step": 176960 }, { "epoch": 1164.2763157894738, "grad_norm": 1.0614190101623535, "learning_rate": 0.0001, "loss": 0.0085, "step": 176970 }, { "epoch": 1164.342105263158, "grad_norm": 0.7645097374916077, "learning_rate": 0.0001, "loss": 0.0084, "step": 176980 }, { "epoch": 1164.407894736842, "grad_norm": 1.0178488492965698, "learning_rate": 0.0001, "loss": 0.0079, "step": 176990 }, { "epoch": 1164.4736842105262, "grad_norm": 1.3799580335617065, "learning_rate": 0.0001, "loss": 0.0104, "step": 177000 }, { "epoch": 1164.5394736842106, "grad_norm": 1.1391440629959106, "learning_rate": 0.0001, "loss": 0.0082, "step": 177010 }, { "epoch": 1164.6052631578948, "grad_norm": 1.0461714267730713, "learning_rate": 0.0001, "loss": 0.0084, "step": 177020 }, { "epoch": 1164.671052631579, "grad_norm": 1.119397521018982, "learning_rate": 0.0001, "loss": 0.0105, "step": 177030 }, { "epoch": 1164.7368421052631, "grad_norm": 1.3247554302215576, "learning_rate": 0.0001, "loss": 0.0101, "step": 177040 }, { "epoch": 1164.8026315789473, "grad_norm": 0.9581509828567505, "learning_rate": 0.0001, "loss": 0.0081, "step": 177050 }, { "epoch": 1164.8684210526317, "grad_norm": 1.0519541501998901, "learning_rate": 0.0001, "loss": 0.0099, "step": 177060 }, { "epoch": 1164.9342105263158, "grad_norm": 0.7811246514320374, "learning_rate": 0.0001, "loss": 0.0083, "step": 177070 }, { "epoch": 1165.0, "grad_norm": 0.8886945843696594, "learning_rate": 0.0001, "loss": 0.0107, "step": 177080 }, { "epoch": 1165.0657894736842, "grad_norm": 1.0160313844680786, "learning_rate": 0.0001, "loss": 0.0072, "step": 177090 }, { "epoch": 1165.1315789473683, "grad_norm": 1.3136074542999268, "learning_rate": 0.0001, "loss": 0.0098, "step": 177100 }, { "epoch": 1165.1973684210527, "grad_norm": 1.1090233325958252, "learning_rate": 0.0001, "loss": 0.0102, "step": 177110 }, { "epoch": 1165.2631578947369, "grad_norm": 1.0425786972045898, "learning_rate": 0.0001, "loss": 0.0096, "step": 177120 }, { "epoch": 1165.328947368421, "grad_norm": 1.0848468542099, "learning_rate": 0.0001, "loss": 0.0094, "step": 177130 }, { "epoch": 1165.3947368421052, "grad_norm": 0.8333765268325806, "learning_rate": 0.0001, "loss": 0.0081, "step": 177140 }, { "epoch": 1165.4605263157894, "grad_norm": 0.8007072806358337, "learning_rate": 0.0001, "loss": 0.0099, "step": 177150 }, { "epoch": 1165.5263157894738, "grad_norm": 1.0657711029052734, "learning_rate": 0.0001, "loss": 0.0104, "step": 177160 }, { "epoch": 1165.592105263158, "grad_norm": 0.8942683339118958, "learning_rate": 0.0001, "loss": 0.0098, "step": 177170 }, { "epoch": 1165.657894736842, "grad_norm": 0.7064841985702515, "learning_rate": 0.0001, "loss": 0.0082, "step": 177180 }, { "epoch": 1165.7236842105262, "grad_norm": 0.673353910446167, "learning_rate": 0.0001, "loss": 0.0105, "step": 177190 }, { "epoch": 1165.7894736842106, "grad_norm": 1.201241135597229, "learning_rate": 0.0001, "loss": 0.0088, "step": 177200 }, { "epoch": 1165.8552631578948, "grad_norm": 1.1128406524658203, "learning_rate": 0.0001, "loss": 0.0087, "step": 177210 }, { "epoch": 1165.921052631579, "grad_norm": 1.0577025413513184, "learning_rate": 0.0001, "loss": 0.0089, "step": 177220 }, { "epoch": 1165.9868421052631, "grad_norm": 1.063428282737732, "learning_rate": 0.0001, "loss": 0.0086, "step": 177230 }, { "epoch": 1166.0526315789473, "grad_norm": 0.6475265622138977, "learning_rate": 0.0001, "loss": 0.0111, "step": 177240 }, { "epoch": 1166.1184210526317, "grad_norm": 1.0673624277114868, "learning_rate": 0.0001, "loss": 0.0091, "step": 177250 }, { "epoch": 1166.1842105263158, "grad_norm": 1.2042251825332642, "learning_rate": 0.0001, "loss": 0.0087, "step": 177260 }, { "epoch": 1166.25, "grad_norm": 1.023403286933899, "learning_rate": 0.0001, "loss": 0.0084, "step": 177270 }, { "epoch": 1166.3157894736842, "grad_norm": 1.0554587841033936, "learning_rate": 0.0001, "loss": 0.0092, "step": 177280 }, { "epoch": 1166.3815789473683, "grad_norm": 1.2385607957839966, "learning_rate": 0.0001, "loss": 0.0099, "step": 177290 }, { "epoch": 1166.4473684210527, "grad_norm": 1.058170199394226, "learning_rate": 0.0001, "loss": 0.008, "step": 177300 }, { "epoch": 1166.5131578947369, "grad_norm": 0.7787966728210449, "learning_rate": 0.0001, "loss": 0.0093, "step": 177310 }, { "epoch": 1166.578947368421, "grad_norm": 0.8278951048851013, "learning_rate": 0.0001, "loss": 0.0079, "step": 177320 }, { "epoch": 1166.6447368421052, "grad_norm": 0.945210337638855, "learning_rate": 0.0001, "loss": 0.0086, "step": 177330 }, { "epoch": 1166.7105263157894, "grad_norm": 0.8070635795593262, "learning_rate": 0.0001, "loss": 0.0097, "step": 177340 }, { "epoch": 1166.7763157894738, "grad_norm": 1.1100777387619019, "learning_rate": 0.0001, "loss": 0.0099, "step": 177350 }, { "epoch": 1166.842105263158, "grad_norm": 1.3282232284545898, "learning_rate": 0.0001, "loss": 0.0094, "step": 177360 }, { "epoch": 1166.907894736842, "grad_norm": 1.6687939167022705, "learning_rate": 0.0001, "loss": 0.0093, "step": 177370 }, { "epoch": 1166.9736842105262, "grad_norm": 1.4565303325653076, "learning_rate": 0.0001, "loss": 0.009, "step": 177380 }, { "epoch": 1167.0394736842106, "grad_norm": 0.7736444473266602, "learning_rate": 0.0001, "loss": 0.0111, "step": 177390 }, { "epoch": 1167.1052631578948, "grad_norm": 1.261600375175476, "learning_rate": 0.0001, "loss": 0.0091, "step": 177400 }, { "epoch": 1167.171052631579, "grad_norm": 1.2047302722930908, "learning_rate": 0.0001, "loss": 0.0093, "step": 177410 }, { "epoch": 1167.2368421052631, "grad_norm": 1.3666282892227173, "learning_rate": 0.0001, "loss": 0.0092, "step": 177420 }, { "epoch": 1167.3026315789473, "grad_norm": 1.41166353225708, "learning_rate": 0.0001, "loss": 0.0104, "step": 177430 }, { "epoch": 1167.3684210526317, "grad_norm": 0.9169084429740906, "learning_rate": 0.0001, "loss": 0.0079, "step": 177440 }, { "epoch": 1167.4342105263158, "grad_norm": 1.0971057415008545, "learning_rate": 0.0001, "loss": 0.011, "step": 177450 }, { "epoch": 1167.5, "grad_norm": 1.0250972509384155, "learning_rate": 0.0001, "loss": 0.0092, "step": 177460 }, { "epoch": 1167.5657894736842, "grad_norm": 1.008253812789917, "learning_rate": 0.0001, "loss": 0.0075, "step": 177470 }, { "epoch": 1167.6315789473683, "grad_norm": 1.0217920541763306, "learning_rate": 0.0001, "loss": 0.0104, "step": 177480 }, { "epoch": 1167.6973684210527, "grad_norm": 1.0006431341171265, "learning_rate": 0.0001, "loss": 0.0085, "step": 177490 }, { "epoch": 1167.7631578947369, "grad_norm": 0.8362782001495361, "learning_rate": 0.0001, "loss": 0.0076, "step": 177500 }, { "epoch": 1167.828947368421, "grad_norm": 1.3864368200302124, "learning_rate": 0.0001, "loss": 0.0107, "step": 177510 }, { "epoch": 1167.8947368421052, "grad_norm": 1.3755213022232056, "learning_rate": 0.0001, "loss": 0.0072, "step": 177520 }, { "epoch": 1167.9605263157894, "grad_norm": 1.060314416885376, "learning_rate": 0.0001, "loss": 0.0089, "step": 177530 }, { "epoch": 1168.0263157894738, "grad_norm": 1.2582261562347412, "learning_rate": 0.0001, "loss": 0.009, "step": 177540 }, { "epoch": 1168.092105263158, "grad_norm": 1.0791796445846558, "learning_rate": 0.0001, "loss": 0.0114, "step": 177550 }, { "epoch": 1168.157894736842, "grad_norm": 1.2090390920639038, "learning_rate": 0.0001, "loss": 0.0081, "step": 177560 }, { "epoch": 1168.2236842105262, "grad_norm": 0.7330916523933411, "learning_rate": 0.0001, "loss": 0.0101, "step": 177570 }, { "epoch": 1168.2894736842106, "grad_norm": 0.9363530874252319, "learning_rate": 0.0001, "loss": 0.0096, "step": 177580 }, { "epoch": 1168.3552631578948, "grad_norm": 0.9685211777687073, "learning_rate": 0.0001, "loss": 0.0082, "step": 177590 }, { "epoch": 1168.421052631579, "grad_norm": 1.0913938283920288, "learning_rate": 0.0001, "loss": 0.0074, "step": 177600 }, { "epoch": 1168.4868421052631, "grad_norm": 1.035131812095642, "learning_rate": 0.0001, "loss": 0.0084, "step": 177610 }, { "epoch": 1168.5526315789473, "grad_norm": 1.0176951885223389, "learning_rate": 0.0001, "loss": 0.0091, "step": 177620 }, { "epoch": 1168.6184210526317, "grad_norm": 0.8525509834289551, "learning_rate": 0.0001, "loss": 0.0096, "step": 177630 }, { "epoch": 1168.6842105263158, "grad_norm": 1.0058506727218628, "learning_rate": 0.0001, "loss": 0.0086, "step": 177640 }, { "epoch": 1168.75, "grad_norm": 1.1106820106506348, "learning_rate": 0.0001, "loss": 0.0085, "step": 177650 }, { "epoch": 1168.8157894736842, "grad_norm": 0.9219995737075806, "learning_rate": 0.0001, "loss": 0.009, "step": 177660 }, { "epoch": 1168.8815789473683, "grad_norm": 0.9162684082984924, "learning_rate": 0.0001, "loss": 0.0097, "step": 177670 }, { "epoch": 1168.9473684210527, "grad_norm": 1.331979513168335, "learning_rate": 0.0001, "loss": 0.01, "step": 177680 }, { "epoch": 1169.0131578947369, "grad_norm": 0.8545548915863037, "learning_rate": 0.0001, "loss": 0.0096, "step": 177690 }, { "epoch": 1169.078947368421, "grad_norm": 0.989903450012207, "learning_rate": 0.0001, "loss": 0.009, "step": 177700 }, { "epoch": 1169.1447368421052, "grad_norm": 0.9872443079948425, "learning_rate": 0.0001, "loss": 0.0107, "step": 177710 }, { "epoch": 1169.2105263157894, "grad_norm": 1.1131963729858398, "learning_rate": 0.0001, "loss": 0.0085, "step": 177720 }, { "epoch": 1169.2763157894738, "grad_norm": 0.7441876530647278, "learning_rate": 0.0001, "loss": 0.0075, "step": 177730 }, { "epoch": 1169.342105263158, "grad_norm": 1.1648389101028442, "learning_rate": 0.0001, "loss": 0.0112, "step": 177740 }, { "epoch": 1169.407894736842, "grad_norm": 1.0851612091064453, "learning_rate": 0.0001, "loss": 0.0112, "step": 177750 }, { "epoch": 1169.4736842105262, "grad_norm": 0.9316413402557373, "learning_rate": 0.0001, "loss": 0.0094, "step": 177760 }, { "epoch": 1169.5394736842106, "grad_norm": 0.8847388029098511, "learning_rate": 0.0001, "loss": 0.0097, "step": 177770 }, { "epoch": 1169.6052631578948, "grad_norm": 1.1352200508117676, "learning_rate": 0.0001, "loss": 0.0094, "step": 177780 }, { "epoch": 1169.671052631579, "grad_norm": 0.9478182792663574, "learning_rate": 0.0001, "loss": 0.0097, "step": 177790 }, { "epoch": 1169.7368421052631, "grad_norm": 0.7659247517585754, "learning_rate": 0.0001, "loss": 0.0112, "step": 177800 }, { "epoch": 1169.8026315789473, "grad_norm": 0.7848868370056152, "learning_rate": 0.0001, "loss": 0.0128, "step": 177810 }, { "epoch": 1169.8684210526317, "grad_norm": 1.0635731220245361, "learning_rate": 0.0001, "loss": 0.011, "step": 177820 }, { "epoch": 1169.9342105263158, "grad_norm": 1.3124653100967407, "learning_rate": 0.0001, "loss": 0.0099, "step": 177830 }, { "epoch": 1170.0, "grad_norm": 0.9985669255256653, "learning_rate": 0.0001, "loss": 0.0117, "step": 177840 }, { "epoch": 1170.0657894736842, "grad_norm": 1.075380563735962, "learning_rate": 0.0001, "loss": 0.0111, "step": 177850 }, { "epoch": 1170.1315789473683, "grad_norm": 0.7401938438415527, "learning_rate": 0.0001, "loss": 0.0093, "step": 177860 }, { "epoch": 1170.1973684210527, "grad_norm": 1.1559667587280273, "learning_rate": 0.0001, "loss": 0.0088, "step": 177870 }, { "epoch": 1170.2631578947369, "grad_norm": 1.152563452720642, "learning_rate": 0.0001, "loss": 0.0108, "step": 177880 }, { "epoch": 1170.328947368421, "grad_norm": 0.8326547145843506, "learning_rate": 0.0001, "loss": 0.009, "step": 177890 }, { "epoch": 1170.3947368421052, "grad_norm": 1.1279677152633667, "learning_rate": 0.0001, "loss": 0.0094, "step": 177900 }, { "epoch": 1170.4605263157894, "grad_norm": 1.192924976348877, "learning_rate": 0.0001, "loss": 0.0107, "step": 177910 }, { "epoch": 1170.5263157894738, "grad_norm": 1.3840651512145996, "learning_rate": 0.0001, "loss": 0.0101, "step": 177920 }, { "epoch": 1170.592105263158, "grad_norm": 1.016034483909607, "learning_rate": 0.0001, "loss": 0.0106, "step": 177930 }, { "epoch": 1170.657894736842, "grad_norm": 1.2145895957946777, "learning_rate": 0.0001, "loss": 0.0106, "step": 177940 }, { "epoch": 1170.7236842105262, "grad_norm": 0.9925963878631592, "learning_rate": 0.0001, "loss": 0.0103, "step": 177950 }, { "epoch": 1170.7894736842106, "grad_norm": 0.9628350734710693, "learning_rate": 0.0001, "loss": 0.0095, "step": 177960 }, { "epoch": 1170.8552631578948, "grad_norm": 0.8890961408615112, "learning_rate": 0.0001, "loss": 0.0103, "step": 177970 }, { "epoch": 1170.921052631579, "grad_norm": 0.8459914922714233, "learning_rate": 0.0001, "loss": 0.009, "step": 177980 }, { "epoch": 1170.9868421052631, "grad_norm": 0.828228235244751, "learning_rate": 0.0001, "loss": 0.0092, "step": 177990 }, { "epoch": 1171.0526315789473, "grad_norm": 0.8761423230171204, "learning_rate": 0.0001, "loss": 0.01, "step": 178000 }, { "epoch": 1171.1184210526317, "grad_norm": 0.9206410050392151, "learning_rate": 0.0001, "loss": 0.0073, "step": 178010 }, { "epoch": 1171.1842105263158, "grad_norm": 0.7937294840812683, "learning_rate": 0.0001, "loss": 0.0093, "step": 178020 }, { "epoch": 1171.25, "grad_norm": 0.9361086487770081, "learning_rate": 0.0001, "loss": 0.0085, "step": 178030 }, { "epoch": 1171.3157894736842, "grad_norm": 1.000895380973816, "learning_rate": 0.0001, "loss": 0.01, "step": 178040 }, { "epoch": 1171.3815789473683, "grad_norm": 1.2588021755218506, "learning_rate": 0.0001, "loss": 0.0108, "step": 178050 }, { "epoch": 1171.4473684210527, "grad_norm": 1.0151118040084839, "learning_rate": 0.0001, "loss": 0.0086, "step": 178060 }, { "epoch": 1171.5131578947369, "grad_norm": 1.0695226192474365, "learning_rate": 0.0001, "loss": 0.0097, "step": 178070 }, { "epoch": 1171.578947368421, "grad_norm": 0.9043155908584595, "learning_rate": 0.0001, "loss": 0.0124, "step": 178080 }, { "epoch": 1171.6447368421052, "grad_norm": 0.9957794547080994, "learning_rate": 0.0001, "loss": 0.0082, "step": 178090 }, { "epoch": 1171.7105263157894, "grad_norm": 0.9040594100952148, "learning_rate": 0.0001, "loss": 0.0102, "step": 178100 }, { "epoch": 1171.7763157894738, "grad_norm": 0.9921096563339233, "learning_rate": 0.0001, "loss": 0.0103, "step": 178110 }, { "epoch": 1171.842105263158, "grad_norm": 1.4011787176132202, "learning_rate": 0.0001, "loss": 0.0093, "step": 178120 }, { "epoch": 1171.907894736842, "grad_norm": 1.1821715831756592, "learning_rate": 0.0001, "loss": 0.0083, "step": 178130 }, { "epoch": 1171.9736842105262, "grad_norm": 1.4412405490875244, "learning_rate": 0.0001, "loss": 0.0095, "step": 178140 }, { "epoch": 1172.0394736842106, "grad_norm": 1.3233447074890137, "learning_rate": 0.0001, "loss": 0.0092, "step": 178150 }, { "epoch": 1172.1052631578948, "grad_norm": 1.3075172901153564, "learning_rate": 0.0001, "loss": 0.0107, "step": 178160 }, { "epoch": 1172.171052631579, "grad_norm": 1.162680983543396, "learning_rate": 0.0001, "loss": 0.0085, "step": 178170 }, { "epoch": 1172.2368421052631, "grad_norm": 0.6982220411300659, "learning_rate": 0.0001, "loss": 0.0085, "step": 178180 }, { "epoch": 1172.3026315789473, "grad_norm": 1.1954939365386963, "learning_rate": 0.0001, "loss": 0.0098, "step": 178190 }, { "epoch": 1172.3684210526317, "grad_norm": 1.6869287490844727, "learning_rate": 0.0001, "loss": 0.0116, "step": 178200 }, { "epoch": 1172.4342105263158, "grad_norm": 1.649885654449463, "learning_rate": 0.0001, "loss": 0.0094, "step": 178210 }, { "epoch": 1172.5, "grad_norm": 1.5803041458129883, "learning_rate": 0.0001, "loss": 0.0075, "step": 178220 }, { "epoch": 1172.5657894736842, "grad_norm": 1.7074077129364014, "learning_rate": 0.0001, "loss": 0.008, "step": 178230 }, { "epoch": 1172.6315789473683, "grad_norm": 1.3916934728622437, "learning_rate": 0.0001, "loss": 0.0102, "step": 178240 }, { "epoch": 1172.6973684210527, "grad_norm": 1.668851613998413, "learning_rate": 0.0001, "loss": 0.008, "step": 178250 }, { "epoch": 1172.7631578947369, "grad_norm": 1.1936376094818115, "learning_rate": 0.0001, "loss": 0.0082, "step": 178260 }, { "epoch": 1172.828947368421, "grad_norm": 1.4797992706298828, "learning_rate": 0.0001, "loss": 0.0089, "step": 178270 }, { "epoch": 1172.8947368421052, "grad_norm": 1.1677192449569702, "learning_rate": 0.0001, "loss": 0.0094, "step": 178280 }, { "epoch": 1172.9605263157894, "grad_norm": 1.2152477502822876, "learning_rate": 0.0001, "loss": 0.0095, "step": 178290 }, { "epoch": 1173.0263157894738, "grad_norm": 1.248241662979126, "learning_rate": 0.0001, "loss": 0.0096, "step": 178300 }, { "epoch": 1173.092105263158, "grad_norm": 1.2586883306503296, "learning_rate": 0.0001, "loss": 0.0085, "step": 178310 }, { "epoch": 1173.157894736842, "grad_norm": 1.2856673002243042, "learning_rate": 0.0001, "loss": 0.0095, "step": 178320 }, { "epoch": 1173.2236842105262, "grad_norm": 1.19162917137146, "learning_rate": 0.0001, "loss": 0.0093, "step": 178330 }, { "epoch": 1173.2894736842106, "grad_norm": 0.8176268935203552, "learning_rate": 0.0001, "loss": 0.0085, "step": 178340 }, { "epoch": 1173.3552631578948, "grad_norm": 0.7139531970024109, "learning_rate": 0.0001, "loss": 0.0081, "step": 178350 }, { "epoch": 1173.421052631579, "grad_norm": 0.7016176581382751, "learning_rate": 0.0001, "loss": 0.0101, "step": 178360 }, { "epoch": 1173.4868421052631, "grad_norm": 1.0311745405197144, "learning_rate": 0.0001, "loss": 0.0092, "step": 178370 }, { "epoch": 1173.5526315789473, "grad_norm": 0.8565056920051575, "learning_rate": 0.0001, "loss": 0.0095, "step": 178380 }, { "epoch": 1173.6184210526317, "grad_norm": 1.1574746370315552, "learning_rate": 0.0001, "loss": 0.0098, "step": 178390 }, { "epoch": 1173.6842105263158, "grad_norm": 1.0975223779678345, "learning_rate": 0.0001, "loss": 0.0093, "step": 178400 }, { "epoch": 1173.75, "grad_norm": 0.849980890750885, "learning_rate": 0.0001, "loss": 0.0084, "step": 178410 }, { "epoch": 1173.8157894736842, "grad_norm": 0.8010478019714355, "learning_rate": 0.0001, "loss": 0.0087, "step": 178420 }, { "epoch": 1173.8815789473683, "grad_norm": 0.7263656258583069, "learning_rate": 0.0001, "loss": 0.0089, "step": 178430 }, { "epoch": 1173.9473684210527, "grad_norm": 0.9674796462059021, "learning_rate": 0.0001, "loss": 0.009, "step": 178440 }, { "epoch": 1174.0131578947369, "grad_norm": 0.8462510108947754, "learning_rate": 0.0001, "loss": 0.0106, "step": 178450 }, { "epoch": 1174.078947368421, "grad_norm": 0.921765923500061, "learning_rate": 0.0001, "loss": 0.0102, "step": 178460 }, { "epoch": 1174.1447368421052, "grad_norm": 1.0591344833374023, "learning_rate": 0.0001, "loss": 0.0084, "step": 178470 }, { "epoch": 1174.2105263157894, "grad_norm": 1.122679352760315, "learning_rate": 0.0001, "loss": 0.0091, "step": 178480 }, { "epoch": 1174.2763157894738, "grad_norm": 0.8474733233451843, "learning_rate": 0.0001, "loss": 0.0097, "step": 178490 }, { "epoch": 1174.342105263158, "grad_norm": 0.9375333189964294, "learning_rate": 0.0001, "loss": 0.0086, "step": 178500 }, { "epoch": 1174.407894736842, "grad_norm": 0.9587281942367554, "learning_rate": 0.0001, "loss": 0.0098, "step": 178510 }, { "epoch": 1174.4736842105262, "grad_norm": 0.998461902141571, "learning_rate": 0.0001, "loss": 0.0101, "step": 178520 }, { "epoch": 1174.5394736842106, "grad_norm": 1.0688536167144775, "learning_rate": 0.0001, "loss": 0.0091, "step": 178530 }, { "epoch": 1174.6052631578948, "grad_norm": 0.768993079662323, "learning_rate": 0.0001, "loss": 0.0119, "step": 178540 }, { "epoch": 1174.671052631579, "grad_norm": 0.9926314353942871, "learning_rate": 0.0001, "loss": 0.0088, "step": 178550 }, { "epoch": 1174.7368421052631, "grad_norm": 1.068148136138916, "learning_rate": 0.0001, "loss": 0.0083, "step": 178560 }, { "epoch": 1174.8026315789473, "grad_norm": 0.9381814002990723, "learning_rate": 0.0001, "loss": 0.0107, "step": 178570 }, { "epoch": 1174.8684210526317, "grad_norm": 1.102551817893982, "learning_rate": 0.0001, "loss": 0.0085, "step": 178580 }, { "epoch": 1174.9342105263158, "grad_norm": 0.8753682374954224, "learning_rate": 0.0001, "loss": 0.0089, "step": 178590 }, { "epoch": 1175.0, "grad_norm": 1.0447521209716797, "learning_rate": 0.0001, "loss": 0.0076, "step": 178600 }, { "epoch": 1175.0657894736842, "grad_norm": 1.3192112445831299, "learning_rate": 0.0001, "loss": 0.01, "step": 178610 }, { "epoch": 1175.1315789473683, "grad_norm": 1.2548503875732422, "learning_rate": 0.0001, "loss": 0.0087, "step": 178620 }, { "epoch": 1175.1973684210527, "grad_norm": 1.1852110624313354, "learning_rate": 0.0001, "loss": 0.0099, "step": 178630 }, { "epoch": 1175.2631578947369, "grad_norm": 1.1667160987854004, "learning_rate": 0.0001, "loss": 0.0105, "step": 178640 }, { "epoch": 1175.328947368421, "grad_norm": 0.9693193435668945, "learning_rate": 0.0001, "loss": 0.0082, "step": 178650 }, { "epoch": 1175.3947368421052, "grad_norm": 1.279008150100708, "learning_rate": 0.0001, "loss": 0.0093, "step": 178660 }, { "epoch": 1175.4605263157894, "grad_norm": 0.8803616762161255, "learning_rate": 0.0001, "loss": 0.009, "step": 178670 }, { "epoch": 1175.5263157894738, "grad_norm": 0.7935562133789062, "learning_rate": 0.0001, "loss": 0.0083, "step": 178680 }, { "epoch": 1175.592105263158, "grad_norm": 0.8752424120903015, "learning_rate": 0.0001, "loss": 0.009, "step": 178690 }, { "epoch": 1175.657894736842, "grad_norm": 1.2736647129058838, "learning_rate": 0.0001, "loss": 0.0114, "step": 178700 }, { "epoch": 1175.7236842105262, "grad_norm": 0.9050102829933167, "learning_rate": 0.0001, "loss": 0.0081, "step": 178710 }, { "epoch": 1175.7894736842106, "grad_norm": 0.9366145730018616, "learning_rate": 0.0001, "loss": 0.0091, "step": 178720 }, { "epoch": 1175.8552631578948, "grad_norm": 1.0141912698745728, "learning_rate": 0.0001, "loss": 0.0074, "step": 178730 }, { "epoch": 1175.921052631579, "grad_norm": 0.8957210183143616, "learning_rate": 0.0001, "loss": 0.0085, "step": 178740 }, { "epoch": 1175.9868421052631, "grad_norm": 0.743053674697876, "learning_rate": 0.0001, "loss": 0.011, "step": 178750 }, { "epoch": 1176.0526315789473, "grad_norm": 0.8863754868507385, "learning_rate": 0.0001, "loss": 0.0095, "step": 178760 }, { "epoch": 1176.1184210526317, "grad_norm": 0.7674370408058167, "learning_rate": 0.0001, "loss": 0.0089, "step": 178770 }, { "epoch": 1176.1842105263158, "grad_norm": 0.6406316757202148, "learning_rate": 0.0001, "loss": 0.0089, "step": 178780 }, { "epoch": 1176.25, "grad_norm": 0.5896703600883484, "learning_rate": 0.0001, "loss": 0.0085, "step": 178790 }, { "epoch": 1176.3157894736842, "grad_norm": 0.7623488903045654, "learning_rate": 0.0001, "loss": 0.0092, "step": 178800 }, { "epoch": 1176.3815789473683, "grad_norm": 0.7528663277626038, "learning_rate": 0.0001, "loss": 0.0084, "step": 178810 }, { "epoch": 1176.4473684210527, "grad_norm": 0.9548066854476929, "learning_rate": 0.0001, "loss": 0.0112, "step": 178820 }, { "epoch": 1176.5131578947369, "grad_norm": 0.880696177482605, "learning_rate": 0.0001, "loss": 0.0095, "step": 178830 }, { "epoch": 1176.578947368421, "grad_norm": 1.145169734954834, "learning_rate": 0.0001, "loss": 0.0095, "step": 178840 }, { "epoch": 1176.6447368421052, "grad_norm": 1.2576274871826172, "learning_rate": 0.0001, "loss": 0.0095, "step": 178850 }, { "epoch": 1176.7105263157894, "grad_norm": 1.057856798171997, "learning_rate": 0.0001, "loss": 0.0088, "step": 178860 }, { "epoch": 1176.7763157894738, "grad_norm": 0.9875898957252502, "learning_rate": 0.0001, "loss": 0.0087, "step": 178870 }, { "epoch": 1176.842105263158, "grad_norm": 1.0104905366897583, "learning_rate": 0.0001, "loss": 0.0089, "step": 178880 }, { "epoch": 1176.907894736842, "grad_norm": 0.7599539756774902, "learning_rate": 0.0001, "loss": 0.0085, "step": 178890 }, { "epoch": 1176.9736842105262, "grad_norm": 0.6184892058372498, "learning_rate": 0.0001, "loss": 0.0096, "step": 178900 }, { "epoch": 1177.0394736842106, "grad_norm": 1.0564872026443481, "learning_rate": 0.0001, "loss": 0.0103, "step": 178910 }, { "epoch": 1177.1052631578948, "grad_norm": 0.9271602034568787, "learning_rate": 0.0001, "loss": 0.0074, "step": 178920 }, { "epoch": 1177.171052631579, "grad_norm": 0.9372125267982483, "learning_rate": 0.0001, "loss": 0.0095, "step": 178930 }, { "epoch": 1177.2368421052631, "grad_norm": 0.8659113645553589, "learning_rate": 0.0001, "loss": 0.0114, "step": 178940 }, { "epoch": 1177.3026315789473, "grad_norm": 0.9990260601043701, "learning_rate": 0.0001, "loss": 0.0089, "step": 178950 }, { "epoch": 1177.3684210526317, "grad_norm": 0.9762459993362427, "learning_rate": 0.0001, "loss": 0.0089, "step": 178960 }, { "epoch": 1177.4342105263158, "grad_norm": 0.8299205899238586, "learning_rate": 0.0001, "loss": 0.0099, "step": 178970 }, { "epoch": 1177.5, "grad_norm": 1.0260875225067139, "learning_rate": 0.0001, "loss": 0.0085, "step": 178980 }, { "epoch": 1177.5657894736842, "grad_norm": 0.8214814066886902, "learning_rate": 0.0001, "loss": 0.0087, "step": 178990 }, { "epoch": 1177.6315789473683, "grad_norm": 0.8492797017097473, "learning_rate": 0.0001, "loss": 0.0085, "step": 179000 }, { "epoch": 1177.6973684210527, "grad_norm": 1.093478798866272, "learning_rate": 0.0001, "loss": 0.0098, "step": 179010 }, { "epoch": 1177.7631578947369, "grad_norm": 1.2081449031829834, "learning_rate": 0.0001, "loss": 0.0089, "step": 179020 }, { "epoch": 1177.828947368421, "grad_norm": 1.1940277814865112, "learning_rate": 0.0001, "loss": 0.0087, "step": 179030 }, { "epoch": 1177.8947368421052, "grad_norm": 0.8136548399925232, "learning_rate": 0.0001, "loss": 0.0107, "step": 179040 }, { "epoch": 1177.9605263157894, "grad_norm": 0.9597692489624023, "learning_rate": 0.0001, "loss": 0.0102, "step": 179050 }, { "epoch": 1178.0263157894738, "grad_norm": 1.0133211612701416, "learning_rate": 0.0001, "loss": 0.0104, "step": 179060 }, { "epoch": 1178.092105263158, "grad_norm": 1.08301842212677, "learning_rate": 0.0001, "loss": 0.0091, "step": 179070 }, { "epoch": 1178.157894736842, "grad_norm": 0.9614161252975464, "learning_rate": 0.0001, "loss": 0.0103, "step": 179080 }, { "epoch": 1178.2236842105262, "grad_norm": 1.0885823965072632, "learning_rate": 0.0001, "loss": 0.0098, "step": 179090 }, { "epoch": 1178.2894736842106, "grad_norm": 0.9387257695198059, "learning_rate": 0.0001, "loss": 0.0089, "step": 179100 }, { "epoch": 1178.3552631578948, "grad_norm": 0.7102392911911011, "learning_rate": 0.0001, "loss": 0.0078, "step": 179110 }, { "epoch": 1178.421052631579, "grad_norm": 0.9195134043693542, "learning_rate": 0.0001, "loss": 0.0088, "step": 179120 }, { "epoch": 1178.4868421052631, "grad_norm": 1.0749733448028564, "learning_rate": 0.0001, "loss": 0.0082, "step": 179130 }, { "epoch": 1178.5526315789473, "grad_norm": 1.2915416955947876, "learning_rate": 0.0001, "loss": 0.0102, "step": 179140 }, { "epoch": 1178.6184210526317, "grad_norm": 0.8285413980484009, "learning_rate": 0.0001, "loss": 0.0094, "step": 179150 }, { "epoch": 1178.6842105263158, "grad_norm": 0.7751388549804688, "learning_rate": 0.0001, "loss": 0.0087, "step": 179160 }, { "epoch": 1178.75, "grad_norm": 0.8202561736106873, "learning_rate": 0.0001, "loss": 0.0077, "step": 179170 }, { "epoch": 1178.8157894736842, "grad_norm": 0.9537473320960999, "learning_rate": 0.0001, "loss": 0.0111, "step": 179180 }, { "epoch": 1178.8815789473683, "grad_norm": 0.9641271829605103, "learning_rate": 0.0001, "loss": 0.0109, "step": 179190 }, { "epoch": 1178.9473684210527, "grad_norm": 0.962111234664917, "learning_rate": 0.0001, "loss": 0.011, "step": 179200 }, { "epoch": 1179.0131578947369, "grad_norm": 0.8706178665161133, "learning_rate": 0.0001, "loss": 0.0089, "step": 179210 }, { "epoch": 1179.078947368421, "grad_norm": 1.0734243392944336, "learning_rate": 0.0001, "loss": 0.0086, "step": 179220 }, { "epoch": 1179.1447368421052, "grad_norm": 1.075181484222412, "learning_rate": 0.0001, "loss": 0.0087, "step": 179230 }, { "epoch": 1179.2105263157894, "grad_norm": 1.1446199417114258, "learning_rate": 0.0001, "loss": 0.0085, "step": 179240 }, { "epoch": 1179.2763157894738, "grad_norm": 0.9987981915473938, "learning_rate": 0.0001, "loss": 0.0094, "step": 179250 }, { "epoch": 1179.342105263158, "grad_norm": 0.8230014443397522, "learning_rate": 0.0001, "loss": 0.0089, "step": 179260 }, { "epoch": 1179.407894736842, "grad_norm": 0.7945327162742615, "learning_rate": 0.0001, "loss": 0.0102, "step": 179270 }, { "epoch": 1179.4736842105262, "grad_norm": 0.7237135171890259, "learning_rate": 0.0001, "loss": 0.0089, "step": 179280 }, { "epoch": 1179.5394736842106, "grad_norm": 0.8276739716529846, "learning_rate": 0.0001, "loss": 0.0095, "step": 179290 }, { "epoch": 1179.6052631578948, "grad_norm": 1.0555814504623413, "learning_rate": 0.0001, "loss": 0.0091, "step": 179300 }, { "epoch": 1179.671052631579, "grad_norm": 1.0211066007614136, "learning_rate": 0.0001, "loss": 0.0087, "step": 179310 }, { "epoch": 1179.7368421052631, "grad_norm": 1.0281317234039307, "learning_rate": 0.0001, "loss": 0.0111, "step": 179320 }, { "epoch": 1179.8026315789473, "grad_norm": 1.0497641563415527, "learning_rate": 0.0001, "loss": 0.0088, "step": 179330 }, { "epoch": 1179.8684210526317, "grad_norm": 1.2660293579101562, "learning_rate": 0.0001, "loss": 0.0105, "step": 179340 }, { "epoch": 1179.9342105263158, "grad_norm": 0.6678763628005981, "learning_rate": 0.0001, "loss": 0.01, "step": 179350 }, { "epoch": 1180.0, "grad_norm": 1.2870025634765625, "learning_rate": 0.0001, "loss": 0.0083, "step": 179360 }, { "epoch": 1180.0657894736842, "grad_norm": 1.0261781215667725, "learning_rate": 0.0001, "loss": 0.0079, "step": 179370 }, { "epoch": 1180.1315789473683, "grad_norm": 0.9007861614227295, "learning_rate": 0.0001, "loss": 0.0089, "step": 179380 }, { "epoch": 1180.1973684210527, "grad_norm": 0.9087187647819519, "learning_rate": 0.0001, "loss": 0.0096, "step": 179390 }, { "epoch": 1180.2631578947369, "grad_norm": 1.1201528310775757, "learning_rate": 0.0001, "loss": 0.009, "step": 179400 }, { "epoch": 1180.328947368421, "grad_norm": 1.268721342086792, "learning_rate": 0.0001, "loss": 0.0101, "step": 179410 }, { "epoch": 1180.3947368421052, "grad_norm": 1.1803576946258545, "learning_rate": 0.0001, "loss": 0.0091, "step": 179420 }, { "epoch": 1180.4605263157894, "grad_norm": 0.8738627433776855, "learning_rate": 0.0001, "loss": 0.0098, "step": 179430 }, { "epoch": 1180.5263157894738, "grad_norm": 0.9588783383369446, "learning_rate": 0.0001, "loss": 0.0083, "step": 179440 }, { "epoch": 1180.592105263158, "grad_norm": 0.8148517608642578, "learning_rate": 0.0001, "loss": 0.0084, "step": 179450 }, { "epoch": 1180.657894736842, "grad_norm": 0.8594145774841309, "learning_rate": 0.0001, "loss": 0.0093, "step": 179460 }, { "epoch": 1180.7236842105262, "grad_norm": 0.658765435218811, "learning_rate": 0.0001, "loss": 0.009, "step": 179470 }, { "epoch": 1180.7894736842106, "grad_norm": 0.9419595003128052, "learning_rate": 0.0001, "loss": 0.0099, "step": 179480 }, { "epoch": 1180.8552631578948, "grad_norm": 0.9967935681343079, "learning_rate": 0.0001, "loss": 0.0075, "step": 179490 }, { "epoch": 1180.921052631579, "grad_norm": 1.151293158531189, "learning_rate": 0.0001, "loss": 0.0105, "step": 179500 }, { "epoch": 1180.9868421052631, "grad_norm": 1.1010416746139526, "learning_rate": 0.0001, "loss": 0.0084, "step": 179510 }, { "epoch": 1181.0526315789473, "grad_norm": 1.184219241142273, "learning_rate": 0.0001, "loss": 0.009, "step": 179520 }, { "epoch": 1181.1184210526317, "grad_norm": 1.5013480186462402, "learning_rate": 0.0001, "loss": 0.0081, "step": 179530 }, { "epoch": 1181.1842105263158, "grad_norm": 1.2943904399871826, "learning_rate": 0.0001, "loss": 0.0086, "step": 179540 }, { "epoch": 1181.25, "grad_norm": 1.3031779527664185, "learning_rate": 0.0001, "loss": 0.0087, "step": 179550 }, { "epoch": 1181.3157894736842, "grad_norm": 0.875571072101593, "learning_rate": 0.0001, "loss": 0.0074, "step": 179560 }, { "epoch": 1181.3815789473683, "grad_norm": 1.010248064994812, "learning_rate": 0.0001, "loss": 0.0078, "step": 179570 }, { "epoch": 1181.4473684210527, "grad_norm": 1.0535480976104736, "learning_rate": 0.0001, "loss": 0.0091, "step": 179580 }, { "epoch": 1181.5131578947369, "grad_norm": 0.9515687823295593, "learning_rate": 0.0001, "loss": 0.0088, "step": 179590 }, { "epoch": 1181.578947368421, "grad_norm": 0.5328004360198975, "learning_rate": 0.0001, "loss": 0.0081, "step": 179600 }, { "epoch": 1181.6447368421052, "grad_norm": 1.2475734949111938, "learning_rate": 0.0001, "loss": 0.0097, "step": 179610 }, { "epoch": 1181.7105263157894, "grad_norm": 0.8466393351554871, "learning_rate": 0.0001, "loss": 0.0099, "step": 179620 }, { "epoch": 1181.7763157894738, "grad_norm": 0.8406497240066528, "learning_rate": 0.0001, "loss": 0.0105, "step": 179630 }, { "epoch": 1181.842105263158, "grad_norm": 1.0086272954940796, "learning_rate": 0.0001, "loss": 0.0093, "step": 179640 }, { "epoch": 1181.907894736842, "grad_norm": 0.9933298826217651, "learning_rate": 0.0001, "loss": 0.0093, "step": 179650 }, { "epoch": 1181.9736842105262, "grad_norm": 1.049748420715332, "learning_rate": 0.0001, "loss": 0.0087, "step": 179660 }, { "epoch": 1182.0394736842106, "grad_norm": 1.0045021772384644, "learning_rate": 0.0001, "loss": 0.0101, "step": 179670 }, { "epoch": 1182.1052631578948, "grad_norm": 0.8957365155220032, "learning_rate": 0.0001, "loss": 0.0076, "step": 179680 }, { "epoch": 1182.171052631579, "grad_norm": 0.8856828212738037, "learning_rate": 0.0001, "loss": 0.0075, "step": 179690 }, { "epoch": 1182.2368421052631, "grad_norm": 1.2047641277313232, "learning_rate": 0.0001, "loss": 0.0094, "step": 179700 }, { "epoch": 1182.3026315789473, "grad_norm": 1.1860121488571167, "learning_rate": 0.0001, "loss": 0.008, "step": 179710 }, { "epoch": 1182.3684210526317, "grad_norm": 1.2543531656265259, "learning_rate": 0.0001, "loss": 0.0095, "step": 179720 }, { "epoch": 1182.4342105263158, "grad_norm": 1.3099639415740967, "learning_rate": 0.0001, "loss": 0.0079, "step": 179730 }, { "epoch": 1182.5, "grad_norm": 1.1823962926864624, "learning_rate": 0.0001, "loss": 0.009, "step": 179740 }, { "epoch": 1182.5657894736842, "grad_norm": 0.9950479865074158, "learning_rate": 0.0001, "loss": 0.0094, "step": 179750 }, { "epoch": 1182.6315789473683, "grad_norm": 1.14492928981781, "learning_rate": 0.0001, "loss": 0.0082, "step": 179760 }, { "epoch": 1182.6973684210527, "grad_norm": 0.985609233379364, "learning_rate": 0.0001, "loss": 0.0086, "step": 179770 }, { "epoch": 1182.7631578947369, "grad_norm": 1.0433059930801392, "learning_rate": 0.0001, "loss": 0.0078, "step": 179780 }, { "epoch": 1182.828947368421, "grad_norm": 0.9120721220970154, "learning_rate": 0.0001, "loss": 0.0107, "step": 179790 }, { "epoch": 1182.8947368421052, "grad_norm": 0.8850807547569275, "learning_rate": 0.0001, "loss": 0.0092, "step": 179800 }, { "epoch": 1182.9605263157894, "grad_norm": 0.6139315962791443, "learning_rate": 0.0001, "loss": 0.0106, "step": 179810 }, { "epoch": 1183.0263157894738, "grad_norm": 1.1187626123428345, "learning_rate": 0.0001, "loss": 0.0098, "step": 179820 }, { "epoch": 1183.092105263158, "grad_norm": 1.0548551082611084, "learning_rate": 0.0001, "loss": 0.0098, "step": 179830 }, { "epoch": 1183.157894736842, "grad_norm": 0.9442790746688843, "learning_rate": 0.0001, "loss": 0.0088, "step": 179840 }, { "epoch": 1183.2236842105262, "grad_norm": 0.7005117535591125, "learning_rate": 0.0001, "loss": 0.0101, "step": 179850 }, { "epoch": 1183.2894736842106, "grad_norm": 0.5735732913017273, "learning_rate": 0.0001, "loss": 0.0086, "step": 179860 }, { "epoch": 1183.3552631578948, "grad_norm": 1.027298092842102, "learning_rate": 0.0001, "loss": 0.0095, "step": 179870 }, { "epoch": 1183.421052631579, "grad_norm": 1.3360284566879272, "learning_rate": 0.0001, "loss": 0.0091, "step": 179880 }, { "epoch": 1183.4868421052631, "grad_norm": 0.8555198907852173, "learning_rate": 0.0001, "loss": 0.0091, "step": 179890 }, { "epoch": 1183.5526315789473, "grad_norm": 1.2100673913955688, "learning_rate": 0.0001, "loss": 0.0105, "step": 179900 }, { "epoch": 1183.6184210526317, "grad_norm": 0.8696701526641846, "learning_rate": 0.0001, "loss": 0.0091, "step": 179910 }, { "epoch": 1183.6842105263158, "grad_norm": 1.044405460357666, "learning_rate": 0.0001, "loss": 0.0086, "step": 179920 }, { "epoch": 1183.75, "grad_norm": 0.6577524542808533, "learning_rate": 0.0001, "loss": 0.0085, "step": 179930 }, { "epoch": 1183.8157894736842, "grad_norm": 0.8520231246948242, "learning_rate": 0.0001, "loss": 0.0085, "step": 179940 }, { "epoch": 1183.8815789473683, "grad_norm": 0.5790631175041199, "learning_rate": 0.0001, "loss": 0.0067, "step": 179950 }, { "epoch": 1183.9473684210527, "grad_norm": 0.6821280717849731, "learning_rate": 0.0001, "loss": 0.0085, "step": 179960 }, { "epoch": 1184.0131578947369, "grad_norm": 1.1946996450424194, "learning_rate": 0.0001, "loss": 0.009, "step": 179970 }, { "epoch": 1184.078947368421, "grad_norm": 0.8206828832626343, "learning_rate": 0.0001, "loss": 0.0096, "step": 179980 }, { "epoch": 1184.1447368421052, "grad_norm": 1.200190782546997, "learning_rate": 0.0001, "loss": 0.0089, "step": 179990 }, { "epoch": 1184.2105263157894, "grad_norm": 1.0176090002059937, "learning_rate": 0.0001, "loss": 0.0096, "step": 180000 }, { "epoch": 1184.2763157894738, "grad_norm": 1.791040301322937, "learning_rate": 0.0001, "loss": 0.0098, "step": 180010 }, { "epoch": 1184.342105263158, "grad_norm": 1.1457858085632324, "learning_rate": 0.0001, "loss": 0.0097, "step": 180020 }, { "epoch": 1184.407894736842, "grad_norm": 1.3513076305389404, "learning_rate": 0.0001, "loss": 0.0092, "step": 180030 }, { "epoch": 1184.4736842105262, "grad_norm": 1.1888035535812378, "learning_rate": 0.0001, "loss": 0.0075, "step": 180040 }, { "epoch": 1184.5394736842106, "grad_norm": 1.3418779373168945, "learning_rate": 0.0001, "loss": 0.009, "step": 180050 }, { "epoch": 1184.6052631578948, "grad_norm": 1.2430517673492432, "learning_rate": 0.0001, "loss": 0.0095, "step": 180060 }, { "epoch": 1184.671052631579, "grad_norm": 1.299909234046936, "learning_rate": 0.0001, "loss": 0.0089, "step": 180070 }, { "epoch": 1184.7368421052631, "grad_norm": 1.187268614768982, "learning_rate": 0.0001, "loss": 0.0085, "step": 180080 }, { "epoch": 1184.8026315789473, "grad_norm": 1.0313128232955933, "learning_rate": 0.0001, "loss": 0.0089, "step": 180090 }, { "epoch": 1184.8684210526317, "grad_norm": 0.9773435592651367, "learning_rate": 0.0001, "loss": 0.0095, "step": 180100 }, { "epoch": 1184.9342105263158, "grad_norm": 0.9462606310844421, "learning_rate": 0.0001, "loss": 0.0074, "step": 180110 }, { "epoch": 1185.0, "grad_norm": 1.008164882659912, "learning_rate": 0.0001, "loss": 0.0093, "step": 180120 }, { "epoch": 1185.0657894736842, "grad_norm": 1.0677521228790283, "learning_rate": 0.0001, "loss": 0.0116, "step": 180130 }, { "epoch": 1185.1315789473683, "grad_norm": 1.056257963180542, "learning_rate": 0.0001, "loss": 0.009, "step": 180140 }, { "epoch": 1185.1973684210527, "grad_norm": 1.3330624103546143, "learning_rate": 0.0001, "loss": 0.0072, "step": 180150 }, { "epoch": 1185.2631578947369, "grad_norm": 1.2008347511291504, "learning_rate": 0.0001, "loss": 0.0071, "step": 180160 }, { "epoch": 1185.328947368421, "grad_norm": 1.0260757207870483, "learning_rate": 0.0001, "loss": 0.008, "step": 180170 }, { "epoch": 1185.3947368421052, "grad_norm": 0.9635398983955383, "learning_rate": 0.0001, "loss": 0.0094, "step": 180180 }, { "epoch": 1185.4605263157894, "grad_norm": 0.9844223260879517, "learning_rate": 0.0001, "loss": 0.0104, "step": 180190 }, { "epoch": 1185.5263157894738, "grad_norm": 0.9323477745056152, "learning_rate": 0.0001, "loss": 0.0091, "step": 180200 }, { "epoch": 1185.592105263158, "grad_norm": 0.7835142612457275, "learning_rate": 0.0001, "loss": 0.0076, "step": 180210 }, { "epoch": 1185.657894736842, "grad_norm": 0.8820164799690247, "learning_rate": 0.0001, "loss": 0.0071, "step": 180220 }, { "epoch": 1185.7236842105262, "grad_norm": 1.1750354766845703, "learning_rate": 0.0001, "loss": 0.0086, "step": 180230 }, { "epoch": 1185.7894736842106, "grad_norm": 1.058611512184143, "learning_rate": 0.0001, "loss": 0.0096, "step": 180240 }, { "epoch": 1185.8552631578948, "grad_norm": 1.0352505445480347, "learning_rate": 0.0001, "loss": 0.0095, "step": 180250 }, { "epoch": 1185.921052631579, "grad_norm": 0.8906688690185547, "learning_rate": 0.0001, "loss": 0.0107, "step": 180260 }, { "epoch": 1185.9868421052631, "grad_norm": 1.0255763530731201, "learning_rate": 0.0001, "loss": 0.0078, "step": 180270 }, { "epoch": 1186.0526315789473, "grad_norm": 0.9030811190605164, "learning_rate": 0.0001, "loss": 0.0099, "step": 180280 }, { "epoch": 1186.1184210526317, "grad_norm": 0.7949104905128479, "learning_rate": 0.0001, "loss": 0.0083, "step": 180290 }, { "epoch": 1186.1842105263158, "grad_norm": 1.2452713251113892, "learning_rate": 0.0001, "loss": 0.0095, "step": 180300 }, { "epoch": 1186.25, "grad_norm": 0.8592615127563477, "learning_rate": 0.0001, "loss": 0.0073, "step": 180310 }, { "epoch": 1186.3157894736842, "grad_norm": 1.0579206943511963, "learning_rate": 0.0001, "loss": 0.0085, "step": 180320 }, { "epoch": 1186.3815789473683, "grad_norm": 1.0151481628417969, "learning_rate": 0.0001, "loss": 0.0093, "step": 180330 }, { "epoch": 1186.4473684210527, "grad_norm": 1.2412348985671997, "learning_rate": 0.0001, "loss": 0.0092, "step": 180340 }, { "epoch": 1186.5131578947369, "grad_norm": 0.9836140275001526, "learning_rate": 0.0001, "loss": 0.0106, "step": 180350 }, { "epoch": 1186.578947368421, "grad_norm": 0.8268551826477051, "learning_rate": 0.0001, "loss": 0.0084, "step": 180360 }, { "epoch": 1186.6447368421052, "grad_norm": 1.1718807220458984, "learning_rate": 0.0001, "loss": 0.0098, "step": 180370 }, { "epoch": 1186.7105263157894, "grad_norm": 0.9186016321182251, "learning_rate": 0.0001, "loss": 0.0098, "step": 180380 }, { "epoch": 1186.7763157894738, "grad_norm": 1.0187981128692627, "learning_rate": 0.0001, "loss": 0.0086, "step": 180390 }, { "epoch": 1186.842105263158, "grad_norm": 0.7766441106796265, "learning_rate": 0.0001, "loss": 0.0079, "step": 180400 }, { "epoch": 1186.907894736842, "grad_norm": 1.1324797868728638, "learning_rate": 0.0001, "loss": 0.0083, "step": 180410 }, { "epoch": 1186.9736842105262, "grad_norm": 1.3124502897262573, "learning_rate": 0.0001, "loss": 0.0101, "step": 180420 }, { "epoch": 1187.0394736842106, "grad_norm": 1.0421258211135864, "learning_rate": 0.0001, "loss": 0.0094, "step": 180430 }, { "epoch": 1187.1052631578948, "grad_norm": 0.9878388047218323, "learning_rate": 0.0001, "loss": 0.0085, "step": 180440 }, { "epoch": 1187.171052631579, "grad_norm": 1.404099941253662, "learning_rate": 0.0001, "loss": 0.0083, "step": 180450 }, { "epoch": 1187.2368421052631, "grad_norm": 0.8869650959968567, "learning_rate": 0.0001, "loss": 0.0091, "step": 180460 }, { "epoch": 1187.3026315789473, "grad_norm": 0.7417500615119934, "learning_rate": 0.0001, "loss": 0.0097, "step": 180470 }, { "epoch": 1187.3684210526317, "grad_norm": 0.8958377838134766, "learning_rate": 0.0001, "loss": 0.0094, "step": 180480 }, { "epoch": 1187.4342105263158, "grad_norm": 1.3181413412094116, "learning_rate": 0.0001, "loss": 0.0084, "step": 180490 }, { "epoch": 1187.5, "grad_norm": 1.1831432580947876, "learning_rate": 0.0001, "loss": 0.0091, "step": 180500 }, { "epoch": 1187.5657894736842, "grad_norm": 0.9017171263694763, "learning_rate": 0.0001, "loss": 0.0078, "step": 180510 }, { "epoch": 1187.6315789473683, "grad_norm": 1.0367988348007202, "learning_rate": 0.0001, "loss": 0.0113, "step": 180520 }, { "epoch": 1187.6973684210527, "grad_norm": 0.9045135974884033, "learning_rate": 0.0001, "loss": 0.0084, "step": 180530 }, { "epoch": 1187.7631578947369, "grad_norm": 0.8042669296264648, "learning_rate": 0.0001, "loss": 0.0083, "step": 180540 }, { "epoch": 1187.828947368421, "grad_norm": 1.19108247756958, "learning_rate": 0.0001, "loss": 0.0099, "step": 180550 }, { "epoch": 1187.8947368421052, "grad_norm": 1.238985300064087, "learning_rate": 0.0001, "loss": 0.009, "step": 180560 }, { "epoch": 1187.9605263157894, "grad_norm": 1.5841645002365112, "learning_rate": 0.0001, "loss": 0.0081, "step": 180570 }, { "epoch": 1188.0263157894738, "grad_norm": 1.1452237367630005, "learning_rate": 0.0001, "loss": 0.0099, "step": 180580 }, { "epoch": 1188.092105263158, "grad_norm": 1.4237656593322754, "learning_rate": 0.0001, "loss": 0.0102, "step": 180590 }, { "epoch": 1188.157894736842, "grad_norm": 1.343073844909668, "learning_rate": 0.0001, "loss": 0.0097, "step": 180600 }, { "epoch": 1188.2236842105262, "grad_norm": 0.9501973986625671, "learning_rate": 0.0001, "loss": 0.0103, "step": 180610 }, { "epoch": 1188.2894736842106, "grad_norm": 0.8718429803848267, "learning_rate": 0.0001, "loss": 0.0104, "step": 180620 }, { "epoch": 1188.3552631578948, "grad_norm": 1.1177846193313599, "learning_rate": 0.0001, "loss": 0.0092, "step": 180630 }, { "epoch": 1188.421052631579, "grad_norm": 0.7141475677490234, "learning_rate": 0.0001, "loss": 0.0078, "step": 180640 }, { "epoch": 1188.4868421052631, "grad_norm": 0.9596945643424988, "learning_rate": 0.0001, "loss": 0.0085, "step": 180650 }, { "epoch": 1188.5526315789473, "grad_norm": 1.1708953380584717, "learning_rate": 0.0001, "loss": 0.0081, "step": 180660 }, { "epoch": 1188.6184210526317, "grad_norm": 1.1487609148025513, "learning_rate": 0.0001, "loss": 0.0087, "step": 180670 }, { "epoch": 1188.6842105263158, "grad_norm": 0.9982112050056458, "learning_rate": 0.0001, "loss": 0.0084, "step": 180680 }, { "epoch": 1188.75, "grad_norm": 1.1738011837005615, "learning_rate": 0.0001, "loss": 0.0099, "step": 180690 }, { "epoch": 1188.8157894736842, "grad_norm": 1.2586804628372192, "learning_rate": 0.0001, "loss": 0.0085, "step": 180700 }, { "epoch": 1188.8815789473683, "grad_norm": 1.6938642263412476, "learning_rate": 0.0001, "loss": 0.0086, "step": 180710 }, { "epoch": 1188.9473684210527, "grad_norm": 1.3859210014343262, "learning_rate": 0.0001, "loss": 0.0094, "step": 180720 }, { "epoch": 1189.0131578947369, "grad_norm": 1.3656494617462158, "learning_rate": 0.0001, "loss": 0.0094, "step": 180730 }, { "epoch": 1189.078947368421, "grad_norm": 1.3069894313812256, "learning_rate": 0.0001, "loss": 0.007, "step": 180740 }, { "epoch": 1189.1447368421052, "grad_norm": 1.5368648767471313, "learning_rate": 0.0001, "loss": 0.0086, "step": 180750 }, { "epoch": 1189.2105263157894, "grad_norm": 0.9646233916282654, "learning_rate": 0.0001, "loss": 0.0081, "step": 180760 }, { "epoch": 1189.2763157894738, "grad_norm": 1.1789246797561646, "learning_rate": 0.0001, "loss": 0.0083, "step": 180770 }, { "epoch": 1189.342105263158, "grad_norm": 0.9047682881355286, "learning_rate": 0.0001, "loss": 0.0083, "step": 180780 }, { "epoch": 1189.407894736842, "grad_norm": 1.1768347024917603, "learning_rate": 0.0001, "loss": 0.0087, "step": 180790 }, { "epoch": 1189.4736842105262, "grad_norm": 0.8927736878395081, "learning_rate": 0.0001, "loss": 0.0097, "step": 180800 }, { "epoch": 1189.5394736842106, "grad_norm": 1.3227018117904663, "learning_rate": 0.0001, "loss": 0.0093, "step": 180810 }, { "epoch": 1189.6052631578948, "grad_norm": 1.1930198669433594, "learning_rate": 0.0001, "loss": 0.0091, "step": 180820 }, { "epoch": 1189.671052631579, "grad_norm": 0.9631125926971436, "learning_rate": 0.0001, "loss": 0.0092, "step": 180830 }, { "epoch": 1189.7368421052631, "grad_norm": 0.9095155000686646, "learning_rate": 0.0001, "loss": 0.0104, "step": 180840 }, { "epoch": 1189.8026315789473, "grad_norm": 0.9870935082435608, "learning_rate": 0.0001, "loss": 0.01, "step": 180850 }, { "epoch": 1189.8684210526317, "grad_norm": 0.9359210133552551, "learning_rate": 0.0001, "loss": 0.0095, "step": 180860 }, { "epoch": 1189.9342105263158, "grad_norm": 1.2630891799926758, "learning_rate": 0.0001, "loss": 0.0092, "step": 180870 }, { "epoch": 1190.0, "grad_norm": 0.9118783473968506, "learning_rate": 0.0001, "loss": 0.0073, "step": 180880 }, { "epoch": 1190.0657894736842, "grad_norm": 0.9447109699249268, "learning_rate": 0.0001, "loss": 0.0098, "step": 180890 }, { "epoch": 1190.1315789473683, "grad_norm": 0.8619827032089233, "learning_rate": 0.0001, "loss": 0.0091, "step": 180900 }, { "epoch": 1190.1973684210527, "grad_norm": 1.0921404361724854, "learning_rate": 0.0001, "loss": 0.011, "step": 180910 }, { "epoch": 1190.2631578947369, "grad_norm": 0.7819488048553467, "learning_rate": 0.0001, "loss": 0.0095, "step": 180920 }, { "epoch": 1190.328947368421, "grad_norm": 0.9675580263137817, "learning_rate": 0.0001, "loss": 0.0075, "step": 180930 }, { "epoch": 1190.3947368421052, "grad_norm": 0.9420514106750488, "learning_rate": 0.0001, "loss": 0.0094, "step": 180940 }, { "epoch": 1190.4605263157894, "grad_norm": 1.3097189664840698, "learning_rate": 0.0001, "loss": 0.0083, "step": 180950 }, { "epoch": 1190.5263157894738, "grad_norm": 1.0128648281097412, "learning_rate": 0.0001, "loss": 0.0095, "step": 180960 }, { "epoch": 1190.592105263158, "grad_norm": 0.833601713180542, "learning_rate": 0.0001, "loss": 0.0097, "step": 180970 }, { "epoch": 1190.657894736842, "grad_norm": 0.7364785671234131, "learning_rate": 0.0001, "loss": 0.0094, "step": 180980 }, { "epoch": 1190.7236842105262, "grad_norm": 1.0391943454742432, "learning_rate": 0.0001, "loss": 0.011, "step": 180990 }, { "epoch": 1190.7894736842106, "grad_norm": 0.9360489845275879, "learning_rate": 0.0001, "loss": 0.0094, "step": 181000 }, { "epoch": 1190.8552631578948, "grad_norm": 0.8230446577072144, "learning_rate": 0.0001, "loss": 0.0109, "step": 181010 }, { "epoch": 1190.921052631579, "grad_norm": 0.6308891773223877, "learning_rate": 0.0001, "loss": 0.0094, "step": 181020 }, { "epoch": 1190.9868421052631, "grad_norm": 0.9240980744361877, "learning_rate": 0.0001, "loss": 0.0108, "step": 181030 }, { "epoch": 1191.0526315789473, "grad_norm": 0.9708207845687866, "learning_rate": 0.0001, "loss": 0.0101, "step": 181040 }, { "epoch": 1191.1184210526317, "grad_norm": 1.2800960540771484, "learning_rate": 0.0001, "loss": 0.0103, "step": 181050 }, { "epoch": 1191.1842105263158, "grad_norm": 0.9510250687599182, "learning_rate": 0.0001, "loss": 0.009, "step": 181060 }, { "epoch": 1191.25, "grad_norm": 0.8674155473709106, "learning_rate": 0.0001, "loss": 0.0097, "step": 181070 }, { "epoch": 1191.3157894736842, "grad_norm": 1.276930332183838, "learning_rate": 0.0001, "loss": 0.0097, "step": 181080 }, { "epoch": 1191.3815789473683, "grad_norm": 1.3920434713363647, "learning_rate": 0.0001, "loss": 0.0115, "step": 181090 }, { "epoch": 1191.4473684210527, "grad_norm": 0.7923642992973328, "learning_rate": 0.0001, "loss": 0.0094, "step": 181100 }, { "epoch": 1191.5131578947369, "grad_norm": 1.4579869508743286, "learning_rate": 0.0001, "loss": 0.0109, "step": 181110 }, { "epoch": 1191.578947368421, "grad_norm": 0.9362051486968994, "learning_rate": 0.0001, "loss": 0.0113, "step": 181120 }, { "epoch": 1191.6447368421052, "grad_norm": 1.371772050857544, "learning_rate": 0.0001, "loss": 0.0097, "step": 181130 }, { "epoch": 1191.7105263157894, "grad_norm": 1.1880755424499512, "learning_rate": 0.0001, "loss": 0.01, "step": 181140 }, { "epoch": 1191.7763157894738, "grad_norm": 0.8417680859565735, "learning_rate": 0.0001, "loss": 0.0086, "step": 181150 }, { "epoch": 1191.842105263158, "grad_norm": 0.9738719463348389, "learning_rate": 0.0001, "loss": 0.0083, "step": 181160 }, { "epoch": 1191.907894736842, "grad_norm": 0.8332647085189819, "learning_rate": 0.0001, "loss": 0.0112, "step": 181170 }, { "epoch": 1191.9736842105262, "grad_norm": 0.8110590577125549, "learning_rate": 0.0001, "loss": 0.0097, "step": 181180 }, { "epoch": 1192.0394736842106, "grad_norm": 0.7889165878295898, "learning_rate": 0.0001, "loss": 0.0091, "step": 181190 }, { "epoch": 1192.1052631578948, "grad_norm": 0.9631662368774414, "learning_rate": 0.0001, "loss": 0.0077, "step": 181200 }, { "epoch": 1192.171052631579, "grad_norm": 0.9708088040351868, "learning_rate": 0.0001, "loss": 0.0094, "step": 181210 }, { "epoch": 1192.2368421052631, "grad_norm": 0.9359713196754456, "learning_rate": 0.0001, "loss": 0.011, "step": 181220 }, { "epoch": 1192.3026315789473, "grad_norm": 0.9870832562446594, "learning_rate": 0.0001, "loss": 0.0093, "step": 181230 }, { "epoch": 1192.3684210526317, "grad_norm": 1.1724073886871338, "learning_rate": 0.0001, "loss": 0.0114, "step": 181240 }, { "epoch": 1192.4342105263158, "grad_norm": 0.7455830574035645, "learning_rate": 0.0001, "loss": 0.0098, "step": 181250 }, { "epoch": 1192.5, "grad_norm": 0.9996033906936646, "learning_rate": 0.0001, "loss": 0.0125, "step": 181260 }, { "epoch": 1192.5657894736842, "grad_norm": 1.123407006263733, "learning_rate": 0.0001, "loss": 0.0094, "step": 181270 }, { "epoch": 1192.6315789473683, "grad_norm": 0.9888333678245544, "learning_rate": 0.0001, "loss": 0.012, "step": 181280 }, { "epoch": 1192.6973684210527, "grad_norm": 1.7157537937164307, "learning_rate": 0.0001, "loss": 0.0103, "step": 181290 }, { "epoch": 1192.7631578947369, "grad_norm": 1.2598707675933838, "learning_rate": 0.0001, "loss": 0.01, "step": 181300 }, { "epoch": 1192.828947368421, "grad_norm": 1.1619699001312256, "learning_rate": 0.0001, "loss": 0.0094, "step": 181310 }, { "epoch": 1192.8947368421052, "grad_norm": 0.9161400198936462, "learning_rate": 0.0001, "loss": 0.0118, "step": 181320 }, { "epoch": 1192.9605263157894, "grad_norm": 0.7341195344924927, "learning_rate": 0.0001, "loss": 0.0082, "step": 181330 }, { "epoch": 1193.0263157894738, "grad_norm": 1.1331208944320679, "learning_rate": 0.0001, "loss": 0.0089, "step": 181340 }, { "epoch": 1193.092105263158, "grad_norm": 1.149287223815918, "learning_rate": 0.0001, "loss": 0.0108, "step": 181350 }, { "epoch": 1193.157894736842, "grad_norm": 0.9564964175224304, "learning_rate": 0.0001, "loss": 0.0106, "step": 181360 }, { "epoch": 1193.2236842105262, "grad_norm": 1.252368450164795, "learning_rate": 0.0001, "loss": 0.0121, "step": 181370 }, { "epoch": 1193.2894736842106, "grad_norm": 1.285278558731079, "learning_rate": 0.0001, "loss": 0.0092, "step": 181380 }, { "epoch": 1193.3552631578948, "grad_norm": 1.0179182291030884, "learning_rate": 0.0001, "loss": 0.0128, "step": 181390 }, { "epoch": 1193.421052631579, "grad_norm": 0.8512329459190369, "learning_rate": 0.0001, "loss": 0.0128, "step": 181400 }, { "epoch": 1193.4868421052631, "grad_norm": 1.3857789039611816, "learning_rate": 0.0001, "loss": 0.0089, "step": 181410 }, { "epoch": 1193.5526315789473, "grad_norm": 1.1037719249725342, "learning_rate": 0.0001, "loss": 0.0108, "step": 181420 }, { "epoch": 1193.6184210526317, "grad_norm": 1.2100577354431152, "learning_rate": 0.0001, "loss": 0.0106, "step": 181430 }, { "epoch": 1193.6842105263158, "grad_norm": 1.3522088527679443, "learning_rate": 0.0001, "loss": 0.0108, "step": 181440 }, { "epoch": 1193.75, "grad_norm": 1.1661986112594604, "learning_rate": 0.0001, "loss": 0.0119, "step": 181450 }, { "epoch": 1193.8157894736842, "grad_norm": 1.2195343971252441, "learning_rate": 0.0001, "loss": 0.0106, "step": 181460 }, { "epoch": 1193.8815789473683, "grad_norm": 1.2342994213104248, "learning_rate": 0.0001, "loss": 0.0093, "step": 181470 }, { "epoch": 1193.9473684210527, "grad_norm": 1.23150634765625, "learning_rate": 0.0001, "loss": 0.01, "step": 181480 }, { "epoch": 1194.0131578947369, "grad_norm": 1.1036239862442017, "learning_rate": 0.0001, "loss": 0.0095, "step": 181490 }, { "epoch": 1194.078947368421, "grad_norm": 0.7650617361068726, "learning_rate": 0.0001, "loss": 0.0086, "step": 181500 }, { "epoch": 1194.1447368421052, "grad_norm": 0.7626112699508667, "learning_rate": 0.0001, "loss": 0.0099, "step": 181510 }, { "epoch": 1194.2105263157894, "grad_norm": 1.1375484466552734, "learning_rate": 0.0001, "loss": 0.0096, "step": 181520 }, { "epoch": 1194.2763157894738, "grad_norm": 0.9652150869369507, "learning_rate": 0.0001, "loss": 0.0104, "step": 181530 }, { "epoch": 1194.342105263158, "grad_norm": 0.9495952725410461, "learning_rate": 0.0001, "loss": 0.0087, "step": 181540 }, { "epoch": 1194.407894736842, "grad_norm": 1.1295645236968994, "learning_rate": 0.0001, "loss": 0.0087, "step": 181550 }, { "epoch": 1194.4736842105262, "grad_norm": 0.7198823094367981, "learning_rate": 0.0001, "loss": 0.0093, "step": 181560 }, { "epoch": 1194.5394736842106, "grad_norm": 1.061182975769043, "learning_rate": 0.0001, "loss": 0.0099, "step": 181570 }, { "epoch": 1194.6052631578948, "grad_norm": 0.6715997457504272, "learning_rate": 0.0001, "loss": 0.0094, "step": 181580 }, { "epoch": 1194.671052631579, "grad_norm": 0.8791295289993286, "learning_rate": 0.0001, "loss": 0.0112, "step": 181590 }, { "epoch": 1194.7368421052631, "grad_norm": 1.9613913297653198, "learning_rate": 0.0001, "loss": 0.009, "step": 181600 }, { "epoch": 1194.8026315789473, "grad_norm": 2.2601711750030518, "learning_rate": 0.0001, "loss": 0.0104, "step": 181610 }, { "epoch": 1194.8684210526317, "grad_norm": 2.3166682720184326, "learning_rate": 0.0001, "loss": 0.01, "step": 181620 }, { "epoch": 1194.9342105263158, "grad_norm": 1.4985716342926025, "learning_rate": 0.0001, "loss": 0.012, "step": 181630 }, { "epoch": 1195.0, "grad_norm": 1.3064441680908203, "learning_rate": 0.0001, "loss": 0.0114, "step": 181640 }, { "epoch": 1195.0657894736842, "grad_norm": 0.9696851372718811, "learning_rate": 0.0001, "loss": 0.0084, "step": 181650 }, { "epoch": 1195.1315789473683, "grad_norm": 1.3249530792236328, "learning_rate": 0.0001, "loss": 0.0112, "step": 181660 }, { "epoch": 1195.1973684210527, "grad_norm": 1.042757511138916, "learning_rate": 0.0001, "loss": 0.0099, "step": 181670 }, { "epoch": 1195.2631578947369, "grad_norm": 0.9609398245811462, "learning_rate": 0.0001, "loss": 0.0082, "step": 181680 }, { "epoch": 1195.328947368421, "grad_norm": 1.0072219371795654, "learning_rate": 0.0001, "loss": 0.0102, "step": 181690 }, { "epoch": 1195.3947368421052, "grad_norm": 1.1560006141662598, "learning_rate": 0.0001, "loss": 0.0078, "step": 181700 }, { "epoch": 1195.4605263157894, "grad_norm": 0.9536867737770081, "learning_rate": 0.0001, "loss": 0.0123, "step": 181710 }, { "epoch": 1195.5263157894738, "grad_norm": 0.8431065678596497, "learning_rate": 0.0001, "loss": 0.011, "step": 181720 }, { "epoch": 1195.592105263158, "grad_norm": 1.448400855064392, "learning_rate": 0.0001, "loss": 0.0101, "step": 181730 }, { "epoch": 1195.657894736842, "grad_norm": 1.354210615158081, "learning_rate": 0.0001, "loss": 0.0087, "step": 181740 }, { "epoch": 1195.7236842105262, "grad_norm": 1.0818076133728027, "learning_rate": 0.0001, "loss": 0.0074, "step": 181750 }, { "epoch": 1195.7894736842106, "grad_norm": 1.0603787899017334, "learning_rate": 0.0001, "loss": 0.0082, "step": 181760 }, { "epoch": 1195.8552631578948, "grad_norm": 1.0252753496170044, "learning_rate": 0.0001, "loss": 0.01, "step": 181770 }, { "epoch": 1195.921052631579, "grad_norm": 0.9185944199562073, "learning_rate": 0.0001, "loss": 0.0097, "step": 181780 }, { "epoch": 1195.9868421052631, "grad_norm": 1.012385368347168, "learning_rate": 0.0001, "loss": 0.009, "step": 181790 }, { "epoch": 1196.0526315789473, "grad_norm": 1.1767807006835938, "learning_rate": 0.0001, "loss": 0.0075, "step": 181800 }, { "epoch": 1196.1184210526317, "grad_norm": 0.9487605094909668, "learning_rate": 0.0001, "loss": 0.0092, "step": 181810 }, { "epoch": 1196.1842105263158, "grad_norm": 1.0731463432312012, "learning_rate": 0.0001, "loss": 0.0118, "step": 181820 }, { "epoch": 1196.25, "grad_norm": 1.1926857233047485, "learning_rate": 0.0001, "loss": 0.0079, "step": 181830 }, { "epoch": 1196.3157894736842, "grad_norm": 1.1458866596221924, "learning_rate": 0.0001, "loss": 0.0099, "step": 181840 }, { "epoch": 1196.3815789473683, "grad_norm": 0.7594784498214722, "learning_rate": 0.0001, "loss": 0.0089, "step": 181850 }, { "epoch": 1196.4473684210527, "grad_norm": 0.7159861326217651, "learning_rate": 0.0001, "loss": 0.0103, "step": 181860 }, { "epoch": 1196.5131578947369, "grad_norm": 0.9384440183639526, "learning_rate": 0.0001, "loss": 0.0087, "step": 181870 }, { "epoch": 1196.578947368421, "grad_norm": 0.7661808729171753, "learning_rate": 0.0001, "loss": 0.0103, "step": 181880 }, { "epoch": 1196.6447368421052, "grad_norm": 0.8647274971008301, "learning_rate": 0.0001, "loss": 0.0093, "step": 181890 }, { "epoch": 1196.7105263157894, "grad_norm": 0.8873572945594788, "learning_rate": 0.0001, "loss": 0.0087, "step": 181900 }, { "epoch": 1196.7763157894738, "grad_norm": 1.2721664905548096, "learning_rate": 0.0001, "loss": 0.0105, "step": 181910 }, { "epoch": 1196.842105263158, "grad_norm": 0.8040602207183838, "learning_rate": 0.0001, "loss": 0.0087, "step": 181920 }, { "epoch": 1196.907894736842, "grad_norm": 1.413497805595398, "learning_rate": 0.0001, "loss": 0.0082, "step": 181930 }, { "epoch": 1196.9736842105262, "grad_norm": 1.1012861728668213, "learning_rate": 0.0001, "loss": 0.0085, "step": 181940 }, { "epoch": 1197.0394736842106, "grad_norm": 1.0302972793579102, "learning_rate": 0.0001, "loss": 0.0084, "step": 181950 }, { "epoch": 1197.1052631578948, "grad_norm": 0.9582118391990662, "learning_rate": 0.0001, "loss": 0.0076, "step": 181960 }, { "epoch": 1197.171052631579, "grad_norm": 1.0968737602233887, "learning_rate": 0.0001, "loss": 0.0094, "step": 181970 }, { "epoch": 1197.2368421052631, "grad_norm": 1.0602166652679443, "learning_rate": 0.0001, "loss": 0.0076, "step": 181980 }, { "epoch": 1197.3026315789473, "grad_norm": 0.890673816204071, "learning_rate": 0.0001, "loss": 0.0115, "step": 181990 }, { "epoch": 1197.3684210526317, "grad_norm": 0.9959932565689087, "learning_rate": 0.0001, "loss": 0.0086, "step": 182000 }, { "epoch": 1197.4342105263158, "grad_norm": 1.0189555883407593, "learning_rate": 0.0001, "loss": 0.0079, "step": 182010 }, { "epoch": 1197.5, "grad_norm": 1.3010244369506836, "learning_rate": 0.0001, "loss": 0.0092, "step": 182020 }, { "epoch": 1197.5657894736842, "grad_norm": 0.6490765810012817, "learning_rate": 0.0001, "loss": 0.0114, "step": 182030 }, { "epoch": 1197.6315789473683, "grad_norm": 0.9288578629493713, "learning_rate": 0.0001, "loss": 0.01, "step": 182040 }, { "epoch": 1197.6973684210527, "grad_norm": 0.9825459122657776, "learning_rate": 0.0001, "loss": 0.0093, "step": 182050 }, { "epoch": 1197.7631578947369, "grad_norm": 1.1905490159988403, "learning_rate": 0.0001, "loss": 0.0093, "step": 182060 }, { "epoch": 1197.828947368421, "grad_norm": 0.7103608846664429, "learning_rate": 0.0001, "loss": 0.011, "step": 182070 }, { "epoch": 1197.8947368421052, "grad_norm": 0.9443063735961914, "learning_rate": 0.0001, "loss": 0.01, "step": 182080 }, { "epoch": 1197.9605263157894, "grad_norm": 1.0908981561660767, "learning_rate": 0.0001, "loss": 0.0097, "step": 182090 }, { "epoch": 1198.0263157894738, "grad_norm": 1.0232374668121338, "learning_rate": 0.0001, "loss": 0.0082, "step": 182100 }, { "epoch": 1198.092105263158, "grad_norm": 0.8261113166809082, "learning_rate": 0.0001, "loss": 0.0085, "step": 182110 }, { "epoch": 1198.157894736842, "grad_norm": 0.9809024930000305, "learning_rate": 0.0001, "loss": 0.0108, "step": 182120 }, { "epoch": 1198.2236842105262, "grad_norm": 1.3282575607299805, "learning_rate": 0.0001, "loss": 0.0104, "step": 182130 }, { "epoch": 1198.2894736842106, "grad_norm": 0.756084144115448, "learning_rate": 0.0001, "loss": 0.0096, "step": 182140 }, { "epoch": 1198.3552631578948, "grad_norm": 1.1468161344528198, "learning_rate": 0.0001, "loss": 0.0109, "step": 182150 }, { "epoch": 1198.421052631579, "grad_norm": 1.3206136226654053, "learning_rate": 0.0001, "loss": 0.0103, "step": 182160 }, { "epoch": 1198.4868421052631, "grad_norm": 1.339464545249939, "learning_rate": 0.0001, "loss": 0.0101, "step": 182170 }, { "epoch": 1198.5526315789473, "grad_norm": 1.1766377687454224, "learning_rate": 0.0001, "loss": 0.0091, "step": 182180 }, { "epoch": 1198.6184210526317, "grad_norm": 1.342929482460022, "learning_rate": 0.0001, "loss": 0.0072, "step": 182190 }, { "epoch": 1198.6842105263158, "grad_norm": 1.5300862789154053, "learning_rate": 0.0001, "loss": 0.0118, "step": 182200 }, { "epoch": 1198.75, "grad_norm": 1.2704681158065796, "learning_rate": 0.0001, "loss": 0.0076, "step": 182210 }, { "epoch": 1198.8157894736842, "grad_norm": 0.9477614164352417, "learning_rate": 0.0001, "loss": 0.0079, "step": 182220 }, { "epoch": 1198.8815789473683, "grad_norm": 1.055249810218811, "learning_rate": 0.0001, "loss": 0.008, "step": 182230 }, { "epoch": 1198.9473684210527, "grad_norm": 1.057106375694275, "learning_rate": 0.0001, "loss": 0.0082, "step": 182240 }, { "epoch": 1199.0131578947369, "grad_norm": 1.096887469291687, "learning_rate": 0.0001, "loss": 0.0076, "step": 182250 }, { "epoch": 1199.078947368421, "grad_norm": 1.1574534177780151, "learning_rate": 0.0001, "loss": 0.01, "step": 182260 }, { "epoch": 1199.1447368421052, "grad_norm": 1.2946901321411133, "learning_rate": 0.0001, "loss": 0.0087, "step": 182270 }, { "epoch": 1199.2105263157894, "grad_norm": 1.0303276777267456, "learning_rate": 0.0001, "loss": 0.0084, "step": 182280 }, { "epoch": 1199.2763157894738, "grad_norm": 0.9668704867362976, "learning_rate": 0.0001, "loss": 0.0086, "step": 182290 }, { "epoch": 1199.342105263158, "grad_norm": 1.6884338855743408, "learning_rate": 0.0001, "loss": 0.0103, "step": 182300 }, { "epoch": 1199.407894736842, "grad_norm": 1.1282835006713867, "learning_rate": 0.0001, "loss": 0.0089, "step": 182310 }, { "epoch": 1199.4736842105262, "grad_norm": 1.2376283407211304, "learning_rate": 0.0001, "loss": 0.008, "step": 182320 }, { "epoch": 1199.5394736842106, "grad_norm": 1.081881046295166, "learning_rate": 0.0001, "loss": 0.0096, "step": 182330 }, { "epoch": 1199.6052631578948, "grad_norm": 1.080176830291748, "learning_rate": 0.0001, "loss": 0.0088, "step": 182340 }, { "epoch": 1199.671052631579, "grad_norm": 1.160550832748413, "learning_rate": 0.0001, "loss": 0.0081, "step": 182350 }, { "epoch": 1199.7368421052631, "grad_norm": 1.1847903728485107, "learning_rate": 0.0001, "loss": 0.0102, "step": 182360 }, { "epoch": 1199.8026315789473, "grad_norm": 1.0338932275772095, "learning_rate": 0.0001, "loss": 0.0109, "step": 182370 }, { "epoch": 1199.8684210526317, "grad_norm": 0.7202876806259155, "learning_rate": 0.0001, "loss": 0.0077, "step": 182380 }, { "epoch": 1199.9342105263158, "grad_norm": 0.9999865293502808, "learning_rate": 0.0001, "loss": 0.0097, "step": 182390 }, { "epoch": 1200.0, "grad_norm": 0.7838411331176758, "learning_rate": 0.0001, "loss": 0.0088, "step": 182400 }, { "epoch": 1200.0657894736842, "grad_norm": 0.8725451827049255, "learning_rate": 0.0001, "loss": 0.0078, "step": 182410 }, { "epoch": 1200.1315789473683, "grad_norm": 0.8963289856910706, "learning_rate": 0.0001, "loss": 0.0083, "step": 182420 }, { "epoch": 1200.1973684210527, "grad_norm": 0.8601619601249695, "learning_rate": 0.0001, "loss": 0.0089, "step": 182430 }, { "epoch": 1200.2631578947369, "grad_norm": 0.9362215399742126, "learning_rate": 0.0001, "loss": 0.0111, "step": 182440 }, { "epoch": 1200.328947368421, "grad_norm": 0.7226735949516296, "learning_rate": 0.0001, "loss": 0.0084, "step": 182450 }, { "epoch": 1200.3947368421052, "grad_norm": 1.1684337854385376, "learning_rate": 0.0001, "loss": 0.0101, "step": 182460 }, { "epoch": 1200.4605263157894, "grad_norm": 1.1520575284957886, "learning_rate": 0.0001, "loss": 0.0085, "step": 182470 }, { "epoch": 1200.5263157894738, "grad_norm": 1.0707088708877563, "learning_rate": 0.0001, "loss": 0.0094, "step": 182480 }, { "epoch": 1200.592105263158, "grad_norm": 0.8464767336845398, "learning_rate": 0.0001, "loss": 0.0095, "step": 182490 }, { "epoch": 1200.657894736842, "grad_norm": 1.186996579170227, "learning_rate": 0.0001, "loss": 0.0086, "step": 182500 }, { "epoch": 1200.7236842105262, "grad_norm": 1.0348249673843384, "learning_rate": 0.0001, "loss": 0.0082, "step": 182510 }, { "epoch": 1200.7894736842106, "grad_norm": 1.1279314756393433, "learning_rate": 0.0001, "loss": 0.0085, "step": 182520 }, { "epoch": 1200.8552631578948, "grad_norm": 0.6997672319412231, "learning_rate": 0.0001, "loss": 0.0086, "step": 182530 }, { "epoch": 1200.921052631579, "grad_norm": 0.8470837473869324, "learning_rate": 0.0001, "loss": 0.008, "step": 182540 }, { "epoch": 1200.9868421052631, "grad_norm": 1.178248405456543, "learning_rate": 0.0001, "loss": 0.0101, "step": 182550 }, { "epoch": 1201.0526315789473, "grad_norm": 0.958922266960144, "learning_rate": 0.0001, "loss": 0.0098, "step": 182560 }, { "epoch": 1201.1184210526317, "grad_norm": 0.8927515745162964, "learning_rate": 0.0001, "loss": 0.0086, "step": 182570 }, { "epoch": 1201.1842105263158, "grad_norm": 1.0204429626464844, "learning_rate": 0.0001, "loss": 0.008, "step": 182580 }, { "epoch": 1201.25, "grad_norm": 0.7796499729156494, "learning_rate": 0.0001, "loss": 0.0092, "step": 182590 }, { "epoch": 1201.3157894736842, "grad_norm": 0.7244349718093872, "learning_rate": 0.0001, "loss": 0.0088, "step": 182600 }, { "epoch": 1201.3815789473683, "grad_norm": 0.921933114528656, "learning_rate": 0.0001, "loss": 0.0093, "step": 182610 }, { "epoch": 1201.4473684210527, "grad_norm": 1.1822832822799683, "learning_rate": 0.0001, "loss": 0.009, "step": 182620 }, { "epoch": 1201.5131578947369, "grad_norm": 0.8840344548225403, "learning_rate": 0.0001, "loss": 0.0081, "step": 182630 }, { "epoch": 1201.578947368421, "grad_norm": 1.2693527936935425, "learning_rate": 0.0001, "loss": 0.0089, "step": 182640 }, { "epoch": 1201.6447368421052, "grad_norm": 1.1028697490692139, "learning_rate": 0.0001, "loss": 0.009, "step": 182650 }, { "epoch": 1201.7105263157894, "grad_norm": 0.9558727145195007, "learning_rate": 0.0001, "loss": 0.0106, "step": 182660 }, { "epoch": 1201.7763157894738, "grad_norm": 1.0722416639328003, "learning_rate": 0.0001, "loss": 0.0098, "step": 182670 }, { "epoch": 1201.842105263158, "grad_norm": 1.0818010568618774, "learning_rate": 0.0001, "loss": 0.0074, "step": 182680 }, { "epoch": 1201.907894736842, "grad_norm": 0.9133005142211914, "learning_rate": 0.0001, "loss": 0.0085, "step": 182690 }, { "epoch": 1201.9736842105262, "grad_norm": 0.8337648510932922, "learning_rate": 0.0001, "loss": 0.0084, "step": 182700 }, { "epoch": 1202.0394736842106, "grad_norm": 1.0010408163070679, "learning_rate": 0.0001, "loss": 0.0081, "step": 182710 }, { "epoch": 1202.1052631578948, "grad_norm": 1.2349421977996826, "learning_rate": 0.0001, "loss": 0.009, "step": 182720 }, { "epoch": 1202.171052631579, "grad_norm": 0.9251318573951721, "learning_rate": 0.0001, "loss": 0.0094, "step": 182730 }, { "epoch": 1202.2368421052631, "grad_norm": 1.2574422359466553, "learning_rate": 0.0001, "loss": 0.011, "step": 182740 }, { "epoch": 1202.3026315789473, "grad_norm": 1.0148321390151978, "learning_rate": 0.0001, "loss": 0.0117, "step": 182750 }, { "epoch": 1202.3684210526317, "grad_norm": 0.4935104548931122, "learning_rate": 0.0001, "loss": 0.0088, "step": 182760 }, { "epoch": 1202.4342105263158, "grad_norm": 0.8940833806991577, "learning_rate": 0.0001, "loss": 0.0072, "step": 182770 }, { "epoch": 1202.5, "grad_norm": 0.8071444034576416, "learning_rate": 0.0001, "loss": 0.0084, "step": 182780 }, { "epoch": 1202.5657894736842, "grad_norm": 0.8158524036407471, "learning_rate": 0.0001, "loss": 0.0108, "step": 182790 }, { "epoch": 1202.6315789473683, "grad_norm": 1.1093024015426636, "learning_rate": 0.0001, "loss": 0.0091, "step": 182800 }, { "epoch": 1202.6973684210527, "grad_norm": 1.2366114854812622, "learning_rate": 0.0001, "loss": 0.0093, "step": 182810 }, { "epoch": 1202.7631578947369, "grad_norm": 1.1967785358428955, "learning_rate": 0.0001, "loss": 0.0094, "step": 182820 }, { "epoch": 1202.828947368421, "grad_norm": 1.1936818361282349, "learning_rate": 0.0001, "loss": 0.0089, "step": 182830 }, { "epoch": 1202.8947368421052, "grad_norm": 1.7148412466049194, "learning_rate": 0.0001, "loss": 0.0084, "step": 182840 }, { "epoch": 1202.9605263157894, "grad_norm": 1.4570777416229248, "learning_rate": 0.0001, "loss": 0.008, "step": 182850 }, { "epoch": 1203.0263157894738, "grad_norm": 1.157327651977539, "learning_rate": 0.0001, "loss": 0.0072, "step": 182860 }, { "epoch": 1203.092105263158, "grad_norm": 1.0844436883926392, "learning_rate": 0.0001, "loss": 0.0107, "step": 182870 }, { "epoch": 1203.157894736842, "grad_norm": 1.446609616279602, "learning_rate": 0.0001, "loss": 0.0092, "step": 182880 }, { "epoch": 1203.2236842105262, "grad_norm": 0.9722105264663696, "learning_rate": 0.0001, "loss": 0.0081, "step": 182890 }, { "epoch": 1203.2894736842106, "grad_norm": 0.864902675151825, "learning_rate": 0.0001, "loss": 0.0086, "step": 182900 }, { "epoch": 1203.3552631578948, "grad_norm": 0.9735894203186035, "learning_rate": 0.0001, "loss": 0.0097, "step": 182910 }, { "epoch": 1203.421052631579, "grad_norm": 0.9694196581840515, "learning_rate": 0.0001, "loss": 0.0078, "step": 182920 }, { "epoch": 1203.4868421052631, "grad_norm": 1.1758695840835571, "learning_rate": 0.0001, "loss": 0.0089, "step": 182930 }, { "epoch": 1203.5526315789473, "grad_norm": 0.9834887981414795, "learning_rate": 0.0001, "loss": 0.0074, "step": 182940 }, { "epoch": 1203.6184210526317, "grad_norm": 1.0233968496322632, "learning_rate": 0.0001, "loss": 0.0076, "step": 182950 }, { "epoch": 1203.6842105263158, "grad_norm": 0.826490044593811, "learning_rate": 0.0001, "loss": 0.0086, "step": 182960 }, { "epoch": 1203.75, "grad_norm": 0.7306733727455139, "learning_rate": 0.0001, "loss": 0.0082, "step": 182970 }, { "epoch": 1203.8157894736842, "grad_norm": 1.019582986831665, "learning_rate": 0.0001, "loss": 0.009, "step": 182980 }, { "epoch": 1203.8815789473683, "grad_norm": 1.1735548973083496, "learning_rate": 0.0001, "loss": 0.0089, "step": 182990 }, { "epoch": 1203.9473684210527, "grad_norm": 1.2922629117965698, "learning_rate": 0.0001, "loss": 0.0102, "step": 183000 }, { "epoch": 1204.0131578947369, "grad_norm": 1.170229434967041, "learning_rate": 0.0001, "loss": 0.0088, "step": 183010 }, { "epoch": 1204.078947368421, "grad_norm": 1.1170730590820312, "learning_rate": 0.0001, "loss": 0.0075, "step": 183020 }, { "epoch": 1204.1447368421052, "grad_norm": 1.4809566736221313, "learning_rate": 0.0001, "loss": 0.0078, "step": 183030 }, { "epoch": 1204.2105263157894, "grad_norm": 1.3102599382400513, "learning_rate": 0.0001, "loss": 0.0099, "step": 183040 }, { "epoch": 1204.2763157894738, "grad_norm": 1.1147892475128174, "learning_rate": 0.0001, "loss": 0.008, "step": 183050 }, { "epoch": 1204.342105263158, "grad_norm": 1.0742824077606201, "learning_rate": 0.0001, "loss": 0.008, "step": 183060 }, { "epoch": 1204.407894736842, "grad_norm": 1.0442990064620972, "learning_rate": 0.0001, "loss": 0.0082, "step": 183070 }, { "epoch": 1204.4736842105262, "grad_norm": 0.6480495929718018, "learning_rate": 0.0001, "loss": 0.0082, "step": 183080 }, { "epoch": 1204.5394736842106, "grad_norm": 1.12603759765625, "learning_rate": 0.0001, "loss": 0.0111, "step": 183090 }, { "epoch": 1204.6052631578948, "grad_norm": 0.9349260926246643, "learning_rate": 0.0001, "loss": 0.0099, "step": 183100 }, { "epoch": 1204.671052631579, "grad_norm": 1.4842894077301025, "learning_rate": 0.0001, "loss": 0.0084, "step": 183110 }, { "epoch": 1204.7368421052631, "grad_norm": 1.003530502319336, "learning_rate": 0.0001, "loss": 0.01, "step": 183120 }, { "epoch": 1204.8026315789473, "grad_norm": 1.3181918859481812, "learning_rate": 0.0001, "loss": 0.0094, "step": 183130 }, { "epoch": 1204.8684210526317, "grad_norm": 0.9079726934432983, "learning_rate": 0.0001, "loss": 0.0085, "step": 183140 }, { "epoch": 1204.9342105263158, "grad_norm": 1.0107637643814087, "learning_rate": 0.0001, "loss": 0.0092, "step": 183150 }, { "epoch": 1205.0, "grad_norm": 1.043506145477295, "learning_rate": 0.0001, "loss": 0.0099, "step": 183160 }, { "epoch": 1205.0657894736842, "grad_norm": 0.9948956966400146, "learning_rate": 0.0001, "loss": 0.0099, "step": 183170 }, { "epoch": 1205.1315789473683, "grad_norm": 0.8911522626876831, "learning_rate": 0.0001, "loss": 0.0076, "step": 183180 }, { "epoch": 1205.1973684210527, "grad_norm": 0.9188016057014465, "learning_rate": 0.0001, "loss": 0.0089, "step": 183190 }, { "epoch": 1205.2631578947369, "grad_norm": 0.9119938611984253, "learning_rate": 0.0001, "loss": 0.0086, "step": 183200 }, { "epoch": 1205.328947368421, "grad_norm": 1.064100742340088, "learning_rate": 0.0001, "loss": 0.0082, "step": 183210 }, { "epoch": 1205.3947368421052, "grad_norm": 1.1177867650985718, "learning_rate": 0.0001, "loss": 0.0087, "step": 183220 }, { "epoch": 1205.4605263157894, "grad_norm": 0.7622190117835999, "learning_rate": 0.0001, "loss": 0.0084, "step": 183230 }, { "epoch": 1205.5263157894738, "grad_norm": 0.9194398522377014, "learning_rate": 0.0001, "loss": 0.0077, "step": 183240 }, { "epoch": 1205.592105263158, "grad_norm": 1.1614681482315063, "learning_rate": 0.0001, "loss": 0.0108, "step": 183250 }, { "epoch": 1205.657894736842, "grad_norm": 1.296439290046692, "learning_rate": 0.0001, "loss": 0.0088, "step": 183260 }, { "epoch": 1205.7236842105262, "grad_norm": 1.368787169456482, "learning_rate": 0.0001, "loss": 0.0096, "step": 183270 }, { "epoch": 1205.7894736842106, "grad_norm": 1.1311159133911133, "learning_rate": 0.0001, "loss": 0.0102, "step": 183280 }, { "epoch": 1205.8552631578948, "grad_norm": 1.0988332033157349, "learning_rate": 0.0001, "loss": 0.0094, "step": 183290 }, { "epoch": 1205.921052631579, "grad_norm": 0.6983150839805603, "learning_rate": 0.0001, "loss": 0.0089, "step": 183300 }, { "epoch": 1205.9868421052631, "grad_norm": 0.8750330805778503, "learning_rate": 0.0001, "loss": 0.0074, "step": 183310 }, { "epoch": 1206.0526315789473, "grad_norm": 1.087065577507019, "learning_rate": 0.0001, "loss": 0.009, "step": 183320 }, { "epoch": 1206.1184210526317, "grad_norm": 0.9251524806022644, "learning_rate": 0.0001, "loss": 0.0072, "step": 183330 }, { "epoch": 1206.1842105263158, "grad_norm": 0.998687744140625, "learning_rate": 0.0001, "loss": 0.0098, "step": 183340 }, { "epoch": 1206.25, "grad_norm": 0.6075114011764526, "learning_rate": 0.0001, "loss": 0.0084, "step": 183350 }, { "epoch": 1206.3157894736842, "grad_norm": 0.7300073504447937, "learning_rate": 0.0001, "loss": 0.009, "step": 183360 }, { "epoch": 1206.3815789473683, "grad_norm": 0.7422103881835938, "learning_rate": 0.0001, "loss": 0.0077, "step": 183370 }, { "epoch": 1206.4473684210527, "grad_norm": 0.9821145534515381, "learning_rate": 0.0001, "loss": 0.0106, "step": 183380 }, { "epoch": 1206.5131578947369, "grad_norm": 0.9073783159255981, "learning_rate": 0.0001, "loss": 0.0095, "step": 183390 }, { "epoch": 1206.578947368421, "grad_norm": 0.854741096496582, "learning_rate": 0.0001, "loss": 0.0111, "step": 183400 }, { "epoch": 1206.6447368421052, "grad_norm": 1.1214978694915771, "learning_rate": 0.0001, "loss": 0.0081, "step": 183410 }, { "epoch": 1206.7105263157894, "grad_norm": 0.9807927012443542, "learning_rate": 0.0001, "loss": 0.0088, "step": 183420 }, { "epoch": 1206.7763157894738, "grad_norm": 0.8988727927207947, "learning_rate": 0.0001, "loss": 0.0095, "step": 183430 }, { "epoch": 1206.842105263158, "grad_norm": 0.840644896030426, "learning_rate": 0.0001, "loss": 0.0076, "step": 183440 }, { "epoch": 1206.907894736842, "grad_norm": 1.0273395776748657, "learning_rate": 0.0001, "loss": 0.0102, "step": 183450 }, { "epoch": 1206.9736842105262, "grad_norm": 1.0159443616867065, "learning_rate": 0.0001, "loss": 0.0089, "step": 183460 }, { "epoch": 1207.0394736842106, "grad_norm": 1.0389869213104248, "learning_rate": 0.0001, "loss": 0.008, "step": 183470 }, { "epoch": 1207.1052631578948, "grad_norm": 0.9145666360855103, "learning_rate": 0.0001, "loss": 0.0103, "step": 183480 }, { "epoch": 1207.171052631579, "grad_norm": 0.9767587184906006, "learning_rate": 0.0001, "loss": 0.0105, "step": 183490 }, { "epoch": 1207.2368421052631, "grad_norm": 1.1191926002502441, "learning_rate": 0.0001, "loss": 0.0094, "step": 183500 }, { "epoch": 1207.3026315789473, "grad_norm": 0.898288905620575, "learning_rate": 0.0001, "loss": 0.0105, "step": 183510 }, { "epoch": 1207.3684210526317, "grad_norm": 0.7566203474998474, "learning_rate": 0.0001, "loss": 0.0078, "step": 183520 }, { "epoch": 1207.4342105263158, "grad_norm": 1.134048342704773, "learning_rate": 0.0001, "loss": 0.0078, "step": 183530 }, { "epoch": 1207.5, "grad_norm": 1.08811354637146, "learning_rate": 0.0001, "loss": 0.0082, "step": 183540 }, { "epoch": 1207.5657894736842, "grad_norm": 1.2516783475875854, "learning_rate": 0.0001, "loss": 0.0097, "step": 183550 }, { "epoch": 1207.6315789473683, "grad_norm": 1.2968010902404785, "learning_rate": 0.0001, "loss": 0.0091, "step": 183560 }, { "epoch": 1207.6973684210527, "grad_norm": 0.9975584149360657, "learning_rate": 0.0001, "loss": 0.0079, "step": 183570 }, { "epoch": 1207.7631578947369, "grad_norm": 1.1204050779342651, "learning_rate": 0.0001, "loss": 0.0091, "step": 183580 }, { "epoch": 1207.828947368421, "grad_norm": 1.050008773803711, "learning_rate": 0.0001, "loss": 0.009, "step": 183590 }, { "epoch": 1207.8947368421052, "grad_norm": 0.832999050617218, "learning_rate": 0.0001, "loss": 0.01, "step": 183600 }, { "epoch": 1207.9605263157894, "grad_norm": 0.9619593024253845, "learning_rate": 0.0001, "loss": 0.0086, "step": 183610 }, { "epoch": 1208.0263157894738, "grad_norm": 0.7968380451202393, "learning_rate": 0.0001, "loss": 0.0089, "step": 183620 }, { "epoch": 1208.092105263158, "grad_norm": 0.9513537883758545, "learning_rate": 0.0001, "loss": 0.0097, "step": 183630 }, { "epoch": 1208.157894736842, "grad_norm": 1.0646263360977173, "learning_rate": 0.0001, "loss": 0.0089, "step": 183640 }, { "epoch": 1208.2236842105262, "grad_norm": 0.8924401998519897, "learning_rate": 0.0001, "loss": 0.009, "step": 183650 }, { "epoch": 1208.2894736842106, "grad_norm": 0.9576424360275269, "learning_rate": 0.0001, "loss": 0.0075, "step": 183660 }, { "epoch": 1208.3552631578948, "grad_norm": 1.0064516067504883, "learning_rate": 0.0001, "loss": 0.0087, "step": 183670 }, { "epoch": 1208.421052631579, "grad_norm": 1.2385472059249878, "learning_rate": 0.0001, "loss": 0.0085, "step": 183680 }, { "epoch": 1208.4868421052631, "grad_norm": 1.1335607767105103, "learning_rate": 0.0001, "loss": 0.0089, "step": 183690 }, { "epoch": 1208.5526315789473, "grad_norm": 1.0447804927825928, "learning_rate": 0.0001, "loss": 0.0095, "step": 183700 }, { "epoch": 1208.6184210526317, "grad_norm": 0.9965493679046631, "learning_rate": 0.0001, "loss": 0.0097, "step": 183710 }, { "epoch": 1208.6842105263158, "grad_norm": 0.8367345929145813, "learning_rate": 0.0001, "loss": 0.0095, "step": 183720 }, { "epoch": 1208.75, "grad_norm": 1.0351852178573608, "learning_rate": 0.0001, "loss": 0.0094, "step": 183730 }, { "epoch": 1208.8157894736842, "grad_norm": 0.9388834834098816, "learning_rate": 0.0001, "loss": 0.0083, "step": 183740 }, { "epoch": 1208.8815789473683, "grad_norm": 0.9737825393676758, "learning_rate": 0.0001, "loss": 0.0101, "step": 183750 }, { "epoch": 1208.9473684210527, "grad_norm": 1.0484927892684937, "learning_rate": 0.0001, "loss": 0.0093, "step": 183760 }, { "epoch": 1209.0131578947369, "grad_norm": 0.9659373760223389, "learning_rate": 0.0001, "loss": 0.0095, "step": 183770 }, { "epoch": 1209.078947368421, "grad_norm": 1.433643102645874, "learning_rate": 0.0001, "loss": 0.0092, "step": 183780 }, { "epoch": 1209.1447368421052, "grad_norm": 0.7290152311325073, "learning_rate": 0.0001, "loss": 0.0083, "step": 183790 }, { "epoch": 1209.2105263157894, "grad_norm": 1.0148568153381348, "learning_rate": 0.0001, "loss": 0.0117, "step": 183800 }, { "epoch": 1209.2763157894738, "grad_norm": 1.4373056888580322, "learning_rate": 0.0001, "loss": 0.0084, "step": 183810 }, { "epoch": 1209.342105263158, "grad_norm": 0.8225641846656799, "learning_rate": 0.0001, "loss": 0.0097, "step": 183820 }, { "epoch": 1209.407894736842, "grad_norm": 1.236804485321045, "learning_rate": 0.0001, "loss": 0.0082, "step": 183830 }, { "epoch": 1209.4736842105262, "grad_norm": 1.466613531112671, "learning_rate": 0.0001, "loss": 0.0084, "step": 183840 }, { "epoch": 1209.5394736842106, "grad_norm": 1.2647264003753662, "learning_rate": 0.0001, "loss": 0.0091, "step": 183850 }, { "epoch": 1209.6052631578948, "grad_norm": 0.9265692234039307, "learning_rate": 0.0001, "loss": 0.0099, "step": 183860 }, { "epoch": 1209.671052631579, "grad_norm": 1.0921601057052612, "learning_rate": 0.0001, "loss": 0.0092, "step": 183870 }, { "epoch": 1209.7368421052631, "grad_norm": 1.2621147632598877, "learning_rate": 0.0001, "loss": 0.0089, "step": 183880 }, { "epoch": 1209.8026315789473, "grad_norm": 1.0996668338775635, "learning_rate": 0.0001, "loss": 0.0084, "step": 183890 }, { "epoch": 1209.8684210526317, "grad_norm": 1.0238312482833862, "learning_rate": 0.0001, "loss": 0.01, "step": 183900 }, { "epoch": 1209.9342105263158, "grad_norm": 1.0571686029434204, "learning_rate": 0.0001, "loss": 0.0103, "step": 183910 }, { "epoch": 1210.0, "grad_norm": 1.0116900205612183, "learning_rate": 0.0001, "loss": 0.0091, "step": 183920 }, { "epoch": 1210.0657894736842, "grad_norm": 1.5669469833374023, "learning_rate": 0.0001, "loss": 0.0105, "step": 183930 }, { "epoch": 1210.1315789473683, "grad_norm": 1.1383708715438843, "learning_rate": 0.0001, "loss": 0.0096, "step": 183940 }, { "epoch": 1210.1973684210527, "grad_norm": 1.053849697113037, "learning_rate": 0.0001, "loss": 0.0094, "step": 183950 }, { "epoch": 1210.2631578947369, "grad_norm": 0.9576135873794556, "learning_rate": 0.0001, "loss": 0.0099, "step": 183960 }, { "epoch": 1210.328947368421, "grad_norm": 1.0139400959014893, "learning_rate": 0.0001, "loss": 0.0068, "step": 183970 }, { "epoch": 1210.3947368421052, "grad_norm": 0.9644301533699036, "learning_rate": 0.0001, "loss": 0.0071, "step": 183980 }, { "epoch": 1210.4605263157894, "grad_norm": 0.9945560693740845, "learning_rate": 0.0001, "loss": 0.0081, "step": 183990 }, { "epoch": 1210.5263157894738, "grad_norm": 1.0940089225769043, "learning_rate": 0.0001, "loss": 0.0099, "step": 184000 }, { "epoch": 1210.592105263158, "grad_norm": 0.9017602801322937, "learning_rate": 0.0001, "loss": 0.0105, "step": 184010 }, { "epoch": 1210.657894736842, "grad_norm": 0.8119890093803406, "learning_rate": 0.0001, "loss": 0.0081, "step": 184020 }, { "epoch": 1210.7236842105262, "grad_norm": 0.7484346628189087, "learning_rate": 0.0001, "loss": 0.0107, "step": 184030 }, { "epoch": 1210.7894736842106, "grad_norm": 0.6174297332763672, "learning_rate": 0.0001, "loss": 0.0102, "step": 184040 }, { "epoch": 1210.8552631578948, "grad_norm": 0.9243563413619995, "learning_rate": 0.0001, "loss": 0.0096, "step": 184050 }, { "epoch": 1210.921052631579, "grad_norm": 1.0928142070770264, "learning_rate": 0.0001, "loss": 0.0094, "step": 184060 }, { "epoch": 1210.9868421052631, "grad_norm": 1.0260541439056396, "learning_rate": 0.0001, "loss": 0.0093, "step": 184070 }, { "epoch": 1211.0526315789473, "grad_norm": 0.8601813316345215, "learning_rate": 0.0001, "loss": 0.0113, "step": 184080 }, { "epoch": 1211.1184210526317, "grad_norm": 1.062245488166809, "learning_rate": 0.0001, "loss": 0.0089, "step": 184090 }, { "epoch": 1211.1842105263158, "grad_norm": 0.8199011087417603, "learning_rate": 0.0001, "loss": 0.0092, "step": 184100 }, { "epoch": 1211.25, "grad_norm": 0.9992640614509583, "learning_rate": 0.0001, "loss": 0.0102, "step": 184110 }, { "epoch": 1211.3157894736842, "grad_norm": 1.1344804763793945, "learning_rate": 0.0001, "loss": 0.0097, "step": 184120 }, { "epoch": 1211.3815789473683, "grad_norm": 0.9198042750358582, "learning_rate": 0.0001, "loss": 0.0096, "step": 184130 }, { "epoch": 1211.4473684210527, "grad_norm": 1.2158046960830688, "learning_rate": 0.0001, "loss": 0.0083, "step": 184140 }, { "epoch": 1211.5131578947369, "grad_norm": 1.368562936782837, "learning_rate": 0.0001, "loss": 0.0097, "step": 184150 }, { "epoch": 1211.578947368421, "grad_norm": 0.9911054372787476, "learning_rate": 0.0001, "loss": 0.0077, "step": 184160 }, { "epoch": 1211.6447368421052, "grad_norm": 1.076811671257019, "learning_rate": 0.0001, "loss": 0.0099, "step": 184170 }, { "epoch": 1211.7105263157894, "grad_norm": 1.632210373878479, "learning_rate": 0.0001, "loss": 0.0102, "step": 184180 }, { "epoch": 1211.7763157894738, "grad_norm": 1.2261195182800293, "learning_rate": 0.0001, "loss": 0.0075, "step": 184190 }, { "epoch": 1211.842105263158, "grad_norm": 1.1790894269943237, "learning_rate": 0.0001, "loss": 0.0079, "step": 184200 }, { "epoch": 1211.907894736842, "grad_norm": 0.8446111083030701, "learning_rate": 0.0001, "loss": 0.0086, "step": 184210 }, { "epoch": 1211.9736842105262, "grad_norm": 0.72014319896698, "learning_rate": 0.0001, "loss": 0.0094, "step": 184220 }, { "epoch": 1212.0394736842106, "grad_norm": 0.8168789744377136, "learning_rate": 0.0001, "loss": 0.01, "step": 184230 }, { "epoch": 1212.1052631578948, "grad_norm": 0.8248627781867981, "learning_rate": 0.0001, "loss": 0.0085, "step": 184240 }, { "epoch": 1212.171052631579, "grad_norm": 0.6290561556816101, "learning_rate": 0.0001, "loss": 0.0095, "step": 184250 }, { "epoch": 1212.2368421052631, "grad_norm": 0.9064732789993286, "learning_rate": 0.0001, "loss": 0.0108, "step": 184260 }, { "epoch": 1212.3026315789473, "grad_norm": 1.5276134014129639, "learning_rate": 0.0001, "loss": 0.0089, "step": 184270 }, { "epoch": 1212.3684210526317, "grad_norm": 1.2133339643478394, "learning_rate": 0.0001, "loss": 0.0097, "step": 184280 }, { "epoch": 1212.4342105263158, "grad_norm": 1.096923589706421, "learning_rate": 0.0001, "loss": 0.0095, "step": 184290 }, { "epoch": 1212.5, "grad_norm": 1.0609931945800781, "learning_rate": 0.0001, "loss": 0.0094, "step": 184300 }, { "epoch": 1212.5657894736842, "grad_norm": 1.0884696245193481, "learning_rate": 0.0001, "loss": 0.0096, "step": 184310 }, { "epoch": 1212.6315789473683, "grad_norm": 1.2373757362365723, "learning_rate": 0.0001, "loss": 0.0091, "step": 184320 }, { "epoch": 1212.6973684210527, "grad_norm": 0.8663111925125122, "learning_rate": 0.0001, "loss": 0.0096, "step": 184330 }, { "epoch": 1212.7631578947369, "grad_norm": 0.8895679116249084, "learning_rate": 0.0001, "loss": 0.0077, "step": 184340 }, { "epoch": 1212.828947368421, "grad_norm": 0.9693534970283508, "learning_rate": 0.0001, "loss": 0.0096, "step": 184350 }, { "epoch": 1212.8947368421052, "grad_norm": 1.173888921737671, "learning_rate": 0.0001, "loss": 0.0077, "step": 184360 }, { "epoch": 1212.9605263157894, "grad_norm": 0.8452108502388, "learning_rate": 0.0001, "loss": 0.0092, "step": 184370 }, { "epoch": 1213.0263157894738, "grad_norm": 1.301694393157959, "learning_rate": 0.0001, "loss": 0.0098, "step": 184380 }, { "epoch": 1213.092105263158, "grad_norm": 1.157357931137085, "learning_rate": 0.0001, "loss": 0.0082, "step": 184390 }, { "epoch": 1213.157894736842, "grad_norm": 0.9105615019798279, "learning_rate": 0.0001, "loss": 0.0089, "step": 184400 }, { "epoch": 1213.2236842105262, "grad_norm": 1.3666666746139526, "learning_rate": 0.0001, "loss": 0.0089, "step": 184410 }, { "epoch": 1213.2894736842106, "grad_norm": 1.4840264320373535, "learning_rate": 0.0001, "loss": 0.0103, "step": 184420 }, { "epoch": 1213.3552631578948, "grad_norm": 1.0419845581054688, "learning_rate": 0.0001, "loss": 0.0094, "step": 184430 }, { "epoch": 1213.421052631579, "grad_norm": 1.0189485549926758, "learning_rate": 0.0001, "loss": 0.0092, "step": 184440 }, { "epoch": 1213.4868421052631, "grad_norm": 1.2533512115478516, "learning_rate": 0.0001, "loss": 0.008, "step": 184450 }, { "epoch": 1213.5526315789473, "grad_norm": 0.8926238417625427, "learning_rate": 0.0001, "loss": 0.011, "step": 184460 }, { "epoch": 1213.6184210526317, "grad_norm": 1.0614726543426514, "learning_rate": 0.0001, "loss": 0.0096, "step": 184470 }, { "epoch": 1213.6842105263158, "grad_norm": 1.0628024339675903, "learning_rate": 0.0001, "loss": 0.0093, "step": 184480 }, { "epoch": 1213.75, "grad_norm": 1.3046667575836182, "learning_rate": 0.0001, "loss": 0.0097, "step": 184490 }, { "epoch": 1213.8157894736842, "grad_norm": 1.021803379058838, "learning_rate": 0.0001, "loss": 0.0096, "step": 184500 }, { "epoch": 1213.8815789473683, "grad_norm": 1.1435785293579102, "learning_rate": 0.0001, "loss": 0.007, "step": 184510 }, { "epoch": 1213.9473684210527, "grad_norm": 1.1126259565353394, "learning_rate": 0.0001, "loss": 0.0079, "step": 184520 }, { "epoch": 1214.0131578947369, "grad_norm": 1.026728630065918, "learning_rate": 0.0001, "loss": 0.0092, "step": 184530 }, { "epoch": 1214.078947368421, "grad_norm": 0.6541023254394531, "learning_rate": 0.0001, "loss": 0.0103, "step": 184540 }, { "epoch": 1214.1447368421052, "grad_norm": 0.8954967260360718, "learning_rate": 0.0001, "loss": 0.0108, "step": 184550 }, { "epoch": 1214.2105263157894, "grad_norm": 1.3695642948150635, "learning_rate": 0.0001, "loss": 0.0086, "step": 184560 }, { "epoch": 1214.2763157894738, "grad_norm": 1.1998350620269775, "learning_rate": 0.0001, "loss": 0.0111, "step": 184570 }, { "epoch": 1214.342105263158, "grad_norm": 1.03053617477417, "learning_rate": 0.0001, "loss": 0.0105, "step": 184580 }, { "epoch": 1214.407894736842, "grad_norm": 1.0831443071365356, "learning_rate": 0.0001, "loss": 0.0096, "step": 184590 }, { "epoch": 1214.4736842105262, "grad_norm": 0.8419860005378723, "learning_rate": 0.0001, "loss": 0.0111, "step": 184600 }, { "epoch": 1214.5394736842106, "grad_norm": 1.1064237356185913, "learning_rate": 0.0001, "loss": 0.0098, "step": 184610 }, { "epoch": 1214.6052631578948, "grad_norm": 1.0451138019561768, "learning_rate": 0.0001, "loss": 0.0097, "step": 184620 }, { "epoch": 1214.671052631579, "grad_norm": 0.9039570093154907, "learning_rate": 0.0001, "loss": 0.0103, "step": 184630 }, { "epoch": 1214.7368421052631, "grad_norm": 1.0026644468307495, "learning_rate": 0.0001, "loss": 0.0102, "step": 184640 }, { "epoch": 1214.8026315789473, "grad_norm": 0.8823190927505493, "learning_rate": 0.0001, "loss": 0.0105, "step": 184650 }, { "epoch": 1214.8684210526317, "grad_norm": 1.510465383529663, "learning_rate": 0.0001, "loss": 0.0102, "step": 184660 }, { "epoch": 1214.9342105263158, "grad_norm": 0.9225495457649231, "learning_rate": 0.0001, "loss": 0.0112, "step": 184670 }, { "epoch": 1215.0, "grad_norm": 0.866723895072937, "learning_rate": 0.0001, "loss": 0.01, "step": 184680 }, { "epoch": 1215.0657894736842, "grad_norm": 1.0360679626464844, "learning_rate": 0.0001, "loss": 0.01, "step": 184690 }, { "epoch": 1215.1315789473683, "grad_norm": 1.0180871486663818, "learning_rate": 0.0001, "loss": 0.0125, "step": 184700 }, { "epoch": 1215.1973684210527, "grad_norm": 0.8230453729629517, "learning_rate": 0.0001, "loss": 0.0098, "step": 184710 }, { "epoch": 1215.2631578947369, "grad_norm": 1.398345947265625, "learning_rate": 0.0001, "loss": 0.0121, "step": 184720 }, { "epoch": 1215.328947368421, "grad_norm": 1.5286283493041992, "learning_rate": 0.0001, "loss": 0.0127, "step": 184730 }, { "epoch": 1215.3947368421052, "grad_norm": 1.097556710243225, "learning_rate": 0.0001, "loss": 0.0116, "step": 184740 }, { "epoch": 1215.4605263157894, "grad_norm": 1.1276756525039673, "learning_rate": 0.0001, "loss": 0.0129, "step": 184750 }, { "epoch": 1215.5263157894738, "grad_norm": 1.013379693031311, "learning_rate": 0.0001, "loss": 0.0113, "step": 184760 }, { "epoch": 1215.592105263158, "grad_norm": 1.1437040567398071, "learning_rate": 0.0001, "loss": 0.0117, "step": 184770 }, { "epoch": 1215.657894736842, "grad_norm": 0.8518174290657043, "learning_rate": 0.0001, "loss": 0.0124, "step": 184780 }, { "epoch": 1215.7236842105262, "grad_norm": 1.0094599723815918, "learning_rate": 0.0001, "loss": 0.0146, "step": 184790 }, { "epoch": 1215.7894736842106, "grad_norm": 1.0971324443817139, "learning_rate": 0.0001, "loss": 0.0111, "step": 184800 }, { "epoch": 1215.8552631578948, "grad_norm": 1.0762872695922852, "learning_rate": 0.0001, "loss": 0.0135, "step": 184810 }, { "epoch": 1215.921052631579, "grad_norm": 0.6933231353759766, "learning_rate": 0.0001, "loss": 0.012, "step": 184820 }, { "epoch": 1215.9868421052631, "grad_norm": 1.1836282014846802, "learning_rate": 0.0001, "loss": 0.011, "step": 184830 }, { "epoch": 1216.0526315789473, "grad_norm": 1.4843765497207642, "learning_rate": 0.0001, "loss": 0.0113, "step": 184840 }, { "epoch": 1216.1184210526317, "grad_norm": 1.573395013809204, "learning_rate": 0.0001, "loss": 0.0138, "step": 184850 }, { "epoch": 1216.1842105263158, "grad_norm": 1.349432110786438, "learning_rate": 0.0001, "loss": 0.0126, "step": 184860 }, { "epoch": 1216.25, "grad_norm": 0.9781970381736755, "learning_rate": 0.0001, "loss": 0.0113, "step": 184870 }, { "epoch": 1216.3157894736842, "grad_norm": 0.9339350461959839, "learning_rate": 0.0001, "loss": 0.0091, "step": 184880 }, { "epoch": 1216.3815789473683, "grad_norm": 1.192179799079895, "learning_rate": 0.0001, "loss": 0.0114, "step": 184890 }, { "epoch": 1216.4473684210527, "grad_norm": 0.9777132272720337, "learning_rate": 0.0001, "loss": 0.0098, "step": 184900 }, { "epoch": 1216.5131578947369, "grad_norm": 1.1878960132598877, "learning_rate": 0.0001, "loss": 0.0108, "step": 184910 }, { "epoch": 1216.578947368421, "grad_norm": 1.1332019567489624, "learning_rate": 0.0001, "loss": 0.01, "step": 184920 }, { "epoch": 1216.6447368421052, "grad_norm": 1.2628083229064941, "learning_rate": 0.0001, "loss": 0.0112, "step": 184930 }, { "epoch": 1216.7105263157894, "grad_norm": 1.167966365814209, "learning_rate": 0.0001, "loss": 0.0099, "step": 184940 }, { "epoch": 1216.7763157894738, "grad_norm": 1.3506724834442139, "learning_rate": 0.0001, "loss": 0.0085, "step": 184950 }, { "epoch": 1216.842105263158, "grad_norm": 1.2869248390197754, "learning_rate": 0.0001, "loss": 0.0091, "step": 184960 }, { "epoch": 1216.907894736842, "grad_norm": 1.2013112306594849, "learning_rate": 0.0001, "loss": 0.0109, "step": 184970 }, { "epoch": 1216.9736842105262, "grad_norm": 0.8679819107055664, "learning_rate": 0.0001, "loss": 0.0111, "step": 184980 }, { "epoch": 1217.0394736842106, "grad_norm": 1.0381109714508057, "learning_rate": 0.0001, "loss": 0.0101, "step": 184990 }, { "epoch": 1217.1052631578948, "grad_norm": 1.11387038230896, "learning_rate": 0.0001, "loss": 0.0104, "step": 185000 }, { "epoch": 1217.171052631579, "grad_norm": 1.2010740041732788, "learning_rate": 0.0001, "loss": 0.0103, "step": 185010 }, { "epoch": 1217.2368421052631, "grad_norm": 1.2932566404342651, "learning_rate": 0.0001, "loss": 0.0088, "step": 185020 }, { "epoch": 1217.3026315789473, "grad_norm": 0.7184022665023804, "learning_rate": 0.0001, "loss": 0.0111, "step": 185030 }, { "epoch": 1217.3684210526317, "grad_norm": 0.8820980787277222, "learning_rate": 0.0001, "loss": 0.0094, "step": 185040 }, { "epoch": 1217.4342105263158, "grad_norm": 1.4294477701187134, "learning_rate": 0.0001, "loss": 0.0095, "step": 185050 }, { "epoch": 1217.5, "grad_norm": 1.082810878753662, "learning_rate": 0.0001, "loss": 0.0092, "step": 185060 }, { "epoch": 1217.5657894736842, "grad_norm": 1.033191442489624, "learning_rate": 0.0001, "loss": 0.0095, "step": 185070 }, { "epoch": 1217.6315789473683, "grad_norm": 1.1163650751113892, "learning_rate": 0.0001, "loss": 0.0098, "step": 185080 }, { "epoch": 1217.6973684210527, "grad_norm": 1.2836352586746216, "learning_rate": 0.0001, "loss": 0.0097, "step": 185090 }, { "epoch": 1217.7631578947369, "grad_norm": 1.1807303428649902, "learning_rate": 0.0001, "loss": 0.0087, "step": 185100 }, { "epoch": 1217.828947368421, "grad_norm": 1.0362707376480103, "learning_rate": 0.0001, "loss": 0.0099, "step": 185110 }, { "epoch": 1217.8947368421052, "grad_norm": 1.2636171579360962, "learning_rate": 0.0001, "loss": 0.0109, "step": 185120 }, { "epoch": 1217.9605263157894, "grad_norm": 1.2539921998977661, "learning_rate": 0.0001, "loss": 0.0091, "step": 185130 }, { "epoch": 1218.0263157894738, "grad_norm": 1.5820645093917847, "learning_rate": 0.0001, "loss": 0.008, "step": 185140 }, { "epoch": 1218.092105263158, "grad_norm": 0.8721255660057068, "learning_rate": 0.0001, "loss": 0.0093, "step": 185150 }, { "epoch": 1218.157894736842, "grad_norm": 1.0245574712753296, "learning_rate": 0.0001, "loss": 0.0099, "step": 185160 }, { "epoch": 1218.2236842105262, "grad_norm": 1.1424921751022339, "learning_rate": 0.0001, "loss": 0.0092, "step": 185170 }, { "epoch": 1218.2894736842106, "grad_norm": 1.0817832946777344, "learning_rate": 0.0001, "loss": 0.008, "step": 185180 }, { "epoch": 1218.3552631578948, "grad_norm": 0.9851661920547485, "learning_rate": 0.0001, "loss": 0.0097, "step": 185190 }, { "epoch": 1218.421052631579, "grad_norm": 1.0707749128341675, "learning_rate": 0.0001, "loss": 0.0098, "step": 185200 }, { "epoch": 1218.4868421052631, "grad_norm": 0.903893768787384, "learning_rate": 0.0001, "loss": 0.0094, "step": 185210 }, { "epoch": 1218.5526315789473, "grad_norm": 0.8631360530853271, "learning_rate": 0.0001, "loss": 0.0107, "step": 185220 }, { "epoch": 1218.6184210526317, "grad_norm": 0.8446455001831055, "learning_rate": 0.0001, "loss": 0.0102, "step": 185230 }, { "epoch": 1218.6842105263158, "grad_norm": 1.238858938217163, "learning_rate": 0.0001, "loss": 0.0091, "step": 185240 }, { "epoch": 1218.75, "grad_norm": 1.4525588750839233, "learning_rate": 0.0001, "loss": 0.0085, "step": 185250 }, { "epoch": 1218.8157894736842, "grad_norm": 0.7506535649299622, "learning_rate": 0.0001, "loss": 0.0091, "step": 185260 }, { "epoch": 1218.8815789473683, "grad_norm": 1.0195599794387817, "learning_rate": 0.0001, "loss": 0.0103, "step": 185270 }, { "epoch": 1218.9473684210527, "grad_norm": 1.0614231824874878, "learning_rate": 0.0001, "loss": 0.0094, "step": 185280 }, { "epoch": 1219.0131578947369, "grad_norm": 0.9390943050384521, "learning_rate": 0.0001, "loss": 0.0086, "step": 185290 }, { "epoch": 1219.078947368421, "grad_norm": 1.175963282585144, "learning_rate": 0.0001, "loss": 0.0083, "step": 185300 }, { "epoch": 1219.1447368421052, "grad_norm": 1.5767515897750854, "learning_rate": 0.0001, "loss": 0.0105, "step": 185310 }, { "epoch": 1219.2105263157894, "grad_norm": 1.0393364429473877, "learning_rate": 0.0001, "loss": 0.0089, "step": 185320 }, { "epoch": 1219.2763157894738, "grad_norm": 1.084050178527832, "learning_rate": 0.0001, "loss": 0.0086, "step": 185330 }, { "epoch": 1219.342105263158, "grad_norm": 1.1142622232437134, "learning_rate": 0.0001, "loss": 0.0086, "step": 185340 }, { "epoch": 1219.407894736842, "grad_norm": 1.1076176166534424, "learning_rate": 0.0001, "loss": 0.0092, "step": 185350 }, { "epoch": 1219.4736842105262, "grad_norm": 0.9142469167709351, "learning_rate": 0.0001, "loss": 0.0111, "step": 185360 }, { "epoch": 1219.5394736842106, "grad_norm": 0.9830288887023926, "learning_rate": 0.0001, "loss": 0.0094, "step": 185370 }, { "epoch": 1219.6052631578948, "grad_norm": 1.4786913394927979, "learning_rate": 0.0001, "loss": 0.0087, "step": 185380 }, { "epoch": 1219.671052631579, "grad_norm": 1.2091387510299683, "learning_rate": 0.0001, "loss": 0.0122, "step": 185390 }, { "epoch": 1219.7368421052631, "grad_norm": 1.051276683807373, "learning_rate": 0.0001, "loss": 0.0079, "step": 185400 }, { "epoch": 1219.8026315789473, "grad_norm": 1.0199636220932007, "learning_rate": 0.0001, "loss": 0.0091, "step": 185410 }, { "epoch": 1219.8684210526317, "grad_norm": 0.9174479246139526, "learning_rate": 0.0001, "loss": 0.0083, "step": 185420 }, { "epoch": 1219.9342105263158, "grad_norm": 0.9192227721214294, "learning_rate": 0.0001, "loss": 0.0084, "step": 185430 }, { "epoch": 1220.0, "grad_norm": 0.7987057566642761, "learning_rate": 0.0001, "loss": 0.0099, "step": 185440 }, { "epoch": 1220.0657894736842, "grad_norm": 0.6848961710929871, "learning_rate": 0.0001, "loss": 0.0095, "step": 185450 }, { "epoch": 1220.1315789473683, "grad_norm": 0.7822224497795105, "learning_rate": 0.0001, "loss": 0.0096, "step": 185460 }, { "epoch": 1220.1973684210527, "grad_norm": 0.8950042724609375, "learning_rate": 0.0001, "loss": 0.0099, "step": 185470 }, { "epoch": 1220.2631578947369, "grad_norm": 0.982462465763092, "learning_rate": 0.0001, "loss": 0.0109, "step": 185480 }, { "epoch": 1220.328947368421, "grad_norm": 1.3260400295257568, "learning_rate": 0.0001, "loss": 0.0112, "step": 185490 }, { "epoch": 1220.3947368421052, "grad_norm": 1.1258111000061035, "learning_rate": 0.0001, "loss": 0.0076, "step": 185500 }, { "epoch": 1220.4605263157894, "grad_norm": 0.9215371608734131, "learning_rate": 0.0001, "loss": 0.009, "step": 185510 }, { "epoch": 1220.5263157894738, "grad_norm": 1.1126407384872437, "learning_rate": 0.0001, "loss": 0.01, "step": 185520 }, { "epoch": 1220.592105263158, "grad_norm": 0.8571876287460327, "learning_rate": 0.0001, "loss": 0.0086, "step": 185530 }, { "epoch": 1220.657894736842, "grad_norm": 1.0084202289581299, "learning_rate": 0.0001, "loss": 0.0098, "step": 185540 }, { "epoch": 1220.7236842105262, "grad_norm": 1.0304898023605347, "learning_rate": 0.0001, "loss": 0.0082, "step": 185550 }, { "epoch": 1220.7894736842106, "grad_norm": 1.4203600883483887, "learning_rate": 0.0001, "loss": 0.0096, "step": 185560 }, { "epoch": 1220.8552631578948, "grad_norm": 0.695814311504364, "learning_rate": 0.0001, "loss": 0.0093, "step": 185570 }, { "epoch": 1220.921052631579, "grad_norm": 1.066893219947815, "learning_rate": 0.0001, "loss": 0.0083, "step": 185580 }, { "epoch": 1220.9868421052631, "grad_norm": 1.0553237199783325, "learning_rate": 0.0001, "loss": 0.0085, "step": 185590 }, { "epoch": 1221.0526315789473, "grad_norm": 1.2047573328018188, "learning_rate": 0.0001, "loss": 0.0081, "step": 185600 }, { "epoch": 1221.1184210526317, "grad_norm": 1.031988263130188, "learning_rate": 0.0001, "loss": 0.0091, "step": 185610 }, { "epoch": 1221.1842105263158, "grad_norm": 1.2671812772750854, "learning_rate": 0.0001, "loss": 0.0081, "step": 185620 }, { "epoch": 1221.25, "grad_norm": 1.1938546895980835, "learning_rate": 0.0001, "loss": 0.011, "step": 185630 }, { "epoch": 1221.3157894736842, "grad_norm": 0.8404895067214966, "learning_rate": 0.0001, "loss": 0.0089, "step": 185640 }, { "epoch": 1221.3815789473683, "grad_norm": 1.2843692302703857, "learning_rate": 0.0001, "loss": 0.009, "step": 185650 }, { "epoch": 1221.4473684210527, "grad_norm": 1.1143099069595337, "learning_rate": 0.0001, "loss": 0.0088, "step": 185660 }, { "epoch": 1221.5131578947369, "grad_norm": 1.0575257539749146, "learning_rate": 0.0001, "loss": 0.0105, "step": 185670 }, { "epoch": 1221.578947368421, "grad_norm": 0.7534637451171875, "learning_rate": 0.0001, "loss": 0.0084, "step": 185680 }, { "epoch": 1221.6447368421052, "grad_norm": 0.7828577756881714, "learning_rate": 0.0001, "loss": 0.0079, "step": 185690 }, { "epoch": 1221.7105263157894, "grad_norm": 0.9473252296447754, "learning_rate": 0.0001, "loss": 0.0093, "step": 185700 }, { "epoch": 1221.7763157894738, "grad_norm": 1.1639002561569214, "learning_rate": 0.0001, "loss": 0.0104, "step": 185710 }, { "epoch": 1221.842105263158, "grad_norm": 0.9955407977104187, "learning_rate": 0.0001, "loss": 0.0089, "step": 185720 }, { "epoch": 1221.907894736842, "grad_norm": 1.1388436555862427, "learning_rate": 0.0001, "loss": 0.0092, "step": 185730 }, { "epoch": 1221.9736842105262, "grad_norm": 0.793958842754364, "learning_rate": 0.0001, "loss": 0.0074, "step": 185740 }, { "epoch": 1222.0394736842106, "grad_norm": 0.7527614235877991, "learning_rate": 0.0001, "loss": 0.0089, "step": 185750 }, { "epoch": 1222.1052631578948, "grad_norm": 1.2795637845993042, "learning_rate": 0.0001, "loss": 0.0078, "step": 185760 }, { "epoch": 1222.171052631579, "grad_norm": 0.7451528906822205, "learning_rate": 0.0001, "loss": 0.0074, "step": 185770 }, { "epoch": 1222.2368421052631, "grad_norm": 0.9207596778869629, "learning_rate": 0.0001, "loss": 0.0078, "step": 185780 }, { "epoch": 1222.3026315789473, "grad_norm": 0.8541901111602783, "learning_rate": 0.0001, "loss": 0.0101, "step": 185790 }, { "epoch": 1222.3684210526317, "grad_norm": 0.9875356554985046, "learning_rate": 0.0001, "loss": 0.0118, "step": 185800 }, { "epoch": 1222.4342105263158, "grad_norm": 0.8189533948898315, "learning_rate": 0.0001, "loss": 0.0105, "step": 185810 }, { "epoch": 1222.5, "grad_norm": 1.3536303043365479, "learning_rate": 0.0001, "loss": 0.0079, "step": 185820 }, { "epoch": 1222.5657894736842, "grad_norm": 1.253621220588684, "learning_rate": 0.0001, "loss": 0.0082, "step": 185830 }, { "epoch": 1222.6315789473683, "grad_norm": 0.9833441376686096, "learning_rate": 0.0001, "loss": 0.0081, "step": 185840 }, { "epoch": 1222.6973684210527, "grad_norm": 1.02719247341156, "learning_rate": 0.0001, "loss": 0.0098, "step": 185850 }, { "epoch": 1222.7631578947369, "grad_norm": 0.8571970462799072, "learning_rate": 0.0001, "loss": 0.0078, "step": 185860 }, { "epoch": 1222.828947368421, "grad_norm": 1.270446538925171, "learning_rate": 0.0001, "loss": 0.0089, "step": 185870 }, { "epoch": 1222.8947368421052, "grad_norm": 1.1519957780838013, "learning_rate": 0.0001, "loss": 0.0083, "step": 185880 }, { "epoch": 1222.9605263157894, "grad_norm": 1.149924397468567, "learning_rate": 0.0001, "loss": 0.0105, "step": 185890 }, { "epoch": 1223.0263157894738, "grad_norm": 1.0503630638122559, "learning_rate": 0.0001, "loss": 0.0105, "step": 185900 }, { "epoch": 1223.092105263158, "grad_norm": 1.033896565437317, "learning_rate": 0.0001, "loss": 0.0096, "step": 185910 }, { "epoch": 1223.157894736842, "grad_norm": 1.530225396156311, "learning_rate": 0.0001, "loss": 0.0107, "step": 185920 }, { "epoch": 1223.2236842105262, "grad_norm": 1.317832112312317, "learning_rate": 0.0001, "loss": 0.0103, "step": 185930 }, { "epoch": 1223.2894736842106, "grad_norm": 1.1852115392684937, "learning_rate": 0.0001, "loss": 0.0076, "step": 185940 }, { "epoch": 1223.3552631578948, "grad_norm": 1.0423250198364258, "learning_rate": 0.0001, "loss": 0.0081, "step": 185950 }, { "epoch": 1223.421052631579, "grad_norm": 1.0078924894332886, "learning_rate": 0.0001, "loss": 0.0076, "step": 185960 }, { "epoch": 1223.4868421052631, "grad_norm": 1.1069872379302979, "learning_rate": 0.0001, "loss": 0.0092, "step": 185970 }, { "epoch": 1223.5526315789473, "grad_norm": 1.058595061302185, "learning_rate": 0.0001, "loss": 0.0085, "step": 185980 }, { "epoch": 1223.6184210526317, "grad_norm": 1.1351420879364014, "learning_rate": 0.0001, "loss": 0.0089, "step": 185990 }, { "epoch": 1223.6842105263158, "grad_norm": 1.0550578832626343, "learning_rate": 0.0001, "loss": 0.0093, "step": 186000 }, { "epoch": 1223.75, "grad_norm": 0.7912912964820862, "learning_rate": 0.0001, "loss": 0.0084, "step": 186010 }, { "epoch": 1223.8157894736842, "grad_norm": 1.267266035079956, "learning_rate": 0.0001, "loss": 0.0109, "step": 186020 }, { "epoch": 1223.8815789473683, "grad_norm": 0.7863309979438782, "learning_rate": 0.0001, "loss": 0.0085, "step": 186030 }, { "epoch": 1223.9473684210527, "grad_norm": 0.8405379056930542, "learning_rate": 0.0001, "loss": 0.0067, "step": 186040 }, { "epoch": 1224.0131578947369, "grad_norm": 0.7136426568031311, "learning_rate": 0.0001, "loss": 0.0087, "step": 186050 }, { "epoch": 1224.078947368421, "grad_norm": 1.2677913904190063, "learning_rate": 0.0001, "loss": 0.0097, "step": 186060 }, { "epoch": 1224.1447368421052, "grad_norm": 1.0475759506225586, "learning_rate": 0.0001, "loss": 0.0089, "step": 186070 }, { "epoch": 1224.2105263157894, "grad_norm": 0.9506308436393738, "learning_rate": 0.0001, "loss": 0.0082, "step": 186080 }, { "epoch": 1224.2763157894738, "grad_norm": 0.9816416501998901, "learning_rate": 0.0001, "loss": 0.0089, "step": 186090 }, { "epoch": 1224.342105263158, "grad_norm": 0.6930526494979858, "learning_rate": 0.0001, "loss": 0.0089, "step": 186100 }, { "epoch": 1224.407894736842, "grad_norm": 0.9830896258354187, "learning_rate": 0.0001, "loss": 0.009, "step": 186110 }, { "epoch": 1224.4736842105262, "grad_norm": 0.9465078115463257, "learning_rate": 0.0001, "loss": 0.0097, "step": 186120 }, { "epoch": 1224.5394736842106, "grad_norm": 0.8693153858184814, "learning_rate": 0.0001, "loss": 0.0093, "step": 186130 }, { "epoch": 1224.6052631578948, "grad_norm": 1.0287957191467285, "learning_rate": 0.0001, "loss": 0.0074, "step": 186140 }, { "epoch": 1224.671052631579, "grad_norm": 0.7358331084251404, "learning_rate": 0.0001, "loss": 0.0099, "step": 186150 }, { "epoch": 1224.7368421052631, "grad_norm": 0.6375886797904968, "learning_rate": 0.0001, "loss": 0.009, "step": 186160 }, { "epoch": 1224.8026315789473, "grad_norm": 1.0094578266143799, "learning_rate": 0.0001, "loss": 0.0079, "step": 186170 }, { "epoch": 1224.8684210526317, "grad_norm": 1.2570643424987793, "learning_rate": 0.0001, "loss": 0.0096, "step": 186180 }, { "epoch": 1224.9342105263158, "grad_norm": 0.9317885637283325, "learning_rate": 0.0001, "loss": 0.0092, "step": 186190 }, { "epoch": 1225.0, "grad_norm": 0.8335903286933899, "learning_rate": 0.0001, "loss": 0.0074, "step": 186200 }, { "epoch": 1225.0657894736842, "grad_norm": 1.1470184326171875, "learning_rate": 0.0001, "loss": 0.0102, "step": 186210 }, { "epoch": 1225.1315789473683, "grad_norm": 1.1468976736068726, "learning_rate": 0.0001, "loss": 0.0095, "step": 186220 }, { "epoch": 1225.1973684210527, "grad_norm": 1.1850436925888062, "learning_rate": 0.0001, "loss": 0.0104, "step": 186230 }, { "epoch": 1225.2631578947369, "grad_norm": 1.3890918493270874, "learning_rate": 0.0001, "loss": 0.01, "step": 186240 }, { "epoch": 1225.328947368421, "grad_norm": 0.9903066754341125, "learning_rate": 0.0001, "loss": 0.0077, "step": 186250 }, { "epoch": 1225.3947368421052, "grad_norm": 0.9706083536148071, "learning_rate": 0.0001, "loss": 0.0086, "step": 186260 }, { "epoch": 1225.4605263157894, "grad_norm": 1.4619114398956299, "learning_rate": 0.0001, "loss": 0.0078, "step": 186270 }, { "epoch": 1225.5263157894738, "grad_norm": 0.8819053173065186, "learning_rate": 0.0001, "loss": 0.0096, "step": 186280 }, { "epoch": 1225.592105263158, "grad_norm": 0.8979442715644836, "learning_rate": 0.0001, "loss": 0.0097, "step": 186290 }, { "epoch": 1225.657894736842, "grad_norm": 0.9516298770904541, "learning_rate": 0.0001, "loss": 0.0077, "step": 186300 }, { "epoch": 1225.7236842105262, "grad_norm": 1.1198757886886597, "learning_rate": 0.0001, "loss": 0.0089, "step": 186310 }, { "epoch": 1225.7894736842106, "grad_norm": 1.2486748695373535, "learning_rate": 0.0001, "loss": 0.0078, "step": 186320 }, { "epoch": 1225.8552631578948, "grad_norm": 1.025698184967041, "learning_rate": 0.0001, "loss": 0.0073, "step": 186330 }, { "epoch": 1225.921052631579, "grad_norm": 0.6521353125572205, "learning_rate": 0.0001, "loss": 0.0098, "step": 186340 }, { "epoch": 1225.9868421052631, "grad_norm": 0.7902568578720093, "learning_rate": 0.0001, "loss": 0.0093, "step": 186350 }, { "epoch": 1226.0526315789473, "grad_norm": 1.071388840675354, "learning_rate": 0.0001, "loss": 0.007, "step": 186360 }, { "epoch": 1226.1184210526317, "grad_norm": 0.9144065976142883, "learning_rate": 0.0001, "loss": 0.009, "step": 186370 }, { "epoch": 1226.1842105263158, "grad_norm": 1.1299687623977661, "learning_rate": 0.0001, "loss": 0.0077, "step": 186380 }, { "epoch": 1226.25, "grad_norm": 1.0512938499450684, "learning_rate": 0.0001, "loss": 0.0095, "step": 186390 }, { "epoch": 1226.3157894736842, "grad_norm": 0.8974717259407043, "learning_rate": 0.0001, "loss": 0.0083, "step": 186400 }, { "epoch": 1226.3815789473683, "grad_norm": 0.9535547494888306, "learning_rate": 0.0001, "loss": 0.0095, "step": 186410 }, { "epoch": 1226.4473684210527, "grad_norm": 1.0984748601913452, "learning_rate": 0.0001, "loss": 0.008, "step": 186420 }, { "epoch": 1226.5131578947369, "grad_norm": 1.2447376251220703, "learning_rate": 0.0001, "loss": 0.009, "step": 186430 }, { "epoch": 1226.578947368421, "grad_norm": 1.0115010738372803, "learning_rate": 0.0001, "loss": 0.0086, "step": 186440 }, { "epoch": 1226.6447368421052, "grad_norm": 1.0430307388305664, "learning_rate": 0.0001, "loss": 0.0081, "step": 186450 }, { "epoch": 1226.7105263157894, "grad_norm": 0.9099544882774353, "learning_rate": 0.0001, "loss": 0.0095, "step": 186460 }, { "epoch": 1226.7763157894738, "grad_norm": 0.878151535987854, "learning_rate": 0.0001, "loss": 0.0099, "step": 186470 }, { "epoch": 1226.842105263158, "grad_norm": 0.6946768164634705, "learning_rate": 0.0001, "loss": 0.0118, "step": 186480 }, { "epoch": 1226.907894736842, "grad_norm": 1.0372909307479858, "learning_rate": 0.0001, "loss": 0.0101, "step": 186490 }, { "epoch": 1226.9736842105262, "grad_norm": 0.8411084413528442, "learning_rate": 0.0001, "loss": 0.0077, "step": 186500 }, { "epoch": 1227.0394736842106, "grad_norm": 0.7090082764625549, "learning_rate": 0.0001, "loss": 0.0092, "step": 186510 }, { "epoch": 1227.1052631578948, "grad_norm": 0.802839457988739, "learning_rate": 0.0001, "loss": 0.0086, "step": 186520 }, { "epoch": 1227.171052631579, "grad_norm": 0.8215923309326172, "learning_rate": 0.0001, "loss": 0.0095, "step": 186530 }, { "epoch": 1227.2368421052631, "grad_norm": 0.8801704049110413, "learning_rate": 0.0001, "loss": 0.009, "step": 186540 }, { "epoch": 1227.3026315789473, "grad_norm": 1.15470552444458, "learning_rate": 0.0001, "loss": 0.0102, "step": 186550 }, { "epoch": 1227.3684210526317, "grad_norm": 1.114592432975769, "learning_rate": 0.0001, "loss": 0.009, "step": 186560 }, { "epoch": 1227.4342105263158, "grad_norm": 1.192267656326294, "learning_rate": 0.0001, "loss": 0.0078, "step": 186570 }, { "epoch": 1227.5, "grad_norm": 0.8330889940261841, "learning_rate": 0.0001, "loss": 0.0096, "step": 186580 }, { "epoch": 1227.5657894736842, "grad_norm": 1.0712906122207642, "learning_rate": 0.0001, "loss": 0.0076, "step": 186590 }, { "epoch": 1227.6315789473683, "grad_norm": 1.0091384649276733, "learning_rate": 0.0001, "loss": 0.0072, "step": 186600 }, { "epoch": 1227.6973684210527, "grad_norm": 0.8430705070495605, "learning_rate": 0.0001, "loss": 0.0086, "step": 186610 }, { "epoch": 1227.7631578947369, "grad_norm": 1.0369099378585815, "learning_rate": 0.0001, "loss": 0.009, "step": 186620 }, { "epoch": 1227.828947368421, "grad_norm": 1.0996822118759155, "learning_rate": 0.0001, "loss": 0.0074, "step": 186630 }, { "epoch": 1227.8947368421052, "grad_norm": 1.3173720836639404, "learning_rate": 0.0001, "loss": 0.0099, "step": 186640 }, { "epoch": 1227.9605263157894, "grad_norm": 1.1714130640029907, "learning_rate": 0.0001, "loss": 0.0107, "step": 186650 }, { "epoch": 1228.0263157894738, "grad_norm": 1.1778959035873413, "learning_rate": 0.0001, "loss": 0.0096, "step": 186660 }, { "epoch": 1228.092105263158, "grad_norm": 1.0300238132476807, "learning_rate": 0.0001, "loss": 0.0092, "step": 186670 }, { "epoch": 1228.157894736842, "grad_norm": 0.9473615288734436, "learning_rate": 0.0001, "loss": 0.0071, "step": 186680 }, { "epoch": 1228.2236842105262, "grad_norm": 1.171370267868042, "learning_rate": 0.0001, "loss": 0.0091, "step": 186690 }, { "epoch": 1228.2894736842106, "grad_norm": 0.8373340964317322, "learning_rate": 0.0001, "loss": 0.0075, "step": 186700 }, { "epoch": 1228.3552631578948, "grad_norm": 0.9212744832038879, "learning_rate": 0.0001, "loss": 0.0103, "step": 186710 }, { "epoch": 1228.421052631579, "grad_norm": 1.379812240600586, "learning_rate": 0.0001, "loss": 0.0084, "step": 186720 }, { "epoch": 1228.4868421052631, "grad_norm": 1.5933481454849243, "learning_rate": 0.0001, "loss": 0.0097, "step": 186730 }, { "epoch": 1228.5526315789473, "grad_norm": 1.3223012685775757, "learning_rate": 0.0001, "loss": 0.0089, "step": 186740 }, { "epoch": 1228.6184210526317, "grad_norm": 1.4536499977111816, "learning_rate": 0.0001, "loss": 0.0092, "step": 186750 }, { "epoch": 1228.6842105263158, "grad_norm": 1.071915626525879, "learning_rate": 0.0001, "loss": 0.0077, "step": 186760 }, { "epoch": 1228.75, "grad_norm": 1.0385130643844604, "learning_rate": 0.0001, "loss": 0.0086, "step": 186770 }, { "epoch": 1228.8157894736842, "grad_norm": 1.3961035013198853, "learning_rate": 0.0001, "loss": 0.009, "step": 186780 }, { "epoch": 1228.8815789473683, "grad_norm": 1.3020026683807373, "learning_rate": 0.0001, "loss": 0.0107, "step": 186790 }, { "epoch": 1228.9473684210527, "grad_norm": 1.341375708580017, "learning_rate": 0.0001, "loss": 0.0087, "step": 186800 }, { "epoch": 1229.0131578947369, "grad_norm": 1.4074076414108276, "learning_rate": 0.0001, "loss": 0.0076, "step": 186810 }, { "epoch": 1229.078947368421, "grad_norm": 0.9496380090713501, "learning_rate": 0.0001, "loss": 0.0081, "step": 186820 }, { "epoch": 1229.1447368421052, "grad_norm": 1.0725762844085693, "learning_rate": 0.0001, "loss": 0.0099, "step": 186830 }, { "epoch": 1229.2105263157894, "grad_norm": 0.9692988991737366, "learning_rate": 0.0001, "loss": 0.0091, "step": 186840 }, { "epoch": 1229.2763157894738, "grad_norm": 0.9738875031471252, "learning_rate": 0.0001, "loss": 0.0083, "step": 186850 }, { "epoch": 1229.342105263158, "grad_norm": 1.2129768133163452, "learning_rate": 0.0001, "loss": 0.0103, "step": 186860 }, { "epoch": 1229.407894736842, "grad_norm": 0.7136619091033936, "learning_rate": 0.0001, "loss": 0.0087, "step": 186870 }, { "epoch": 1229.4736842105262, "grad_norm": 1.1828364133834839, "learning_rate": 0.0001, "loss": 0.008, "step": 186880 }, { "epoch": 1229.5394736842106, "grad_norm": 1.1648519039154053, "learning_rate": 0.0001, "loss": 0.0092, "step": 186890 }, { "epoch": 1229.6052631578948, "grad_norm": 0.9487463235855103, "learning_rate": 0.0001, "loss": 0.0066, "step": 186900 }, { "epoch": 1229.671052631579, "grad_norm": 1.2276160717010498, "learning_rate": 0.0001, "loss": 0.0098, "step": 186910 }, { "epoch": 1229.7368421052631, "grad_norm": 0.9763864278793335, "learning_rate": 0.0001, "loss": 0.0084, "step": 186920 }, { "epoch": 1229.8026315789473, "grad_norm": 0.8818470239639282, "learning_rate": 0.0001, "loss": 0.0084, "step": 186930 }, { "epoch": 1229.8684210526317, "grad_norm": 0.7005442380905151, "learning_rate": 0.0001, "loss": 0.0097, "step": 186940 }, { "epoch": 1229.9342105263158, "grad_norm": 0.5166844129562378, "learning_rate": 0.0001, "loss": 0.0102, "step": 186950 }, { "epoch": 1230.0, "grad_norm": 0.9323204755783081, "learning_rate": 0.0001, "loss": 0.0101, "step": 186960 }, { "epoch": 1230.0657894736842, "grad_norm": 1.0536624193191528, "learning_rate": 0.0001, "loss": 0.0078, "step": 186970 }, { "epoch": 1230.1315789473683, "grad_norm": 0.9364405274391174, "learning_rate": 0.0001, "loss": 0.0128, "step": 186980 }, { "epoch": 1230.1973684210527, "grad_norm": 0.9843827486038208, "learning_rate": 0.0001, "loss": 0.0083, "step": 186990 }, { "epoch": 1230.2631578947369, "grad_norm": 1.1612672805786133, "learning_rate": 0.0001, "loss": 0.0073, "step": 187000 }, { "epoch": 1230.328947368421, "grad_norm": 1.0113646984100342, "learning_rate": 0.0001, "loss": 0.0079, "step": 187010 }, { "epoch": 1230.3947368421052, "grad_norm": 0.8987430334091187, "learning_rate": 0.0001, "loss": 0.0083, "step": 187020 }, { "epoch": 1230.4605263157894, "grad_norm": 1.0051621198654175, "learning_rate": 0.0001, "loss": 0.0103, "step": 187030 }, { "epoch": 1230.5263157894738, "grad_norm": 1.093727469444275, "learning_rate": 0.0001, "loss": 0.009, "step": 187040 }, { "epoch": 1230.592105263158, "grad_norm": 0.9883322715759277, "learning_rate": 0.0001, "loss": 0.0076, "step": 187050 }, { "epoch": 1230.657894736842, "grad_norm": 0.8294075131416321, "learning_rate": 0.0001, "loss": 0.0081, "step": 187060 }, { "epoch": 1230.7236842105262, "grad_norm": 0.8644272685050964, "learning_rate": 0.0001, "loss": 0.0105, "step": 187070 }, { "epoch": 1230.7894736842106, "grad_norm": 0.7558962106704712, "learning_rate": 0.0001, "loss": 0.0093, "step": 187080 }, { "epoch": 1230.8552631578948, "grad_norm": 0.6551307439804077, "learning_rate": 0.0001, "loss": 0.01, "step": 187090 }, { "epoch": 1230.921052631579, "grad_norm": 0.8390691876411438, "learning_rate": 0.0001, "loss": 0.009, "step": 187100 }, { "epoch": 1230.9868421052631, "grad_norm": 1.2046642303466797, "learning_rate": 0.0001, "loss": 0.0068, "step": 187110 }, { "epoch": 1231.0526315789473, "grad_norm": 0.6695582866668701, "learning_rate": 0.0001, "loss": 0.0086, "step": 187120 }, { "epoch": 1231.1184210526317, "grad_norm": 0.7703352570533752, "learning_rate": 0.0001, "loss": 0.0076, "step": 187130 }, { "epoch": 1231.1842105263158, "grad_norm": 0.9928059577941895, "learning_rate": 0.0001, "loss": 0.0091, "step": 187140 }, { "epoch": 1231.25, "grad_norm": 0.8067262768745422, "learning_rate": 0.0001, "loss": 0.0102, "step": 187150 }, { "epoch": 1231.3157894736842, "grad_norm": 0.8623690009117126, "learning_rate": 0.0001, "loss": 0.0093, "step": 187160 }, { "epoch": 1231.3815789473683, "grad_norm": 0.8938329219818115, "learning_rate": 0.0001, "loss": 0.0082, "step": 187170 }, { "epoch": 1231.4473684210527, "grad_norm": 1.010252594947815, "learning_rate": 0.0001, "loss": 0.01, "step": 187180 }, { "epoch": 1231.5131578947369, "grad_norm": 1.1144150495529175, "learning_rate": 0.0001, "loss": 0.0081, "step": 187190 }, { "epoch": 1231.578947368421, "grad_norm": 0.7431076765060425, "learning_rate": 0.0001, "loss": 0.0096, "step": 187200 }, { "epoch": 1231.6447368421052, "grad_norm": 0.8599720597267151, "learning_rate": 0.0001, "loss": 0.0112, "step": 187210 }, { "epoch": 1231.7105263157894, "grad_norm": 1.280928134918213, "learning_rate": 0.0001, "loss": 0.0114, "step": 187220 }, { "epoch": 1231.7763157894738, "grad_norm": 0.8571041822433472, "learning_rate": 0.0001, "loss": 0.0091, "step": 187230 }, { "epoch": 1231.842105263158, "grad_norm": 0.6886258125305176, "learning_rate": 0.0001, "loss": 0.0096, "step": 187240 }, { "epoch": 1231.907894736842, "grad_norm": 0.9688119888305664, "learning_rate": 0.0001, "loss": 0.0081, "step": 187250 }, { "epoch": 1231.9736842105262, "grad_norm": 1.306778907775879, "learning_rate": 0.0001, "loss": 0.0082, "step": 187260 }, { "epoch": 1232.0394736842106, "grad_norm": 1.23270845413208, "learning_rate": 0.0001, "loss": 0.0093, "step": 187270 }, { "epoch": 1232.1052631578948, "grad_norm": 1.2252572774887085, "learning_rate": 0.0001, "loss": 0.0085, "step": 187280 }, { "epoch": 1232.171052631579, "grad_norm": 0.9559009075164795, "learning_rate": 0.0001, "loss": 0.0086, "step": 187290 }, { "epoch": 1232.2368421052631, "grad_norm": 0.895037829875946, "learning_rate": 0.0001, "loss": 0.0079, "step": 187300 }, { "epoch": 1232.3026315789473, "grad_norm": 1.1871989965438843, "learning_rate": 0.0001, "loss": 0.01, "step": 187310 }, { "epoch": 1232.3684210526317, "grad_norm": 1.0235185623168945, "learning_rate": 0.0001, "loss": 0.0078, "step": 187320 }, { "epoch": 1232.4342105263158, "grad_norm": 0.5289400815963745, "learning_rate": 0.0001, "loss": 0.0096, "step": 187330 }, { "epoch": 1232.5, "grad_norm": 1.077367901802063, "learning_rate": 0.0001, "loss": 0.0098, "step": 187340 }, { "epoch": 1232.5657894736842, "grad_norm": 1.0103833675384521, "learning_rate": 0.0001, "loss": 0.0082, "step": 187350 }, { "epoch": 1232.6315789473683, "grad_norm": 1.0976219177246094, "learning_rate": 0.0001, "loss": 0.0087, "step": 187360 }, { "epoch": 1232.6973684210527, "grad_norm": 0.8010794520378113, "learning_rate": 0.0001, "loss": 0.0079, "step": 187370 }, { "epoch": 1232.7631578947369, "grad_norm": 0.8650757074356079, "learning_rate": 0.0001, "loss": 0.0112, "step": 187380 }, { "epoch": 1232.828947368421, "grad_norm": 0.9312655329704285, "learning_rate": 0.0001, "loss": 0.0086, "step": 187390 }, { "epoch": 1232.8947368421052, "grad_norm": 0.9070029854774475, "learning_rate": 0.0001, "loss": 0.0099, "step": 187400 }, { "epoch": 1232.9605263157894, "grad_norm": 0.7533953189849854, "learning_rate": 0.0001, "loss": 0.0091, "step": 187410 }, { "epoch": 1233.0263157894738, "grad_norm": 0.9384796023368835, "learning_rate": 0.0001, "loss": 0.0088, "step": 187420 }, { "epoch": 1233.092105263158, "grad_norm": 1.097226858139038, "learning_rate": 0.0001, "loss": 0.0093, "step": 187430 }, { "epoch": 1233.157894736842, "grad_norm": 1.156470537185669, "learning_rate": 0.0001, "loss": 0.0082, "step": 187440 }, { "epoch": 1233.2236842105262, "grad_norm": 0.7621616125106812, "learning_rate": 0.0001, "loss": 0.0087, "step": 187450 }, { "epoch": 1233.2894736842106, "grad_norm": 1.3856050968170166, "learning_rate": 0.0001, "loss": 0.0084, "step": 187460 }, { "epoch": 1233.3552631578948, "grad_norm": 0.9687317609786987, "learning_rate": 0.0001, "loss": 0.0092, "step": 187470 }, { "epoch": 1233.421052631579, "grad_norm": 0.9934854507446289, "learning_rate": 0.0001, "loss": 0.0103, "step": 187480 }, { "epoch": 1233.4868421052631, "grad_norm": 1.1821582317352295, "learning_rate": 0.0001, "loss": 0.0084, "step": 187490 }, { "epoch": 1233.5526315789473, "grad_norm": 0.8907933235168457, "learning_rate": 0.0001, "loss": 0.01, "step": 187500 }, { "epoch": 1233.6184210526317, "grad_norm": 0.7803938388824463, "learning_rate": 0.0001, "loss": 0.0086, "step": 187510 }, { "epoch": 1233.6842105263158, "grad_norm": 0.8478217124938965, "learning_rate": 0.0001, "loss": 0.0087, "step": 187520 }, { "epoch": 1233.75, "grad_norm": 0.9070845246315002, "learning_rate": 0.0001, "loss": 0.0128, "step": 187530 }, { "epoch": 1233.8157894736842, "grad_norm": 0.7729270458221436, "learning_rate": 0.0001, "loss": 0.01, "step": 187540 }, { "epoch": 1233.8815789473683, "grad_norm": 0.689509391784668, "learning_rate": 0.0001, "loss": 0.0072, "step": 187550 }, { "epoch": 1233.9473684210527, "grad_norm": 1.0114260911941528, "learning_rate": 0.0001, "loss": 0.0089, "step": 187560 }, { "epoch": 1234.0131578947369, "grad_norm": 0.8790208697319031, "learning_rate": 0.0001, "loss": 0.0072, "step": 187570 }, { "epoch": 1234.078947368421, "grad_norm": 0.7868586182594299, "learning_rate": 0.0001, "loss": 0.0088, "step": 187580 }, { "epoch": 1234.1447368421052, "grad_norm": 1.1828399896621704, "learning_rate": 0.0001, "loss": 0.0103, "step": 187590 }, { "epoch": 1234.2105263157894, "grad_norm": 2.568375825881958, "learning_rate": 0.0001, "loss": 0.0089, "step": 187600 }, { "epoch": 1234.2763157894738, "grad_norm": 1.1999150514602661, "learning_rate": 0.0001, "loss": 0.0092, "step": 187610 }, { "epoch": 1234.342105263158, "grad_norm": 1.172085165977478, "learning_rate": 0.0001, "loss": 0.0089, "step": 187620 }, { "epoch": 1234.407894736842, "grad_norm": 1.260359287261963, "learning_rate": 0.0001, "loss": 0.0085, "step": 187630 }, { "epoch": 1234.4736842105262, "grad_norm": 0.9659546613693237, "learning_rate": 0.0001, "loss": 0.0081, "step": 187640 }, { "epoch": 1234.5394736842106, "grad_norm": 1.1871107816696167, "learning_rate": 0.0001, "loss": 0.0109, "step": 187650 }, { "epoch": 1234.6052631578948, "grad_norm": 0.8396361470222473, "learning_rate": 0.0001, "loss": 0.0094, "step": 187660 }, { "epoch": 1234.671052631579, "grad_norm": 0.862738311290741, "learning_rate": 0.0001, "loss": 0.0088, "step": 187670 }, { "epoch": 1234.7368421052631, "grad_norm": 0.8556384444236755, "learning_rate": 0.0001, "loss": 0.0088, "step": 187680 }, { "epoch": 1234.8026315789473, "grad_norm": 1.10312819480896, "learning_rate": 0.0001, "loss": 0.0092, "step": 187690 }, { "epoch": 1234.8684210526317, "grad_norm": 0.9752662777900696, "learning_rate": 0.0001, "loss": 0.0087, "step": 187700 }, { "epoch": 1234.9342105263158, "grad_norm": 0.7204729914665222, "learning_rate": 0.0001, "loss": 0.009, "step": 187710 }, { "epoch": 1235.0, "grad_norm": 0.7936938405036926, "learning_rate": 0.0001, "loss": 0.0084, "step": 187720 }, { "epoch": 1235.0657894736842, "grad_norm": 0.8540555238723755, "learning_rate": 0.0001, "loss": 0.0091, "step": 187730 }, { "epoch": 1235.1315789473683, "grad_norm": 1.0517683029174805, "learning_rate": 0.0001, "loss": 0.0073, "step": 187740 }, { "epoch": 1235.1973684210527, "grad_norm": 0.9040259718894958, "learning_rate": 0.0001, "loss": 0.0096, "step": 187750 }, { "epoch": 1235.2631578947369, "grad_norm": 1.0970168113708496, "learning_rate": 0.0001, "loss": 0.0092, "step": 187760 }, { "epoch": 1235.328947368421, "grad_norm": 0.7497133016586304, "learning_rate": 0.0001, "loss": 0.0089, "step": 187770 }, { "epoch": 1235.3947368421052, "grad_norm": 1.210512638092041, "learning_rate": 0.0001, "loss": 0.0087, "step": 187780 }, { "epoch": 1235.4605263157894, "grad_norm": 1.1530228853225708, "learning_rate": 0.0001, "loss": 0.009, "step": 187790 }, { "epoch": 1235.5263157894738, "grad_norm": 1.0944901704788208, "learning_rate": 0.0001, "loss": 0.0094, "step": 187800 }, { "epoch": 1235.592105263158, "grad_norm": 0.8939099311828613, "learning_rate": 0.0001, "loss": 0.0104, "step": 187810 }, { "epoch": 1235.657894736842, "grad_norm": 0.756377100944519, "learning_rate": 0.0001, "loss": 0.0083, "step": 187820 }, { "epoch": 1235.7236842105262, "grad_norm": 0.9245010614395142, "learning_rate": 0.0001, "loss": 0.0089, "step": 187830 }, { "epoch": 1235.7894736842106, "grad_norm": 1.1319273710250854, "learning_rate": 0.0001, "loss": 0.007, "step": 187840 }, { "epoch": 1235.8552631578948, "grad_norm": 1.1174309253692627, "learning_rate": 0.0001, "loss": 0.0078, "step": 187850 }, { "epoch": 1235.921052631579, "grad_norm": 0.8425665497779846, "learning_rate": 0.0001, "loss": 0.0114, "step": 187860 }, { "epoch": 1235.9868421052631, "grad_norm": 0.9204140901565552, "learning_rate": 0.0001, "loss": 0.0084, "step": 187870 }, { "epoch": 1236.0526315789473, "grad_norm": 0.7384737730026245, "learning_rate": 0.0001, "loss": 0.0072, "step": 187880 }, { "epoch": 1236.1184210526317, "grad_norm": 1.1339808702468872, "learning_rate": 0.0001, "loss": 0.0097, "step": 187890 }, { "epoch": 1236.1842105263158, "grad_norm": 1.197279453277588, "learning_rate": 0.0001, "loss": 0.0101, "step": 187900 }, { "epoch": 1236.25, "grad_norm": 0.8099234700202942, "learning_rate": 0.0001, "loss": 0.0077, "step": 187910 }, { "epoch": 1236.3157894736842, "grad_norm": 0.9687013626098633, "learning_rate": 0.0001, "loss": 0.01, "step": 187920 }, { "epoch": 1236.3815789473683, "grad_norm": 0.9288181066513062, "learning_rate": 0.0001, "loss": 0.0088, "step": 187930 }, { "epoch": 1236.4473684210527, "grad_norm": 1.2456539869308472, "learning_rate": 0.0001, "loss": 0.0071, "step": 187940 }, { "epoch": 1236.5131578947369, "grad_norm": 1.1154669523239136, "learning_rate": 0.0001, "loss": 0.0112, "step": 187950 }, { "epoch": 1236.578947368421, "grad_norm": 1.3738938570022583, "learning_rate": 0.0001, "loss": 0.0081, "step": 187960 }, { "epoch": 1236.6447368421052, "grad_norm": 1.0610955953598022, "learning_rate": 0.0001, "loss": 0.0103, "step": 187970 }, { "epoch": 1236.7105263157894, "grad_norm": 1.108542799949646, "learning_rate": 0.0001, "loss": 0.0117, "step": 187980 }, { "epoch": 1236.7763157894738, "grad_norm": 0.896397054195404, "learning_rate": 0.0001, "loss": 0.0089, "step": 187990 }, { "epoch": 1236.842105263158, "grad_norm": 0.7802370190620422, "learning_rate": 0.0001, "loss": 0.0091, "step": 188000 }, { "epoch": 1236.907894736842, "grad_norm": 0.5918530225753784, "learning_rate": 0.0001, "loss": 0.0084, "step": 188010 }, { "epoch": 1236.9736842105262, "grad_norm": 1.0109541416168213, "learning_rate": 0.0001, "loss": 0.0078, "step": 188020 }, { "epoch": 1237.0394736842106, "grad_norm": 0.9148463010787964, "learning_rate": 0.0001, "loss": 0.0108, "step": 188030 }, { "epoch": 1237.1052631578948, "grad_norm": 0.9371543526649475, "learning_rate": 0.0001, "loss": 0.0085, "step": 188040 }, { "epoch": 1237.171052631579, "grad_norm": 0.9670152068138123, "learning_rate": 0.0001, "loss": 0.0074, "step": 188050 }, { "epoch": 1237.2368421052631, "grad_norm": 0.973000705242157, "learning_rate": 0.0001, "loss": 0.0085, "step": 188060 }, { "epoch": 1237.3026315789473, "grad_norm": 0.9884973764419556, "learning_rate": 0.0001, "loss": 0.0091, "step": 188070 }, { "epoch": 1237.3684210526317, "grad_norm": 1.0578480958938599, "learning_rate": 0.0001, "loss": 0.0094, "step": 188080 }, { "epoch": 1237.4342105263158, "grad_norm": 0.9570189118385315, "learning_rate": 0.0001, "loss": 0.0089, "step": 188090 }, { "epoch": 1237.5, "grad_norm": 1.118773102760315, "learning_rate": 0.0001, "loss": 0.009, "step": 188100 }, { "epoch": 1237.5657894736842, "grad_norm": 1.1802111864089966, "learning_rate": 0.0001, "loss": 0.0093, "step": 188110 }, { "epoch": 1237.6315789473683, "grad_norm": 0.8796427845954895, "learning_rate": 0.0001, "loss": 0.008, "step": 188120 }, { "epoch": 1237.6973684210527, "grad_norm": 1.2683212757110596, "learning_rate": 0.0001, "loss": 0.011, "step": 188130 }, { "epoch": 1237.7631578947369, "grad_norm": 1.0439941883087158, "learning_rate": 0.0001, "loss": 0.0113, "step": 188140 }, { "epoch": 1237.828947368421, "grad_norm": 0.969838559627533, "learning_rate": 0.0001, "loss": 0.0093, "step": 188150 }, { "epoch": 1237.8947368421052, "grad_norm": 1.0440396070480347, "learning_rate": 0.0001, "loss": 0.0087, "step": 188160 }, { "epoch": 1237.9605263157894, "grad_norm": 1.1314268112182617, "learning_rate": 0.0001, "loss": 0.0082, "step": 188170 }, { "epoch": 1238.0263157894738, "grad_norm": 1.2484370470046997, "learning_rate": 0.0001, "loss": 0.0092, "step": 188180 }, { "epoch": 1238.092105263158, "grad_norm": 0.7492143511772156, "learning_rate": 0.0001, "loss": 0.0096, "step": 188190 }, { "epoch": 1238.157894736842, "grad_norm": 1.2526135444641113, "learning_rate": 0.0001, "loss": 0.0084, "step": 188200 }, { "epoch": 1238.2236842105262, "grad_norm": 1.2862526178359985, "learning_rate": 0.0001, "loss": 0.0104, "step": 188210 }, { "epoch": 1238.2894736842106, "grad_norm": 1.1841567754745483, "learning_rate": 0.0001, "loss": 0.0097, "step": 188220 }, { "epoch": 1238.3552631578948, "grad_norm": 1.1451932191848755, "learning_rate": 0.0001, "loss": 0.0076, "step": 188230 }, { "epoch": 1238.421052631579, "grad_norm": 0.9696784019470215, "learning_rate": 0.0001, "loss": 0.0076, "step": 188240 }, { "epoch": 1238.4868421052631, "grad_norm": 1.0971126556396484, "learning_rate": 0.0001, "loss": 0.0083, "step": 188250 }, { "epoch": 1238.5526315789473, "grad_norm": 0.7748174071311951, "learning_rate": 0.0001, "loss": 0.0101, "step": 188260 }, { "epoch": 1238.6184210526317, "grad_norm": 0.851367712020874, "learning_rate": 0.0001, "loss": 0.0103, "step": 188270 }, { "epoch": 1238.6842105263158, "grad_norm": 0.7852613925933838, "learning_rate": 0.0001, "loss": 0.0076, "step": 188280 }, { "epoch": 1238.75, "grad_norm": 0.9901844263076782, "learning_rate": 0.0001, "loss": 0.0103, "step": 188290 }, { "epoch": 1238.8157894736842, "grad_norm": 1.0415254831314087, "learning_rate": 0.0001, "loss": 0.0099, "step": 188300 }, { "epoch": 1238.8815789473683, "grad_norm": 1.2643136978149414, "learning_rate": 0.0001, "loss": 0.0095, "step": 188310 }, { "epoch": 1238.9473684210527, "grad_norm": 1.2420594692230225, "learning_rate": 0.0001, "loss": 0.0102, "step": 188320 }, { "epoch": 1239.0131578947369, "grad_norm": 0.9030695557594299, "learning_rate": 0.0001, "loss": 0.0111, "step": 188330 }, { "epoch": 1239.078947368421, "grad_norm": 0.8474811911582947, "learning_rate": 0.0001, "loss": 0.0083, "step": 188340 }, { "epoch": 1239.1447368421052, "grad_norm": 0.9705641865730286, "learning_rate": 0.0001, "loss": 0.0096, "step": 188350 }, { "epoch": 1239.2105263157894, "grad_norm": 0.9619945883750916, "learning_rate": 0.0001, "loss": 0.0091, "step": 188360 }, { "epoch": 1239.2763157894738, "grad_norm": 1.1420098543167114, "learning_rate": 0.0001, "loss": 0.0094, "step": 188370 }, { "epoch": 1239.342105263158, "grad_norm": 1.0340029001235962, "learning_rate": 0.0001, "loss": 0.0095, "step": 188380 }, { "epoch": 1239.407894736842, "grad_norm": 0.9889524579048157, "learning_rate": 0.0001, "loss": 0.0102, "step": 188390 }, { "epoch": 1239.4736842105262, "grad_norm": 1.768046259880066, "learning_rate": 0.0001, "loss": 0.0093, "step": 188400 }, { "epoch": 1239.5394736842106, "grad_norm": 1.4465337991714478, "learning_rate": 0.0001, "loss": 0.0115, "step": 188410 }, { "epoch": 1239.6052631578948, "grad_norm": 1.0993311405181885, "learning_rate": 0.0001, "loss": 0.0102, "step": 188420 }, { "epoch": 1239.671052631579, "grad_norm": 1.098059892654419, "learning_rate": 0.0001, "loss": 0.0093, "step": 188430 }, { "epoch": 1239.7368421052631, "grad_norm": 1.0775386095046997, "learning_rate": 0.0001, "loss": 0.008, "step": 188440 }, { "epoch": 1239.8026315789473, "grad_norm": 0.6998803019523621, "learning_rate": 0.0001, "loss": 0.0093, "step": 188450 }, { "epoch": 1239.8684210526317, "grad_norm": 0.9273382425308228, "learning_rate": 0.0001, "loss": 0.0086, "step": 188460 }, { "epoch": 1239.9342105263158, "grad_norm": 1.2026686668395996, "learning_rate": 0.0001, "loss": 0.0088, "step": 188470 }, { "epoch": 1240.0, "grad_norm": 0.9668415784835815, "learning_rate": 0.0001, "loss": 0.0087, "step": 188480 }, { "epoch": 1240.0657894736842, "grad_norm": 1.1113210916519165, "learning_rate": 0.0001, "loss": 0.0086, "step": 188490 }, { "epoch": 1240.1315789473683, "grad_norm": 1.2677863836288452, "learning_rate": 0.0001, "loss": 0.0092, "step": 188500 }, { "epoch": 1240.1973684210527, "grad_norm": 1.4234356880187988, "learning_rate": 0.0001, "loss": 0.0098, "step": 188510 }, { "epoch": 1240.2631578947369, "grad_norm": 1.1672581434249878, "learning_rate": 0.0001, "loss": 0.0091, "step": 188520 }, { "epoch": 1240.328947368421, "grad_norm": 1.075741171836853, "learning_rate": 0.0001, "loss": 0.0112, "step": 188530 }, { "epoch": 1240.3947368421052, "grad_norm": 0.8153349161148071, "learning_rate": 0.0001, "loss": 0.0099, "step": 188540 }, { "epoch": 1240.4605263157894, "grad_norm": 1.0249247550964355, "learning_rate": 0.0001, "loss": 0.0088, "step": 188550 }, { "epoch": 1240.5263157894738, "grad_norm": 1.0031867027282715, "learning_rate": 0.0001, "loss": 0.0094, "step": 188560 }, { "epoch": 1240.592105263158, "grad_norm": 1.103637933731079, "learning_rate": 0.0001, "loss": 0.0082, "step": 188570 }, { "epoch": 1240.657894736842, "grad_norm": 1.0994689464569092, "learning_rate": 0.0001, "loss": 0.0101, "step": 188580 }, { "epoch": 1240.7236842105262, "grad_norm": 1.0447285175323486, "learning_rate": 0.0001, "loss": 0.0094, "step": 188590 }, { "epoch": 1240.7894736842106, "grad_norm": 0.9495458006858826, "learning_rate": 0.0001, "loss": 0.0082, "step": 188600 }, { "epoch": 1240.8552631578948, "grad_norm": 0.9653858542442322, "learning_rate": 0.0001, "loss": 0.0085, "step": 188610 }, { "epoch": 1240.921052631579, "grad_norm": 1.0502097606658936, "learning_rate": 0.0001, "loss": 0.0094, "step": 188620 }, { "epoch": 1240.9868421052631, "grad_norm": 0.9432629942893982, "learning_rate": 0.0001, "loss": 0.0085, "step": 188630 }, { "epoch": 1241.0526315789473, "grad_norm": 0.9657227396965027, "learning_rate": 0.0001, "loss": 0.009, "step": 188640 }, { "epoch": 1241.1184210526317, "grad_norm": 0.8866987228393555, "learning_rate": 0.0001, "loss": 0.0089, "step": 188650 }, { "epoch": 1241.1842105263158, "grad_norm": 0.9814696907997131, "learning_rate": 0.0001, "loss": 0.0096, "step": 188660 }, { "epoch": 1241.25, "grad_norm": 1.1600935459136963, "learning_rate": 0.0001, "loss": 0.0075, "step": 188670 }, { "epoch": 1241.3157894736842, "grad_norm": 1.2622950077056885, "learning_rate": 0.0001, "loss": 0.0077, "step": 188680 }, { "epoch": 1241.3815789473683, "grad_norm": 0.7318976521492004, "learning_rate": 0.0001, "loss": 0.0082, "step": 188690 }, { "epoch": 1241.4473684210527, "grad_norm": 1.3622748851776123, "learning_rate": 0.0001, "loss": 0.0101, "step": 188700 }, { "epoch": 1241.5131578947369, "grad_norm": 1.0387088060379028, "learning_rate": 0.0001, "loss": 0.01, "step": 188710 }, { "epoch": 1241.578947368421, "grad_norm": 0.8503026366233826, "learning_rate": 0.0001, "loss": 0.0096, "step": 188720 }, { "epoch": 1241.6447368421052, "grad_norm": 0.909067690372467, "learning_rate": 0.0001, "loss": 0.0107, "step": 188730 }, { "epoch": 1241.7105263157894, "grad_norm": 0.9165451526641846, "learning_rate": 0.0001, "loss": 0.0089, "step": 188740 }, { "epoch": 1241.7763157894738, "grad_norm": 0.7640308737754822, "learning_rate": 0.0001, "loss": 0.0088, "step": 188750 }, { "epoch": 1241.842105263158, "grad_norm": 0.9033883810043335, "learning_rate": 0.0001, "loss": 0.0088, "step": 188760 }, { "epoch": 1241.907894736842, "grad_norm": 1.0701746940612793, "learning_rate": 0.0001, "loss": 0.0084, "step": 188770 }, { "epoch": 1241.9736842105262, "grad_norm": 0.9798920154571533, "learning_rate": 0.0001, "loss": 0.009, "step": 188780 }, { "epoch": 1242.0394736842106, "grad_norm": 1.3275381326675415, "learning_rate": 0.0001, "loss": 0.0097, "step": 188790 }, { "epoch": 1242.1052631578948, "grad_norm": 1.1964999437332153, "learning_rate": 0.0001, "loss": 0.0084, "step": 188800 }, { "epoch": 1242.171052631579, "grad_norm": 1.0140482187271118, "learning_rate": 0.0001, "loss": 0.0092, "step": 188810 }, { "epoch": 1242.2368421052631, "grad_norm": 0.8349454998970032, "learning_rate": 0.0001, "loss": 0.0101, "step": 188820 }, { "epoch": 1242.3026315789473, "grad_norm": 1.2491860389709473, "learning_rate": 0.0001, "loss": 0.0092, "step": 188830 }, { "epoch": 1242.3684210526317, "grad_norm": 1.1465342044830322, "learning_rate": 0.0001, "loss": 0.0102, "step": 188840 }, { "epoch": 1242.4342105263158, "grad_norm": 0.7169458866119385, "learning_rate": 0.0001, "loss": 0.007, "step": 188850 }, { "epoch": 1242.5, "grad_norm": 0.887615978717804, "learning_rate": 0.0001, "loss": 0.0103, "step": 188860 }, { "epoch": 1242.5657894736842, "grad_norm": 1.243327260017395, "learning_rate": 0.0001, "loss": 0.0084, "step": 188870 }, { "epoch": 1242.6315789473683, "grad_norm": 1.3320894241333008, "learning_rate": 0.0001, "loss": 0.0096, "step": 188880 }, { "epoch": 1242.6973684210527, "grad_norm": 1.0858453512191772, "learning_rate": 0.0001, "loss": 0.0097, "step": 188890 }, { "epoch": 1242.7631578947369, "grad_norm": 1.123133659362793, "learning_rate": 0.0001, "loss": 0.0098, "step": 188900 }, { "epoch": 1242.828947368421, "grad_norm": 0.7207273840904236, "learning_rate": 0.0001, "loss": 0.0092, "step": 188910 }, { "epoch": 1242.8947368421052, "grad_norm": 0.6698071956634521, "learning_rate": 0.0001, "loss": 0.0095, "step": 188920 }, { "epoch": 1242.9605263157894, "grad_norm": 1.2865219116210938, "learning_rate": 0.0001, "loss": 0.0071, "step": 188930 }, { "epoch": 1243.0263157894738, "grad_norm": 0.8424243927001953, "learning_rate": 0.0001, "loss": 0.0083, "step": 188940 }, { "epoch": 1243.092105263158, "grad_norm": 1.2220584154129028, "learning_rate": 0.0001, "loss": 0.0077, "step": 188950 }, { "epoch": 1243.157894736842, "grad_norm": 1.234723687171936, "learning_rate": 0.0001, "loss": 0.0105, "step": 188960 }, { "epoch": 1243.2236842105262, "grad_norm": 1.3230215311050415, "learning_rate": 0.0001, "loss": 0.0085, "step": 188970 }, { "epoch": 1243.2894736842106, "grad_norm": 1.1419968605041504, "learning_rate": 0.0001, "loss": 0.009, "step": 188980 }, { "epoch": 1243.3552631578948, "grad_norm": 1.2557247877120972, "learning_rate": 0.0001, "loss": 0.0085, "step": 188990 }, { "epoch": 1243.421052631579, "grad_norm": 1.0815895795822144, "learning_rate": 0.0001, "loss": 0.0087, "step": 189000 }, { "epoch": 1243.4868421052631, "grad_norm": 0.997104823589325, "learning_rate": 0.0001, "loss": 0.0076, "step": 189010 }, { "epoch": 1243.5526315789473, "grad_norm": 0.776548445224762, "learning_rate": 0.0001, "loss": 0.0082, "step": 189020 }, { "epoch": 1243.6184210526317, "grad_norm": 0.8415836691856384, "learning_rate": 0.0001, "loss": 0.0101, "step": 189030 }, { "epoch": 1243.6842105263158, "grad_norm": 0.7960478663444519, "learning_rate": 0.0001, "loss": 0.0098, "step": 189040 }, { "epoch": 1243.75, "grad_norm": 0.7596374750137329, "learning_rate": 0.0001, "loss": 0.0098, "step": 189050 }, { "epoch": 1243.8157894736842, "grad_norm": 1.2631161212921143, "learning_rate": 0.0001, "loss": 0.0087, "step": 189060 }, { "epoch": 1243.8815789473683, "grad_norm": 0.959852397441864, "learning_rate": 0.0001, "loss": 0.0102, "step": 189070 }, { "epoch": 1243.9473684210527, "grad_norm": 0.6801808476448059, "learning_rate": 0.0001, "loss": 0.009, "step": 189080 }, { "epoch": 1244.0131578947369, "grad_norm": 1.1035255193710327, "learning_rate": 0.0001, "loss": 0.0088, "step": 189090 }, { "epoch": 1244.078947368421, "grad_norm": 0.7732435464859009, "learning_rate": 0.0001, "loss": 0.0093, "step": 189100 }, { "epoch": 1244.1447368421052, "grad_norm": 1.119423747062683, "learning_rate": 0.0001, "loss": 0.0086, "step": 189110 }, { "epoch": 1244.2105263157894, "grad_norm": 1.1290099620819092, "learning_rate": 0.0001, "loss": 0.0087, "step": 189120 }, { "epoch": 1244.2763157894738, "grad_norm": 0.9945160150527954, "learning_rate": 0.0001, "loss": 0.0082, "step": 189130 }, { "epoch": 1244.342105263158, "grad_norm": 1.1095064878463745, "learning_rate": 0.0001, "loss": 0.0104, "step": 189140 }, { "epoch": 1244.407894736842, "grad_norm": 0.930774450302124, "learning_rate": 0.0001, "loss": 0.0086, "step": 189150 }, { "epoch": 1244.4736842105262, "grad_norm": 0.7640677094459534, "learning_rate": 0.0001, "loss": 0.0111, "step": 189160 }, { "epoch": 1244.5394736842106, "grad_norm": 0.8537974953651428, "learning_rate": 0.0001, "loss": 0.0088, "step": 189170 }, { "epoch": 1244.6052631578948, "grad_norm": 0.8385094404220581, "learning_rate": 0.0001, "loss": 0.0071, "step": 189180 }, { "epoch": 1244.671052631579, "grad_norm": 0.8964349627494812, "learning_rate": 0.0001, "loss": 0.0097, "step": 189190 }, { "epoch": 1244.7368421052631, "grad_norm": 0.9953314065933228, "learning_rate": 0.0001, "loss": 0.0114, "step": 189200 }, { "epoch": 1244.8026315789473, "grad_norm": 1.099317193031311, "learning_rate": 0.0001, "loss": 0.0093, "step": 189210 }, { "epoch": 1244.8684210526317, "grad_norm": 1.2956123352050781, "learning_rate": 0.0001, "loss": 0.0081, "step": 189220 }, { "epoch": 1244.9342105263158, "grad_norm": 1.0341782569885254, "learning_rate": 0.0001, "loss": 0.009, "step": 189230 }, { "epoch": 1245.0, "grad_norm": 1.3250845670700073, "learning_rate": 0.0001, "loss": 0.0096, "step": 189240 }, { "epoch": 1245.0657894736842, "grad_norm": 0.8931382298469543, "learning_rate": 0.0001, "loss": 0.0099, "step": 189250 }, { "epoch": 1245.1315789473683, "grad_norm": 0.8672465682029724, "learning_rate": 0.0001, "loss": 0.008, "step": 189260 }, { "epoch": 1245.1973684210527, "grad_norm": 1.1268553733825684, "learning_rate": 0.0001, "loss": 0.009, "step": 189270 }, { "epoch": 1245.2631578947369, "grad_norm": 0.9678467512130737, "learning_rate": 0.0001, "loss": 0.0105, "step": 189280 }, { "epoch": 1245.328947368421, "grad_norm": 1.09657883644104, "learning_rate": 0.0001, "loss": 0.0081, "step": 189290 }, { "epoch": 1245.3947368421052, "grad_norm": 1.1582295894622803, "learning_rate": 0.0001, "loss": 0.0093, "step": 189300 }, { "epoch": 1245.4605263157894, "grad_norm": 0.817107617855072, "learning_rate": 0.0001, "loss": 0.0083, "step": 189310 }, { "epoch": 1245.5263157894738, "grad_norm": 1.2293273210525513, "learning_rate": 0.0001, "loss": 0.0082, "step": 189320 }, { "epoch": 1245.592105263158, "grad_norm": 0.8553192019462585, "learning_rate": 0.0001, "loss": 0.0079, "step": 189330 }, { "epoch": 1245.657894736842, "grad_norm": 1.2988234758377075, "learning_rate": 0.0001, "loss": 0.0085, "step": 189340 }, { "epoch": 1245.7236842105262, "grad_norm": 1.1800487041473389, "learning_rate": 0.0001, "loss": 0.009, "step": 189350 }, { "epoch": 1245.7894736842106, "grad_norm": 1.1548720598220825, "learning_rate": 0.0001, "loss": 0.0103, "step": 189360 }, { "epoch": 1245.8552631578948, "grad_norm": 1.1566232442855835, "learning_rate": 0.0001, "loss": 0.0086, "step": 189370 }, { "epoch": 1245.921052631579, "grad_norm": 1.2034006118774414, "learning_rate": 0.0001, "loss": 0.0115, "step": 189380 }, { "epoch": 1245.9868421052631, "grad_norm": 1.0221554040908813, "learning_rate": 0.0001, "loss": 0.009, "step": 189390 }, { "epoch": 1246.0526315789473, "grad_norm": 0.8397990465164185, "learning_rate": 0.0001, "loss": 0.0092, "step": 189400 }, { "epoch": 1246.1184210526317, "grad_norm": 1.0938118696212769, "learning_rate": 0.0001, "loss": 0.0079, "step": 189410 }, { "epoch": 1246.1842105263158, "grad_norm": 0.7125457525253296, "learning_rate": 0.0001, "loss": 0.0095, "step": 189420 }, { "epoch": 1246.25, "grad_norm": 1.0024956464767456, "learning_rate": 0.0001, "loss": 0.0085, "step": 189430 }, { "epoch": 1246.3157894736842, "grad_norm": 0.7446456551551819, "learning_rate": 0.0001, "loss": 0.0116, "step": 189440 }, { "epoch": 1246.3815789473683, "grad_norm": 0.9926702380180359, "learning_rate": 0.0001, "loss": 0.0111, "step": 189450 }, { "epoch": 1246.4473684210527, "grad_norm": 1.1048181056976318, "learning_rate": 0.0001, "loss": 0.0105, "step": 189460 }, { "epoch": 1246.5131578947369, "grad_norm": 1.3652530908584595, "learning_rate": 0.0001, "loss": 0.0099, "step": 189470 }, { "epoch": 1246.578947368421, "grad_norm": 1.2224739789962769, "learning_rate": 0.0001, "loss": 0.0108, "step": 189480 }, { "epoch": 1246.6447368421052, "grad_norm": 1.0956324338912964, "learning_rate": 0.0001, "loss": 0.0084, "step": 189490 }, { "epoch": 1246.7105263157894, "grad_norm": 0.8215267658233643, "learning_rate": 0.0001, "loss": 0.0096, "step": 189500 }, { "epoch": 1246.7763157894738, "grad_norm": 1.3271948099136353, "learning_rate": 0.0001, "loss": 0.0111, "step": 189510 }, { "epoch": 1246.842105263158, "grad_norm": 1.0132023096084595, "learning_rate": 0.0001, "loss": 0.0114, "step": 189520 }, { "epoch": 1246.907894736842, "grad_norm": 1.202150821685791, "learning_rate": 0.0001, "loss": 0.0105, "step": 189530 }, { "epoch": 1246.9736842105262, "grad_norm": 1.2758268117904663, "learning_rate": 0.0001, "loss": 0.0098, "step": 189540 }, { "epoch": 1247.0394736842106, "grad_norm": 1.0313605070114136, "learning_rate": 0.0001, "loss": 0.0095, "step": 189550 }, { "epoch": 1247.1052631578948, "grad_norm": 1.0390371084213257, "learning_rate": 0.0001, "loss": 0.0098, "step": 189560 }, { "epoch": 1247.171052631579, "grad_norm": 0.8408495187759399, "learning_rate": 0.0001, "loss": 0.0093, "step": 189570 }, { "epoch": 1247.2368421052631, "grad_norm": 1.059502124786377, "learning_rate": 0.0001, "loss": 0.0111, "step": 189580 }, { "epoch": 1247.3026315789473, "grad_norm": 1.236569881439209, "learning_rate": 0.0001, "loss": 0.0083, "step": 189590 }, { "epoch": 1247.3684210526317, "grad_norm": 1.1000412702560425, "learning_rate": 0.0001, "loss": 0.0114, "step": 189600 }, { "epoch": 1247.4342105263158, "grad_norm": 1.3047072887420654, "learning_rate": 0.0001, "loss": 0.0107, "step": 189610 }, { "epoch": 1247.5, "grad_norm": 1.1245132684707642, "learning_rate": 0.0001, "loss": 0.0091, "step": 189620 }, { "epoch": 1247.5657894736842, "grad_norm": 1.1942509412765503, "learning_rate": 0.0001, "loss": 0.0105, "step": 189630 }, { "epoch": 1247.6315789473683, "grad_norm": 1.323743462562561, "learning_rate": 0.0001, "loss": 0.0113, "step": 189640 }, { "epoch": 1247.6973684210527, "grad_norm": 1.3108394145965576, "learning_rate": 0.0001, "loss": 0.0088, "step": 189650 }, { "epoch": 1247.7631578947369, "grad_norm": 1.1331356763839722, "learning_rate": 0.0001, "loss": 0.0105, "step": 189660 }, { "epoch": 1247.828947368421, "grad_norm": 1.2502070665359497, "learning_rate": 0.0001, "loss": 0.0087, "step": 189670 }, { "epoch": 1247.8947368421052, "grad_norm": 0.7357776165008545, "learning_rate": 0.0001, "loss": 0.0089, "step": 189680 }, { "epoch": 1247.9605263157894, "grad_norm": 1.009076714515686, "learning_rate": 0.0001, "loss": 0.0115, "step": 189690 }, { "epoch": 1248.0263157894738, "grad_norm": 0.9644945859909058, "learning_rate": 0.0001, "loss": 0.0093, "step": 189700 }, { "epoch": 1248.092105263158, "grad_norm": 0.8089728355407715, "learning_rate": 0.0001, "loss": 0.0087, "step": 189710 }, { "epoch": 1248.157894736842, "grad_norm": 1.2572730779647827, "learning_rate": 0.0001, "loss": 0.0083, "step": 189720 }, { "epoch": 1248.2236842105262, "grad_norm": 1.3105838298797607, "learning_rate": 0.0001, "loss": 0.0091, "step": 189730 }, { "epoch": 1248.2894736842106, "grad_norm": 0.7587280869483948, "learning_rate": 0.0001, "loss": 0.0084, "step": 189740 }, { "epoch": 1248.3552631578948, "grad_norm": 0.8644336462020874, "learning_rate": 0.0001, "loss": 0.0081, "step": 189750 }, { "epoch": 1248.421052631579, "grad_norm": 0.8798325657844543, "learning_rate": 0.0001, "loss": 0.0107, "step": 189760 }, { "epoch": 1248.4868421052631, "grad_norm": 1.5717847347259521, "learning_rate": 0.0001, "loss": 0.0106, "step": 189770 }, { "epoch": 1248.5526315789473, "grad_norm": 0.9737314581871033, "learning_rate": 0.0001, "loss": 0.0108, "step": 189780 }, { "epoch": 1248.6184210526317, "grad_norm": 0.884158194065094, "learning_rate": 0.0001, "loss": 0.012, "step": 189790 }, { "epoch": 1248.6842105263158, "grad_norm": 0.8840878009796143, "learning_rate": 0.0001, "loss": 0.0112, "step": 189800 }, { "epoch": 1248.75, "grad_norm": 0.7942262291908264, "learning_rate": 0.0001, "loss": 0.0112, "step": 189810 }, { "epoch": 1248.8157894736842, "grad_norm": 1.0044453144073486, "learning_rate": 0.0001, "loss": 0.0096, "step": 189820 }, { "epoch": 1248.8815789473683, "grad_norm": 1.1081262826919556, "learning_rate": 0.0001, "loss": 0.0111, "step": 189830 }, { "epoch": 1248.9473684210527, "grad_norm": 0.7932543754577637, "learning_rate": 0.0001, "loss": 0.0096, "step": 189840 }, { "epoch": 1249.0131578947369, "grad_norm": 1.0459403991699219, "learning_rate": 0.0001, "loss": 0.0081, "step": 189850 }, { "epoch": 1249.078947368421, "grad_norm": 1.1057350635528564, "learning_rate": 0.0001, "loss": 0.0094, "step": 189860 }, { "epoch": 1249.1447368421052, "grad_norm": 0.8458811640739441, "learning_rate": 0.0001, "loss": 0.01, "step": 189870 }, { "epoch": 1249.2105263157894, "grad_norm": 0.7159251570701599, "learning_rate": 0.0001, "loss": 0.0077, "step": 189880 }, { "epoch": 1249.2763157894738, "grad_norm": 1.037407636642456, "learning_rate": 0.0001, "loss": 0.0109, "step": 189890 }, { "epoch": 1249.342105263158, "grad_norm": 0.9437480568885803, "learning_rate": 0.0001, "loss": 0.0096, "step": 189900 }, { "epoch": 1249.407894736842, "grad_norm": 0.7939618229866028, "learning_rate": 0.0001, "loss": 0.0114, "step": 189910 }, { "epoch": 1249.4736842105262, "grad_norm": 1.138870358467102, "learning_rate": 0.0001, "loss": 0.0077, "step": 189920 }, { "epoch": 1249.5394736842106, "grad_norm": 0.7695747017860413, "learning_rate": 0.0001, "loss": 0.0105, "step": 189930 }, { "epoch": 1249.6052631578948, "grad_norm": 1.0361729860305786, "learning_rate": 0.0001, "loss": 0.0092, "step": 189940 }, { "epoch": 1249.671052631579, "grad_norm": 0.9108820557594299, "learning_rate": 0.0001, "loss": 0.0081, "step": 189950 }, { "epoch": 1249.7368421052631, "grad_norm": 1.085376501083374, "learning_rate": 0.0001, "loss": 0.0103, "step": 189960 }, { "epoch": 1249.8026315789473, "grad_norm": 1.2229604721069336, "learning_rate": 0.0001, "loss": 0.0099, "step": 189970 }, { "epoch": 1249.8684210526317, "grad_norm": 0.9447299242019653, "learning_rate": 0.0001, "loss": 0.0093, "step": 189980 }, { "epoch": 1249.9342105263158, "grad_norm": 0.8941596746444702, "learning_rate": 0.0001, "loss": 0.0087, "step": 189990 }, { "epoch": 1250.0, "grad_norm": 0.8298214673995972, "learning_rate": 0.0001, "loss": 0.0093, "step": 190000 }, { "epoch": 1250.0657894736842, "grad_norm": 0.8514341711997986, "learning_rate": 0.0001, "loss": 0.0097, "step": 190010 }, { "epoch": 1250.1315789473683, "grad_norm": 1.1375254392623901, "learning_rate": 0.0001, "loss": 0.0093, "step": 190020 }, { "epoch": 1250.1973684210527, "grad_norm": 0.9819296598434448, "learning_rate": 0.0001, "loss": 0.0095, "step": 190030 }, { "epoch": 1250.2631578947369, "grad_norm": 0.9162175059318542, "learning_rate": 0.0001, "loss": 0.0094, "step": 190040 }, { "epoch": 1250.328947368421, "grad_norm": 1.0678719282150269, "learning_rate": 0.0001, "loss": 0.0094, "step": 190050 }, { "epoch": 1250.3947368421052, "grad_norm": 1.3395860195159912, "learning_rate": 0.0001, "loss": 0.0123, "step": 190060 }, { "epoch": 1250.4605263157894, "grad_norm": 0.8305793404579163, "learning_rate": 0.0001, "loss": 0.0097, "step": 190070 }, { "epoch": 1250.5263157894738, "grad_norm": 1.182700514793396, "learning_rate": 0.0001, "loss": 0.0091, "step": 190080 }, { "epoch": 1250.592105263158, "grad_norm": 0.9990321397781372, "learning_rate": 0.0001, "loss": 0.012, "step": 190090 }, { "epoch": 1250.657894736842, "grad_norm": 0.9627804756164551, "learning_rate": 0.0001, "loss": 0.0079, "step": 190100 }, { "epoch": 1250.7236842105262, "grad_norm": 0.7819858193397522, "learning_rate": 0.0001, "loss": 0.0086, "step": 190110 }, { "epoch": 1250.7894736842106, "grad_norm": 0.8119869232177734, "learning_rate": 0.0001, "loss": 0.0114, "step": 190120 }, { "epoch": 1250.8552631578948, "grad_norm": 1.0260744094848633, "learning_rate": 0.0001, "loss": 0.0094, "step": 190130 }, { "epoch": 1250.921052631579, "grad_norm": 1.2213586568832397, "learning_rate": 0.0001, "loss": 0.0098, "step": 190140 }, { "epoch": 1250.9868421052631, "grad_norm": 1.0840460062026978, "learning_rate": 0.0001, "loss": 0.0094, "step": 190150 }, { "epoch": 1251.0526315789473, "grad_norm": 1.0639532804489136, "learning_rate": 0.0001, "loss": 0.0075, "step": 190160 }, { "epoch": 1251.1184210526317, "grad_norm": 1.0878303050994873, "learning_rate": 0.0001, "loss": 0.012, "step": 190170 }, { "epoch": 1251.1842105263158, "grad_norm": 0.787267804145813, "learning_rate": 0.0001, "loss": 0.0083, "step": 190180 }, { "epoch": 1251.25, "grad_norm": 1.283235788345337, "learning_rate": 0.0001, "loss": 0.0086, "step": 190190 }, { "epoch": 1251.3157894736842, "grad_norm": 1.402185320854187, "learning_rate": 0.0001, "loss": 0.012, "step": 190200 }, { "epoch": 1251.3815789473683, "grad_norm": 0.9384086728096008, "learning_rate": 0.0001, "loss": 0.0098, "step": 190210 }, { "epoch": 1251.4473684210527, "grad_norm": 1.2631064653396606, "learning_rate": 0.0001, "loss": 0.0086, "step": 190220 }, { "epoch": 1251.5131578947369, "grad_norm": 1.1802366971969604, "learning_rate": 0.0001, "loss": 0.0101, "step": 190230 }, { "epoch": 1251.578947368421, "grad_norm": 1.0927965641021729, "learning_rate": 0.0001, "loss": 0.0087, "step": 190240 }, { "epoch": 1251.6447368421052, "grad_norm": 1.3226182460784912, "learning_rate": 0.0001, "loss": 0.0089, "step": 190250 }, { "epoch": 1251.7105263157894, "grad_norm": 1.1238583326339722, "learning_rate": 0.0001, "loss": 0.0095, "step": 190260 }, { "epoch": 1251.7763157894738, "grad_norm": 1.0334173440933228, "learning_rate": 0.0001, "loss": 0.0097, "step": 190270 }, { "epoch": 1251.842105263158, "grad_norm": 0.7510884404182434, "learning_rate": 0.0001, "loss": 0.0085, "step": 190280 }, { "epoch": 1251.907894736842, "grad_norm": 1.0210719108581543, "learning_rate": 0.0001, "loss": 0.0088, "step": 190290 }, { "epoch": 1251.9736842105262, "grad_norm": 1.1662030220031738, "learning_rate": 0.0001, "loss": 0.0076, "step": 190300 }, { "epoch": 1252.0394736842106, "grad_norm": 0.8975061774253845, "learning_rate": 0.0001, "loss": 0.0082, "step": 190310 }, { "epoch": 1252.1052631578948, "grad_norm": 1.1046485900878906, "learning_rate": 0.0001, "loss": 0.01, "step": 190320 }, { "epoch": 1252.171052631579, "grad_norm": 0.740519106388092, "learning_rate": 0.0001, "loss": 0.008, "step": 190330 }, { "epoch": 1252.2368421052631, "grad_norm": 0.7718915939331055, "learning_rate": 0.0001, "loss": 0.0082, "step": 190340 }, { "epoch": 1252.3026315789473, "grad_norm": 0.7174364328384399, "learning_rate": 0.0001, "loss": 0.0079, "step": 190350 }, { "epoch": 1252.3684210526317, "grad_norm": 0.7045212388038635, "learning_rate": 0.0001, "loss": 0.0085, "step": 190360 }, { "epoch": 1252.4342105263158, "grad_norm": 1.1751577854156494, "learning_rate": 0.0001, "loss": 0.0103, "step": 190370 }, { "epoch": 1252.5, "grad_norm": 1.107194423675537, "learning_rate": 0.0001, "loss": 0.0102, "step": 190380 }, { "epoch": 1252.5657894736842, "grad_norm": 1.6648300886154175, "learning_rate": 0.0001, "loss": 0.0083, "step": 190390 }, { "epoch": 1252.6315789473683, "grad_norm": 1.0686296224594116, "learning_rate": 0.0001, "loss": 0.008, "step": 190400 }, { "epoch": 1252.6973684210527, "grad_norm": 1.0623538494110107, "learning_rate": 0.0001, "loss": 0.008, "step": 190410 }, { "epoch": 1252.7631578947369, "grad_norm": 1.1247642040252686, "learning_rate": 0.0001, "loss": 0.0102, "step": 190420 }, { "epoch": 1252.828947368421, "grad_norm": 1.0035089254379272, "learning_rate": 0.0001, "loss": 0.0095, "step": 190430 }, { "epoch": 1252.8947368421052, "grad_norm": 1.277760624885559, "learning_rate": 0.0001, "loss": 0.0097, "step": 190440 }, { "epoch": 1252.9605263157894, "grad_norm": 1.3326517343521118, "learning_rate": 0.0001, "loss": 0.0105, "step": 190450 }, { "epoch": 1253.0263157894738, "grad_norm": 1.0908524990081787, "learning_rate": 0.0001, "loss": 0.0096, "step": 190460 }, { "epoch": 1253.092105263158, "grad_norm": 1.1250838041305542, "learning_rate": 0.0001, "loss": 0.0095, "step": 190470 }, { "epoch": 1253.157894736842, "grad_norm": 1.1381192207336426, "learning_rate": 0.0001, "loss": 0.0095, "step": 190480 }, { "epoch": 1253.2236842105262, "grad_norm": 1.1070564985275269, "learning_rate": 0.0001, "loss": 0.0094, "step": 190490 }, { "epoch": 1253.2894736842106, "grad_norm": 0.9667354226112366, "learning_rate": 0.0001, "loss": 0.0103, "step": 190500 }, { "epoch": 1253.3552631578948, "grad_norm": 0.9640436172485352, "learning_rate": 0.0001, "loss": 0.0092, "step": 190510 }, { "epoch": 1253.421052631579, "grad_norm": 1.0675090551376343, "learning_rate": 0.0001, "loss": 0.0085, "step": 190520 }, { "epoch": 1253.4868421052631, "grad_norm": 0.9862016439437866, "learning_rate": 0.0001, "loss": 0.008, "step": 190530 }, { "epoch": 1253.5526315789473, "grad_norm": 0.98412024974823, "learning_rate": 0.0001, "loss": 0.0082, "step": 190540 }, { "epoch": 1253.6184210526317, "grad_norm": 0.9340914487838745, "learning_rate": 0.0001, "loss": 0.0097, "step": 190550 }, { "epoch": 1253.6842105263158, "grad_norm": 1.179900884628296, "learning_rate": 0.0001, "loss": 0.0086, "step": 190560 }, { "epoch": 1253.75, "grad_norm": 1.0011744499206543, "learning_rate": 0.0001, "loss": 0.0095, "step": 190570 }, { "epoch": 1253.8157894736842, "grad_norm": 0.9150945544242859, "learning_rate": 0.0001, "loss": 0.0103, "step": 190580 }, { "epoch": 1253.8815789473683, "grad_norm": 1.025476098060608, "learning_rate": 0.0001, "loss": 0.0084, "step": 190590 }, { "epoch": 1253.9473684210527, "grad_norm": 0.7332446575164795, "learning_rate": 0.0001, "loss": 0.0081, "step": 190600 }, { "epoch": 1254.0131578947369, "grad_norm": 0.9624845385551453, "learning_rate": 0.0001, "loss": 0.0095, "step": 190610 }, { "epoch": 1254.078947368421, "grad_norm": 0.9401580691337585, "learning_rate": 0.0001, "loss": 0.0083, "step": 190620 }, { "epoch": 1254.1447368421052, "grad_norm": 1.178200602531433, "learning_rate": 0.0001, "loss": 0.0093, "step": 190630 }, { "epoch": 1254.2105263157894, "grad_norm": 0.8018969297409058, "learning_rate": 0.0001, "loss": 0.0083, "step": 190640 }, { "epoch": 1254.2763157894738, "grad_norm": 1.0022549629211426, "learning_rate": 0.0001, "loss": 0.0112, "step": 190650 }, { "epoch": 1254.342105263158, "grad_norm": 1.0292798280715942, "learning_rate": 0.0001, "loss": 0.0076, "step": 190660 }, { "epoch": 1254.407894736842, "grad_norm": 0.9748183488845825, "learning_rate": 0.0001, "loss": 0.0089, "step": 190670 }, { "epoch": 1254.4736842105262, "grad_norm": 1.1642075777053833, "learning_rate": 0.0001, "loss": 0.01, "step": 190680 }, { "epoch": 1254.5394736842106, "grad_norm": 1.0161871910095215, "learning_rate": 0.0001, "loss": 0.0078, "step": 190690 }, { "epoch": 1254.6052631578948, "grad_norm": 1.2374141216278076, "learning_rate": 0.0001, "loss": 0.0084, "step": 190700 }, { "epoch": 1254.671052631579, "grad_norm": 0.6536117196083069, "learning_rate": 0.0001, "loss": 0.009, "step": 190710 }, { "epoch": 1254.7368421052631, "grad_norm": 0.9045120477676392, "learning_rate": 0.0001, "loss": 0.008, "step": 190720 }, { "epoch": 1254.8026315789473, "grad_norm": 0.969821035861969, "learning_rate": 0.0001, "loss": 0.0066, "step": 190730 }, { "epoch": 1254.8684210526317, "grad_norm": 0.9699207544326782, "learning_rate": 0.0001, "loss": 0.0086, "step": 190740 }, { "epoch": 1254.9342105263158, "grad_norm": 1.0693953037261963, "learning_rate": 0.0001, "loss": 0.0095, "step": 190750 }, { "epoch": 1255.0, "grad_norm": 1.0095750093460083, "learning_rate": 0.0001, "loss": 0.0082, "step": 190760 }, { "epoch": 1255.0657894736842, "grad_norm": 1.1202609539031982, "learning_rate": 0.0001, "loss": 0.0091, "step": 190770 }, { "epoch": 1255.1315789473683, "grad_norm": 1.095145344734192, "learning_rate": 0.0001, "loss": 0.0098, "step": 190780 }, { "epoch": 1255.1973684210527, "grad_norm": 0.8262442350387573, "learning_rate": 0.0001, "loss": 0.0092, "step": 190790 }, { "epoch": 1255.2631578947369, "grad_norm": 1.1005247831344604, "learning_rate": 0.0001, "loss": 0.0077, "step": 190800 }, { "epoch": 1255.328947368421, "grad_norm": 1.0275697708129883, "learning_rate": 0.0001, "loss": 0.0075, "step": 190810 }, { "epoch": 1255.3947368421052, "grad_norm": 0.8765363097190857, "learning_rate": 0.0001, "loss": 0.0108, "step": 190820 }, { "epoch": 1255.4605263157894, "grad_norm": 0.8573766946792603, "learning_rate": 0.0001, "loss": 0.0076, "step": 190830 }, { "epoch": 1255.5263157894738, "grad_norm": 1.2033637762069702, "learning_rate": 0.0001, "loss": 0.0078, "step": 190840 }, { "epoch": 1255.592105263158, "grad_norm": 0.8023409843444824, "learning_rate": 0.0001, "loss": 0.0083, "step": 190850 }, { "epoch": 1255.657894736842, "grad_norm": 0.9117615222930908, "learning_rate": 0.0001, "loss": 0.0081, "step": 190860 }, { "epoch": 1255.7236842105262, "grad_norm": 1.0993090867996216, "learning_rate": 0.0001, "loss": 0.0091, "step": 190870 }, { "epoch": 1255.7894736842106, "grad_norm": 1.0232125520706177, "learning_rate": 0.0001, "loss": 0.0075, "step": 190880 }, { "epoch": 1255.8552631578948, "grad_norm": 1.2864488363265991, "learning_rate": 0.0001, "loss": 0.0087, "step": 190890 }, { "epoch": 1255.921052631579, "grad_norm": 0.8744462132453918, "learning_rate": 0.0001, "loss": 0.0105, "step": 190900 }, { "epoch": 1255.9868421052631, "grad_norm": 0.8951719403266907, "learning_rate": 0.0001, "loss": 0.0101, "step": 190910 }, { "epoch": 1256.0526315789473, "grad_norm": 1.058536410331726, "learning_rate": 0.0001, "loss": 0.0078, "step": 190920 }, { "epoch": 1256.1184210526317, "grad_norm": 0.9087652564048767, "learning_rate": 0.0001, "loss": 0.009, "step": 190930 }, { "epoch": 1256.1842105263158, "grad_norm": 1.1572648286819458, "learning_rate": 0.0001, "loss": 0.0078, "step": 190940 }, { "epoch": 1256.25, "grad_norm": 1.497307300567627, "learning_rate": 0.0001, "loss": 0.0104, "step": 190950 }, { "epoch": 1256.3157894736842, "grad_norm": 1.1288655996322632, "learning_rate": 0.0001, "loss": 0.0092, "step": 190960 }, { "epoch": 1256.3815789473683, "grad_norm": 1.1092500686645508, "learning_rate": 0.0001, "loss": 0.0084, "step": 190970 }, { "epoch": 1256.4473684210527, "grad_norm": 0.9487274885177612, "learning_rate": 0.0001, "loss": 0.0092, "step": 190980 }, { "epoch": 1256.5131578947369, "grad_norm": 1.2593556642532349, "learning_rate": 0.0001, "loss": 0.0082, "step": 190990 }, { "epoch": 1256.578947368421, "grad_norm": 1.0957036018371582, "learning_rate": 0.0001, "loss": 0.0086, "step": 191000 }, { "epoch": 1256.6447368421052, "grad_norm": 1.3564567565917969, "learning_rate": 0.0001, "loss": 0.0094, "step": 191010 }, { "epoch": 1256.7105263157894, "grad_norm": 1.0658830404281616, "learning_rate": 0.0001, "loss": 0.0075, "step": 191020 }, { "epoch": 1256.7763157894738, "grad_norm": 1.4636614322662354, "learning_rate": 0.0001, "loss": 0.0101, "step": 191030 }, { "epoch": 1256.842105263158, "grad_norm": 0.8907326459884644, "learning_rate": 0.0001, "loss": 0.0085, "step": 191040 }, { "epoch": 1256.907894736842, "grad_norm": 0.6591998934745789, "learning_rate": 0.0001, "loss": 0.0095, "step": 191050 }, { "epoch": 1256.9736842105262, "grad_norm": 1.2480213642120361, "learning_rate": 0.0001, "loss": 0.0076, "step": 191060 }, { "epoch": 1257.0394736842106, "grad_norm": 1.0565965175628662, "learning_rate": 0.0001, "loss": 0.0116, "step": 191070 }, { "epoch": 1257.1052631578948, "grad_norm": 0.7769169807434082, "learning_rate": 0.0001, "loss": 0.0073, "step": 191080 }, { "epoch": 1257.171052631579, "grad_norm": 1.2173153162002563, "learning_rate": 0.0001, "loss": 0.0089, "step": 191090 }, { "epoch": 1257.2368421052631, "grad_norm": 1.1387425661087036, "learning_rate": 0.0001, "loss": 0.0086, "step": 191100 }, { "epoch": 1257.3026315789473, "grad_norm": 1.099096417427063, "learning_rate": 0.0001, "loss": 0.0095, "step": 191110 }, { "epoch": 1257.3684210526317, "grad_norm": 0.9480217695236206, "learning_rate": 0.0001, "loss": 0.0101, "step": 191120 }, { "epoch": 1257.4342105263158, "grad_norm": 0.8401440382003784, "learning_rate": 0.0001, "loss": 0.0101, "step": 191130 }, { "epoch": 1257.5, "grad_norm": 0.9184837341308594, "learning_rate": 0.0001, "loss": 0.0121, "step": 191140 }, { "epoch": 1257.5657894736842, "grad_norm": 0.9384816288948059, "learning_rate": 0.0001, "loss": 0.0078, "step": 191150 }, { "epoch": 1257.6315789473683, "grad_norm": 0.8797683119773865, "learning_rate": 0.0001, "loss": 0.0109, "step": 191160 }, { "epoch": 1257.6973684210527, "grad_norm": 0.781887412071228, "learning_rate": 0.0001, "loss": 0.0101, "step": 191170 }, { "epoch": 1257.7631578947369, "grad_norm": 0.9485580325126648, "learning_rate": 0.0001, "loss": 0.0098, "step": 191180 }, { "epoch": 1257.828947368421, "grad_norm": 0.7003353834152222, "learning_rate": 0.0001, "loss": 0.0099, "step": 191190 }, { "epoch": 1257.8947368421052, "grad_norm": 0.9563307166099548, "learning_rate": 0.0001, "loss": 0.0085, "step": 191200 }, { "epoch": 1257.9605263157894, "grad_norm": 0.787761926651001, "learning_rate": 0.0001, "loss": 0.0101, "step": 191210 }, { "epoch": 1258.0263157894738, "grad_norm": 1.2450077533721924, "learning_rate": 0.0001, "loss": 0.011, "step": 191220 }, { "epoch": 1258.092105263158, "grad_norm": 1.280927062034607, "learning_rate": 0.0001, "loss": 0.0097, "step": 191230 }, { "epoch": 1258.157894736842, "grad_norm": 1.0110337734222412, "learning_rate": 0.0001, "loss": 0.0111, "step": 191240 }, { "epoch": 1258.2236842105262, "grad_norm": 1.0578523874282837, "learning_rate": 0.0001, "loss": 0.0084, "step": 191250 }, { "epoch": 1258.2894736842106, "grad_norm": 1.5664889812469482, "learning_rate": 0.0001, "loss": 0.01, "step": 191260 }, { "epoch": 1258.3552631578948, "grad_norm": 1.2868566513061523, "learning_rate": 0.0001, "loss": 0.0092, "step": 191270 }, { "epoch": 1258.421052631579, "grad_norm": 1.0075466632843018, "learning_rate": 0.0001, "loss": 0.0096, "step": 191280 }, { "epoch": 1258.4868421052631, "grad_norm": 1.2369372844696045, "learning_rate": 0.0001, "loss": 0.0095, "step": 191290 }, { "epoch": 1258.5526315789473, "grad_norm": 1.0888575315475464, "learning_rate": 0.0001, "loss": 0.0097, "step": 191300 }, { "epoch": 1258.6184210526317, "grad_norm": 1.023420810699463, "learning_rate": 0.0001, "loss": 0.0091, "step": 191310 }, { "epoch": 1258.6842105263158, "grad_norm": 1.248289704322815, "learning_rate": 0.0001, "loss": 0.01, "step": 191320 }, { "epoch": 1258.75, "grad_norm": 1.0365052223205566, "learning_rate": 0.0001, "loss": 0.0094, "step": 191330 }, { "epoch": 1258.8157894736842, "grad_norm": 1.4416338205337524, "learning_rate": 0.0001, "loss": 0.0084, "step": 191340 }, { "epoch": 1258.8815789473683, "grad_norm": 1.3358416557312012, "learning_rate": 0.0001, "loss": 0.0088, "step": 191350 }, { "epoch": 1258.9473684210527, "grad_norm": 1.1580281257629395, "learning_rate": 0.0001, "loss": 0.0087, "step": 191360 }, { "epoch": 1259.0131578947369, "grad_norm": 1.4239611625671387, "learning_rate": 0.0001, "loss": 0.0083, "step": 191370 }, { "epoch": 1259.078947368421, "grad_norm": 0.7980217337608337, "learning_rate": 0.0001, "loss": 0.0107, "step": 191380 }, { "epoch": 1259.1447368421052, "grad_norm": 0.9824530482292175, "learning_rate": 0.0001, "loss": 0.0097, "step": 191390 }, { "epoch": 1259.2105263157894, "grad_norm": 0.770296573638916, "learning_rate": 0.0001, "loss": 0.0089, "step": 191400 }, { "epoch": 1259.2763157894738, "grad_norm": 0.780419647693634, "learning_rate": 0.0001, "loss": 0.0097, "step": 191410 }, { "epoch": 1259.342105263158, "grad_norm": 0.8854151964187622, "learning_rate": 0.0001, "loss": 0.0096, "step": 191420 }, { "epoch": 1259.407894736842, "grad_norm": 0.6854922771453857, "learning_rate": 0.0001, "loss": 0.0088, "step": 191430 }, { "epoch": 1259.4736842105262, "grad_norm": 0.6790972948074341, "learning_rate": 0.0001, "loss": 0.0091, "step": 191440 }, { "epoch": 1259.5394736842106, "grad_norm": 0.5925989747047424, "learning_rate": 0.0001, "loss": 0.0097, "step": 191450 }, { "epoch": 1259.6052631578948, "grad_norm": 1.307077407836914, "learning_rate": 0.0001, "loss": 0.0102, "step": 191460 }, { "epoch": 1259.671052631579, "grad_norm": 0.9079083800315857, "learning_rate": 0.0001, "loss": 0.0087, "step": 191470 }, { "epoch": 1259.7368421052631, "grad_norm": 1.258397102355957, "learning_rate": 0.0001, "loss": 0.0087, "step": 191480 }, { "epoch": 1259.8026315789473, "grad_norm": 1.126745343208313, "learning_rate": 0.0001, "loss": 0.0085, "step": 191490 }, { "epoch": 1259.8684210526317, "grad_norm": 0.9693688154220581, "learning_rate": 0.0001, "loss": 0.0082, "step": 191500 }, { "epoch": 1259.9342105263158, "grad_norm": 1.0901410579681396, "learning_rate": 0.0001, "loss": 0.0088, "step": 191510 }, { "epoch": 1260.0, "grad_norm": 1.0153412818908691, "learning_rate": 0.0001, "loss": 0.0085, "step": 191520 }, { "epoch": 1260.0657894736842, "grad_norm": 1.094736099243164, "learning_rate": 0.0001, "loss": 0.0096, "step": 191530 }, { "epoch": 1260.1315789473683, "grad_norm": 1.3164678812026978, "learning_rate": 0.0001, "loss": 0.0089, "step": 191540 }, { "epoch": 1260.1973684210527, "grad_norm": 1.0634385347366333, "learning_rate": 0.0001, "loss": 0.0084, "step": 191550 }, { "epoch": 1260.2631578947369, "grad_norm": 1.0030722618103027, "learning_rate": 0.0001, "loss": 0.009, "step": 191560 }, { "epoch": 1260.328947368421, "grad_norm": 1.0075671672821045, "learning_rate": 0.0001, "loss": 0.0108, "step": 191570 }, { "epoch": 1260.3947368421052, "grad_norm": 1.2293148040771484, "learning_rate": 0.0001, "loss": 0.0107, "step": 191580 }, { "epoch": 1260.4605263157894, "grad_norm": 0.9783899784088135, "learning_rate": 0.0001, "loss": 0.007, "step": 191590 }, { "epoch": 1260.5263157894738, "grad_norm": 1.0404366254806519, "learning_rate": 0.0001, "loss": 0.0104, "step": 191600 }, { "epoch": 1260.592105263158, "grad_norm": 1.1873605251312256, "learning_rate": 0.0001, "loss": 0.0085, "step": 191610 }, { "epoch": 1260.657894736842, "grad_norm": 0.8735949397087097, "learning_rate": 0.0001, "loss": 0.009, "step": 191620 }, { "epoch": 1260.7236842105262, "grad_norm": 1.2900155782699585, "learning_rate": 0.0001, "loss": 0.0086, "step": 191630 }, { "epoch": 1260.7894736842106, "grad_norm": 1.2164313793182373, "learning_rate": 0.0001, "loss": 0.0073, "step": 191640 }, { "epoch": 1260.8552631578948, "grad_norm": 1.0002045631408691, "learning_rate": 0.0001, "loss": 0.0098, "step": 191650 }, { "epoch": 1260.921052631579, "grad_norm": 1.3534318208694458, "learning_rate": 0.0001, "loss": 0.0093, "step": 191660 }, { "epoch": 1260.9868421052631, "grad_norm": 1.0810010433197021, "learning_rate": 0.0001, "loss": 0.0085, "step": 191670 }, { "epoch": 1261.0526315789473, "grad_norm": 1.0390048027038574, "learning_rate": 0.0001, "loss": 0.0087, "step": 191680 }, { "epoch": 1261.1184210526317, "grad_norm": 0.8042935729026794, "learning_rate": 0.0001, "loss": 0.0082, "step": 191690 }, { "epoch": 1261.1842105263158, "grad_norm": 0.8306818008422852, "learning_rate": 0.0001, "loss": 0.0076, "step": 191700 }, { "epoch": 1261.25, "grad_norm": 0.769751787185669, "learning_rate": 0.0001, "loss": 0.0107, "step": 191710 }, { "epoch": 1261.3157894736842, "grad_norm": 0.807361364364624, "learning_rate": 0.0001, "loss": 0.0083, "step": 191720 }, { "epoch": 1261.3815789473683, "grad_norm": 0.6746041178703308, "learning_rate": 0.0001, "loss": 0.0109, "step": 191730 }, { "epoch": 1261.4473684210527, "grad_norm": 0.6681817173957825, "learning_rate": 0.0001, "loss": 0.0084, "step": 191740 }, { "epoch": 1261.5131578947369, "grad_norm": 0.5453993082046509, "learning_rate": 0.0001, "loss": 0.0102, "step": 191750 }, { "epoch": 1261.578947368421, "grad_norm": 0.9125059247016907, "learning_rate": 0.0001, "loss": 0.0086, "step": 191760 }, { "epoch": 1261.6447368421052, "grad_norm": 0.5915899276733398, "learning_rate": 0.0001, "loss": 0.0084, "step": 191770 }, { "epoch": 1261.7105263157894, "grad_norm": 0.9361303448677063, "learning_rate": 0.0001, "loss": 0.0093, "step": 191780 }, { "epoch": 1261.7763157894738, "grad_norm": 1.2069631814956665, "learning_rate": 0.0001, "loss": 0.0078, "step": 191790 }, { "epoch": 1261.842105263158, "grad_norm": 1.1386783123016357, "learning_rate": 0.0001, "loss": 0.0083, "step": 191800 }, { "epoch": 1261.907894736842, "grad_norm": 1.0958198308944702, "learning_rate": 0.0001, "loss": 0.0081, "step": 191810 }, { "epoch": 1261.9736842105262, "grad_norm": 0.7236484885215759, "learning_rate": 0.0001, "loss": 0.0085, "step": 191820 }, { "epoch": 1262.0394736842106, "grad_norm": 1.066319465637207, "learning_rate": 0.0001, "loss": 0.0095, "step": 191830 }, { "epoch": 1262.1052631578948, "grad_norm": 1.1681187152862549, "learning_rate": 0.0001, "loss": 0.0086, "step": 191840 }, { "epoch": 1262.171052631579, "grad_norm": 0.9392849206924438, "learning_rate": 0.0001, "loss": 0.0092, "step": 191850 }, { "epoch": 1262.2368421052631, "grad_norm": 1.0458626747131348, "learning_rate": 0.0001, "loss": 0.0116, "step": 191860 }, { "epoch": 1262.3026315789473, "grad_norm": 0.7124366164207458, "learning_rate": 0.0001, "loss": 0.0077, "step": 191870 }, { "epoch": 1262.3684210526317, "grad_norm": 0.9901890158653259, "learning_rate": 0.0001, "loss": 0.0078, "step": 191880 }, { "epoch": 1262.4342105263158, "grad_norm": 1.156256914138794, "learning_rate": 0.0001, "loss": 0.008, "step": 191890 }, { "epoch": 1262.5, "grad_norm": 1.051873803138733, "learning_rate": 0.0001, "loss": 0.0079, "step": 191900 }, { "epoch": 1262.5657894736842, "grad_norm": 1.0366870164871216, "learning_rate": 0.0001, "loss": 0.0094, "step": 191910 }, { "epoch": 1262.6315789473683, "grad_norm": 0.843497633934021, "learning_rate": 0.0001, "loss": 0.0091, "step": 191920 }, { "epoch": 1262.6973684210527, "grad_norm": 0.8482083678245544, "learning_rate": 0.0001, "loss": 0.0091, "step": 191930 }, { "epoch": 1262.7631578947369, "grad_norm": 0.7245591282844543, "learning_rate": 0.0001, "loss": 0.009, "step": 191940 }, { "epoch": 1262.828947368421, "grad_norm": 0.7226964831352234, "learning_rate": 0.0001, "loss": 0.0093, "step": 191950 }, { "epoch": 1262.8947368421052, "grad_norm": 1.0393447875976562, "learning_rate": 0.0001, "loss": 0.0098, "step": 191960 }, { "epoch": 1262.9605263157894, "grad_norm": 0.9947481155395508, "learning_rate": 0.0001, "loss": 0.0087, "step": 191970 }, { "epoch": 1263.0263157894738, "grad_norm": 1.087018609046936, "learning_rate": 0.0001, "loss": 0.007, "step": 191980 }, { "epoch": 1263.092105263158, "grad_norm": 1.0347148180007935, "learning_rate": 0.0001, "loss": 0.0093, "step": 191990 }, { "epoch": 1263.157894736842, "grad_norm": 0.8795108795166016, "learning_rate": 0.0001, "loss": 0.0088, "step": 192000 }, { "epoch": 1263.2236842105262, "grad_norm": 0.9262383580207825, "learning_rate": 0.0001, "loss": 0.0088, "step": 192010 }, { "epoch": 1263.2894736842106, "grad_norm": 0.9647265076637268, "learning_rate": 0.0001, "loss": 0.0074, "step": 192020 }, { "epoch": 1263.3552631578948, "grad_norm": 1.0269702672958374, "learning_rate": 0.0001, "loss": 0.0104, "step": 192030 }, { "epoch": 1263.421052631579, "grad_norm": 0.9326910972595215, "learning_rate": 0.0001, "loss": 0.0085, "step": 192040 }, { "epoch": 1263.4868421052631, "grad_norm": 1.2422527074813843, "learning_rate": 0.0001, "loss": 0.0079, "step": 192050 }, { "epoch": 1263.5526315789473, "grad_norm": 1.082977056503296, "learning_rate": 0.0001, "loss": 0.0095, "step": 192060 }, { "epoch": 1263.6184210526317, "grad_norm": 1.06131112575531, "learning_rate": 0.0001, "loss": 0.0092, "step": 192070 }, { "epoch": 1263.6842105263158, "grad_norm": 1.269061803817749, "learning_rate": 0.0001, "loss": 0.0086, "step": 192080 }, { "epoch": 1263.75, "grad_norm": 0.993816614151001, "learning_rate": 0.0001, "loss": 0.0103, "step": 192090 }, { "epoch": 1263.8157894736842, "grad_norm": 0.9369046688079834, "learning_rate": 0.0001, "loss": 0.0082, "step": 192100 }, { "epoch": 1263.8815789473683, "grad_norm": 1.1662249565124512, "learning_rate": 0.0001, "loss": 0.0088, "step": 192110 }, { "epoch": 1263.9473684210527, "grad_norm": 0.8694877028465271, "learning_rate": 0.0001, "loss": 0.0097, "step": 192120 }, { "epoch": 1264.0131578947369, "grad_norm": 1.0363537073135376, "learning_rate": 0.0001, "loss": 0.0092, "step": 192130 }, { "epoch": 1264.078947368421, "grad_norm": 0.8358498215675354, "learning_rate": 0.0001, "loss": 0.0105, "step": 192140 }, { "epoch": 1264.1447368421052, "grad_norm": 0.7027297616004944, "learning_rate": 0.0001, "loss": 0.0073, "step": 192150 }, { "epoch": 1264.2105263157894, "grad_norm": 0.8918916583061218, "learning_rate": 0.0001, "loss": 0.0081, "step": 192160 }, { "epoch": 1264.2763157894738, "grad_norm": 0.9327743053436279, "learning_rate": 0.0001, "loss": 0.0092, "step": 192170 }, { "epoch": 1264.342105263158, "grad_norm": 1.0644272565841675, "learning_rate": 0.0001, "loss": 0.0092, "step": 192180 }, { "epoch": 1264.407894736842, "grad_norm": 1.1869374513626099, "learning_rate": 0.0001, "loss": 0.0104, "step": 192190 }, { "epoch": 1264.4736842105262, "grad_norm": 0.9971968531608582, "learning_rate": 0.0001, "loss": 0.0082, "step": 192200 }, { "epoch": 1264.5394736842106, "grad_norm": 0.745521068572998, "learning_rate": 0.0001, "loss": 0.0074, "step": 192210 }, { "epoch": 1264.6052631578948, "grad_norm": 1.1426355838775635, "learning_rate": 0.0001, "loss": 0.0093, "step": 192220 }, { "epoch": 1264.671052631579, "grad_norm": 1.11016845703125, "learning_rate": 0.0001, "loss": 0.0082, "step": 192230 }, { "epoch": 1264.7368421052631, "grad_norm": 0.9929028153419495, "learning_rate": 0.0001, "loss": 0.0086, "step": 192240 }, { "epoch": 1264.8026315789473, "grad_norm": 0.9268110394477844, "learning_rate": 0.0001, "loss": 0.0096, "step": 192250 }, { "epoch": 1264.8684210526317, "grad_norm": 0.8309184312820435, "learning_rate": 0.0001, "loss": 0.0093, "step": 192260 }, { "epoch": 1264.9342105263158, "grad_norm": 1.2837961912155151, "learning_rate": 0.0001, "loss": 0.0107, "step": 192270 }, { "epoch": 1265.0, "grad_norm": 0.8434421420097351, "learning_rate": 0.0001, "loss": 0.0098, "step": 192280 }, { "epoch": 1265.0657894736842, "grad_norm": 1.0207735300064087, "learning_rate": 0.0001, "loss": 0.0111, "step": 192290 }, { "epoch": 1265.1315789473683, "grad_norm": 0.9556507468223572, "learning_rate": 0.0001, "loss": 0.008, "step": 192300 }, { "epoch": 1265.1973684210527, "grad_norm": 1.1968109607696533, "learning_rate": 0.0001, "loss": 0.0097, "step": 192310 }, { "epoch": 1265.2631578947369, "grad_norm": 1.016579270362854, "learning_rate": 0.0001, "loss": 0.0089, "step": 192320 }, { "epoch": 1265.328947368421, "grad_norm": 1.248950481414795, "learning_rate": 0.0001, "loss": 0.0101, "step": 192330 }, { "epoch": 1265.3947368421052, "grad_norm": 1.086604118347168, "learning_rate": 0.0001, "loss": 0.0086, "step": 192340 }, { "epoch": 1265.4605263157894, "grad_norm": 1.240754246711731, "learning_rate": 0.0001, "loss": 0.0085, "step": 192350 }, { "epoch": 1265.5263157894738, "grad_norm": 1.088182806968689, "learning_rate": 0.0001, "loss": 0.008, "step": 192360 }, { "epoch": 1265.592105263158, "grad_norm": 1.2884389162063599, "learning_rate": 0.0001, "loss": 0.0096, "step": 192370 }, { "epoch": 1265.657894736842, "grad_norm": 0.9595872759819031, "learning_rate": 0.0001, "loss": 0.0096, "step": 192380 }, { "epoch": 1265.7236842105262, "grad_norm": 0.7716920375823975, "learning_rate": 0.0001, "loss": 0.008, "step": 192390 }, { "epoch": 1265.7894736842106, "grad_norm": 1.2177424430847168, "learning_rate": 0.0001, "loss": 0.0096, "step": 192400 }, { "epoch": 1265.8552631578948, "grad_norm": 1.1005266904830933, "learning_rate": 0.0001, "loss": 0.0089, "step": 192410 }, { "epoch": 1265.921052631579, "grad_norm": 0.8893570899963379, "learning_rate": 0.0001, "loss": 0.0089, "step": 192420 }, { "epoch": 1265.9868421052631, "grad_norm": 1.0129755735397339, "learning_rate": 0.0001, "loss": 0.0101, "step": 192430 }, { "epoch": 1266.0526315789473, "grad_norm": 1.1341480016708374, "learning_rate": 0.0001, "loss": 0.0093, "step": 192440 }, { "epoch": 1266.1184210526317, "grad_norm": 0.907776951789856, "learning_rate": 0.0001, "loss": 0.0102, "step": 192450 }, { "epoch": 1266.1842105263158, "grad_norm": 0.811395525932312, "learning_rate": 0.0001, "loss": 0.0092, "step": 192460 }, { "epoch": 1266.25, "grad_norm": 1.0590198040008545, "learning_rate": 0.0001, "loss": 0.0085, "step": 192470 }, { "epoch": 1266.3157894736842, "grad_norm": 1.0062720775604248, "learning_rate": 0.0001, "loss": 0.0091, "step": 192480 }, { "epoch": 1266.3815789473683, "grad_norm": 0.9959651827812195, "learning_rate": 0.0001, "loss": 0.0088, "step": 192490 }, { "epoch": 1266.4473684210527, "grad_norm": 1.1516484022140503, "learning_rate": 0.0001, "loss": 0.0078, "step": 192500 }, { "epoch": 1266.5131578947369, "grad_norm": 0.8557493090629578, "learning_rate": 0.0001, "loss": 0.0078, "step": 192510 }, { "epoch": 1266.578947368421, "grad_norm": 1.0222887992858887, "learning_rate": 0.0001, "loss": 0.0104, "step": 192520 }, { "epoch": 1266.6447368421052, "grad_norm": 1.160636067390442, "learning_rate": 0.0001, "loss": 0.0103, "step": 192530 }, { "epoch": 1266.7105263157894, "grad_norm": 0.7170030474662781, "learning_rate": 0.0001, "loss": 0.0073, "step": 192540 }, { "epoch": 1266.7763157894738, "grad_norm": 1.1373835802078247, "learning_rate": 0.0001, "loss": 0.0096, "step": 192550 }, { "epoch": 1266.842105263158, "grad_norm": 0.9175273776054382, "learning_rate": 0.0001, "loss": 0.0094, "step": 192560 }, { "epoch": 1266.907894736842, "grad_norm": 1.1107465028762817, "learning_rate": 0.0001, "loss": 0.0095, "step": 192570 }, { "epoch": 1266.9736842105262, "grad_norm": 1.1369487047195435, "learning_rate": 0.0001, "loss": 0.0096, "step": 192580 }, { "epoch": 1267.0394736842106, "grad_norm": 1.0154566764831543, "learning_rate": 0.0001, "loss": 0.0078, "step": 192590 }, { "epoch": 1267.1052631578948, "grad_norm": 0.7436667084693909, "learning_rate": 0.0001, "loss": 0.0074, "step": 192600 }, { "epoch": 1267.171052631579, "grad_norm": 1.1619658470153809, "learning_rate": 0.0001, "loss": 0.008, "step": 192610 }, { "epoch": 1267.2368421052631, "grad_norm": 1.0842703580856323, "learning_rate": 0.0001, "loss": 0.0091, "step": 192620 }, { "epoch": 1267.3026315789473, "grad_norm": 1.0601835250854492, "learning_rate": 0.0001, "loss": 0.0076, "step": 192630 }, { "epoch": 1267.3684210526317, "grad_norm": 1.2090107202529907, "learning_rate": 0.0001, "loss": 0.0091, "step": 192640 }, { "epoch": 1267.4342105263158, "grad_norm": 0.9081798195838928, "learning_rate": 0.0001, "loss": 0.0077, "step": 192650 }, { "epoch": 1267.5, "grad_norm": 0.960776686668396, "learning_rate": 0.0001, "loss": 0.0085, "step": 192660 }, { "epoch": 1267.5657894736842, "grad_norm": 0.7369850277900696, "learning_rate": 0.0001, "loss": 0.0084, "step": 192670 }, { "epoch": 1267.6315789473683, "grad_norm": 0.9591938853263855, "learning_rate": 0.0001, "loss": 0.0096, "step": 192680 }, { "epoch": 1267.6973684210527, "grad_norm": 0.9522967338562012, "learning_rate": 0.0001, "loss": 0.0103, "step": 192690 }, { "epoch": 1267.7631578947369, "grad_norm": 1.203101396560669, "learning_rate": 0.0001, "loss": 0.0103, "step": 192700 }, { "epoch": 1267.828947368421, "grad_norm": 0.974402666091919, "learning_rate": 0.0001, "loss": 0.0108, "step": 192710 }, { "epoch": 1267.8947368421052, "grad_norm": 0.6702833771705627, "learning_rate": 0.0001, "loss": 0.0096, "step": 192720 }, { "epoch": 1267.9605263157894, "grad_norm": 0.7159844636917114, "learning_rate": 0.0001, "loss": 0.0101, "step": 192730 }, { "epoch": 1268.0263157894738, "grad_norm": 0.7871840596199036, "learning_rate": 0.0001, "loss": 0.0082, "step": 192740 }, { "epoch": 1268.092105263158, "grad_norm": 1.0035476684570312, "learning_rate": 0.0001, "loss": 0.0095, "step": 192750 }, { "epoch": 1268.157894736842, "grad_norm": 1.134181022644043, "learning_rate": 0.0001, "loss": 0.0078, "step": 192760 }, { "epoch": 1268.2236842105262, "grad_norm": 1.1951532363891602, "learning_rate": 0.0001, "loss": 0.0103, "step": 192770 }, { "epoch": 1268.2894736842106, "grad_norm": 0.7619327902793884, "learning_rate": 0.0001, "loss": 0.0085, "step": 192780 }, { "epoch": 1268.3552631578948, "grad_norm": 1.1061769723892212, "learning_rate": 0.0001, "loss": 0.0076, "step": 192790 }, { "epoch": 1268.421052631579, "grad_norm": 0.7267657518386841, "learning_rate": 0.0001, "loss": 0.0082, "step": 192800 }, { "epoch": 1268.4868421052631, "grad_norm": 1.2342023849487305, "learning_rate": 0.0001, "loss": 0.0094, "step": 192810 }, { "epoch": 1268.5526315789473, "grad_norm": 0.790826141834259, "learning_rate": 0.0001, "loss": 0.0098, "step": 192820 }, { "epoch": 1268.6184210526317, "grad_norm": 0.762381374835968, "learning_rate": 0.0001, "loss": 0.0091, "step": 192830 }, { "epoch": 1268.6842105263158, "grad_norm": 1.3754265308380127, "learning_rate": 0.0001, "loss": 0.0083, "step": 192840 }, { "epoch": 1268.75, "grad_norm": 1.1752783060073853, "learning_rate": 0.0001, "loss": 0.0074, "step": 192850 }, { "epoch": 1268.8157894736842, "grad_norm": 1.3542336225509644, "learning_rate": 0.0001, "loss": 0.0096, "step": 192860 }, { "epoch": 1268.8815789473683, "grad_norm": 1.3783767223358154, "learning_rate": 0.0001, "loss": 0.0097, "step": 192870 }, { "epoch": 1268.9473684210527, "grad_norm": 0.9072512984275818, "learning_rate": 0.0001, "loss": 0.0092, "step": 192880 }, { "epoch": 1269.0131578947369, "grad_norm": 1.1490826606750488, "learning_rate": 0.0001, "loss": 0.0089, "step": 192890 }, { "epoch": 1269.078947368421, "grad_norm": 1.1428316831588745, "learning_rate": 0.0001, "loss": 0.0079, "step": 192900 }, { "epoch": 1269.1447368421052, "grad_norm": 1.2847250699996948, "learning_rate": 0.0001, "loss": 0.0088, "step": 192910 }, { "epoch": 1269.2105263157894, "grad_norm": 0.9286653995513916, "learning_rate": 0.0001, "loss": 0.0073, "step": 192920 }, { "epoch": 1269.2763157894738, "grad_norm": 0.81577467918396, "learning_rate": 0.0001, "loss": 0.0098, "step": 192930 }, { "epoch": 1269.342105263158, "grad_norm": 0.6981829404830933, "learning_rate": 0.0001, "loss": 0.0087, "step": 192940 }, { "epoch": 1269.407894736842, "grad_norm": 0.916454553604126, "learning_rate": 0.0001, "loss": 0.0092, "step": 192950 }, { "epoch": 1269.4736842105262, "grad_norm": 0.8955065608024597, "learning_rate": 0.0001, "loss": 0.0071, "step": 192960 }, { "epoch": 1269.5394736842106, "grad_norm": 1.2273223400115967, "learning_rate": 0.0001, "loss": 0.009, "step": 192970 }, { "epoch": 1269.6052631578948, "grad_norm": 1.2199914455413818, "learning_rate": 0.0001, "loss": 0.0071, "step": 192980 }, { "epoch": 1269.671052631579, "grad_norm": 1.2514991760253906, "learning_rate": 0.0001, "loss": 0.0094, "step": 192990 }, { "epoch": 1269.7368421052631, "grad_norm": 1.1191940307617188, "learning_rate": 0.0001, "loss": 0.0118, "step": 193000 }, { "epoch": 1269.8026315789473, "grad_norm": 1.1365535259246826, "learning_rate": 0.0001, "loss": 0.0083, "step": 193010 }, { "epoch": 1269.8684210526317, "grad_norm": 1.1664906740188599, "learning_rate": 0.0001, "loss": 0.0093, "step": 193020 }, { "epoch": 1269.9342105263158, "grad_norm": 1.2412478923797607, "learning_rate": 0.0001, "loss": 0.0094, "step": 193030 }, { "epoch": 1270.0, "grad_norm": 1.1647230386734009, "learning_rate": 0.0001, "loss": 0.0092, "step": 193040 }, { "epoch": 1270.0657894736842, "grad_norm": 1.4412037134170532, "learning_rate": 0.0001, "loss": 0.0088, "step": 193050 }, { "epoch": 1270.1315789473683, "grad_norm": 1.1689636707305908, "learning_rate": 0.0001, "loss": 0.0088, "step": 193060 }, { "epoch": 1270.1973684210527, "grad_norm": 0.9392591118812561, "learning_rate": 0.0001, "loss": 0.0096, "step": 193070 }, { "epoch": 1270.2631578947369, "grad_norm": 1.0944244861602783, "learning_rate": 0.0001, "loss": 0.009, "step": 193080 }, { "epoch": 1270.328947368421, "grad_norm": 0.9576421976089478, "learning_rate": 0.0001, "loss": 0.0082, "step": 193090 }, { "epoch": 1270.3947368421052, "grad_norm": 0.7435740828514099, "learning_rate": 0.0001, "loss": 0.0065, "step": 193100 }, { "epoch": 1270.4605263157894, "grad_norm": 0.8688104152679443, "learning_rate": 0.0001, "loss": 0.0085, "step": 193110 }, { "epoch": 1270.5263157894738, "grad_norm": 0.8202447295188904, "learning_rate": 0.0001, "loss": 0.0081, "step": 193120 }, { "epoch": 1270.592105263158, "grad_norm": 0.7284652590751648, "learning_rate": 0.0001, "loss": 0.011, "step": 193130 }, { "epoch": 1270.657894736842, "grad_norm": 0.9387590289115906, "learning_rate": 0.0001, "loss": 0.0082, "step": 193140 }, { "epoch": 1270.7236842105262, "grad_norm": 1.2218446731567383, "learning_rate": 0.0001, "loss": 0.0083, "step": 193150 }, { "epoch": 1270.7894736842106, "grad_norm": 0.6693400144577026, "learning_rate": 0.0001, "loss": 0.0088, "step": 193160 }, { "epoch": 1270.8552631578948, "grad_norm": 1.0726978778839111, "learning_rate": 0.0001, "loss": 0.0096, "step": 193170 }, { "epoch": 1270.921052631579, "grad_norm": 1.0772373676300049, "learning_rate": 0.0001, "loss": 0.0089, "step": 193180 }, { "epoch": 1270.9868421052631, "grad_norm": 1.2476823329925537, "learning_rate": 0.0001, "loss": 0.0083, "step": 193190 }, { "epoch": 1271.0526315789473, "grad_norm": 1.162440299987793, "learning_rate": 0.0001, "loss": 0.0091, "step": 193200 }, { "epoch": 1271.1184210526317, "grad_norm": 0.7666929960250854, "learning_rate": 0.0001, "loss": 0.0108, "step": 193210 }, { "epoch": 1271.1842105263158, "grad_norm": 1.0631024837493896, "learning_rate": 0.0001, "loss": 0.0103, "step": 193220 }, { "epoch": 1271.25, "grad_norm": 0.8989170789718628, "learning_rate": 0.0001, "loss": 0.0081, "step": 193230 }, { "epoch": 1271.3157894736842, "grad_norm": 1.1688525676727295, "learning_rate": 0.0001, "loss": 0.0097, "step": 193240 }, { "epoch": 1271.3815789473683, "grad_norm": 1.0052133798599243, "learning_rate": 0.0001, "loss": 0.0108, "step": 193250 }, { "epoch": 1271.4473684210527, "grad_norm": 0.9958306550979614, "learning_rate": 0.0001, "loss": 0.009, "step": 193260 }, { "epoch": 1271.5131578947369, "grad_norm": 1.302128553390503, "learning_rate": 0.0001, "loss": 0.009, "step": 193270 }, { "epoch": 1271.578947368421, "grad_norm": 0.8450573086738586, "learning_rate": 0.0001, "loss": 0.0075, "step": 193280 }, { "epoch": 1271.6447368421052, "grad_norm": 1.1243925094604492, "learning_rate": 0.0001, "loss": 0.0092, "step": 193290 }, { "epoch": 1271.7105263157894, "grad_norm": 1.0591107606887817, "learning_rate": 0.0001, "loss": 0.008, "step": 193300 }, { "epoch": 1271.7763157894738, "grad_norm": 1.174922227859497, "learning_rate": 0.0001, "loss": 0.0088, "step": 193310 }, { "epoch": 1271.842105263158, "grad_norm": 0.8074167966842651, "learning_rate": 0.0001, "loss": 0.0078, "step": 193320 }, { "epoch": 1271.907894736842, "grad_norm": 0.9306144714355469, "learning_rate": 0.0001, "loss": 0.0077, "step": 193330 }, { "epoch": 1271.9736842105262, "grad_norm": 0.8695573806762695, "learning_rate": 0.0001, "loss": 0.0104, "step": 193340 }, { "epoch": 1272.0394736842106, "grad_norm": 0.9562103152275085, "learning_rate": 0.0001, "loss": 0.0086, "step": 193350 }, { "epoch": 1272.1052631578948, "grad_norm": 0.7408207058906555, "learning_rate": 0.0001, "loss": 0.0076, "step": 193360 }, { "epoch": 1272.171052631579, "grad_norm": 0.8762840628623962, "learning_rate": 0.0001, "loss": 0.0082, "step": 193370 }, { "epoch": 1272.2368421052631, "grad_norm": 1.0107637643814087, "learning_rate": 0.0001, "loss": 0.0091, "step": 193380 }, { "epoch": 1272.3026315789473, "grad_norm": 0.9742252230644226, "learning_rate": 0.0001, "loss": 0.0094, "step": 193390 }, { "epoch": 1272.3684210526317, "grad_norm": 1.0357519388198853, "learning_rate": 0.0001, "loss": 0.0081, "step": 193400 }, { "epoch": 1272.4342105263158, "grad_norm": 1.0647118091583252, "learning_rate": 0.0001, "loss": 0.0078, "step": 193410 }, { "epoch": 1272.5, "grad_norm": 1.2739837169647217, "learning_rate": 0.0001, "loss": 0.0093, "step": 193420 }, { "epoch": 1272.5657894736842, "grad_norm": 1.3156754970550537, "learning_rate": 0.0001, "loss": 0.0096, "step": 193430 }, { "epoch": 1272.6315789473683, "grad_norm": 1.3240246772766113, "learning_rate": 0.0001, "loss": 0.0112, "step": 193440 }, { "epoch": 1272.6973684210527, "grad_norm": 1.2527782917022705, "learning_rate": 0.0001, "loss": 0.0096, "step": 193450 }, { "epoch": 1272.7631578947369, "grad_norm": 1.097855567932129, "learning_rate": 0.0001, "loss": 0.0085, "step": 193460 }, { "epoch": 1272.828947368421, "grad_norm": 0.7602299451828003, "learning_rate": 0.0001, "loss": 0.0086, "step": 193470 }, { "epoch": 1272.8947368421052, "grad_norm": 0.9691528677940369, "learning_rate": 0.0001, "loss": 0.0084, "step": 193480 }, { "epoch": 1272.9605263157894, "grad_norm": 0.8665773868560791, "learning_rate": 0.0001, "loss": 0.0094, "step": 193490 }, { "epoch": 1273.0263157894738, "grad_norm": 0.9522979855537415, "learning_rate": 0.0001, "loss": 0.008, "step": 193500 }, { "epoch": 1273.092105263158, "grad_norm": 1.1000076532363892, "learning_rate": 0.0001, "loss": 0.0072, "step": 193510 }, { "epoch": 1273.157894736842, "grad_norm": 1.0523219108581543, "learning_rate": 0.0001, "loss": 0.0093, "step": 193520 }, { "epoch": 1273.2236842105262, "grad_norm": 0.7484111785888672, "learning_rate": 0.0001, "loss": 0.0089, "step": 193530 }, { "epoch": 1273.2894736842106, "grad_norm": 0.6804783344268799, "learning_rate": 0.0001, "loss": 0.0083, "step": 193540 }, { "epoch": 1273.3552631578948, "grad_norm": 1.133036494255066, "learning_rate": 0.0001, "loss": 0.009, "step": 193550 }, { "epoch": 1273.421052631579, "grad_norm": 0.9207621812820435, "learning_rate": 0.0001, "loss": 0.0109, "step": 193560 }, { "epoch": 1273.4868421052631, "grad_norm": 0.9967194199562073, "learning_rate": 0.0001, "loss": 0.0092, "step": 193570 }, { "epoch": 1273.5526315789473, "grad_norm": 0.6356325745582581, "learning_rate": 0.0001, "loss": 0.0095, "step": 193580 }, { "epoch": 1273.6184210526317, "grad_norm": 0.9811447262763977, "learning_rate": 0.0001, "loss": 0.009, "step": 193590 }, { "epoch": 1273.6842105263158, "grad_norm": 0.9721751809120178, "learning_rate": 0.0001, "loss": 0.0095, "step": 193600 }, { "epoch": 1273.75, "grad_norm": 0.8101963996887207, "learning_rate": 0.0001, "loss": 0.0086, "step": 193610 }, { "epoch": 1273.8157894736842, "grad_norm": 1.3644793033599854, "learning_rate": 0.0001, "loss": 0.01, "step": 193620 }, { "epoch": 1273.8815789473683, "grad_norm": 0.827865719795227, "learning_rate": 0.0001, "loss": 0.0087, "step": 193630 }, { "epoch": 1273.9473684210527, "grad_norm": 1.0075935125350952, "learning_rate": 0.0001, "loss": 0.009, "step": 193640 }, { "epoch": 1274.0131578947369, "grad_norm": 1.2060139179229736, "learning_rate": 0.0001, "loss": 0.0095, "step": 193650 }, { "epoch": 1274.078947368421, "grad_norm": 1.1146782636642456, "learning_rate": 0.0001, "loss": 0.0076, "step": 193660 }, { "epoch": 1274.1447368421052, "grad_norm": 1.0539047718048096, "learning_rate": 0.0001, "loss": 0.0094, "step": 193670 }, { "epoch": 1274.2105263157894, "grad_norm": 0.9264514446258545, "learning_rate": 0.0001, "loss": 0.0074, "step": 193680 }, { "epoch": 1274.2763157894738, "grad_norm": 0.6521208882331848, "learning_rate": 0.0001, "loss": 0.0094, "step": 193690 }, { "epoch": 1274.342105263158, "grad_norm": 0.6478739380836487, "learning_rate": 0.0001, "loss": 0.007, "step": 193700 }, { "epoch": 1274.407894736842, "grad_norm": 0.8767127990722656, "learning_rate": 0.0001, "loss": 0.0082, "step": 193710 }, { "epoch": 1274.4736842105262, "grad_norm": 1.1907069683074951, "learning_rate": 0.0001, "loss": 0.0108, "step": 193720 }, { "epoch": 1274.5394736842106, "grad_norm": 1.0393046140670776, "learning_rate": 0.0001, "loss": 0.0076, "step": 193730 }, { "epoch": 1274.6052631578948, "grad_norm": 0.9379701614379883, "learning_rate": 0.0001, "loss": 0.0112, "step": 193740 }, { "epoch": 1274.671052631579, "grad_norm": 0.976507306098938, "learning_rate": 0.0001, "loss": 0.0086, "step": 193750 }, { "epoch": 1274.7368421052631, "grad_norm": 0.986788272857666, "learning_rate": 0.0001, "loss": 0.0084, "step": 193760 }, { "epoch": 1274.8026315789473, "grad_norm": 1.1617448329925537, "learning_rate": 0.0001, "loss": 0.0108, "step": 193770 }, { "epoch": 1274.8684210526317, "grad_norm": 1.0417400598526, "learning_rate": 0.0001, "loss": 0.011, "step": 193780 }, { "epoch": 1274.9342105263158, "grad_norm": 0.8060194253921509, "learning_rate": 0.0001, "loss": 0.0101, "step": 193790 }, { "epoch": 1275.0, "grad_norm": 1.053001880645752, "learning_rate": 0.0001, "loss": 0.0084, "step": 193800 }, { "epoch": 1275.0657894736842, "grad_norm": 1.0343172550201416, "learning_rate": 0.0001, "loss": 0.0092, "step": 193810 }, { "epoch": 1275.1315789473683, "grad_norm": 0.8656930923461914, "learning_rate": 0.0001, "loss": 0.008, "step": 193820 }, { "epoch": 1275.1973684210527, "grad_norm": 1.0269204378128052, "learning_rate": 0.0001, "loss": 0.0098, "step": 193830 }, { "epoch": 1275.2631578947369, "grad_norm": 1.2087546586990356, "learning_rate": 0.0001, "loss": 0.0079, "step": 193840 }, { "epoch": 1275.328947368421, "grad_norm": 0.8350012898445129, "learning_rate": 0.0001, "loss": 0.008, "step": 193850 }, { "epoch": 1275.3947368421052, "grad_norm": 0.9556059241294861, "learning_rate": 0.0001, "loss": 0.0093, "step": 193860 }, { "epoch": 1275.4605263157894, "grad_norm": 1.050558090209961, "learning_rate": 0.0001, "loss": 0.0088, "step": 193870 }, { "epoch": 1275.5263157894738, "grad_norm": 1.1796860694885254, "learning_rate": 0.0001, "loss": 0.0079, "step": 193880 }, { "epoch": 1275.592105263158, "grad_norm": 0.9585551023483276, "learning_rate": 0.0001, "loss": 0.0083, "step": 193890 }, { "epoch": 1275.657894736842, "grad_norm": 1.2745286226272583, "learning_rate": 0.0001, "loss": 0.0083, "step": 193900 }, { "epoch": 1275.7236842105262, "grad_norm": 0.8697680234909058, "learning_rate": 0.0001, "loss": 0.0103, "step": 193910 }, { "epoch": 1275.7894736842106, "grad_norm": 1.0319898128509521, "learning_rate": 0.0001, "loss": 0.0097, "step": 193920 }, { "epoch": 1275.8552631578948, "grad_norm": 0.8313315510749817, "learning_rate": 0.0001, "loss": 0.0097, "step": 193930 }, { "epoch": 1275.921052631579, "grad_norm": 1.148795247077942, "learning_rate": 0.0001, "loss": 0.0088, "step": 193940 }, { "epoch": 1275.9868421052631, "grad_norm": 0.8364288210868835, "learning_rate": 0.0001, "loss": 0.0097, "step": 193950 }, { "epoch": 1276.0526315789473, "grad_norm": 0.909010648727417, "learning_rate": 0.0001, "loss": 0.0096, "step": 193960 }, { "epoch": 1276.1184210526317, "grad_norm": 0.9541910290718079, "learning_rate": 0.0001, "loss": 0.0093, "step": 193970 }, { "epoch": 1276.1842105263158, "grad_norm": 1.6058369874954224, "learning_rate": 0.0001, "loss": 0.0095, "step": 193980 }, { "epoch": 1276.25, "grad_norm": 3.058375835418701, "learning_rate": 0.0001, "loss": 0.0103, "step": 193990 }, { "epoch": 1276.3157894736842, "grad_norm": 2.173565149307251, "learning_rate": 0.0001, "loss": 0.0085, "step": 194000 }, { "epoch": 1276.3815789473683, "grad_norm": 1.741365671157837, "learning_rate": 0.0001, "loss": 0.0098, "step": 194010 }, { "epoch": 1276.4473684210527, "grad_norm": 1.5380631685256958, "learning_rate": 0.0001, "loss": 0.0085, "step": 194020 }, { "epoch": 1276.5131578947369, "grad_norm": 1.3168596029281616, "learning_rate": 0.0001, "loss": 0.0071, "step": 194030 }, { "epoch": 1276.578947368421, "grad_norm": 1.2907633781433105, "learning_rate": 0.0001, "loss": 0.0085, "step": 194040 }, { "epoch": 1276.6447368421052, "grad_norm": 1.0855131149291992, "learning_rate": 0.0001, "loss": 0.0082, "step": 194050 }, { "epoch": 1276.7105263157894, "grad_norm": 1.2930248975753784, "learning_rate": 0.0001, "loss": 0.0075, "step": 194060 }, { "epoch": 1276.7763157894738, "grad_norm": 1.222780466079712, "learning_rate": 0.0001, "loss": 0.0089, "step": 194070 }, { "epoch": 1276.842105263158, "grad_norm": 1.4316956996917725, "learning_rate": 0.0001, "loss": 0.0089, "step": 194080 }, { "epoch": 1276.907894736842, "grad_norm": 1.0720902681350708, "learning_rate": 0.0001, "loss": 0.0107, "step": 194090 }, { "epoch": 1276.9736842105262, "grad_norm": 1.6173840761184692, "learning_rate": 0.0001, "loss": 0.0084, "step": 194100 }, { "epoch": 1277.0394736842106, "grad_norm": 1.3037574291229248, "learning_rate": 0.0001, "loss": 0.007, "step": 194110 }, { "epoch": 1277.1052631578948, "grad_norm": 1.301659345626831, "learning_rate": 0.0001, "loss": 0.0073, "step": 194120 }, { "epoch": 1277.171052631579, "grad_norm": 1.402639627456665, "learning_rate": 0.0001, "loss": 0.0098, "step": 194130 }, { "epoch": 1277.2368421052631, "grad_norm": 1.272783637046814, "learning_rate": 0.0001, "loss": 0.0112, "step": 194140 }, { "epoch": 1277.3026315789473, "grad_norm": 0.7505586743354797, "learning_rate": 0.0001, "loss": 0.0072, "step": 194150 }, { "epoch": 1277.3684210526317, "grad_norm": 0.9745710492134094, "learning_rate": 0.0001, "loss": 0.0095, "step": 194160 }, { "epoch": 1277.4342105263158, "grad_norm": 1.12748384475708, "learning_rate": 0.0001, "loss": 0.0075, "step": 194170 }, { "epoch": 1277.5, "grad_norm": 1.296922206878662, "learning_rate": 0.0001, "loss": 0.0092, "step": 194180 }, { "epoch": 1277.5657894736842, "grad_norm": 1.1418737173080444, "learning_rate": 0.0001, "loss": 0.0083, "step": 194190 }, { "epoch": 1277.6315789473683, "grad_norm": 0.7893008589744568, "learning_rate": 0.0001, "loss": 0.0095, "step": 194200 }, { "epoch": 1277.6973684210527, "grad_norm": 0.9640665650367737, "learning_rate": 0.0001, "loss": 0.0095, "step": 194210 }, { "epoch": 1277.7631578947369, "grad_norm": 1.2299898862838745, "learning_rate": 0.0001, "loss": 0.0079, "step": 194220 }, { "epoch": 1277.828947368421, "grad_norm": 0.9324010610580444, "learning_rate": 0.0001, "loss": 0.0084, "step": 194230 }, { "epoch": 1277.8947368421052, "grad_norm": 0.841152012348175, "learning_rate": 0.0001, "loss": 0.0085, "step": 194240 }, { "epoch": 1277.9605263157894, "grad_norm": 1.026451826095581, "learning_rate": 0.0001, "loss": 0.0094, "step": 194250 }, { "epoch": 1278.0263157894738, "grad_norm": 0.8263790011405945, "learning_rate": 0.0001, "loss": 0.008, "step": 194260 }, { "epoch": 1278.092105263158, "grad_norm": 0.8429759740829468, "learning_rate": 0.0001, "loss": 0.0077, "step": 194270 }, { "epoch": 1278.157894736842, "grad_norm": 0.9431121945381165, "learning_rate": 0.0001, "loss": 0.0085, "step": 194280 }, { "epoch": 1278.2236842105262, "grad_norm": 0.8703896999359131, "learning_rate": 0.0001, "loss": 0.0101, "step": 194290 }, { "epoch": 1278.2894736842106, "grad_norm": 0.7083398699760437, "learning_rate": 0.0001, "loss": 0.0102, "step": 194300 }, { "epoch": 1278.3552631578948, "grad_norm": 1.1701858043670654, "learning_rate": 0.0001, "loss": 0.0102, "step": 194310 }, { "epoch": 1278.421052631579, "grad_norm": 1.2647616863250732, "learning_rate": 0.0001, "loss": 0.0077, "step": 194320 }, { "epoch": 1278.4868421052631, "grad_norm": 0.9830572009086609, "learning_rate": 0.0001, "loss": 0.0108, "step": 194330 }, { "epoch": 1278.5526315789473, "grad_norm": 0.9235027432441711, "learning_rate": 0.0001, "loss": 0.0099, "step": 194340 }, { "epoch": 1278.6184210526317, "grad_norm": 0.9852763414382935, "learning_rate": 0.0001, "loss": 0.0081, "step": 194350 }, { "epoch": 1278.6842105263158, "grad_norm": 0.9919136762619019, "learning_rate": 0.0001, "loss": 0.0078, "step": 194360 }, { "epoch": 1278.75, "grad_norm": 0.9679889678955078, "learning_rate": 0.0001, "loss": 0.0095, "step": 194370 }, { "epoch": 1278.8157894736842, "grad_norm": 1.1155283451080322, "learning_rate": 0.0001, "loss": 0.0105, "step": 194380 }, { "epoch": 1278.8815789473683, "grad_norm": 1.1698013544082642, "learning_rate": 0.0001, "loss": 0.0095, "step": 194390 }, { "epoch": 1278.9473684210527, "grad_norm": 1.2240992784500122, "learning_rate": 0.0001, "loss": 0.0081, "step": 194400 }, { "epoch": 1279.0131578947369, "grad_norm": 1.3296053409576416, "learning_rate": 0.0001, "loss": 0.0095, "step": 194410 }, { "epoch": 1279.078947368421, "grad_norm": 1.4155553579330444, "learning_rate": 0.0001, "loss": 0.0087, "step": 194420 }, { "epoch": 1279.1447368421052, "grad_norm": 1.3145627975463867, "learning_rate": 0.0001, "loss": 0.0095, "step": 194430 }, { "epoch": 1279.2105263157894, "grad_norm": 1.4205524921417236, "learning_rate": 0.0001, "loss": 0.009, "step": 194440 }, { "epoch": 1279.2763157894738, "grad_norm": 0.8720544576644897, "learning_rate": 0.0001, "loss": 0.0078, "step": 194450 }, { "epoch": 1279.342105263158, "grad_norm": 0.9181710481643677, "learning_rate": 0.0001, "loss": 0.008, "step": 194460 }, { "epoch": 1279.407894736842, "grad_norm": 1.1017323732376099, "learning_rate": 0.0001, "loss": 0.0104, "step": 194470 }, { "epoch": 1279.4736842105262, "grad_norm": 0.9298954606056213, "learning_rate": 0.0001, "loss": 0.009, "step": 194480 }, { "epoch": 1279.5394736842106, "grad_norm": 0.9003967046737671, "learning_rate": 0.0001, "loss": 0.0092, "step": 194490 }, { "epoch": 1279.6052631578948, "grad_norm": 1.0150399208068848, "learning_rate": 0.0001, "loss": 0.0097, "step": 194500 }, { "epoch": 1279.671052631579, "grad_norm": 0.9780325889587402, "learning_rate": 0.0001, "loss": 0.0078, "step": 194510 }, { "epoch": 1279.7368421052631, "grad_norm": 0.9293487668037415, "learning_rate": 0.0001, "loss": 0.0089, "step": 194520 }, { "epoch": 1279.8026315789473, "grad_norm": 0.82585608959198, "learning_rate": 0.0001, "loss": 0.0098, "step": 194530 }, { "epoch": 1279.8684210526317, "grad_norm": 0.9033555388450623, "learning_rate": 0.0001, "loss": 0.0104, "step": 194540 }, { "epoch": 1279.9342105263158, "grad_norm": 1.1674561500549316, "learning_rate": 0.0001, "loss": 0.009, "step": 194550 }, { "epoch": 1280.0, "grad_norm": 0.9872167110443115, "learning_rate": 0.0001, "loss": 0.0076, "step": 194560 }, { "epoch": 1280.0657894736842, "grad_norm": 0.7526688575744629, "learning_rate": 0.0001, "loss": 0.0078, "step": 194570 }, { "epoch": 1280.1315789473683, "grad_norm": 1.100469708442688, "learning_rate": 0.0001, "loss": 0.0089, "step": 194580 }, { "epoch": 1280.1973684210527, "grad_norm": 1.1157853603363037, "learning_rate": 0.0001, "loss": 0.009, "step": 194590 }, { "epoch": 1280.2631578947369, "grad_norm": 1.3560817241668701, "learning_rate": 0.0001, "loss": 0.0086, "step": 194600 }, { "epoch": 1280.328947368421, "grad_norm": 1.132397174835205, "learning_rate": 0.0001, "loss": 0.0093, "step": 194610 }, { "epoch": 1280.3947368421052, "grad_norm": 1.0123302936553955, "learning_rate": 0.0001, "loss": 0.0093, "step": 194620 }, { "epoch": 1280.4605263157894, "grad_norm": 0.9225643873214722, "learning_rate": 0.0001, "loss": 0.0101, "step": 194630 }, { "epoch": 1280.5263157894738, "grad_norm": 0.7562076449394226, "learning_rate": 0.0001, "loss": 0.0087, "step": 194640 }, { "epoch": 1280.592105263158, "grad_norm": 0.5493844747543335, "learning_rate": 0.0001, "loss": 0.0078, "step": 194650 }, { "epoch": 1280.657894736842, "grad_norm": 0.9773281216621399, "learning_rate": 0.0001, "loss": 0.01, "step": 194660 }, { "epoch": 1280.7236842105262, "grad_norm": 0.8132543563842773, "learning_rate": 0.0001, "loss": 0.0102, "step": 194670 }, { "epoch": 1280.7894736842106, "grad_norm": 1.0290168523788452, "learning_rate": 0.0001, "loss": 0.0081, "step": 194680 }, { "epoch": 1280.8552631578948, "grad_norm": 1.0300383567810059, "learning_rate": 0.0001, "loss": 0.0103, "step": 194690 }, { "epoch": 1280.921052631579, "grad_norm": 0.831333339214325, "learning_rate": 0.0001, "loss": 0.0082, "step": 194700 }, { "epoch": 1280.9868421052631, "grad_norm": 1.1776654720306396, "learning_rate": 0.0001, "loss": 0.0088, "step": 194710 }, { "epoch": 1281.0526315789473, "grad_norm": 1.3399312496185303, "learning_rate": 0.0001, "loss": 0.0099, "step": 194720 }, { "epoch": 1281.1184210526317, "grad_norm": 1.3882694244384766, "learning_rate": 0.0001, "loss": 0.009, "step": 194730 }, { "epoch": 1281.1842105263158, "grad_norm": 1.010141372680664, "learning_rate": 0.0001, "loss": 0.0078, "step": 194740 }, { "epoch": 1281.25, "grad_norm": 1.1796314716339111, "learning_rate": 0.0001, "loss": 0.0096, "step": 194750 }, { "epoch": 1281.3157894736842, "grad_norm": 0.9938530325889587, "learning_rate": 0.0001, "loss": 0.0088, "step": 194760 }, { "epoch": 1281.3815789473683, "grad_norm": 0.8807727098464966, "learning_rate": 0.0001, "loss": 0.0084, "step": 194770 }, { "epoch": 1281.4473684210527, "grad_norm": 0.9626410603523254, "learning_rate": 0.0001, "loss": 0.0092, "step": 194780 }, { "epoch": 1281.5131578947369, "grad_norm": 0.9949114322662354, "learning_rate": 0.0001, "loss": 0.0106, "step": 194790 }, { "epoch": 1281.578947368421, "grad_norm": 0.6117430925369263, "learning_rate": 0.0001, "loss": 0.0084, "step": 194800 }, { "epoch": 1281.6447368421052, "grad_norm": 0.8383743762969971, "learning_rate": 0.0001, "loss": 0.0094, "step": 194810 }, { "epoch": 1281.7105263157894, "grad_norm": 1.091646432876587, "learning_rate": 0.0001, "loss": 0.01, "step": 194820 }, { "epoch": 1281.7763157894738, "grad_norm": 0.7561153769493103, "learning_rate": 0.0001, "loss": 0.0078, "step": 194830 }, { "epoch": 1281.842105263158, "grad_norm": 0.7222174406051636, "learning_rate": 0.0001, "loss": 0.0083, "step": 194840 }, { "epoch": 1281.907894736842, "grad_norm": 0.7637395262718201, "learning_rate": 0.0001, "loss": 0.0092, "step": 194850 }, { "epoch": 1281.9736842105262, "grad_norm": 1.0813342332839966, "learning_rate": 0.0001, "loss": 0.0092, "step": 194860 }, { "epoch": 1282.0394736842106, "grad_norm": 1.0132391452789307, "learning_rate": 0.0001, "loss": 0.0086, "step": 194870 }, { "epoch": 1282.1052631578948, "grad_norm": 0.9855432510375977, "learning_rate": 0.0001, "loss": 0.0082, "step": 194880 }, { "epoch": 1282.171052631579, "grad_norm": 1.1103159189224243, "learning_rate": 0.0001, "loss": 0.0103, "step": 194890 }, { "epoch": 1282.2368421052631, "grad_norm": 1.3338035345077515, "learning_rate": 0.0001, "loss": 0.0094, "step": 194900 }, { "epoch": 1282.3026315789473, "grad_norm": 1.1458650827407837, "learning_rate": 0.0001, "loss": 0.0085, "step": 194910 }, { "epoch": 1282.3684210526317, "grad_norm": 1.2011997699737549, "learning_rate": 0.0001, "loss": 0.0104, "step": 194920 }, { "epoch": 1282.4342105263158, "grad_norm": 0.8735988736152649, "learning_rate": 0.0001, "loss": 0.0089, "step": 194930 }, { "epoch": 1282.5, "grad_norm": 0.8202658295631409, "learning_rate": 0.0001, "loss": 0.0085, "step": 194940 }, { "epoch": 1282.5657894736842, "grad_norm": 1.0125209093093872, "learning_rate": 0.0001, "loss": 0.0105, "step": 194950 }, { "epoch": 1282.6315789473683, "grad_norm": 1.1227607727050781, "learning_rate": 0.0001, "loss": 0.0094, "step": 194960 }, { "epoch": 1282.6973684210527, "grad_norm": 1.05240797996521, "learning_rate": 0.0001, "loss": 0.0088, "step": 194970 }, { "epoch": 1282.7631578947369, "grad_norm": 1.0156041383743286, "learning_rate": 0.0001, "loss": 0.0093, "step": 194980 }, { "epoch": 1282.828947368421, "grad_norm": 1.0174468755722046, "learning_rate": 0.0001, "loss": 0.0103, "step": 194990 }, { "epoch": 1282.8947368421052, "grad_norm": 1.300093412399292, "learning_rate": 0.0001, "loss": 0.008, "step": 195000 }, { "epoch": 1282.9605263157894, "grad_norm": 1.298466444015503, "learning_rate": 0.0001, "loss": 0.0092, "step": 195010 }, { "epoch": 1283.0263157894738, "grad_norm": 1.0919368267059326, "learning_rate": 0.0001, "loss": 0.0098, "step": 195020 }, { "epoch": 1283.092105263158, "grad_norm": 1.0547558069229126, "learning_rate": 0.0001, "loss": 0.0105, "step": 195030 }, { "epoch": 1283.157894736842, "grad_norm": 1.1440446376800537, "learning_rate": 0.0001, "loss": 0.0099, "step": 195040 }, { "epoch": 1283.2236842105262, "grad_norm": 0.7482719421386719, "learning_rate": 0.0001, "loss": 0.0108, "step": 195050 }, { "epoch": 1283.2894736842106, "grad_norm": 1.0653194189071655, "learning_rate": 0.0001, "loss": 0.0097, "step": 195060 }, { "epoch": 1283.3552631578948, "grad_norm": 1.172141432762146, "learning_rate": 0.0001, "loss": 0.0079, "step": 195070 }, { "epoch": 1283.421052631579, "grad_norm": 0.7871286273002625, "learning_rate": 0.0001, "loss": 0.0072, "step": 195080 }, { "epoch": 1283.4868421052631, "grad_norm": 0.9559378027915955, "learning_rate": 0.0001, "loss": 0.0078, "step": 195090 }, { "epoch": 1283.5526315789473, "grad_norm": 1.331890344619751, "learning_rate": 0.0001, "loss": 0.0096, "step": 195100 }, { "epoch": 1283.6184210526317, "grad_norm": 0.9049778580665588, "learning_rate": 0.0001, "loss": 0.0087, "step": 195110 }, { "epoch": 1283.6842105263158, "grad_norm": 0.6896979808807373, "learning_rate": 0.0001, "loss": 0.0081, "step": 195120 }, { "epoch": 1283.75, "grad_norm": 0.6551532745361328, "learning_rate": 0.0001, "loss": 0.0085, "step": 195130 }, { "epoch": 1283.8157894736842, "grad_norm": 0.8988133072853088, "learning_rate": 0.0001, "loss": 0.0096, "step": 195140 }, { "epoch": 1283.8815789473683, "grad_norm": 0.7520022392272949, "learning_rate": 0.0001, "loss": 0.0084, "step": 195150 }, { "epoch": 1283.9473684210527, "grad_norm": 1.120882511138916, "learning_rate": 0.0001, "loss": 0.0096, "step": 195160 }, { "epoch": 1284.0131578947369, "grad_norm": 0.8684665560722351, "learning_rate": 0.0001, "loss": 0.0077, "step": 195170 }, { "epoch": 1284.078947368421, "grad_norm": 0.9502143859863281, "learning_rate": 0.0001, "loss": 0.0086, "step": 195180 }, { "epoch": 1284.1447368421052, "grad_norm": 1.1483829021453857, "learning_rate": 0.0001, "loss": 0.011, "step": 195190 }, { "epoch": 1284.2105263157894, "grad_norm": 1.1794594526290894, "learning_rate": 0.0001, "loss": 0.0085, "step": 195200 }, { "epoch": 1284.2763157894738, "grad_norm": 0.681542694568634, "learning_rate": 0.0001, "loss": 0.0089, "step": 195210 }, { "epoch": 1284.342105263158, "grad_norm": 0.8862521648406982, "learning_rate": 0.0001, "loss": 0.0088, "step": 195220 }, { "epoch": 1284.407894736842, "grad_norm": 0.9744770526885986, "learning_rate": 0.0001, "loss": 0.0069, "step": 195230 }, { "epoch": 1284.4736842105262, "grad_norm": 0.7451329827308655, "learning_rate": 0.0001, "loss": 0.0084, "step": 195240 }, { "epoch": 1284.5394736842106, "grad_norm": 0.7159759402275085, "learning_rate": 0.0001, "loss": 0.0083, "step": 195250 }, { "epoch": 1284.6052631578948, "grad_norm": 1.3413786888122559, "learning_rate": 0.0001, "loss": 0.011, "step": 195260 }, { "epoch": 1284.671052631579, "grad_norm": 1.1536619663238525, "learning_rate": 0.0001, "loss": 0.0081, "step": 195270 }, { "epoch": 1284.7368421052631, "grad_norm": 0.8164981007575989, "learning_rate": 0.0001, "loss": 0.0098, "step": 195280 }, { "epoch": 1284.8026315789473, "grad_norm": 0.990378737449646, "learning_rate": 0.0001, "loss": 0.0089, "step": 195290 }, { "epoch": 1284.8684210526317, "grad_norm": 0.8975867629051208, "learning_rate": 0.0001, "loss": 0.0094, "step": 195300 }, { "epoch": 1284.9342105263158, "grad_norm": 0.9112573862075806, "learning_rate": 0.0001, "loss": 0.0093, "step": 195310 }, { "epoch": 1285.0, "grad_norm": 0.8136677145957947, "learning_rate": 0.0001, "loss": 0.01, "step": 195320 }, { "epoch": 1285.0657894736842, "grad_norm": 0.9761696457862854, "learning_rate": 0.0001, "loss": 0.009, "step": 195330 }, { "epoch": 1285.1315789473683, "grad_norm": 0.5923354029655457, "learning_rate": 0.0001, "loss": 0.0082, "step": 195340 }, { "epoch": 1285.1973684210527, "grad_norm": 0.8915498852729797, "learning_rate": 0.0001, "loss": 0.0085, "step": 195350 }, { "epoch": 1285.2631578947369, "grad_norm": 1.0586600303649902, "learning_rate": 0.0001, "loss": 0.0096, "step": 195360 }, { "epoch": 1285.328947368421, "grad_norm": 1.0132033824920654, "learning_rate": 0.0001, "loss": 0.012, "step": 195370 }, { "epoch": 1285.3947368421052, "grad_norm": 1.0920168161392212, "learning_rate": 0.0001, "loss": 0.0089, "step": 195380 }, { "epoch": 1285.4605263157894, "grad_norm": 0.8626097440719604, "learning_rate": 0.0001, "loss": 0.0092, "step": 195390 }, { "epoch": 1285.5263157894738, "grad_norm": 0.9792760014533997, "learning_rate": 0.0001, "loss": 0.0086, "step": 195400 }, { "epoch": 1285.592105263158, "grad_norm": 1.0166680812835693, "learning_rate": 0.0001, "loss": 0.0103, "step": 195410 }, { "epoch": 1285.657894736842, "grad_norm": 0.970467209815979, "learning_rate": 0.0001, "loss": 0.0096, "step": 195420 }, { "epoch": 1285.7236842105262, "grad_norm": 1.0889147520065308, "learning_rate": 0.0001, "loss": 0.0101, "step": 195430 }, { "epoch": 1285.7894736842106, "grad_norm": 0.7250308394432068, "learning_rate": 0.0001, "loss": 0.0107, "step": 195440 }, { "epoch": 1285.8552631578948, "grad_norm": 0.8987478613853455, "learning_rate": 0.0001, "loss": 0.009, "step": 195450 }, { "epoch": 1285.921052631579, "grad_norm": 0.7027731537818909, "learning_rate": 0.0001, "loss": 0.0105, "step": 195460 }, { "epoch": 1285.9868421052631, "grad_norm": 1.0836803913116455, "learning_rate": 0.0001, "loss": 0.0109, "step": 195470 }, { "epoch": 1286.0526315789473, "grad_norm": 0.9599834084510803, "learning_rate": 0.0001, "loss": 0.0096, "step": 195480 }, { "epoch": 1286.1184210526317, "grad_norm": 1.119584083557129, "learning_rate": 0.0001, "loss": 0.0113, "step": 195490 }, { "epoch": 1286.1842105263158, "grad_norm": 1.0971158742904663, "learning_rate": 0.0001, "loss": 0.0091, "step": 195500 }, { "epoch": 1286.25, "grad_norm": 0.7488318085670471, "learning_rate": 0.0001, "loss": 0.0113, "step": 195510 }, { "epoch": 1286.3157894736842, "grad_norm": 0.7044021487236023, "learning_rate": 0.0001, "loss": 0.0077, "step": 195520 }, { "epoch": 1286.3815789473683, "grad_norm": 1.1740487813949585, "learning_rate": 0.0001, "loss": 0.0113, "step": 195530 }, { "epoch": 1286.4473684210527, "grad_norm": 1.206044316291809, "learning_rate": 0.0001, "loss": 0.009, "step": 195540 }, { "epoch": 1286.5131578947369, "grad_norm": 0.9043120741844177, "learning_rate": 0.0001, "loss": 0.011, "step": 195550 }, { "epoch": 1286.578947368421, "grad_norm": 1.060771107673645, "learning_rate": 0.0001, "loss": 0.0103, "step": 195560 }, { "epoch": 1286.6447368421052, "grad_norm": 1.237007975578308, "learning_rate": 0.0001, "loss": 0.0084, "step": 195570 }, { "epoch": 1286.7105263157894, "grad_norm": 1.2525277137756348, "learning_rate": 0.0001, "loss": 0.0084, "step": 195580 }, { "epoch": 1286.7763157894738, "grad_norm": 0.9757577180862427, "learning_rate": 0.0001, "loss": 0.01, "step": 195590 }, { "epoch": 1286.842105263158, "grad_norm": 0.8785386085510254, "learning_rate": 0.0001, "loss": 0.0091, "step": 195600 }, { "epoch": 1286.907894736842, "grad_norm": 1.1863728761672974, "learning_rate": 0.0001, "loss": 0.0098, "step": 195610 }, { "epoch": 1286.9736842105262, "grad_norm": 0.9649335741996765, "learning_rate": 0.0001, "loss": 0.0091, "step": 195620 }, { "epoch": 1287.0394736842106, "grad_norm": 0.8843129873275757, "learning_rate": 0.0001, "loss": 0.0091, "step": 195630 }, { "epoch": 1287.1052631578948, "grad_norm": 1.302202820777893, "learning_rate": 0.0001, "loss": 0.0108, "step": 195640 }, { "epoch": 1287.171052631579, "grad_norm": 1.2117173671722412, "learning_rate": 0.0001, "loss": 0.0108, "step": 195650 }, { "epoch": 1287.2368421052631, "grad_norm": 1.168351411819458, "learning_rate": 0.0001, "loss": 0.0088, "step": 195660 }, { "epoch": 1287.3026315789473, "grad_norm": 1.0641189813613892, "learning_rate": 0.0001, "loss": 0.0124, "step": 195670 }, { "epoch": 1287.3684210526317, "grad_norm": 1.3389551639556885, "learning_rate": 0.0001, "loss": 0.0095, "step": 195680 }, { "epoch": 1287.4342105263158, "grad_norm": 0.8608621954917908, "learning_rate": 0.0001, "loss": 0.0107, "step": 195690 }, { "epoch": 1287.5, "grad_norm": 0.9058868885040283, "learning_rate": 0.0001, "loss": 0.0095, "step": 195700 }, { "epoch": 1287.5657894736842, "grad_norm": 1.3241692781448364, "learning_rate": 0.0001, "loss": 0.0095, "step": 195710 }, { "epoch": 1287.6315789473683, "grad_norm": 1.0901117324829102, "learning_rate": 0.0001, "loss": 0.0096, "step": 195720 }, { "epoch": 1287.6973684210527, "grad_norm": 1.2744159698486328, "learning_rate": 0.0001, "loss": 0.009, "step": 195730 }, { "epoch": 1287.7631578947369, "grad_norm": 0.9215327501296997, "learning_rate": 0.0001, "loss": 0.0096, "step": 195740 }, { "epoch": 1287.828947368421, "grad_norm": 0.8423858284950256, "learning_rate": 0.0001, "loss": 0.0102, "step": 195750 }, { "epoch": 1287.8947368421052, "grad_norm": 1.16117262840271, "learning_rate": 0.0001, "loss": 0.0087, "step": 195760 }, { "epoch": 1287.9605263157894, "grad_norm": 0.973017156124115, "learning_rate": 0.0001, "loss": 0.011, "step": 195770 }, { "epoch": 1288.0263157894738, "grad_norm": 0.9248650074005127, "learning_rate": 0.0001, "loss": 0.0087, "step": 195780 }, { "epoch": 1288.092105263158, "grad_norm": 1.0818302631378174, "learning_rate": 0.0001, "loss": 0.0087, "step": 195790 }, { "epoch": 1288.157894736842, "grad_norm": 0.8217904567718506, "learning_rate": 0.0001, "loss": 0.0082, "step": 195800 }, { "epoch": 1288.2236842105262, "grad_norm": 1.021702527999878, "learning_rate": 0.0001, "loss": 0.0095, "step": 195810 }, { "epoch": 1288.2894736842106, "grad_norm": 0.8209040760993958, "learning_rate": 0.0001, "loss": 0.0102, "step": 195820 }, { "epoch": 1288.3552631578948, "grad_norm": 0.7926214933395386, "learning_rate": 0.0001, "loss": 0.009, "step": 195830 }, { "epoch": 1288.421052631579, "grad_norm": 0.9760251045227051, "learning_rate": 0.0001, "loss": 0.0089, "step": 195840 }, { "epoch": 1288.4868421052631, "grad_norm": 0.9303892850875854, "learning_rate": 0.0001, "loss": 0.0107, "step": 195850 }, { "epoch": 1288.5526315789473, "grad_norm": 1.2029811143875122, "learning_rate": 0.0001, "loss": 0.0106, "step": 195860 }, { "epoch": 1288.6184210526317, "grad_norm": 1.0326858758926392, "learning_rate": 0.0001, "loss": 0.0089, "step": 195870 }, { "epoch": 1288.6842105263158, "grad_norm": 0.8558474779129028, "learning_rate": 0.0001, "loss": 0.0087, "step": 195880 }, { "epoch": 1288.75, "grad_norm": 0.9762908220291138, "learning_rate": 0.0001, "loss": 0.0092, "step": 195890 }, { "epoch": 1288.8157894736842, "grad_norm": 0.9533197283744812, "learning_rate": 0.0001, "loss": 0.0098, "step": 195900 }, { "epoch": 1288.8815789473683, "grad_norm": 1.0016499757766724, "learning_rate": 0.0001, "loss": 0.0099, "step": 195910 }, { "epoch": 1288.9473684210527, "grad_norm": 1.404975414276123, "learning_rate": 0.0001, "loss": 0.0075, "step": 195920 }, { "epoch": 1289.0131578947369, "grad_norm": 1.2138851881027222, "learning_rate": 0.0001, "loss": 0.0084, "step": 195930 }, { "epoch": 1289.078947368421, "grad_norm": 1.0937894582748413, "learning_rate": 0.0001, "loss": 0.0091, "step": 195940 }, { "epoch": 1289.1447368421052, "grad_norm": 0.9936800599098206, "learning_rate": 0.0001, "loss": 0.009, "step": 195950 }, { "epoch": 1289.2105263157894, "grad_norm": 1.2059030532836914, "learning_rate": 0.0001, "loss": 0.0071, "step": 195960 }, { "epoch": 1289.2763157894738, "grad_norm": 1.204706072807312, "learning_rate": 0.0001, "loss": 0.0097, "step": 195970 }, { "epoch": 1289.342105263158, "grad_norm": 0.8758791089057922, "learning_rate": 0.0001, "loss": 0.0081, "step": 195980 }, { "epoch": 1289.407894736842, "grad_norm": 0.9688547849655151, "learning_rate": 0.0001, "loss": 0.01, "step": 195990 }, { "epoch": 1289.4736842105262, "grad_norm": 0.8228425979614258, "learning_rate": 0.0001, "loss": 0.0088, "step": 196000 }, { "epoch": 1289.5394736842106, "grad_norm": 0.6902533173561096, "learning_rate": 0.0001, "loss": 0.0079, "step": 196010 }, { "epoch": 1289.6052631578948, "grad_norm": 1.0823955535888672, "learning_rate": 0.0001, "loss": 0.0099, "step": 196020 }, { "epoch": 1289.671052631579, "grad_norm": 1.114900827407837, "learning_rate": 0.0001, "loss": 0.0085, "step": 196030 }, { "epoch": 1289.7368421052631, "grad_norm": 1.076406717300415, "learning_rate": 0.0001, "loss": 0.01, "step": 196040 }, { "epoch": 1289.8026315789473, "grad_norm": 0.841337263584137, "learning_rate": 0.0001, "loss": 0.0091, "step": 196050 }, { "epoch": 1289.8684210526317, "grad_norm": 0.953133761882782, "learning_rate": 0.0001, "loss": 0.0086, "step": 196060 }, { "epoch": 1289.9342105263158, "grad_norm": 0.9329866766929626, "learning_rate": 0.0001, "loss": 0.008, "step": 196070 }, { "epoch": 1290.0, "grad_norm": 1.4121922254562378, "learning_rate": 0.0001, "loss": 0.0099, "step": 196080 }, { "epoch": 1290.0657894736842, "grad_norm": 1.1717429161071777, "learning_rate": 0.0001, "loss": 0.0082, "step": 196090 }, { "epoch": 1290.1315789473683, "grad_norm": 1.0481109619140625, "learning_rate": 0.0001, "loss": 0.0087, "step": 196100 }, { "epoch": 1290.1973684210527, "grad_norm": 1.3172156810760498, "learning_rate": 0.0001, "loss": 0.0103, "step": 196110 }, { "epoch": 1290.2631578947369, "grad_norm": 1.1237008571624756, "learning_rate": 0.0001, "loss": 0.0106, "step": 196120 }, { "epoch": 1290.328947368421, "grad_norm": 1.2435554265975952, "learning_rate": 0.0001, "loss": 0.0098, "step": 196130 }, { "epoch": 1290.3947368421052, "grad_norm": 0.9842655062675476, "learning_rate": 0.0001, "loss": 0.008, "step": 196140 }, { "epoch": 1290.4605263157894, "grad_norm": 0.9428750276565552, "learning_rate": 0.0001, "loss": 0.0068, "step": 196150 }, { "epoch": 1290.5263157894738, "grad_norm": 0.8579443693161011, "learning_rate": 0.0001, "loss": 0.0096, "step": 196160 }, { "epoch": 1290.592105263158, "grad_norm": 0.9661730527877808, "learning_rate": 0.0001, "loss": 0.008, "step": 196170 }, { "epoch": 1290.657894736842, "grad_norm": 1.2594717741012573, "learning_rate": 0.0001, "loss": 0.0093, "step": 196180 }, { "epoch": 1290.7236842105262, "grad_norm": 1.2238619327545166, "learning_rate": 0.0001, "loss": 0.0071, "step": 196190 }, { "epoch": 1290.7894736842106, "grad_norm": 1.045121431350708, "learning_rate": 0.0001, "loss": 0.0103, "step": 196200 }, { "epoch": 1290.8552631578948, "grad_norm": 1.2637920379638672, "learning_rate": 0.0001, "loss": 0.0105, "step": 196210 }, { "epoch": 1290.921052631579, "grad_norm": 1.0736345052719116, "learning_rate": 0.0001, "loss": 0.0068, "step": 196220 }, { "epoch": 1290.9868421052631, "grad_norm": 1.1902934312820435, "learning_rate": 0.0001, "loss": 0.0084, "step": 196230 }, { "epoch": 1291.0526315789473, "grad_norm": 1.206912875175476, "learning_rate": 0.0001, "loss": 0.0086, "step": 196240 }, { "epoch": 1291.1184210526317, "grad_norm": 0.6837294697761536, "learning_rate": 0.0001, "loss": 0.0093, "step": 196250 }, { "epoch": 1291.1842105263158, "grad_norm": 0.8661401271820068, "learning_rate": 0.0001, "loss": 0.0076, "step": 196260 }, { "epoch": 1291.25, "grad_norm": 0.7421029210090637, "learning_rate": 0.0001, "loss": 0.0079, "step": 196270 }, { "epoch": 1291.3157894736842, "grad_norm": 1.000644326210022, "learning_rate": 0.0001, "loss": 0.0101, "step": 196280 }, { "epoch": 1291.3815789473683, "grad_norm": 0.9609134793281555, "learning_rate": 0.0001, "loss": 0.0082, "step": 196290 }, { "epoch": 1291.4473684210527, "grad_norm": 0.8988838791847229, "learning_rate": 0.0001, "loss": 0.0089, "step": 196300 }, { "epoch": 1291.5131578947369, "grad_norm": 1.2548774480819702, "learning_rate": 0.0001, "loss": 0.008, "step": 196310 }, { "epoch": 1291.578947368421, "grad_norm": 1.2236343622207642, "learning_rate": 0.0001, "loss": 0.009, "step": 196320 }, { "epoch": 1291.6447368421052, "grad_norm": 0.982333242893219, "learning_rate": 0.0001, "loss": 0.0097, "step": 196330 }, { "epoch": 1291.7105263157894, "grad_norm": 1.1261245012283325, "learning_rate": 0.0001, "loss": 0.0086, "step": 196340 }, { "epoch": 1291.7763157894738, "grad_norm": 1.1063497066497803, "learning_rate": 0.0001, "loss": 0.0095, "step": 196350 }, { "epoch": 1291.842105263158, "grad_norm": 0.8248695731163025, "learning_rate": 0.0001, "loss": 0.0108, "step": 196360 }, { "epoch": 1291.907894736842, "grad_norm": 1.017749309539795, "learning_rate": 0.0001, "loss": 0.0099, "step": 196370 }, { "epoch": 1291.9736842105262, "grad_norm": 0.7296943664550781, "learning_rate": 0.0001, "loss": 0.0082, "step": 196380 }, { "epoch": 1292.0394736842106, "grad_norm": 0.9445720314979553, "learning_rate": 0.0001, "loss": 0.0071, "step": 196390 }, { "epoch": 1292.1052631578948, "grad_norm": 2.0632238388061523, "learning_rate": 0.0001, "loss": 0.0079, "step": 196400 }, { "epoch": 1292.171052631579, "grad_norm": 1.4882488250732422, "learning_rate": 0.0001, "loss": 0.0075, "step": 196410 }, { "epoch": 1292.2368421052631, "grad_norm": 1.1946419477462769, "learning_rate": 0.0001, "loss": 0.0116, "step": 196420 }, { "epoch": 1292.3026315789473, "grad_norm": 1.3664677143096924, "learning_rate": 0.0001, "loss": 0.0098, "step": 196430 }, { "epoch": 1292.3684210526317, "grad_norm": 1.171695590019226, "learning_rate": 0.0001, "loss": 0.0092, "step": 196440 }, { "epoch": 1292.4342105263158, "grad_norm": 1.2387498617172241, "learning_rate": 0.0001, "loss": 0.0084, "step": 196450 }, { "epoch": 1292.5, "grad_norm": 1.0718955993652344, "learning_rate": 0.0001, "loss": 0.0097, "step": 196460 }, { "epoch": 1292.5657894736842, "grad_norm": 1.0958354473114014, "learning_rate": 0.0001, "loss": 0.0088, "step": 196470 }, { "epoch": 1292.6315789473683, "grad_norm": 1.5984266996383667, "learning_rate": 0.0001, "loss": 0.0075, "step": 196480 }, { "epoch": 1292.6973684210527, "grad_norm": 1.1815001964569092, "learning_rate": 0.0001, "loss": 0.0114, "step": 196490 }, { "epoch": 1292.7631578947369, "grad_norm": 1.5802987813949585, "learning_rate": 0.0001, "loss": 0.0096, "step": 196500 }, { "epoch": 1292.828947368421, "grad_norm": 1.259151816368103, "learning_rate": 0.0001, "loss": 0.009, "step": 196510 }, { "epoch": 1292.8947368421052, "grad_norm": 1.411533236503601, "learning_rate": 0.0001, "loss": 0.0074, "step": 196520 }, { "epoch": 1292.9605263157894, "grad_norm": 0.8868615031242371, "learning_rate": 0.0001, "loss": 0.0099, "step": 196530 }, { "epoch": 1293.0263157894738, "grad_norm": 0.9721224904060364, "learning_rate": 0.0001, "loss": 0.0097, "step": 196540 }, { "epoch": 1293.092105263158, "grad_norm": 1.0972973108291626, "learning_rate": 0.0001, "loss": 0.0082, "step": 196550 }, { "epoch": 1293.157894736842, "grad_norm": 0.9356346130371094, "learning_rate": 0.0001, "loss": 0.0094, "step": 196560 }, { "epoch": 1293.2236842105262, "grad_norm": 1.2877929210662842, "learning_rate": 0.0001, "loss": 0.0104, "step": 196570 }, { "epoch": 1293.2894736842106, "grad_norm": 0.9061616659164429, "learning_rate": 0.0001, "loss": 0.0093, "step": 196580 }, { "epoch": 1293.3552631578948, "grad_norm": 1.1051673889160156, "learning_rate": 0.0001, "loss": 0.0089, "step": 196590 }, { "epoch": 1293.421052631579, "grad_norm": 1.0051326751708984, "learning_rate": 0.0001, "loss": 0.0088, "step": 196600 }, { "epoch": 1293.4868421052631, "grad_norm": 1.1201657056808472, "learning_rate": 0.0001, "loss": 0.0076, "step": 196610 }, { "epoch": 1293.5526315789473, "grad_norm": 0.9499081969261169, "learning_rate": 0.0001, "loss": 0.0097, "step": 196620 }, { "epoch": 1293.6184210526317, "grad_norm": 0.87836092710495, "learning_rate": 0.0001, "loss": 0.0086, "step": 196630 }, { "epoch": 1293.6842105263158, "grad_norm": 0.5928868651390076, "learning_rate": 0.0001, "loss": 0.0095, "step": 196640 }, { "epoch": 1293.75, "grad_norm": 0.7514234781265259, "learning_rate": 0.0001, "loss": 0.0083, "step": 196650 }, { "epoch": 1293.8157894736842, "grad_norm": 0.8907245397567749, "learning_rate": 0.0001, "loss": 0.0091, "step": 196660 }, { "epoch": 1293.8815789473683, "grad_norm": 0.7311668395996094, "learning_rate": 0.0001, "loss": 0.0114, "step": 196670 }, { "epoch": 1293.9473684210527, "grad_norm": 1.2829216718673706, "learning_rate": 0.0001, "loss": 0.0084, "step": 196680 }, { "epoch": 1294.0131578947369, "grad_norm": 0.797347366809845, "learning_rate": 0.0001, "loss": 0.0082, "step": 196690 }, { "epoch": 1294.078947368421, "grad_norm": 1.1499863862991333, "learning_rate": 0.0001, "loss": 0.0069, "step": 196700 }, { "epoch": 1294.1447368421052, "grad_norm": 1.033013939857483, "learning_rate": 0.0001, "loss": 0.0092, "step": 196710 }, { "epoch": 1294.2105263157894, "grad_norm": 0.9540650248527527, "learning_rate": 0.0001, "loss": 0.0114, "step": 196720 }, { "epoch": 1294.2763157894738, "grad_norm": 1.2450796365737915, "learning_rate": 0.0001, "loss": 0.0081, "step": 196730 }, { "epoch": 1294.342105263158, "grad_norm": 0.8691481947898865, "learning_rate": 0.0001, "loss": 0.0095, "step": 196740 }, { "epoch": 1294.407894736842, "grad_norm": 1.0715320110321045, "learning_rate": 0.0001, "loss": 0.0099, "step": 196750 }, { "epoch": 1294.4736842105262, "grad_norm": 1.0476354360580444, "learning_rate": 0.0001, "loss": 0.0096, "step": 196760 }, { "epoch": 1294.5394736842106, "grad_norm": 0.9503270983695984, "learning_rate": 0.0001, "loss": 0.0112, "step": 196770 }, { "epoch": 1294.6052631578948, "grad_norm": 1.1776961088180542, "learning_rate": 0.0001, "loss": 0.0091, "step": 196780 }, { "epoch": 1294.671052631579, "grad_norm": 0.6950992345809937, "learning_rate": 0.0001, "loss": 0.0088, "step": 196790 }, { "epoch": 1294.7368421052631, "grad_norm": 1.3217885494232178, "learning_rate": 0.0001, "loss": 0.0071, "step": 196800 }, { "epoch": 1294.8026315789473, "grad_norm": 0.9721072912216187, "learning_rate": 0.0001, "loss": 0.0073, "step": 196810 }, { "epoch": 1294.8684210526317, "grad_norm": 0.996015191078186, "learning_rate": 0.0001, "loss": 0.0072, "step": 196820 }, { "epoch": 1294.9342105263158, "grad_norm": 1.1851248741149902, "learning_rate": 0.0001, "loss": 0.0093, "step": 196830 }, { "epoch": 1295.0, "grad_norm": 1.0879162549972534, "learning_rate": 0.0001, "loss": 0.0094, "step": 196840 }, { "epoch": 1295.0657894736842, "grad_norm": 0.9492610692977905, "learning_rate": 0.0001, "loss": 0.0101, "step": 196850 }, { "epoch": 1295.1315789473683, "grad_norm": 1.0667566061019897, "learning_rate": 0.0001, "loss": 0.0077, "step": 196860 }, { "epoch": 1295.1973684210527, "grad_norm": 0.8629001975059509, "learning_rate": 0.0001, "loss": 0.0098, "step": 196870 }, { "epoch": 1295.2631578947369, "grad_norm": 0.9111774563789368, "learning_rate": 0.0001, "loss": 0.0092, "step": 196880 }, { "epoch": 1295.328947368421, "grad_norm": 0.9569632411003113, "learning_rate": 0.0001, "loss": 0.0112, "step": 196890 }, { "epoch": 1295.3947368421052, "grad_norm": 0.8886310458183289, "learning_rate": 0.0001, "loss": 0.0083, "step": 196900 }, { "epoch": 1295.4605263157894, "grad_norm": 1.119640588760376, "learning_rate": 0.0001, "loss": 0.0083, "step": 196910 }, { "epoch": 1295.5263157894738, "grad_norm": 0.7103597521781921, "learning_rate": 0.0001, "loss": 0.0098, "step": 196920 }, { "epoch": 1295.592105263158, "grad_norm": 1.0032562017440796, "learning_rate": 0.0001, "loss": 0.007, "step": 196930 }, { "epoch": 1295.657894736842, "grad_norm": 0.6882836818695068, "learning_rate": 0.0001, "loss": 0.0065, "step": 196940 }, { "epoch": 1295.7236842105262, "grad_norm": 1.1084208488464355, "learning_rate": 0.0001, "loss": 0.0073, "step": 196950 }, { "epoch": 1295.7894736842106, "grad_norm": 1.101213812828064, "learning_rate": 0.0001, "loss": 0.0109, "step": 196960 }, { "epoch": 1295.8552631578948, "grad_norm": 1.2237820625305176, "learning_rate": 0.0001, "loss": 0.0108, "step": 196970 }, { "epoch": 1295.921052631579, "grad_norm": 1.164002537727356, "learning_rate": 0.0001, "loss": 0.0087, "step": 196980 }, { "epoch": 1295.9868421052631, "grad_norm": 0.5948657989501953, "learning_rate": 0.0001, "loss": 0.0087, "step": 196990 }, { "epoch": 1296.0526315789473, "grad_norm": 0.8001142144203186, "learning_rate": 0.0001, "loss": 0.0073, "step": 197000 }, { "epoch": 1296.1184210526317, "grad_norm": 0.9679588675498962, "learning_rate": 0.0001, "loss": 0.0093, "step": 197010 }, { "epoch": 1296.1842105263158, "grad_norm": 0.7849892377853394, "learning_rate": 0.0001, "loss": 0.0087, "step": 197020 }, { "epoch": 1296.25, "grad_norm": 0.9673242568969727, "learning_rate": 0.0001, "loss": 0.0095, "step": 197030 }, { "epoch": 1296.3157894736842, "grad_norm": 0.968097984790802, "learning_rate": 0.0001, "loss": 0.0079, "step": 197040 }, { "epoch": 1296.3815789473683, "grad_norm": 0.8999632596969604, "learning_rate": 0.0001, "loss": 0.0077, "step": 197050 }, { "epoch": 1296.4473684210527, "grad_norm": 0.7191337943077087, "learning_rate": 0.0001, "loss": 0.0106, "step": 197060 }, { "epoch": 1296.5131578947369, "grad_norm": 1.1006282567977905, "learning_rate": 0.0001, "loss": 0.009, "step": 197070 }, { "epoch": 1296.578947368421, "grad_norm": 1.0084878206253052, "learning_rate": 0.0001, "loss": 0.0088, "step": 197080 }, { "epoch": 1296.6447368421052, "grad_norm": 0.9073280096054077, "learning_rate": 0.0001, "loss": 0.0103, "step": 197090 }, { "epoch": 1296.7105263157894, "grad_norm": 0.8795638680458069, "learning_rate": 0.0001, "loss": 0.0099, "step": 197100 }, { "epoch": 1296.7763157894738, "grad_norm": 1.0686129331588745, "learning_rate": 0.0001, "loss": 0.0087, "step": 197110 }, { "epoch": 1296.842105263158, "grad_norm": 0.8052754402160645, "learning_rate": 0.0001, "loss": 0.0098, "step": 197120 }, { "epoch": 1296.907894736842, "grad_norm": 0.8181116580963135, "learning_rate": 0.0001, "loss": 0.0092, "step": 197130 }, { "epoch": 1296.9736842105262, "grad_norm": 1.0895122289657593, "learning_rate": 0.0001, "loss": 0.0074, "step": 197140 }, { "epoch": 1297.0394736842106, "grad_norm": 0.7319552898406982, "learning_rate": 0.0001, "loss": 0.0096, "step": 197150 }, { "epoch": 1297.1052631578948, "grad_norm": 1.2623764276504517, "learning_rate": 0.0001, "loss": 0.0082, "step": 197160 }, { "epoch": 1297.171052631579, "grad_norm": 1.238411784172058, "learning_rate": 0.0001, "loss": 0.0097, "step": 197170 }, { "epoch": 1297.2368421052631, "grad_norm": 1.0991114377975464, "learning_rate": 0.0001, "loss": 0.0108, "step": 197180 }, { "epoch": 1297.3026315789473, "grad_norm": 0.8547118306159973, "learning_rate": 0.0001, "loss": 0.0078, "step": 197190 }, { "epoch": 1297.3684210526317, "grad_norm": 1.1389788389205933, "learning_rate": 0.0001, "loss": 0.0079, "step": 197200 }, { "epoch": 1297.4342105263158, "grad_norm": 1.2709906101226807, "learning_rate": 0.0001, "loss": 0.0106, "step": 197210 }, { "epoch": 1297.5, "grad_norm": 0.9706898331642151, "learning_rate": 0.0001, "loss": 0.0086, "step": 197220 }, { "epoch": 1297.5657894736842, "grad_norm": 0.8353281021118164, "learning_rate": 0.0001, "loss": 0.01, "step": 197230 }, { "epoch": 1297.6315789473683, "grad_norm": 1.0063377618789673, "learning_rate": 0.0001, "loss": 0.0097, "step": 197240 }, { "epoch": 1297.6973684210527, "grad_norm": 1.0630806684494019, "learning_rate": 0.0001, "loss": 0.0091, "step": 197250 }, { "epoch": 1297.7631578947369, "grad_norm": 0.7940020561218262, "learning_rate": 0.0001, "loss": 0.0091, "step": 197260 }, { "epoch": 1297.828947368421, "grad_norm": 1.0338921546936035, "learning_rate": 0.0001, "loss": 0.0089, "step": 197270 }, { "epoch": 1297.8947368421052, "grad_norm": 1.1591367721557617, "learning_rate": 0.0001, "loss": 0.0095, "step": 197280 }, { "epoch": 1297.9605263157894, "grad_norm": 0.9339560270309448, "learning_rate": 0.0001, "loss": 0.0084, "step": 197290 }, { "epoch": 1298.0263157894738, "grad_norm": 1.0255118608474731, "learning_rate": 0.0001, "loss": 0.0105, "step": 197300 }, { "epoch": 1298.092105263158, "grad_norm": 1.0539302825927734, "learning_rate": 0.0001, "loss": 0.0086, "step": 197310 }, { "epoch": 1298.157894736842, "grad_norm": 0.9863162636756897, "learning_rate": 0.0001, "loss": 0.0069, "step": 197320 }, { "epoch": 1298.2236842105262, "grad_norm": 1.1610512733459473, "learning_rate": 0.0001, "loss": 0.0111, "step": 197330 }, { "epoch": 1298.2894736842106, "grad_norm": 0.9269651174545288, "learning_rate": 0.0001, "loss": 0.0082, "step": 197340 }, { "epoch": 1298.3552631578948, "grad_norm": 0.8783165812492371, "learning_rate": 0.0001, "loss": 0.008, "step": 197350 }, { "epoch": 1298.421052631579, "grad_norm": 0.9454626441001892, "learning_rate": 0.0001, "loss": 0.0082, "step": 197360 }, { "epoch": 1298.4868421052631, "grad_norm": 0.9759495854377747, "learning_rate": 0.0001, "loss": 0.0086, "step": 197370 }, { "epoch": 1298.5526315789473, "grad_norm": 1.3680849075317383, "learning_rate": 0.0001, "loss": 0.0103, "step": 197380 }, { "epoch": 1298.6184210526317, "grad_norm": 1.3781981468200684, "learning_rate": 0.0001, "loss": 0.0092, "step": 197390 }, { "epoch": 1298.6842105263158, "grad_norm": 1.1003533601760864, "learning_rate": 0.0001, "loss": 0.0102, "step": 197400 }, { "epoch": 1298.75, "grad_norm": 1.1373398303985596, "learning_rate": 0.0001, "loss": 0.011, "step": 197410 }, { "epoch": 1298.8157894736842, "grad_norm": 0.7512350082397461, "learning_rate": 0.0001, "loss": 0.009, "step": 197420 }, { "epoch": 1298.8815789473683, "grad_norm": 1.035928726196289, "learning_rate": 0.0001, "loss": 0.0088, "step": 197430 }, { "epoch": 1298.9473684210527, "grad_norm": 1.0015102624893188, "learning_rate": 0.0001, "loss": 0.0099, "step": 197440 }, { "epoch": 1299.0131578947369, "grad_norm": 1.1075206995010376, "learning_rate": 0.0001, "loss": 0.0084, "step": 197450 }, { "epoch": 1299.078947368421, "grad_norm": 1.2172952890396118, "learning_rate": 0.0001, "loss": 0.0076, "step": 197460 }, { "epoch": 1299.1447368421052, "grad_norm": 1.0585336685180664, "learning_rate": 0.0001, "loss": 0.0097, "step": 197470 }, { "epoch": 1299.2105263157894, "grad_norm": 0.9066409468650818, "learning_rate": 0.0001, "loss": 0.0091, "step": 197480 }, { "epoch": 1299.2763157894738, "grad_norm": 1.2567695379257202, "learning_rate": 0.0001, "loss": 0.0076, "step": 197490 }, { "epoch": 1299.342105263158, "grad_norm": 1.0604040622711182, "learning_rate": 0.0001, "loss": 0.0101, "step": 197500 }, { "epoch": 1299.407894736842, "grad_norm": 1.0578597784042358, "learning_rate": 0.0001, "loss": 0.009, "step": 197510 }, { "epoch": 1299.4736842105262, "grad_norm": 0.9691057205200195, "learning_rate": 0.0001, "loss": 0.0098, "step": 197520 }, { "epoch": 1299.5394736842106, "grad_norm": 1.079813003540039, "learning_rate": 0.0001, "loss": 0.0097, "step": 197530 }, { "epoch": 1299.6052631578948, "grad_norm": 1.141811490058899, "learning_rate": 0.0001, "loss": 0.0073, "step": 197540 }, { "epoch": 1299.671052631579, "grad_norm": 0.8607233166694641, "learning_rate": 0.0001, "loss": 0.0109, "step": 197550 }, { "epoch": 1299.7368421052631, "grad_norm": 0.815772533416748, "learning_rate": 0.0001, "loss": 0.0099, "step": 197560 }, { "epoch": 1299.8026315789473, "grad_norm": 0.8933233022689819, "learning_rate": 0.0001, "loss": 0.009, "step": 197570 }, { "epoch": 1299.8684210526317, "grad_norm": 0.9021427035331726, "learning_rate": 0.0001, "loss": 0.0112, "step": 197580 }, { "epoch": 1299.9342105263158, "grad_norm": 0.763812780380249, "learning_rate": 0.0001, "loss": 0.009, "step": 197590 }, { "epoch": 1300.0, "grad_norm": 0.9004136323928833, "learning_rate": 0.0001, "loss": 0.009, "step": 197600 }, { "epoch": 1300.0657894736842, "grad_norm": 0.7930411100387573, "learning_rate": 0.0001, "loss": 0.0093, "step": 197610 }, { "epoch": 1300.1315789473683, "grad_norm": 1.1747939586639404, "learning_rate": 0.0001, "loss": 0.0084, "step": 197620 }, { "epoch": 1300.1973684210527, "grad_norm": 0.7587085962295532, "learning_rate": 0.0001, "loss": 0.0094, "step": 197630 }, { "epoch": 1300.2631578947369, "grad_norm": 0.8571013808250427, "learning_rate": 0.0001, "loss": 0.0085, "step": 197640 }, { "epoch": 1300.328947368421, "grad_norm": 1.2042564153671265, "learning_rate": 0.0001, "loss": 0.0102, "step": 197650 }, { "epoch": 1300.3947368421052, "grad_norm": 1.0559279918670654, "learning_rate": 0.0001, "loss": 0.0084, "step": 197660 }, { "epoch": 1300.4605263157894, "grad_norm": 1.0643962621688843, "learning_rate": 0.0001, "loss": 0.0098, "step": 197670 }, { "epoch": 1300.5263157894738, "grad_norm": 1.2402312755584717, "learning_rate": 0.0001, "loss": 0.0091, "step": 197680 }, { "epoch": 1300.592105263158, "grad_norm": 1.1355198621749878, "learning_rate": 0.0001, "loss": 0.0101, "step": 197690 }, { "epoch": 1300.657894736842, "grad_norm": 0.8521161675453186, "learning_rate": 0.0001, "loss": 0.0081, "step": 197700 }, { "epoch": 1300.7236842105262, "grad_norm": 0.7998043894767761, "learning_rate": 0.0001, "loss": 0.0091, "step": 197710 }, { "epoch": 1300.7894736842106, "grad_norm": 1.1688848733901978, "learning_rate": 0.0001, "loss": 0.012, "step": 197720 }, { "epoch": 1300.8552631578948, "grad_norm": 1.0704905986785889, "learning_rate": 0.0001, "loss": 0.009, "step": 197730 }, { "epoch": 1300.921052631579, "grad_norm": 0.9947075247764587, "learning_rate": 0.0001, "loss": 0.0101, "step": 197740 }, { "epoch": 1300.9868421052631, "grad_norm": 1.168898582458496, "learning_rate": 0.0001, "loss": 0.0108, "step": 197750 }, { "epoch": 1301.0526315789473, "grad_norm": 1.1258999109268188, "learning_rate": 0.0001, "loss": 0.0112, "step": 197760 }, { "epoch": 1301.1184210526317, "grad_norm": 0.9728697538375854, "learning_rate": 0.0001, "loss": 0.0113, "step": 197770 }, { "epoch": 1301.1842105263158, "grad_norm": 0.923896849155426, "learning_rate": 0.0001, "loss": 0.0095, "step": 197780 }, { "epoch": 1301.25, "grad_norm": 1.1840898990631104, "learning_rate": 0.0001, "loss": 0.0077, "step": 197790 }, { "epoch": 1301.3157894736842, "grad_norm": 1.1185284852981567, "learning_rate": 0.0001, "loss": 0.0084, "step": 197800 }, { "epoch": 1301.3815789473683, "grad_norm": 0.7468376159667969, "learning_rate": 0.0001, "loss": 0.0101, "step": 197810 }, { "epoch": 1301.4473684210527, "grad_norm": 0.6839302778244019, "learning_rate": 0.0001, "loss": 0.0092, "step": 197820 }, { "epoch": 1301.5131578947369, "grad_norm": 0.9034473299980164, "learning_rate": 0.0001, "loss": 0.0087, "step": 197830 }, { "epoch": 1301.578947368421, "grad_norm": 1.357205867767334, "learning_rate": 0.0001, "loss": 0.0095, "step": 197840 }, { "epoch": 1301.6447368421052, "grad_norm": 1.2651640176773071, "learning_rate": 0.0001, "loss": 0.0097, "step": 197850 }, { "epoch": 1301.7105263157894, "grad_norm": 0.9837512969970703, "learning_rate": 0.0001, "loss": 0.0092, "step": 197860 }, { "epoch": 1301.7763157894738, "grad_norm": 1.1584852933883667, "learning_rate": 0.0001, "loss": 0.0093, "step": 197870 }, { "epoch": 1301.842105263158, "grad_norm": 0.8707510232925415, "learning_rate": 0.0001, "loss": 0.0089, "step": 197880 }, { "epoch": 1301.907894736842, "grad_norm": 1.073400855064392, "learning_rate": 0.0001, "loss": 0.0101, "step": 197890 }, { "epoch": 1301.9736842105262, "grad_norm": 1.0655863285064697, "learning_rate": 0.0001, "loss": 0.0096, "step": 197900 }, { "epoch": 1302.0394736842106, "grad_norm": 0.9237596988677979, "learning_rate": 0.0001, "loss": 0.0104, "step": 197910 }, { "epoch": 1302.1052631578948, "grad_norm": 1.0519744157791138, "learning_rate": 0.0001, "loss": 0.0092, "step": 197920 }, { "epoch": 1302.171052631579, "grad_norm": 0.7526938319206238, "learning_rate": 0.0001, "loss": 0.0095, "step": 197930 }, { "epoch": 1302.2368421052631, "grad_norm": 0.9896842837333679, "learning_rate": 0.0001, "loss": 0.0106, "step": 197940 }, { "epoch": 1302.3026315789473, "grad_norm": 0.7161365151405334, "learning_rate": 0.0001, "loss": 0.0074, "step": 197950 }, { "epoch": 1302.3684210526317, "grad_norm": 1.2991057634353638, "learning_rate": 0.0001, "loss": 0.0099, "step": 197960 }, { "epoch": 1302.4342105263158, "grad_norm": 1.0487098693847656, "learning_rate": 0.0001, "loss": 0.0085, "step": 197970 }, { "epoch": 1302.5, "grad_norm": 1.1503612995147705, "learning_rate": 0.0001, "loss": 0.0081, "step": 197980 }, { "epoch": 1302.5657894736842, "grad_norm": 1.0851579904556274, "learning_rate": 0.0001, "loss": 0.0092, "step": 197990 }, { "epoch": 1302.6315789473683, "grad_norm": 1.2855541706085205, "learning_rate": 0.0001, "loss": 0.0066, "step": 198000 }, { "epoch": 1302.6973684210527, "grad_norm": 1.1550692319869995, "learning_rate": 0.0001, "loss": 0.0076, "step": 198010 }, { "epoch": 1302.7631578947369, "grad_norm": 0.8146882653236389, "learning_rate": 0.0001, "loss": 0.0081, "step": 198020 }, { "epoch": 1302.828947368421, "grad_norm": 0.6463797092437744, "learning_rate": 0.0001, "loss": 0.0087, "step": 198030 }, { "epoch": 1302.8947368421052, "grad_norm": 1.289481520652771, "learning_rate": 0.0001, "loss": 0.0114, "step": 198040 }, { "epoch": 1302.9605263157894, "grad_norm": 0.8075095415115356, "learning_rate": 0.0001, "loss": 0.0106, "step": 198050 }, { "epoch": 1303.0263157894738, "grad_norm": 1.2249948978424072, "learning_rate": 0.0001, "loss": 0.0099, "step": 198060 }, { "epoch": 1303.092105263158, "grad_norm": 0.8465560674667358, "learning_rate": 0.0001, "loss": 0.0106, "step": 198070 }, { "epoch": 1303.157894736842, "grad_norm": 0.8450498580932617, "learning_rate": 0.0001, "loss": 0.0092, "step": 198080 }, { "epoch": 1303.2236842105262, "grad_norm": 1.2542957067489624, "learning_rate": 0.0001, "loss": 0.0072, "step": 198090 }, { "epoch": 1303.2894736842106, "grad_norm": 0.9559205770492554, "learning_rate": 0.0001, "loss": 0.0101, "step": 198100 }, { "epoch": 1303.3552631578948, "grad_norm": 0.6290387511253357, "learning_rate": 0.0001, "loss": 0.0091, "step": 198110 }, { "epoch": 1303.421052631579, "grad_norm": 0.8307482600212097, "learning_rate": 0.0001, "loss": 0.0086, "step": 198120 }, { "epoch": 1303.4868421052631, "grad_norm": 0.7818741202354431, "learning_rate": 0.0001, "loss": 0.0095, "step": 198130 }, { "epoch": 1303.5526315789473, "grad_norm": 0.7035477757453918, "learning_rate": 0.0001, "loss": 0.0099, "step": 198140 }, { "epoch": 1303.6184210526317, "grad_norm": 0.9868258833885193, "learning_rate": 0.0001, "loss": 0.0093, "step": 198150 }, { "epoch": 1303.6842105263158, "grad_norm": 1.5310142040252686, "learning_rate": 0.0001, "loss": 0.0127, "step": 198160 }, { "epoch": 1303.75, "grad_norm": 1.3995954990386963, "learning_rate": 0.0001, "loss": 0.0093, "step": 198170 }, { "epoch": 1303.8157894736842, "grad_norm": 1.1260607242584229, "learning_rate": 0.0001, "loss": 0.0104, "step": 198180 }, { "epoch": 1303.8815789473683, "grad_norm": 1.8549559116363525, "learning_rate": 0.0001, "loss": 0.0082, "step": 198190 }, { "epoch": 1303.9473684210527, "grad_norm": 1.5865100622177124, "learning_rate": 0.0001, "loss": 0.0098, "step": 198200 }, { "epoch": 1304.0131578947369, "grad_norm": 1.6978919506072998, "learning_rate": 0.0001, "loss": 0.0083, "step": 198210 }, { "epoch": 1304.078947368421, "grad_norm": 1.3336944580078125, "learning_rate": 0.0001, "loss": 0.0085, "step": 198220 }, { "epoch": 1304.1447368421052, "grad_norm": 1.2469390630722046, "learning_rate": 0.0001, "loss": 0.009, "step": 198230 }, { "epoch": 1304.2105263157894, "grad_norm": 1.321698546409607, "learning_rate": 0.0001, "loss": 0.0093, "step": 198240 }, { "epoch": 1304.2763157894738, "grad_norm": 0.9702907800674438, "learning_rate": 0.0001, "loss": 0.008, "step": 198250 }, { "epoch": 1304.342105263158, "grad_norm": 1.213561773300171, "learning_rate": 0.0001, "loss": 0.0081, "step": 198260 }, { "epoch": 1304.407894736842, "grad_norm": 1.2231154441833496, "learning_rate": 0.0001, "loss": 0.0108, "step": 198270 }, { "epoch": 1304.4736842105262, "grad_norm": 1.1985797882080078, "learning_rate": 0.0001, "loss": 0.0084, "step": 198280 }, { "epoch": 1304.5394736842106, "grad_norm": 0.9450637102127075, "learning_rate": 0.0001, "loss": 0.0098, "step": 198290 }, { "epoch": 1304.6052631578948, "grad_norm": 0.9720757007598877, "learning_rate": 0.0001, "loss": 0.0096, "step": 198300 }, { "epoch": 1304.671052631579, "grad_norm": 0.849722683429718, "learning_rate": 0.0001, "loss": 0.0095, "step": 198310 }, { "epoch": 1304.7368421052631, "grad_norm": 1.1238338947296143, "learning_rate": 0.0001, "loss": 0.0079, "step": 198320 }, { "epoch": 1304.8026315789473, "grad_norm": 1.1874395608901978, "learning_rate": 0.0001, "loss": 0.0089, "step": 198330 }, { "epoch": 1304.8684210526317, "grad_norm": 1.0838104486465454, "learning_rate": 0.0001, "loss": 0.008, "step": 198340 }, { "epoch": 1304.9342105263158, "grad_norm": 1.154099464416504, "learning_rate": 0.0001, "loss": 0.0092, "step": 198350 }, { "epoch": 1305.0, "grad_norm": 1.3620731830596924, "learning_rate": 0.0001, "loss": 0.0097, "step": 198360 }, { "epoch": 1305.0657894736842, "grad_norm": 1.2841746807098389, "learning_rate": 0.0001, "loss": 0.0072, "step": 198370 }, { "epoch": 1305.1315789473683, "grad_norm": 0.7634298801422119, "learning_rate": 0.0001, "loss": 0.0118, "step": 198380 }, { "epoch": 1305.1973684210527, "grad_norm": 0.82984858751297, "learning_rate": 0.0001, "loss": 0.0084, "step": 198390 }, { "epoch": 1305.2631578947369, "grad_norm": 1.093189001083374, "learning_rate": 0.0001, "loss": 0.0076, "step": 198400 }, { "epoch": 1305.328947368421, "grad_norm": 0.9606587290763855, "learning_rate": 0.0001, "loss": 0.009, "step": 198410 }, { "epoch": 1305.3947368421052, "grad_norm": 0.9093089699745178, "learning_rate": 0.0001, "loss": 0.0099, "step": 198420 }, { "epoch": 1305.4605263157894, "grad_norm": 0.9972700476646423, "learning_rate": 0.0001, "loss": 0.0093, "step": 198430 }, { "epoch": 1305.5263157894738, "grad_norm": 0.724269688129425, "learning_rate": 0.0001, "loss": 0.012, "step": 198440 }, { "epoch": 1305.592105263158, "grad_norm": 0.6498937606811523, "learning_rate": 0.0001, "loss": 0.0094, "step": 198450 }, { "epoch": 1305.657894736842, "grad_norm": 0.9374182224273682, "learning_rate": 0.0001, "loss": 0.008, "step": 198460 }, { "epoch": 1305.7236842105262, "grad_norm": 1.0840044021606445, "learning_rate": 0.0001, "loss": 0.0099, "step": 198470 }, { "epoch": 1305.7894736842106, "grad_norm": 0.8806322813034058, "learning_rate": 0.0001, "loss": 0.0079, "step": 198480 }, { "epoch": 1305.8552631578948, "grad_norm": 1.1491776704788208, "learning_rate": 0.0001, "loss": 0.0073, "step": 198490 }, { "epoch": 1305.921052631579, "grad_norm": 1.1558951139450073, "learning_rate": 0.0001, "loss": 0.0083, "step": 198500 }, { "epoch": 1305.9868421052631, "grad_norm": 0.8697763085365295, "learning_rate": 0.0001, "loss": 0.0077, "step": 198510 }, { "epoch": 1306.0526315789473, "grad_norm": 0.7972436547279358, "learning_rate": 0.0001, "loss": 0.0111, "step": 198520 }, { "epoch": 1306.1184210526317, "grad_norm": 0.7652742266654968, "learning_rate": 0.0001, "loss": 0.0072, "step": 198530 }, { "epoch": 1306.1842105263158, "grad_norm": 1.0677361488342285, "learning_rate": 0.0001, "loss": 0.01, "step": 198540 }, { "epoch": 1306.25, "grad_norm": 0.9967530965805054, "learning_rate": 0.0001, "loss": 0.0071, "step": 198550 }, { "epoch": 1306.3157894736842, "grad_norm": 0.8501853942871094, "learning_rate": 0.0001, "loss": 0.008, "step": 198560 }, { "epoch": 1306.3815789473683, "grad_norm": 1.3013569116592407, "learning_rate": 0.0001, "loss": 0.01, "step": 198570 }, { "epoch": 1306.4473684210527, "grad_norm": 0.9409443140029907, "learning_rate": 0.0001, "loss": 0.0082, "step": 198580 }, { "epoch": 1306.5131578947369, "grad_norm": 0.9747039675712585, "learning_rate": 0.0001, "loss": 0.0079, "step": 198590 }, { "epoch": 1306.578947368421, "grad_norm": 1.0571503639221191, "learning_rate": 0.0001, "loss": 0.0102, "step": 198600 }, { "epoch": 1306.6447368421052, "grad_norm": 0.7892196178436279, "learning_rate": 0.0001, "loss": 0.0088, "step": 198610 }, { "epoch": 1306.7105263157894, "grad_norm": 1.2711453437805176, "learning_rate": 0.0001, "loss": 0.0092, "step": 198620 }, { "epoch": 1306.7763157894738, "grad_norm": 1.0646253824234009, "learning_rate": 0.0001, "loss": 0.0072, "step": 198630 }, { "epoch": 1306.842105263158, "grad_norm": 0.9739760160446167, "learning_rate": 0.0001, "loss": 0.0087, "step": 198640 }, { "epoch": 1306.907894736842, "grad_norm": 1.054294466972351, "learning_rate": 0.0001, "loss": 0.0081, "step": 198650 }, { "epoch": 1306.9736842105262, "grad_norm": 0.9815489649772644, "learning_rate": 0.0001, "loss": 0.0111, "step": 198660 }, { "epoch": 1307.0394736842106, "grad_norm": 1.2176196575164795, "learning_rate": 0.0001, "loss": 0.0099, "step": 198670 }, { "epoch": 1307.1052631578948, "grad_norm": 0.6826998591423035, "learning_rate": 0.0001, "loss": 0.0092, "step": 198680 }, { "epoch": 1307.171052631579, "grad_norm": 0.6777370572090149, "learning_rate": 0.0001, "loss": 0.009, "step": 198690 }, { "epoch": 1307.2368421052631, "grad_norm": 0.7783268690109253, "learning_rate": 0.0001, "loss": 0.0083, "step": 198700 }, { "epoch": 1307.3026315789473, "grad_norm": 0.9428658485412598, "learning_rate": 0.0001, "loss": 0.0077, "step": 198710 }, { "epoch": 1307.3684210526317, "grad_norm": 0.6998884081840515, "learning_rate": 0.0001, "loss": 0.0085, "step": 198720 }, { "epoch": 1307.4342105263158, "grad_norm": 0.6625298261642456, "learning_rate": 0.0001, "loss": 0.0073, "step": 198730 }, { "epoch": 1307.5, "grad_norm": 0.868622362613678, "learning_rate": 0.0001, "loss": 0.0101, "step": 198740 }, { "epoch": 1307.5657894736842, "grad_norm": 0.9482408165931702, "learning_rate": 0.0001, "loss": 0.0072, "step": 198750 }, { "epoch": 1307.6315789473683, "grad_norm": 0.9562262296676636, "learning_rate": 0.0001, "loss": 0.0095, "step": 198760 }, { "epoch": 1307.6973684210527, "grad_norm": 1.1496707201004028, "learning_rate": 0.0001, "loss": 0.0072, "step": 198770 }, { "epoch": 1307.7631578947369, "grad_norm": 1.1873894929885864, "learning_rate": 0.0001, "loss": 0.0106, "step": 198780 }, { "epoch": 1307.828947368421, "grad_norm": 1.321275234222412, "learning_rate": 0.0001, "loss": 0.009, "step": 198790 }, { "epoch": 1307.8947368421052, "grad_norm": 1.3467177152633667, "learning_rate": 0.0001, "loss": 0.0095, "step": 198800 }, { "epoch": 1307.9605263157894, "grad_norm": 1.229738473892212, "learning_rate": 0.0001, "loss": 0.0093, "step": 198810 }, { "epoch": 1308.0263157894738, "grad_norm": 1.2855587005615234, "learning_rate": 0.0001, "loss": 0.0082, "step": 198820 }, { "epoch": 1308.092105263158, "grad_norm": 0.9853659272193909, "learning_rate": 0.0001, "loss": 0.0085, "step": 198830 }, { "epoch": 1308.157894736842, "grad_norm": 1.0061135292053223, "learning_rate": 0.0001, "loss": 0.0075, "step": 198840 }, { "epoch": 1308.2236842105262, "grad_norm": 0.6921624541282654, "learning_rate": 0.0001, "loss": 0.0079, "step": 198850 }, { "epoch": 1308.2894736842106, "grad_norm": 0.9409551024436951, "learning_rate": 0.0001, "loss": 0.0106, "step": 198860 }, { "epoch": 1308.3552631578948, "grad_norm": 1.0803308486938477, "learning_rate": 0.0001, "loss": 0.0081, "step": 198870 }, { "epoch": 1308.421052631579, "grad_norm": 1.107574224472046, "learning_rate": 0.0001, "loss": 0.008, "step": 198880 }, { "epoch": 1308.4868421052631, "grad_norm": 1.182572603225708, "learning_rate": 0.0001, "loss": 0.0087, "step": 198890 }, { "epoch": 1308.5526315789473, "grad_norm": 1.107548713684082, "learning_rate": 0.0001, "loss": 0.0094, "step": 198900 }, { "epoch": 1308.6184210526317, "grad_norm": 1.1584941148757935, "learning_rate": 0.0001, "loss": 0.0084, "step": 198910 }, { "epoch": 1308.6842105263158, "grad_norm": 1.4747897386550903, "learning_rate": 0.0001, "loss": 0.0083, "step": 198920 }, { "epoch": 1308.75, "grad_norm": 1.3047585487365723, "learning_rate": 0.0001, "loss": 0.0103, "step": 198930 }, { "epoch": 1308.8157894736842, "grad_norm": 1.0019290447235107, "learning_rate": 0.0001, "loss": 0.0075, "step": 198940 }, { "epoch": 1308.8815789473683, "grad_norm": 0.7977175712585449, "learning_rate": 0.0001, "loss": 0.0086, "step": 198950 }, { "epoch": 1308.9473684210527, "grad_norm": 1.2986831665039062, "learning_rate": 0.0001, "loss": 0.0094, "step": 198960 }, { "epoch": 1309.0131578947369, "grad_norm": 1.226833701133728, "learning_rate": 0.0001, "loss": 0.0095, "step": 198970 }, { "epoch": 1309.078947368421, "grad_norm": 0.9073089957237244, "learning_rate": 0.0001, "loss": 0.0085, "step": 198980 }, { "epoch": 1309.1447368421052, "grad_norm": 0.9509008526802063, "learning_rate": 0.0001, "loss": 0.0073, "step": 198990 }, { "epoch": 1309.2105263157894, "grad_norm": 1.083253026008606, "learning_rate": 0.0001, "loss": 0.0099, "step": 199000 }, { "epoch": 1309.2763157894738, "grad_norm": 0.731686532497406, "learning_rate": 0.0001, "loss": 0.0095, "step": 199010 }, { "epoch": 1309.342105263158, "grad_norm": 1.5306459665298462, "learning_rate": 0.0001, "loss": 0.0086, "step": 199020 }, { "epoch": 1309.407894736842, "grad_norm": 1.169585108757019, "learning_rate": 0.0001, "loss": 0.0087, "step": 199030 }, { "epoch": 1309.4736842105262, "grad_norm": 1.225286841392517, "learning_rate": 0.0001, "loss": 0.0092, "step": 199040 }, { "epoch": 1309.5394736842106, "grad_norm": 1.1703866720199585, "learning_rate": 0.0001, "loss": 0.0083, "step": 199050 }, { "epoch": 1309.6052631578948, "grad_norm": 1.381737470626831, "learning_rate": 0.0001, "loss": 0.0097, "step": 199060 }, { "epoch": 1309.671052631579, "grad_norm": 0.9777848720550537, "learning_rate": 0.0001, "loss": 0.008, "step": 199070 }, { "epoch": 1309.7368421052631, "grad_norm": 1.1087908744812012, "learning_rate": 0.0001, "loss": 0.0087, "step": 199080 }, { "epoch": 1309.8026315789473, "grad_norm": 1.5419447422027588, "learning_rate": 0.0001, "loss": 0.008, "step": 199090 }, { "epoch": 1309.8684210526317, "grad_norm": 1.1349118947982788, "learning_rate": 0.0001, "loss": 0.0085, "step": 199100 }, { "epoch": 1309.9342105263158, "grad_norm": 1.1746926307678223, "learning_rate": 0.0001, "loss": 0.0077, "step": 199110 }, { "epoch": 1310.0, "grad_norm": 0.8852264881134033, "learning_rate": 0.0001, "loss": 0.0094, "step": 199120 }, { "epoch": 1310.0657894736842, "grad_norm": 1.0382320880889893, "learning_rate": 0.0001, "loss": 0.0093, "step": 199130 }, { "epoch": 1310.1315789473683, "grad_norm": 1.3453278541564941, "learning_rate": 0.0001, "loss": 0.0078, "step": 199140 }, { "epoch": 1310.1973684210527, "grad_norm": 1.1044265031814575, "learning_rate": 0.0001, "loss": 0.0085, "step": 199150 }, { "epoch": 1310.2631578947369, "grad_norm": 0.6985461711883545, "learning_rate": 0.0001, "loss": 0.0077, "step": 199160 }, { "epoch": 1310.328947368421, "grad_norm": 1.2022053003311157, "learning_rate": 0.0001, "loss": 0.0103, "step": 199170 }, { "epoch": 1310.3947368421052, "grad_norm": 1.3073654174804688, "learning_rate": 0.0001, "loss": 0.0092, "step": 199180 }, { "epoch": 1310.4605263157894, "grad_norm": 0.9767325520515442, "learning_rate": 0.0001, "loss": 0.0074, "step": 199190 }, { "epoch": 1310.5263157894738, "grad_norm": 0.7886372208595276, "learning_rate": 0.0001, "loss": 0.0083, "step": 199200 }, { "epoch": 1310.592105263158, "grad_norm": 1.1879262924194336, "learning_rate": 0.0001, "loss": 0.0098, "step": 199210 }, { "epoch": 1310.657894736842, "grad_norm": 1.3642725944519043, "learning_rate": 0.0001, "loss": 0.0072, "step": 199220 }, { "epoch": 1310.7236842105262, "grad_norm": 0.7498663663864136, "learning_rate": 0.0001, "loss": 0.0101, "step": 199230 }, { "epoch": 1310.7894736842106, "grad_norm": 0.6963070034980774, "learning_rate": 0.0001, "loss": 0.0102, "step": 199240 }, { "epoch": 1310.8552631578948, "grad_norm": 1.091447114944458, "learning_rate": 0.0001, "loss": 0.0086, "step": 199250 }, { "epoch": 1310.921052631579, "grad_norm": 0.7097912430763245, "learning_rate": 0.0001, "loss": 0.0084, "step": 199260 }, { "epoch": 1310.9868421052631, "grad_norm": 0.9089406728744507, "learning_rate": 0.0001, "loss": 0.0078, "step": 199270 }, { "epoch": 1311.0526315789473, "grad_norm": 1.0399571657180786, "learning_rate": 0.0001, "loss": 0.0104, "step": 199280 }, { "epoch": 1311.1184210526317, "grad_norm": 0.8863736987113953, "learning_rate": 0.0001, "loss": 0.0112, "step": 199290 }, { "epoch": 1311.1842105263158, "grad_norm": 0.7154498100280762, "learning_rate": 0.0001, "loss": 0.0082, "step": 199300 }, { "epoch": 1311.25, "grad_norm": 0.644045352935791, "learning_rate": 0.0001, "loss": 0.0092, "step": 199310 }, { "epoch": 1311.3157894736842, "grad_norm": 1.04582941532135, "learning_rate": 0.0001, "loss": 0.0075, "step": 199320 }, { "epoch": 1311.3815789473683, "grad_norm": 0.9396390318870544, "learning_rate": 0.0001, "loss": 0.0093, "step": 199330 }, { "epoch": 1311.4473684210527, "grad_norm": 0.959603488445282, "learning_rate": 0.0001, "loss": 0.0094, "step": 199340 }, { "epoch": 1311.5131578947369, "grad_norm": 1.1740236282348633, "learning_rate": 0.0001, "loss": 0.0092, "step": 199350 }, { "epoch": 1311.578947368421, "grad_norm": 0.7795823216438293, "learning_rate": 0.0001, "loss": 0.01, "step": 199360 }, { "epoch": 1311.6447368421052, "grad_norm": 1.1692177057266235, "learning_rate": 0.0001, "loss": 0.0106, "step": 199370 }, { "epoch": 1311.7105263157894, "grad_norm": 1.053812026977539, "learning_rate": 0.0001, "loss": 0.0069, "step": 199380 }, { "epoch": 1311.7763157894738, "grad_norm": 1.0118703842163086, "learning_rate": 0.0001, "loss": 0.0094, "step": 199390 }, { "epoch": 1311.842105263158, "grad_norm": 0.7939363718032837, "learning_rate": 0.0001, "loss": 0.0082, "step": 199400 }, { "epoch": 1311.907894736842, "grad_norm": 1.0312461853027344, "learning_rate": 0.0001, "loss": 0.0084, "step": 199410 }, { "epoch": 1311.9736842105262, "grad_norm": 1.237866759300232, "learning_rate": 0.0001, "loss": 0.0074, "step": 199420 }, { "epoch": 1312.0394736842106, "grad_norm": 1.2162584066390991, "learning_rate": 0.0001, "loss": 0.0116, "step": 199430 }, { "epoch": 1312.1052631578948, "grad_norm": 0.9375420212745667, "learning_rate": 0.0001, "loss": 0.009, "step": 199440 }, { "epoch": 1312.171052631579, "grad_norm": 1.0901834964752197, "learning_rate": 0.0001, "loss": 0.0088, "step": 199450 }, { "epoch": 1312.2368421052631, "grad_norm": 1.2144420146942139, "learning_rate": 0.0001, "loss": 0.0079, "step": 199460 }, { "epoch": 1312.3026315789473, "grad_norm": 1.3519539833068848, "learning_rate": 0.0001, "loss": 0.0087, "step": 199470 }, { "epoch": 1312.3684210526317, "grad_norm": 0.9426065683364868, "learning_rate": 0.0001, "loss": 0.0082, "step": 199480 }, { "epoch": 1312.4342105263158, "grad_norm": 0.7949636578559875, "learning_rate": 0.0001, "loss": 0.009, "step": 199490 }, { "epoch": 1312.5, "grad_norm": 1.197507619857788, "learning_rate": 0.0001, "loss": 0.0098, "step": 199500 }, { "epoch": 1312.5657894736842, "grad_norm": 1.0171011686325073, "learning_rate": 0.0001, "loss": 0.0088, "step": 199510 }, { "epoch": 1312.6315789473683, "grad_norm": 0.9687016010284424, "learning_rate": 0.0001, "loss": 0.0078, "step": 199520 }, { "epoch": 1312.6973684210527, "grad_norm": 1.1884241104125977, "learning_rate": 0.0001, "loss": 0.0086, "step": 199530 }, { "epoch": 1312.7631578947369, "grad_norm": 1.3759465217590332, "learning_rate": 0.0001, "loss": 0.0101, "step": 199540 }, { "epoch": 1312.828947368421, "grad_norm": 1.368860125541687, "learning_rate": 0.0001, "loss": 0.0089, "step": 199550 }, { "epoch": 1312.8947368421052, "grad_norm": 1.2088021039962769, "learning_rate": 0.0001, "loss": 0.0077, "step": 199560 }, { "epoch": 1312.9605263157894, "grad_norm": 0.524134635925293, "learning_rate": 0.0001, "loss": 0.0088, "step": 199570 }, { "epoch": 1313.0263157894738, "grad_norm": 1.3162397146224976, "learning_rate": 0.0001, "loss": 0.009, "step": 199580 }, { "epoch": 1313.092105263158, "grad_norm": 1.575825810432434, "learning_rate": 0.0001, "loss": 0.0088, "step": 199590 }, { "epoch": 1313.157894736842, "grad_norm": 1.1224581003189087, "learning_rate": 0.0001, "loss": 0.0076, "step": 199600 }, { "epoch": 1313.2236842105262, "grad_norm": 1.0737751722335815, "learning_rate": 0.0001, "loss": 0.0097, "step": 199610 }, { "epoch": 1313.2894736842106, "grad_norm": 0.9804463982582092, "learning_rate": 0.0001, "loss": 0.008, "step": 199620 }, { "epoch": 1313.3552631578948, "grad_norm": 0.9724069833755493, "learning_rate": 0.0001, "loss": 0.0086, "step": 199630 }, { "epoch": 1313.421052631579, "grad_norm": 1.1374396085739136, "learning_rate": 0.0001, "loss": 0.0117, "step": 199640 }, { "epoch": 1313.4868421052631, "grad_norm": 0.8667737245559692, "learning_rate": 0.0001, "loss": 0.0077, "step": 199650 }, { "epoch": 1313.5526315789473, "grad_norm": 0.9983107447624207, "learning_rate": 0.0001, "loss": 0.0107, "step": 199660 }, { "epoch": 1313.6184210526317, "grad_norm": 1.2259620428085327, "learning_rate": 0.0001, "loss": 0.0086, "step": 199670 }, { "epoch": 1313.6842105263158, "grad_norm": 0.9851988554000854, "learning_rate": 0.0001, "loss": 0.0105, "step": 199680 }, { "epoch": 1313.75, "grad_norm": 0.5888131260871887, "learning_rate": 0.0001, "loss": 0.0101, "step": 199690 }, { "epoch": 1313.8157894736842, "grad_norm": 0.9664670825004578, "learning_rate": 0.0001, "loss": 0.0106, "step": 199700 }, { "epoch": 1313.8815789473683, "grad_norm": 0.9594868421554565, "learning_rate": 0.0001, "loss": 0.0099, "step": 199710 }, { "epoch": 1313.9473684210527, "grad_norm": 0.9215731620788574, "learning_rate": 0.0001, "loss": 0.0092, "step": 199720 }, { "epoch": 1314.0131578947369, "grad_norm": 1.1164031028747559, "learning_rate": 0.0001, "loss": 0.0094, "step": 199730 }, { "epoch": 1314.078947368421, "grad_norm": 0.9203608632087708, "learning_rate": 0.0001, "loss": 0.0087, "step": 199740 }, { "epoch": 1314.1447368421052, "grad_norm": 0.9199415445327759, "learning_rate": 0.0001, "loss": 0.0116, "step": 199750 }, { "epoch": 1314.2105263157894, "grad_norm": 1.0740537643432617, "learning_rate": 0.0001, "loss": 0.0099, "step": 199760 }, { "epoch": 1314.2763157894738, "grad_norm": 0.9334034323692322, "learning_rate": 0.0001, "loss": 0.0093, "step": 199770 }, { "epoch": 1314.342105263158, "grad_norm": 1.0269443988800049, "learning_rate": 0.0001, "loss": 0.0094, "step": 199780 }, { "epoch": 1314.407894736842, "grad_norm": 0.8074368834495544, "learning_rate": 0.0001, "loss": 0.0102, "step": 199790 }, { "epoch": 1314.4736842105262, "grad_norm": 0.929581880569458, "learning_rate": 0.0001, "loss": 0.0094, "step": 199800 }, { "epoch": 1314.5394736842106, "grad_norm": 0.8619239926338196, "learning_rate": 0.0001, "loss": 0.0088, "step": 199810 }, { "epoch": 1314.6052631578948, "grad_norm": 1.4143906831741333, "learning_rate": 0.0001, "loss": 0.0106, "step": 199820 }, { "epoch": 1314.671052631579, "grad_norm": 1.1911793947219849, "learning_rate": 0.0001, "loss": 0.0093, "step": 199830 }, { "epoch": 1314.7368421052631, "grad_norm": 1.1507445573806763, "learning_rate": 0.0001, "loss": 0.0103, "step": 199840 }, { "epoch": 1314.8026315789473, "grad_norm": 1.1112452745437622, "learning_rate": 0.0001, "loss": 0.0106, "step": 199850 }, { "epoch": 1314.8684210526317, "grad_norm": 1.0850579738616943, "learning_rate": 0.0001, "loss": 0.0087, "step": 199860 }, { "epoch": 1314.9342105263158, "grad_norm": 1.0612223148345947, "learning_rate": 0.0001, "loss": 0.01, "step": 199870 }, { "epoch": 1315.0, "grad_norm": 0.6841676831245422, "learning_rate": 0.0001, "loss": 0.0107, "step": 199880 }, { "epoch": 1315.0657894736842, "grad_norm": 0.7603421211242676, "learning_rate": 0.0001, "loss": 0.0093, "step": 199890 }, { "epoch": 1315.1315789473683, "grad_norm": 0.862991988658905, "learning_rate": 0.0001, "loss": 0.0112, "step": 199900 }, { "epoch": 1315.1973684210527, "grad_norm": 0.9214925169944763, "learning_rate": 0.0001, "loss": 0.0119, "step": 199910 }, { "epoch": 1315.2631578947369, "grad_norm": 1.2615870237350464, "learning_rate": 0.0001, "loss": 0.0109, "step": 199920 }, { "epoch": 1315.328947368421, "grad_norm": 1.203710675239563, "learning_rate": 0.0001, "loss": 0.0095, "step": 199930 }, { "epoch": 1315.3947368421052, "grad_norm": 1.0401464700698853, "learning_rate": 0.0001, "loss": 0.0102, "step": 199940 }, { "epoch": 1315.4605263157894, "grad_norm": 0.9347955584526062, "learning_rate": 0.0001, "loss": 0.0095, "step": 199950 }, { "epoch": 1315.5263157894738, "grad_norm": 0.8496779799461365, "learning_rate": 0.0001, "loss": 0.0102, "step": 199960 }, { "epoch": 1315.592105263158, "grad_norm": 1.1234538555145264, "learning_rate": 0.0001, "loss": 0.0081, "step": 199970 }, { "epoch": 1315.657894736842, "grad_norm": 1.2898207902908325, "learning_rate": 0.0001, "loss": 0.008, "step": 199980 }, { "epoch": 1315.7236842105262, "grad_norm": 0.773065984249115, "learning_rate": 0.0001, "loss": 0.0086, "step": 199990 }, { "epoch": 1315.7894736842106, "grad_norm": 1.2071137428283691, "learning_rate": 0.0001, "loss": 0.0079, "step": 200000 }, { "epoch": 1315.7894736842106, "step": 200000, "total_flos": 0.0, "train_loss": 0.01848614209484309, "train_runtime": 93663.6535, "train_samples_per_second": 136.659, "train_steps_per_second": 2.135 } ], "logging_steps": 10, "max_steps": 200000, "num_input_tokens_seen": 0, "num_train_epochs": 1316, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }