{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1315.7894736842106, "eval_steps": 500, "global_step": 200000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06578947368421052, "grad_norm": 147.00340270996094, "learning_rate": 0.0001, "loss": 21.5054, "step": 10 }, { "epoch": 0.13157894736842105, "grad_norm": 75.18893432617188, "learning_rate": 0.0001, "loss": 4.8796, "step": 20 }, { "epoch": 0.19736842105263158, "grad_norm": 64.60633087158203, "learning_rate": 0.0001, "loss": 3.6607, "step": 30 }, { "epoch": 0.2631578947368421, "grad_norm": 53.299015045166016, "learning_rate": 0.0001, "loss": 3.1492, "step": 40 }, { "epoch": 0.32894736842105265, "grad_norm": 53.748531341552734, "learning_rate": 0.0001, "loss": 2.965, "step": 50 }, { "epoch": 0.39473684210526316, "grad_norm": 61.381805419921875, "learning_rate": 0.0001, "loss": 2.7867, "step": 60 }, { "epoch": 0.4605263157894737, "grad_norm": 55.977203369140625, "learning_rate": 0.0001, "loss": 2.5674, "step": 70 }, { "epoch": 0.5263157894736842, "grad_norm": 49.522483825683594, "learning_rate": 0.0001, "loss": 2.4033, "step": 80 }, { "epoch": 0.5921052631578947, "grad_norm": 47.35883712768555, "learning_rate": 0.0001, "loss": 2.235, "step": 90 }, { "epoch": 0.6578947368421053, "grad_norm": 35.355003356933594, "learning_rate": 0.0001, "loss": 2.1005, "step": 100 }, { "epoch": 0.7236842105263158, "grad_norm": 40.69276428222656, "learning_rate": 0.0001, "loss": 2.0012, "step": 110 }, { "epoch": 0.7894736842105263, "grad_norm": 54.77851867675781, "learning_rate": 0.0001, "loss": 1.9005, "step": 120 }, { "epoch": 0.8552631578947368, "grad_norm": 46.83567810058594, "learning_rate": 0.0001, "loss": 1.814, "step": 130 }, { "epoch": 0.9210526315789473, "grad_norm": 41.94735336303711, "learning_rate": 0.0001, "loss": 1.7573, "step": 140 }, { "epoch": 0.9868421052631579, "grad_norm": 35.08190155029297, "learning_rate": 0.0001, "loss": 1.654, "step": 150 }, { "epoch": 1.0526315789473684, "grad_norm": 44.48291015625, "learning_rate": 0.0001, "loss": 1.5705, "step": 160 }, { "epoch": 1.118421052631579, "grad_norm": 31.81441879272461, "learning_rate": 0.0001, "loss": 1.4843, "step": 170 }, { "epoch": 1.1842105263157894, "grad_norm": 32.60026550292969, "learning_rate": 0.0001, "loss": 1.4132, "step": 180 }, { "epoch": 1.25, "grad_norm": 41.517005920410156, "learning_rate": 0.0001, "loss": 1.3827, "step": 190 }, { "epoch": 1.3157894736842106, "grad_norm": 35.75162887573242, "learning_rate": 0.0001, "loss": 1.2906, "step": 200 }, { "epoch": 1.381578947368421, "grad_norm": 36.43583297729492, "learning_rate": 0.0001, "loss": 1.2285, "step": 210 }, { "epoch": 1.4473684210526316, "grad_norm": 27.052431106567383, "learning_rate": 0.0001, "loss": 1.1653, "step": 220 }, { "epoch": 1.513157894736842, "grad_norm": 33.150672912597656, "learning_rate": 0.0001, "loss": 1.1156, "step": 230 }, { "epoch": 1.5789473684210527, "grad_norm": 31.084774017333984, "learning_rate": 0.0001, "loss": 1.0502, "step": 240 }, { "epoch": 1.6447368421052633, "grad_norm": 34.409236907958984, "learning_rate": 0.0001, "loss": 1.0133, "step": 250 }, { "epoch": 1.7105263157894737, "grad_norm": 34.15842056274414, "learning_rate": 0.0001, "loss": 0.9704, "step": 260 }, { "epoch": 1.776315789473684, "grad_norm": 28.07461166381836, "learning_rate": 0.0001, "loss": 0.8923, "step": 270 }, { "epoch": 1.8421052631578947, "grad_norm": 26.535066604614258, "learning_rate": 0.0001, "loss": 0.8428, "step": 280 }, { "epoch": 1.9078947368421053, "grad_norm": 30.600873947143555, "learning_rate": 0.0001, "loss": 0.8012, "step": 290 }, { "epoch": 1.973684210526316, "grad_norm": 27.880096435546875, "learning_rate": 0.0001, "loss": 0.7579, "step": 300 }, { "epoch": 2.039473684210526, "grad_norm": 33.62862014770508, "learning_rate": 0.0001, "loss": 0.7262, "step": 310 }, { "epoch": 2.1052631578947367, "grad_norm": 25.085758209228516, "learning_rate": 0.0001, "loss": 0.6675, "step": 320 }, { "epoch": 2.1710526315789473, "grad_norm": 27.23250389099121, "learning_rate": 0.0001, "loss": 0.642, "step": 330 }, { "epoch": 2.236842105263158, "grad_norm": 27.94266700744629, "learning_rate": 0.0001, "loss": 0.6191, "step": 340 }, { "epoch": 2.3026315789473686, "grad_norm": 20.9904727935791, "learning_rate": 0.0001, "loss": 0.5783, "step": 350 }, { "epoch": 2.3684210526315788, "grad_norm": 23.568016052246094, "learning_rate": 0.0001, "loss": 0.5469, "step": 360 }, { "epoch": 2.4342105263157894, "grad_norm": 24.500930786132812, "learning_rate": 0.0001, "loss": 0.5437, "step": 370 }, { "epoch": 2.5, "grad_norm": 23.668123245239258, "learning_rate": 0.0001, "loss": 0.5123, "step": 380 }, { "epoch": 2.5657894736842106, "grad_norm": 23.93753433227539, "learning_rate": 0.0001, "loss": 0.5082, "step": 390 }, { "epoch": 2.6315789473684212, "grad_norm": 22.929643630981445, "learning_rate": 0.0001, "loss": 0.4867, "step": 400 }, { "epoch": 2.6973684210526314, "grad_norm": 21.82855796813965, "learning_rate": 0.0001, "loss": 0.4665, "step": 410 }, { "epoch": 2.763157894736842, "grad_norm": 18.323549270629883, "learning_rate": 0.0001, "loss": 0.4549, "step": 420 }, { "epoch": 2.8289473684210527, "grad_norm": 16.781408309936523, "learning_rate": 0.0001, "loss": 0.4321, "step": 430 }, { "epoch": 2.8947368421052633, "grad_norm": 17.668415069580078, "learning_rate": 0.0001, "loss": 0.4312, "step": 440 }, { "epoch": 2.9605263157894735, "grad_norm": 17.911794662475586, "learning_rate": 0.0001, "loss": 0.402, "step": 450 }, { "epoch": 3.026315789473684, "grad_norm": 17.774690628051758, "learning_rate": 0.0001, "loss": 0.3864, "step": 460 }, { "epoch": 3.0921052631578947, "grad_norm": 16.26464080810547, "learning_rate": 0.0001, "loss": 0.3749, "step": 470 }, { "epoch": 3.1578947368421053, "grad_norm": 19.025352478027344, "learning_rate": 0.0001, "loss": 0.3712, "step": 480 }, { "epoch": 3.223684210526316, "grad_norm": 15.689960479736328, "learning_rate": 0.0001, "loss": 0.3584, "step": 490 }, { "epoch": 3.2894736842105265, "grad_norm": 18.045785903930664, "learning_rate": 0.0001, "loss": 0.3495, "step": 500 }, { "epoch": 3.3552631578947367, "grad_norm": 16.737571716308594, "learning_rate": 0.0001, "loss": 0.3404, "step": 510 }, { "epoch": 3.4210526315789473, "grad_norm": 15.480450630187988, "learning_rate": 0.0001, "loss": 0.3334, "step": 520 }, { "epoch": 3.486842105263158, "grad_norm": 16.040254592895508, "learning_rate": 0.0001, "loss": 0.3301, "step": 530 }, { "epoch": 3.5526315789473686, "grad_norm": 15.321078300476074, "learning_rate": 0.0001, "loss": 0.3285, "step": 540 }, { "epoch": 3.6184210526315788, "grad_norm": 14.546820640563965, "learning_rate": 0.0001, "loss": 0.3247, "step": 550 }, { "epoch": 3.6842105263157894, "grad_norm": 15.800512313842773, "learning_rate": 0.0001, "loss": 0.318, "step": 560 }, { "epoch": 3.75, "grad_norm": 14.586511611938477, "learning_rate": 0.0001, "loss": 0.3034, "step": 570 }, { "epoch": 3.8157894736842106, "grad_norm": 14.677214622497559, "learning_rate": 0.0001, "loss": 0.297, "step": 580 }, { "epoch": 3.8815789473684212, "grad_norm": 14.887819290161133, "learning_rate": 0.0001, "loss": 0.2994, "step": 590 }, { "epoch": 3.9473684210526314, "grad_norm": 15.036578178405762, "learning_rate": 0.0001, "loss": 0.2905, "step": 600 }, { "epoch": 4.0131578947368425, "grad_norm": 15.837952613830566, "learning_rate": 0.0001, "loss": 0.2795, "step": 610 }, { "epoch": 4.078947368421052, "grad_norm": 15.964160919189453, "learning_rate": 0.0001, "loss": 0.2887, "step": 620 }, { "epoch": 4.144736842105263, "grad_norm": 15.995771408081055, "learning_rate": 0.0001, "loss": 0.2842, "step": 630 }, { "epoch": 4.2105263157894735, "grad_norm": 11.476451873779297, "learning_rate": 0.0001, "loss": 0.2717, "step": 640 }, { "epoch": 4.276315789473684, "grad_norm": 13.376639366149902, "learning_rate": 0.0001, "loss": 0.268, "step": 650 }, { "epoch": 4.342105263157895, "grad_norm": 16.156909942626953, "learning_rate": 0.0001, "loss": 0.2633, "step": 660 }, { "epoch": 4.407894736842105, "grad_norm": 13.77620792388916, "learning_rate": 0.0001, "loss": 0.2601, "step": 670 }, { "epoch": 4.473684210526316, "grad_norm": 14.24107551574707, "learning_rate": 0.0001, "loss": 0.2557, "step": 680 }, { "epoch": 4.5394736842105265, "grad_norm": 12.445223808288574, "learning_rate": 0.0001, "loss": 0.2604, "step": 690 }, { "epoch": 4.605263157894737, "grad_norm": 14.017378807067871, "learning_rate": 0.0001, "loss": 0.2501, "step": 700 }, { "epoch": 4.671052631578947, "grad_norm": 14.1619291305542, "learning_rate": 0.0001, "loss": 0.2454, "step": 710 }, { "epoch": 4.7368421052631575, "grad_norm": 12.56690788269043, "learning_rate": 0.0001, "loss": 0.2564, "step": 720 }, { "epoch": 4.802631578947368, "grad_norm": 11.870203971862793, "learning_rate": 0.0001, "loss": 0.256, "step": 730 }, { "epoch": 4.868421052631579, "grad_norm": 13.470266342163086, "learning_rate": 0.0001, "loss": 0.2487, "step": 740 }, { "epoch": 4.934210526315789, "grad_norm": 14.058941841125488, "learning_rate": 0.0001, "loss": 0.2474, "step": 750 }, { "epoch": 5.0, "grad_norm": 13.21171760559082, "learning_rate": 0.0001, "loss": 0.246, "step": 760 }, { "epoch": 5.065789473684211, "grad_norm": 12.306739807128906, "learning_rate": 0.0001, "loss": 0.2411, "step": 770 }, { "epoch": 5.131578947368421, "grad_norm": 13.37558364868164, "learning_rate": 0.0001, "loss": 0.2383, "step": 780 }, { "epoch": 5.197368421052632, "grad_norm": 13.978446960449219, "learning_rate": 0.0001, "loss": 0.2404, "step": 790 }, { "epoch": 5.2631578947368425, "grad_norm": 14.252902030944824, "learning_rate": 0.0001, "loss": 0.2297, "step": 800 }, { "epoch": 5.328947368421053, "grad_norm": 12.678069114685059, "learning_rate": 0.0001, "loss": 0.2302, "step": 810 }, { "epoch": 5.394736842105263, "grad_norm": 11.686091423034668, "learning_rate": 0.0001, "loss": 0.226, "step": 820 }, { "epoch": 5.4605263157894735, "grad_norm": 10.88893985748291, "learning_rate": 0.0001, "loss": 0.2299, "step": 830 }, { "epoch": 5.526315789473684, "grad_norm": 10.443198204040527, "learning_rate": 0.0001, "loss": 0.2175, "step": 840 }, { "epoch": 5.592105263157895, "grad_norm": 13.31001091003418, "learning_rate": 0.0001, "loss": 0.2228, "step": 850 }, { "epoch": 5.657894736842105, "grad_norm": 13.272442817687988, "learning_rate": 0.0001, "loss": 0.2182, "step": 860 }, { "epoch": 5.723684210526316, "grad_norm": 12.23871898651123, "learning_rate": 0.0001, "loss": 0.2192, "step": 870 }, { "epoch": 5.7894736842105265, "grad_norm": 11.551015853881836, "learning_rate": 0.0001, "loss": 0.2086, "step": 880 }, { "epoch": 5.855263157894737, "grad_norm": 13.253652572631836, "learning_rate": 0.0001, "loss": 0.2169, "step": 890 }, { "epoch": 5.921052631578947, "grad_norm": 10.96214485168457, "learning_rate": 0.0001, "loss": 0.2151, "step": 900 }, { "epoch": 5.9868421052631575, "grad_norm": 11.494643211364746, "learning_rate": 0.0001, "loss": 0.2081, "step": 910 }, { "epoch": 6.052631578947368, "grad_norm": 10.467440605163574, "learning_rate": 0.0001, "loss": 0.2077, "step": 920 }, { "epoch": 6.118421052631579, "grad_norm": 9.891035079956055, "learning_rate": 0.0001, "loss": 0.2008, "step": 930 }, { "epoch": 6.184210526315789, "grad_norm": 11.876483917236328, "learning_rate": 0.0001, "loss": 0.197, "step": 940 }, { "epoch": 6.25, "grad_norm": 11.160200119018555, "learning_rate": 0.0001, "loss": 0.1992, "step": 950 }, { "epoch": 6.315789473684211, "grad_norm": 11.48719310760498, "learning_rate": 0.0001, "loss": 0.2053, "step": 960 }, { "epoch": 6.381578947368421, "grad_norm": 12.996438026428223, "learning_rate": 0.0001, "loss": 0.1982, "step": 970 }, { "epoch": 6.447368421052632, "grad_norm": 11.707313537597656, "learning_rate": 0.0001, "loss": 0.201, "step": 980 }, { "epoch": 6.5131578947368425, "grad_norm": 10.841751098632812, "learning_rate": 0.0001, "loss": 0.1913, "step": 990 }, { "epoch": 6.578947368421053, "grad_norm": 10.24755859375, "learning_rate": 0.0001, "loss": 0.1875, "step": 1000 }, { "epoch": 6.644736842105263, "grad_norm": 10.395952224731445, "learning_rate": 0.0001, "loss": 0.1882, "step": 1010 }, { "epoch": 6.7105263157894735, "grad_norm": 10.0187349319458, "learning_rate": 0.0001, "loss": 0.1855, "step": 1020 }, { "epoch": 6.776315789473684, "grad_norm": 11.295548439025879, "learning_rate": 0.0001, "loss": 0.183, "step": 1030 }, { "epoch": 6.842105263157895, "grad_norm": 13.44489860534668, "learning_rate": 0.0001, "loss": 0.1821, "step": 1040 }, { "epoch": 6.907894736842105, "grad_norm": 10.803842544555664, "learning_rate": 0.0001, "loss": 0.1822, "step": 1050 }, { "epoch": 6.973684210526316, "grad_norm": 9.939297676086426, "learning_rate": 0.0001, "loss": 0.1824, "step": 1060 }, { "epoch": 7.0394736842105265, "grad_norm": 10.647833824157715, "learning_rate": 0.0001, "loss": 0.1825, "step": 1070 }, { "epoch": 7.105263157894737, "grad_norm": 9.66645622253418, "learning_rate": 0.0001, "loss": 0.1846, "step": 1080 }, { "epoch": 7.171052631578948, "grad_norm": 11.150897979736328, "learning_rate": 0.0001, "loss": 0.1739, "step": 1090 }, { "epoch": 7.2368421052631575, "grad_norm": 8.952764511108398, "learning_rate": 0.0001, "loss": 0.1726, "step": 1100 }, { "epoch": 7.302631578947368, "grad_norm": 10.215106964111328, "learning_rate": 0.0001, "loss": 0.1729, "step": 1110 }, { "epoch": 7.368421052631579, "grad_norm": 11.059536933898926, "learning_rate": 0.0001, "loss": 0.1721, "step": 1120 }, { "epoch": 7.434210526315789, "grad_norm": 9.348877906799316, "learning_rate": 0.0001, "loss": 0.1667, "step": 1130 }, { "epoch": 7.5, "grad_norm": 10.604543685913086, "learning_rate": 0.0001, "loss": 0.1702, "step": 1140 }, { "epoch": 7.565789473684211, "grad_norm": 10.364450454711914, "learning_rate": 0.0001, "loss": 0.1652, "step": 1150 }, { "epoch": 7.631578947368421, "grad_norm": 9.543376922607422, "learning_rate": 0.0001, "loss": 0.1641, "step": 1160 }, { "epoch": 7.697368421052632, "grad_norm": 8.747283935546875, "learning_rate": 0.0001, "loss": 0.1638, "step": 1170 }, { "epoch": 7.7631578947368425, "grad_norm": 9.087156295776367, "learning_rate": 0.0001, "loss": 0.1675, "step": 1180 }, { "epoch": 7.828947368421053, "grad_norm": 11.355291366577148, "learning_rate": 0.0001, "loss": 0.168, "step": 1190 }, { "epoch": 7.894736842105263, "grad_norm": 11.241161346435547, "learning_rate": 0.0001, "loss": 0.1655, "step": 1200 }, { "epoch": 7.9605263157894735, "grad_norm": 11.03511905670166, "learning_rate": 0.0001, "loss": 0.1694, "step": 1210 }, { "epoch": 8.026315789473685, "grad_norm": 9.319551467895508, "learning_rate": 0.0001, "loss": 0.1641, "step": 1220 }, { "epoch": 8.092105263157896, "grad_norm": 9.20547866821289, "learning_rate": 0.0001, "loss": 0.1627, "step": 1230 }, { "epoch": 8.157894736842104, "grad_norm": 8.785297393798828, "learning_rate": 0.0001, "loss": 0.1601, "step": 1240 }, { "epoch": 8.223684210526315, "grad_norm": 10.615660667419434, "learning_rate": 0.0001, "loss": 0.1622, "step": 1250 }, { "epoch": 8.289473684210526, "grad_norm": 9.420092582702637, "learning_rate": 0.0001, "loss": 0.161, "step": 1260 }, { "epoch": 8.355263157894736, "grad_norm": 8.747106552124023, "learning_rate": 0.0001, "loss": 0.1545, "step": 1270 }, { "epoch": 8.421052631578947, "grad_norm": 9.730173110961914, "learning_rate": 0.0001, "loss": 0.1587, "step": 1280 }, { "epoch": 8.486842105263158, "grad_norm": 8.109198570251465, "learning_rate": 0.0001, "loss": 0.1563, "step": 1290 }, { "epoch": 8.552631578947368, "grad_norm": 8.72352409362793, "learning_rate": 0.0001, "loss": 0.1508, "step": 1300 }, { "epoch": 8.618421052631579, "grad_norm": 9.72221565246582, "learning_rate": 0.0001, "loss": 0.1552, "step": 1310 }, { "epoch": 8.68421052631579, "grad_norm": 9.086143493652344, "learning_rate": 0.0001, "loss": 0.1535, "step": 1320 }, { "epoch": 8.75, "grad_norm": 9.754488945007324, "learning_rate": 0.0001, "loss": 0.155, "step": 1330 }, { "epoch": 8.81578947368421, "grad_norm": 9.492188453674316, "learning_rate": 0.0001, "loss": 0.1515, "step": 1340 }, { "epoch": 8.881578947368421, "grad_norm": 10.892655372619629, "learning_rate": 0.0001, "loss": 0.1532, "step": 1350 }, { "epoch": 8.947368421052632, "grad_norm": 7.42869758605957, "learning_rate": 0.0001, "loss": 0.1511, "step": 1360 }, { "epoch": 9.013157894736842, "grad_norm": 8.937113761901855, "learning_rate": 0.0001, "loss": 0.1584, "step": 1370 }, { "epoch": 9.078947368421053, "grad_norm": 10.114669799804688, "learning_rate": 0.0001, "loss": 0.1484, "step": 1380 }, { "epoch": 9.144736842105264, "grad_norm": 9.457815170288086, "learning_rate": 0.0001, "loss": 0.1492, "step": 1390 }, { "epoch": 9.210526315789474, "grad_norm": 8.469773292541504, "learning_rate": 0.0001, "loss": 0.1437, "step": 1400 }, { "epoch": 9.276315789473685, "grad_norm": 9.525146484375, "learning_rate": 0.0001, "loss": 0.1383, "step": 1410 }, { "epoch": 9.342105263157896, "grad_norm": 9.315752029418945, "learning_rate": 0.0001, "loss": 0.1388, "step": 1420 }, { "epoch": 9.407894736842104, "grad_norm": 9.242635726928711, "learning_rate": 0.0001, "loss": 0.1417, "step": 1430 }, { "epoch": 9.473684210526315, "grad_norm": 8.627606391906738, "learning_rate": 0.0001, "loss": 0.1396, "step": 1440 }, { "epoch": 9.539473684210526, "grad_norm": 9.800474166870117, "learning_rate": 0.0001, "loss": 0.1388, "step": 1450 }, { "epoch": 9.605263157894736, "grad_norm": 9.886494636535645, "learning_rate": 0.0001, "loss": 0.1384, "step": 1460 }, { "epoch": 9.671052631578947, "grad_norm": 8.103569030761719, "learning_rate": 0.0001, "loss": 0.1345, "step": 1470 }, { "epoch": 9.736842105263158, "grad_norm": 9.987874984741211, "learning_rate": 0.0001, "loss": 0.14, "step": 1480 }, { "epoch": 9.802631578947368, "grad_norm": 8.908867835998535, "learning_rate": 0.0001, "loss": 0.1329, "step": 1490 }, { "epoch": 9.868421052631579, "grad_norm": 8.019304275512695, "learning_rate": 0.0001, "loss": 0.1301, "step": 1500 }, { "epoch": 9.93421052631579, "grad_norm": 7.718055248260498, "learning_rate": 0.0001, "loss": 0.1331, "step": 1510 }, { "epoch": 10.0, "grad_norm": 7.718787670135498, "learning_rate": 0.0001, "loss": 0.1344, "step": 1520 }, { "epoch": 10.06578947368421, "grad_norm": 8.487434387207031, "learning_rate": 0.0001, "loss": 0.1307, "step": 1530 }, { "epoch": 10.131578947368421, "grad_norm": 8.06506061553955, "learning_rate": 0.0001, "loss": 0.131, "step": 1540 }, { "epoch": 10.197368421052632, "grad_norm": 7.534064292907715, "learning_rate": 0.0001, "loss": 0.1324, "step": 1550 }, { "epoch": 10.263157894736842, "grad_norm": 8.083373069763184, "learning_rate": 0.0001, "loss": 0.1315, "step": 1560 }, { "epoch": 10.328947368421053, "grad_norm": 8.27895450592041, "learning_rate": 0.0001, "loss": 0.1319, "step": 1570 }, { "epoch": 10.394736842105264, "grad_norm": 8.521573066711426, "learning_rate": 0.0001, "loss": 0.1301, "step": 1580 }, { "epoch": 10.460526315789474, "grad_norm": 9.80729866027832, "learning_rate": 0.0001, "loss": 0.1272, "step": 1590 }, { "epoch": 10.526315789473685, "grad_norm": 8.914023399353027, "learning_rate": 0.0001, "loss": 0.1356, "step": 1600 }, { "epoch": 10.592105263157894, "grad_norm": 9.150543212890625, "learning_rate": 0.0001, "loss": 0.1342, "step": 1610 }, { "epoch": 10.657894736842106, "grad_norm": 10.384299278259277, "learning_rate": 0.0001, "loss": 0.1337, "step": 1620 }, { "epoch": 10.723684210526315, "grad_norm": 8.355812072753906, "learning_rate": 0.0001, "loss": 0.1301, "step": 1630 }, { "epoch": 10.789473684210526, "grad_norm": 8.742225646972656, "learning_rate": 0.0001, "loss": 0.1243, "step": 1640 }, { "epoch": 10.855263157894736, "grad_norm": 7.470909118652344, "learning_rate": 0.0001, "loss": 0.1243, "step": 1650 }, { "epoch": 10.921052631578947, "grad_norm": 7.4198222160339355, "learning_rate": 0.0001, "loss": 0.1239, "step": 1660 }, { "epoch": 10.986842105263158, "grad_norm": 6.868049144744873, "learning_rate": 0.0001, "loss": 0.1257, "step": 1670 }, { "epoch": 11.052631578947368, "grad_norm": 7.1938090324401855, "learning_rate": 0.0001, "loss": 0.1224, "step": 1680 }, { "epoch": 11.118421052631579, "grad_norm": 8.326366424560547, "learning_rate": 0.0001, "loss": 0.1172, "step": 1690 }, { "epoch": 11.18421052631579, "grad_norm": 8.412864685058594, "learning_rate": 0.0001, "loss": 0.1195, "step": 1700 }, { "epoch": 11.25, "grad_norm": 8.174013137817383, "learning_rate": 0.0001, "loss": 0.1216, "step": 1710 }, { "epoch": 11.31578947368421, "grad_norm": 7.741543292999268, "learning_rate": 0.0001, "loss": 0.1223, "step": 1720 }, { "epoch": 11.381578947368421, "grad_norm": 7.337162494659424, "learning_rate": 0.0001, "loss": 0.1221, "step": 1730 }, { "epoch": 11.447368421052632, "grad_norm": 7.082222938537598, "learning_rate": 0.0001, "loss": 0.1205, "step": 1740 }, { "epoch": 11.513157894736842, "grad_norm": 7.397077560424805, "learning_rate": 0.0001, "loss": 0.1187, "step": 1750 }, { "epoch": 11.578947368421053, "grad_norm": 7.9401469230651855, "learning_rate": 0.0001, "loss": 0.1206, "step": 1760 }, { "epoch": 11.644736842105264, "grad_norm": 7.378265380859375, "learning_rate": 0.0001, "loss": 0.1204, "step": 1770 }, { "epoch": 11.710526315789474, "grad_norm": 7.089422225952148, "learning_rate": 0.0001, "loss": 0.1165, "step": 1780 }, { "epoch": 11.776315789473685, "grad_norm": 8.4090576171875, "learning_rate": 0.0001, "loss": 0.1149, "step": 1790 }, { "epoch": 11.842105263157894, "grad_norm": 6.9335174560546875, "learning_rate": 0.0001, "loss": 0.1171, "step": 1800 }, { "epoch": 11.907894736842106, "grad_norm": 6.528397560119629, "learning_rate": 0.0001, "loss": 0.1194, "step": 1810 }, { "epoch": 11.973684210526315, "grad_norm": 6.176355361938477, "learning_rate": 0.0001, "loss": 0.1151, "step": 1820 }, { "epoch": 12.039473684210526, "grad_norm": 6.412083625793457, "learning_rate": 0.0001, "loss": 0.1119, "step": 1830 }, { "epoch": 12.105263157894736, "grad_norm": 7.9793572425842285, "learning_rate": 0.0001, "loss": 0.1198, "step": 1840 }, { "epoch": 12.171052631578947, "grad_norm": 7.067323207855225, "learning_rate": 0.0001, "loss": 0.1168, "step": 1850 }, { "epoch": 12.236842105263158, "grad_norm": 7.075022220611572, "learning_rate": 0.0001, "loss": 0.112, "step": 1860 }, { "epoch": 12.302631578947368, "grad_norm": 7.169610977172852, "learning_rate": 0.0001, "loss": 0.1082, "step": 1870 }, { "epoch": 12.368421052631579, "grad_norm": 6.910126209259033, "learning_rate": 0.0001, "loss": 0.1071, "step": 1880 }, { "epoch": 12.43421052631579, "grad_norm": 8.200340270996094, "learning_rate": 0.0001, "loss": 0.1086, "step": 1890 }, { "epoch": 12.5, "grad_norm": 8.145902633666992, "learning_rate": 0.0001, "loss": 0.1139, "step": 1900 }, { "epoch": 12.56578947368421, "grad_norm": 7.244836330413818, "learning_rate": 0.0001, "loss": 0.1073, "step": 1910 }, { "epoch": 12.631578947368421, "grad_norm": 6.382747173309326, "learning_rate": 0.0001, "loss": 0.1057, "step": 1920 }, { "epoch": 12.697368421052632, "grad_norm": 7.7286834716796875, "learning_rate": 0.0001, "loss": 0.1102, "step": 1930 }, { "epoch": 12.763157894736842, "grad_norm": 8.407709121704102, "learning_rate": 0.0001, "loss": 0.1086, "step": 1940 }, { "epoch": 12.828947368421053, "grad_norm": 8.538996696472168, "learning_rate": 0.0001, "loss": 0.1141, "step": 1950 }, { "epoch": 12.894736842105264, "grad_norm": 6.654195308685303, "learning_rate": 0.0001, "loss": 0.1091, "step": 1960 }, { "epoch": 12.960526315789474, "grad_norm": 6.598415374755859, "learning_rate": 0.0001, "loss": 0.1063, "step": 1970 }, { "epoch": 13.026315789473685, "grad_norm": 7.287312984466553, "learning_rate": 0.0001, "loss": 0.108, "step": 1980 }, { "epoch": 13.092105263157896, "grad_norm": 6.205286979675293, "learning_rate": 0.0001, "loss": 0.1015, "step": 1990 }, { "epoch": 13.157894736842104, "grad_norm": 6.776149749755859, "learning_rate": 0.0001, "loss": 0.1064, "step": 2000 }, { "epoch": 13.223684210526315, "grad_norm": 6.995049476623535, "learning_rate": 0.0001, "loss": 0.1046, "step": 2010 }, { "epoch": 13.289473684210526, "grad_norm": 6.1356024742126465, "learning_rate": 0.0001, "loss": 0.1077, "step": 2020 }, { "epoch": 13.355263157894736, "grad_norm": 7.5055694580078125, "learning_rate": 0.0001, "loss": 0.1026, "step": 2030 }, { "epoch": 13.421052631578947, "grad_norm": 7.035951614379883, "learning_rate": 0.0001, "loss": 0.107, "step": 2040 }, { "epoch": 13.486842105263158, "grad_norm": 6.5295257568359375, "learning_rate": 0.0001, "loss": 0.1055, "step": 2050 }, { "epoch": 13.552631578947368, "grad_norm": 6.732638835906982, "learning_rate": 0.0001, "loss": 0.1041, "step": 2060 }, { "epoch": 13.618421052631579, "grad_norm": 7.681493759155273, "learning_rate": 0.0001, "loss": 0.1016, "step": 2070 }, { "epoch": 13.68421052631579, "grad_norm": 7.217351913452148, "learning_rate": 0.0001, "loss": 0.1049, "step": 2080 }, { "epoch": 13.75, "grad_norm": 6.3972296714782715, "learning_rate": 0.0001, "loss": 0.1027, "step": 2090 }, { "epoch": 13.81578947368421, "grad_norm": 6.423898696899414, "learning_rate": 0.0001, "loss": 0.1055, "step": 2100 }, { "epoch": 13.881578947368421, "grad_norm": 7.37164831161499, "learning_rate": 0.0001, "loss": 0.1055, "step": 2110 }, { "epoch": 13.947368421052632, "grad_norm": 6.01750373840332, "learning_rate": 0.0001, "loss": 0.1017, "step": 2120 }, { "epoch": 14.013157894736842, "grad_norm": 6.607297420501709, "learning_rate": 0.0001, "loss": 0.1051, "step": 2130 }, { "epoch": 14.078947368421053, "grad_norm": 7.331927299499512, "learning_rate": 0.0001, "loss": 0.101, "step": 2140 }, { "epoch": 14.144736842105264, "grad_norm": 6.072599411010742, "learning_rate": 0.0001, "loss": 0.0969, "step": 2150 }, { "epoch": 14.210526315789474, "grad_norm": 7.233094215393066, "learning_rate": 0.0001, "loss": 0.1002, "step": 2160 }, { "epoch": 14.276315789473685, "grad_norm": 6.769193649291992, "learning_rate": 0.0001, "loss": 0.0983, "step": 2170 }, { "epoch": 14.342105263157896, "grad_norm": 5.834155559539795, "learning_rate": 0.0001, "loss": 0.0988, "step": 2180 }, { "epoch": 14.407894736842104, "grad_norm": 4.882734775543213, "learning_rate": 0.0001, "loss": 0.0957, "step": 2190 }, { "epoch": 14.473684210526315, "grad_norm": 5.751882076263428, "learning_rate": 0.0001, "loss": 0.098, "step": 2200 }, { "epoch": 14.539473684210526, "grad_norm": 6.713118553161621, "learning_rate": 0.0001, "loss": 0.1035, "step": 2210 }, { "epoch": 14.605263157894736, "grad_norm": 5.757167816162109, "learning_rate": 0.0001, "loss": 0.0967, "step": 2220 }, { "epoch": 14.671052631578947, "grad_norm": 6.847421169281006, "learning_rate": 0.0001, "loss": 0.102, "step": 2230 }, { "epoch": 14.736842105263158, "grad_norm": 7.241466522216797, "learning_rate": 0.0001, "loss": 0.1001, "step": 2240 }, { "epoch": 14.802631578947368, "grad_norm": 6.223239898681641, "learning_rate": 0.0001, "loss": 0.0987, "step": 2250 }, { "epoch": 14.868421052631579, "grad_norm": 6.875740051269531, "learning_rate": 0.0001, "loss": 0.0971, "step": 2260 }, { "epoch": 14.93421052631579, "grad_norm": 6.629533290863037, "learning_rate": 0.0001, "loss": 0.1009, "step": 2270 }, { "epoch": 15.0, "grad_norm": 6.687826156616211, "learning_rate": 0.0001, "loss": 0.0966, "step": 2280 }, { "epoch": 15.06578947368421, "grad_norm": 7.25020170211792, "learning_rate": 0.0001, "loss": 0.0968, "step": 2290 }, { "epoch": 15.131578947368421, "grad_norm": 6.609081268310547, "learning_rate": 0.0001, "loss": 0.0969, "step": 2300 }, { "epoch": 15.197368421052632, "grad_norm": 5.925322532653809, "learning_rate": 0.0001, "loss": 0.0952, "step": 2310 }, { "epoch": 15.263157894736842, "grad_norm": 5.882940769195557, "learning_rate": 0.0001, "loss": 0.0933, "step": 2320 }, { "epoch": 15.328947368421053, "grad_norm": 5.915431499481201, "learning_rate": 0.0001, "loss": 0.0949, "step": 2330 }, { "epoch": 15.394736842105264, "grad_norm": 5.569434642791748, "learning_rate": 0.0001, "loss": 0.092, "step": 2340 }, { "epoch": 15.460526315789474, "grad_norm": 6.778213977813721, "learning_rate": 0.0001, "loss": 0.0922, "step": 2350 }, { "epoch": 15.526315789473685, "grad_norm": 7.685764312744141, "learning_rate": 0.0001, "loss": 0.0951, "step": 2360 }, { "epoch": 15.592105263157894, "grad_norm": 5.782797336578369, "learning_rate": 0.0001, "loss": 0.0918, "step": 2370 }, { "epoch": 15.657894736842106, "grad_norm": 5.513795852661133, "learning_rate": 0.0001, "loss": 0.0891, "step": 2380 }, { "epoch": 15.723684210526315, "grad_norm": 6.363500595092773, "learning_rate": 0.0001, "loss": 0.0888, "step": 2390 }, { "epoch": 15.789473684210526, "grad_norm": 5.4419965744018555, "learning_rate": 0.0001, "loss": 0.0925, "step": 2400 }, { "epoch": 15.855263157894736, "grad_norm": 5.667367935180664, "learning_rate": 0.0001, "loss": 0.0914, "step": 2410 }, { "epoch": 15.921052631578947, "grad_norm": 7.027116298675537, "learning_rate": 0.0001, "loss": 0.0894, "step": 2420 }, { "epoch": 15.986842105263158, "grad_norm": 6.267201900482178, "learning_rate": 0.0001, "loss": 0.0908, "step": 2430 }, { "epoch": 16.05263157894737, "grad_norm": 5.420660495758057, "learning_rate": 0.0001, "loss": 0.087, "step": 2440 }, { "epoch": 16.11842105263158, "grad_norm": 6.2726593017578125, "learning_rate": 0.0001, "loss": 0.0875, "step": 2450 }, { "epoch": 16.18421052631579, "grad_norm": 6.008795261383057, "learning_rate": 0.0001, "loss": 0.0907, "step": 2460 }, { "epoch": 16.25, "grad_norm": 5.749043941497803, "learning_rate": 0.0001, "loss": 0.0901, "step": 2470 }, { "epoch": 16.31578947368421, "grad_norm": 5.430087566375732, "learning_rate": 0.0001, "loss": 0.0886, "step": 2480 }, { "epoch": 16.38157894736842, "grad_norm": 5.837812423706055, "learning_rate": 0.0001, "loss": 0.094, "step": 2490 }, { "epoch": 16.44736842105263, "grad_norm": 6.450821876525879, "learning_rate": 0.0001, "loss": 0.0889, "step": 2500 }, { "epoch": 16.513157894736842, "grad_norm": 5.3272480964660645, "learning_rate": 0.0001, "loss": 0.0925, "step": 2510 }, { "epoch": 16.57894736842105, "grad_norm": 6.572229385375977, "learning_rate": 0.0001, "loss": 0.0872, "step": 2520 }, { "epoch": 16.644736842105264, "grad_norm": 5.057812213897705, "learning_rate": 0.0001, "loss": 0.0885, "step": 2530 }, { "epoch": 16.710526315789473, "grad_norm": 5.950173854827881, "learning_rate": 0.0001, "loss": 0.0874, "step": 2540 }, { "epoch": 16.776315789473685, "grad_norm": 5.652223110198975, "learning_rate": 0.0001, "loss": 0.0851, "step": 2550 }, { "epoch": 16.842105263157894, "grad_norm": 5.964550971984863, "learning_rate": 0.0001, "loss": 0.0816, "step": 2560 }, { "epoch": 16.907894736842106, "grad_norm": 6.1443610191345215, "learning_rate": 0.0001, "loss": 0.0928, "step": 2570 }, { "epoch": 16.973684210526315, "grad_norm": 6.2366251945495605, "learning_rate": 0.0001, "loss": 0.0838, "step": 2580 }, { "epoch": 17.039473684210527, "grad_norm": 5.735668659210205, "learning_rate": 0.0001, "loss": 0.0819, "step": 2590 }, { "epoch": 17.105263157894736, "grad_norm": 5.684398651123047, "learning_rate": 0.0001, "loss": 0.0869, "step": 2600 }, { "epoch": 17.17105263157895, "grad_norm": 5.294824600219727, "learning_rate": 0.0001, "loss": 0.0851, "step": 2610 }, { "epoch": 17.236842105263158, "grad_norm": 6.483946800231934, "learning_rate": 0.0001, "loss": 0.0929, "step": 2620 }, { "epoch": 17.30263157894737, "grad_norm": 5.532753944396973, "learning_rate": 0.0001, "loss": 0.0809, "step": 2630 }, { "epoch": 17.36842105263158, "grad_norm": 5.64046573638916, "learning_rate": 0.0001, "loss": 0.0801, "step": 2640 }, { "epoch": 17.43421052631579, "grad_norm": 6.075127601623535, "learning_rate": 0.0001, "loss": 0.083, "step": 2650 }, { "epoch": 17.5, "grad_norm": 6.17835807800293, "learning_rate": 0.0001, "loss": 0.085, "step": 2660 }, { "epoch": 17.56578947368421, "grad_norm": 5.5751633644104, "learning_rate": 0.0001, "loss": 0.0841, "step": 2670 }, { "epoch": 17.63157894736842, "grad_norm": 5.194972038269043, "learning_rate": 0.0001, "loss": 0.0833, "step": 2680 }, { "epoch": 17.69736842105263, "grad_norm": 5.882618427276611, "learning_rate": 0.0001, "loss": 0.0863, "step": 2690 }, { "epoch": 17.763157894736842, "grad_norm": 5.743243217468262, "learning_rate": 0.0001, "loss": 0.0855, "step": 2700 }, { "epoch": 17.82894736842105, "grad_norm": 5.605013847351074, "learning_rate": 0.0001, "loss": 0.0828, "step": 2710 }, { "epoch": 17.894736842105264, "grad_norm": 4.955957889556885, "learning_rate": 0.0001, "loss": 0.0881, "step": 2720 }, { "epoch": 17.960526315789473, "grad_norm": 5.226777076721191, "learning_rate": 0.0001, "loss": 0.0851, "step": 2730 }, { "epoch": 18.026315789473685, "grad_norm": 4.742803573608398, "learning_rate": 0.0001, "loss": 0.0852, "step": 2740 }, { "epoch": 18.092105263157894, "grad_norm": 5.126669883728027, "learning_rate": 0.0001, "loss": 0.0826, "step": 2750 }, { "epoch": 18.157894736842106, "grad_norm": 5.684654235839844, "learning_rate": 0.0001, "loss": 0.0813, "step": 2760 }, { "epoch": 18.223684210526315, "grad_norm": 5.026456832885742, "learning_rate": 0.0001, "loss": 0.0824, "step": 2770 }, { "epoch": 18.289473684210527, "grad_norm": 5.902011394500732, "learning_rate": 0.0001, "loss": 0.0835, "step": 2780 }, { "epoch": 18.355263157894736, "grad_norm": 4.694034099578857, "learning_rate": 0.0001, "loss": 0.0794, "step": 2790 }, { "epoch": 18.42105263157895, "grad_norm": 5.5888872146606445, "learning_rate": 0.0001, "loss": 0.0767, "step": 2800 }, { "epoch": 18.486842105263158, "grad_norm": 5.113994598388672, "learning_rate": 0.0001, "loss": 0.0763, "step": 2810 }, { "epoch": 18.55263157894737, "grad_norm": 4.98398494720459, "learning_rate": 0.0001, "loss": 0.0811, "step": 2820 }, { "epoch": 18.61842105263158, "grad_norm": 5.536447525024414, "learning_rate": 0.0001, "loss": 0.081, "step": 2830 }, { "epoch": 18.68421052631579, "grad_norm": 5.4956374168396, "learning_rate": 0.0001, "loss": 0.0839, "step": 2840 }, { "epoch": 18.75, "grad_norm": 5.086822986602783, "learning_rate": 0.0001, "loss": 0.0821, "step": 2850 }, { "epoch": 18.81578947368421, "grad_norm": 4.801605701446533, "learning_rate": 0.0001, "loss": 0.0858, "step": 2860 }, { "epoch": 18.88157894736842, "grad_norm": 4.682705879211426, "learning_rate": 0.0001, "loss": 0.0874, "step": 2870 }, { "epoch": 18.94736842105263, "grad_norm": 4.058840751647949, "learning_rate": 0.0001, "loss": 0.0816, "step": 2880 }, { "epoch": 19.013157894736842, "grad_norm": 5.074926376342773, "learning_rate": 0.0001, "loss": 0.0801, "step": 2890 }, { "epoch": 19.07894736842105, "grad_norm": 4.428182125091553, "learning_rate": 0.0001, "loss": 0.0795, "step": 2900 }, { "epoch": 19.144736842105264, "grad_norm": 5.359776020050049, "learning_rate": 0.0001, "loss": 0.0758, "step": 2910 }, { "epoch": 19.210526315789473, "grad_norm": 4.843128204345703, "learning_rate": 0.0001, "loss": 0.0755, "step": 2920 }, { "epoch": 19.276315789473685, "grad_norm": 5.099305629730225, "learning_rate": 0.0001, "loss": 0.0842, "step": 2930 }, { "epoch": 19.342105263157894, "grad_norm": 4.722768306732178, "learning_rate": 0.0001, "loss": 0.0797, "step": 2940 }, { "epoch": 19.407894736842106, "grad_norm": 4.998010635375977, "learning_rate": 0.0001, "loss": 0.08, "step": 2950 }, { "epoch": 19.473684210526315, "grad_norm": 4.9191436767578125, "learning_rate": 0.0001, "loss": 0.0769, "step": 2960 }, { "epoch": 19.539473684210527, "grad_norm": 5.0299296379089355, "learning_rate": 0.0001, "loss": 0.0766, "step": 2970 }, { "epoch": 19.605263157894736, "grad_norm": 4.707181930541992, "learning_rate": 0.0001, "loss": 0.0778, "step": 2980 }, { "epoch": 19.67105263157895, "grad_norm": 5.107912063598633, "learning_rate": 0.0001, "loss": 0.0806, "step": 2990 }, { "epoch": 19.736842105263158, "grad_norm": 4.7845234870910645, "learning_rate": 0.0001, "loss": 0.0792, "step": 3000 }, { "epoch": 19.80263157894737, "grad_norm": 5.166806221008301, "learning_rate": 0.0001, "loss": 0.0753, "step": 3010 }, { "epoch": 19.86842105263158, "grad_norm": 4.91328239440918, "learning_rate": 0.0001, "loss": 0.0756, "step": 3020 }, { "epoch": 19.93421052631579, "grad_norm": 4.633301734924316, "learning_rate": 0.0001, "loss": 0.0765, "step": 3030 }, { "epoch": 20.0, "grad_norm": 4.5034003257751465, "learning_rate": 0.0001, "loss": 0.0736, "step": 3040 }, { "epoch": 20.06578947368421, "grad_norm": 4.714102745056152, "learning_rate": 0.0001, "loss": 0.0781, "step": 3050 }, { "epoch": 20.13157894736842, "grad_norm": 4.558036804199219, "learning_rate": 0.0001, "loss": 0.0718, "step": 3060 }, { "epoch": 20.19736842105263, "grad_norm": 5.834008693695068, "learning_rate": 0.0001, "loss": 0.0753, "step": 3070 }, { "epoch": 20.263157894736842, "grad_norm": 5.136417388916016, "learning_rate": 0.0001, "loss": 0.0777, "step": 3080 }, { "epoch": 20.32894736842105, "grad_norm": 4.8221755027771, "learning_rate": 0.0001, "loss": 0.0769, "step": 3090 }, { "epoch": 20.394736842105264, "grad_norm": 4.458479404449463, "learning_rate": 0.0001, "loss": 0.0743, "step": 3100 }, { "epoch": 20.460526315789473, "grad_norm": 5.055057048797607, "learning_rate": 0.0001, "loss": 0.074, "step": 3110 }, { "epoch": 20.526315789473685, "grad_norm": 4.767285346984863, "learning_rate": 0.0001, "loss": 0.0706, "step": 3120 }, { "epoch": 20.592105263157894, "grad_norm": 5.064611911773682, "learning_rate": 0.0001, "loss": 0.069, "step": 3130 }, { "epoch": 20.657894736842106, "grad_norm": 4.318564414978027, "learning_rate": 0.0001, "loss": 0.0718, "step": 3140 }, { "epoch": 20.723684210526315, "grad_norm": 4.05781888961792, "learning_rate": 0.0001, "loss": 0.0696, "step": 3150 }, { "epoch": 20.789473684210527, "grad_norm": 4.847416877746582, "learning_rate": 0.0001, "loss": 0.0707, "step": 3160 }, { "epoch": 20.855263157894736, "grad_norm": 4.064643859863281, "learning_rate": 0.0001, "loss": 0.074, "step": 3170 }, { "epoch": 20.92105263157895, "grad_norm": 4.087489604949951, "learning_rate": 0.0001, "loss": 0.0712, "step": 3180 }, { "epoch": 20.986842105263158, "grad_norm": 4.515960216522217, "learning_rate": 0.0001, "loss": 0.0737, "step": 3190 }, { "epoch": 21.05263157894737, "grad_norm": 4.27933931350708, "learning_rate": 0.0001, "loss": 0.0734, "step": 3200 }, { "epoch": 21.11842105263158, "grad_norm": 5.128896236419678, "learning_rate": 0.0001, "loss": 0.0703, "step": 3210 }, { "epoch": 21.18421052631579, "grad_norm": 4.698765277862549, "learning_rate": 0.0001, "loss": 0.0741, "step": 3220 }, { "epoch": 21.25, "grad_norm": 4.387735366821289, "learning_rate": 0.0001, "loss": 0.0724, "step": 3230 }, { "epoch": 21.31578947368421, "grad_norm": 4.451697826385498, "learning_rate": 0.0001, "loss": 0.072, "step": 3240 }, { "epoch": 21.38157894736842, "grad_norm": 4.852103233337402, "learning_rate": 0.0001, "loss": 0.0718, "step": 3250 }, { "epoch": 21.44736842105263, "grad_norm": 4.674655437469482, "learning_rate": 0.0001, "loss": 0.0721, "step": 3260 }, { "epoch": 21.513157894736842, "grad_norm": 4.349355220794678, "learning_rate": 0.0001, "loss": 0.0728, "step": 3270 }, { "epoch": 21.57894736842105, "grad_norm": 4.743768692016602, "learning_rate": 0.0001, "loss": 0.0723, "step": 3280 }, { "epoch": 21.644736842105264, "grad_norm": 4.1435227394104, "learning_rate": 0.0001, "loss": 0.0767, "step": 3290 }, { "epoch": 21.710526315789473, "grad_norm": 4.883895397186279, "learning_rate": 0.0001, "loss": 0.0712, "step": 3300 }, { "epoch": 21.776315789473685, "grad_norm": 4.2418904304504395, "learning_rate": 0.0001, "loss": 0.0714, "step": 3310 }, { "epoch": 21.842105263157894, "grad_norm": 4.920485973358154, "learning_rate": 0.0001, "loss": 0.0713, "step": 3320 }, { "epoch": 21.907894736842106, "grad_norm": 4.722439765930176, "learning_rate": 0.0001, "loss": 0.0755, "step": 3330 }, { "epoch": 21.973684210526315, "grad_norm": 4.32571268081665, "learning_rate": 0.0001, "loss": 0.0711, "step": 3340 }, { "epoch": 22.039473684210527, "grad_norm": 4.815313339233398, "learning_rate": 0.0001, "loss": 0.0705, "step": 3350 }, { "epoch": 22.105263157894736, "grad_norm": 4.726692199707031, "learning_rate": 0.0001, "loss": 0.0691, "step": 3360 }, { "epoch": 22.17105263157895, "grad_norm": 3.9656684398651123, "learning_rate": 0.0001, "loss": 0.0697, "step": 3370 }, { "epoch": 22.236842105263158, "grad_norm": 4.352407932281494, "learning_rate": 0.0001, "loss": 0.0664, "step": 3380 }, { "epoch": 22.30263157894737, "grad_norm": 4.362992763519287, "learning_rate": 0.0001, "loss": 0.0726, "step": 3390 }, { "epoch": 22.36842105263158, "grad_norm": 4.78966760635376, "learning_rate": 0.0001, "loss": 0.0705, "step": 3400 }, { "epoch": 22.43421052631579, "grad_norm": 4.368546962738037, "learning_rate": 0.0001, "loss": 0.0664, "step": 3410 }, { "epoch": 22.5, "grad_norm": 3.869096040725708, "learning_rate": 0.0001, "loss": 0.0661, "step": 3420 }, { "epoch": 22.56578947368421, "grad_norm": 4.45152473449707, "learning_rate": 0.0001, "loss": 0.0683, "step": 3430 }, { "epoch": 22.63157894736842, "grad_norm": 4.191099166870117, "learning_rate": 0.0001, "loss": 0.0688, "step": 3440 }, { "epoch": 22.69736842105263, "grad_norm": 3.653599977493286, "learning_rate": 0.0001, "loss": 0.0689, "step": 3450 }, { "epoch": 22.763157894736842, "grad_norm": 4.751868724822998, "learning_rate": 0.0001, "loss": 0.0684, "step": 3460 }, { "epoch": 22.82894736842105, "grad_norm": 4.288401126861572, "learning_rate": 0.0001, "loss": 0.0699, "step": 3470 }, { "epoch": 22.894736842105264, "grad_norm": 4.474709987640381, "learning_rate": 0.0001, "loss": 0.0704, "step": 3480 }, { "epoch": 22.960526315789473, "grad_norm": 3.779205560684204, "learning_rate": 0.0001, "loss": 0.0686, "step": 3490 }, { "epoch": 23.026315789473685, "grad_norm": 3.9008610248565674, "learning_rate": 0.0001, "loss": 0.0695, "step": 3500 }, { "epoch": 23.092105263157894, "grad_norm": 3.924901247024536, "learning_rate": 0.0001, "loss": 0.0654, "step": 3510 }, { "epoch": 23.157894736842106, "grad_norm": 3.7847535610198975, "learning_rate": 0.0001, "loss": 0.0653, "step": 3520 }, { "epoch": 23.223684210526315, "grad_norm": 4.4501190185546875, "learning_rate": 0.0001, "loss": 0.0639, "step": 3530 }, { "epoch": 23.289473684210527, "grad_norm": 4.245830059051514, "learning_rate": 0.0001, "loss": 0.0707, "step": 3540 }, { "epoch": 23.355263157894736, "grad_norm": 3.8827168941497803, "learning_rate": 0.0001, "loss": 0.0667, "step": 3550 }, { "epoch": 23.42105263157895, "grad_norm": 3.9480772018432617, "learning_rate": 0.0001, "loss": 0.0667, "step": 3560 }, { "epoch": 23.486842105263158, "grad_norm": 4.582841873168945, "learning_rate": 0.0001, "loss": 0.0667, "step": 3570 }, { "epoch": 23.55263157894737, "grad_norm": 3.9947125911712646, "learning_rate": 0.0001, "loss": 0.0688, "step": 3580 }, { "epoch": 23.61842105263158, "grad_norm": 3.1520323753356934, "learning_rate": 0.0001, "loss": 0.0654, "step": 3590 }, { "epoch": 23.68421052631579, "grad_norm": 4.064833641052246, "learning_rate": 0.0001, "loss": 0.0711, "step": 3600 }, { "epoch": 23.75, "grad_norm": 4.018986701965332, "learning_rate": 0.0001, "loss": 0.068, "step": 3610 }, { "epoch": 23.81578947368421, "grad_norm": 4.334568500518799, "learning_rate": 0.0001, "loss": 0.0679, "step": 3620 }, { "epoch": 23.88157894736842, "grad_norm": 3.937211513519287, "learning_rate": 0.0001, "loss": 0.0635, "step": 3630 }, { "epoch": 23.94736842105263, "grad_norm": 3.9586544036865234, "learning_rate": 0.0001, "loss": 0.0639, "step": 3640 }, { "epoch": 24.013157894736842, "grad_norm": 3.5492544174194336, "learning_rate": 0.0001, "loss": 0.0614, "step": 3650 }, { "epoch": 24.07894736842105, "grad_norm": 4.145535469055176, "learning_rate": 0.0001, "loss": 0.0639, "step": 3660 }, { "epoch": 24.144736842105264, "grad_norm": 4.084836483001709, "learning_rate": 0.0001, "loss": 0.0639, "step": 3670 }, { "epoch": 24.210526315789473, "grad_norm": 3.6263599395751953, "learning_rate": 0.0001, "loss": 0.0674, "step": 3680 }, { "epoch": 24.276315789473685, "grad_norm": 4.040128231048584, "learning_rate": 0.0001, "loss": 0.067, "step": 3690 }, { "epoch": 24.342105263157894, "grad_norm": 3.945181131362915, "learning_rate": 0.0001, "loss": 0.0648, "step": 3700 }, { "epoch": 24.407894736842106, "grad_norm": 3.7952756881713867, "learning_rate": 0.0001, "loss": 0.0709, "step": 3710 }, { "epoch": 24.473684210526315, "grad_norm": 4.249261379241943, "learning_rate": 0.0001, "loss": 0.063, "step": 3720 }, { "epoch": 24.539473684210527, "grad_norm": 4.466711521148682, "learning_rate": 0.0001, "loss": 0.0715, "step": 3730 }, { "epoch": 24.605263157894736, "grad_norm": 3.2659964561462402, "learning_rate": 0.0001, "loss": 0.0654, "step": 3740 }, { "epoch": 24.67105263157895, "grad_norm": 3.9412729740142822, "learning_rate": 0.0001, "loss": 0.0644, "step": 3750 }, { "epoch": 24.736842105263158, "grad_norm": 3.85429310798645, "learning_rate": 0.0001, "loss": 0.0644, "step": 3760 }, { "epoch": 24.80263157894737, "grad_norm": 4.128312110900879, "learning_rate": 0.0001, "loss": 0.0629, "step": 3770 }, { "epoch": 24.86842105263158, "grad_norm": 4.078192234039307, "learning_rate": 0.0001, "loss": 0.0638, "step": 3780 }, { "epoch": 24.93421052631579, "grad_norm": 3.702103853225708, "learning_rate": 0.0001, "loss": 0.0648, "step": 3790 }, { "epoch": 25.0, "grad_norm": 3.5492160320281982, "learning_rate": 0.0001, "loss": 0.0586, "step": 3800 }, { "epoch": 25.06578947368421, "grad_norm": 3.5674662590026855, "learning_rate": 0.0001, "loss": 0.0627, "step": 3810 }, { "epoch": 25.13157894736842, "grad_norm": 4.175353527069092, "learning_rate": 0.0001, "loss": 0.0611, "step": 3820 }, { "epoch": 25.19736842105263, "grad_norm": 3.6022441387176514, "learning_rate": 0.0001, "loss": 0.063, "step": 3830 }, { "epoch": 25.263157894736842, "grad_norm": 3.388962745666504, "learning_rate": 0.0001, "loss": 0.0581, "step": 3840 }, { "epoch": 25.32894736842105, "grad_norm": 3.591062545776367, "learning_rate": 0.0001, "loss": 0.0625, "step": 3850 }, { "epoch": 25.394736842105264, "grad_norm": 3.834019660949707, "learning_rate": 0.0001, "loss": 0.06, "step": 3860 }, { "epoch": 25.460526315789473, "grad_norm": 3.805022716522217, "learning_rate": 0.0001, "loss": 0.0613, "step": 3870 }, { "epoch": 25.526315789473685, "grad_norm": 3.719787120819092, "learning_rate": 0.0001, "loss": 0.064, "step": 3880 }, { "epoch": 25.592105263157894, "grad_norm": 3.2884254455566406, "learning_rate": 0.0001, "loss": 0.0604, "step": 3890 }, { "epoch": 25.657894736842106, "grad_norm": 3.4764997959136963, "learning_rate": 0.0001, "loss": 0.0626, "step": 3900 }, { "epoch": 25.723684210526315, "grad_norm": 3.6451432704925537, "learning_rate": 0.0001, "loss": 0.061, "step": 3910 }, { "epoch": 25.789473684210527, "grad_norm": 3.4945576190948486, "learning_rate": 0.0001, "loss": 0.0697, "step": 3920 }, { "epoch": 25.855263157894736, "grad_norm": 3.495290756225586, "learning_rate": 0.0001, "loss": 0.0651, "step": 3930 }, { "epoch": 25.92105263157895, "grad_norm": 3.7976269721984863, "learning_rate": 0.0001, "loss": 0.0658, "step": 3940 }, { "epoch": 25.986842105263158, "grad_norm": 3.6578562259674072, "learning_rate": 0.0001, "loss": 0.065, "step": 3950 }, { "epoch": 26.05263157894737, "grad_norm": 3.2625396251678467, "learning_rate": 0.0001, "loss": 0.0589, "step": 3960 }, { "epoch": 26.11842105263158, "grad_norm": 3.7619807720184326, "learning_rate": 0.0001, "loss": 0.0599, "step": 3970 }, { "epoch": 26.18421052631579, "grad_norm": 2.8766796588897705, "learning_rate": 0.0001, "loss": 0.0607, "step": 3980 }, { "epoch": 26.25, "grad_norm": 3.315241813659668, "learning_rate": 0.0001, "loss": 0.0591, "step": 3990 }, { "epoch": 26.31578947368421, "grad_norm": 3.3254284858703613, "learning_rate": 0.0001, "loss": 0.0629, "step": 4000 }, { "epoch": 26.38157894736842, "grad_norm": 3.8001041412353516, "learning_rate": 0.0001, "loss": 0.0616, "step": 4010 }, { "epoch": 26.44736842105263, "grad_norm": 3.3354110717773438, "learning_rate": 0.0001, "loss": 0.061, "step": 4020 }, { "epoch": 26.513157894736842, "grad_norm": 3.633927345275879, "learning_rate": 0.0001, "loss": 0.0666, "step": 4030 }, { "epoch": 26.57894736842105, "grad_norm": 3.6385960578918457, "learning_rate": 0.0001, "loss": 0.066, "step": 4040 }, { "epoch": 26.644736842105264, "grad_norm": 3.731023073196411, "learning_rate": 0.0001, "loss": 0.0649, "step": 4050 }, { "epoch": 26.710526315789473, "grad_norm": 3.3314356803894043, "learning_rate": 0.0001, "loss": 0.0637, "step": 4060 }, { "epoch": 26.776315789473685, "grad_norm": 3.397261142730713, "learning_rate": 0.0001, "loss": 0.0612, "step": 4070 }, { "epoch": 26.842105263157894, "grad_norm": 2.887632131576538, "learning_rate": 0.0001, "loss": 0.0638, "step": 4080 }, { "epoch": 26.907894736842106, "grad_norm": 3.4083003997802734, "learning_rate": 0.0001, "loss": 0.0594, "step": 4090 }, { "epoch": 26.973684210526315, "grad_norm": 3.3727428913116455, "learning_rate": 0.0001, "loss": 0.062, "step": 4100 }, { "epoch": 27.039473684210527, "grad_norm": 3.0372891426086426, "learning_rate": 0.0001, "loss": 0.0647, "step": 4110 }, { "epoch": 27.105263157894736, "grad_norm": 3.6236624717712402, "learning_rate": 0.0001, "loss": 0.0578, "step": 4120 }, { "epoch": 27.17105263157895, "grad_norm": 3.4702653884887695, "learning_rate": 0.0001, "loss": 0.0588, "step": 4130 }, { "epoch": 27.236842105263158, "grad_norm": 3.2590980529785156, "learning_rate": 0.0001, "loss": 0.0612, "step": 4140 }, { "epoch": 27.30263157894737, "grad_norm": 3.1996586322784424, "learning_rate": 0.0001, "loss": 0.061, "step": 4150 }, { "epoch": 27.36842105263158, "grad_norm": 3.2794950008392334, "learning_rate": 0.0001, "loss": 0.0555, "step": 4160 }, { "epoch": 27.43421052631579, "grad_norm": 3.267763614654541, "learning_rate": 0.0001, "loss": 0.057, "step": 4170 }, { "epoch": 27.5, "grad_norm": 3.489725351333618, "learning_rate": 0.0001, "loss": 0.0607, "step": 4180 }, { "epoch": 27.56578947368421, "grad_norm": 3.3513667583465576, "learning_rate": 0.0001, "loss": 0.0595, "step": 4190 }, { "epoch": 27.63157894736842, "grad_norm": 3.110064744949341, "learning_rate": 0.0001, "loss": 0.0563, "step": 4200 }, { "epoch": 27.69736842105263, "grad_norm": 3.10685396194458, "learning_rate": 0.0001, "loss": 0.0595, "step": 4210 }, { "epoch": 27.763157894736842, "grad_norm": 3.10368013381958, "learning_rate": 0.0001, "loss": 0.0565, "step": 4220 }, { "epoch": 27.82894736842105, "grad_norm": 3.396838903427124, "learning_rate": 0.0001, "loss": 0.0592, "step": 4230 }, { "epoch": 27.894736842105264, "grad_norm": 3.3157389163970947, "learning_rate": 0.0001, "loss": 0.0633, "step": 4240 }, { "epoch": 27.960526315789473, "grad_norm": 3.303504228591919, "learning_rate": 0.0001, "loss": 0.0546, "step": 4250 }, { "epoch": 28.026315789473685, "grad_norm": 3.533159017562866, "learning_rate": 0.0001, "loss": 0.0553, "step": 4260 }, { "epoch": 28.092105263157894, "grad_norm": 3.449673652648926, "learning_rate": 0.0001, "loss": 0.0603, "step": 4270 }, { "epoch": 28.157894736842106, "grad_norm": 3.859006404876709, "learning_rate": 0.0001, "loss": 0.0588, "step": 4280 }, { "epoch": 28.223684210526315, "grad_norm": 2.84765362739563, "learning_rate": 0.0001, "loss": 0.0538, "step": 4290 }, { "epoch": 28.289473684210527, "grad_norm": 3.3362221717834473, "learning_rate": 0.0001, "loss": 0.0509, "step": 4300 }, { "epoch": 28.355263157894736, "grad_norm": 2.668583869934082, "learning_rate": 0.0001, "loss": 0.0561, "step": 4310 }, { "epoch": 28.42105263157895, "grad_norm": 3.316804885864258, "learning_rate": 0.0001, "loss": 0.0566, "step": 4320 }, { "epoch": 28.486842105263158, "grad_norm": 2.706888198852539, "learning_rate": 0.0001, "loss": 0.056, "step": 4330 }, { "epoch": 28.55263157894737, "grad_norm": 3.3674638271331787, "learning_rate": 0.0001, "loss": 0.0553, "step": 4340 }, { "epoch": 28.61842105263158, "grad_norm": 3.639359712600708, "learning_rate": 0.0001, "loss": 0.0593, "step": 4350 }, { "epoch": 28.68421052631579, "grad_norm": 3.2882487773895264, "learning_rate": 0.0001, "loss": 0.053, "step": 4360 }, { "epoch": 28.75, "grad_norm": 3.0167760848999023, "learning_rate": 0.0001, "loss": 0.0585, "step": 4370 }, { "epoch": 28.81578947368421, "grad_norm": 2.9259047508239746, "learning_rate": 0.0001, "loss": 0.0576, "step": 4380 }, { "epoch": 28.88157894736842, "grad_norm": 3.3054912090301514, "learning_rate": 0.0001, "loss": 0.0588, "step": 4390 }, { "epoch": 28.94736842105263, "grad_norm": 3.5616681575775146, "learning_rate": 0.0001, "loss": 0.0562, "step": 4400 }, { "epoch": 29.013157894736842, "grad_norm": 3.0496392250061035, "learning_rate": 0.0001, "loss": 0.0606, "step": 4410 }, { "epoch": 29.07894736842105, "grad_norm": 2.9855754375457764, "learning_rate": 0.0001, "loss": 0.0551, "step": 4420 }, { "epoch": 29.144736842105264, "grad_norm": 2.992281913757324, "learning_rate": 0.0001, "loss": 0.053, "step": 4430 }, { "epoch": 29.210526315789473, "grad_norm": 3.256664514541626, "learning_rate": 0.0001, "loss": 0.0577, "step": 4440 }, { "epoch": 29.276315789473685, "grad_norm": 3.3926141262054443, "learning_rate": 0.0001, "loss": 0.0619, "step": 4450 }, { "epoch": 29.342105263157894, "grad_norm": 3.262199640274048, "learning_rate": 0.0001, "loss": 0.0563, "step": 4460 }, { "epoch": 29.407894736842106, "grad_norm": 3.3873112201690674, "learning_rate": 0.0001, "loss": 0.059, "step": 4470 }, { "epoch": 29.473684210526315, "grad_norm": 3.682753324508667, "learning_rate": 0.0001, "loss": 0.0531, "step": 4480 }, { "epoch": 29.539473684210527, "grad_norm": 3.109187602996826, "learning_rate": 0.0001, "loss": 0.0532, "step": 4490 }, { "epoch": 29.605263157894736, "grad_norm": 3.0220916271209717, "learning_rate": 0.0001, "loss": 0.056, "step": 4500 }, { "epoch": 29.67105263157895, "grad_norm": 2.999417304992676, "learning_rate": 0.0001, "loss": 0.0532, "step": 4510 }, { "epoch": 29.736842105263158, "grad_norm": 3.3180506229400635, "learning_rate": 0.0001, "loss": 0.0528, "step": 4520 }, { "epoch": 29.80263157894737, "grad_norm": 3.1784257888793945, "learning_rate": 0.0001, "loss": 0.0566, "step": 4530 }, { "epoch": 29.86842105263158, "grad_norm": 3.4933249950408936, "learning_rate": 0.0001, "loss": 0.0541, "step": 4540 }, { "epoch": 29.93421052631579, "grad_norm": 3.5023021697998047, "learning_rate": 0.0001, "loss": 0.0538, "step": 4550 }, { "epoch": 30.0, "grad_norm": 3.4711380004882812, "learning_rate": 0.0001, "loss": 0.0536, "step": 4560 }, { "epoch": 30.06578947368421, "grad_norm": 3.4317917823791504, "learning_rate": 0.0001, "loss": 0.0568, "step": 4570 }, { "epoch": 30.13157894736842, "grad_norm": 3.0634424686431885, "learning_rate": 0.0001, "loss": 0.0573, "step": 4580 }, { "epoch": 30.19736842105263, "grad_norm": 3.5550920963287354, "learning_rate": 0.0001, "loss": 0.0504, "step": 4590 }, { "epoch": 30.263157894736842, "grad_norm": 3.286557674407959, "learning_rate": 0.0001, "loss": 0.0533, "step": 4600 }, { "epoch": 30.32894736842105, "grad_norm": 3.3888823986053467, "learning_rate": 0.0001, "loss": 0.0512, "step": 4610 }, { "epoch": 30.394736842105264, "grad_norm": 3.647151470184326, "learning_rate": 0.0001, "loss": 0.0503, "step": 4620 }, { "epoch": 30.460526315789473, "grad_norm": 3.0501487255096436, "learning_rate": 0.0001, "loss": 0.0548, "step": 4630 }, { "epoch": 30.526315789473685, "grad_norm": 3.472285270690918, "learning_rate": 0.0001, "loss": 0.0528, "step": 4640 }, { "epoch": 30.592105263157894, "grad_norm": 3.474325656890869, "learning_rate": 0.0001, "loss": 0.0546, "step": 4650 }, { "epoch": 30.657894736842106, "grad_norm": 2.9422216415405273, "learning_rate": 0.0001, "loss": 0.054, "step": 4660 }, { "epoch": 30.723684210526315, "grad_norm": 2.904676675796509, "learning_rate": 0.0001, "loss": 0.0515, "step": 4670 }, { "epoch": 30.789473684210527, "grad_norm": 2.886228322982788, "learning_rate": 0.0001, "loss": 0.056, "step": 4680 }, { "epoch": 30.855263157894736, "grad_norm": 3.2168831825256348, "learning_rate": 0.0001, "loss": 0.0551, "step": 4690 }, { "epoch": 30.92105263157895, "grad_norm": 2.902871608734131, "learning_rate": 0.0001, "loss": 0.0538, "step": 4700 }, { "epoch": 30.986842105263158, "grad_norm": 3.0855331420898438, "learning_rate": 0.0001, "loss": 0.0534, "step": 4710 }, { "epoch": 31.05263157894737, "grad_norm": 3.0220413208007812, "learning_rate": 0.0001, "loss": 0.0535, "step": 4720 }, { "epoch": 31.11842105263158, "grad_norm": 3.0965516567230225, "learning_rate": 0.0001, "loss": 0.0527, "step": 4730 }, { "epoch": 31.18421052631579, "grad_norm": 2.8717806339263916, "learning_rate": 0.0001, "loss": 0.0594, "step": 4740 }, { "epoch": 31.25, "grad_norm": 2.711956024169922, "learning_rate": 0.0001, "loss": 0.0519, "step": 4750 }, { "epoch": 31.31578947368421, "grad_norm": 3.1045429706573486, "learning_rate": 0.0001, "loss": 0.0519, "step": 4760 }, { "epoch": 31.38157894736842, "grad_norm": 3.117788076400757, "learning_rate": 0.0001, "loss": 0.0584, "step": 4770 }, { "epoch": 31.44736842105263, "grad_norm": 3.287254571914673, "learning_rate": 0.0001, "loss": 0.0504, "step": 4780 }, { "epoch": 31.513157894736842, "grad_norm": 3.2362005710601807, "learning_rate": 0.0001, "loss": 0.0522, "step": 4790 }, { "epoch": 31.57894736842105, "grad_norm": 2.827608346939087, "learning_rate": 0.0001, "loss": 0.0525, "step": 4800 }, { "epoch": 31.644736842105264, "grad_norm": 2.8567302227020264, "learning_rate": 0.0001, "loss": 0.0499, "step": 4810 }, { "epoch": 31.710526315789473, "grad_norm": 3.6337804794311523, "learning_rate": 0.0001, "loss": 0.0514, "step": 4820 }, { "epoch": 31.776315789473685, "grad_norm": 2.956559181213379, "learning_rate": 0.0001, "loss": 0.0502, "step": 4830 }, { "epoch": 31.842105263157894, "grad_norm": 3.28625226020813, "learning_rate": 0.0001, "loss": 0.0534, "step": 4840 }, { "epoch": 31.907894736842106, "grad_norm": 3.2129995822906494, "learning_rate": 0.0001, "loss": 0.0489, "step": 4850 }, { "epoch": 31.973684210526315, "grad_norm": 2.639615535736084, "learning_rate": 0.0001, "loss": 0.0511, "step": 4860 }, { "epoch": 32.03947368421053, "grad_norm": 2.3046576976776123, "learning_rate": 0.0001, "loss": 0.0504, "step": 4870 }, { "epoch": 32.10526315789474, "grad_norm": 2.692249059677124, "learning_rate": 0.0001, "loss": 0.0499, "step": 4880 }, { "epoch": 32.171052631578945, "grad_norm": 2.7841904163360596, "learning_rate": 0.0001, "loss": 0.0521, "step": 4890 }, { "epoch": 32.23684210526316, "grad_norm": 2.8528077602386475, "learning_rate": 0.0001, "loss": 0.0502, "step": 4900 }, { "epoch": 32.30263157894737, "grad_norm": 2.6691136360168457, "learning_rate": 0.0001, "loss": 0.0514, "step": 4910 }, { "epoch": 32.36842105263158, "grad_norm": 2.7115674018859863, "learning_rate": 0.0001, "loss": 0.0528, "step": 4920 }, { "epoch": 32.43421052631579, "grad_norm": 2.594912052154541, "learning_rate": 0.0001, "loss": 0.0575, "step": 4930 }, { "epoch": 32.5, "grad_norm": 2.7402970790863037, "learning_rate": 0.0001, "loss": 0.0561, "step": 4940 }, { "epoch": 32.56578947368421, "grad_norm": 2.3485794067382812, "learning_rate": 0.0001, "loss": 0.0542, "step": 4950 }, { "epoch": 32.63157894736842, "grad_norm": 2.912541151046753, "learning_rate": 0.0001, "loss": 0.0508, "step": 4960 }, { "epoch": 32.69736842105263, "grad_norm": 2.9665489196777344, "learning_rate": 0.0001, "loss": 0.0527, "step": 4970 }, { "epoch": 32.76315789473684, "grad_norm": 2.5939853191375732, "learning_rate": 0.0001, "loss": 0.0498, "step": 4980 }, { "epoch": 32.828947368421055, "grad_norm": 2.8964650630950928, "learning_rate": 0.0001, "loss": 0.0533, "step": 4990 }, { "epoch": 32.89473684210526, "grad_norm": 2.7249815464019775, "learning_rate": 0.0001, "loss": 0.0497, "step": 5000 }, { "epoch": 32.96052631578947, "grad_norm": 2.8237695693969727, "learning_rate": 0.0001, "loss": 0.0555, "step": 5010 }, { "epoch": 33.026315789473685, "grad_norm": 2.8563241958618164, "learning_rate": 0.0001, "loss": 0.0487, "step": 5020 }, { "epoch": 33.0921052631579, "grad_norm": 2.5187313556671143, "learning_rate": 0.0001, "loss": 0.0482, "step": 5030 }, { "epoch": 33.1578947368421, "grad_norm": 2.3663876056671143, "learning_rate": 0.0001, "loss": 0.0476, "step": 5040 }, { "epoch": 33.223684210526315, "grad_norm": 2.672464609146118, "learning_rate": 0.0001, "loss": 0.0499, "step": 5050 }, { "epoch": 33.28947368421053, "grad_norm": 2.777970314025879, "learning_rate": 0.0001, "loss": 0.0503, "step": 5060 }, { "epoch": 33.35526315789474, "grad_norm": 2.7205967903137207, "learning_rate": 0.0001, "loss": 0.0483, "step": 5070 }, { "epoch": 33.421052631578945, "grad_norm": 2.815912961959839, "learning_rate": 0.0001, "loss": 0.0501, "step": 5080 }, { "epoch": 33.48684210526316, "grad_norm": 3.0020999908447266, "learning_rate": 0.0001, "loss": 0.0544, "step": 5090 }, { "epoch": 33.55263157894737, "grad_norm": 2.765181541442871, "learning_rate": 0.0001, "loss": 0.054, "step": 5100 }, { "epoch": 33.61842105263158, "grad_norm": 2.611555576324463, "learning_rate": 0.0001, "loss": 0.0554, "step": 5110 }, { "epoch": 33.68421052631579, "grad_norm": 2.435938835144043, "learning_rate": 0.0001, "loss": 0.0495, "step": 5120 }, { "epoch": 33.75, "grad_norm": 2.4229235649108887, "learning_rate": 0.0001, "loss": 0.0506, "step": 5130 }, { "epoch": 33.81578947368421, "grad_norm": 3.1830482482910156, "learning_rate": 0.0001, "loss": 0.0524, "step": 5140 }, { "epoch": 33.88157894736842, "grad_norm": 2.5907979011535645, "learning_rate": 0.0001, "loss": 0.0485, "step": 5150 }, { "epoch": 33.94736842105263, "grad_norm": 3.0316452980041504, "learning_rate": 0.0001, "loss": 0.0512, "step": 5160 }, { "epoch": 34.01315789473684, "grad_norm": 2.7345519065856934, "learning_rate": 0.0001, "loss": 0.053, "step": 5170 }, { "epoch": 34.078947368421055, "grad_norm": 2.5166094303131104, "learning_rate": 0.0001, "loss": 0.0519, "step": 5180 }, { "epoch": 34.14473684210526, "grad_norm": 2.277397871017456, "learning_rate": 0.0001, "loss": 0.0469, "step": 5190 }, { "epoch": 34.21052631578947, "grad_norm": 2.3698158264160156, "learning_rate": 0.0001, "loss": 0.0489, "step": 5200 }, { "epoch": 34.276315789473685, "grad_norm": 2.7935893535614014, "learning_rate": 0.0001, "loss": 0.0503, "step": 5210 }, { "epoch": 34.3421052631579, "grad_norm": 2.4114792346954346, "learning_rate": 0.0001, "loss": 0.0499, "step": 5220 }, { "epoch": 34.4078947368421, "grad_norm": 3.0464606285095215, "learning_rate": 0.0001, "loss": 0.0502, "step": 5230 }, { "epoch": 34.473684210526315, "grad_norm": 2.59800386428833, "learning_rate": 0.0001, "loss": 0.052, "step": 5240 }, { "epoch": 34.53947368421053, "grad_norm": 2.8786325454711914, "learning_rate": 0.0001, "loss": 0.0503, "step": 5250 }, { "epoch": 34.60526315789474, "grad_norm": 2.137685775756836, "learning_rate": 0.0001, "loss": 0.0467, "step": 5260 }, { "epoch": 34.671052631578945, "grad_norm": 2.6953678131103516, "learning_rate": 0.0001, "loss": 0.0475, "step": 5270 }, { "epoch": 34.73684210526316, "grad_norm": 2.744508981704712, "learning_rate": 0.0001, "loss": 0.0491, "step": 5280 }, { "epoch": 34.80263157894737, "grad_norm": 3.012741804122925, "learning_rate": 0.0001, "loss": 0.05, "step": 5290 }, { "epoch": 34.86842105263158, "grad_norm": 2.6633169651031494, "learning_rate": 0.0001, "loss": 0.0505, "step": 5300 }, { "epoch": 34.93421052631579, "grad_norm": 2.5258846282958984, "learning_rate": 0.0001, "loss": 0.0545, "step": 5310 }, { "epoch": 35.0, "grad_norm": 2.1681854724884033, "learning_rate": 0.0001, "loss": 0.0535, "step": 5320 }, { "epoch": 35.06578947368421, "grad_norm": 2.61966872215271, "learning_rate": 0.0001, "loss": 0.0515, "step": 5330 }, { "epoch": 35.13157894736842, "grad_norm": 2.305449962615967, "learning_rate": 0.0001, "loss": 0.049, "step": 5340 }, { "epoch": 35.19736842105263, "grad_norm": 2.4154679775238037, "learning_rate": 0.0001, "loss": 0.0485, "step": 5350 }, { "epoch": 35.26315789473684, "grad_norm": 2.5637152194976807, "learning_rate": 0.0001, "loss": 0.0448, "step": 5360 }, { "epoch": 35.328947368421055, "grad_norm": 2.768707513809204, "learning_rate": 0.0001, "loss": 0.0478, "step": 5370 }, { "epoch": 35.39473684210526, "grad_norm": 2.7736899852752686, "learning_rate": 0.0001, "loss": 0.0541, "step": 5380 }, { "epoch": 35.46052631578947, "grad_norm": 2.877556085586548, "learning_rate": 0.0001, "loss": 0.047, "step": 5390 }, { "epoch": 35.526315789473685, "grad_norm": 2.5548365116119385, "learning_rate": 0.0001, "loss": 0.0505, "step": 5400 }, { "epoch": 35.5921052631579, "grad_norm": 3.0415854454040527, "learning_rate": 0.0001, "loss": 0.0482, "step": 5410 }, { "epoch": 35.6578947368421, "grad_norm": 2.779832363128662, "learning_rate": 0.0001, "loss": 0.0462, "step": 5420 }, { "epoch": 35.723684210526315, "grad_norm": 3.037933111190796, "learning_rate": 0.0001, "loss": 0.0455, "step": 5430 }, { "epoch": 35.78947368421053, "grad_norm": 3.0311741828918457, "learning_rate": 0.0001, "loss": 0.0495, "step": 5440 }, { "epoch": 35.85526315789474, "grad_norm": 2.7167882919311523, "learning_rate": 0.0001, "loss": 0.0446, "step": 5450 }, { "epoch": 35.921052631578945, "grad_norm": 2.808307409286499, "learning_rate": 0.0001, "loss": 0.0466, "step": 5460 }, { "epoch": 35.98684210526316, "grad_norm": 2.661672353744507, "learning_rate": 0.0001, "loss": 0.0456, "step": 5470 }, { "epoch": 36.05263157894737, "grad_norm": 2.773181200027466, "learning_rate": 0.0001, "loss": 0.0503, "step": 5480 }, { "epoch": 36.11842105263158, "grad_norm": 2.837127685546875, "learning_rate": 0.0001, "loss": 0.0487, "step": 5490 }, { "epoch": 36.18421052631579, "grad_norm": 3.545161724090576, "learning_rate": 0.0001, "loss": 0.0486, "step": 5500 }, { "epoch": 36.25, "grad_norm": 2.9748051166534424, "learning_rate": 0.0001, "loss": 0.0474, "step": 5510 }, { "epoch": 36.31578947368421, "grad_norm": 2.696676254272461, "learning_rate": 0.0001, "loss": 0.0442, "step": 5520 }, { "epoch": 36.38157894736842, "grad_norm": 2.8143951892852783, "learning_rate": 0.0001, "loss": 0.0492, "step": 5530 }, { "epoch": 36.44736842105263, "grad_norm": 2.845813274383545, "learning_rate": 0.0001, "loss": 0.0479, "step": 5540 }, { "epoch": 36.51315789473684, "grad_norm": 2.7452096939086914, "learning_rate": 0.0001, "loss": 0.0494, "step": 5550 }, { "epoch": 36.578947368421055, "grad_norm": 3.2405014038085938, "learning_rate": 0.0001, "loss": 0.0434, "step": 5560 }, { "epoch": 36.64473684210526, "grad_norm": 2.782123565673828, "learning_rate": 0.0001, "loss": 0.0442, "step": 5570 }, { "epoch": 36.71052631578947, "grad_norm": 2.497817039489746, "learning_rate": 0.0001, "loss": 0.0472, "step": 5580 }, { "epoch": 36.776315789473685, "grad_norm": 2.6232833862304688, "learning_rate": 0.0001, "loss": 0.0453, "step": 5590 }, { "epoch": 36.8421052631579, "grad_norm": 2.552119255065918, "learning_rate": 0.0001, "loss": 0.0444, "step": 5600 }, { "epoch": 36.9078947368421, "grad_norm": 2.180877923965454, "learning_rate": 0.0001, "loss": 0.0478, "step": 5610 }, { "epoch": 36.973684210526315, "grad_norm": 2.6592648029327393, "learning_rate": 0.0001, "loss": 0.0476, "step": 5620 }, { "epoch": 37.03947368421053, "grad_norm": 2.4329006671905518, "learning_rate": 0.0001, "loss": 0.0497, "step": 5630 }, { "epoch": 37.10526315789474, "grad_norm": 2.4764468669891357, "learning_rate": 0.0001, "loss": 0.0489, "step": 5640 }, { "epoch": 37.171052631578945, "grad_norm": 2.646169424057007, "learning_rate": 0.0001, "loss": 0.0484, "step": 5650 }, { "epoch": 37.23684210526316, "grad_norm": 2.1575703620910645, "learning_rate": 0.0001, "loss": 0.0486, "step": 5660 }, { "epoch": 37.30263157894737, "grad_norm": 2.361581802368164, "learning_rate": 0.0001, "loss": 0.0469, "step": 5670 }, { "epoch": 37.36842105263158, "grad_norm": 2.2917912006378174, "learning_rate": 0.0001, "loss": 0.0461, "step": 5680 }, { "epoch": 37.43421052631579, "grad_norm": 3.4187848567962646, "learning_rate": 0.0001, "loss": 0.0531, "step": 5690 }, { "epoch": 37.5, "grad_norm": 2.7088265419006348, "learning_rate": 0.0001, "loss": 0.0479, "step": 5700 }, { "epoch": 37.56578947368421, "grad_norm": 2.8174796104431152, "learning_rate": 0.0001, "loss": 0.0451, "step": 5710 }, { "epoch": 37.63157894736842, "grad_norm": 2.8281352519989014, "learning_rate": 0.0001, "loss": 0.0459, "step": 5720 }, { "epoch": 37.69736842105263, "grad_norm": 2.5422720909118652, "learning_rate": 0.0001, "loss": 0.0464, "step": 5730 }, { "epoch": 37.76315789473684, "grad_norm": 2.5474753379821777, "learning_rate": 0.0001, "loss": 0.0446, "step": 5740 }, { "epoch": 37.828947368421055, "grad_norm": 2.2637791633605957, "learning_rate": 0.0001, "loss": 0.044, "step": 5750 }, { "epoch": 37.89473684210526, "grad_norm": 2.5315074920654297, "learning_rate": 0.0001, "loss": 0.0479, "step": 5760 }, { "epoch": 37.96052631578947, "grad_norm": 2.577911615371704, "learning_rate": 0.0001, "loss": 0.045, "step": 5770 }, { "epoch": 38.026315789473685, "grad_norm": 2.334559679031372, "learning_rate": 0.0001, "loss": 0.05, "step": 5780 }, { "epoch": 38.0921052631579, "grad_norm": 2.6234664916992188, "learning_rate": 0.0001, "loss": 0.0462, "step": 5790 }, { "epoch": 38.1578947368421, "grad_norm": 2.534587860107422, "learning_rate": 0.0001, "loss": 0.0431, "step": 5800 }, { "epoch": 38.223684210526315, "grad_norm": 2.4762656688690186, "learning_rate": 0.0001, "loss": 0.0437, "step": 5810 }, { "epoch": 38.28947368421053, "grad_norm": 2.167504072189331, "learning_rate": 0.0001, "loss": 0.0422, "step": 5820 }, { "epoch": 38.35526315789474, "grad_norm": 2.4448747634887695, "learning_rate": 0.0001, "loss": 0.0436, "step": 5830 }, { "epoch": 38.421052631578945, "grad_norm": 2.530709743499756, "learning_rate": 0.0001, "loss": 0.0458, "step": 5840 }, { "epoch": 38.48684210526316, "grad_norm": 2.174250602722168, "learning_rate": 0.0001, "loss": 0.046, "step": 5850 }, { "epoch": 38.55263157894737, "grad_norm": 2.057209014892578, "learning_rate": 0.0001, "loss": 0.0473, "step": 5860 }, { "epoch": 38.61842105263158, "grad_norm": 2.4377598762512207, "learning_rate": 0.0001, "loss": 0.0441, "step": 5870 }, { "epoch": 38.68421052631579, "grad_norm": 2.5525925159454346, "learning_rate": 0.0001, "loss": 0.0481, "step": 5880 }, { "epoch": 38.75, "grad_norm": 2.7554850578308105, "learning_rate": 0.0001, "loss": 0.0461, "step": 5890 }, { "epoch": 38.81578947368421, "grad_norm": 2.3499162197113037, "learning_rate": 0.0001, "loss": 0.0446, "step": 5900 }, { "epoch": 38.88157894736842, "grad_norm": 2.5296130180358887, "learning_rate": 0.0001, "loss": 0.0452, "step": 5910 }, { "epoch": 38.94736842105263, "grad_norm": 2.4431755542755127, "learning_rate": 0.0001, "loss": 0.0468, "step": 5920 }, { "epoch": 39.01315789473684, "grad_norm": 2.608210563659668, "learning_rate": 0.0001, "loss": 0.0473, "step": 5930 }, { "epoch": 39.078947368421055, "grad_norm": 2.528297185897827, "learning_rate": 0.0001, "loss": 0.0423, "step": 5940 }, { "epoch": 39.14473684210526, "grad_norm": 2.6326704025268555, "learning_rate": 0.0001, "loss": 0.0454, "step": 5950 }, { "epoch": 39.21052631578947, "grad_norm": 2.402974843978882, "learning_rate": 0.0001, "loss": 0.0429, "step": 5960 }, { "epoch": 39.276315789473685, "grad_norm": 2.383145570755005, "learning_rate": 0.0001, "loss": 0.0442, "step": 5970 }, { "epoch": 39.3421052631579, "grad_norm": 1.9563897848129272, "learning_rate": 0.0001, "loss": 0.041, "step": 5980 }, { "epoch": 39.4078947368421, "grad_norm": 2.3800816535949707, "learning_rate": 0.0001, "loss": 0.0432, "step": 5990 }, { "epoch": 39.473684210526315, "grad_norm": 2.2849771976470947, "learning_rate": 0.0001, "loss": 0.0429, "step": 6000 }, { "epoch": 39.53947368421053, "grad_norm": 2.383751630783081, "learning_rate": 0.0001, "loss": 0.044, "step": 6010 }, { "epoch": 39.60526315789474, "grad_norm": 2.467508554458618, "learning_rate": 0.0001, "loss": 0.0493, "step": 6020 }, { "epoch": 39.671052631578945, "grad_norm": 2.188605785369873, "learning_rate": 0.0001, "loss": 0.0464, "step": 6030 }, { "epoch": 39.73684210526316, "grad_norm": 2.179704427719116, "learning_rate": 0.0001, "loss": 0.0444, "step": 6040 }, { "epoch": 39.80263157894737, "grad_norm": 2.507868766784668, "learning_rate": 0.0001, "loss": 0.0496, "step": 6050 }, { "epoch": 39.86842105263158, "grad_norm": 2.579693078994751, "learning_rate": 0.0001, "loss": 0.0485, "step": 6060 }, { "epoch": 39.93421052631579, "grad_norm": 2.23881196975708, "learning_rate": 0.0001, "loss": 0.0439, "step": 6070 }, { "epoch": 40.0, "grad_norm": 2.2880659103393555, "learning_rate": 0.0001, "loss": 0.0452, "step": 6080 }, { "epoch": 40.06578947368421, "grad_norm": 1.96604585647583, "learning_rate": 0.0001, "loss": 0.0448, "step": 6090 }, { "epoch": 40.13157894736842, "grad_norm": 2.073749303817749, "learning_rate": 0.0001, "loss": 0.0449, "step": 6100 }, { "epoch": 40.19736842105263, "grad_norm": 2.251397132873535, "learning_rate": 0.0001, "loss": 0.0449, "step": 6110 }, { "epoch": 40.26315789473684, "grad_norm": 2.5593795776367188, "learning_rate": 0.0001, "loss": 0.0464, "step": 6120 }, { "epoch": 40.328947368421055, "grad_norm": 2.652355194091797, "learning_rate": 0.0001, "loss": 0.0452, "step": 6130 }, { "epoch": 40.39473684210526, "grad_norm": 2.3340888023376465, "learning_rate": 0.0001, "loss": 0.0429, "step": 6140 }, { "epoch": 40.46052631578947, "grad_norm": 2.7447798252105713, "learning_rate": 0.0001, "loss": 0.0415, "step": 6150 }, { "epoch": 40.526315789473685, "grad_norm": 2.3984744548797607, "learning_rate": 0.0001, "loss": 0.0445, "step": 6160 }, { "epoch": 40.5921052631579, "grad_norm": 2.0473427772521973, "learning_rate": 0.0001, "loss": 0.0437, "step": 6170 }, { "epoch": 40.6578947368421, "grad_norm": 2.163087844848633, "learning_rate": 0.0001, "loss": 0.0443, "step": 6180 }, { "epoch": 40.723684210526315, "grad_norm": 2.2935638427734375, "learning_rate": 0.0001, "loss": 0.0547, "step": 6190 }, { "epoch": 40.78947368421053, "grad_norm": 2.378009557723999, "learning_rate": 0.0001, "loss": 0.0442, "step": 6200 }, { "epoch": 40.85526315789474, "grad_norm": 2.437608480453491, "learning_rate": 0.0001, "loss": 0.0431, "step": 6210 }, { "epoch": 40.921052631578945, "grad_norm": 2.374829053878784, "learning_rate": 0.0001, "loss": 0.0411, "step": 6220 }, { "epoch": 40.98684210526316, "grad_norm": 2.5893914699554443, "learning_rate": 0.0001, "loss": 0.0423, "step": 6230 }, { "epoch": 41.05263157894737, "grad_norm": 2.3711605072021484, "learning_rate": 0.0001, "loss": 0.0469, "step": 6240 }, { "epoch": 41.11842105263158, "grad_norm": 2.6288208961486816, "learning_rate": 0.0001, "loss": 0.0423, "step": 6250 }, { "epoch": 41.18421052631579, "grad_norm": 1.9889806509017944, "learning_rate": 0.0001, "loss": 0.0444, "step": 6260 }, { "epoch": 41.25, "grad_norm": 2.4192678928375244, "learning_rate": 0.0001, "loss": 0.0439, "step": 6270 }, { "epoch": 41.31578947368421, "grad_norm": 2.491905927658081, "learning_rate": 0.0001, "loss": 0.0425, "step": 6280 }, { "epoch": 41.38157894736842, "grad_norm": 2.178677797317505, "learning_rate": 0.0001, "loss": 0.0409, "step": 6290 }, { "epoch": 41.44736842105263, "grad_norm": 2.648658514022827, "learning_rate": 0.0001, "loss": 0.0431, "step": 6300 }, { "epoch": 41.51315789473684, "grad_norm": 2.4951446056365967, "learning_rate": 0.0001, "loss": 0.0443, "step": 6310 }, { "epoch": 41.578947368421055, "grad_norm": 2.1832523345947266, "learning_rate": 0.0001, "loss": 0.0465, "step": 6320 }, { "epoch": 41.64473684210526, "grad_norm": 2.763068675994873, "learning_rate": 0.0001, "loss": 0.0438, "step": 6330 }, { "epoch": 41.71052631578947, "grad_norm": 2.6481575965881348, "learning_rate": 0.0001, "loss": 0.0399, "step": 6340 }, { "epoch": 41.776315789473685, "grad_norm": 2.498065710067749, "learning_rate": 0.0001, "loss": 0.0493, "step": 6350 }, { "epoch": 41.8421052631579, "grad_norm": 2.5920052528381348, "learning_rate": 0.0001, "loss": 0.0415, "step": 6360 }, { "epoch": 41.9078947368421, "grad_norm": 2.3389225006103516, "learning_rate": 0.0001, "loss": 0.0442, "step": 6370 }, { "epoch": 41.973684210526315, "grad_norm": 2.3539235591888428, "learning_rate": 0.0001, "loss": 0.0424, "step": 6380 }, { "epoch": 42.03947368421053, "grad_norm": 1.941389799118042, "learning_rate": 0.0001, "loss": 0.0426, "step": 6390 }, { "epoch": 42.10526315789474, "grad_norm": 2.1393790245056152, "learning_rate": 0.0001, "loss": 0.0442, "step": 6400 }, { "epoch": 42.171052631578945, "grad_norm": 2.314061403274536, "learning_rate": 0.0001, "loss": 0.043, "step": 6410 }, { "epoch": 42.23684210526316, "grad_norm": 2.17966890335083, "learning_rate": 0.0001, "loss": 0.0422, "step": 6420 }, { "epoch": 42.30263157894737, "grad_norm": 2.0357677936553955, "learning_rate": 0.0001, "loss": 0.044, "step": 6430 }, { "epoch": 42.36842105263158, "grad_norm": 2.3330137729644775, "learning_rate": 0.0001, "loss": 0.0409, "step": 6440 }, { "epoch": 42.43421052631579, "grad_norm": 2.178384304046631, "learning_rate": 0.0001, "loss": 0.0423, "step": 6450 }, { "epoch": 42.5, "grad_norm": 2.3437020778656006, "learning_rate": 0.0001, "loss": 0.0447, "step": 6460 }, { "epoch": 42.56578947368421, "grad_norm": 2.277813673019409, "learning_rate": 0.0001, "loss": 0.0463, "step": 6470 }, { "epoch": 42.63157894736842, "grad_norm": 2.014697551727295, "learning_rate": 0.0001, "loss": 0.0415, "step": 6480 }, { "epoch": 42.69736842105263, "grad_norm": 2.140352249145508, "learning_rate": 0.0001, "loss": 0.0404, "step": 6490 }, { "epoch": 42.76315789473684, "grad_norm": 2.2959890365600586, "learning_rate": 0.0001, "loss": 0.0434, "step": 6500 }, { "epoch": 42.828947368421055, "grad_norm": 2.178832769393921, "learning_rate": 0.0001, "loss": 0.0472, "step": 6510 }, { "epoch": 42.89473684210526, "grad_norm": 2.037851572036743, "learning_rate": 0.0001, "loss": 0.0421, "step": 6520 }, { "epoch": 42.96052631578947, "grad_norm": 1.7611000537872314, "learning_rate": 0.0001, "loss": 0.0403, "step": 6530 }, { "epoch": 43.026315789473685, "grad_norm": 2.360478639602661, "learning_rate": 0.0001, "loss": 0.0386, "step": 6540 }, { "epoch": 43.0921052631579, "grad_norm": 2.457676410675049, "learning_rate": 0.0001, "loss": 0.0458, "step": 6550 }, { "epoch": 43.1578947368421, "grad_norm": 2.4581384658813477, "learning_rate": 0.0001, "loss": 0.0421, "step": 6560 }, { "epoch": 43.223684210526315, "grad_norm": 2.0751826763153076, "learning_rate": 0.0001, "loss": 0.0425, "step": 6570 }, { "epoch": 43.28947368421053, "grad_norm": 2.2040812969207764, "learning_rate": 0.0001, "loss": 0.0412, "step": 6580 }, { "epoch": 43.35526315789474, "grad_norm": 1.8542298078536987, "learning_rate": 0.0001, "loss": 0.0455, "step": 6590 }, { "epoch": 43.421052631578945, "grad_norm": 2.6020007133483887, "learning_rate": 0.0001, "loss": 0.047, "step": 6600 }, { "epoch": 43.48684210526316, "grad_norm": 2.3764736652374268, "learning_rate": 0.0001, "loss": 0.0428, "step": 6610 }, { "epoch": 43.55263157894737, "grad_norm": 2.538691282272339, "learning_rate": 0.0001, "loss": 0.0421, "step": 6620 }, { "epoch": 43.61842105263158, "grad_norm": 2.1738691329956055, "learning_rate": 0.0001, "loss": 0.0388, "step": 6630 }, { "epoch": 43.68421052631579, "grad_norm": 2.0375754833221436, "learning_rate": 0.0001, "loss": 0.0387, "step": 6640 }, { "epoch": 43.75, "grad_norm": 2.128758192062378, "learning_rate": 0.0001, "loss": 0.044, "step": 6650 }, { "epoch": 43.81578947368421, "grad_norm": 2.1560118198394775, "learning_rate": 0.0001, "loss": 0.0411, "step": 6660 }, { "epoch": 43.88157894736842, "grad_norm": 2.5647659301757812, "learning_rate": 0.0001, "loss": 0.0399, "step": 6670 }, { "epoch": 43.94736842105263, "grad_norm": 2.5152177810668945, "learning_rate": 0.0001, "loss": 0.0425, "step": 6680 }, { "epoch": 44.01315789473684, "grad_norm": 2.533376932144165, "learning_rate": 0.0001, "loss": 0.0407, "step": 6690 }, { "epoch": 44.078947368421055, "grad_norm": 2.424565076828003, "learning_rate": 0.0001, "loss": 0.0424, "step": 6700 }, { "epoch": 44.14473684210526, "grad_norm": 2.0574584007263184, "learning_rate": 0.0001, "loss": 0.0438, "step": 6710 }, { "epoch": 44.21052631578947, "grad_norm": 2.4216036796569824, "learning_rate": 0.0001, "loss": 0.0408, "step": 6720 }, { "epoch": 44.276315789473685, "grad_norm": 2.2913320064544678, "learning_rate": 0.0001, "loss": 0.041, "step": 6730 }, { "epoch": 44.3421052631579, "grad_norm": 2.423386573791504, "learning_rate": 0.0001, "loss": 0.0434, "step": 6740 }, { "epoch": 44.4078947368421, "grad_norm": 2.4834423065185547, "learning_rate": 0.0001, "loss": 0.0406, "step": 6750 }, { "epoch": 44.473684210526315, "grad_norm": 2.5011675357818604, "learning_rate": 0.0001, "loss": 0.0413, "step": 6760 }, { "epoch": 44.53947368421053, "grad_norm": 2.57098388671875, "learning_rate": 0.0001, "loss": 0.04, "step": 6770 }, { "epoch": 44.60526315789474, "grad_norm": 2.5909740924835205, "learning_rate": 0.0001, "loss": 0.0406, "step": 6780 }, { "epoch": 44.671052631578945, "grad_norm": 2.3900396823883057, "learning_rate": 0.0001, "loss": 0.0395, "step": 6790 }, { "epoch": 44.73684210526316, "grad_norm": 2.4889774322509766, "learning_rate": 0.0001, "loss": 0.0451, "step": 6800 }, { "epoch": 44.80263157894737, "grad_norm": 2.3394553661346436, "learning_rate": 0.0001, "loss": 0.0409, "step": 6810 }, { "epoch": 44.86842105263158, "grad_norm": 1.9768896102905273, "learning_rate": 0.0001, "loss": 0.04, "step": 6820 }, { "epoch": 44.93421052631579, "grad_norm": 2.201011896133423, "learning_rate": 0.0001, "loss": 0.0377, "step": 6830 }, { "epoch": 45.0, "grad_norm": 1.9985404014587402, "learning_rate": 0.0001, "loss": 0.0397, "step": 6840 }, { "epoch": 45.06578947368421, "grad_norm": 2.127011299133301, "learning_rate": 0.0001, "loss": 0.0414, "step": 6850 }, { "epoch": 45.13157894736842, "grad_norm": 2.1681935787200928, "learning_rate": 0.0001, "loss": 0.0399, "step": 6860 }, { "epoch": 45.19736842105263, "grad_norm": 2.1315577030181885, "learning_rate": 0.0001, "loss": 0.0422, "step": 6870 }, { "epoch": 45.26315789473684, "grad_norm": 2.018547296524048, "learning_rate": 0.0001, "loss": 0.0409, "step": 6880 }, { "epoch": 45.328947368421055, "grad_norm": 2.1411643028259277, "learning_rate": 0.0001, "loss": 0.0376, "step": 6890 }, { "epoch": 45.39473684210526, "grad_norm": 2.4952762126922607, "learning_rate": 0.0001, "loss": 0.0388, "step": 6900 }, { "epoch": 45.46052631578947, "grad_norm": 2.47765851020813, "learning_rate": 0.0001, "loss": 0.0391, "step": 6910 }, { "epoch": 45.526315789473685, "grad_norm": 2.143780469894409, "learning_rate": 0.0001, "loss": 0.0375, "step": 6920 }, { "epoch": 45.5921052631579, "grad_norm": 2.1618223190307617, "learning_rate": 0.0001, "loss": 0.0406, "step": 6930 }, { "epoch": 45.6578947368421, "grad_norm": 1.9536004066467285, "learning_rate": 0.0001, "loss": 0.0384, "step": 6940 }, { "epoch": 45.723684210526315, "grad_norm": 2.174910306930542, "learning_rate": 0.0001, "loss": 0.0391, "step": 6950 }, { "epoch": 45.78947368421053, "grad_norm": 2.460047721862793, "learning_rate": 0.0001, "loss": 0.0415, "step": 6960 }, { "epoch": 45.85526315789474, "grad_norm": 2.309826135635376, "learning_rate": 0.0001, "loss": 0.0427, "step": 6970 }, { "epoch": 45.921052631578945, "grad_norm": 2.1610970497131348, "learning_rate": 0.0001, "loss": 0.0452, "step": 6980 }, { "epoch": 45.98684210526316, "grad_norm": 2.2995121479034424, "learning_rate": 0.0001, "loss": 0.0418, "step": 6990 }, { "epoch": 46.05263157894737, "grad_norm": 1.9505306482315063, "learning_rate": 0.0001, "loss": 0.0382, "step": 7000 }, { "epoch": 46.11842105263158, "grad_norm": 2.161802053451538, "learning_rate": 0.0001, "loss": 0.0414, "step": 7010 }, { "epoch": 46.18421052631579, "grad_norm": 2.418104887008667, "learning_rate": 0.0001, "loss": 0.0424, "step": 7020 }, { "epoch": 46.25, "grad_norm": 2.3262252807617188, "learning_rate": 0.0001, "loss": 0.0382, "step": 7030 }, { "epoch": 46.31578947368421, "grad_norm": 2.651866912841797, "learning_rate": 0.0001, "loss": 0.0378, "step": 7040 }, { "epoch": 46.38157894736842, "grad_norm": 2.5802295207977295, "learning_rate": 0.0001, "loss": 0.0432, "step": 7050 }, { "epoch": 46.44736842105263, "grad_norm": 2.664853096008301, "learning_rate": 0.0001, "loss": 0.039, "step": 7060 }, { "epoch": 46.51315789473684, "grad_norm": 2.4787471294403076, "learning_rate": 0.0001, "loss": 0.0385, "step": 7070 }, { "epoch": 46.578947368421055, "grad_norm": 2.6140897274017334, "learning_rate": 0.0001, "loss": 0.0424, "step": 7080 }, { "epoch": 46.64473684210526, "grad_norm": 2.183720111846924, "learning_rate": 0.0001, "loss": 0.0363, "step": 7090 }, { "epoch": 46.71052631578947, "grad_norm": 2.1815969944000244, "learning_rate": 0.0001, "loss": 0.0417, "step": 7100 }, { "epoch": 46.776315789473685, "grad_norm": 2.310917854309082, "learning_rate": 0.0001, "loss": 0.0378, "step": 7110 }, { "epoch": 46.8421052631579, "grad_norm": 2.3136982917785645, "learning_rate": 0.0001, "loss": 0.0406, "step": 7120 }, { "epoch": 46.9078947368421, "grad_norm": 2.1121630668640137, "learning_rate": 0.0001, "loss": 0.0368, "step": 7130 }, { "epoch": 46.973684210526315, "grad_norm": 2.3903613090515137, "learning_rate": 0.0001, "loss": 0.0377, "step": 7140 }, { "epoch": 47.03947368421053, "grad_norm": 2.3889095783233643, "learning_rate": 0.0001, "loss": 0.0411, "step": 7150 }, { "epoch": 47.10526315789474, "grad_norm": 2.541818380355835, "learning_rate": 0.0001, "loss": 0.0407, "step": 7160 }, { "epoch": 47.171052631578945, "grad_norm": 2.009045124053955, "learning_rate": 0.0001, "loss": 0.0409, "step": 7170 }, { "epoch": 47.23684210526316, "grad_norm": 2.195842742919922, "learning_rate": 0.0001, "loss": 0.0357, "step": 7180 }, { "epoch": 47.30263157894737, "grad_norm": 2.0758495330810547, "learning_rate": 0.0001, "loss": 0.0429, "step": 7190 }, { "epoch": 47.36842105263158, "grad_norm": 2.2477900981903076, "learning_rate": 0.0001, "loss": 0.0366, "step": 7200 }, { "epoch": 47.43421052631579, "grad_norm": 1.8586223125457764, "learning_rate": 0.0001, "loss": 0.0403, "step": 7210 }, { "epoch": 47.5, "grad_norm": 2.253445625305176, "learning_rate": 0.0001, "loss": 0.0397, "step": 7220 }, { "epoch": 47.56578947368421, "grad_norm": 1.884648084640503, "learning_rate": 0.0001, "loss": 0.0382, "step": 7230 }, { "epoch": 47.63157894736842, "grad_norm": 1.8636415004730225, "learning_rate": 0.0001, "loss": 0.0388, "step": 7240 }, { "epoch": 47.69736842105263, "grad_norm": 2.0864782333374023, "learning_rate": 0.0001, "loss": 0.0413, "step": 7250 }, { "epoch": 47.76315789473684, "grad_norm": 1.986417531967163, "learning_rate": 0.0001, "loss": 0.0408, "step": 7260 }, { "epoch": 47.828947368421055, "grad_norm": 2.068437099456787, "learning_rate": 0.0001, "loss": 0.041, "step": 7270 }, { "epoch": 47.89473684210526, "grad_norm": 2.0729715824127197, "learning_rate": 0.0001, "loss": 0.0388, "step": 7280 }, { "epoch": 47.96052631578947, "grad_norm": 2.2106552124023438, "learning_rate": 0.0001, "loss": 0.0409, "step": 7290 }, { "epoch": 48.026315789473685, "grad_norm": 1.8308414220809937, "learning_rate": 0.0001, "loss": 0.0392, "step": 7300 }, { "epoch": 48.0921052631579, "grad_norm": 2.1711678504943848, "learning_rate": 0.0001, "loss": 0.0413, "step": 7310 }, { "epoch": 48.1578947368421, "grad_norm": 2.326873779296875, "learning_rate": 0.0001, "loss": 0.0411, "step": 7320 }, { "epoch": 48.223684210526315, "grad_norm": 2.386996030807495, "learning_rate": 0.0001, "loss": 0.0381, "step": 7330 }, { "epoch": 48.28947368421053, "grad_norm": 2.138685703277588, "learning_rate": 0.0001, "loss": 0.0415, "step": 7340 }, { "epoch": 48.35526315789474, "grad_norm": 2.116856336593628, "learning_rate": 0.0001, "loss": 0.0358, "step": 7350 }, { "epoch": 48.421052631578945, "grad_norm": 2.010777235031128, "learning_rate": 0.0001, "loss": 0.0413, "step": 7360 }, { "epoch": 48.48684210526316, "grad_norm": 2.502208709716797, "learning_rate": 0.0001, "loss": 0.0412, "step": 7370 }, { "epoch": 48.55263157894737, "grad_norm": 2.1016767024993896, "learning_rate": 0.0001, "loss": 0.039, "step": 7380 }, { "epoch": 48.61842105263158, "grad_norm": 2.3174989223480225, "learning_rate": 0.0001, "loss": 0.038, "step": 7390 }, { "epoch": 48.68421052631579, "grad_norm": 2.1891210079193115, "learning_rate": 0.0001, "loss": 0.0381, "step": 7400 }, { "epoch": 48.75, "grad_norm": 1.7668248414993286, "learning_rate": 0.0001, "loss": 0.0405, "step": 7410 }, { "epoch": 48.81578947368421, "grad_norm": 1.9198734760284424, "learning_rate": 0.0001, "loss": 0.0369, "step": 7420 }, { "epoch": 48.88157894736842, "grad_norm": 2.181814670562744, "learning_rate": 0.0001, "loss": 0.0398, "step": 7430 }, { "epoch": 48.94736842105263, "grad_norm": 1.989591360092163, "learning_rate": 0.0001, "loss": 0.0354, "step": 7440 }, { "epoch": 49.01315789473684, "grad_norm": 1.745857834815979, "learning_rate": 0.0001, "loss": 0.0404, "step": 7450 }, { "epoch": 49.078947368421055, "grad_norm": 2.2992279529571533, "learning_rate": 0.0001, "loss": 0.0382, "step": 7460 }, { "epoch": 49.14473684210526, "grad_norm": 2.126858949661255, "learning_rate": 0.0001, "loss": 0.0364, "step": 7470 }, { "epoch": 49.21052631578947, "grad_norm": 2.290196418762207, "learning_rate": 0.0001, "loss": 0.0393, "step": 7480 }, { "epoch": 49.276315789473685, "grad_norm": 2.2634739875793457, "learning_rate": 0.0001, "loss": 0.0414, "step": 7490 }, { "epoch": 49.3421052631579, "grad_norm": 2.221125841140747, "learning_rate": 0.0001, "loss": 0.0387, "step": 7500 }, { "epoch": 49.4078947368421, "grad_norm": 1.7965646982192993, "learning_rate": 0.0001, "loss": 0.0367, "step": 7510 }, { "epoch": 49.473684210526315, "grad_norm": 2.0313377380371094, "learning_rate": 0.0001, "loss": 0.0419, "step": 7520 }, { "epoch": 49.53947368421053, "grad_norm": 2.1946537494659424, "learning_rate": 0.0001, "loss": 0.037, "step": 7530 }, { "epoch": 49.60526315789474, "grad_norm": 2.3055074214935303, "learning_rate": 0.0001, "loss": 0.0358, "step": 7540 }, { "epoch": 49.671052631578945, "grad_norm": 2.371293544769287, "learning_rate": 0.0001, "loss": 0.0376, "step": 7550 }, { "epoch": 49.73684210526316, "grad_norm": 2.5624117851257324, "learning_rate": 0.0001, "loss": 0.0386, "step": 7560 }, { "epoch": 49.80263157894737, "grad_norm": 2.2405447959899902, "learning_rate": 0.0001, "loss": 0.0408, "step": 7570 }, { "epoch": 49.86842105263158, "grad_norm": 2.4075613021850586, "learning_rate": 0.0001, "loss": 0.0401, "step": 7580 }, { "epoch": 49.93421052631579, "grad_norm": 2.581996202468872, "learning_rate": 0.0001, "loss": 0.0423, "step": 7590 }, { "epoch": 50.0, "grad_norm": 2.4920432567596436, "learning_rate": 0.0001, "loss": 0.0407, "step": 7600 }, { "epoch": 50.06578947368421, "grad_norm": 1.8958051204681396, "learning_rate": 0.0001, "loss": 0.0366, "step": 7610 }, { "epoch": 50.13157894736842, "grad_norm": 2.257720708847046, "learning_rate": 0.0001, "loss": 0.0407, "step": 7620 }, { "epoch": 50.19736842105263, "grad_norm": 1.9811315536499023, "learning_rate": 0.0001, "loss": 0.0365, "step": 7630 }, { "epoch": 50.26315789473684, "grad_norm": 2.08736515045166, "learning_rate": 0.0001, "loss": 0.0377, "step": 7640 }, { "epoch": 50.328947368421055, "grad_norm": 2.1356945037841797, "learning_rate": 0.0001, "loss": 0.0406, "step": 7650 }, { "epoch": 50.39473684210526, "grad_norm": 2.0264437198638916, "learning_rate": 0.0001, "loss": 0.0368, "step": 7660 }, { "epoch": 50.46052631578947, "grad_norm": 2.296083688735962, "learning_rate": 0.0001, "loss": 0.0366, "step": 7670 }, { "epoch": 50.526315789473685, "grad_norm": 1.9767868518829346, "learning_rate": 0.0001, "loss": 0.0444, "step": 7680 }, { "epoch": 50.5921052631579, "grad_norm": 2.1427462100982666, "learning_rate": 0.0001, "loss": 0.0355, "step": 7690 }, { "epoch": 50.6578947368421, "grad_norm": 2.3248414993286133, "learning_rate": 0.0001, "loss": 0.0367, "step": 7700 }, { "epoch": 50.723684210526315, "grad_norm": 2.3198843002319336, "learning_rate": 0.0001, "loss": 0.0401, "step": 7710 }, { "epoch": 50.78947368421053, "grad_norm": 2.16497802734375, "learning_rate": 0.0001, "loss": 0.0416, "step": 7720 }, { "epoch": 50.85526315789474, "grad_norm": 2.0864272117614746, "learning_rate": 0.0001, "loss": 0.0371, "step": 7730 }, { "epoch": 50.921052631578945, "grad_norm": 2.0330374240875244, "learning_rate": 0.0001, "loss": 0.0372, "step": 7740 }, { "epoch": 50.98684210526316, "grad_norm": 1.7595237493515015, "learning_rate": 0.0001, "loss": 0.0376, "step": 7750 }, { "epoch": 51.05263157894737, "grad_norm": 1.9356091022491455, "learning_rate": 0.0001, "loss": 0.0428, "step": 7760 }, { "epoch": 51.11842105263158, "grad_norm": 1.7659704685211182, "learning_rate": 0.0001, "loss": 0.0362, "step": 7770 }, { "epoch": 51.18421052631579, "grad_norm": 2.442096471786499, "learning_rate": 0.0001, "loss": 0.0386, "step": 7780 }, { "epoch": 51.25, "grad_norm": 2.14406418800354, "learning_rate": 0.0001, "loss": 0.0392, "step": 7790 }, { "epoch": 51.31578947368421, "grad_norm": 2.3461625576019287, "learning_rate": 0.0001, "loss": 0.0415, "step": 7800 }, { "epoch": 51.38157894736842, "grad_norm": 2.4702446460723877, "learning_rate": 0.0001, "loss": 0.0419, "step": 7810 }, { "epoch": 51.44736842105263, "grad_norm": 2.3408868312835693, "learning_rate": 0.0001, "loss": 0.0359, "step": 7820 }, { "epoch": 51.51315789473684, "grad_norm": 2.140207529067993, "learning_rate": 0.0001, "loss": 0.039, "step": 7830 }, { "epoch": 51.578947368421055, "grad_norm": 1.8814183473587036, "learning_rate": 0.0001, "loss": 0.0361, "step": 7840 }, { "epoch": 51.64473684210526, "grad_norm": 2.0391945838928223, "learning_rate": 0.0001, "loss": 0.0348, "step": 7850 }, { "epoch": 51.71052631578947, "grad_norm": 2.097381830215454, "learning_rate": 0.0001, "loss": 0.0389, "step": 7860 }, { "epoch": 51.776315789473685, "grad_norm": 2.451936721801758, "learning_rate": 0.0001, "loss": 0.0347, "step": 7870 }, { "epoch": 51.8421052631579, "grad_norm": 2.1647655963897705, "learning_rate": 0.0001, "loss": 0.0422, "step": 7880 }, { "epoch": 51.9078947368421, "grad_norm": 2.2722997665405273, "learning_rate": 0.0001, "loss": 0.0385, "step": 7890 }, { "epoch": 51.973684210526315, "grad_norm": 2.42398738861084, "learning_rate": 0.0001, "loss": 0.0361, "step": 7900 }, { "epoch": 52.03947368421053, "grad_norm": 2.2124574184417725, "learning_rate": 0.0001, "loss": 0.0412, "step": 7910 }, { "epoch": 52.10526315789474, "grad_norm": 2.2212417125701904, "learning_rate": 0.0001, "loss": 0.0341, "step": 7920 }, { "epoch": 52.171052631578945, "grad_norm": 2.205821990966797, "learning_rate": 0.0001, "loss": 0.0384, "step": 7930 }, { "epoch": 52.23684210526316, "grad_norm": 2.220672845840454, "learning_rate": 0.0001, "loss": 0.0375, "step": 7940 }, { "epoch": 52.30263157894737, "grad_norm": 2.0185680389404297, "learning_rate": 0.0001, "loss": 0.039, "step": 7950 }, { "epoch": 52.36842105263158, "grad_norm": 2.3008174896240234, "learning_rate": 0.0001, "loss": 0.0392, "step": 7960 }, { "epoch": 52.43421052631579, "grad_norm": 2.0556042194366455, "learning_rate": 0.0001, "loss": 0.0372, "step": 7970 }, { "epoch": 52.5, "grad_norm": 2.2419941425323486, "learning_rate": 0.0001, "loss": 0.0362, "step": 7980 }, { "epoch": 52.56578947368421, "grad_norm": 2.373690605163574, "learning_rate": 0.0001, "loss": 0.0433, "step": 7990 }, { "epoch": 52.63157894736842, "grad_norm": 1.6799159049987793, "learning_rate": 0.0001, "loss": 0.035, "step": 8000 }, { "epoch": 52.69736842105263, "grad_norm": 2.1659903526306152, "learning_rate": 0.0001, "loss": 0.036, "step": 8010 }, { "epoch": 52.76315789473684, "grad_norm": 2.2198495864868164, "learning_rate": 0.0001, "loss": 0.0347, "step": 8020 }, { "epoch": 52.828947368421055, "grad_norm": 1.784097671508789, "learning_rate": 0.0001, "loss": 0.0342, "step": 8030 }, { "epoch": 52.89473684210526, "grad_norm": 2.101797103881836, "learning_rate": 0.0001, "loss": 0.0359, "step": 8040 }, { "epoch": 52.96052631578947, "grad_norm": 2.195828437805176, "learning_rate": 0.0001, "loss": 0.0387, "step": 8050 }, { "epoch": 53.026315789473685, "grad_norm": 1.8445138931274414, "learning_rate": 0.0001, "loss": 0.0338, "step": 8060 }, { "epoch": 53.0921052631579, "grad_norm": 2.2160956859588623, "learning_rate": 0.0001, "loss": 0.0369, "step": 8070 }, { "epoch": 53.1578947368421, "grad_norm": 2.578427314758301, "learning_rate": 0.0001, "loss": 0.0371, "step": 8080 }, { "epoch": 53.223684210526315, "grad_norm": 2.306375503540039, "learning_rate": 0.0001, "loss": 0.0422, "step": 8090 }, { "epoch": 53.28947368421053, "grad_norm": 2.179263114929199, "learning_rate": 0.0001, "loss": 0.0341, "step": 8100 }, { "epoch": 53.35526315789474, "grad_norm": 1.8743386268615723, "learning_rate": 0.0001, "loss": 0.036, "step": 8110 }, { "epoch": 53.421052631578945, "grad_norm": 2.3205151557922363, "learning_rate": 0.0001, "loss": 0.0345, "step": 8120 }, { "epoch": 53.48684210526316, "grad_norm": 2.2174768447875977, "learning_rate": 0.0001, "loss": 0.0351, "step": 8130 }, { "epoch": 53.55263157894737, "grad_norm": 1.6841120719909668, "learning_rate": 0.0001, "loss": 0.0373, "step": 8140 }, { "epoch": 53.61842105263158, "grad_norm": 2.228266716003418, "learning_rate": 0.0001, "loss": 0.0396, "step": 8150 }, { "epoch": 53.68421052631579, "grad_norm": 2.182257652282715, "learning_rate": 0.0001, "loss": 0.0356, "step": 8160 }, { "epoch": 53.75, "grad_norm": 2.240403175354004, "learning_rate": 0.0001, "loss": 0.035, "step": 8170 }, { "epoch": 53.81578947368421, "grad_norm": 1.9025298357009888, "learning_rate": 0.0001, "loss": 0.0401, "step": 8180 }, { "epoch": 53.88157894736842, "grad_norm": 1.8477272987365723, "learning_rate": 0.0001, "loss": 0.0356, "step": 8190 }, { "epoch": 53.94736842105263, "grad_norm": 2.0865676403045654, "learning_rate": 0.0001, "loss": 0.0396, "step": 8200 }, { "epoch": 54.01315789473684, "grad_norm": 1.9469999074935913, "learning_rate": 0.0001, "loss": 0.0397, "step": 8210 }, { "epoch": 54.078947368421055, "grad_norm": 1.9196261167526245, "learning_rate": 0.0001, "loss": 0.0368, "step": 8220 }, { "epoch": 54.14473684210526, "grad_norm": 2.1719701290130615, "learning_rate": 0.0001, "loss": 0.0362, "step": 8230 }, { "epoch": 54.21052631578947, "grad_norm": 1.9804779291152954, "learning_rate": 0.0001, "loss": 0.0365, "step": 8240 }, { "epoch": 54.276315789473685, "grad_norm": 1.9981321096420288, "learning_rate": 0.0001, "loss": 0.0388, "step": 8250 }, { "epoch": 54.3421052631579, "grad_norm": 2.4525864124298096, "learning_rate": 0.0001, "loss": 0.0367, "step": 8260 }, { "epoch": 54.4078947368421, "grad_norm": 2.1657726764678955, "learning_rate": 0.0001, "loss": 0.0371, "step": 8270 }, { "epoch": 54.473684210526315, "grad_norm": 2.0540311336517334, "learning_rate": 0.0001, "loss": 0.0365, "step": 8280 }, { "epoch": 54.53947368421053, "grad_norm": 1.796277642250061, "learning_rate": 0.0001, "loss": 0.037, "step": 8290 }, { "epoch": 54.60526315789474, "grad_norm": 2.325568199157715, "learning_rate": 0.0001, "loss": 0.0321, "step": 8300 }, { "epoch": 54.671052631578945, "grad_norm": 2.269029378890991, "learning_rate": 0.0001, "loss": 0.034, "step": 8310 }, { "epoch": 54.73684210526316, "grad_norm": 1.9859508275985718, "learning_rate": 0.0001, "loss": 0.036, "step": 8320 }, { "epoch": 54.80263157894737, "grad_norm": 2.3518478870391846, "learning_rate": 0.0001, "loss": 0.0354, "step": 8330 }, { "epoch": 54.86842105263158, "grad_norm": 2.27053165435791, "learning_rate": 0.0001, "loss": 0.0381, "step": 8340 }, { "epoch": 54.93421052631579, "grad_norm": 2.290529489517212, "learning_rate": 0.0001, "loss": 0.0361, "step": 8350 }, { "epoch": 55.0, "grad_norm": 1.7832841873168945, "learning_rate": 0.0001, "loss": 0.0331, "step": 8360 }, { "epoch": 55.06578947368421, "grad_norm": 2.281114101409912, "learning_rate": 0.0001, "loss": 0.0377, "step": 8370 }, { "epoch": 55.13157894736842, "grad_norm": 2.1238036155700684, "learning_rate": 0.0001, "loss": 0.0354, "step": 8380 }, { "epoch": 55.19736842105263, "grad_norm": 1.897072434425354, "learning_rate": 0.0001, "loss": 0.0366, "step": 8390 }, { "epoch": 55.26315789473684, "grad_norm": 1.8354299068450928, "learning_rate": 0.0001, "loss": 0.0379, "step": 8400 }, { "epoch": 55.328947368421055, "grad_norm": 2.343350648880005, "learning_rate": 0.0001, "loss": 0.0354, "step": 8410 }, { "epoch": 55.39473684210526, "grad_norm": 2.1363508701324463, "learning_rate": 0.0001, "loss": 0.035, "step": 8420 }, { "epoch": 55.46052631578947, "grad_norm": 2.313427209854126, "learning_rate": 0.0001, "loss": 0.0373, "step": 8430 }, { "epoch": 55.526315789473685, "grad_norm": 1.9425634145736694, "learning_rate": 0.0001, "loss": 0.0333, "step": 8440 }, { "epoch": 55.5921052631579, "grad_norm": 1.886401891708374, "learning_rate": 0.0001, "loss": 0.0348, "step": 8450 }, { "epoch": 55.6578947368421, "grad_norm": 2.479999303817749, "learning_rate": 0.0001, "loss": 0.0337, "step": 8460 }, { "epoch": 55.723684210526315, "grad_norm": 2.4509387016296387, "learning_rate": 0.0001, "loss": 0.0349, "step": 8470 }, { "epoch": 55.78947368421053, "grad_norm": 2.3846399784088135, "learning_rate": 0.0001, "loss": 0.0385, "step": 8480 }, { "epoch": 55.85526315789474, "grad_norm": 2.032019853591919, "learning_rate": 0.0001, "loss": 0.0356, "step": 8490 }, { "epoch": 55.921052631578945, "grad_norm": 1.9172039031982422, "learning_rate": 0.0001, "loss": 0.0379, "step": 8500 }, { "epoch": 55.98684210526316, "grad_norm": 2.3752799034118652, "learning_rate": 0.0001, "loss": 0.0346, "step": 8510 }, { "epoch": 56.05263157894737, "grad_norm": 2.12593936920166, "learning_rate": 0.0001, "loss": 0.0358, "step": 8520 }, { "epoch": 56.11842105263158, "grad_norm": 1.9128234386444092, "learning_rate": 0.0001, "loss": 0.035, "step": 8530 }, { "epoch": 56.18421052631579, "grad_norm": 2.128859281539917, "learning_rate": 0.0001, "loss": 0.0385, "step": 8540 }, { "epoch": 56.25, "grad_norm": 2.458181619644165, "learning_rate": 0.0001, "loss": 0.0365, "step": 8550 }, { "epoch": 56.31578947368421, "grad_norm": 2.625624418258667, "learning_rate": 0.0001, "loss": 0.0343, "step": 8560 }, { "epoch": 56.38157894736842, "grad_norm": 2.32265305519104, "learning_rate": 0.0001, "loss": 0.0356, "step": 8570 }, { "epoch": 56.44736842105263, "grad_norm": 1.784980058670044, "learning_rate": 0.0001, "loss": 0.0346, "step": 8580 }, { "epoch": 56.51315789473684, "grad_norm": 2.2048745155334473, "learning_rate": 0.0001, "loss": 0.0357, "step": 8590 }, { "epoch": 56.578947368421055, "grad_norm": 1.9856022596359253, "learning_rate": 0.0001, "loss": 0.0351, "step": 8600 }, { "epoch": 56.64473684210526, "grad_norm": 1.889109492301941, "learning_rate": 0.0001, "loss": 0.0313, "step": 8610 }, { "epoch": 56.71052631578947, "grad_norm": 1.8074132204055786, "learning_rate": 0.0001, "loss": 0.035, "step": 8620 }, { "epoch": 56.776315789473685, "grad_norm": 2.243985414505005, "learning_rate": 0.0001, "loss": 0.0394, "step": 8630 }, { "epoch": 56.8421052631579, "grad_norm": 1.8554435968399048, "learning_rate": 0.0001, "loss": 0.0365, "step": 8640 }, { "epoch": 56.9078947368421, "grad_norm": 2.21171498298645, "learning_rate": 0.0001, "loss": 0.0399, "step": 8650 }, { "epoch": 56.973684210526315, "grad_norm": 2.339395523071289, "learning_rate": 0.0001, "loss": 0.035, "step": 8660 }, { "epoch": 57.03947368421053, "grad_norm": 2.24161958694458, "learning_rate": 0.0001, "loss": 0.0357, "step": 8670 }, { "epoch": 57.10526315789474, "grad_norm": 1.6437029838562012, "learning_rate": 0.0001, "loss": 0.0343, "step": 8680 }, { "epoch": 57.171052631578945, "grad_norm": 2.178180456161499, "learning_rate": 0.0001, "loss": 0.0327, "step": 8690 }, { "epoch": 57.23684210526316, "grad_norm": 2.3223023414611816, "learning_rate": 0.0001, "loss": 0.0353, "step": 8700 }, { "epoch": 57.30263157894737, "grad_norm": 2.4407029151916504, "learning_rate": 0.0001, "loss": 0.0344, "step": 8710 }, { "epoch": 57.36842105263158, "grad_norm": 2.149390697479248, "learning_rate": 0.0001, "loss": 0.0396, "step": 8720 }, { "epoch": 57.43421052631579, "grad_norm": 2.1309683322906494, "learning_rate": 0.0001, "loss": 0.0351, "step": 8730 }, { "epoch": 57.5, "grad_norm": 2.040288209915161, "learning_rate": 0.0001, "loss": 0.0331, "step": 8740 }, { "epoch": 57.56578947368421, "grad_norm": 2.48166561126709, "learning_rate": 0.0001, "loss": 0.0346, "step": 8750 }, { "epoch": 57.63157894736842, "grad_norm": 2.282402992248535, "learning_rate": 0.0001, "loss": 0.0368, "step": 8760 }, { "epoch": 57.69736842105263, "grad_norm": 2.5835509300231934, "learning_rate": 0.0001, "loss": 0.033, "step": 8770 }, { "epoch": 57.76315789473684, "grad_norm": 2.667722463607788, "learning_rate": 0.0001, "loss": 0.0379, "step": 8780 }, { "epoch": 57.828947368421055, "grad_norm": 1.739736795425415, "learning_rate": 0.0001, "loss": 0.0354, "step": 8790 }, { "epoch": 57.89473684210526, "grad_norm": 2.321622610092163, "learning_rate": 0.0001, "loss": 0.0308, "step": 8800 }, { "epoch": 57.96052631578947, "grad_norm": 2.218360185623169, "learning_rate": 0.0001, "loss": 0.0383, "step": 8810 }, { "epoch": 58.026315789473685, "grad_norm": 1.7169147729873657, "learning_rate": 0.0001, "loss": 0.0358, "step": 8820 }, { "epoch": 58.0921052631579, "grad_norm": 1.786856770515442, "learning_rate": 0.0001, "loss": 0.0346, "step": 8830 }, { "epoch": 58.1578947368421, "grad_norm": 2.19413685798645, "learning_rate": 0.0001, "loss": 0.0348, "step": 8840 }, { "epoch": 58.223684210526315, "grad_norm": 1.9983274936676025, "learning_rate": 0.0001, "loss": 0.0334, "step": 8850 }, { "epoch": 58.28947368421053, "grad_norm": 1.964223027229309, "learning_rate": 0.0001, "loss": 0.0347, "step": 8860 }, { "epoch": 58.35526315789474, "grad_norm": 1.8594284057617188, "learning_rate": 0.0001, "loss": 0.034, "step": 8870 }, { "epoch": 58.421052631578945, "grad_norm": 1.8396936655044556, "learning_rate": 0.0001, "loss": 0.0341, "step": 8880 }, { "epoch": 58.48684210526316, "grad_norm": 2.26518177986145, "learning_rate": 0.0001, "loss": 0.0315, "step": 8890 }, { "epoch": 58.55263157894737, "grad_norm": 1.9900743961334229, "learning_rate": 0.0001, "loss": 0.036, "step": 8900 }, { "epoch": 58.61842105263158, "grad_norm": 2.44905686378479, "learning_rate": 0.0001, "loss": 0.0366, "step": 8910 }, { "epoch": 58.68421052631579, "grad_norm": 1.7009742259979248, "learning_rate": 0.0001, "loss": 0.031, "step": 8920 }, { "epoch": 58.75, "grad_norm": 1.8698515892028809, "learning_rate": 0.0001, "loss": 0.038, "step": 8930 }, { "epoch": 58.81578947368421, "grad_norm": 1.9204734563827515, "learning_rate": 0.0001, "loss": 0.0343, "step": 8940 }, { "epoch": 58.88157894736842, "grad_norm": 1.6448564529418945, "learning_rate": 0.0001, "loss": 0.0361, "step": 8950 }, { "epoch": 58.94736842105263, "grad_norm": 1.8758846521377563, "learning_rate": 0.0001, "loss": 0.0418, "step": 8960 }, { "epoch": 59.01315789473684, "grad_norm": 2.6215178966522217, "learning_rate": 0.0001, "loss": 0.0357, "step": 8970 }, { "epoch": 59.078947368421055, "grad_norm": 1.9886058568954468, "learning_rate": 0.0001, "loss": 0.0335, "step": 8980 }, { "epoch": 59.14473684210526, "grad_norm": 2.228511095046997, "learning_rate": 0.0001, "loss": 0.0316, "step": 8990 }, { "epoch": 59.21052631578947, "grad_norm": 2.497946262359619, "learning_rate": 0.0001, "loss": 0.032, "step": 9000 }, { "epoch": 59.276315789473685, "grad_norm": 1.9556690454483032, "learning_rate": 0.0001, "loss": 0.0363, "step": 9010 }, { "epoch": 59.3421052631579, "grad_norm": 2.1602437496185303, "learning_rate": 0.0001, "loss": 0.0387, "step": 9020 }, { "epoch": 59.4078947368421, "grad_norm": 1.9988353252410889, "learning_rate": 0.0001, "loss": 0.0361, "step": 9030 }, { "epoch": 59.473684210526315, "grad_norm": 2.3881165981292725, "learning_rate": 0.0001, "loss": 0.0411, "step": 9040 }, { "epoch": 59.53947368421053, "grad_norm": 2.5629189014434814, "learning_rate": 0.0001, "loss": 0.0346, "step": 9050 }, { "epoch": 59.60526315789474, "grad_norm": 2.1359591484069824, "learning_rate": 0.0001, "loss": 0.0346, "step": 9060 }, { "epoch": 59.671052631578945, "grad_norm": 2.0776383876800537, "learning_rate": 0.0001, "loss": 0.0339, "step": 9070 }, { "epoch": 59.73684210526316, "grad_norm": 2.388756513595581, "learning_rate": 0.0001, "loss": 0.042, "step": 9080 }, { "epoch": 59.80263157894737, "grad_norm": 1.7650392055511475, "learning_rate": 0.0001, "loss": 0.0346, "step": 9090 }, { "epoch": 59.86842105263158, "grad_norm": 2.8666844367980957, "learning_rate": 0.0001, "loss": 0.0369, "step": 9100 }, { "epoch": 59.93421052631579, "grad_norm": 2.5306236743927, "learning_rate": 0.0001, "loss": 0.0343, "step": 9110 }, { "epoch": 60.0, "grad_norm": 2.634432792663574, "learning_rate": 0.0001, "loss": 0.034, "step": 9120 }, { "epoch": 60.06578947368421, "grad_norm": 2.6413989067077637, "learning_rate": 0.0001, "loss": 0.0356, "step": 9130 }, { "epoch": 60.13157894736842, "grad_norm": 2.7997219562530518, "learning_rate": 0.0001, "loss": 0.0381, "step": 9140 }, { "epoch": 60.19736842105263, "grad_norm": 2.7364566326141357, "learning_rate": 0.0001, "loss": 0.0307, "step": 9150 }, { "epoch": 60.26315789473684, "grad_norm": 2.612393379211426, "learning_rate": 0.0001, "loss": 0.032, "step": 9160 }, { "epoch": 60.328947368421055, "grad_norm": 2.443633794784546, "learning_rate": 0.0001, "loss": 0.0366, "step": 9170 }, { "epoch": 60.39473684210526, "grad_norm": 2.4918787479400635, "learning_rate": 0.0001, "loss": 0.0371, "step": 9180 }, { "epoch": 60.46052631578947, "grad_norm": 2.1031625270843506, "learning_rate": 0.0001, "loss": 0.0375, "step": 9190 }, { "epoch": 60.526315789473685, "grad_norm": 2.4682211875915527, "learning_rate": 0.0001, "loss": 0.0292, "step": 9200 }, { "epoch": 60.5921052631579, "grad_norm": 2.4938437938690186, "learning_rate": 0.0001, "loss": 0.0346, "step": 9210 }, { "epoch": 60.6578947368421, "grad_norm": 2.101090669631958, "learning_rate": 0.0001, "loss": 0.032, "step": 9220 }, { "epoch": 60.723684210526315, "grad_norm": 2.0934786796569824, "learning_rate": 0.0001, "loss": 0.0329, "step": 9230 }, { "epoch": 60.78947368421053, "grad_norm": 2.126350164413452, "learning_rate": 0.0001, "loss": 0.0327, "step": 9240 }, { "epoch": 60.85526315789474, "grad_norm": 2.133007049560547, "learning_rate": 0.0001, "loss": 0.0318, "step": 9250 }, { "epoch": 60.921052631578945, "grad_norm": 2.3088362216949463, "learning_rate": 0.0001, "loss": 0.0324, "step": 9260 }, { "epoch": 60.98684210526316, "grad_norm": 2.184321165084839, "learning_rate": 0.0001, "loss": 0.0356, "step": 9270 }, { "epoch": 61.05263157894737, "grad_norm": 2.0415399074554443, "learning_rate": 0.0001, "loss": 0.0388, "step": 9280 }, { "epoch": 61.11842105263158, "grad_norm": 2.311349868774414, "learning_rate": 0.0001, "loss": 0.0369, "step": 9290 }, { "epoch": 61.18421052631579, "grad_norm": 2.198589563369751, "learning_rate": 0.0001, "loss": 0.0327, "step": 9300 }, { "epoch": 61.25, "grad_norm": 1.818552851676941, "learning_rate": 0.0001, "loss": 0.0309, "step": 9310 }, { "epoch": 61.31578947368421, "grad_norm": 2.350637674331665, "learning_rate": 0.0001, "loss": 0.0316, "step": 9320 }, { "epoch": 61.38157894736842, "grad_norm": 2.230100154876709, "learning_rate": 0.0001, "loss": 0.0363, "step": 9330 }, { "epoch": 61.44736842105263, "grad_norm": 1.9746980667114258, "learning_rate": 0.0001, "loss": 0.036, "step": 9340 }, { "epoch": 61.51315789473684, "grad_norm": 2.050794839859009, "learning_rate": 0.0001, "loss": 0.0326, "step": 9350 }, { "epoch": 61.578947368421055, "grad_norm": 2.19759464263916, "learning_rate": 0.0001, "loss": 0.0326, "step": 9360 }, { "epoch": 61.64473684210526, "grad_norm": 1.9384559392929077, "learning_rate": 0.0001, "loss": 0.034, "step": 9370 }, { "epoch": 61.71052631578947, "grad_norm": 1.5629907846450806, "learning_rate": 0.0001, "loss": 0.0318, "step": 9380 }, { "epoch": 61.776315789473685, "grad_norm": 2.245145320892334, "learning_rate": 0.0001, "loss": 0.0376, "step": 9390 }, { "epoch": 61.8421052631579, "grad_norm": 2.276623487472534, "learning_rate": 0.0001, "loss": 0.0318, "step": 9400 }, { "epoch": 61.9078947368421, "grad_norm": 2.359163522720337, "learning_rate": 0.0001, "loss": 0.0347, "step": 9410 }, { "epoch": 61.973684210526315, "grad_norm": 2.2182393074035645, "learning_rate": 0.0001, "loss": 0.0308, "step": 9420 }, { "epoch": 62.03947368421053, "grad_norm": 2.2278976440429688, "learning_rate": 0.0001, "loss": 0.034, "step": 9430 }, { "epoch": 62.10526315789474, "grad_norm": 2.2542073726654053, "learning_rate": 0.0001, "loss": 0.0364, "step": 9440 }, { "epoch": 62.171052631578945, "grad_norm": 2.475452184677124, "learning_rate": 0.0001, "loss": 0.0338, "step": 9450 }, { "epoch": 62.23684210526316, "grad_norm": 2.1104462146759033, "learning_rate": 0.0001, "loss": 0.0332, "step": 9460 }, { "epoch": 62.30263157894737, "grad_norm": 2.4630398750305176, "learning_rate": 0.0001, "loss": 0.0322, "step": 9470 }, { "epoch": 62.36842105263158, "grad_norm": 2.22227144241333, "learning_rate": 0.0001, "loss": 0.0366, "step": 9480 }, { "epoch": 62.43421052631579, "grad_norm": 2.247596263885498, "learning_rate": 0.0001, "loss": 0.0326, "step": 9490 }, { "epoch": 62.5, "grad_norm": 2.2295756340026855, "learning_rate": 0.0001, "loss": 0.0332, "step": 9500 }, { "epoch": 62.56578947368421, "grad_norm": 1.8071231842041016, "learning_rate": 0.0001, "loss": 0.0321, "step": 9510 }, { "epoch": 62.63157894736842, "grad_norm": 1.7097272872924805, "learning_rate": 0.0001, "loss": 0.0331, "step": 9520 }, { "epoch": 62.69736842105263, "grad_norm": 2.013504981994629, "learning_rate": 0.0001, "loss": 0.0313, "step": 9530 }, { "epoch": 62.76315789473684, "grad_norm": 2.3877217769622803, "learning_rate": 0.0001, "loss": 0.0366, "step": 9540 }, { "epoch": 62.828947368421055, "grad_norm": 1.7919923067092896, "learning_rate": 0.0001, "loss": 0.035, "step": 9550 }, { "epoch": 62.89473684210526, "grad_norm": 1.9024256467819214, "learning_rate": 0.0001, "loss": 0.0323, "step": 9560 }, { "epoch": 62.96052631578947, "grad_norm": 2.5159573554992676, "learning_rate": 0.0001, "loss": 0.0334, "step": 9570 }, { "epoch": 63.026315789473685, "grad_norm": 1.9550975561141968, "learning_rate": 0.0001, "loss": 0.0352, "step": 9580 }, { "epoch": 63.0921052631579, "grad_norm": 1.6633784770965576, "learning_rate": 0.0001, "loss": 0.0332, "step": 9590 }, { "epoch": 63.1578947368421, "grad_norm": 2.2476999759674072, "learning_rate": 0.0001, "loss": 0.0341, "step": 9600 }, { "epoch": 63.223684210526315, "grad_norm": 1.7164835929870605, "learning_rate": 0.0001, "loss": 0.0369, "step": 9610 }, { "epoch": 63.28947368421053, "grad_norm": 1.8589459657669067, "learning_rate": 0.0001, "loss": 0.0342, "step": 9620 }, { "epoch": 63.35526315789474, "grad_norm": 2.1161880493164062, "learning_rate": 0.0001, "loss": 0.0317, "step": 9630 }, { "epoch": 63.421052631578945, "grad_norm": 2.0516700744628906, "learning_rate": 0.0001, "loss": 0.0328, "step": 9640 }, { "epoch": 63.48684210526316, "grad_norm": 1.9883986711502075, "learning_rate": 0.0001, "loss": 0.0343, "step": 9650 }, { "epoch": 63.55263157894737, "grad_norm": 1.8416293859481812, "learning_rate": 0.0001, "loss": 0.0306, "step": 9660 }, { "epoch": 63.61842105263158, "grad_norm": 1.711554765701294, "learning_rate": 0.0001, "loss": 0.0415, "step": 9670 }, { "epoch": 63.68421052631579, "grad_norm": 1.9319401979446411, "learning_rate": 0.0001, "loss": 0.0336, "step": 9680 }, { "epoch": 63.75, "grad_norm": 2.0522656440734863, "learning_rate": 0.0001, "loss": 0.034, "step": 9690 }, { "epoch": 63.81578947368421, "grad_norm": 1.9240378141403198, "learning_rate": 0.0001, "loss": 0.0333, "step": 9700 }, { "epoch": 63.88157894736842, "grad_norm": 2.1128199100494385, "learning_rate": 0.0001, "loss": 0.0334, "step": 9710 }, { "epoch": 63.94736842105263, "grad_norm": 2.129518985748291, "learning_rate": 0.0001, "loss": 0.0334, "step": 9720 }, { "epoch": 64.01315789473684, "grad_norm": 1.7781847715377808, "learning_rate": 0.0001, "loss": 0.0336, "step": 9730 }, { "epoch": 64.07894736842105, "grad_norm": 1.759955883026123, "learning_rate": 0.0001, "loss": 0.0342, "step": 9740 }, { "epoch": 64.14473684210526, "grad_norm": 1.9025697708129883, "learning_rate": 0.0001, "loss": 0.0344, "step": 9750 }, { "epoch": 64.21052631578948, "grad_norm": 1.7045345306396484, "learning_rate": 0.0001, "loss": 0.0331, "step": 9760 }, { "epoch": 64.27631578947368, "grad_norm": 2.074171543121338, "learning_rate": 0.0001, "loss": 0.0339, "step": 9770 }, { "epoch": 64.34210526315789, "grad_norm": 1.8737688064575195, "learning_rate": 0.0001, "loss": 0.0332, "step": 9780 }, { "epoch": 64.40789473684211, "grad_norm": 2.130225658416748, "learning_rate": 0.0001, "loss": 0.0395, "step": 9790 }, { "epoch": 64.47368421052632, "grad_norm": 2.031315565109253, "learning_rate": 0.0001, "loss": 0.0337, "step": 9800 }, { "epoch": 64.53947368421052, "grad_norm": 1.88314950466156, "learning_rate": 0.0001, "loss": 0.0334, "step": 9810 }, { "epoch": 64.60526315789474, "grad_norm": 1.714264988899231, "learning_rate": 0.0001, "loss": 0.0341, "step": 9820 }, { "epoch": 64.67105263157895, "grad_norm": 2.600741147994995, "learning_rate": 0.0001, "loss": 0.0341, "step": 9830 }, { "epoch": 64.73684210526316, "grad_norm": 2.0385050773620605, "learning_rate": 0.0001, "loss": 0.0348, "step": 9840 }, { "epoch": 64.80263157894737, "grad_norm": 2.372293472290039, "learning_rate": 0.0001, "loss": 0.0332, "step": 9850 }, { "epoch": 64.86842105263158, "grad_norm": 2.2337100505828857, "learning_rate": 0.0001, "loss": 0.033, "step": 9860 }, { "epoch": 64.9342105263158, "grad_norm": 2.1231322288513184, "learning_rate": 0.0001, "loss": 0.0338, "step": 9870 }, { "epoch": 65.0, "grad_norm": 2.852710723876953, "learning_rate": 0.0001, "loss": 0.0351, "step": 9880 }, { "epoch": 65.0657894736842, "grad_norm": 2.2976479530334473, "learning_rate": 0.0001, "loss": 0.0374, "step": 9890 }, { "epoch": 65.13157894736842, "grad_norm": 2.6037559509277344, "learning_rate": 0.0001, "loss": 0.0331, "step": 9900 }, { "epoch": 65.19736842105263, "grad_norm": 2.2720115184783936, "learning_rate": 0.0001, "loss": 0.0341, "step": 9910 }, { "epoch": 65.26315789473684, "grad_norm": 2.0078892707824707, "learning_rate": 0.0001, "loss": 0.0327, "step": 9920 }, { "epoch": 65.32894736842105, "grad_norm": 2.174067258834839, "learning_rate": 0.0001, "loss": 0.0337, "step": 9930 }, { "epoch": 65.39473684210526, "grad_norm": 2.1253135204315186, "learning_rate": 0.0001, "loss": 0.0339, "step": 9940 }, { "epoch": 65.46052631578948, "grad_norm": 2.447939395904541, "learning_rate": 0.0001, "loss": 0.0342, "step": 9950 }, { "epoch": 65.52631578947368, "grad_norm": 2.0778074264526367, "learning_rate": 0.0001, "loss": 0.0315, "step": 9960 }, { "epoch": 65.59210526315789, "grad_norm": 2.072627544403076, "learning_rate": 0.0001, "loss": 0.0323, "step": 9970 }, { "epoch": 65.65789473684211, "grad_norm": 2.054117202758789, "learning_rate": 0.0001, "loss": 0.0327, "step": 9980 }, { "epoch": 65.72368421052632, "grad_norm": 1.8227187395095825, "learning_rate": 0.0001, "loss": 0.0308, "step": 9990 }, { "epoch": 65.78947368421052, "grad_norm": 1.5389827489852905, "learning_rate": 0.0001, "loss": 0.0334, "step": 10000 }, { "epoch": 65.85526315789474, "grad_norm": 1.5675287246704102, "learning_rate": 0.0001, "loss": 0.036, "step": 10010 }, { "epoch": 65.92105263157895, "grad_norm": 2.467419147491455, "learning_rate": 0.0001, "loss": 0.0323, "step": 10020 }, { "epoch": 65.98684210526316, "grad_norm": 2.1902599334716797, "learning_rate": 0.0001, "loss": 0.0345, "step": 10030 }, { "epoch": 66.05263157894737, "grad_norm": 2.1312263011932373, "learning_rate": 0.0001, "loss": 0.0368, "step": 10040 }, { "epoch": 66.11842105263158, "grad_norm": 2.1697208881378174, "learning_rate": 0.0001, "loss": 0.0297, "step": 10050 }, { "epoch": 66.1842105263158, "grad_norm": 2.319117546081543, "learning_rate": 0.0001, "loss": 0.0295, "step": 10060 }, { "epoch": 66.25, "grad_norm": 1.7576595544815063, "learning_rate": 0.0001, "loss": 0.034, "step": 10070 }, { "epoch": 66.3157894736842, "grad_norm": 2.3985297679901123, "learning_rate": 0.0001, "loss": 0.033, "step": 10080 }, { "epoch": 66.38157894736842, "grad_norm": 2.1322896480560303, "learning_rate": 0.0001, "loss": 0.0327, "step": 10090 }, { "epoch": 66.44736842105263, "grad_norm": 2.2872979640960693, "learning_rate": 0.0001, "loss": 0.0304, "step": 10100 }, { "epoch": 66.51315789473684, "grad_norm": 2.2987442016601562, "learning_rate": 0.0001, "loss": 0.0353, "step": 10110 }, { "epoch": 66.57894736842105, "grad_norm": 2.324402093887329, "learning_rate": 0.0001, "loss": 0.0337, "step": 10120 }, { "epoch": 66.64473684210526, "grad_norm": 1.7056535482406616, "learning_rate": 0.0001, "loss": 0.035, "step": 10130 }, { "epoch": 66.71052631578948, "grad_norm": 2.355088949203491, "learning_rate": 0.0001, "loss": 0.033, "step": 10140 }, { "epoch": 66.77631578947368, "grad_norm": 1.7181812524795532, "learning_rate": 0.0001, "loss": 0.0359, "step": 10150 }, { "epoch": 66.84210526315789, "grad_norm": 2.036630153656006, "learning_rate": 0.0001, "loss": 0.0282, "step": 10160 }, { "epoch": 66.90789473684211, "grad_norm": 1.7540043592453003, "learning_rate": 0.0001, "loss": 0.0317, "step": 10170 }, { "epoch": 66.97368421052632, "grad_norm": 2.0998642444610596, "learning_rate": 0.0001, "loss": 0.0318, "step": 10180 }, { "epoch": 67.03947368421052, "grad_norm": 2.110028028488159, "learning_rate": 0.0001, "loss": 0.0309, "step": 10190 }, { "epoch": 67.10526315789474, "grad_norm": 2.3706624507904053, "learning_rate": 0.0001, "loss": 0.0286, "step": 10200 }, { "epoch": 67.17105263157895, "grad_norm": 1.5878039598464966, "learning_rate": 0.0001, "loss": 0.0326, "step": 10210 }, { "epoch": 67.23684210526316, "grad_norm": 2.136363983154297, "learning_rate": 0.0001, "loss": 0.0357, "step": 10220 }, { "epoch": 67.30263157894737, "grad_norm": 1.8201273679733276, "learning_rate": 0.0001, "loss": 0.0313, "step": 10230 }, { "epoch": 67.36842105263158, "grad_norm": 1.8971366882324219, "learning_rate": 0.0001, "loss": 0.0314, "step": 10240 }, { "epoch": 67.4342105263158, "grad_norm": 1.6257121562957764, "learning_rate": 0.0001, "loss": 0.0372, "step": 10250 }, { "epoch": 67.5, "grad_norm": 2.5282788276672363, "learning_rate": 0.0001, "loss": 0.0299, "step": 10260 }, { "epoch": 67.5657894736842, "grad_norm": 2.3728461265563965, "learning_rate": 0.0001, "loss": 0.0376, "step": 10270 }, { "epoch": 67.63157894736842, "grad_norm": 1.8680884838104248, "learning_rate": 0.0001, "loss": 0.0353, "step": 10280 }, { "epoch": 67.69736842105263, "grad_norm": 1.8397204875946045, "learning_rate": 0.0001, "loss": 0.0316, "step": 10290 }, { "epoch": 67.76315789473684, "grad_norm": 2.2867355346679688, "learning_rate": 0.0001, "loss": 0.0346, "step": 10300 }, { "epoch": 67.82894736842105, "grad_norm": 2.224348545074463, "learning_rate": 0.0001, "loss": 0.0289, "step": 10310 }, { "epoch": 67.89473684210526, "grad_norm": 2.241075277328491, "learning_rate": 0.0001, "loss": 0.0311, "step": 10320 }, { "epoch": 67.96052631578948, "grad_norm": 1.9791940450668335, "learning_rate": 0.0001, "loss": 0.0289, "step": 10330 }, { "epoch": 68.02631578947368, "grad_norm": 2.045264482498169, "learning_rate": 0.0001, "loss": 0.0312, "step": 10340 }, { "epoch": 68.09210526315789, "grad_norm": 2.0490119457244873, "learning_rate": 0.0001, "loss": 0.0341, "step": 10350 }, { "epoch": 68.15789473684211, "grad_norm": 1.8479689359664917, "learning_rate": 0.0001, "loss": 0.0299, "step": 10360 }, { "epoch": 68.22368421052632, "grad_norm": 1.7661082744598389, "learning_rate": 0.0001, "loss": 0.0327, "step": 10370 }, { "epoch": 68.28947368421052, "grad_norm": 2.067166805267334, "learning_rate": 0.0001, "loss": 0.0326, "step": 10380 }, { "epoch": 68.35526315789474, "grad_norm": 1.6801044940948486, "learning_rate": 0.0001, "loss": 0.0327, "step": 10390 }, { "epoch": 68.42105263157895, "grad_norm": 1.607061505317688, "learning_rate": 0.0001, "loss": 0.035, "step": 10400 }, { "epoch": 68.48684210526316, "grad_norm": 1.8414115905761719, "learning_rate": 0.0001, "loss": 0.0341, "step": 10410 }, { "epoch": 68.55263157894737, "grad_norm": 1.6416038274765015, "learning_rate": 0.0001, "loss": 0.0343, "step": 10420 }, { "epoch": 68.61842105263158, "grad_norm": 2.226360321044922, "learning_rate": 0.0001, "loss": 0.0321, "step": 10430 }, { "epoch": 68.6842105263158, "grad_norm": 1.799359917640686, "learning_rate": 0.0001, "loss": 0.0343, "step": 10440 }, { "epoch": 68.75, "grad_norm": 1.895742654800415, "learning_rate": 0.0001, "loss": 0.0341, "step": 10450 }, { "epoch": 68.8157894736842, "grad_norm": 1.908806562423706, "learning_rate": 0.0001, "loss": 0.0321, "step": 10460 }, { "epoch": 68.88157894736842, "grad_norm": 2.2551791667938232, "learning_rate": 0.0001, "loss": 0.035, "step": 10470 }, { "epoch": 68.94736842105263, "grad_norm": 1.6532186269760132, "learning_rate": 0.0001, "loss": 0.0336, "step": 10480 }, { "epoch": 69.01315789473684, "grad_norm": 1.6594740152359009, "learning_rate": 0.0001, "loss": 0.0315, "step": 10490 }, { "epoch": 69.07894736842105, "grad_norm": 1.8618637323379517, "learning_rate": 0.0001, "loss": 0.0357, "step": 10500 }, { "epoch": 69.14473684210526, "grad_norm": 2.4688313007354736, "learning_rate": 0.0001, "loss": 0.0333, "step": 10510 }, { "epoch": 69.21052631578948, "grad_norm": 2.3658173084259033, "learning_rate": 0.0001, "loss": 0.0328, "step": 10520 }, { "epoch": 69.27631578947368, "grad_norm": 1.5731991529464722, "learning_rate": 0.0001, "loss": 0.0345, "step": 10530 }, { "epoch": 69.34210526315789, "grad_norm": 1.6794017553329468, "learning_rate": 0.0001, "loss": 0.0323, "step": 10540 }, { "epoch": 69.40789473684211, "grad_norm": 1.9663618803024292, "learning_rate": 0.0001, "loss": 0.0331, "step": 10550 }, { "epoch": 69.47368421052632, "grad_norm": 1.8672981262207031, "learning_rate": 0.0001, "loss": 0.0376, "step": 10560 }, { "epoch": 69.53947368421052, "grad_norm": 2.0682976245880127, "learning_rate": 0.0001, "loss": 0.0345, "step": 10570 }, { "epoch": 69.60526315789474, "grad_norm": 2.144080400466919, "learning_rate": 0.0001, "loss": 0.0333, "step": 10580 }, { "epoch": 69.67105263157895, "grad_norm": 2.329354763031006, "learning_rate": 0.0001, "loss": 0.0323, "step": 10590 }, { "epoch": 69.73684210526316, "grad_norm": 1.9449678659439087, "learning_rate": 0.0001, "loss": 0.0349, "step": 10600 }, { "epoch": 69.80263157894737, "grad_norm": 2.0480034351348877, "learning_rate": 0.0001, "loss": 0.0313, "step": 10610 }, { "epoch": 69.86842105263158, "grad_norm": 2.006821632385254, "learning_rate": 0.0001, "loss": 0.0306, "step": 10620 }, { "epoch": 69.9342105263158, "grad_norm": 2.084979772567749, "learning_rate": 0.0001, "loss": 0.0366, "step": 10630 }, { "epoch": 70.0, "grad_norm": 1.9789648056030273, "learning_rate": 0.0001, "loss": 0.0341, "step": 10640 }, { "epoch": 70.0657894736842, "grad_norm": 1.5859389305114746, "learning_rate": 0.0001, "loss": 0.0351, "step": 10650 }, { "epoch": 70.13157894736842, "grad_norm": 2.132671594619751, "learning_rate": 0.0001, "loss": 0.0327, "step": 10660 }, { "epoch": 70.19736842105263, "grad_norm": 1.8210572004318237, "learning_rate": 0.0001, "loss": 0.0356, "step": 10670 }, { "epoch": 70.26315789473684, "grad_norm": 1.9513922929763794, "learning_rate": 0.0001, "loss": 0.0337, "step": 10680 }, { "epoch": 70.32894736842105, "grad_norm": 1.7509745359420776, "learning_rate": 0.0001, "loss": 0.0308, "step": 10690 }, { "epoch": 70.39473684210526, "grad_norm": 1.7114336490631104, "learning_rate": 0.0001, "loss": 0.0332, "step": 10700 }, { "epoch": 70.46052631578948, "grad_norm": 1.9209591150283813, "learning_rate": 0.0001, "loss": 0.0305, "step": 10710 }, { "epoch": 70.52631578947368, "grad_norm": 1.6857737302780151, "learning_rate": 0.0001, "loss": 0.0328, "step": 10720 }, { "epoch": 70.59210526315789, "grad_norm": 2.0842723846435547, "learning_rate": 0.0001, "loss": 0.0366, "step": 10730 }, { "epoch": 70.65789473684211, "grad_norm": 2.2455458641052246, "learning_rate": 0.0001, "loss": 0.0353, "step": 10740 }, { "epoch": 70.72368421052632, "grad_norm": 2.1329753398895264, "learning_rate": 0.0001, "loss": 0.0375, "step": 10750 }, { "epoch": 70.78947368421052, "grad_norm": 1.2481554746627808, "learning_rate": 0.0001, "loss": 0.0311, "step": 10760 }, { "epoch": 70.85526315789474, "grad_norm": 1.8279143571853638, "learning_rate": 0.0001, "loss": 0.0291, "step": 10770 }, { "epoch": 70.92105263157895, "grad_norm": 1.7830314636230469, "learning_rate": 0.0001, "loss": 0.0291, "step": 10780 }, { "epoch": 70.98684210526316, "grad_norm": 1.7848114967346191, "learning_rate": 0.0001, "loss": 0.0334, "step": 10790 }, { "epoch": 71.05263157894737, "grad_norm": 1.9861677885055542, "learning_rate": 0.0001, "loss": 0.0307, "step": 10800 }, { "epoch": 71.11842105263158, "grad_norm": 2.212383985519409, "learning_rate": 0.0001, "loss": 0.0305, "step": 10810 }, { "epoch": 71.1842105263158, "grad_norm": 1.8296265602111816, "learning_rate": 0.0001, "loss": 0.0375, "step": 10820 }, { "epoch": 71.25, "grad_norm": 2.0627100467681885, "learning_rate": 0.0001, "loss": 0.0281, "step": 10830 }, { "epoch": 71.3157894736842, "grad_norm": 2.212965965270996, "learning_rate": 0.0001, "loss": 0.0331, "step": 10840 }, { "epoch": 71.38157894736842, "grad_norm": 2.0871388912200928, "learning_rate": 0.0001, "loss": 0.0322, "step": 10850 }, { "epoch": 71.44736842105263, "grad_norm": 1.9299043416976929, "learning_rate": 0.0001, "loss": 0.0356, "step": 10860 }, { "epoch": 71.51315789473684, "grad_norm": 1.698004961013794, "learning_rate": 0.0001, "loss": 0.0314, "step": 10870 }, { "epoch": 71.57894736842105, "grad_norm": 1.8044395446777344, "learning_rate": 0.0001, "loss": 0.0273, "step": 10880 }, { "epoch": 71.64473684210526, "grad_norm": 1.8270800113677979, "learning_rate": 0.0001, "loss": 0.0315, "step": 10890 }, { "epoch": 71.71052631578948, "grad_norm": 2.037364959716797, "learning_rate": 0.0001, "loss": 0.0298, "step": 10900 }, { "epoch": 71.77631578947368, "grad_norm": 1.7579048871994019, "learning_rate": 0.0001, "loss": 0.0308, "step": 10910 }, { "epoch": 71.84210526315789, "grad_norm": 1.9596993923187256, "learning_rate": 0.0001, "loss": 0.0342, "step": 10920 }, { "epoch": 71.90789473684211, "grad_norm": 1.6674180030822754, "learning_rate": 0.0001, "loss": 0.0354, "step": 10930 }, { "epoch": 71.97368421052632, "grad_norm": 1.9461649656295776, "learning_rate": 0.0001, "loss": 0.0318, "step": 10940 }, { "epoch": 72.03947368421052, "grad_norm": 2.0485146045684814, "learning_rate": 0.0001, "loss": 0.0349, "step": 10950 }, { "epoch": 72.10526315789474, "grad_norm": 1.8540596961975098, "learning_rate": 0.0001, "loss": 0.0305, "step": 10960 }, { "epoch": 72.17105263157895, "grad_norm": 2.0809316635131836, "learning_rate": 0.0001, "loss": 0.0349, "step": 10970 }, { "epoch": 72.23684210526316, "grad_norm": 2.4290947914123535, "learning_rate": 0.0001, "loss": 0.0343, "step": 10980 }, { "epoch": 72.30263157894737, "grad_norm": 2.057790994644165, "learning_rate": 0.0001, "loss": 0.0342, "step": 10990 }, { "epoch": 72.36842105263158, "grad_norm": 2.304600477218628, "learning_rate": 0.0001, "loss": 0.0298, "step": 11000 }, { "epoch": 72.4342105263158, "grad_norm": 1.7730439901351929, "learning_rate": 0.0001, "loss": 0.0322, "step": 11010 }, { "epoch": 72.5, "grad_norm": 2.008673667907715, "learning_rate": 0.0001, "loss": 0.0306, "step": 11020 }, { "epoch": 72.5657894736842, "grad_norm": 1.9240750074386597, "learning_rate": 0.0001, "loss": 0.0332, "step": 11030 }, { "epoch": 72.63157894736842, "grad_norm": 2.139969825744629, "learning_rate": 0.0001, "loss": 0.0308, "step": 11040 }, { "epoch": 72.69736842105263, "grad_norm": 2.434133291244507, "learning_rate": 0.0001, "loss": 0.034, "step": 11050 }, { "epoch": 72.76315789473684, "grad_norm": 1.9157482385635376, "learning_rate": 0.0001, "loss": 0.0323, "step": 11060 }, { "epoch": 72.82894736842105, "grad_norm": 1.6790140867233276, "learning_rate": 0.0001, "loss": 0.0279, "step": 11070 }, { "epoch": 72.89473684210526, "grad_norm": 2.2375621795654297, "learning_rate": 0.0001, "loss": 0.0311, "step": 11080 }, { "epoch": 72.96052631578948, "grad_norm": 1.626340627670288, "learning_rate": 0.0001, "loss": 0.0303, "step": 11090 }, { "epoch": 73.02631578947368, "grad_norm": 2.3413140773773193, "learning_rate": 0.0001, "loss": 0.0295, "step": 11100 }, { "epoch": 73.09210526315789, "grad_norm": 2.1686933040618896, "learning_rate": 0.0001, "loss": 0.0332, "step": 11110 }, { "epoch": 73.15789473684211, "grad_norm": 2.5606400966644287, "learning_rate": 0.0001, "loss": 0.0291, "step": 11120 }, { "epoch": 73.22368421052632, "grad_norm": 1.8890550136566162, "learning_rate": 0.0001, "loss": 0.0337, "step": 11130 }, { "epoch": 73.28947368421052, "grad_norm": 2.093996286392212, "learning_rate": 0.0001, "loss": 0.0332, "step": 11140 }, { "epoch": 73.35526315789474, "grad_norm": 2.0110158920288086, "learning_rate": 0.0001, "loss": 0.0317, "step": 11150 }, { "epoch": 73.42105263157895, "grad_norm": 2.1425280570983887, "learning_rate": 0.0001, "loss": 0.0297, "step": 11160 }, { "epoch": 73.48684210526316, "grad_norm": 1.6674917936325073, "learning_rate": 0.0001, "loss": 0.0313, "step": 11170 }, { "epoch": 73.55263157894737, "grad_norm": 2.0728542804718018, "learning_rate": 0.0001, "loss": 0.0301, "step": 11180 }, { "epoch": 73.61842105263158, "grad_norm": 2.287900686264038, "learning_rate": 0.0001, "loss": 0.0319, "step": 11190 }, { "epoch": 73.6842105263158, "grad_norm": 1.950493574142456, "learning_rate": 0.0001, "loss": 0.0316, "step": 11200 }, { "epoch": 73.75, "grad_norm": 2.0623505115509033, "learning_rate": 0.0001, "loss": 0.0314, "step": 11210 }, { "epoch": 73.8157894736842, "grad_norm": 2.222360134124756, "learning_rate": 0.0001, "loss": 0.0329, "step": 11220 }, { "epoch": 73.88157894736842, "grad_norm": 2.407506227493286, "learning_rate": 0.0001, "loss": 0.0306, "step": 11230 }, { "epoch": 73.94736842105263, "grad_norm": 2.1076436042785645, "learning_rate": 0.0001, "loss": 0.0315, "step": 11240 }, { "epoch": 74.01315789473684, "grad_norm": 2.26798152923584, "learning_rate": 0.0001, "loss": 0.0286, "step": 11250 }, { "epoch": 74.07894736842105, "grad_norm": 1.7559285163879395, "learning_rate": 0.0001, "loss": 0.0312, "step": 11260 }, { "epoch": 74.14473684210526, "grad_norm": 2.2323734760284424, "learning_rate": 0.0001, "loss": 0.0327, "step": 11270 }, { "epoch": 74.21052631578948, "grad_norm": 1.947619080543518, "learning_rate": 0.0001, "loss": 0.0295, "step": 11280 }, { "epoch": 74.27631578947368, "grad_norm": 1.6197162866592407, "learning_rate": 0.0001, "loss": 0.0336, "step": 11290 }, { "epoch": 74.34210526315789, "grad_norm": 1.9003392457962036, "learning_rate": 0.0001, "loss": 0.0322, "step": 11300 }, { "epoch": 74.40789473684211, "grad_norm": 2.167466640472412, "learning_rate": 0.0001, "loss": 0.0303, "step": 11310 }, { "epoch": 74.47368421052632, "grad_norm": 2.373649835586548, "learning_rate": 0.0001, "loss": 0.0305, "step": 11320 }, { "epoch": 74.53947368421052, "grad_norm": 2.2646172046661377, "learning_rate": 0.0001, "loss": 0.0294, "step": 11330 }, { "epoch": 74.60526315789474, "grad_norm": 1.9604682922363281, "learning_rate": 0.0001, "loss": 0.0294, "step": 11340 }, { "epoch": 74.67105263157895, "grad_norm": 1.860137939453125, "learning_rate": 0.0001, "loss": 0.0302, "step": 11350 }, { "epoch": 74.73684210526316, "grad_norm": 1.8657748699188232, "learning_rate": 0.0001, "loss": 0.0315, "step": 11360 }, { "epoch": 74.80263157894737, "grad_norm": 1.8343254327774048, "learning_rate": 0.0001, "loss": 0.0338, "step": 11370 }, { "epoch": 74.86842105263158, "grad_norm": 2.3574209213256836, "learning_rate": 0.0001, "loss": 0.0292, "step": 11380 }, { "epoch": 74.9342105263158, "grad_norm": 2.115976572036743, "learning_rate": 0.0001, "loss": 0.0335, "step": 11390 }, { "epoch": 75.0, "grad_norm": 2.0765559673309326, "learning_rate": 0.0001, "loss": 0.0312, "step": 11400 }, { "epoch": 75.0657894736842, "grad_norm": 2.014866590499878, "learning_rate": 0.0001, "loss": 0.0285, "step": 11410 }, { "epoch": 75.13157894736842, "grad_norm": 2.1789872646331787, "learning_rate": 0.0001, "loss": 0.0284, "step": 11420 }, { "epoch": 75.19736842105263, "grad_norm": 1.9266473054885864, "learning_rate": 0.0001, "loss": 0.0302, "step": 11430 }, { "epoch": 75.26315789473684, "grad_norm": 2.395528554916382, "learning_rate": 0.0001, "loss": 0.0283, "step": 11440 }, { "epoch": 75.32894736842105, "grad_norm": 2.5278120040893555, "learning_rate": 0.0001, "loss": 0.0298, "step": 11450 }, { "epoch": 75.39473684210526, "grad_norm": 2.1051442623138428, "learning_rate": 0.0001, "loss": 0.0279, "step": 11460 }, { "epoch": 75.46052631578948, "grad_norm": 1.9000073671340942, "learning_rate": 0.0001, "loss": 0.0306, "step": 11470 }, { "epoch": 75.52631578947368, "grad_norm": 1.973384141921997, "learning_rate": 0.0001, "loss": 0.0334, "step": 11480 }, { "epoch": 75.59210526315789, "grad_norm": 2.174776792526245, "learning_rate": 0.0001, "loss": 0.0329, "step": 11490 }, { "epoch": 75.65789473684211, "grad_norm": 1.6336909532546997, "learning_rate": 0.0001, "loss": 0.0291, "step": 11500 }, { "epoch": 75.72368421052632, "grad_norm": 1.6577805280685425, "learning_rate": 0.0001, "loss": 0.0377, "step": 11510 }, { "epoch": 75.78947368421052, "grad_norm": 1.8932713270187378, "learning_rate": 0.0001, "loss": 0.0318, "step": 11520 }, { "epoch": 75.85526315789474, "grad_norm": 1.4841527938842773, "learning_rate": 0.0001, "loss": 0.0314, "step": 11530 }, { "epoch": 75.92105263157895, "grad_norm": 1.922417163848877, "learning_rate": 0.0001, "loss": 0.0308, "step": 11540 }, { "epoch": 75.98684210526316, "grad_norm": 2.405369997024536, "learning_rate": 0.0001, "loss": 0.0336, "step": 11550 }, { "epoch": 76.05263157894737, "grad_norm": 1.8680914640426636, "learning_rate": 0.0001, "loss": 0.034, "step": 11560 }, { "epoch": 76.11842105263158, "grad_norm": 1.8438783884048462, "learning_rate": 0.0001, "loss": 0.0307, "step": 11570 }, { "epoch": 76.1842105263158, "grad_norm": 2.4350788593292236, "learning_rate": 0.0001, "loss": 0.0301, "step": 11580 }, { "epoch": 76.25, "grad_norm": 1.4686951637268066, "learning_rate": 0.0001, "loss": 0.0334, "step": 11590 }, { "epoch": 76.3157894736842, "grad_norm": 2.0404906272888184, "learning_rate": 0.0001, "loss": 0.0314, "step": 11600 }, { "epoch": 76.38157894736842, "grad_norm": 1.3880646228790283, "learning_rate": 0.0001, "loss": 0.0273, "step": 11610 }, { "epoch": 76.44736842105263, "grad_norm": 1.80776047706604, "learning_rate": 0.0001, "loss": 0.0319, "step": 11620 }, { "epoch": 76.51315789473684, "grad_norm": 1.8519164323806763, "learning_rate": 0.0001, "loss": 0.0348, "step": 11630 }, { "epoch": 76.57894736842105, "grad_norm": 1.441853404045105, "learning_rate": 0.0001, "loss": 0.0307, "step": 11640 }, { "epoch": 76.64473684210526, "grad_norm": 1.9860663414001465, "learning_rate": 0.0001, "loss": 0.0294, "step": 11650 }, { "epoch": 76.71052631578948, "grad_norm": 2.1795623302459717, "learning_rate": 0.0001, "loss": 0.0315, "step": 11660 }, { "epoch": 76.77631578947368, "grad_norm": 1.922486662864685, "learning_rate": 0.0001, "loss": 0.0323, "step": 11670 }, { "epoch": 76.84210526315789, "grad_norm": 1.9533334970474243, "learning_rate": 0.0001, "loss": 0.032, "step": 11680 }, { "epoch": 76.90789473684211, "grad_norm": 1.546584963798523, "learning_rate": 0.0001, "loss": 0.0299, "step": 11690 }, { "epoch": 76.97368421052632, "grad_norm": 1.790946364402771, "learning_rate": 0.0001, "loss": 0.0345, "step": 11700 }, { "epoch": 77.03947368421052, "grad_norm": 1.6082978248596191, "learning_rate": 0.0001, "loss": 0.0316, "step": 11710 }, { "epoch": 77.10526315789474, "grad_norm": 1.8653497695922852, "learning_rate": 0.0001, "loss": 0.0327, "step": 11720 }, { "epoch": 77.17105263157895, "grad_norm": 2.02057147026062, "learning_rate": 0.0001, "loss": 0.0309, "step": 11730 }, { "epoch": 77.23684210526316, "grad_norm": 2.486804723739624, "learning_rate": 0.0001, "loss": 0.0308, "step": 11740 }, { "epoch": 77.30263157894737, "grad_norm": 1.8372561931610107, "learning_rate": 0.0001, "loss": 0.0325, "step": 11750 }, { "epoch": 77.36842105263158, "grad_norm": 1.783441424369812, "learning_rate": 0.0001, "loss": 0.0313, "step": 11760 }, { "epoch": 77.4342105263158, "grad_norm": 1.6678776741027832, "learning_rate": 0.0001, "loss": 0.0308, "step": 11770 }, { "epoch": 77.5, "grad_norm": 2.1685516834259033, "learning_rate": 0.0001, "loss": 0.0275, "step": 11780 }, { "epoch": 77.5657894736842, "grad_norm": 2.4455788135528564, "learning_rate": 0.0001, "loss": 0.0297, "step": 11790 }, { "epoch": 77.63157894736842, "grad_norm": 2.2124006748199463, "learning_rate": 0.0001, "loss": 0.0295, "step": 11800 }, { "epoch": 77.69736842105263, "grad_norm": 2.0547845363616943, "learning_rate": 0.0001, "loss": 0.0309, "step": 11810 }, { "epoch": 77.76315789473684, "grad_norm": 2.1371893882751465, "learning_rate": 0.0001, "loss": 0.0306, "step": 11820 }, { "epoch": 77.82894736842105, "grad_norm": 1.9479222297668457, "learning_rate": 0.0001, "loss": 0.0323, "step": 11830 }, { "epoch": 77.89473684210526, "grad_norm": 1.858992099761963, "learning_rate": 0.0001, "loss": 0.0302, "step": 11840 }, { "epoch": 77.96052631578948, "grad_norm": 1.5059412717819214, "learning_rate": 0.0001, "loss": 0.0325, "step": 11850 }, { "epoch": 78.02631578947368, "grad_norm": 1.696049451828003, "learning_rate": 0.0001, "loss": 0.034, "step": 11860 }, { "epoch": 78.09210526315789, "grad_norm": 1.9519498348236084, "learning_rate": 0.0001, "loss": 0.0282, "step": 11870 }, { "epoch": 78.15789473684211, "grad_norm": 1.7731332778930664, "learning_rate": 0.0001, "loss": 0.0325, "step": 11880 }, { "epoch": 78.22368421052632, "grad_norm": 1.6738544702529907, "learning_rate": 0.0001, "loss": 0.0326, "step": 11890 }, { "epoch": 78.28947368421052, "grad_norm": 2.0603187084198, "learning_rate": 0.0001, "loss": 0.0305, "step": 11900 }, { "epoch": 78.35526315789474, "grad_norm": 2.2363109588623047, "learning_rate": 0.0001, "loss": 0.028, "step": 11910 }, { "epoch": 78.42105263157895, "grad_norm": 1.8674083948135376, "learning_rate": 0.0001, "loss": 0.031, "step": 11920 }, { "epoch": 78.48684210526316, "grad_norm": 2.1834182739257812, "learning_rate": 0.0001, "loss": 0.0305, "step": 11930 }, { "epoch": 78.55263157894737, "grad_norm": 1.75481116771698, "learning_rate": 0.0001, "loss": 0.0378, "step": 11940 }, { "epoch": 78.61842105263158, "grad_norm": 1.7296005487442017, "learning_rate": 0.0001, "loss": 0.0278, "step": 11950 }, { "epoch": 78.6842105263158, "grad_norm": 2.218533515930176, "learning_rate": 0.0001, "loss": 0.0312, "step": 11960 }, { "epoch": 78.75, "grad_norm": 1.5156240463256836, "learning_rate": 0.0001, "loss": 0.0325, "step": 11970 }, { "epoch": 78.8157894736842, "grad_norm": 1.9989612102508545, "learning_rate": 0.0001, "loss": 0.0308, "step": 11980 }, { "epoch": 78.88157894736842, "grad_norm": 2.0327396392822266, "learning_rate": 0.0001, "loss": 0.0303, "step": 11990 }, { "epoch": 78.94736842105263, "grad_norm": 2.1482322216033936, "learning_rate": 0.0001, "loss": 0.0306, "step": 12000 }, { "epoch": 79.01315789473684, "grad_norm": 1.5345418453216553, "learning_rate": 0.0001, "loss": 0.0312, "step": 12010 }, { "epoch": 79.07894736842105, "grad_norm": 1.7488360404968262, "learning_rate": 0.0001, "loss": 0.0316, "step": 12020 }, { "epoch": 79.14473684210526, "grad_norm": 1.853369951248169, "learning_rate": 0.0001, "loss": 0.0299, "step": 12030 }, { "epoch": 79.21052631578948, "grad_norm": 1.81135892868042, "learning_rate": 0.0001, "loss": 0.0303, "step": 12040 }, { "epoch": 79.27631578947368, "grad_norm": 2.0191962718963623, "learning_rate": 0.0001, "loss": 0.0273, "step": 12050 }, { "epoch": 79.34210526315789, "grad_norm": 1.5767617225646973, "learning_rate": 0.0001, "loss": 0.0302, "step": 12060 }, { "epoch": 79.40789473684211, "grad_norm": 2.721212863922119, "learning_rate": 0.0001, "loss": 0.0279, "step": 12070 }, { "epoch": 79.47368421052632, "grad_norm": 2.155026912689209, "learning_rate": 0.0001, "loss": 0.0319, "step": 12080 }, { "epoch": 79.53947368421052, "grad_norm": 2.018669843673706, "learning_rate": 0.0001, "loss": 0.0292, "step": 12090 }, { "epoch": 79.60526315789474, "grad_norm": 1.7364435195922852, "learning_rate": 0.0001, "loss": 0.035, "step": 12100 }, { "epoch": 79.67105263157895, "grad_norm": 1.8277462720870972, "learning_rate": 0.0001, "loss": 0.0313, "step": 12110 }, { "epoch": 79.73684210526316, "grad_norm": 1.4780710935592651, "learning_rate": 0.0001, "loss": 0.0295, "step": 12120 }, { "epoch": 79.80263157894737, "grad_norm": 1.5706015825271606, "learning_rate": 0.0001, "loss": 0.0305, "step": 12130 }, { "epoch": 79.86842105263158, "grad_norm": 1.814844012260437, "learning_rate": 0.0001, "loss": 0.0284, "step": 12140 }, { "epoch": 79.9342105263158, "grad_norm": 1.9011082649230957, "learning_rate": 0.0001, "loss": 0.0307, "step": 12150 }, { "epoch": 80.0, "grad_norm": 1.7437013387680054, "learning_rate": 0.0001, "loss": 0.0357, "step": 12160 }, { "epoch": 80.0657894736842, "grad_norm": 1.8489567041397095, "learning_rate": 0.0001, "loss": 0.0278, "step": 12170 }, { "epoch": 80.13157894736842, "grad_norm": 1.9185560941696167, "learning_rate": 0.0001, "loss": 0.0296, "step": 12180 }, { "epoch": 80.19736842105263, "grad_norm": 1.9406415224075317, "learning_rate": 0.0001, "loss": 0.0308, "step": 12190 }, { "epoch": 80.26315789473684, "grad_norm": 1.9059523344039917, "learning_rate": 0.0001, "loss": 0.0314, "step": 12200 }, { "epoch": 80.32894736842105, "grad_norm": 1.7519546747207642, "learning_rate": 0.0001, "loss": 0.0313, "step": 12210 }, { "epoch": 80.39473684210526, "grad_norm": 1.8003839254379272, "learning_rate": 0.0001, "loss": 0.0318, "step": 12220 }, { "epoch": 80.46052631578948, "grad_norm": 1.8841017484664917, "learning_rate": 0.0001, "loss": 0.0325, "step": 12230 }, { "epoch": 80.52631578947368, "grad_norm": 1.9253623485565186, "learning_rate": 0.0001, "loss": 0.0328, "step": 12240 }, { "epoch": 80.59210526315789, "grad_norm": 1.7255377769470215, "learning_rate": 0.0001, "loss": 0.0317, "step": 12250 }, { "epoch": 80.65789473684211, "grad_norm": 1.7051324844360352, "learning_rate": 0.0001, "loss": 0.0305, "step": 12260 }, { "epoch": 80.72368421052632, "grad_norm": 1.8028048276901245, "learning_rate": 0.0001, "loss": 0.0354, "step": 12270 }, { "epoch": 80.78947368421052, "grad_norm": 1.9010478258132935, "learning_rate": 0.0001, "loss": 0.031, "step": 12280 }, { "epoch": 80.85526315789474, "grad_norm": 2.0443882942199707, "learning_rate": 0.0001, "loss": 0.0285, "step": 12290 }, { "epoch": 80.92105263157895, "grad_norm": 2.1045596599578857, "learning_rate": 0.0001, "loss": 0.0315, "step": 12300 }, { "epoch": 80.98684210526316, "grad_norm": 2.2704505920410156, "learning_rate": 0.0001, "loss": 0.0298, "step": 12310 }, { "epoch": 81.05263157894737, "grad_norm": 2.170691728591919, "learning_rate": 0.0001, "loss": 0.0326, "step": 12320 }, { "epoch": 81.11842105263158, "grad_norm": 2.419440746307373, "learning_rate": 0.0001, "loss": 0.0303, "step": 12330 }, { "epoch": 81.1842105263158, "grad_norm": 2.45249342918396, "learning_rate": 0.0001, "loss": 0.0283, "step": 12340 }, { "epoch": 81.25, "grad_norm": 1.9213991165161133, "learning_rate": 0.0001, "loss": 0.0286, "step": 12350 }, { "epoch": 81.3157894736842, "grad_norm": 1.3771448135375977, "learning_rate": 0.0001, "loss": 0.0264, "step": 12360 }, { "epoch": 81.38157894736842, "grad_norm": 2.076711416244507, "learning_rate": 0.0001, "loss": 0.0291, "step": 12370 }, { "epoch": 81.44736842105263, "grad_norm": 1.606643557548523, "learning_rate": 0.0001, "loss": 0.0324, "step": 12380 }, { "epoch": 81.51315789473684, "grad_norm": 1.7662115097045898, "learning_rate": 0.0001, "loss": 0.0332, "step": 12390 }, { "epoch": 81.57894736842105, "grad_norm": 1.5275945663452148, "learning_rate": 0.0001, "loss": 0.0278, "step": 12400 }, { "epoch": 81.64473684210526, "grad_norm": 1.5726087093353271, "learning_rate": 0.0001, "loss": 0.0282, "step": 12410 }, { "epoch": 81.71052631578948, "grad_norm": 1.5958622694015503, "learning_rate": 0.0001, "loss": 0.0313, "step": 12420 }, { "epoch": 81.77631578947368, "grad_norm": 1.9642810821533203, "learning_rate": 0.0001, "loss": 0.0298, "step": 12430 }, { "epoch": 81.84210526315789, "grad_norm": 2.109614610671997, "learning_rate": 0.0001, "loss": 0.0278, "step": 12440 }, { "epoch": 81.90789473684211, "grad_norm": 1.9129739999771118, "learning_rate": 0.0001, "loss": 0.0338, "step": 12450 }, { "epoch": 81.97368421052632, "grad_norm": 1.9781088829040527, "learning_rate": 0.0001, "loss": 0.0302, "step": 12460 }, { "epoch": 82.03947368421052, "grad_norm": 1.6821966171264648, "learning_rate": 0.0001, "loss": 0.0347, "step": 12470 }, { "epoch": 82.10526315789474, "grad_norm": 1.872234582901001, "learning_rate": 0.0001, "loss": 0.0302, "step": 12480 }, { "epoch": 82.17105263157895, "grad_norm": 2.181631565093994, "learning_rate": 0.0001, "loss": 0.0304, "step": 12490 }, { "epoch": 82.23684210526316, "grad_norm": 2.1000289916992188, "learning_rate": 0.0001, "loss": 0.0315, "step": 12500 }, { "epoch": 82.30263157894737, "grad_norm": 1.7296178340911865, "learning_rate": 0.0001, "loss": 0.0299, "step": 12510 }, { "epoch": 82.36842105263158, "grad_norm": 1.427299976348877, "learning_rate": 0.0001, "loss": 0.0297, "step": 12520 }, { "epoch": 82.4342105263158, "grad_norm": 1.8688770532608032, "learning_rate": 0.0001, "loss": 0.0301, "step": 12530 }, { "epoch": 82.5, "grad_norm": 2.176377773284912, "learning_rate": 0.0001, "loss": 0.028, "step": 12540 }, { "epoch": 82.5657894736842, "grad_norm": 2.6286277770996094, "learning_rate": 0.0001, "loss": 0.0314, "step": 12550 }, { "epoch": 82.63157894736842, "grad_norm": 2.2303385734558105, "learning_rate": 0.0001, "loss": 0.0274, "step": 12560 }, { "epoch": 82.69736842105263, "grad_norm": 2.096400499343872, "learning_rate": 0.0001, "loss": 0.0289, "step": 12570 }, { "epoch": 82.76315789473684, "grad_norm": 2.2465031147003174, "learning_rate": 0.0001, "loss": 0.0307, "step": 12580 }, { "epoch": 82.82894736842105, "grad_norm": 1.879124641418457, "learning_rate": 0.0001, "loss": 0.0276, "step": 12590 }, { "epoch": 82.89473684210526, "grad_norm": 1.928536295890808, "learning_rate": 0.0001, "loss": 0.0278, "step": 12600 }, { "epoch": 82.96052631578948, "grad_norm": 1.6147547960281372, "learning_rate": 0.0001, "loss": 0.0264, "step": 12610 }, { "epoch": 83.02631578947368, "grad_norm": 1.7372201681137085, "learning_rate": 0.0001, "loss": 0.0304, "step": 12620 }, { "epoch": 83.09210526315789, "grad_norm": 2.426722526550293, "learning_rate": 0.0001, "loss": 0.0285, "step": 12630 }, { "epoch": 83.15789473684211, "grad_norm": 1.675399661064148, "learning_rate": 0.0001, "loss": 0.0319, "step": 12640 }, { "epoch": 83.22368421052632, "grad_norm": 1.7672971487045288, "learning_rate": 0.0001, "loss": 0.0321, "step": 12650 }, { "epoch": 83.28947368421052, "grad_norm": 2.098170757293701, "learning_rate": 0.0001, "loss": 0.0283, "step": 12660 }, { "epoch": 83.35526315789474, "grad_norm": 2.477064371109009, "learning_rate": 0.0001, "loss": 0.0309, "step": 12670 }, { "epoch": 83.42105263157895, "grad_norm": 1.5548956394195557, "learning_rate": 0.0001, "loss": 0.0264, "step": 12680 }, { "epoch": 83.48684210526316, "grad_norm": 2.0761024951934814, "learning_rate": 0.0001, "loss": 0.0294, "step": 12690 }, { "epoch": 83.55263157894737, "grad_norm": 1.7435846328735352, "learning_rate": 0.0001, "loss": 0.0329, "step": 12700 }, { "epoch": 83.61842105263158, "grad_norm": 1.4317384958267212, "learning_rate": 0.0001, "loss": 0.0271, "step": 12710 }, { "epoch": 83.6842105263158, "grad_norm": 1.8853839635849, "learning_rate": 0.0001, "loss": 0.0282, "step": 12720 }, { "epoch": 83.75, "grad_norm": 1.5135477781295776, "learning_rate": 0.0001, "loss": 0.0278, "step": 12730 }, { "epoch": 83.8157894736842, "grad_norm": 1.928153157234192, "learning_rate": 0.0001, "loss": 0.0299, "step": 12740 }, { "epoch": 83.88157894736842, "grad_norm": 1.6143454313278198, "learning_rate": 0.0001, "loss": 0.0304, "step": 12750 }, { "epoch": 83.94736842105263, "grad_norm": 1.5729345083236694, "learning_rate": 0.0001, "loss": 0.0319, "step": 12760 }, { "epoch": 84.01315789473684, "grad_norm": 1.7620943784713745, "learning_rate": 0.0001, "loss": 0.0288, "step": 12770 }, { "epoch": 84.07894736842105, "grad_norm": 2.241382122039795, "learning_rate": 0.0001, "loss": 0.0296, "step": 12780 }, { "epoch": 84.14473684210526, "grad_norm": 1.5702205896377563, "learning_rate": 0.0001, "loss": 0.0321, "step": 12790 }, { "epoch": 84.21052631578948, "grad_norm": 1.8133102655410767, "learning_rate": 0.0001, "loss": 0.0288, "step": 12800 }, { "epoch": 84.27631578947368, "grad_norm": 1.9252326488494873, "learning_rate": 0.0001, "loss": 0.0271, "step": 12810 }, { "epoch": 84.34210526315789, "grad_norm": 2.208315372467041, "learning_rate": 0.0001, "loss": 0.0286, "step": 12820 }, { "epoch": 84.40789473684211, "grad_norm": 1.8565562963485718, "learning_rate": 0.0001, "loss": 0.0292, "step": 12830 }, { "epoch": 84.47368421052632, "grad_norm": 1.8825724124908447, "learning_rate": 0.0001, "loss": 0.032, "step": 12840 }, { "epoch": 84.53947368421052, "grad_norm": 1.7567288875579834, "learning_rate": 0.0001, "loss": 0.0278, "step": 12850 }, { "epoch": 84.60526315789474, "grad_norm": 1.5186409950256348, "learning_rate": 0.0001, "loss": 0.0301, "step": 12860 }, { "epoch": 84.67105263157895, "grad_norm": 1.2818951606750488, "learning_rate": 0.0001, "loss": 0.0295, "step": 12870 }, { "epoch": 84.73684210526316, "grad_norm": 1.8219605684280396, "learning_rate": 0.0001, "loss": 0.0308, "step": 12880 }, { "epoch": 84.80263157894737, "grad_norm": 1.948317527770996, "learning_rate": 0.0001, "loss": 0.0305, "step": 12890 }, { "epoch": 84.86842105263158, "grad_norm": 1.590794563293457, "learning_rate": 0.0001, "loss": 0.0298, "step": 12900 }, { "epoch": 84.9342105263158, "grad_norm": 1.6171526908874512, "learning_rate": 0.0001, "loss": 0.0295, "step": 12910 }, { "epoch": 85.0, "grad_norm": 1.416867971420288, "learning_rate": 0.0001, "loss": 0.0281, "step": 12920 }, { "epoch": 85.0657894736842, "grad_norm": 1.7470098733901978, "learning_rate": 0.0001, "loss": 0.0295, "step": 12930 }, { "epoch": 85.13157894736842, "grad_norm": 1.666382074356079, "learning_rate": 0.0001, "loss": 0.0303, "step": 12940 }, { "epoch": 85.19736842105263, "grad_norm": 1.7826032638549805, "learning_rate": 0.0001, "loss": 0.027, "step": 12950 }, { "epoch": 85.26315789473684, "grad_norm": 2.0271217823028564, "learning_rate": 0.0001, "loss": 0.0331, "step": 12960 }, { "epoch": 85.32894736842105, "grad_norm": 2.0209686756134033, "learning_rate": 0.0001, "loss": 0.0321, "step": 12970 }, { "epoch": 85.39473684210526, "grad_norm": 1.9585864543914795, "learning_rate": 0.0001, "loss": 0.0311, "step": 12980 }, { "epoch": 85.46052631578948, "grad_norm": 1.8794137239456177, "learning_rate": 0.0001, "loss": 0.0278, "step": 12990 }, { "epoch": 85.52631578947368, "grad_norm": 1.6562678813934326, "learning_rate": 0.0001, "loss": 0.027, "step": 13000 }, { "epoch": 85.59210526315789, "grad_norm": 1.721022367477417, "learning_rate": 0.0001, "loss": 0.0256, "step": 13010 }, { "epoch": 85.65789473684211, "grad_norm": 2.0679917335510254, "learning_rate": 0.0001, "loss": 0.028, "step": 13020 }, { "epoch": 85.72368421052632, "grad_norm": 2.2101480960845947, "learning_rate": 0.0001, "loss": 0.033, "step": 13030 }, { "epoch": 85.78947368421052, "grad_norm": 2.5326573848724365, "learning_rate": 0.0001, "loss": 0.0351, "step": 13040 }, { "epoch": 85.85526315789474, "grad_norm": 1.9026429653167725, "learning_rate": 0.0001, "loss": 0.0261, "step": 13050 }, { "epoch": 85.92105263157895, "grad_norm": 2.2550907135009766, "learning_rate": 0.0001, "loss": 0.0271, "step": 13060 }, { "epoch": 85.98684210526316, "grad_norm": 1.961686372756958, "learning_rate": 0.0001, "loss": 0.0269, "step": 13070 }, { "epoch": 86.05263157894737, "grad_norm": 1.960931658744812, "learning_rate": 0.0001, "loss": 0.0258, "step": 13080 }, { "epoch": 86.11842105263158, "grad_norm": 2.150092363357544, "learning_rate": 0.0001, "loss": 0.0295, "step": 13090 }, { "epoch": 86.1842105263158, "grad_norm": 3.0566225051879883, "learning_rate": 0.0001, "loss": 0.0328, "step": 13100 }, { "epoch": 86.25, "grad_norm": 2.762721061706543, "learning_rate": 0.0001, "loss": 0.0292, "step": 13110 }, { "epoch": 86.3157894736842, "grad_norm": 2.4581403732299805, "learning_rate": 0.0001, "loss": 0.0285, "step": 13120 }, { "epoch": 86.38157894736842, "grad_norm": 2.441354513168335, "learning_rate": 0.0001, "loss": 0.029, "step": 13130 }, { "epoch": 86.44736842105263, "grad_norm": 2.6619606018066406, "learning_rate": 0.0001, "loss": 0.0307, "step": 13140 }, { "epoch": 86.51315789473684, "grad_norm": 2.3844335079193115, "learning_rate": 0.0001, "loss": 0.0258, "step": 13150 }, { "epoch": 86.57894736842105, "grad_norm": 2.026658058166504, "learning_rate": 0.0001, "loss": 0.0268, "step": 13160 }, { "epoch": 86.64473684210526, "grad_norm": 1.7314461469650269, "learning_rate": 0.0001, "loss": 0.0245, "step": 13170 }, { "epoch": 86.71052631578948, "grad_norm": 1.5041627883911133, "learning_rate": 0.0001, "loss": 0.0271, "step": 13180 }, { "epoch": 86.77631578947368, "grad_norm": 2.2199673652648926, "learning_rate": 0.0001, "loss": 0.0266, "step": 13190 }, { "epoch": 86.84210526315789, "grad_norm": 2.251030921936035, "learning_rate": 0.0001, "loss": 0.028, "step": 13200 }, { "epoch": 86.90789473684211, "grad_norm": 2.4143950939178467, "learning_rate": 0.0001, "loss": 0.0319, "step": 13210 }, { "epoch": 86.97368421052632, "grad_norm": 2.188281536102295, "learning_rate": 0.0001, "loss": 0.0263, "step": 13220 }, { "epoch": 87.03947368421052, "grad_norm": 2.020051956176758, "learning_rate": 0.0001, "loss": 0.0277, "step": 13230 }, { "epoch": 87.10526315789474, "grad_norm": 1.9716978073120117, "learning_rate": 0.0001, "loss": 0.0289, "step": 13240 }, { "epoch": 87.17105263157895, "grad_norm": 1.9438613653182983, "learning_rate": 0.0001, "loss": 0.0275, "step": 13250 }, { "epoch": 87.23684210526316, "grad_norm": 2.1312286853790283, "learning_rate": 0.0001, "loss": 0.0327, "step": 13260 }, { "epoch": 87.30263157894737, "grad_norm": 1.445481777191162, "learning_rate": 0.0001, "loss": 0.0251, "step": 13270 }, { "epoch": 87.36842105263158, "grad_norm": 1.5866940021514893, "learning_rate": 0.0001, "loss": 0.0293, "step": 13280 }, { "epoch": 87.4342105263158, "grad_norm": 1.8600282669067383, "learning_rate": 0.0001, "loss": 0.0289, "step": 13290 }, { "epoch": 87.5, "grad_norm": 2.1697843074798584, "learning_rate": 0.0001, "loss": 0.0292, "step": 13300 }, { "epoch": 87.5657894736842, "grad_norm": 2.0753848552703857, "learning_rate": 0.0001, "loss": 0.0261, "step": 13310 }, { "epoch": 87.63157894736842, "grad_norm": 1.3753223419189453, "learning_rate": 0.0001, "loss": 0.0286, "step": 13320 }, { "epoch": 87.69736842105263, "grad_norm": 1.8515719175338745, "learning_rate": 0.0001, "loss": 0.0311, "step": 13330 }, { "epoch": 87.76315789473684, "grad_norm": 1.6124696731567383, "learning_rate": 0.0001, "loss": 0.0283, "step": 13340 }, { "epoch": 87.82894736842105, "grad_norm": 1.9067317247390747, "learning_rate": 0.0001, "loss": 0.0274, "step": 13350 }, { "epoch": 87.89473684210526, "grad_norm": 1.5507631301879883, "learning_rate": 0.0001, "loss": 0.0281, "step": 13360 }, { "epoch": 87.96052631578948, "grad_norm": 2.281433343887329, "learning_rate": 0.0001, "loss": 0.029, "step": 13370 }, { "epoch": 88.02631578947368, "grad_norm": 2.5696237087249756, "learning_rate": 0.0001, "loss": 0.0282, "step": 13380 }, { "epoch": 88.09210526315789, "grad_norm": 1.8510385751724243, "learning_rate": 0.0001, "loss": 0.0293, "step": 13390 }, { "epoch": 88.15789473684211, "grad_norm": 2.1197049617767334, "learning_rate": 0.0001, "loss": 0.0266, "step": 13400 }, { "epoch": 88.22368421052632, "grad_norm": 1.4444364309310913, "learning_rate": 0.0001, "loss": 0.029, "step": 13410 }, { "epoch": 88.28947368421052, "grad_norm": 1.5638121366500854, "learning_rate": 0.0001, "loss": 0.0288, "step": 13420 }, { "epoch": 88.35526315789474, "grad_norm": 2.076504945755005, "learning_rate": 0.0001, "loss": 0.0288, "step": 13430 }, { "epoch": 88.42105263157895, "grad_norm": 1.9657983779907227, "learning_rate": 0.0001, "loss": 0.0306, "step": 13440 }, { "epoch": 88.48684210526316, "grad_norm": 2.0577805042266846, "learning_rate": 0.0001, "loss": 0.0359, "step": 13450 }, { "epoch": 88.55263157894737, "grad_norm": 1.487951636314392, "learning_rate": 0.0001, "loss": 0.027, "step": 13460 }, { "epoch": 88.61842105263158, "grad_norm": 1.9243242740631104, "learning_rate": 0.0001, "loss": 0.0305, "step": 13470 }, { "epoch": 88.6842105263158, "grad_norm": 2.0615384578704834, "learning_rate": 0.0001, "loss": 0.0289, "step": 13480 }, { "epoch": 88.75, "grad_norm": 2.29386568069458, "learning_rate": 0.0001, "loss": 0.0281, "step": 13490 }, { "epoch": 88.8157894736842, "grad_norm": 1.9896496534347534, "learning_rate": 0.0001, "loss": 0.0252, "step": 13500 }, { "epoch": 88.88157894736842, "grad_norm": 1.4538456201553345, "learning_rate": 0.0001, "loss": 0.0261, "step": 13510 }, { "epoch": 88.94736842105263, "grad_norm": 2.3025753498077393, "learning_rate": 0.0001, "loss": 0.0292, "step": 13520 }, { "epoch": 89.01315789473684, "grad_norm": 1.9081006050109863, "learning_rate": 0.0001, "loss": 0.0287, "step": 13530 }, { "epoch": 89.07894736842105, "grad_norm": 2.106062412261963, "learning_rate": 0.0001, "loss": 0.0284, "step": 13540 }, { "epoch": 89.14473684210526, "grad_norm": 2.2155580520629883, "learning_rate": 0.0001, "loss": 0.0297, "step": 13550 }, { "epoch": 89.21052631578948, "grad_norm": 1.976468801498413, "learning_rate": 0.0001, "loss": 0.0267, "step": 13560 }, { "epoch": 89.27631578947368, "grad_norm": 1.7300221920013428, "learning_rate": 0.0001, "loss": 0.0249, "step": 13570 }, { "epoch": 89.34210526315789, "grad_norm": 2.4204659461975098, "learning_rate": 0.0001, "loss": 0.0329, "step": 13580 }, { "epoch": 89.40789473684211, "grad_norm": 2.0575718879699707, "learning_rate": 0.0001, "loss": 0.0306, "step": 13590 }, { "epoch": 89.47368421052632, "grad_norm": 2.151022434234619, "learning_rate": 0.0001, "loss": 0.025, "step": 13600 }, { "epoch": 89.53947368421052, "grad_norm": 1.6772764921188354, "learning_rate": 0.0001, "loss": 0.0269, "step": 13610 }, { "epoch": 89.60526315789474, "grad_norm": 1.7545968294143677, "learning_rate": 0.0001, "loss": 0.0262, "step": 13620 }, { "epoch": 89.67105263157895, "grad_norm": 1.9663658142089844, "learning_rate": 0.0001, "loss": 0.0274, "step": 13630 }, { "epoch": 89.73684210526316, "grad_norm": 1.7301418781280518, "learning_rate": 0.0001, "loss": 0.0301, "step": 13640 }, { "epoch": 89.80263157894737, "grad_norm": 1.7092499732971191, "learning_rate": 0.0001, "loss": 0.0305, "step": 13650 }, { "epoch": 89.86842105263158, "grad_norm": 1.8346612453460693, "learning_rate": 0.0001, "loss": 0.0271, "step": 13660 }, { "epoch": 89.9342105263158, "grad_norm": 2.477088212966919, "learning_rate": 0.0001, "loss": 0.0277, "step": 13670 }, { "epoch": 90.0, "grad_norm": 2.1571311950683594, "learning_rate": 0.0001, "loss": 0.0259, "step": 13680 }, { "epoch": 90.0657894736842, "grad_norm": 1.5478508472442627, "learning_rate": 0.0001, "loss": 0.0249, "step": 13690 }, { "epoch": 90.13157894736842, "grad_norm": 1.9429690837860107, "learning_rate": 0.0001, "loss": 0.0272, "step": 13700 }, { "epoch": 90.19736842105263, "grad_norm": 1.6077224016189575, "learning_rate": 0.0001, "loss": 0.031, "step": 13710 }, { "epoch": 90.26315789473684, "grad_norm": 1.9207209348678589, "learning_rate": 0.0001, "loss": 0.0252, "step": 13720 }, { "epoch": 90.32894736842105, "grad_norm": 1.244343638420105, "learning_rate": 0.0001, "loss": 0.0264, "step": 13730 }, { "epoch": 90.39473684210526, "grad_norm": 1.5334579944610596, "learning_rate": 0.0001, "loss": 0.0285, "step": 13740 }, { "epoch": 90.46052631578948, "grad_norm": 2.1468112468719482, "learning_rate": 0.0001, "loss": 0.0322, "step": 13750 }, { "epoch": 90.52631578947368, "grad_norm": 1.7345777750015259, "learning_rate": 0.0001, "loss": 0.031, "step": 13760 }, { "epoch": 90.59210526315789, "grad_norm": 2.029191493988037, "learning_rate": 0.0001, "loss": 0.0268, "step": 13770 }, { "epoch": 90.65789473684211, "grad_norm": 1.6793742179870605, "learning_rate": 0.0001, "loss": 0.0288, "step": 13780 }, { "epoch": 90.72368421052632, "grad_norm": 2.1732263565063477, "learning_rate": 0.0001, "loss": 0.0297, "step": 13790 }, { "epoch": 90.78947368421052, "grad_norm": 2.0500664710998535, "learning_rate": 0.0001, "loss": 0.0339, "step": 13800 }, { "epoch": 90.85526315789474, "grad_norm": 1.9515972137451172, "learning_rate": 0.0001, "loss": 0.0265, "step": 13810 }, { "epoch": 90.92105263157895, "grad_norm": 1.968629002571106, "learning_rate": 0.0001, "loss": 0.0313, "step": 13820 }, { "epoch": 90.98684210526316, "grad_norm": 1.656190037727356, "learning_rate": 0.0001, "loss": 0.0278, "step": 13830 }, { "epoch": 91.05263157894737, "grad_norm": 1.896385908126831, "learning_rate": 0.0001, "loss": 0.03, "step": 13840 }, { "epoch": 91.11842105263158, "grad_norm": 1.747622013092041, "learning_rate": 0.0001, "loss": 0.0352, "step": 13850 }, { "epoch": 91.1842105263158, "grad_norm": 2.0331578254699707, "learning_rate": 0.0001, "loss": 0.0302, "step": 13860 }, { "epoch": 91.25, "grad_norm": 2.143805503845215, "learning_rate": 0.0001, "loss": 0.0247, "step": 13870 }, { "epoch": 91.3157894736842, "grad_norm": 1.5800591707229614, "learning_rate": 0.0001, "loss": 0.0272, "step": 13880 }, { "epoch": 91.38157894736842, "grad_norm": 1.9809304475784302, "learning_rate": 0.0001, "loss": 0.0298, "step": 13890 }, { "epoch": 91.44736842105263, "grad_norm": 2.080841064453125, "learning_rate": 0.0001, "loss": 0.0288, "step": 13900 }, { "epoch": 91.51315789473684, "grad_norm": 1.6849013566970825, "learning_rate": 0.0001, "loss": 0.0274, "step": 13910 }, { "epoch": 91.57894736842105, "grad_norm": 1.3943883180618286, "learning_rate": 0.0001, "loss": 0.025, "step": 13920 }, { "epoch": 91.64473684210526, "grad_norm": 2.1087758541107178, "learning_rate": 0.0001, "loss": 0.0271, "step": 13930 }, { "epoch": 91.71052631578948, "grad_norm": 2.4073121547698975, "learning_rate": 0.0001, "loss": 0.025, "step": 13940 }, { "epoch": 91.77631578947368, "grad_norm": 1.8754463195800781, "learning_rate": 0.0001, "loss": 0.0298, "step": 13950 }, { "epoch": 91.84210526315789, "grad_norm": 2.5178284645080566, "learning_rate": 0.0001, "loss": 0.0275, "step": 13960 }, { "epoch": 91.90789473684211, "grad_norm": 1.903548240661621, "learning_rate": 0.0001, "loss": 0.0298, "step": 13970 }, { "epoch": 91.97368421052632, "grad_norm": 1.7104161977767944, "learning_rate": 0.0001, "loss": 0.0274, "step": 13980 }, { "epoch": 92.03947368421052, "grad_norm": 1.7793035507202148, "learning_rate": 0.0001, "loss": 0.026, "step": 13990 }, { "epoch": 92.10526315789474, "grad_norm": 1.8837029933929443, "learning_rate": 0.0001, "loss": 0.0303, "step": 14000 }, { "epoch": 92.17105263157895, "grad_norm": 1.856390357017517, "learning_rate": 0.0001, "loss": 0.0283, "step": 14010 }, { "epoch": 92.23684210526316, "grad_norm": 2.1468257904052734, "learning_rate": 0.0001, "loss": 0.0288, "step": 14020 }, { "epoch": 92.30263157894737, "grad_norm": 2.2803831100463867, "learning_rate": 0.0001, "loss": 0.0264, "step": 14030 }, { "epoch": 92.36842105263158, "grad_norm": 2.045029878616333, "learning_rate": 0.0001, "loss": 0.03, "step": 14040 }, { "epoch": 92.4342105263158, "grad_norm": 1.394469141960144, "learning_rate": 0.0001, "loss": 0.0249, "step": 14050 }, { "epoch": 92.5, "grad_norm": 1.7581952810287476, "learning_rate": 0.0001, "loss": 0.0289, "step": 14060 }, { "epoch": 92.5657894736842, "grad_norm": 2.1290814876556396, "learning_rate": 0.0001, "loss": 0.0313, "step": 14070 }, { "epoch": 92.63157894736842, "grad_norm": 2.269195556640625, "learning_rate": 0.0001, "loss": 0.0307, "step": 14080 }, { "epoch": 92.69736842105263, "grad_norm": 1.862597107887268, "learning_rate": 0.0001, "loss": 0.0259, "step": 14090 }, { "epoch": 92.76315789473684, "grad_norm": 1.8109031915664673, "learning_rate": 0.0001, "loss": 0.0265, "step": 14100 }, { "epoch": 92.82894736842105, "grad_norm": 1.9729983806610107, "learning_rate": 0.0001, "loss": 0.0297, "step": 14110 }, { "epoch": 92.89473684210526, "grad_norm": 2.2512547969818115, "learning_rate": 0.0001, "loss": 0.0245, "step": 14120 }, { "epoch": 92.96052631578948, "grad_norm": 1.999061107635498, "learning_rate": 0.0001, "loss": 0.0238, "step": 14130 }, { "epoch": 93.02631578947368, "grad_norm": 1.9152888059616089, "learning_rate": 0.0001, "loss": 0.0313, "step": 14140 }, { "epoch": 93.09210526315789, "grad_norm": 2.020137310028076, "learning_rate": 0.0001, "loss": 0.0267, "step": 14150 }, { "epoch": 93.15789473684211, "grad_norm": 2.153320789337158, "learning_rate": 0.0001, "loss": 0.0274, "step": 14160 }, { "epoch": 93.22368421052632, "grad_norm": 2.1114258766174316, "learning_rate": 0.0001, "loss": 0.0314, "step": 14170 }, { "epoch": 93.28947368421052, "grad_norm": 2.2189090251922607, "learning_rate": 0.0001, "loss": 0.0284, "step": 14180 }, { "epoch": 93.35526315789474, "grad_norm": 1.8290343284606934, "learning_rate": 0.0001, "loss": 0.0274, "step": 14190 }, { "epoch": 93.42105263157895, "grad_norm": 1.778281331062317, "learning_rate": 0.0001, "loss": 0.0274, "step": 14200 }, { "epoch": 93.48684210526316, "grad_norm": 2.2371256351470947, "learning_rate": 0.0001, "loss": 0.0277, "step": 14210 }, { "epoch": 93.55263157894737, "grad_norm": 2.120712995529175, "learning_rate": 0.0001, "loss": 0.0264, "step": 14220 }, { "epoch": 93.61842105263158, "grad_norm": 1.602144479751587, "learning_rate": 0.0001, "loss": 0.0285, "step": 14230 }, { "epoch": 93.6842105263158, "grad_norm": 1.777031421661377, "learning_rate": 0.0001, "loss": 0.0255, "step": 14240 }, { "epoch": 93.75, "grad_norm": 2.1054422855377197, "learning_rate": 0.0001, "loss": 0.0284, "step": 14250 }, { "epoch": 93.8157894736842, "grad_norm": 2.1115353107452393, "learning_rate": 0.0001, "loss": 0.0265, "step": 14260 }, { "epoch": 93.88157894736842, "grad_norm": 1.898567795753479, "learning_rate": 0.0001, "loss": 0.0275, "step": 14270 }, { "epoch": 93.94736842105263, "grad_norm": 1.669534683227539, "learning_rate": 0.0001, "loss": 0.0269, "step": 14280 }, { "epoch": 94.01315789473684, "grad_norm": 1.6839345693588257, "learning_rate": 0.0001, "loss": 0.0276, "step": 14290 }, { "epoch": 94.07894736842105, "grad_norm": 2.027613639831543, "learning_rate": 0.0001, "loss": 0.0314, "step": 14300 }, { "epoch": 94.14473684210526, "grad_norm": 2.3967013359069824, "learning_rate": 0.0001, "loss": 0.0265, "step": 14310 }, { "epoch": 94.21052631578948, "grad_norm": 1.9463882446289062, "learning_rate": 0.0001, "loss": 0.0252, "step": 14320 }, { "epoch": 94.27631578947368, "grad_norm": 1.6494497060775757, "learning_rate": 0.0001, "loss": 0.0263, "step": 14330 }, { "epoch": 94.34210526315789, "grad_norm": 1.867606520652771, "learning_rate": 0.0001, "loss": 0.0285, "step": 14340 }, { "epoch": 94.40789473684211, "grad_norm": 1.694853663444519, "learning_rate": 0.0001, "loss": 0.0277, "step": 14350 }, { "epoch": 94.47368421052632, "grad_norm": 1.6939771175384521, "learning_rate": 0.0001, "loss": 0.0288, "step": 14360 }, { "epoch": 94.53947368421052, "grad_norm": 1.7957245111465454, "learning_rate": 0.0001, "loss": 0.0244, "step": 14370 }, { "epoch": 94.60526315789474, "grad_norm": 1.569079875946045, "learning_rate": 0.0001, "loss": 0.0283, "step": 14380 }, { "epoch": 94.67105263157895, "grad_norm": 1.9433953762054443, "learning_rate": 0.0001, "loss": 0.0273, "step": 14390 }, { "epoch": 94.73684210526316, "grad_norm": 2.097531318664551, "learning_rate": 0.0001, "loss": 0.0262, "step": 14400 }, { "epoch": 94.80263157894737, "grad_norm": 2.1827645301818848, "learning_rate": 0.0001, "loss": 0.0342, "step": 14410 }, { "epoch": 94.86842105263158, "grad_norm": 2.0006275177001953, "learning_rate": 0.0001, "loss": 0.0266, "step": 14420 }, { "epoch": 94.9342105263158, "grad_norm": 1.9928101301193237, "learning_rate": 0.0001, "loss": 0.026, "step": 14430 }, { "epoch": 95.0, "grad_norm": 2.7189114093780518, "learning_rate": 0.0001, "loss": 0.0288, "step": 14440 }, { "epoch": 95.0657894736842, "grad_norm": 1.9546595811843872, "learning_rate": 0.0001, "loss": 0.0275, "step": 14450 }, { "epoch": 95.13157894736842, "grad_norm": 1.9585716724395752, "learning_rate": 0.0001, "loss": 0.0272, "step": 14460 }, { "epoch": 95.19736842105263, "grad_norm": 2.2246456146240234, "learning_rate": 0.0001, "loss": 0.0267, "step": 14470 }, { "epoch": 95.26315789473684, "grad_norm": 2.017874002456665, "learning_rate": 0.0001, "loss": 0.0271, "step": 14480 }, { "epoch": 95.32894736842105, "grad_norm": 1.6511551141738892, "learning_rate": 0.0001, "loss": 0.0283, "step": 14490 }, { "epoch": 95.39473684210526, "grad_norm": 1.9135774374008179, "learning_rate": 0.0001, "loss": 0.0266, "step": 14500 }, { "epoch": 95.46052631578948, "grad_norm": 1.6640851497650146, "learning_rate": 0.0001, "loss": 0.0296, "step": 14510 }, { "epoch": 95.52631578947368, "grad_norm": 2.0358288288116455, "learning_rate": 0.0001, "loss": 0.0266, "step": 14520 }, { "epoch": 95.59210526315789, "grad_norm": 1.9654853343963623, "learning_rate": 0.0001, "loss": 0.0289, "step": 14530 }, { "epoch": 95.65789473684211, "grad_norm": 2.1904754638671875, "learning_rate": 0.0001, "loss": 0.0283, "step": 14540 }, { "epoch": 95.72368421052632, "grad_norm": 2.1774344444274902, "learning_rate": 0.0001, "loss": 0.0289, "step": 14550 }, { "epoch": 95.78947368421052, "grad_norm": 2.2308552265167236, "learning_rate": 0.0001, "loss": 0.0241, "step": 14560 }, { "epoch": 95.85526315789474, "grad_norm": 1.651077389717102, "learning_rate": 0.0001, "loss": 0.031, "step": 14570 }, { "epoch": 95.92105263157895, "grad_norm": 2.4886536598205566, "learning_rate": 0.0001, "loss": 0.0271, "step": 14580 }, { "epoch": 95.98684210526316, "grad_norm": 2.293731689453125, "learning_rate": 0.0001, "loss": 0.0292, "step": 14590 }, { "epoch": 96.05263157894737, "grad_norm": 1.4591704607009888, "learning_rate": 0.0001, "loss": 0.0266, "step": 14600 }, { "epoch": 96.11842105263158, "grad_norm": 2.1636171340942383, "learning_rate": 0.0001, "loss": 0.0252, "step": 14610 }, { "epoch": 96.1842105263158, "grad_norm": 2.323413372039795, "learning_rate": 0.0001, "loss": 0.0275, "step": 14620 }, { "epoch": 96.25, "grad_norm": 1.7864532470703125, "learning_rate": 0.0001, "loss": 0.0258, "step": 14630 }, { "epoch": 96.3157894736842, "grad_norm": 2.2976467609405518, "learning_rate": 0.0001, "loss": 0.0296, "step": 14640 }, { "epoch": 96.38157894736842, "grad_norm": 1.969920039176941, "learning_rate": 0.0001, "loss": 0.0314, "step": 14650 }, { "epoch": 96.44736842105263, "grad_norm": 1.9380629062652588, "learning_rate": 0.0001, "loss": 0.0286, "step": 14660 }, { "epoch": 96.51315789473684, "grad_norm": 1.9160699844360352, "learning_rate": 0.0001, "loss": 0.0275, "step": 14670 }, { "epoch": 96.57894736842105, "grad_norm": 2.019214630126953, "learning_rate": 0.0001, "loss": 0.0272, "step": 14680 }, { "epoch": 96.64473684210526, "grad_norm": 2.1641576290130615, "learning_rate": 0.0001, "loss": 0.0293, "step": 14690 }, { "epoch": 96.71052631578948, "grad_norm": 2.038372278213501, "learning_rate": 0.0001, "loss": 0.0299, "step": 14700 }, { "epoch": 96.77631578947368, "grad_norm": 2.1015946865081787, "learning_rate": 0.0001, "loss": 0.0264, "step": 14710 }, { "epoch": 96.84210526315789, "grad_norm": 1.8551138639450073, "learning_rate": 0.0001, "loss": 0.0298, "step": 14720 }, { "epoch": 96.90789473684211, "grad_norm": 1.9416499137878418, "learning_rate": 0.0001, "loss": 0.0301, "step": 14730 }, { "epoch": 96.97368421052632, "grad_norm": 2.1658217906951904, "learning_rate": 0.0001, "loss": 0.0253, "step": 14740 }, { "epoch": 97.03947368421052, "grad_norm": 2.393296003341675, "learning_rate": 0.0001, "loss": 0.0271, "step": 14750 }, { "epoch": 97.10526315789474, "grad_norm": 2.0896482467651367, "learning_rate": 0.0001, "loss": 0.0239, "step": 14760 }, { "epoch": 97.17105263157895, "grad_norm": 2.183789014816284, "learning_rate": 0.0001, "loss": 0.0285, "step": 14770 }, { "epoch": 97.23684210526316, "grad_norm": 1.6294279098510742, "learning_rate": 0.0001, "loss": 0.0325, "step": 14780 }, { "epoch": 97.30263157894737, "grad_norm": 1.5257704257965088, "learning_rate": 0.0001, "loss": 0.0266, "step": 14790 }, { "epoch": 97.36842105263158, "grad_norm": 1.531581163406372, "learning_rate": 0.0001, "loss": 0.0267, "step": 14800 }, { "epoch": 97.4342105263158, "grad_norm": 1.6387615203857422, "learning_rate": 0.0001, "loss": 0.0304, "step": 14810 }, { "epoch": 97.5, "grad_norm": 2.0373177528381348, "learning_rate": 0.0001, "loss": 0.03, "step": 14820 }, { "epoch": 97.5657894736842, "grad_norm": 1.6404818296432495, "learning_rate": 0.0001, "loss": 0.0241, "step": 14830 }, { "epoch": 97.63157894736842, "grad_norm": 2.305403232574463, "learning_rate": 0.0001, "loss": 0.028, "step": 14840 }, { "epoch": 97.69736842105263, "grad_norm": 1.6697332859039307, "learning_rate": 0.0001, "loss": 0.0293, "step": 14850 }, { "epoch": 97.76315789473684, "grad_norm": 2.396461009979248, "learning_rate": 0.0001, "loss": 0.0259, "step": 14860 }, { "epoch": 97.82894736842105, "grad_norm": 2.2732276916503906, "learning_rate": 0.0001, "loss": 0.0256, "step": 14870 }, { "epoch": 97.89473684210526, "grad_norm": 2.248926877975464, "learning_rate": 0.0001, "loss": 0.0261, "step": 14880 }, { "epoch": 97.96052631578948, "grad_norm": 1.8717553615570068, "learning_rate": 0.0001, "loss": 0.0294, "step": 14890 }, { "epoch": 98.02631578947368, "grad_norm": 1.9725555181503296, "learning_rate": 0.0001, "loss": 0.026, "step": 14900 }, { "epoch": 98.09210526315789, "grad_norm": 1.7908445596694946, "learning_rate": 0.0001, "loss": 0.028, "step": 14910 }, { "epoch": 98.15789473684211, "grad_norm": 2.280064582824707, "learning_rate": 0.0001, "loss": 0.0251, "step": 14920 }, { "epoch": 98.22368421052632, "grad_norm": 2.29864764213562, "learning_rate": 0.0001, "loss": 0.0277, "step": 14930 }, { "epoch": 98.28947368421052, "grad_norm": 2.016547679901123, "learning_rate": 0.0001, "loss": 0.0281, "step": 14940 }, { "epoch": 98.35526315789474, "grad_norm": 1.6282696723937988, "learning_rate": 0.0001, "loss": 0.0237, "step": 14950 }, { "epoch": 98.42105263157895, "grad_norm": 1.3370261192321777, "learning_rate": 0.0001, "loss": 0.0277, "step": 14960 }, { "epoch": 98.48684210526316, "grad_norm": 1.5991334915161133, "learning_rate": 0.0001, "loss": 0.0248, "step": 14970 }, { "epoch": 98.55263157894737, "grad_norm": 1.8528897762298584, "learning_rate": 0.0001, "loss": 0.0285, "step": 14980 }, { "epoch": 98.61842105263158, "grad_norm": 1.3448964357376099, "learning_rate": 0.0001, "loss": 0.0272, "step": 14990 }, { "epoch": 98.6842105263158, "grad_norm": 1.7850093841552734, "learning_rate": 0.0001, "loss": 0.0278, "step": 15000 }, { "epoch": 98.75, "grad_norm": 1.9346328973770142, "learning_rate": 0.0001, "loss": 0.0318, "step": 15010 }, { "epoch": 98.8157894736842, "grad_norm": 1.4820646047592163, "learning_rate": 0.0001, "loss": 0.0309, "step": 15020 }, { "epoch": 98.88157894736842, "grad_norm": 2.0904035568237305, "learning_rate": 0.0001, "loss": 0.0239, "step": 15030 }, { "epoch": 98.94736842105263, "grad_norm": 1.5879857540130615, "learning_rate": 0.0001, "loss": 0.0309, "step": 15040 }, { "epoch": 99.01315789473684, "grad_norm": 2.1796715259552, "learning_rate": 0.0001, "loss": 0.0255, "step": 15050 }, { "epoch": 99.07894736842105, "grad_norm": 2.1525943279266357, "learning_rate": 0.0001, "loss": 0.0276, "step": 15060 }, { "epoch": 99.14473684210526, "grad_norm": 1.527500033378601, "learning_rate": 0.0001, "loss": 0.0255, "step": 15070 }, { "epoch": 99.21052631578948, "grad_norm": 1.4886884689331055, "learning_rate": 0.0001, "loss": 0.0317, "step": 15080 }, { "epoch": 99.27631578947368, "grad_norm": 2.00887393951416, "learning_rate": 0.0001, "loss": 0.0261, "step": 15090 }, { "epoch": 99.34210526315789, "grad_norm": 2.067657709121704, "learning_rate": 0.0001, "loss": 0.0276, "step": 15100 }, { "epoch": 99.40789473684211, "grad_norm": 1.9155423641204834, "learning_rate": 0.0001, "loss": 0.0248, "step": 15110 }, { "epoch": 99.47368421052632, "grad_norm": 1.8938848972320557, "learning_rate": 0.0001, "loss": 0.0293, "step": 15120 }, { "epoch": 99.53947368421052, "grad_norm": 2.0960636138916016, "learning_rate": 0.0001, "loss": 0.0265, "step": 15130 }, { "epoch": 99.60526315789474, "grad_norm": 2.165073871612549, "learning_rate": 0.0001, "loss": 0.0255, "step": 15140 }, { "epoch": 99.67105263157895, "grad_norm": 1.365526556968689, "learning_rate": 0.0001, "loss": 0.0308, "step": 15150 }, { "epoch": 99.73684210526316, "grad_norm": 2.4183483123779297, "learning_rate": 0.0001, "loss": 0.0271, "step": 15160 }, { "epoch": 99.80263157894737, "grad_norm": 2.2821171283721924, "learning_rate": 0.0001, "loss": 0.024, "step": 15170 }, { "epoch": 99.86842105263158, "grad_norm": 2.287201166152954, "learning_rate": 0.0001, "loss": 0.0293, "step": 15180 }, { "epoch": 99.9342105263158, "grad_norm": 2.2767271995544434, "learning_rate": 0.0001, "loss": 0.0266, "step": 15190 }, { "epoch": 100.0, "grad_norm": 1.8606728315353394, "learning_rate": 0.0001, "loss": 0.0288, "step": 15200 }, { "epoch": 100.0657894736842, "grad_norm": 1.7946810722351074, "learning_rate": 0.0001, "loss": 0.0252, "step": 15210 }, { "epoch": 100.13157894736842, "grad_norm": 1.6785385608673096, "learning_rate": 0.0001, "loss": 0.0233, "step": 15220 }, { "epoch": 100.19736842105263, "grad_norm": 2.6557602882385254, "learning_rate": 0.0001, "loss": 0.0253, "step": 15230 }, { "epoch": 100.26315789473684, "grad_norm": 2.510667324066162, "learning_rate": 0.0001, "loss": 0.0251, "step": 15240 }, { "epoch": 100.32894736842105, "grad_norm": 2.2144439220428467, "learning_rate": 0.0001, "loss": 0.0273, "step": 15250 }, { "epoch": 100.39473684210526, "grad_norm": 1.940328598022461, "learning_rate": 0.0001, "loss": 0.0289, "step": 15260 }, { "epoch": 100.46052631578948, "grad_norm": 2.1864285469055176, "learning_rate": 0.0001, "loss": 0.0283, "step": 15270 }, { "epoch": 100.52631578947368, "grad_norm": 1.5068169832229614, "learning_rate": 0.0001, "loss": 0.0271, "step": 15280 }, { "epoch": 100.59210526315789, "grad_norm": 2.0003082752227783, "learning_rate": 0.0001, "loss": 0.0243, "step": 15290 }, { "epoch": 100.65789473684211, "grad_norm": 1.7404160499572754, "learning_rate": 0.0001, "loss": 0.0248, "step": 15300 }, { "epoch": 100.72368421052632, "grad_norm": 1.7657220363616943, "learning_rate": 0.0001, "loss": 0.0275, "step": 15310 }, { "epoch": 100.78947368421052, "grad_norm": 1.9427289962768555, "learning_rate": 0.0001, "loss": 0.0259, "step": 15320 }, { "epoch": 100.85526315789474, "grad_norm": 2.2194952964782715, "learning_rate": 0.0001, "loss": 0.0285, "step": 15330 }, { "epoch": 100.92105263157895, "grad_norm": 2.1912899017333984, "learning_rate": 0.0001, "loss": 0.0272, "step": 15340 }, { "epoch": 100.98684210526316, "grad_norm": 1.8904715776443481, "learning_rate": 0.0001, "loss": 0.0331, "step": 15350 }, { "epoch": 101.05263157894737, "grad_norm": 2.3213186264038086, "learning_rate": 0.0001, "loss": 0.0287, "step": 15360 }, { "epoch": 101.11842105263158, "grad_norm": 1.9349029064178467, "learning_rate": 0.0001, "loss": 0.0276, "step": 15370 }, { "epoch": 101.1842105263158, "grad_norm": 1.8972091674804688, "learning_rate": 0.0001, "loss": 0.0275, "step": 15380 }, { "epoch": 101.25, "grad_norm": 1.4387093782424927, "learning_rate": 0.0001, "loss": 0.0283, "step": 15390 }, { "epoch": 101.3157894736842, "grad_norm": 1.8240761756896973, "learning_rate": 0.0001, "loss": 0.0237, "step": 15400 }, { "epoch": 101.38157894736842, "grad_norm": 1.8374823331832886, "learning_rate": 0.0001, "loss": 0.0301, "step": 15410 }, { "epoch": 101.44736842105263, "grad_norm": 1.4567687511444092, "learning_rate": 0.0001, "loss": 0.0244, "step": 15420 }, { "epoch": 101.51315789473684, "grad_norm": 1.9445396661758423, "learning_rate": 0.0001, "loss": 0.0292, "step": 15430 }, { "epoch": 101.57894736842105, "grad_norm": 1.8015769720077515, "learning_rate": 0.0001, "loss": 0.0268, "step": 15440 }, { "epoch": 101.64473684210526, "grad_norm": 1.6518374681472778, "learning_rate": 0.0001, "loss": 0.0296, "step": 15450 }, { "epoch": 101.71052631578948, "grad_norm": 2.0007996559143066, "learning_rate": 0.0001, "loss": 0.0261, "step": 15460 }, { "epoch": 101.77631578947368, "grad_norm": 1.4292012453079224, "learning_rate": 0.0001, "loss": 0.0243, "step": 15470 }, { "epoch": 101.84210526315789, "grad_norm": 1.7935521602630615, "learning_rate": 0.0001, "loss": 0.0287, "step": 15480 }, { "epoch": 101.90789473684211, "grad_norm": 1.6997992992401123, "learning_rate": 0.0001, "loss": 0.0306, "step": 15490 }, { "epoch": 101.97368421052632, "grad_norm": 1.557344913482666, "learning_rate": 0.0001, "loss": 0.0291, "step": 15500 }, { "epoch": 102.03947368421052, "grad_norm": 2.037781000137329, "learning_rate": 0.0001, "loss": 0.0273, "step": 15510 }, { "epoch": 102.10526315789474, "grad_norm": 2.1140239238739014, "learning_rate": 0.0001, "loss": 0.0294, "step": 15520 }, { "epoch": 102.17105263157895, "grad_norm": 2.0415267944335938, "learning_rate": 0.0001, "loss": 0.0251, "step": 15530 }, { "epoch": 102.23684210526316, "grad_norm": 1.876612901687622, "learning_rate": 0.0001, "loss": 0.0295, "step": 15540 }, { "epoch": 102.30263157894737, "grad_norm": 1.9280022382736206, "learning_rate": 0.0001, "loss": 0.0306, "step": 15550 }, { "epoch": 102.36842105263158, "grad_norm": 2.048522472381592, "learning_rate": 0.0001, "loss": 0.0246, "step": 15560 }, { "epoch": 102.4342105263158, "grad_norm": 1.5332177877426147, "learning_rate": 0.0001, "loss": 0.0281, "step": 15570 }, { "epoch": 102.5, "grad_norm": 1.8215255737304688, "learning_rate": 0.0001, "loss": 0.026, "step": 15580 }, { "epoch": 102.5657894736842, "grad_norm": 1.6897083520889282, "learning_rate": 0.0001, "loss": 0.027, "step": 15590 }, { "epoch": 102.63157894736842, "grad_norm": 2.283853769302368, "learning_rate": 0.0001, "loss": 0.0286, "step": 15600 }, { "epoch": 102.69736842105263, "grad_norm": 1.2878587245941162, "learning_rate": 0.0001, "loss": 0.0263, "step": 15610 }, { "epoch": 102.76315789473684, "grad_norm": 1.4527045488357544, "learning_rate": 0.0001, "loss": 0.0239, "step": 15620 }, { "epoch": 102.82894736842105, "grad_norm": 1.7951486110687256, "learning_rate": 0.0001, "loss": 0.0266, "step": 15630 }, { "epoch": 102.89473684210526, "grad_norm": 2.265005111694336, "learning_rate": 0.0001, "loss": 0.0289, "step": 15640 }, { "epoch": 102.96052631578948, "grad_norm": 1.521929383277893, "learning_rate": 0.0001, "loss": 0.0268, "step": 15650 }, { "epoch": 103.02631578947368, "grad_norm": 1.6560927629470825, "learning_rate": 0.0001, "loss": 0.0274, "step": 15660 }, { "epoch": 103.09210526315789, "grad_norm": 1.9417213201522827, "learning_rate": 0.0001, "loss": 0.0289, "step": 15670 }, { "epoch": 103.15789473684211, "grad_norm": 2.0600521564483643, "learning_rate": 0.0001, "loss": 0.0281, "step": 15680 }, { "epoch": 103.22368421052632, "grad_norm": 1.8477832078933716, "learning_rate": 0.0001, "loss": 0.0291, "step": 15690 }, { "epoch": 103.28947368421052, "grad_norm": 1.5557602643966675, "learning_rate": 0.0001, "loss": 0.0245, "step": 15700 }, { "epoch": 103.35526315789474, "grad_norm": 1.618970274925232, "learning_rate": 0.0001, "loss": 0.0276, "step": 15710 }, { "epoch": 103.42105263157895, "grad_norm": 2.015901565551758, "learning_rate": 0.0001, "loss": 0.0243, "step": 15720 }, { "epoch": 103.48684210526316, "grad_norm": 2.090688943862915, "learning_rate": 0.0001, "loss": 0.029, "step": 15730 }, { "epoch": 103.55263157894737, "grad_norm": 2.080714702606201, "learning_rate": 0.0001, "loss": 0.0255, "step": 15740 }, { "epoch": 103.61842105263158, "grad_norm": 2.0590639114379883, "learning_rate": 0.0001, "loss": 0.0297, "step": 15750 }, { "epoch": 103.6842105263158, "grad_norm": 2.074503183364868, "learning_rate": 0.0001, "loss": 0.0284, "step": 15760 }, { "epoch": 103.75, "grad_norm": 2.1587677001953125, "learning_rate": 0.0001, "loss": 0.0261, "step": 15770 }, { "epoch": 103.8157894736842, "grad_norm": 2.5688490867614746, "learning_rate": 0.0001, "loss": 0.0282, "step": 15780 }, { "epoch": 103.88157894736842, "grad_norm": 1.791428804397583, "learning_rate": 0.0001, "loss": 0.0252, "step": 15790 }, { "epoch": 103.94736842105263, "grad_norm": 1.7393711805343628, "learning_rate": 0.0001, "loss": 0.0263, "step": 15800 }, { "epoch": 104.01315789473684, "grad_norm": 2.1554300785064697, "learning_rate": 0.0001, "loss": 0.0298, "step": 15810 }, { "epoch": 104.07894736842105, "grad_norm": 2.193875789642334, "learning_rate": 0.0001, "loss": 0.0241, "step": 15820 }, { "epoch": 104.14473684210526, "grad_norm": 2.155054807662964, "learning_rate": 0.0001, "loss": 0.0246, "step": 15830 }, { "epoch": 104.21052631578948, "grad_norm": 2.231163740158081, "learning_rate": 0.0001, "loss": 0.025, "step": 15840 }, { "epoch": 104.27631578947368, "grad_norm": 2.484995126724243, "learning_rate": 0.0001, "loss": 0.0287, "step": 15850 }, { "epoch": 104.34210526315789, "grad_norm": 2.012603521347046, "learning_rate": 0.0001, "loss": 0.0261, "step": 15860 }, { "epoch": 104.40789473684211, "grad_norm": 2.135913848876953, "learning_rate": 0.0001, "loss": 0.0264, "step": 15870 }, { "epoch": 104.47368421052632, "grad_norm": 2.4394423961639404, "learning_rate": 0.0001, "loss": 0.0243, "step": 15880 }, { "epoch": 104.53947368421052, "grad_norm": 1.9328018426895142, "learning_rate": 0.0001, "loss": 0.0244, "step": 15890 }, { "epoch": 104.60526315789474, "grad_norm": 2.9380195140838623, "learning_rate": 0.0001, "loss": 0.0299, "step": 15900 }, { "epoch": 104.67105263157895, "grad_norm": 3.8800406455993652, "learning_rate": 0.0001, "loss": 0.0326, "step": 15910 }, { "epoch": 104.73684210526316, "grad_norm": 3.160806894302368, "learning_rate": 0.0001, "loss": 0.0281, "step": 15920 }, { "epoch": 104.80263157894737, "grad_norm": 2.4324960708618164, "learning_rate": 0.0001, "loss": 0.0261, "step": 15930 }, { "epoch": 104.86842105263158, "grad_norm": 2.4244940280914307, "learning_rate": 0.0001, "loss": 0.024, "step": 15940 }, { "epoch": 104.9342105263158, "grad_norm": 2.3864426612854004, "learning_rate": 0.0001, "loss": 0.0267, "step": 15950 }, { "epoch": 105.0, "grad_norm": 1.9788310527801514, "learning_rate": 0.0001, "loss": 0.0277, "step": 15960 }, { "epoch": 105.0657894736842, "grad_norm": 1.8434258699417114, "learning_rate": 0.0001, "loss": 0.0261, "step": 15970 }, { "epoch": 105.13157894736842, "grad_norm": 2.552311658859253, "learning_rate": 0.0001, "loss": 0.0276, "step": 15980 }, { "epoch": 105.19736842105263, "grad_norm": 1.8631820678710938, "learning_rate": 0.0001, "loss": 0.0226, "step": 15990 }, { "epoch": 105.26315789473684, "grad_norm": 1.7003192901611328, "learning_rate": 0.0001, "loss": 0.0285, "step": 16000 }, { "epoch": 105.32894736842105, "grad_norm": 2.019674301147461, "learning_rate": 0.0001, "loss": 0.0238, "step": 16010 }, { "epoch": 105.39473684210526, "grad_norm": 1.9895055294036865, "learning_rate": 0.0001, "loss": 0.0241, "step": 16020 }, { "epoch": 105.46052631578948, "grad_norm": 1.6922650337219238, "learning_rate": 0.0001, "loss": 0.0283, "step": 16030 }, { "epoch": 105.52631578947368, "grad_norm": 2.3513472080230713, "learning_rate": 0.0001, "loss": 0.0273, "step": 16040 }, { "epoch": 105.59210526315789, "grad_norm": 1.818518877029419, "learning_rate": 0.0001, "loss": 0.0275, "step": 16050 }, { "epoch": 105.65789473684211, "grad_norm": 4.6884026527404785, "learning_rate": 0.0001, "loss": 0.0369, "step": 16060 }, { "epoch": 105.72368421052632, "grad_norm": 3.762373447418213, "learning_rate": 0.0001, "loss": 0.0281, "step": 16070 }, { "epoch": 105.78947368421052, "grad_norm": 3.3358280658721924, "learning_rate": 0.0001, "loss": 0.0257, "step": 16080 }, { "epoch": 105.85526315789474, "grad_norm": 2.7881295680999756, "learning_rate": 0.0001, "loss": 0.0271, "step": 16090 }, { "epoch": 105.92105263157895, "grad_norm": 2.499375343322754, "learning_rate": 0.0001, "loss": 0.0256, "step": 16100 }, { "epoch": 105.98684210526316, "grad_norm": 2.133852005004883, "learning_rate": 0.0001, "loss": 0.0277, "step": 16110 }, { "epoch": 106.05263157894737, "grad_norm": 2.0693490505218506, "learning_rate": 0.0001, "loss": 0.0246, "step": 16120 }, { "epoch": 106.11842105263158, "grad_norm": 2.052652597427368, "learning_rate": 0.0001, "loss": 0.0257, "step": 16130 }, { "epoch": 106.1842105263158, "grad_norm": 2.4387190341949463, "learning_rate": 0.0001, "loss": 0.0266, "step": 16140 }, { "epoch": 106.25, "grad_norm": 2.4031901359558105, "learning_rate": 0.0001, "loss": 0.024, "step": 16150 }, { "epoch": 106.3157894736842, "grad_norm": 2.189540386199951, "learning_rate": 0.0001, "loss": 0.0224, "step": 16160 }, { "epoch": 106.38157894736842, "grad_norm": 2.526071548461914, "learning_rate": 0.0001, "loss": 0.0224, "step": 16170 }, { "epoch": 106.44736842105263, "grad_norm": 2.331416130065918, "learning_rate": 0.0001, "loss": 0.025, "step": 16180 }, { "epoch": 106.51315789473684, "grad_norm": 2.430283546447754, "learning_rate": 0.0001, "loss": 0.0268, "step": 16190 }, { "epoch": 106.57894736842105, "grad_norm": 2.512856960296631, "learning_rate": 0.0001, "loss": 0.0289, "step": 16200 }, { "epoch": 106.64473684210526, "grad_norm": 2.1114501953125, "learning_rate": 0.0001, "loss": 0.029, "step": 16210 }, { "epoch": 106.71052631578948, "grad_norm": 1.9545527696609497, "learning_rate": 0.0001, "loss": 0.0277, "step": 16220 }, { "epoch": 106.77631578947368, "grad_norm": 2.1288859844207764, "learning_rate": 0.0001, "loss": 0.025, "step": 16230 }, { "epoch": 106.84210526315789, "grad_norm": 1.9409619569778442, "learning_rate": 0.0001, "loss": 0.0272, "step": 16240 }, { "epoch": 106.90789473684211, "grad_norm": 2.0520477294921875, "learning_rate": 0.0001, "loss": 0.0267, "step": 16250 }, { "epoch": 106.97368421052632, "grad_norm": 2.060316562652588, "learning_rate": 0.0001, "loss": 0.0266, "step": 16260 }, { "epoch": 107.03947368421052, "grad_norm": 1.8336608409881592, "learning_rate": 0.0001, "loss": 0.0262, "step": 16270 }, { "epoch": 107.10526315789474, "grad_norm": 1.6445056200027466, "learning_rate": 0.0001, "loss": 0.0254, "step": 16280 }, { "epoch": 107.17105263157895, "grad_norm": 1.4911423921585083, "learning_rate": 0.0001, "loss": 0.0268, "step": 16290 }, { "epoch": 107.23684210526316, "grad_norm": 1.6284449100494385, "learning_rate": 0.0001, "loss": 0.0263, "step": 16300 }, { "epoch": 107.30263157894737, "grad_norm": 2.245706796646118, "learning_rate": 0.0001, "loss": 0.0249, "step": 16310 }, { "epoch": 107.36842105263158, "grad_norm": 2.1498985290527344, "learning_rate": 0.0001, "loss": 0.0256, "step": 16320 }, { "epoch": 107.4342105263158, "grad_norm": 2.1248772144317627, "learning_rate": 0.0001, "loss": 0.0285, "step": 16330 }, { "epoch": 107.5, "grad_norm": 1.6220266819000244, "learning_rate": 0.0001, "loss": 0.0284, "step": 16340 }, { "epoch": 107.5657894736842, "grad_norm": 2.294360399246216, "learning_rate": 0.0001, "loss": 0.0275, "step": 16350 }, { "epoch": 107.63157894736842, "grad_norm": 2.017869472503662, "learning_rate": 0.0001, "loss": 0.0286, "step": 16360 }, { "epoch": 107.69736842105263, "grad_norm": 2.3317759037017822, "learning_rate": 0.0001, "loss": 0.0283, "step": 16370 }, { "epoch": 107.76315789473684, "grad_norm": 2.26668381690979, "learning_rate": 0.0001, "loss": 0.0255, "step": 16380 }, { "epoch": 107.82894736842105, "grad_norm": 2.2005198001861572, "learning_rate": 0.0001, "loss": 0.0298, "step": 16390 }, { "epoch": 107.89473684210526, "grad_norm": 2.0651819705963135, "learning_rate": 0.0001, "loss": 0.0248, "step": 16400 }, { "epoch": 107.96052631578948, "grad_norm": 2.035367965698242, "learning_rate": 0.0001, "loss": 0.0254, "step": 16410 }, { "epoch": 108.02631578947368, "grad_norm": 2.0537023544311523, "learning_rate": 0.0001, "loss": 0.0323, "step": 16420 }, { "epoch": 108.09210526315789, "grad_norm": 1.9223154783248901, "learning_rate": 0.0001, "loss": 0.0312, "step": 16430 }, { "epoch": 108.15789473684211, "grad_norm": 2.0924315452575684, "learning_rate": 0.0001, "loss": 0.0247, "step": 16440 }, { "epoch": 108.22368421052632, "grad_norm": 2.242103099822998, "learning_rate": 0.0001, "loss": 0.0283, "step": 16450 }, { "epoch": 108.28947368421052, "grad_norm": 2.243272066116333, "learning_rate": 0.0001, "loss": 0.0257, "step": 16460 }, { "epoch": 108.35526315789474, "grad_norm": 1.8688474893569946, "learning_rate": 0.0001, "loss": 0.0252, "step": 16470 }, { "epoch": 108.42105263157895, "grad_norm": 1.826938509941101, "learning_rate": 0.0001, "loss": 0.025, "step": 16480 }, { "epoch": 108.48684210526316, "grad_norm": 2.22900652885437, "learning_rate": 0.0001, "loss": 0.025, "step": 16490 }, { "epoch": 108.55263157894737, "grad_norm": 1.620694875717163, "learning_rate": 0.0001, "loss": 0.028, "step": 16500 }, { "epoch": 108.61842105263158, "grad_norm": 1.898985743522644, "learning_rate": 0.0001, "loss": 0.0263, "step": 16510 }, { "epoch": 108.6842105263158, "grad_norm": 1.8080660104751587, "learning_rate": 0.0001, "loss": 0.0287, "step": 16520 }, { "epoch": 108.75, "grad_norm": 2.0966269969940186, "learning_rate": 0.0001, "loss": 0.0243, "step": 16530 }, { "epoch": 108.8157894736842, "grad_norm": 2.0495636463165283, "learning_rate": 0.0001, "loss": 0.0295, "step": 16540 }, { "epoch": 108.88157894736842, "grad_norm": 1.994903564453125, "learning_rate": 0.0001, "loss": 0.0273, "step": 16550 }, { "epoch": 108.94736842105263, "grad_norm": 2.226318120956421, "learning_rate": 0.0001, "loss": 0.028, "step": 16560 }, { "epoch": 109.01315789473684, "grad_norm": 1.9312965869903564, "learning_rate": 0.0001, "loss": 0.0257, "step": 16570 }, { "epoch": 109.07894736842105, "grad_norm": 1.422808051109314, "learning_rate": 0.0001, "loss": 0.0283, "step": 16580 }, { "epoch": 109.14473684210526, "grad_norm": 1.6690243482589722, "learning_rate": 0.0001, "loss": 0.0265, "step": 16590 }, { "epoch": 109.21052631578948, "grad_norm": 2.347050666809082, "learning_rate": 0.0001, "loss": 0.0259, "step": 16600 }, { "epoch": 109.27631578947368, "grad_norm": 2.3017115592956543, "learning_rate": 0.0001, "loss": 0.0269, "step": 16610 }, { "epoch": 109.34210526315789, "grad_norm": 2.1307175159454346, "learning_rate": 0.0001, "loss": 0.0244, "step": 16620 }, { "epoch": 109.40789473684211, "grad_norm": 1.9986432790756226, "learning_rate": 0.0001, "loss": 0.0256, "step": 16630 }, { "epoch": 109.47368421052632, "grad_norm": 1.8740025758743286, "learning_rate": 0.0001, "loss": 0.0266, "step": 16640 }, { "epoch": 109.53947368421052, "grad_norm": 1.9358890056610107, "learning_rate": 0.0001, "loss": 0.0278, "step": 16650 }, { "epoch": 109.60526315789474, "grad_norm": 1.8746507167816162, "learning_rate": 0.0001, "loss": 0.0299, "step": 16660 }, { "epoch": 109.67105263157895, "grad_norm": 2.0397682189941406, "learning_rate": 0.0001, "loss": 0.0265, "step": 16670 }, { "epoch": 109.73684210526316, "grad_norm": 1.8216837644577026, "learning_rate": 0.0001, "loss": 0.0244, "step": 16680 }, { "epoch": 109.80263157894737, "grad_norm": 1.850361704826355, "learning_rate": 0.0001, "loss": 0.0275, "step": 16690 }, { "epoch": 109.86842105263158, "grad_norm": 1.9159032106399536, "learning_rate": 0.0001, "loss": 0.0239, "step": 16700 }, { "epoch": 109.9342105263158, "grad_norm": 1.8834518194198608, "learning_rate": 0.0001, "loss": 0.0323, "step": 16710 }, { "epoch": 110.0, "grad_norm": 1.6780990362167358, "learning_rate": 0.0001, "loss": 0.0275, "step": 16720 }, { "epoch": 110.0657894736842, "grad_norm": 2.079728364944458, "learning_rate": 0.0001, "loss": 0.0291, "step": 16730 }, { "epoch": 110.13157894736842, "grad_norm": 1.6663262844085693, "learning_rate": 0.0001, "loss": 0.0287, "step": 16740 }, { "epoch": 110.19736842105263, "grad_norm": 2.2004787921905518, "learning_rate": 0.0001, "loss": 0.0283, "step": 16750 }, { "epoch": 110.26315789473684, "grad_norm": 1.7732805013656616, "learning_rate": 0.0001, "loss": 0.0288, "step": 16760 }, { "epoch": 110.32894736842105, "grad_norm": 1.8447638750076294, "learning_rate": 0.0001, "loss": 0.0262, "step": 16770 }, { "epoch": 110.39473684210526, "grad_norm": 1.46072518825531, "learning_rate": 0.0001, "loss": 0.0253, "step": 16780 }, { "epoch": 110.46052631578948, "grad_norm": 2.0208094120025635, "learning_rate": 0.0001, "loss": 0.0288, "step": 16790 }, { "epoch": 110.52631578947368, "grad_norm": 1.7401812076568604, "learning_rate": 0.0001, "loss": 0.0244, "step": 16800 }, { "epoch": 110.59210526315789, "grad_norm": 1.75077486038208, "learning_rate": 0.0001, "loss": 0.0271, "step": 16810 }, { "epoch": 110.65789473684211, "grad_norm": 2.134385347366333, "learning_rate": 0.0001, "loss": 0.0308, "step": 16820 }, { "epoch": 110.72368421052632, "grad_norm": 2.192401647567749, "learning_rate": 0.0001, "loss": 0.0279, "step": 16830 }, { "epoch": 110.78947368421052, "grad_norm": 1.8374766111373901, "learning_rate": 0.0001, "loss": 0.0235, "step": 16840 }, { "epoch": 110.85526315789474, "grad_norm": 1.9058111906051636, "learning_rate": 0.0001, "loss": 0.0277, "step": 16850 }, { "epoch": 110.92105263157895, "grad_norm": 1.5309765338897705, "learning_rate": 0.0001, "loss": 0.0248, "step": 16860 }, { "epoch": 110.98684210526316, "grad_norm": 2.3446078300476074, "learning_rate": 0.0001, "loss": 0.0316, "step": 16870 }, { "epoch": 111.05263157894737, "grad_norm": 2.063183546066284, "learning_rate": 0.0001, "loss": 0.0313, "step": 16880 }, { "epoch": 111.11842105263158, "grad_norm": 2.470144033432007, "learning_rate": 0.0001, "loss": 0.0237, "step": 16890 }, { "epoch": 111.1842105263158, "grad_norm": 2.2028310298919678, "learning_rate": 0.0001, "loss": 0.0279, "step": 16900 }, { "epoch": 111.25, "grad_norm": 2.3714919090270996, "learning_rate": 0.0001, "loss": 0.0253, "step": 16910 }, { "epoch": 111.3157894736842, "grad_norm": 2.368988275527954, "learning_rate": 0.0001, "loss": 0.0264, "step": 16920 }, { "epoch": 111.38157894736842, "grad_norm": 1.9965325593948364, "learning_rate": 0.0001, "loss": 0.0222, "step": 16930 }, { "epoch": 111.44736842105263, "grad_norm": 2.408559560775757, "learning_rate": 0.0001, "loss": 0.0243, "step": 16940 }, { "epoch": 111.51315789473684, "grad_norm": 2.028428554534912, "learning_rate": 0.0001, "loss": 0.0294, "step": 16950 }, { "epoch": 111.57894736842105, "grad_norm": 1.8796049356460571, "learning_rate": 0.0001, "loss": 0.0293, "step": 16960 }, { "epoch": 111.64473684210526, "grad_norm": 2.236135959625244, "learning_rate": 0.0001, "loss": 0.0241, "step": 16970 }, { "epoch": 111.71052631578948, "grad_norm": 2.6062753200531006, "learning_rate": 0.0001, "loss": 0.0245, "step": 16980 }, { "epoch": 111.77631578947368, "grad_norm": 2.443655490875244, "learning_rate": 0.0001, "loss": 0.0276, "step": 16990 }, { "epoch": 111.84210526315789, "grad_norm": 2.100095272064209, "learning_rate": 0.0001, "loss": 0.023, "step": 17000 }, { "epoch": 111.90789473684211, "grad_norm": 1.961275339126587, "learning_rate": 0.0001, "loss": 0.0235, "step": 17010 }, { "epoch": 111.97368421052632, "grad_norm": 2.0095651149749756, "learning_rate": 0.0001, "loss": 0.0242, "step": 17020 }, { "epoch": 112.03947368421052, "grad_norm": 2.287458896636963, "learning_rate": 0.0001, "loss": 0.0306, "step": 17030 }, { "epoch": 112.10526315789474, "grad_norm": 2.317812919616699, "learning_rate": 0.0001, "loss": 0.0279, "step": 17040 }, { "epoch": 112.17105263157895, "grad_norm": 1.9940478801727295, "learning_rate": 0.0001, "loss": 0.0265, "step": 17050 }, { "epoch": 112.23684210526316, "grad_norm": 1.9077701568603516, "learning_rate": 0.0001, "loss": 0.026, "step": 17060 }, { "epoch": 112.30263157894737, "grad_norm": 1.9471129179000854, "learning_rate": 0.0001, "loss": 0.0262, "step": 17070 }, { "epoch": 112.36842105263158, "grad_norm": 1.6634745597839355, "learning_rate": 0.0001, "loss": 0.0242, "step": 17080 }, { "epoch": 112.4342105263158, "grad_norm": 2.093482732772827, "learning_rate": 0.0001, "loss": 0.0263, "step": 17090 }, { "epoch": 112.5, "grad_norm": 2.1159985065460205, "learning_rate": 0.0001, "loss": 0.0285, "step": 17100 }, { "epoch": 112.5657894736842, "grad_norm": 1.4871076345443726, "learning_rate": 0.0001, "loss": 0.0241, "step": 17110 }, { "epoch": 112.63157894736842, "grad_norm": 2.0643796920776367, "learning_rate": 0.0001, "loss": 0.0216, "step": 17120 }, { "epoch": 112.69736842105263, "grad_norm": 1.7118054628372192, "learning_rate": 0.0001, "loss": 0.024, "step": 17130 }, { "epoch": 112.76315789473684, "grad_norm": 2.0616228580474854, "learning_rate": 0.0001, "loss": 0.0249, "step": 17140 }, { "epoch": 112.82894736842105, "grad_norm": 1.9186131954193115, "learning_rate": 0.0001, "loss": 0.0288, "step": 17150 }, { "epoch": 112.89473684210526, "grad_norm": 1.3125476837158203, "learning_rate": 0.0001, "loss": 0.0265, "step": 17160 }, { "epoch": 112.96052631578948, "grad_norm": 1.9149065017700195, "learning_rate": 0.0001, "loss": 0.0236, "step": 17170 }, { "epoch": 113.02631578947368, "grad_norm": 2.3811302185058594, "learning_rate": 0.0001, "loss": 0.0277, "step": 17180 }, { "epoch": 113.09210526315789, "grad_norm": 2.368880271911621, "learning_rate": 0.0001, "loss": 0.0325, "step": 17190 }, { "epoch": 113.15789473684211, "grad_norm": 2.096588134765625, "learning_rate": 0.0001, "loss": 0.026, "step": 17200 }, { "epoch": 113.22368421052632, "grad_norm": 2.0155177116394043, "learning_rate": 0.0001, "loss": 0.0271, "step": 17210 }, { "epoch": 113.28947368421052, "grad_norm": 1.8278216123580933, "learning_rate": 0.0001, "loss": 0.0259, "step": 17220 }, { "epoch": 113.35526315789474, "grad_norm": 1.7309237718582153, "learning_rate": 0.0001, "loss": 0.0264, "step": 17230 }, { "epoch": 113.42105263157895, "grad_norm": 2.171433448791504, "learning_rate": 0.0001, "loss": 0.0278, "step": 17240 }, { "epoch": 113.48684210526316, "grad_norm": 1.6378244161605835, "learning_rate": 0.0001, "loss": 0.029, "step": 17250 }, { "epoch": 113.55263157894737, "grad_norm": 1.9353471994400024, "learning_rate": 0.0001, "loss": 0.0247, "step": 17260 }, { "epoch": 113.61842105263158, "grad_norm": 1.7667689323425293, "learning_rate": 0.0001, "loss": 0.0287, "step": 17270 }, { "epoch": 113.6842105263158, "grad_norm": 2.0202419757843018, "learning_rate": 0.0001, "loss": 0.0231, "step": 17280 }, { "epoch": 113.75, "grad_norm": 2.242023468017578, "learning_rate": 0.0001, "loss": 0.0226, "step": 17290 }, { "epoch": 113.8157894736842, "grad_norm": 1.9780585765838623, "learning_rate": 0.0001, "loss": 0.027, "step": 17300 }, { "epoch": 113.88157894736842, "grad_norm": 1.9151015281677246, "learning_rate": 0.0001, "loss": 0.0262, "step": 17310 }, { "epoch": 113.94736842105263, "grad_norm": 1.819868564605713, "learning_rate": 0.0001, "loss": 0.0246, "step": 17320 }, { "epoch": 114.01315789473684, "grad_norm": 2.6609933376312256, "learning_rate": 0.0001, "loss": 0.0309, "step": 17330 }, { "epoch": 114.07894736842105, "grad_norm": 2.3400418758392334, "learning_rate": 0.0001, "loss": 0.0265, "step": 17340 }, { "epoch": 114.14473684210526, "grad_norm": 2.0548183917999268, "learning_rate": 0.0001, "loss": 0.0253, "step": 17350 }, { "epoch": 114.21052631578948, "grad_norm": 2.605983257293701, "learning_rate": 0.0001, "loss": 0.0278, "step": 17360 }, { "epoch": 114.27631578947368, "grad_norm": 3.0620665550231934, "learning_rate": 0.0001, "loss": 0.023, "step": 17370 }, { "epoch": 114.34210526315789, "grad_norm": 2.6482555866241455, "learning_rate": 0.0001, "loss": 0.03, "step": 17380 }, { "epoch": 114.40789473684211, "grad_norm": 2.935077667236328, "learning_rate": 0.0001, "loss": 0.0302, "step": 17390 }, { "epoch": 114.47368421052632, "grad_norm": 3.2299726009368896, "learning_rate": 0.0001, "loss": 0.0246, "step": 17400 }, { "epoch": 114.53947368421052, "grad_norm": 1.9776182174682617, "learning_rate": 0.0001, "loss": 0.0225, "step": 17410 }, { "epoch": 114.60526315789474, "grad_norm": 2.3752281665802, "learning_rate": 0.0001, "loss": 0.0208, "step": 17420 }, { "epoch": 114.67105263157895, "grad_norm": 1.9767193794250488, "learning_rate": 0.0001, "loss": 0.0277, "step": 17430 }, { "epoch": 114.73684210526316, "grad_norm": 2.27160382270813, "learning_rate": 0.0001, "loss": 0.0259, "step": 17440 }, { "epoch": 114.80263157894737, "grad_norm": 1.9607106447219849, "learning_rate": 0.0001, "loss": 0.0258, "step": 17450 }, { "epoch": 114.86842105263158, "grad_norm": 2.156690835952759, "learning_rate": 0.0001, "loss": 0.0234, "step": 17460 }, { "epoch": 114.9342105263158, "grad_norm": 2.5387444496154785, "learning_rate": 0.0001, "loss": 0.0245, "step": 17470 }, { "epoch": 115.0, "grad_norm": 2.386267900466919, "learning_rate": 0.0001, "loss": 0.0258, "step": 17480 }, { "epoch": 115.0657894736842, "grad_norm": 2.645312786102295, "learning_rate": 0.0001, "loss": 0.0241, "step": 17490 }, { "epoch": 115.13157894736842, "grad_norm": 1.8886359930038452, "learning_rate": 0.0001, "loss": 0.0268, "step": 17500 }, { "epoch": 115.19736842105263, "grad_norm": 2.4975099563598633, "learning_rate": 0.0001, "loss": 0.0267, "step": 17510 }, { "epoch": 115.26315789473684, "grad_norm": 1.47410249710083, "learning_rate": 0.0001, "loss": 0.0288, "step": 17520 }, { "epoch": 115.32894736842105, "grad_norm": 1.9773564338684082, "learning_rate": 0.0001, "loss": 0.0245, "step": 17530 }, { "epoch": 115.39473684210526, "grad_norm": 2.0314948558807373, "learning_rate": 0.0001, "loss": 0.0223, "step": 17540 }, { "epoch": 115.46052631578948, "grad_norm": 1.898085355758667, "learning_rate": 0.0001, "loss": 0.022, "step": 17550 }, { "epoch": 115.52631578947368, "grad_norm": 2.5004093647003174, "learning_rate": 0.0001, "loss": 0.0266, "step": 17560 }, { "epoch": 115.59210526315789, "grad_norm": 2.2146267890930176, "learning_rate": 0.0001, "loss": 0.026, "step": 17570 }, { "epoch": 115.65789473684211, "grad_norm": 2.2045538425445557, "learning_rate": 0.0001, "loss": 0.0232, "step": 17580 }, { "epoch": 115.72368421052632, "grad_norm": 2.181370496749878, "learning_rate": 0.0001, "loss": 0.0276, "step": 17590 }, { "epoch": 115.78947368421052, "grad_norm": 2.3170197010040283, "learning_rate": 0.0001, "loss": 0.0271, "step": 17600 }, { "epoch": 115.85526315789474, "grad_norm": 2.3052330017089844, "learning_rate": 0.0001, "loss": 0.026, "step": 17610 }, { "epoch": 115.92105263157895, "grad_norm": 1.9015933275222778, "learning_rate": 0.0001, "loss": 0.0237, "step": 17620 }, { "epoch": 115.98684210526316, "grad_norm": 2.123948335647583, "learning_rate": 0.0001, "loss": 0.0249, "step": 17630 }, { "epoch": 116.05263157894737, "grad_norm": 2.145974636077881, "learning_rate": 0.0001, "loss": 0.027, "step": 17640 }, { "epoch": 116.11842105263158, "grad_norm": 1.8038709163665771, "learning_rate": 0.0001, "loss": 0.0255, "step": 17650 }, { "epoch": 116.1842105263158, "grad_norm": 1.9244108200073242, "learning_rate": 0.0001, "loss": 0.023, "step": 17660 }, { "epoch": 116.25, "grad_norm": 2.0838706493377686, "learning_rate": 0.0001, "loss": 0.0254, "step": 17670 }, { "epoch": 116.3157894736842, "grad_norm": 2.0365140438079834, "learning_rate": 0.0001, "loss": 0.0278, "step": 17680 }, { "epoch": 116.38157894736842, "grad_norm": 1.5982880592346191, "learning_rate": 0.0001, "loss": 0.0224, "step": 17690 }, { "epoch": 116.44736842105263, "grad_norm": 1.5846002101898193, "learning_rate": 0.0001, "loss": 0.0268, "step": 17700 }, { "epoch": 116.51315789473684, "grad_norm": 1.6681671142578125, "learning_rate": 0.0001, "loss": 0.0253, "step": 17710 }, { "epoch": 116.57894736842105, "grad_norm": 2.167015314102173, "learning_rate": 0.0001, "loss": 0.0258, "step": 17720 }, { "epoch": 116.64473684210526, "grad_norm": 2.1947343349456787, "learning_rate": 0.0001, "loss": 0.0276, "step": 17730 }, { "epoch": 116.71052631578948, "grad_norm": 2.0868046283721924, "learning_rate": 0.0001, "loss": 0.0246, "step": 17740 }, { "epoch": 116.77631578947368, "grad_norm": 1.8610203266143799, "learning_rate": 0.0001, "loss": 0.0285, "step": 17750 }, { "epoch": 116.84210526315789, "grad_norm": 2.1402037143707275, "learning_rate": 0.0001, "loss": 0.0222, "step": 17760 }, { "epoch": 116.90789473684211, "grad_norm": 2.1477067470550537, "learning_rate": 0.0001, "loss": 0.0309, "step": 17770 }, { "epoch": 116.97368421052632, "grad_norm": 1.438616156578064, "learning_rate": 0.0001, "loss": 0.0251, "step": 17780 }, { "epoch": 117.03947368421052, "grad_norm": 1.4384360313415527, "learning_rate": 0.0001, "loss": 0.0278, "step": 17790 }, { "epoch": 117.10526315789474, "grad_norm": 1.366669774055481, "learning_rate": 0.0001, "loss": 0.028, "step": 17800 }, { "epoch": 117.17105263157895, "grad_norm": 1.6095635890960693, "learning_rate": 0.0001, "loss": 0.023, "step": 17810 }, { "epoch": 117.23684210526316, "grad_norm": 1.5138568878173828, "learning_rate": 0.0001, "loss": 0.0253, "step": 17820 }, { "epoch": 117.30263157894737, "grad_norm": 1.647003173828125, "learning_rate": 0.0001, "loss": 0.0275, "step": 17830 }, { "epoch": 117.36842105263158, "grad_norm": 1.944541573524475, "learning_rate": 0.0001, "loss": 0.0239, "step": 17840 }, { "epoch": 117.4342105263158, "grad_norm": 1.8034436702728271, "learning_rate": 0.0001, "loss": 0.0226, "step": 17850 }, { "epoch": 117.5, "grad_norm": 2.1945533752441406, "learning_rate": 0.0001, "loss": 0.0274, "step": 17860 }, { "epoch": 117.5657894736842, "grad_norm": 2.0484542846679688, "learning_rate": 0.0001, "loss": 0.0273, "step": 17870 }, { "epoch": 117.63157894736842, "grad_norm": 1.4505342245101929, "learning_rate": 0.0001, "loss": 0.023, "step": 17880 }, { "epoch": 117.69736842105263, "grad_norm": 1.8884072303771973, "learning_rate": 0.0001, "loss": 0.0306, "step": 17890 }, { "epoch": 117.76315789473684, "grad_norm": 1.8351541757583618, "learning_rate": 0.0001, "loss": 0.0265, "step": 17900 }, { "epoch": 117.82894736842105, "grad_norm": 2.1997241973876953, "learning_rate": 0.0001, "loss": 0.0262, "step": 17910 }, { "epoch": 117.89473684210526, "grad_norm": 2.2633774280548096, "learning_rate": 0.0001, "loss": 0.0304, "step": 17920 }, { "epoch": 117.96052631578948, "grad_norm": 2.172111749649048, "learning_rate": 0.0001, "loss": 0.0299, "step": 17930 }, { "epoch": 118.02631578947368, "grad_norm": 2.19293475151062, "learning_rate": 0.0001, "loss": 0.0278, "step": 17940 }, { "epoch": 118.09210526315789, "grad_norm": 2.129624605178833, "learning_rate": 0.0001, "loss": 0.0251, "step": 17950 }, { "epoch": 118.15789473684211, "grad_norm": 1.4686297178268433, "learning_rate": 0.0001, "loss": 0.0304, "step": 17960 }, { "epoch": 118.22368421052632, "grad_norm": 1.611789345741272, "learning_rate": 0.0001, "loss": 0.028, "step": 17970 }, { "epoch": 118.28947368421052, "grad_norm": 2.2963104248046875, "learning_rate": 0.0001, "loss": 0.0279, "step": 17980 }, { "epoch": 118.35526315789474, "grad_norm": 2.1280159950256348, "learning_rate": 0.0001, "loss": 0.0251, "step": 17990 }, { "epoch": 118.42105263157895, "grad_norm": 1.6880080699920654, "learning_rate": 0.0001, "loss": 0.0248, "step": 18000 }, { "epoch": 118.48684210526316, "grad_norm": 1.9854836463928223, "learning_rate": 0.0001, "loss": 0.0254, "step": 18010 }, { "epoch": 118.55263157894737, "grad_norm": 1.3126332759857178, "learning_rate": 0.0001, "loss": 0.026, "step": 18020 }, { "epoch": 118.61842105263158, "grad_norm": 1.4336762428283691, "learning_rate": 0.0001, "loss": 0.0231, "step": 18030 }, { "epoch": 118.6842105263158, "grad_norm": 1.885015606880188, "learning_rate": 0.0001, "loss": 0.0247, "step": 18040 }, { "epoch": 118.75, "grad_norm": 1.8094593286514282, "learning_rate": 0.0001, "loss": 0.0284, "step": 18050 }, { "epoch": 118.8157894736842, "grad_norm": 1.7837412357330322, "learning_rate": 0.0001, "loss": 0.0297, "step": 18060 }, { "epoch": 118.88157894736842, "grad_norm": 2.0892906188964844, "learning_rate": 0.0001, "loss": 0.0278, "step": 18070 }, { "epoch": 118.94736842105263, "grad_norm": 1.8415613174438477, "learning_rate": 0.0001, "loss": 0.0252, "step": 18080 }, { "epoch": 119.01315789473684, "grad_norm": 1.9143998622894287, "learning_rate": 0.0001, "loss": 0.0234, "step": 18090 }, { "epoch": 119.07894736842105, "grad_norm": 2.2368993759155273, "learning_rate": 0.0001, "loss": 0.0253, "step": 18100 }, { "epoch": 119.14473684210526, "grad_norm": 2.216662883758545, "learning_rate": 0.0001, "loss": 0.03, "step": 18110 }, { "epoch": 119.21052631578948, "grad_norm": 2.2755494117736816, "learning_rate": 0.0001, "loss": 0.0287, "step": 18120 }, { "epoch": 119.27631578947368, "grad_norm": 1.9684113264083862, "learning_rate": 0.0001, "loss": 0.0284, "step": 18130 }, { "epoch": 119.34210526315789, "grad_norm": 1.9307280778884888, "learning_rate": 0.0001, "loss": 0.0287, "step": 18140 }, { "epoch": 119.40789473684211, "grad_norm": 1.8714255094528198, "learning_rate": 0.0001, "loss": 0.0265, "step": 18150 }, { "epoch": 119.47368421052632, "grad_norm": 1.8705039024353027, "learning_rate": 0.0001, "loss": 0.0235, "step": 18160 }, { "epoch": 119.53947368421052, "grad_norm": 1.8537838459014893, "learning_rate": 0.0001, "loss": 0.0237, "step": 18170 }, { "epoch": 119.60526315789474, "grad_norm": 1.6018664836883545, "learning_rate": 0.0001, "loss": 0.0253, "step": 18180 }, { "epoch": 119.67105263157895, "grad_norm": 1.8138760328292847, "learning_rate": 0.0001, "loss": 0.027, "step": 18190 }, { "epoch": 119.73684210526316, "grad_norm": 2.0714685916900635, "learning_rate": 0.0001, "loss": 0.0268, "step": 18200 }, { "epoch": 119.80263157894737, "grad_norm": 1.8330061435699463, "learning_rate": 0.0001, "loss": 0.0258, "step": 18210 }, { "epoch": 119.86842105263158, "grad_norm": 2.1680679321289062, "learning_rate": 0.0001, "loss": 0.0268, "step": 18220 }, { "epoch": 119.9342105263158, "grad_norm": 1.8612273931503296, "learning_rate": 0.0001, "loss": 0.0247, "step": 18230 }, { "epoch": 120.0, "grad_norm": 1.8502516746520996, "learning_rate": 0.0001, "loss": 0.0259, "step": 18240 }, { "epoch": 120.0657894736842, "grad_norm": 2.4574503898620605, "learning_rate": 0.0001, "loss": 0.025, "step": 18250 }, { "epoch": 120.13157894736842, "grad_norm": 1.8532601594924927, "learning_rate": 0.0001, "loss": 0.0246, "step": 18260 }, { "epoch": 120.19736842105263, "grad_norm": 1.643709421157837, "learning_rate": 0.0001, "loss": 0.0231, "step": 18270 }, { "epoch": 120.26315789473684, "grad_norm": 1.4813412427902222, "learning_rate": 0.0001, "loss": 0.0227, "step": 18280 }, { "epoch": 120.32894736842105, "grad_norm": 1.9131499528884888, "learning_rate": 0.0001, "loss": 0.0223, "step": 18290 }, { "epoch": 120.39473684210526, "grad_norm": 2.1297249794006348, "learning_rate": 0.0001, "loss": 0.0253, "step": 18300 }, { "epoch": 120.46052631578948, "grad_norm": 1.9142228364944458, "learning_rate": 0.0001, "loss": 0.0293, "step": 18310 }, { "epoch": 120.52631578947368, "grad_norm": 1.7929357290267944, "learning_rate": 0.0001, "loss": 0.025, "step": 18320 }, { "epoch": 120.59210526315789, "grad_norm": 1.6818759441375732, "learning_rate": 0.0001, "loss": 0.0258, "step": 18330 }, { "epoch": 120.65789473684211, "grad_norm": 1.6884393692016602, "learning_rate": 0.0001, "loss": 0.0264, "step": 18340 }, { "epoch": 120.72368421052632, "grad_norm": 1.7016091346740723, "learning_rate": 0.0001, "loss": 0.0241, "step": 18350 }, { "epoch": 120.78947368421052, "grad_norm": 1.7947200536727905, "learning_rate": 0.0001, "loss": 0.0262, "step": 18360 }, { "epoch": 120.85526315789474, "grad_norm": 1.7796252965927124, "learning_rate": 0.0001, "loss": 0.0284, "step": 18370 }, { "epoch": 120.92105263157895, "grad_norm": 2.1086223125457764, "learning_rate": 0.0001, "loss": 0.0254, "step": 18380 }, { "epoch": 120.98684210526316, "grad_norm": 2.283383369445801, "learning_rate": 0.0001, "loss": 0.0311, "step": 18390 }, { "epoch": 121.05263157894737, "grad_norm": 2.4059031009674072, "learning_rate": 0.0001, "loss": 0.0297, "step": 18400 }, { "epoch": 121.11842105263158, "grad_norm": 1.4060285091400146, "learning_rate": 0.0001, "loss": 0.0255, "step": 18410 }, { "epoch": 121.1842105263158, "grad_norm": 2.509514808654785, "learning_rate": 0.0001, "loss": 0.025, "step": 18420 }, { "epoch": 121.25, "grad_norm": 1.6113377809524536, "learning_rate": 0.0001, "loss": 0.0261, "step": 18430 }, { "epoch": 121.3157894736842, "grad_norm": 1.3063204288482666, "learning_rate": 0.0001, "loss": 0.0225, "step": 18440 }, { "epoch": 121.38157894736842, "grad_norm": 1.7738269567489624, "learning_rate": 0.0001, "loss": 0.0279, "step": 18450 }, { "epoch": 121.44736842105263, "grad_norm": 2.4521734714508057, "learning_rate": 0.0001, "loss": 0.0289, "step": 18460 }, { "epoch": 121.51315789473684, "grad_norm": 2.1434450149536133, "learning_rate": 0.0001, "loss": 0.0254, "step": 18470 }, { "epoch": 121.57894736842105, "grad_norm": 2.094493865966797, "learning_rate": 0.0001, "loss": 0.0271, "step": 18480 }, { "epoch": 121.64473684210526, "grad_norm": 1.8470053672790527, "learning_rate": 0.0001, "loss": 0.0249, "step": 18490 }, { "epoch": 121.71052631578948, "grad_norm": 2.112290859222412, "learning_rate": 0.0001, "loss": 0.0259, "step": 18500 }, { "epoch": 121.77631578947368, "grad_norm": 1.723717212677002, "learning_rate": 0.0001, "loss": 0.0225, "step": 18510 }, { "epoch": 121.84210526315789, "grad_norm": 1.5702247619628906, "learning_rate": 0.0001, "loss": 0.0236, "step": 18520 }, { "epoch": 121.90789473684211, "grad_norm": 1.857112169265747, "learning_rate": 0.0001, "loss": 0.025, "step": 18530 }, { "epoch": 121.97368421052632, "grad_norm": 2.0412120819091797, "learning_rate": 0.0001, "loss": 0.0259, "step": 18540 }, { "epoch": 122.03947368421052, "grad_norm": 1.7568377256393433, "learning_rate": 0.0001, "loss": 0.0273, "step": 18550 }, { "epoch": 122.10526315789474, "grad_norm": 1.964200735092163, "learning_rate": 0.0001, "loss": 0.0281, "step": 18560 }, { "epoch": 122.17105263157895, "grad_norm": 1.5965940952301025, "learning_rate": 0.0001, "loss": 0.0249, "step": 18570 }, { "epoch": 122.23684210526316, "grad_norm": 1.845375657081604, "learning_rate": 0.0001, "loss": 0.0239, "step": 18580 }, { "epoch": 122.30263157894737, "grad_norm": 1.8132684230804443, "learning_rate": 0.0001, "loss": 0.0257, "step": 18590 }, { "epoch": 122.36842105263158, "grad_norm": 1.9531135559082031, "learning_rate": 0.0001, "loss": 0.0276, "step": 18600 }, { "epoch": 122.4342105263158, "grad_norm": 1.5511837005615234, "learning_rate": 0.0001, "loss": 0.0257, "step": 18610 }, { "epoch": 122.5, "grad_norm": 1.698506474494934, "learning_rate": 0.0001, "loss": 0.0253, "step": 18620 }, { "epoch": 122.5657894736842, "grad_norm": 2.1158041954040527, "learning_rate": 0.0001, "loss": 0.0284, "step": 18630 }, { "epoch": 122.63157894736842, "grad_norm": 2.3637046813964844, "learning_rate": 0.0001, "loss": 0.022, "step": 18640 }, { "epoch": 122.69736842105263, "grad_norm": 1.8886162042617798, "learning_rate": 0.0001, "loss": 0.0224, "step": 18650 }, { "epoch": 122.76315789473684, "grad_norm": 1.538310170173645, "learning_rate": 0.0001, "loss": 0.0235, "step": 18660 }, { "epoch": 122.82894736842105, "grad_norm": 1.992874026298523, "learning_rate": 0.0001, "loss": 0.0238, "step": 18670 }, { "epoch": 122.89473684210526, "grad_norm": 1.4296581745147705, "learning_rate": 0.0001, "loss": 0.0234, "step": 18680 }, { "epoch": 122.96052631578948, "grad_norm": 1.9176102876663208, "learning_rate": 0.0001, "loss": 0.0265, "step": 18690 }, { "epoch": 123.02631578947368, "grad_norm": 1.5256253480911255, "learning_rate": 0.0001, "loss": 0.0255, "step": 18700 }, { "epoch": 123.09210526315789, "grad_norm": 2.0053677558898926, "learning_rate": 0.0001, "loss": 0.0279, "step": 18710 }, { "epoch": 123.15789473684211, "grad_norm": 2.5207440853118896, "learning_rate": 0.0001, "loss": 0.0257, "step": 18720 }, { "epoch": 123.22368421052632, "grad_norm": 1.983201265335083, "learning_rate": 0.0001, "loss": 0.0303, "step": 18730 }, { "epoch": 123.28947368421052, "grad_norm": 1.9480719566345215, "learning_rate": 0.0001, "loss": 0.0248, "step": 18740 }, { "epoch": 123.35526315789474, "grad_norm": 2.0850934982299805, "learning_rate": 0.0001, "loss": 0.0242, "step": 18750 }, { "epoch": 123.42105263157895, "grad_norm": 2.2098841667175293, "learning_rate": 0.0001, "loss": 0.0247, "step": 18760 }, { "epoch": 123.48684210526316, "grad_norm": 2.3098835945129395, "learning_rate": 0.0001, "loss": 0.0237, "step": 18770 }, { "epoch": 123.55263157894737, "grad_norm": 1.9796433448791504, "learning_rate": 0.0001, "loss": 0.0225, "step": 18780 }, { "epoch": 123.61842105263158, "grad_norm": 1.7718544006347656, "learning_rate": 0.0001, "loss": 0.0234, "step": 18790 }, { "epoch": 123.6842105263158, "grad_norm": 2.1403863430023193, "learning_rate": 0.0001, "loss": 0.024, "step": 18800 }, { "epoch": 123.75, "grad_norm": 1.859907627105713, "learning_rate": 0.0001, "loss": 0.0304, "step": 18810 }, { "epoch": 123.8157894736842, "grad_norm": 2.301901340484619, "learning_rate": 0.0001, "loss": 0.0274, "step": 18820 }, { "epoch": 123.88157894736842, "grad_norm": 1.924214243888855, "learning_rate": 0.0001, "loss": 0.0285, "step": 18830 }, { "epoch": 123.94736842105263, "grad_norm": 1.3768935203552246, "learning_rate": 0.0001, "loss": 0.0242, "step": 18840 }, { "epoch": 124.01315789473684, "grad_norm": 1.603979468345642, "learning_rate": 0.0001, "loss": 0.0248, "step": 18850 }, { "epoch": 124.07894736842105, "grad_norm": 1.9125226736068726, "learning_rate": 0.0001, "loss": 0.0269, "step": 18860 }, { "epoch": 124.14473684210526, "grad_norm": 1.8109312057495117, "learning_rate": 0.0001, "loss": 0.0264, "step": 18870 }, { "epoch": 124.21052631578948, "grad_norm": 1.2533246278762817, "learning_rate": 0.0001, "loss": 0.0246, "step": 18880 }, { "epoch": 124.27631578947368, "grad_norm": 1.8866891860961914, "learning_rate": 0.0001, "loss": 0.0269, "step": 18890 }, { "epoch": 124.34210526315789, "grad_norm": 2.1068756580352783, "learning_rate": 0.0001, "loss": 0.0247, "step": 18900 }, { "epoch": 124.40789473684211, "grad_norm": 1.5635088682174683, "learning_rate": 0.0001, "loss": 0.0255, "step": 18910 }, { "epoch": 124.47368421052632, "grad_norm": 1.8745625019073486, "learning_rate": 0.0001, "loss": 0.0247, "step": 18920 }, { "epoch": 124.53947368421052, "grad_norm": 2.228271007537842, "learning_rate": 0.0001, "loss": 0.0283, "step": 18930 }, { "epoch": 124.60526315789474, "grad_norm": 1.8584200143814087, "learning_rate": 0.0001, "loss": 0.0219, "step": 18940 }, { "epoch": 124.67105263157895, "grad_norm": 1.7092759609222412, "learning_rate": 0.0001, "loss": 0.0249, "step": 18950 }, { "epoch": 124.73684210526316, "grad_norm": 1.8545434474945068, "learning_rate": 0.0001, "loss": 0.0259, "step": 18960 }, { "epoch": 124.80263157894737, "grad_norm": 1.7169967889785767, "learning_rate": 0.0001, "loss": 0.0229, "step": 18970 }, { "epoch": 124.86842105263158, "grad_norm": 1.6332005262374878, "learning_rate": 0.0001, "loss": 0.0269, "step": 18980 }, { "epoch": 124.9342105263158, "grad_norm": 1.7428288459777832, "learning_rate": 0.0001, "loss": 0.0257, "step": 18990 }, { "epoch": 125.0, "grad_norm": 1.760222315788269, "learning_rate": 0.0001, "loss": 0.0268, "step": 19000 }, { "epoch": 125.0657894736842, "grad_norm": 1.930885910987854, "learning_rate": 0.0001, "loss": 0.0254, "step": 19010 }, { "epoch": 125.13157894736842, "grad_norm": 1.8744144439697266, "learning_rate": 0.0001, "loss": 0.0251, "step": 19020 }, { "epoch": 125.19736842105263, "grad_norm": 2.0317344665527344, "learning_rate": 0.0001, "loss": 0.0265, "step": 19030 }, { "epoch": 125.26315789473684, "grad_norm": 1.9729773998260498, "learning_rate": 0.0001, "loss": 0.0246, "step": 19040 }, { "epoch": 125.32894736842105, "grad_norm": 2.093649387359619, "learning_rate": 0.0001, "loss": 0.0293, "step": 19050 }, { "epoch": 125.39473684210526, "grad_norm": 1.8289169073104858, "learning_rate": 0.0001, "loss": 0.025, "step": 19060 }, { "epoch": 125.46052631578948, "grad_norm": 1.5744597911834717, "learning_rate": 0.0001, "loss": 0.0232, "step": 19070 }, { "epoch": 125.52631578947368, "grad_norm": 1.9290709495544434, "learning_rate": 0.0001, "loss": 0.0245, "step": 19080 }, { "epoch": 125.59210526315789, "grad_norm": 1.8091089725494385, "learning_rate": 0.0001, "loss": 0.0246, "step": 19090 }, { "epoch": 125.65789473684211, "grad_norm": 1.8553354740142822, "learning_rate": 0.0001, "loss": 0.025, "step": 19100 }, { "epoch": 125.72368421052632, "grad_norm": 1.6613904237747192, "learning_rate": 0.0001, "loss": 0.027, "step": 19110 }, { "epoch": 125.78947368421052, "grad_norm": 1.8727397918701172, "learning_rate": 0.0001, "loss": 0.027, "step": 19120 }, { "epoch": 125.85526315789474, "grad_norm": 1.7846415042877197, "learning_rate": 0.0001, "loss": 0.025, "step": 19130 }, { "epoch": 125.92105263157895, "grad_norm": 1.868438959121704, "learning_rate": 0.0001, "loss": 0.0262, "step": 19140 }, { "epoch": 125.98684210526316, "grad_norm": 1.984620213508606, "learning_rate": 0.0001, "loss": 0.0271, "step": 19150 }, { "epoch": 126.05263157894737, "grad_norm": 1.8104655742645264, "learning_rate": 0.0001, "loss": 0.0287, "step": 19160 }, { "epoch": 126.11842105263158, "grad_norm": 1.9041368961334229, "learning_rate": 0.0001, "loss": 0.0242, "step": 19170 }, { "epoch": 126.1842105263158, "grad_norm": 1.9640965461730957, "learning_rate": 0.0001, "loss": 0.0251, "step": 19180 }, { "epoch": 126.25, "grad_norm": 1.8535256385803223, "learning_rate": 0.0001, "loss": 0.0255, "step": 19190 }, { "epoch": 126.3157894736842, "grad_norm": 2.0887107849121094, "learning_rate": 0.0001, "loss": 0.0219, "step": 19200 }, { "epoch": 126.38157894736842, "grad_norm": 1.5138654708862305, "learning_rate": 0.0001, "loss": 0.024, "step": 19210 }, { "epoch": 126.44736842105263, "grad_norm": 1.6962283849716187, "learning_rate": 0.0001, "loss": 0.0226, "step": 19220 }, { "epoch": 126.51315789473684, "grad_norm": 1.5781933069229126, "learning_rate": 0.0001, "loss": 0.0274, "step": 19230 }, { "epoch": 126.57894736842105, "grad_norm": 1.6702773571014404, "learning_rate": 0.0001, "loss": 0.0267, "step": 19240 }, { "epoch": 126.64473684210526, "grad_norm": 1.8269321918487549, "learning_rate": 0.0001, "loss": 0.0247, "step": 19250 }, { "epoch": 126.71052631578948, "grad_norm": 1.6859822273254395, "learning_rate": 0.0001, "loss": 0.026, "step": 19260 }, { "epoch": 126.77631578947368, "grad_norm": 1.7919362783432007, "learning_rate": 0.0001, "loss": 0.0236, "step": 19270 }, { "epoch": 126.84210526315789, "grad_norm": 1.6908595561981201, "learning_rate": 0.0001, "loss": 0.0272, "step": 19280 }, { "epoch": 126.90789473684211, "grad_norm": 1.655458927154541, "learning_rate": 0.0001, "loss": 0.0227, "step": 19290 }, { "epoch": 126.97368421052632, "grad_norm": 1.6404393911361694, "learning_rate": 0.0001, "loss": 0.0261, "step": 19300 }, { "epoch": 127.03947368421052, "grad_norm": 1.5987060070037842, "learning_rate": 0.0001, "loss": 0.0268, "step": 19310 }, { "epoch": 127.10526315789474, "grad_norm": 1.6571155786514282, "learning_rate": 0.0001, "loss": 0.0272, "step": 19320 }, { "epoch": 127.17105263157895, "grad_norm": 2.077024221420288, "learning_rate": 0.0001, "loss": 0.0291, "step": 19330 }, { "epoch": 127.23684210526316, "grad_norm": 1.986234188079834, "learning_rate": 0.0001, "loss": 0.0223, "step": 19340 }, { "epoch": 127.30263157894737, "grad_norm": 2.0921826362609863, "learning_rate": 0.0001, "loss": 0.0231, "step": 19350 }, { "epoch": 127.36842105263158, "grad_norm": 2.38157320022583, "learning_rate": 0.0001, "loss": 0.0308, "step": 19360 }, { "epoch": 127.4342105263158, "grad_norm": 2.1261889934539795, "learning_rate": 0.0001, "loss": 0.0273, "step": 19370 }, { "epoch": 127.5, "grad_norm": 2.128756284713745, "learning_rate": 0.0001, "loss": 0.0267, "step": 19380 }, { "epoch": 127.5657894736842, "grad_norm": 1.9944250583648682, "learning_rate": 0.0001, "loss": 0.0238, "step": 19390 }, { "epoch": 127.63157894736842, "grad_norm": 1.7386157512664795, "learning_rate": 0.0001, "loss": 0.0239, "step": 19400 }, { "epoch": 127.69736842105263, "grad_norm": 2.1061716079711914, "learning_rate": 0.0001, "loss": 0.0231, "step": 19410 }, { "epoch": 127.76315789473684, "grad_norm": 1.981045126914978, "learning_rate": 0.0001, "loss": 0.0233, "step": 19420 }, { "epoch": 127.82894736842105, "grad_norm": 2.225492477416992, "learning_rate": 0.0001, "loss": 0.0252, "step": 19430 }, { "epoch": 127.89473684210526, "grad_norm": 1.9185640811920166, "learning_rate": 0.0001, "loss": 0.023, "step": 19440 }, { "epoch": 127.96052631578948, "grad_norm": 1.9105247259140015, "learning_rate": 0.0001, "loss": 0.0248, "step": 19450 }, { "epoch": 128.02631578947367, "grad_norm": 1.653397560119629, "learning_rate": 0.0001, "loss": 0.0267, "step": 19460 }, { "epoch": 128.0921052631579, "grad_norm": 1.8522017002105713, "learning_rate": 0.0001, "loss": 0.0237, "step": 19470 }, { "epoch": 128.1578947368421, "grad_norm": 1.8537046909332275, "learning_rate": 0.0001, "loss": 0.0255, "step": 19480 }, { "epoch": 128.22368421052633, "grad_norm": 1.8370147943496704, "learning_rate": 0.0001, "loss": 0.0306, "step": 19490 }, { "epoch": 128.28947368421052, "grad_norm": 2.03987979888916, "learning_rate": 0.0001, "loss": 0.0287, "step": 19500 }, { "epoch": 128.35526315789474, "grad_norm": 1.5290727615356445, "learning_rate": 0.0001, "loss": 0.0216, "step": 19510 }, { "epoch": 128.42105263157896, "grad_norm": 1.964829683303833, "learning_rate": 0.0001, "loss": 0.0232, "step": 19520 }, { "epoch": 128.48684210526315, "grad_norm": 2.359168529510498, "learning_rate": 0.0001, "loss": 0.0227, "step": 19530 }, { "epoch": 128.55263157894737, "grad_norm": 2.366980791091919, "learning_rate": 0.0001, "loss": 0.028, "step": 19540 }, { "epoch": 128.6184210526316, "grad_norm": 1.804248571395874, "learning_rate": 0.0001, "loss": 0.0229, "step": 19550 }, { "epoch": 128.68421052631578, "grad_norm": 2.105407476425171, "learning_rate": 0.0001, "loss": 0.027, "step": 19560 }, { "epoch": 128.75, "grad_norm": 1.9095569849014282, "learning_rate": 0.0001, "loss": 0.0241, "step": 19570 }, { "epoch": 128.81578947368422, "grad_norm": 2.2041072845458984, "learning_rate": 0.0001, "loss": 0.0201, "step": 19580 }, { "epoch": 128.8815789473684, "grad_norm": 1.346085786819458, "learning_rate": 0.0001, "loss": 0.0219, "step": 19590 }, { "epoch": 128.94736842105263, "grad_norm": 1.300341248512268, "learning_rate": 0.0001, "loss": 0.0235, "step": 19600 }, { "epoch": 129.01315789473685, "grad_norm": 1.8605320453643799, "learning_rate": 0.0001, "loss": 0.023, "step": 19610 }, { "epoch": 129.07894736842104, "grad_norm": 2.3670060634613037, "learning_rate": 0.0001, "loss": 0.0253, "step": 19620 }, { "epoch": 129.14473684210526, "grad_norm": 1.9396594762802124, "learning_rate": 0.0001, "loss": 0.0255, "step": 19630 }, { "epoch": 129.21052631578948, "grad_norm": 1.8751026391983032, "learning_rate": 0.0001, "loss": 0.023, "step": 19640 }, { "epoch": 129.27631578947367, "grad_norm": 1.4153773784637451, "learning_rate": 0.0001, "loss": 0.0256, "step": 19650 }, { "epoch": 129.3421052631579, "grad_norm": 1.7430685758590698, "learning_rate": 0.0001, "loss": 0.025, "step": 19660 }, { "epoch": 129.4078947368421, "grad_norm": 1.9713855981826782, "learning_rate": 0.0001, "loss": 0.0267, "step": 19670 }, { "epoch": 129.47368421052633, "grad_norm": 1.8829004764556885, "learning_rate": 0.0001, "loss": 0.0258, "step": 19680 }, { "epoch": 129.53947368421052, "grad_norm": 1.65418541431427, "learning_rate": 0.0001, "loss": 0.0261, "step": 19690 }, { "epoch": 129.60526315789474, "grad_norm": 1.6000053882598877, "learning_rate": 0.0001, "loss": 0.026, "step": 19700 }, { "epoch": 129.67105263157896, "grad_norm": 1.5823752880096436, "learning_rate": 0.0001, "loss": 0.0253, "step": 19710 }, { "epoch": 129.73684210526315, "grad_norm": 1.6089004278182983, "learning_rate": 0.0001, "loss": 0.0244, "step": 19720 }, { "epoch": 129.80263157894737, "grad_norm": 2.0166778564453125, "learning_rate": 0.0001, "loss": 0.0261, "step": 19730 }, { "epoch": 129.8684210526316, "grad_norm": 1.7504478693008423, "learning_rate": 0.0001, "loss": 0.0266, "step": 19740 }, { "epoch": 129.93421052631578, "grad_norm": 2.014627456665039, "learning_rate": 0.0001, "loss": 0.0258, "step": 19750 }, { "epoch": 130.0, "grad_norm": 1.8641383647918701, "learning_rate": 0.0001, "loss": 0.0245, "step": 19760 }, { "epoch": 130.06578947368422, "grad_norm": 1.87761390209198, "learning_rate": 0.0001, "loss": 0.0265, "step": 19770 }, { "epoch": 130.1315789473684, "grad_norm": 1.6318854093551636, "learning_rate": 0.0001, "loss": 0.0228, "step": 19780 }, { "epoch": 130.19736842105263, "grad_norm": 1.7881773710250854, "learning_rate": 0.0001, "loss": 0.0223, "step": 19790 }, { "epoch": 130.26315789473685, "grad_norm": 1.9617841243743896, "learning_rate": 0.0001, "loss": 0.025, "step": 19800 }, { "epoch": 130.32894736842104, "grad_norm": 1.9510157108306885, "learning_rate": 0.0001, "loss": 0.0225, "step": 19810 }, { "epoch": 130.39473684210526, "grad_norm": 1.942069411277771, "learning_rate": 0.0001, "loss": 0.0246, "step": 19820 }, { "epoch": 130.46052631578948, "grad_norm": 2.51275634765625, "learning_rate": 0.0001, "loss": 0.0285, "step": 19830 }, { "epoch": 130.52631578947367, "grad_norm": 1.7510710954666138, "learning_rate": 0.0001, "loss": 0.0225, "step": 19840 }, { "epoch": 130.5921052631579, "grad_norm": 1.7511276006698608, "learning_rate": 0.0001, "loss": 0.0292, "step": 19850 }, { "epoch": 130.6578947368421, "grad_norm": 2.1589889526367188, "learning_rate": 0.0001, "loss": 0.0246, "step": 19860 }, { "epoch": 130.72368421052633, "grad_norm": 2.308643102645874, "learning_rate": 0.0001, "loss": 0.0266, "step": 19870 }, { "epoch": 130.78947368421052, "grad_norm": 2.2457451820373535, "learning_rate": 0.0001, "loss": 0.0237, "step": 19880 }, { "epoch": 130.85526315789474, "grad_norm": 1.3958607912063599, "learning_rate": 0.0001, "loss": 0.0225, "step": 19890 }, { "epoch": 130.92105263157896, "grad_norm": 2.2176380157470703, "learning_rate": 0.0001, "loss": 0.0231, "step": 19900 }, { "epoch": 130.98684210526315, "grad_norm": 2.0587217807769775, "learning_rate": 0.0001, "loss": 0.025, "step": 19910 }, { "epoch": 131.05263157894737, "grad_norm": 1.9006084203720093, "learning_rate": 0.0001, "loss": 0.0329, "step": 19920 }, { "epoch": 131.1184210526316, "grad_norm": 1.5869535207748413, "learning_rate": 0.0001, "loss": 0.0224, "step": 19930 }, { "epoch": 131.18421052631578, "grad_norm": 1.9711054563522339, "learning_rate": 0.0001, "loss": 0.0258, "step": 19940 }, { "epoch": 131.25, "grad_norm": 2.0233941078186035, "learning_rate": 0.0001, "loss": 0.0239, "step": 19950 }, { "epoch": 131.31578947368422, "grad_norm": 1.5863701105117798, "learning_rate": 0.0001, "loss": 0.0241, "step": 19960 }, { "epoch": 131.3815789473684, "grad_norm": 2.083430528640747, "learning_rate": 0.0001, "loss": 0.0227, "step": 19970 }, { "epoch": 131.44736842105263, "grad_norm": 1.8329519033432007, "learning_rate": 0.0001, "loss": 0.0295, "step": 19980 }, { "epoch": 131.51315789473685, "grad_norm": 1.7035294771194458, "learning_rate": 0.0001, "loss": 0.0219, "step": 19990 }, { "epoch": 131.57894736842104, "grad_norm": 1.4417226314544678, "learning_rate": 0.0001, "loss": 0.0236, "step": 20000 }, { "epoch": 131.64473684210526, "grad_norm": 2.1020822525024414, "learning_rate": 0.0001, "loss": 0.0221, "step": 20010 }, { "epoch": 131.71052631578948, "grad_norm": 2.0516645908355713, "learning_rate": 0.0001, "loss": 0.0228, "step": 20020 }, { "epoch": 131.77631578947367, "grad_norm": 1.9011911153793335, "learning_rate": 0.0001, "loss": 0.0283, "step": 20030 }, { "epoch": 131.8421052631579, "grad_norm": 1.9834504127502441, "learning_rate": 0.0001, "loss": 0.0216, "step": 20040 }, { "epoch": 131.9078947368421, "grad_norm": 2.10687518119812, "learning_rate": 0.0001, "loss": 0.0225, "step": 20050 }, { "epoch": 131.97368421052633, "grad_norm": 1.8846466541290283, "learning_rate": 0.0001, "loss": 0.0258, "step": 20060 }, { "epoch": 132.03947368421052, "grad_norm": 1.848825216293335, "learning_rate": 0.0001, "loss": 0.0259, "step": 20070 }, { "epoch": 132.10526315789474, "grad_norm": 1.45933198928833, "learning_rate": 0.0001, "loss": 0.0262, "step": 20080 }, { "epoch": 132.17105263157896, "grad_norm": 1.3605796098709106, "learning_rate": 0.0001, "loss": 0.0218, "step": 20090 }, { "epoch": 132.23684210526315, "grad_norm": 1.9131947755813599, "learning_rate": 0.0001, "loss": 0.0236, "step": 20100 }, { "epoch": 132.30263157894737, "grad_norm": 1.6620484590530396, "learning_rate": 0.0001, "loss": 0.0235, "step": 20110 }, { "epoch": 132.3684210526316, "grad_norm": 1.9352024793624878, "learning_rate": 0.0001, "loss": 0.0224, "step": 20120 }, { "epoch": 132.43421052631578, "grad_norm": 1.7947289943695068, "learning_rate": 0.0001, "loss": 0.0215, "step": 20130 }, { "epoch": 132.5, "grad_norm": 2.069443941116333, "learning_rate": 0.0001, "loss": 0.0265, "step": 20140 }, { "epoch": 132.56578947368422, "grad_norm": 2.1777825355529785, "learning_rate": 0.0001, "loss": 0.0267, "step": 20150 }, { "epoch": 132.6315789473684, "grad_norm": 2.1591851711273193, "learning_rate": 0.0001, "loss": 0.0257, "step": 20160 }, { "epoch": 132.69736842105263, "grad_norm": 1.553578495979309, "learning_rate": 0.0001, "loss": 0.0245, "step": 20170 }, { "epoch": 132.76315789473685, "grad_norm": 1.4275785684585571, "learning_rate": 0.0001, "loss": 0.0286, "step": 20180 }, { "epoch": 132.82894736842104, "grad_norm": 1.9212124347686768, "learning_rate": 0.0001, "loss": 0.0227, "step": 20190 }, { "epoch": 132.89473684210526, "grad_norm": 1.440201759338379, "learning_rate": 0.0001, "loss": 0.0248, "step": 20200 }, { "epoch": 132.96052631578948, "grad_norm": 1.553096890449524, "learning_rate": 0.0001, "loss": 0.0235, "step": 20210 }, { "epoch": 133.02631578947367, "grad_norm": 1.9589226245880127, "learning_rate": 0.0001, "loss": 0.0285, "step": 20220 }, { "epoch": 133.0921052631579, "grad_norm": 1.6086151599884033, "learning_rate": 0.0001, "loss": 0.0224, "step": 20230 }, { "epoch": 133.1578947368421, "grad_norm": 2.1637918949127197, "learning_rate": 0.0001, "loss": 0.024, "step": 20240 }, { "epoch": 133.22368421052633, "grad_norm": 1.9245336055755615, "learning_rate": 0.0001, "loss": 0.0253, "step": 20250 }, { "epoch": 133.28947368421052, "grad_norm": 1.719673752784729, "learning_rate": 0.0001, "loss": 0.0262, "step": 20260 }, { "epoch": 133.35526315789474, "grad_norm": 1.849079966545105, "learning_rate": 0.0001, "loss": 0.0232, "step": 20270 }, { "epoch": 133.42105263157896, "grad_norm": 1.8020918369293213, "learning_rate": 0.0001, "loss": 0.0265, "step": 20280 }, { "epoch": 133.48684210526315, "grad_norm": 2.045667886734009, "learning_rate": 0.0001, "loss": 0.0256, "step": 20290 }, { "epoch": 133.55263157894737, "grad_norm": 2.2873334884643555, "learning_rate": 0.0001, "loss": 0.0274, "step": 20300 }, { "epoch": 133.6184210526316, "grad_norm": 1.7476017475128174, "learning_rate": 0.0001, "loss": 0.0263, "step": 20310 }, { "epoch": 133.68421052631578, "grad_norm": 1.8047903776168823, "learning_rate": 0.0001, "loss": 0.0242, "step": 20320 }, { "epoch": 133.75, "grad_norm": 1.9686743021011353, "learning_rate": 0.0001, "loss": 0.0223, "step": 20330 }, { "epoch": 133.81578947368422, "grad_norm": 1.8366899490356445, "learning_rate": 0.0001, "loss": 0.0261, "step": 20340 }, { "epoch": 133.8815789473684, "grad_norm": 1.8112549781799316, "learning_rate": 0.0001, "loss": 0.0249, "step": 20350 }, { "epoch": 133.94736842105263, "grad_norm": 1.707838773727417, "learning_rate": 0.0001, "loss": 0.0288, "step": 20360 }, { "epoch": 134.01315789473685, "grad_norm": 1.6416288614273071, "learning_rate": 0.0001, "loss": 0.0224, "step": 20370 }, { "epoch": 134.07894736842104, "grad_norm": 1.450613260269165, "learning_rate": 0.0001, "loss": 0.0265, "step": 20380 }, { "epoch": 134.14473684210526, "grad_norm": 1.7487660646438599, "learning_rate": 0.0001, "loss": 0.0251, "step": 20390 }, { "epoch": 134.21052631578948, "grad_norm": 1.7303248643875122, "learning_rate": 0.0001, "loss": 0.0291, "step": 20400 }, { "epoch": 134.27631578947367, "grad_norm": 1.8292486667633057, "learning_rate": 0.0001, "loss": 0.0266, "step": 20410 }, { "epoch": 134.3421052631579, "grad_norm": 1.4182590246200562, "learning_rate": 0.0001, "loss": 0.0226, "step": 20420 }, { "epoch": 134.4078947368421, "grad_norm": 1.5015760660171509, "learning_rate": 0.0001, "loss": 0.0277, "step": 20430 }, { "epoch": 134.47368421052633, "grad_norm": 1.6061108112335205, "learning_rate": 0.0001, "loss": 0.0264, "step": 20440 }, { "epoch": 134.53947368421052, "grad_norm": 1.3194855451583862, "learning_rate": 0.0001, "loss": 0.0223, "step": 20450 }, { "epoch": 134.60526315789474, "grad_norm": 1.5655157566070557, "learning_rate": 0.0001, "loss": 0.0257, "step": 20460 }, { "epoch": 134.67105263157896, "grad_norm": 1.4508320093154907, "learning_rate": 0.0001, "loss": 0.0253, "step": 20470 }, { "epoch": 134.73684210526315, "grad_norm": 1.8024054765701294, "learning_rate": 0.0001, "loss": 0.0262, "step": 20480 }, { "epoch": 134.80263157894737, "grad_norm": 1.9928098917007446, "learning_rate": 0.0001, "loss": 0.0223, "step": 20490 }, { "epoch": 134.8684210526316, "grad_norm": 1.9649032354354858, "learning_rate": 0.0001, "loss": 0.0244, "step": 20500 }, { "epoch": 134.93421052631578, "grad_norm": 1.7947524785995483, "learning_rate": 0.0001, "loss": 0.0219, "step": 20510 }, { "epoch": 135.0, "grad_norm": 2.2396163940429688, "learning_rate": 0.0001, "loss": 0.0246, "step": 20520 }, { "epoch": 135.06578947368422, "grad_norm": 1.8939369916915894, "learning_rate": 0.0001, "loss": 0.0252, "step": 20530 }, { "epoch": 135.1315789473684, "grad_norm": 2.0510876178741455, "learning_rate": 0.0001, "loss": 0.0224, "step": 20540 }, { "epoch": 135.19736842105263, "grad_norm": 1.9284095764160156, "learning_rate": 0.0001, "loss": 0.0235, "step": 20550 }, { "epoch": 135.26315789473685, "grad_norm": 1.5331149101257324, "learning_rate": 0.0001, "loss": 0.0265, "step": 20560 }, { "epoch": 135.32894736842104, "grad_norm": 2.062297821044922, "learning_rate": 0.0001, "loss": 0.027, "step": 20570 }, { "epoch": 135.39473684210526, "grad_norm": 2.359318733215332, "learning_rate": 0.0001, "loss": 0.0293, "step": 20580 }, { "epoch": 135.46052631578948, "grad_norm": 2.1272988319396973, "learning_rate": 0.0001, "loss": 0.0247, "step": 20590 }, { "epoch": 135.52631578947367, "grad_norm": 1.7684650421142578, "learning_rate": 0.0001, "loss": 0.0243, "step": 20600 }, { "epoch": 135.5921052631579, "grad_norm": 1.9371172189712524, "learning_rate": 0.0001, "loss": 0.0235, "step": 20610 }, { "epoch": 135.6578947368421, "grad_norm": 1.8104451894760132, "learning_rate": 0.0001, "loss": 0.0285, "step": 20620 }, { "epoch": 135.72368421052633, "grad_norm": 1.2122979164123535, "learning_rate": 0.0001, "loss": 0.021, "step": 20630 }, { "epoch": 135.78947368421052, "grad_norm": 2.1881160736083984, "learning_rate": 0.0001, "loss": 0.021, "step": 20640 }, { "epoch": 135.85526315789474, "grad_norm": 1.6207855939865112, "learning_rate": 0.0001, "loss": 0.0215, "step": 20650 }, { "epoch": 135.92105263157896, "grad_norm": 1.9453741312026978, "learning_rate": 0.0001, "loss": 0.0232, "step": 20660 }, { "epoch": 135.98684210526315, "grad_norm": 2.2391207218170166, "learning_rate": 0.0001, "loss": 0.025, "step": 20670 }, { "epoch": 136.05263157894737, "grad_norm": 1.6344738006591797, "learning_rate": 0.0001, "loss": 0.0213, "step": 20680 }, { "epoch": 136.1184210526316, "grad_norm": 1.6547691822052002, "learning_rate": 0.0001, "loss": 0.023, "step": 20690 }, { "epoch": 136.18421052631578, "grad_norm": 1.5499417781829834, "learning_rate": 0.0001, "loss": 0.026, "step": 20700 }, { "epoch": 136.25, "grad_norm": 2.2277190685272217, "learning_rate": 0.0001, "loss": 0.0239, "step": 20710 }, { "epoch": 136.31578947368422, "grad_norm": 1.8326318264007568, "learning_rate": 0.0001, "loss": 0.0226, "step": 20720 }, { "epoch": 136.3815789473684, "grad_norm": 1.7075777053833008, "learning_rate": 0.0001, "loss": 0.0253, "step": 20730 }, { "epoch": 136.44736842105263, "grad_norm": 1.9605571031570435, "learning_rate": 0.0001, "loss": 0.0232, "step": 20740 }, { "epoch": 136.51315789473685, "grad_norm": 1.6516122817993164, "learning_rate": 0.0001, "loss": 0.0228, "step": 20750 }, { "epoch": 136.57894736842104, "grad_norm": 2.261673927307129, "learning_rate": 0.0001, "loss": 0.0295, "step": 20760 }, { "epoch": 136.64473684210526, "grad_norm": 1.7209066152572632, "learning_rate": 0.0001, "loss": 0.0209, "step": 20770 }, { "epoch": 136.71052631578948, "grad_norm": 1.7834664583206177, "learning_rate": 0.0001, "loss": 0.0251, "step": 20780 }, { "epoch": 136.77631578947367, "grad_norm": 1.8337637186050415, "learning_rate": 0.0001, "loss": 0.0248, "step": 20790 }, { "epoch": 136.8421052631579, "grad_norm": 1.3802740573883057, "learning_rate": 0.0001, "loss": 0.0243, "step": 20800 }, { "epoch": 136.9078947368421, "grad_norm": 1.8581647872924805, "learning_rate": 0.0001, "loss": 0.0205, "step": 20810 }, { "epoch": 136.97368421052633, "grad_norm": 1.847280740737915, "learning_rate": 0.0001, "loss": 0.0298, "step": 20820 }, { "epoch": 137.03947368421052, "grad_norm": 1.9097923040390015, "learning_rate": 0.0001, "loss": 0.0219, "step": 20830 }, { "epoch": 137.10526315789474, "grad_norm": 1.755126714706421, "learning_rate": 0.0001, "loss": 0.0238, "step": 20840 }, { "epoch": 137.17105263157896, "grad_norm": 1.7855124473571777, "learning_rate": 0.0001, "loss": 0.0246, "step": 20850 }, { "epoch": 137.23684210526315, "grad_norm": 1.395665168762207, "learning_rate": 0.0001, "loss": 0.0259, "step": 20860 }, { "epoch": 137.30263157894737, "grad_norm": 1.4009815454483032, "learning_rate": 0.0001, "loss": 0.026, "step": 20870 }, { "epoch": 137.3684210526316, "grad_norm": 1.9336813688278198, "learning_rate": 0.0001, "loss": 0.0238, "step": 20880 }, { "epoch": 137.43421052631578, "grad_norm": 1.8234789371490479, "learning_rate": 0.0001, "loss": 0.0245, "step": 20890 }, { "epoch": 137.5, "grad_norm": 1.6587345600128174, "learning_rate": 0.0001, "loss": 0.0229, "step": 20900 }, { "epoch": 137.56578947368422, "grad_norm": 2.088578462600708, "learning_rate": 0.0001, "loss": 0.024, "step": 20910 }, { "epoch": 137.6315789473684, "grad_norm": 1.9078112840652466, "learning_rate": 0.0001, "loss": 0.0233, "step": 20920 }, { "epoch": 137.69736842105263, "grad_norm": 1.3603087663650513, "learning_rate": 0.0001, "loss": 0.0267, "step": 20930 }, { "epoch": 137.76315789473685, "grad_norm": 2.5016865730285645, "learning_rate": 0.0001, "loss": 0.0218, "step": 20940 }, { "epoch": 137.82894736842104, "grad_norm": 1.9751944541931152, "learning_rate": 0.0001, "loss": 0.0238, "step": 20950 }, { "epoch": 137.89473684210526, "grad_norm": 1.7295329570770264, "learning_rate": 0.0001, "loss": 0.0207, "step": 20960 }, { "epoch": 137.96052631578948, "grad_norm": 2.091442823410034, "learning_rate": 0.0001, "loss": 0.0281, "step": 20970 }, { "epoch": 138.02631578947367, "grad_norm": 1.6409990787506104, "learning_rate": 0.0001, "loss": 0.0263, "step": 20980 }, { "epoch": 138.0921052631579, "grad_norm": 1.9481006860733032, "learning_rate": 0.0001, "loss": 0.0238, "step": 20990 }, { "epoch": 138.1578947368421, "grad_norm": 1.5705374479293823, "learning_rate": 0.0001, "loss": 0.0221, "step": 21000 }, { "epoch": 138.22368421052633, "grad_norm": 1.6654582023620605, "learning_rate": 0.0001, "loss": 0.0273, "step": 21010 }, { "epoch": 138.28947368421052, "grad_norm": 1.9335616827011108, "learning_rate": 0.0001, "loss": 0.0225, "step": 21020 }, { "epoch": 138.35526315789474, "grad_norm": 1.702642798423767, "learning_rate": 0.0001, "loss": 0.024, "step": 21030 }, { "epoch": 138.42105263157896, "grad_norm": 1.9914929866790771, "learning_rate": 0.0001, "loss": 0.0255, "step": 21040 }, { "epoch": 138.48684210526315, "grad_norm": 1.6007366180419922, "learning_rate": 0.0001, "loss": 0.0214, "step": 21050 }, { "epoch": 138.55263157894737, "grad_norm": 2.0565643310546875, "learning_rate": 0.0001, "loss": 0.0223, "step": 21060 }, { "epoch": 138.6184210526316, "grad_norm": 2.0268542766571045, "learning_rate": 0.0001, "loss": 0.0239, "step": 21070 }, { "epoch": 138.68421052631578, "grad_norm": 1.9232654571533203, "learning_rate": 0.0001, "loss": 0.0228, "step": 21080 }, { "epoch": 138.75, "grad_norm": 1.8031882047653198, "learning_rate": 0.0001, "loss": 0.0242, "step": 21090 }, { "epoch": 138.81578947368422, "grad_norm": 1.726823329925537, "learning_rate": 0.0001, "loss": 0.0201, "step": 21100 }, { "epoch": 138.8815789473684, "grad_norm": 1.7830045223236084, "learning_rate": 0.0001, "loss": 0.0258, "step": 21110 }, { "epoch": 138.94736842105263, "grad_norm": 1.4756869077682495, "learning_rate": 0.0001, "loss": 0.0251, "step": 21120 }, { "epoch": 139.01315789473685, "grad_norm": 1.7401427030563354, "learning_rate": 0.0001, "loss": 0.0245, "step": 21130 }, { "epoch": 139.07894736842104, "grad_norm": 2.1344950199127197, "learning_rate": 0.0001, "loss": 0.0271, "step": 21140 }, { "epoch": 139.14473684210526, "grad_norm": 2.3567869663238525, "learning_rate": 0.0001, "loss": 0.0247, "step": 21150 }, { "epoch": 139.21052631578948, "grad_norm": 2.3153390884399414, "learning_rate": 0.0001, "loss": 0.0208, "step": 21160 }, { "epoch": 139.27631578947367, "grad_norm": 1.3460135459899902, "learning_rate": 0.0001, "loss": 0.0228, "step": 21170 }, { "epoch": 139.3421052631579, "grad_norm": 1.6409149169921875, "learning_rate": 0.0001, "loss": 0.0247, "step": 21180 }, { "epoch": 139.4078947368421, "grad_norm": 2.447929620742798, "learning_rate": 0.0001, "loss": 0.0218, "step": 21190 }, { "epoch": 139.47368421052633, "grad_norm": 1.7827374935150146, "learning_rate": 0.0001, "loss": 0.0237, "step": 21200 }, { "epoch": 139.53947368421052, "grad_norm": 1.4780184030532837, "learning_rate": 0.0001, "loss": 0.0265, "step": 21210 }, { "epoch": 139.60526315789474, "grad_norm": 1.6772148609161377, "learning_rate": 0.0001, "loss": 0.0296, "step": 21220 }, { "epoch": 139.67105263157896, "grad_norm": 1.4648807048797607, "learning_rate": 0.0001, "loss": 0.0219, "step": 21230 }, { "epoch": 139.73684210526315, "grad_norm": 1.1315582990646362, "learning_rate": 0.0001, "loss": 0.021, "step": 21240 }, { "epoch": 139.80263157894737, "grad_norm": 2.370311975479126, "learning_rate": 0.0001, "loss": 0.0234, "step": 21250 }, { "epoch": 139.8684210526316, "grad_norm": 1.4790973663330078, "learning_rate": 0.0001, "loss": 0.0221, "step": 21260 }, { "epoch": 139.93421052631578, "grad_norm": 1.9545704126358032, "learning_rate": 0.0001, "loss": 0.0227, "step": 21270 }, { "epoch": 140.0, "grad_norm": 1.665725588798523, "learning_rate": 0.0001, "loss": 0.028, "step": 21280 }, { "epoch": 140.06578947368422, "grad_norm": 1.5102581977844238, "learning_rate": 0.0001, "loss": 0.0229, "step": 21290 }, { "epoch": 140.1315789473684, "grad_norm": 2.0479938983917236, "learning_rate": 0.0001, "loss": 0.023, "step": 21300 }, { "epoch": 140.19736842105263, "grad_norm": 1.5911844968795776, "learning_rate": 0.0001, "loss": 0.0227, "step": 21310 }, { "epoch": 140.26315789473685, "grad_norm": 1.7683504819869995, "learning_rate": 0.0001, "loss": 0.0249, "step": 21320 }, { "epoch": 140.32894736842104, "grad_norm": 1.7384178638458252, "learning_rate": 0.0001, "loss": 0.027, "step": 21330 }, { "epoch": 140.39473684210526, "grad_norm": 1.3435590267181396, "learning_rate": 0.0001, "loss": 0.023, "step": 21340 }, { "epoch": 140.46052631578948, "grad_norm": 1.8047139644622803, "learning_rate": 0.0001, "loss": 0.031, "step": 21350 }, { "epoch": 140.52631578947367, "grad_norm": 1.5749142169952393, "learning_rate": 0.0001, "loss": 0.0221, "step": 21360 }, { "epoch": 140.5921052631579, "grad_norm": 1.9031199216842651, "learning_rate": 0.0001, "loss": 0.0237, "step": 21370 }, { "epoch": 140.6578947368421, "grad_norm": 2.0255463123321533, "learning_rate": 0.0001, "loss": 0.0254, "step": 21380 }, { "epoch": 140.72368421052633, "grad_norm": 1.8793830871582031, "learning_rate": 0.0001, "loss": 0.0198, "step": 21390 }, { "epoch": 140.78947368421052, "grad_norm": 2.307014226913452, "learning_rate": 0.0001, "loss": 0.0206, "step": 21400 }, { "epoch": 140.85526315789474, "grad_norm": 2.0153861045837402, "learning_rate": 0.0001, "loss": 0.0224, "step": 21410 }, { "epoch": 140.92105263157896, "grad_norm": 2.0237343311309814, "learning_rate": 0.0001, "loss": 0.0241, "step": 21420 }, { "epoch": 140.98684210526315, "grad_norm": 1.7851033210754395, "learning_rate": 0.0001, "loss": 0.0221, "step": 21430 }, { "epoch": 141.05263157894737, "grad_norm": 2.174520254135132, "learning_rate": 0.0001, "loss": 0.0246, "step": 21440 }, { "epoch": 141.1184210526316, "grad_norm": 1.7811450958251953, "learning_rate": 0.0001, "loss": 0.023, "step": 21450 }, { "epoch": 141.18421052631578, "grad_norm": 1.833375096321106, "learning_rate": 0.0001, "loss": 0.0225, "step": 21460 }, { "epoch": 141.25, "grad_norm": 2.1252572536468506, "learning_rate": 0.0001, "loss": 0.0289, "step": 21470 }, { "epoch": 141.31578947368422, "grad_norm": 1.8513518571853638, "learning_rate": 0.0001, "loss": 0.0238, "step": 21480 }, { "epoch": 141.3815789473684, "grad_norm": 1.3134546279907227, "learning_rate": 0.0001, "loss": 0.0241, "step": 21490 }, { "epoch": 141.44736842105263, "grad_norm": 1.5518691539764404, "learning_rate": 0.0001, "loss": 0.0246, "step": 21500 }, { "epoch": 141.51315789473685, "grad_norm": 1.4816968441009521, "learning_rate": 0.0001, "loss": 0.025, "step": 21510 }, { "epoch": 141.57894736842104, "grad_norm": 1.833187222480774, "learning_rate": 0.0001, "loss": 0.0235, "step": 21520 }, { "epoch": 141.64473684210526, "grad_norm": 1.5506269931793213, "learning_rate": 0.0001, "loss": 0.0232, "step": 21530 }, { "epoch": 141.71052631578948, "grad_norm": 1.8367644548416138, "learning_rate": 0.0001, "loss": 0.0284, "step": 21540 }, { "epoch": 141.77631578947367, "grad_norm": 1.7513940334320068, "learning_rate": 0.0001, "loss": 0.0245, "step": 21550 }, { "epoch": 141.8421052631579, "grad_norm": 1.6217204332351685, "learning_rate": 0.0001, "loss": 0.0244, "step": 21560 }, { "epoch": 141.9078947368421, "grad_norm": 2.042733669281006, "learning_rate": 0.0001, "loss": 0.0229, "step": 21570 }, { "epoch": 141.97368421052633, "grad_norm": 1.7559936046600342, "learning_rate": 0.0001, "loss": 0.0231, "step": 21580 }, { "epoch": 142.03947368421052, "grad_norm": 1.8520795106887817, "learning_rate": 0.0001, "loss": 0.0252, "step": 21590 }, { "epoch": 142.10526315789474, "grad_norm": 2.0685160160064697, "learning_rate": 0.0001, "loss": 0.0201, "step": 21600 }, { "epoch": 142.17105263157896, "grad_norm": 2.297084331512451, "learning_rate": 0.0001, "loss": 0.0275, "step": 21610 }, { "epoch": 142.23684210526315, "grad_norm": 1.3651399612426758, "learning_rate": 0.0001, "loss": 0.0205, "step": 21620 }, { "epoch": 142.30263157894737, "grad_norm": 1.5720806121826172, "learning_rate": 0.0001, "loss": 0.0264, "step": 21630 }, { "epoch": 142.3684210526316, "grad_norm": 1.996098518371582, "learning_rate": 0.0001, "loss": 0.0278, "step": 21640 }, { "epoch": 142.43421052631578, "grad_norm": 1.8295928239822388, "learning_rate": 0.0001, "loss": 0.0224, "step": 21650 }, { "epoch": 142.5, "grad_norm": 2.2300074100494385, "learning_rate": 0.0001, "loss": 0.0215, "step": 21660 }, { "epoch": 142.56578947368422, "grad_norm": 1.8775023221969604, "learning_rate": 0.0001, "loss": 0.0225, "step": 21670 }, { "epoch": 142.6315789473684, "grad_norm": 1.930974006652832, "learning_rate": 0.0001, "loss": 0.0221, "step": 21680 }, { "epoch": 142.69736842105263, "grad_norm": 1.7470499277114868, "learning_rate": 0.0001, "loss": 0.0241, "step": 21690 }, { "epoch": 142.76315789473685, "grad_norm": 1.7044408321380615, "learning_rate": 0.0001, "loss": 0.0235, "step": 21700 }, { "epoch": 142.82894736842104, "grad_norm": 2.334876537322998, "learning_rate": 0.0001, "loss": 0.0215, "step": 21710 }, { "epoch": 142.89473684210526, "grad_norm": 1.8626861572265625, "learning_rate": 0.0001, "loss": 0.0258, "step": 21720 }, { "epoch": 142.96052631578948, "grad_norm": 2.104708671569824, "learning_rate": 0.0001, "loss": 0.0242, "step": 21730 }, { "epoch": 143.02631578947367, "grad_norm": 2.2149274349212646, "learning_rate": 0.0001, "loss": 0.0231, "step": 21740 }, { "epoch": 143.0921052631579, "grad_norm": 2.064143419265747, "learning_rate": 0.0001, "loss": 0.0209, "step": 21750 }, { "epoch": 143.1578947368421, "grad_norm": 1.9816228151321411, "learning_rate": 0.0001, "loss": 0.0201, "step": 21760 }, { "epoch": 143.22368421052633, "grad_norm": 1.839816689491272, "learning_rate": 0.0001, "loss": 0.0237, "step": 21770 }, { "epoch": 143.28947368421052, "grad_norm": 1.9676623344421387, "learning_rate": 0.0001, "loss": 0.0207, "step": 21780 }, { "epoch": 143.35526315789474, "grad_norm": 1.7473933696746826, "learning_rate": 0.0001, "loss": 0.0247, "step": 21790 }, { "epoch": 143.42105263157896, "grad_norm": 1.6396965980529785, "learning_rate": 0.0001, "loss": 0.0202, "step": 21800 }, { "epoch": 143.48684210526315, "grad_norm": 2.007516384124756, "learning_rate": 0.0001, "loss": 0.0236, "step": 21810 }, { "epoch": 143.55263157894737, "grad_norm": 1.6236062049865723, "learning_rate": 0.0001, "loss": 0.0245, "step": 21820 }, { "epoch": 143.6184210526316, "grad_norm": 1.7902733087539673, "learning_rate": 0.0001, "loss": 0.0256, "step": 21830 }, { "epoch": 143.68421052631578, "grad_norm": 2.0690295696258545, "learning_rate": 0.0001, "loss": 0.0253, "step": 21840 }, { "epoch": 143.75, "grad_norm": 1.5114775896072388, "learning_rate": 0.0001, "loss": 0.0279, "step": 21850 }, { "epoch": 143.81578947368422, "grad_norm": 2.143566131591797, "learning_rate": 0.0001, "loss": 0.0215, "step": 21860 }, { "epoch": 143.8815789473684, "grad_norm": 2.0019001960754395, "learning_rate": 0.0001, "loss": 0.0214, "step": 21870 }, { "epoch": 143.94736842105263, "grad_norm": 1.281444787979126, "learning_rate": 0.0001, "loss": 0.0273, "step": 21880 }, { "epoch": 144.01315789473685, "grad_norm": 2.0614049434661865, "learning_rate": 0.0001, "loss": 0.0201, "step": 21890 }, { "epoch": 144.07894736842104, "grad_norm": 1.79042387008667, "learning_rate": 0.0001, "loss": 0.023, "step": 21900 }, { "epoch": 144.14473684210526, "grad_norm": 1.7312357425689697, "learning_rate": 0.0001, "loss": 0.025, "step": 21910 }, { "epoch": 144.21052631578948, "grad_norm": 1.9795812368392944, "learning_rate": 0.0001, "loss": 0.0241, "step": 21920 }, { "epoch": 144.27631578947367, "grad_norm": 1.7046825885772705, "learning_rate": 0.0001, "loss": 0.0243, "step": 21930 }, { "epoch": 144.3421052631579, "grad_norm": 1.641147255897522, "learning_rate": 0.0001, "loss": 0.026, "step": 21940 }, { "epoch": 144.4078947368421, "grad_norm": 1.8217450380325317, "learning_rate": 0.0001, "loss": 0.0235, "step": 21950 }, { "epoch": 144.47368421052633, "grad_norm": 1.8906354904174805, "learning_rate": 0.0001, "loss": 0.0266, "step": 21960 }, { "epoch": 144.53947368421052, "grad_norm": 1.729722261428833, "learning_rate": 0.0001, "loss": 0.0215, "step": 21970 }, { "epoch": 144.60526315789474, "grad_norm": 1.4992221593856812, "learning_rate": 0.0001, "loss": 0.0246, "step": 21980 }, { "epoch": 144.67105263157896, "grad_norm": 1.6865441799163818, "learning_rate": 0.0001, "loss": 0.0266, "step": 21990 }, { "epoch": 144.73684210526315, "grad_norm": 1.500360131263733, "learning_rate": 0.0001, "loss": 0.0228, "step": 22000 }, { "epoch": 144.80263157894737, "grad_norm": 2.239863872528076, "learning_rate": 0.0001, "loss": 0.0229, "step": 22010 }, { "epoch": 144.8684210526316, "grad_norm": 2.0327227115631104, "learning_rate": 0.0001, "loss": 0.0247, "step": 22020 }, { "epoch": 144.93421052631578, "grad_norm": 2.064368963241577, "learning_rate": 0.0001, "loss": 0.0247, "step": 22030 }, { "epoch": 145.0, "grad_norm": 2.2726497650146484, "learning_rate": 0.0001, "loss": 0.024, "step": 22040 }, { "epoch": 145.06578947368422, "grad_norm": 1.834810733795166, "learning_rate": 0.0001, "loss": 0.0226, "step": 22050 }, { "epoch": 145.1315789473684, "grad_norm": 1.8517099618911743, "learning_rate": 0.0001, "loss": 0.0225, "step": 22060 }, { "epoch": 145.19736842105263, "grad_norm": 2.3797566890716553, "learning_rate": 0.0001, "loss": 0.0264, "step": 22070 }, { "epoch": 145.26315789473685, "grad_norm": 2.2897703647613525, "learning_rate": 0.0001, "loss": 0.0236, "step": 22080 }, { "epoch": 145.32894736842104, "grad_norm": 1.7484413385391235, "learning_rate": 0.0001, "loss": 0.0223, "step": 22090 }, { "epoch": 145.39473684210526, "grad_norm": 1.4264564514160156, "learning_rate": 0.0001, "loss": 0.0213, "step": 22100 }, { "epoch": 145.46052631578948, "grad_norm": 1.9112331867218018, "learning_rate": 0.0001, "loss": 0.0199, "step": 22110 }, { "epoch": 145.52631578947367, "grad_norm": 2.0321085453033447, "learning_rate": 0.0001, "loss": 0.0199, "step": 22120 }, { "epoch": 145.5921052631579, "grad_norm": 1.8403044939041138, "learning_rate": 0.0001, "loss": 0.023, "step": 22130 }, { "epoch": 145.6578947368421, "grad_norm": 2.2092909812927246, "learning_rate": 0.0001, "loss": 0.026, "step": 22140 }, { "epoch": 145.72368421052633, "grad_norm": 1.415128231048584, "learning_rate": 0.0001, "loss": 0.0232, "step": 22150 }, { "epoch": 145.78947368421052, "grad_norm": 1.411267638206482, "learning_rate": 0.0001, "loss": 0.0271, "step": 22160 }, { "epoch": 145.85526315789474, "grad_norm": 1.627475380897522, "learning_rate": 0.0001, "loss": 0.0225, "step": 22170 }, { "epoch": 145.92105263157896, "grad_norm": 1.5571300983428955, "learning_rate": 0.0001, "loss": 0.0236, "step": 22180 }, { "epoch": 145.98684210526315, "grad_norm": 1.6933298110961914, "learning_rate": 0.0001, "loss": 0.0258, "step": 22190 }, { "epoch": 146.05263157894737, "grad_norm": 1.5969090461730957, "learning_rate": 0.0001, "loss": 0.0263, "step": 22200 }, { "epoch": 146.1184210526316, "grad_norm": 2.3203437328338623, "learning_rate": 0.0001, "loss": 0.0239, "step": 22210 }, { "epoch": 146.18421052631578, "grad_norm": 2.258730411529541, "learning_rate": 0.0001, "loss": 0.0207, "step": 22220 }, { "epoch": 146.25, "grad_norm": 1.732814073562622, "learning_rate": 0.0001, "loss": 0.0224, "step": 22230 }, { "epoch": 146.31578947368422, "grad_norm": 2.0569674968719482, "learning_rate": 0.0001, "loss": 0.0299, "step": 22240 }, { "epoch": 146.3815789473684, "grad_norm": 1.5648237466812134, "learning_rate": 0.0001, "loss": 0.0244, "step": 22250 }, { "epoch": 146.44736842105263, "grad_norm": 1.9750428199768066, "learning_rate": 0.0001, "loss": 0.0199, "step": 22260 }, { "epoch": 146.51315789473685, "grad_norm": 1.7606499195098877, "learning_rate": 0.0001, "loss": 0.0238, "step": 22270 }, { "epoch": 146.57894736842104, "grad_norm": 1.9812954664230347, "learning_rate": 0.0001, "loss": 0.0303, "step": 22280 }, { "epoch": 146.64473684210526, "grad_norm": 1.9692021608352661, "learning_rate": 0.0001, "loss": 0.0228, "step": 22290 }, { "epoch": 146.71052631578948, "grad_norm": 1.4715423583984375, "learning_rate": 0.0001, "loss": 0.0229, "step": 22300 }, { "epoch": 146.77631578947367, "grad_norm": 1.7276480197906494, "learning_rate": 0.0001, "loss": 0.0212, "step": 22310 }, { "epoch": 146.8421052631579, "grad_norm": 1.6966041326522827, "learning_rate": 0.0001, "loss": 0.0225, "step": 22320 }, { "epoch": 146.9078947368421, "grad_norm": 1.9347985982894897, "learning_rate": 0.0001, "loss": 0.0251, "step": 22330 }, { "epoch": 146.97368421052633, "grad_norm": 1.9648555517196655, "learning_rate": 0.0001, "loss": 0.0229, "step": 22340 }, { "epoch": 147.03947368421052, "grad_norm": 2.184546947479248, "learning_rate": 0.0001, "loss": 0.0243, "step": 22350 }, { "epoch": 147.10526315789474, "grad_norm": 2.262394666671753, "learning_rate": 0.0001, "loss": 0.022, "step": 22360 }, { "epoch": 147.17105263157896, "grad_norm": 2.2745139598846436, "learning_rate": 0.0001, "loss": 0.0254, "step": 22370 }, { "epoch": 147.23684210526315, "grad_norm": 1.951706886291504, "learning_rate": 0.0001, "loss": 0.0224, "step": 22380 }, { "epoch": 147.30263157894737, "grad_norm": 2.128751516342163, "learning_rate": 0.0001, "loss": 0.0238, "step": 22390 }, { "epoch": 147.3684210526316, "grad_norm": 2.257786273956299, "learning_rate": 0.0001, "loss": 0.024, "step": 22400 }, { "epoch": 147.43421052631578, "grad_norm": 2.047661066055298, "learning_rate": 0.0001, "loss": 0.025, "step": 22410 }, { "epoch": 147.5, "grad_norm": 1.6348052024841309, "learning_rate": 0.0001, "loss": 0.0225, "step": 22420 }, { "epoch": 147.56578947368422, "grad_norm": 1.9407272338867188, "learning_rate": 0.0001, "loss": 0.0289, "step": 22430 }, { "epoch": 147.6315789473684, "grad_norm": 1.7540066242218018, "learning_rate": 0.0001, "loss": 0.0235, "step": 22440 }, { "epoch": 147.69736842105263, "grad_norm": 1.5277987718582153, "learning_rate": 0.0001, "loss": 0.0213, "step": 22450 }, { "epoch": 147.76315789473685, "grad_norm": 1.5615267753601074, "learning_rate": 0.0001, "loss": 0.0219, "step": 22460 }, { "epoch": 147.82894736842104, "grad_norm": 1.6693035364151, "learning_rate": 0.0001, "loss": 0.0225, "step": 22470 }, { "epoch": 147.89473684210526, "grad_norm": 1.9146636724472046, "learning_rate": 0.0001, "loss": 0.0217, "step": 22480 }, { "epoch": 147.96052631578948, "grad_norm": 1.39113187789917, "learning_rate": 0.0001, "loss": 0.0227, "step": 22490 }, { "epoch": 148.02631578947367, "grad_norm": 1.8487118482589722, "learning_rate": 0.0001, "loss": 0.0235, "step": 22500 }, { "epoch": 148.0921052631579, "grad_norm": 2.1315605640411377, "learning_rate": 0.0001, "loss": 0.0208, "step": 22510 }, { "epoch": 148.1578947368421, "grad_norm": 2.3060011863708496, "learning_rate": 0.0001, "loss": 0.0256, "step": 22520 }, { "epoch": 148.22368421052633, "grad_norm": 1.4468302726745605, "learning_rate": 0.0001, "loss": 0.0229, "step": 22530 }, { "epoch": 148.28947368421052, "grad_norm": 1.5412636995315552, "learning_rate": 0.0001, "loss": 0.0248, "step": 22540 }, { "epoch": 148.35526315789474, "grad_norm": 2.062985897064209, "learning_rate": 0.0001, "loss": 0.0225, "step": 22550 }, { "epoch": 148.42105263157896, "grad_norm": 1.8660447597503662, "learning_rate": 0.0001, "loss": 0.0207, "step": 22560 }, { "epoch": 148.48684210526315, "grad_norm": 2.258763313293457, "learning_rate": 0.0001, "loss": 0.0231, "step": 22570 }, { "epoch": 148.55263157894737, "grad_norm": 2.1915619373321533, "learning_rate": 0.0001, "loss": 0.022, "step": 22580 }, { "epoch": 148.6184210526316, "grad_norm": 1.850996971130371, "learning_rate": 0.0001, "loss": 0.0257, "step": 22590 }, { "epoch": 148.68421052631578, "grad_norm": 1.636374831199646, "learning_rate": 0.0001, "loss": 0.022, "step": 22600 }, { "epoch": 148.75, "grad_norm": 1.8013947010040283, "learning_rate": 0.0001, "loss": 0.0255, "step": 22610 }, { "epoch": 148.81578947368422, "grad_norm": 1.685067892074585, "learning_rate": 0.0001, "loss": 0.0244, "step": 22620 }, { "epoch": 148.8815789473684, "grad_norm": 2.037907123565674, "learning_rate": 0.0001, "loss": 0.0277, "step": 22630 }, { "epoch": 148.94736842105263, "grad_norm": 1.6777455806732178, "learning_rate": 0.0001, "loss": 0.0216, "step": 22640 }, { "epoch": 149.01315789473685, "grad_norm": 1.6430779695510864, "learning_rate": 0.0001, "loss": 0.0215, "step": 22650 }, { "epoch": 149.07894736842104, "grad_norm": 1.4594988822937012, "learning_rate": 0.0001, "loss": 0.0241, "step": 22660 }, { "epoch": 149.14473684210526, "grad_norm": 1.7377822399139404, "learning_rate": 0.0001, "loss": 0.0231, "step": 22670 }, { "epoch": 149.21052631578948, "grad_norm": 2.041853904724121, "learning_rate": 0.0001, "loss": 0.0232, "step": 22680 }, { "epoch": 149.27631578947367, "grad_norm": 1.9490633010864258, "learning_rate": 0.0001, "loss": 0.0264, "step": 22690 }, { "epoch": 149.3421052631579, "grad_norm": 1.8445067405700684, "learning_rate": 0.0001, "loss": 0.0219, "step": 22700 }, { "epoch": 149.4078947368421, "grad_norm": 1.5795514583587646, "learning_rate": 0.0001, "loss": 0.0269, "step": 22710 }, { "epoch": 149.47368421052633, "grad_norm": 1.781008243560791, "learning_rate": 0.0001, "loss": 0.024, "step": 22720 }, { "epoch": 149.53947368421052, "grad_norm": 1.750200629234314, "learning_rate": 0.0001, "loss": 0.025, "step": 22730 }, { "epoch": 149.60526315789474, "grad_norm": 2.2269809246063232, "learning_rate": 0.0001, "loss": 0.0224, "step": 22740 }, { "epoch": 149.67105263157896, "grad_norm": 2.5554909706115723, "learning_rate": 0.0001, "loss": 0.0251, "step": 22750 }, { "epoch": 149.73684210526315, "grad_norm": 2.0077261924743652, "learning_rate": 0.0001, "loss": 0.0237, "step": 22760 }, { "epoch": 149.80263157894737, "grad_norm": 1.2584450244903564, "learning_rate": 0.0001, "loss": 0.0208, "step": 22770 }, { "epoch": 149.8684210526316, "grad_norm": 2.1201558113098145, "learning_rate": 0.0001, "loss": 0.022, "step": 22780 }, { "epoch": 149.93421052631578, "grad_norm": 2.0100152492523193, "learning_rate": 0.0001, "loss": 0.0237, "step": 22790 }, { "epoch": 150.0, "grad_norm": 1.9175035953521729, "learning_rate": 0.0001, "loss": 0.0202, "step": 22800 }, { "epoch": 150.06578947368422, "grad_norm": 1.9752273559570312, "learning_rate": 0.0001, "loss": 0.0212, "step": 22810 }, { "epoch": 150.1315789473684, "grad_norm": 1.7187401056289673, "learning_rate": 0.0001, "loss": 0.0223, "step": 22820 }, { "epoch": 150.19736842105263, "grad_norm": 2.175177812576294, "learning_rate": 0.0001, "loss": 0.0255, "step": 22830 }, { "epoch": 150.26315789473685, "grad_norm": 1.8649355173110962, "learning_rate": 0.0001, "loss": 0.022, "step": 22840 }, { "epoch": 150.32894736842104, "grad_norm": 1.729252815246582, "learning_rate": 0.0001, "loss": 0.0239, "step": 22850 }, { "epoch": 150.39473684210526, "grad_norm": 1.4372940063476562, "learning_rate": 0.0001, "loss": 0.0211, "step": 22860 }, { "epoch": 150.46052631578948, "grad_norm": 1.6681050062179565, "learning_rate": 0.0001, "loss": 0.0231, "step": 22870 }, { "epoch": 150.52631578947367, "grad_norm": 1.4467500448226929, "learning_rate": 0.0001, "loss": 0.0203, "step": 22880 }, { "epoch": 150.5921052631579, "grad_norm": 1.8710521459579468, "learning_rate": 0.0001, "loss": 0.0206, "step": 22890 }, { "epoch": 150.6578947368421, "grad_norm": 1.769212007522583, "learning_rate": 0.0001, "loss": 0.0221, "step": 22900 }, { "epoch": 150.72368421052633, "grad_norm": 2.050865411758423, "learning_rate": 0.0001, "loss": 0.0265, "step": 22910 }, { "epoch": 150.78947368421052, "grad_norm": 2.209623098373413, "learning_rate": 0.0001, "loss": 0.0294, "step": 22920 }, { "epoch": 150.85526315789474, "grad_norm": 2.157266855239868, "learning_rate": 0.0001, "loss": 0.0241, "step": 22930 }, { "epoch": 150.92105263157896, "grad_norm": 1.8346295356750488, "learning_rate": 0.0001, "loss": 0.0225, "step": 22940 }, { "epoch": 150.98684210526315, "grad_norm": 1.6448334455490112, "learning_rate": 0.0001, "loss": 0.0206, "step": 22950 }, { "epoch": 151.05263157894737, "grad_norm": 1.8171651363372803, "learning_rate": 0.0001, "loss": 0.0232, "step": 22960 }, { "epoch": 151.1184210526316, "grad_norm": 1.7332133054733276, "learning_rate": 0.0001, "loss": 0.0233, "step": 22970 }, { "epoch": 151.18421052631578, "grad_norm": 1.631152868270874, "learning_rate": 0.0001, "loss": 0.0257, "step": 22980 }, { "epoch": 151.25, "grad_norm": 1.9434047937393188, "learning_rate": 0.0001, "loss": 0.0251, "step": 22990 }, { "epoch": 151.31578947368422, "grad_norm": 1.7034841775894165, "learning_rate": 0.0001, "loss": 0.0223, "step": 23000 }, { "epoch": 151.3815789473684, "grad_norm": 1.9644235372543335, "learning_rate": 0.0001, "loss": 0.0264, "step": 23010 }, { "epoch": 151.44736842105263, "grad_norm": 1.9968042373657227, "learning_rate": 0.0001, "loss": 0.0233, "step": 23020 }, { "epoch": 151.51315789473685, "grad_norm": 1.8825291395187378, "learning_rate": 0.0001, "loss": 0.0285, "step": 23030 }, { "epoch": 151.57894736842104, "grad_norm": 2.003087043762207, "learning_rate": 0.0001, "loss": 0.0229, "step": 23040 }, { "epoch": 151.64473684210526, "grad_norm": 2.659870147705078, "learning_rate": 0.0001, "loss": 0.0232, "step": 23050 }, { "epoch": 151.71052631578948, "grad_norm": 1.7626811265945435, "learning_rate": 0.0001, "loss": 0.0204, "step": 23060 }, { "epoch": 151.77631578947367, "grad_norm": 1.8824435472488403, "learning_rate": 0.0001, "loss": 0.0218, "step": 23070 }, { "epoch": 151.8421052631579, "grad_norm": 1.9627138376235962, "learning_rate": 0.0001, "loss": 0.0222, "step": 23080 }, { "epoch": 151.9078947368421, "grad_norm": 1.5043540000915527, "learning_rate": 0.0001, "loss": 0.0222, "step": 23090 }, { "epoch": 151.97368421052633, "grad_norm": 2.0112340450286865, "learning_rate": 0.0001, "loss": 0.0216, "step": 23100 }, { "epoch": 152.03947368421052, "grad_norm": 1.9368575811386108, "learning_rate": 0.0001, "loss": 0.0299, "step": 23110 }, { "epoch": 152.10526315789474, "grad_norm": 2.075627088546753, "learning_rate": 0.0001, "loss": 0.0227, "step": 23120 }, { "epoch": 152.17105263157896, "grad_norm": 2.0449328422546387, "learning_rate": 0.0001, "loss": 0.0235, "step": 23130 }, { "epoch": 152.23684210526315, "grad_norm": 1.9587254524230957, "learning_rate": 0.0001, "loss": 0.0211, "step": 23140 }, { "epoch": 152.30263157894737, "grad_norm": 1.6881006956100464, "learning_rate": 0.0001, "loss": 0.0197, "step": 23150 }, { "epoch": 152.3684210526316, "grad_norm": 2.091472625732422, "learning_rate": 0.0001, "loss": 0.0218, "step": 23160 }, { "epoch": 152.43421052631578, "grad_norm": 1.8674694299697876, "learning_rate": 0.0001, "loss": 0.0213, "step": 23170 }, { "epoch": 152.5, "grad_norm": 1.9017199277877808, "learning_rate": 0.0001, "loss": 0.0249, "step": 23180 }, { "epoch": 152.56578947368422, "grad_norm": 2.1731154918670654, "learning_rate": 0.0001, "loss": 0.0243, "step": 23190 }, { "epoch": 152.6315789473684, "grad_norm": 1.7593505382537842, "learning_rate": 0.0001, "loss": 0.0235, "step": 23200 }, { "epoch": 152.69736842105263, "grad_norm": 1.8673349618911743, "learning_rate": 0.0001, "loss": 0.022, "step": 23210 }, { "epoch": 152.76315789473685, "grad_norm": 2.1088645458221436, "learning_rate": 0.0001, "loss": 0.0199, "step": 23220 }, { "epoch": 152.82894736842104, "grad_norm": 1.7978702783584595, "learning_rate": 0.0001, "loss": 0.0196, "step": 23230 }, { "epoch": 152.89473684210526, "grad_norm": 1.9001758098602295, "learning_rate": 0.0001, "loss": 0.0246, "step": 23240 }, { "epoch": 152.96052631578948, "grad_norm": 1.5604044198989868, "learning_rate": 0.0001, "loss": 0.0219, "step": 23250 }, { "epoch": 153.02631578947367, "grad_norm": 1.6195697784423828, "learning_rate": 0.0001, "loss": 0.022, "step": 23260 }, { "epoch": 153.0921052631579, "grad_norm": 1.759411334991455, "learning_rate": 0.0001, "loss": 0.0205, "step": 23270 }, { "epoch": 153.1578947368421, "grad_norm": 2.338738203048706, "learning_rate": 0.0001, "loss": 0.0208, "step": 23280 }, { "epoch": 153.22368421052633, "grad_norm": 2.2819790840148926, "learning_rate": 0.0001, "loss": 0.0213, "step": 23290 }, { "epoch": 153.28947368421052, "grad_norm": 1.934208631515503, "learning_rate": 0.0001, "loss": 0.0232, "step": 23300 }, { "epoch": 153.35526315789474, "grad_norm": 1.6523973941802979, "learning_rate": 0.0001, "loss": 0.0228, "step": 23310 }, { "epoch": 153.42105263157896, "grad_norm": 1.7348743677139282, "learning_rate": 0.0001, "loss": 0.0288, "step": 23320 }, { "epoch": 153.48684210526315, "grad_norm": 1.6765868663787842, "learning_rate": 0.0001, "loss": 0.0212, "step": 23330 }, { "epoch": 153.55263157894737, "grad_norm": 1.6323940753936768, "learning_rate": 0.0001, "loss": 0.0248, "step": 23340 }, { "epoch": 153.6184210526316, "grad_norm": 2.0089447498321533, "learning_rate": 0.0001, "loss": 0.0203, "step": 23350 }, { "epoch": 153.68421052631578, "grad_norm": 1.2461085319519043, "learning_rate": 0.0001, "loss": 0.0218, "step": 23360 }, { "epoch": 153.75, "grad_norm": 2.095489025115967, "learning_rate": 0.0001, "loss": 0.0217, "step": 23370 }, { "epoch": 153.81578947368422, "grad_norm": 1.7755779027938843, "learning_rate": 0.0001, "loss": 0.0199, "step": 23380 }, { "epoch": 153.8815789473684, "grad_norm": 1.5544899702072144, "learning_rate": 0.0001, "loss": 0.0258, "step": 23390 }, { "epoch": 153.94736842105263, "grad_norm": 1.9439818859100342, "learning_rate": 0.0001, "loss": 0.0206, "step": 23400 }, { "epoch": 154.01315789473685, "grad_norm": 1.537244200706482, "learning_rate": 0.0001, "loss": 0.0281, "step": 23410 }, { "epoch": 154.07894736842104, "grad_norm": 1.808190107345581, "learning_rate": 0.0001, "loss": 0.0238, "step": 23420 }, { "epoch": 154.14473684210526, "grad_norm": 1.5731375217437744, "learning_rate": 0.0001, "loss": 0.0267, "step": 23430 }, { "epoch": 154.21052631578948, "grad_norm": 1.620076298713684, "learning_rate": 0.0001, "loss": 0.0247, "step": 23440 }, { "epoch": 154.27631578947367, "grad_norm": 1.995374321937561, "learning_rate": 0.0001, "loss": 0.0206, "step": 23450 }, { "epoch": 154.3421052631579, "grad_norm": 1.8202935457229614, "learning_rate": 0.0001, "loss": 0.0204, "step": 23460 }, { "epoch": 154.4078947368421, "grad_norm": 2.0409576892852783, "learning_rate": 0.0001, "loss": 0.0213, "step": 23470 }, { "epoch": 154.47368421052633, "grad_norm": 2.014470338821411, "learning_rate": 0.0001, "loss": 0.0225, "step": 23480 }, { "epoch": 154.53947368421052, "grad_norm": 1.7902055978775024, "learning_rate": 0.0001, "loss": 0.0229, "step": 23490 }, { "epoch": 154.60526315789474, "grad_norm": 1.7112482786178589, "learning_rate": 0.0001, "loss": 0.0221, "step": 23500 }, { "epoch": 154.67105263157896, "grad_norm": 1.7765389680862427, "learning_rate": 0.0001, "loss": 0.0268, "step": 23510 }, { "epoch": 154.73684210526315, "grad_norm": 1.8928415775299072, "learning_rate": 0.0001, "loss": 0.0218, "step": 23520 }, { "epoch": 154.80263157894737, "grad_norm": 1.6941531896591187, "learning_rate": 0.0001, "loss": 0.0235, "step": 23530 }, { "epoch": 154.8684210526316, "grad_norm": 2.4524118900299072, "learning_rate": 0.0001, "loss": 0.0253, "step": 23540 }, { "epoch": 154.93421052631578, "grad_norm": 1.6318260431289673, "learning_rate": 0.0001, "loss": 0.0222, "step": 23550 }, { "epoch": 155.0, "grad_norm": 2.1063106060028076, "learning_rate": 0.0001, "loss": 0.0212, "step": 23560 }, { "epoch": 155.06578947368422, "grad_norm": 1.5853408575057983, "learning_rate": 0.0001, "loss": 0.0204, "step": 23570 }, { "epoch": 155.1315789473684, "grad_norm": 2.06276535987854, "learning_rate": 0.0001, "loss": 0.0241, "step": 23580 }, { "epoch": 155.19736842105263, "grad_norm": 1.8583823442459106, "learning_rate": 0.0001, "loss": 0.0191, "step": 23590 }, { "epoch": 155.26315789473685, "grad_norm": 1.0552171468734741, "learning_rate": 0.0001, "loss": 0.02, "step": 23600 }, { "epoch": 155.32894736842104, "grad_norm": 1.8386611938476562, "learning_rate": 0.0001, "loss": 0.0264, "step": 23610 }, { "epoch": 155.39473684210526, "grad_norm": 1.7614744901657104, "learning_rate": 0.0001, "loss": 0.0226, "step": 23620 }, { "epoch": 155.46052631578948, "grad_norm": 1.9284693002700806, "learning_rate": 0.0001, "loss": 0.0221, "step": 23630 }, { "epoch": 155.52631578947367, "grad_norm": 1.6458121538162231, "learning_rate": 0.0001, "loss": 0.0206, "step": 23640 }, { "epoch": 155.5921052631579, "grad_norm": 1.6920061111450195, "learning_rate": 0.0001, "loss": 0.027, "step": 23650 }, { "epoch": 155.6578947368421, "grad_norm": 1.9613218307495117, "learning_rate": 0.0001, "loss": 0.0249, "step": 23660 }, { "epoch": 155.72368421052633, "grad_norm": 1.5980496406555176, "learning_rate": 0.0001, "loss": 0.0256, "step": 23670 }, { "epoch": 155.78947368421052, "grad_norm": 1.8403593301773071, "learning_rate": 0.0001, "loss": 0.0257, "step": 23680 }, { "epoch": 155.85526315789474, "grad_norm": 1.6842002868652344, "learning_rate": 0.0001, "loss": 0.0223, "step": 23690 }, { "epoch": 155.92105263157896, "grad_norm": 1.9670850038528442, "learning_rate": 0.0001, "loss": 0.0233, "step": 23700 }, { "epoch": 155.98684210526315, "grad_norm": 1.5577634572982788, "learning_rate": 0.0001, "loss": 0.0214, "step": 23710 }, { "epoch": 156.05263157894737, "grad_norm": 1.851831078529358, "learning_rate": 0.0001, "loss": 0.0239, "step": 23720 }, { "epoch": 156.1184210526316, "grad_norm": 1.2264952659606934, "learning_rate": 0.0001, "loss": 0.0233, "step": 23730 }, { "epoch": 156.18421052631578, "grad_norm": 1.7241930961608887, "learning_rate": 0.0001, "loss": 0.0273, "step": 23740 }, { "epoch": 156.25, "grad_norm": 1.5615603923797607, "learning_rate": 0.0001, "loss": 0.0235, "step": 23750 }, { "epoch": 156.31578947368422, "grad_norm": 2.022111415863037, "learning_rate": 0.0001, "loss": 0.0219, "step": 23760 }, { "epoch": 156.3815789473684, "grad_norm": 1.629679799079895, "learning_rate": 0.0001, "loss": 0.0254, "step": 23770 }, { "epoch": 156.44736842105263, "grad_norm": 1.597721815109253, "learning_rate": 0.0001, "loss": 0.0211, "step": 23780 }, { "epoch": 156.51315789473685, "grad_norm": 1.9511239528656006, "learning_rate": 0.0001, "loss": 0.0224, "step": 23790 }, { "epoch": 156.57894736842104, "grad_norm": 2.190659999847412, "learning_rate": 0.0001, "loss": 0.0222, "step": 23800 }, { "epoch": 156.64473684210526, "grad_norm": 1.8419705629348755, "learning_rate": 0.0001, "loss": 0.0235, "step": 23810 }, { "epoch": 156.71052631578948, "grad_norm": 2.1330528259277344, "learning_rate": 0.0001, "loss": 0.0245, "step": 23820 }, { "epoch": 156.77631578947367, "grad_norm": 2.34106183052063, "learning_rate": 0.0001, "loss": 0.0216, "step": 23830 }, { "epoch": 156.8421052631579, "grad_norm": 2.4046969413757324, "learning_rate": 0.0001, "loss": 0.024, "step": 23840 }, { "epoch": 156.9078947368421, "grad_norm": 2.5134739875793457, "learning_rate": 0.0001, "loss": 0.0233, "step": 23850 }, { "epoch": 156.97368421052633, "grad_norm": 2.9191415309906006, "learning_rate": 0.0001, "loss": 0.0222, "step": 23860 }, { "epoch": 157.03947368421052, "grad_norm": 1.7491819858551025, "learning_rate": 0.0001, "loss": 0.0197, "step": 23870 }, { "epoch": 157.10526315789474, "grad_norm": 2.390775680541992, "learning_rate": 0.0001, "loss": 0.022, "step": 23880 }, { "epoch": 157.17105263157896, "grad_norm": 1.8659433126449585, "learning_rate": 0.0001, "loss": 0.0215, "step": 23890 }, { "epoch": 157.23684210526315, "grad_norm": 1.982847809791565, "learning_rate": 0.0001, "loss": 0.0251, "step": 23900 }, { "epoch": 157.30263157894737, "grad_norm": 2.087888479232788, "learning_rate": 0.0001, "loss": 0.0242, "step": 23910 }, { "epoch": 157.3684210526316, "grad_norm": 1.5428775548934937, "learning_rate": 0.0001, "loss": 0.0245, "step": 23920 }, { "epoch": 157.43421052631578, "grad_norm": 1.748639702796936, "learning_rate": 0.0001, "loss": 0.0232, "step": 23930 }, { "epoch": 157.5, "grad_norm": 1.9910292625427246, "learning_rate": 0.0001, "loss": 0.0192, "step": 23940 }, { "epoch": 157.56578947368422, "grad_norm": 1.622446894645691, "learning_rate": 0.0001, "loss": 0.0214, "step": 23950 }, { "epoch": 157.6315789473684, "grad_norm": 2.163909673690796, "learning_rate": 0.0001, "loss": 0.0195, "step": 23960 }, { "epoch": 157.69736842105263, "grad_norm": 2.2139010429382324, "learning_rate": 0.0001, "loss": 0.0221, "step": 23970 }, { "epoch": 157.76315789473685, "grad_norm": 1.817961573600769, "learning_rate": 0.0001, "loss": 0.0209, "step": 23980 }, { "epoch": 157.82894736842104, "grad_norm": 1.4640038013458252, "learning_rate": 0.0001, "loss": 0.0215, "step": 23990 }, { "epoch": 157.89473684210526, "grad_norm": 2.1661226749420166, "learning_rate": 0.0001, "loss": 0.0252, "step": 24000 }, { "epoch": 157.96052631578948, "grad_norm": 1.770627498626709, "learning_rate": 0.0001, "loss": 0.0198, "step": 24010 }, { "epoch": 158.02631578947367, "grad_norm": 1.1292043924331665, "learning_rate": 0.0001, "loss": 0.0234, "step": 24020 }, { "epoch": 158.0921052631579, "grad_norm": 2.0309619903564453, "learning_rate": 0.0001, "loss": 0.024, "step": 24030 }, { "epoch": 158.1578947368421, "grad_norm": 1.209721326828003, "learning_rate": 0.0001, "loss": 0.0238, "step": 24040 }, { "epoch": 158.22368421052633, "grad_norm": 1.308620572090149, "learning_rate": 0.0001, "loss": 0.0202, "step": 24050 }, { "epoch": 158.28947368421052, "grad_norm": 1.2972275018692017, "learning_rate": 0.0001, "loss": 0.0222, "step": 24060 }, { "epoch": 158.35526315789474, "grad_norm": 1.7762638330459595, "learning_rate": 0.0001, "loss": 0.028, "step": 24070 }, { "epoch": 158.42105263157896, "grad_norm": 1.5822019577026367, "learning_rate": 0.0001, "loss": 0.0246, "step": 24080 }, { "epoch": 158.48684210526315, "grad_norm": 1.7776178121566772, "learning_rate": 0.0001, "loss": 0.02, "step": 24090 }, { "epoch": 158.55263157894737, "grad_norm": 1.9444401264190674, "learning_rate": 0.0001, "loss": 0.022, "step": 24100 }, { "epoch": 158.6184210526316, "grad_norm": 1.4498629570007324, "learning_rate": 0.0001, "loss": 0.0244, "step": 24110 }, { "epoch": 158.68421052631578, "grad_norm": 1.7580052614212036, "learning_rate": 0.0001, "loss": 0.0245, "step": 24120 }, { "epoch": 158.75, "grad_norm": 1.3597279787063599, "learning_rate": 0.0001, "loss": 0.024, "step": 24130 }, { "epoch": 158.81578947368422, "grad_norm": 1.352316975593567, "learning_rate": 0.0001, "loss": 0.0235, "step": 24140 }, { "epoch": 158.8815789473684, "grad_norm": 1.8332091569900513, "learning_rate": 0.0001, "loss": 0.0218, "step": 24150 }, { "epoch": 158.94736842105263, "grad_norm": 1.7174333333969116, "learning_rate": 0.0001, "loss": 0.0235, "step": 24160 }, { "epoch": 159.01315789473685, "grad_norm": 2.038804531097412, "learning_rate": 0.0001, "loss": 0.0232, "step": 24170 }, { "epoch": 159.07894736842104, "grad_norm": 1.3634918928146362, "learning_rate": 0.0001, "loss": 0.0228, "step": 24180 }, { "epoch": 159.14473684210526, "grad_norm": 1.7274096012115479, "learning_rate": 0.0001, "loss": 0.0258, "step": 24190 }, { "epoch": 159.21052631578948, "grad_norm": 1.6375901699066162, "learning_rate": 0.0001, "loss": 0.023, "step": 24200 }, { "epoch": 159.27631578947367, "grad_norm": 1.9453024864196777, "learning_rate": 0.0001, "loss": 0.0225, "step": 24210 }, { "epoch": 159.3421052631579, "grad_norm": 1.561934232711792, "learning_rate": 0.0001, "loss": 0.0261, "step": 24220 }, { "epoch": 159.4078947368421, "grad_norm": 1.2609162330627441, "learning_rate": 0.0001, "loss": 0.0238, "step": 24230 }, { "epoch": 159.47368421052633, "grad_norm": 1.9866515398025513, "learning_rate": 0.0001, "loss": 0.0232, "step": 24240 }, { "epoch": 159.53947368421052, "grad_norm": 1.624843955039978, "learning_rate": 0.0001, "loss": 0.032, "step": 24250 }, { "epoch": 159.60526315789474, "grad_norm": 1.4248985052108765, "learning_rate": 0.0001, "loss": 0.0231, "step": 24260 }, { "epoch": 159.67105263157896, "grad_norm": 1.317251443862915, "learning_rate": 0.0001, "loss": 0.0228, "step": 24270 }, { "epoch": 159.73684210526315, "grad_norm": 1.2739076614379883, "learning_rate": 0.0001, "loss": 0.0255, "step": 24280 }, { "epoch": 159.80263157894737, "grad_norm": 1.4558136463165283, "learning_rate": 0.0001, "loss": 0.0212, "step": 24290 }, { "epoch": 159.8684210526316, "grad_norm": 1.7319409847259521, "learning_rate": 0.0001, "loss": 0.0222, "step": 24300 }, { "epoch": 159.93421052631578, "grad_norm": 1.525909423828125, "learning_rate": 0.0001, "loss": 0.0222, "step": 24310 }, { "epoch": 160.0, "grad_norm": 1.9642457962036133, "learning_rate": 0.0001, "loss": 0.0223, "step": 24320 }, { "epoch": 160.06578947368422, "grad_norm": 2.0831828117370605, "learning_rate": 0.0001, "loss": 0.0226, "step": 24330 }, { "epoch": 160.1315789473684, "grad_norm": 2.4830100536346436, "learning_rate": 0.0001, "loss": 0.0223, "step": 24340 }, { "epoch": 160.19736842105263, "grad_norm": 1.889860987663269, "learning_rate": 0.0001, "loss": 0.027, "step": 24350 }, { "epoch": 160.26315789473685, "grad_norm": 1.9782153367996216, "learning_rate": 0.0001, "loss": 0.0188, "step": 24360 }, { "epoch": 160.32894736842104, "grad_norm": 2.2089006900787354, "learning_rate": 0.0001, "loss": 0.025, "step": 24370 }, { "epoch": 160.39473684210526, "grad_norm": 1.4382158517837524, "learning_rate": 0.0001, "loss": 0.0221, "step": 24380 }, { "epoch": 160.46052631578948, "grad_norm": 1.4691486358642578, "learning_rate": 0.0001, "loss": 0.0255, "step": 24390 }, { "epoch": 160.52631578947367, "grad_norm": 1.9097893238067627, "learning_rate": 0.0001, "loss": 0.0209, "step": 24400 }, { "epoch": 160.5921052631579, "grad_norm": 1.9869564771652222, "learning_rate": 0.0001, "loss": 0.02, "step": 24410 }, { "epoch": 160.6578947368421, "grad_norm": 1.8164511919021606, "learning_rate": 0.0001, "loss": 0.0206, "step": 24420 }, { "epoch": 160.72368421052633, "grad_norm": 2.0141594409942627, "learning_rate": 0.0001, "loss": 0.0262, "step": 24430 }, { "epoch": 160.78947368421052, "grad_norm": 2.3886501789093018, "learning_rate": 0.0001, "loss": 0.0199, "step": 24440 }, { "epoch": 160.85526315789474, "grad_norm": 2.701129674911499, "learning_rate": 0.0001, "loss": 0.0204, "step": 24450 }, { "epoch": 160.92105263157896, "grad_norm": 2.436798334121704, "learning_rate": 0.0001, "loss": 0.0278, "step": 24460 }, { "epoch": 160.98684210526315, "grad_norm": 1.1855602264404297, "learning_rate": 0.0001, "loss": 0.0204, "step": 24470 }, { "epoch": 161.05263157894737, "grad_norm": 1.7768343687057495, "learning_rate": 0.0001, "loss": 0.0261, "step": 24480 }, { "epoch": 161.1184210526316, "grad_norm": 1.5704787969589233, "learning_rate": 0.0001, "loss": 0.0198, "step": 24490 }, { "epoch": 161.18421052631578, "grad_norm": 2.3227899074554443, "learning_rate": 0.0001, "loss": 0.0215, "step": 24500 }, { "epoch": 161.25, "grad_norm": 2.1513853073120117, "learning_rate": 0.0001, "loss": 0.0228, "step": 24510 }, { "epoch": 161.31578947368422, "grad_norm": 3.341531991958618, "learning_rate": 0.0001, "loss": 0.02, "step": 24520 }, { "epoch": 161.3815789473684, "grad_norm": 3.341867446899414, "learning_rate": 0.0001, "loss": 0.0248, "step": 24530 }, { "epoch": 161.44736842105263, "grad_norm": 2.543114185333252, "learning_rate": 0.0001, "loss": 0.0242, "step": 24540 }, { "epoch": 161.51315789473685, "grad_norm": 2.0302734375, "learning_rate": 0.0001, "loss": 0.0252, "step": 24550 }, { "epoch": 161.57894736842104, "grad_norm": 2.049586534500122, "learning_rate": 0.0001, "loss": 0.0254, "step": 24560 }, { "epoch": 161.64473684210526, "grad_norm": 2.299772024154663, "learning_rate": 0.0001, "loss": 0.0202, "step": 24570 }, { "epoch": 161.71052631578948, "grad_norm": 1.85726797580719, "learning_rate": 0.0001, "loss": 0.023, "step": 24580 }, { "epoch": 161.77631578947367, "grad_norm": 1.4105827808380127, "learning_rate": 0.0001, "loss": 0.0192, "step": 24590 }, { "epoch": 161.8421052631579, "grad_norm": 1.7687551975250244, "learning_rate": 0.0001, "loss": 0.0213, "step": 24600 }, { "epoch": 161.9078947368421, "grad_norm": 1.3623197078704834, "learning_rate": 0.0001, "loss": 0.0234, "step": 24610 }, { "epoch": 161.97368421052633, "grad_norm": 1.7381116151809692, "learning_rate": 0.0001, "loss": 0.0191, "step": 24620 }, { "epoch": 162.03947368421052, "grad_norm": 1.807409405708313, "learning_rate": 0.0001, "loss": 0.0224, "step": 24630 }, { "epoch": 162.10526315789474, "grad_norm": 1.6224690675735474, "learning_rate": 0.0001, "loss": 0.0244, "step": 24640 }, { "epoch": 162.17105263157896, "grad_norm": 1.7746739387512207, "learning_rate": 0.0001, "loss": 0.0194, "step": 24650 }, { "epoch": 162.23684210526315, "grad_norm": 1.5065195560455322, "learning_rate": 0.0001, "loss": 0.0245, "step": 24660 }, { "epoch": 162.30263157894737, "grad_norm": 1.8812408447265625, "learning_rate": 0.0001, "loss": 0.0216, "step": 24670 }, { "epoch": 162.3684210526316, "grad_norm": 1.9103243350982666, "learning_rate": 0.0001, "loss": 0.0254, "step": 24680 }, { "epoch": 162.43421052631578, "grad_norm": 1.7088998556137085, "learning_rate": 0.0001, "loss": 0.0212, "step": 24690 }, { "epoch": 162.5, "grad_norm": 1.5612587928771973, "learning_rate": 0.0001, "loss": 0.0221, "step": 24700 }, { "epoch": 162.56578947368422, "grad_norm": 1.4126250743865967, "learning_rate": 0.0001, "loss": 0.0193, "step": 24710 }, { "epoch": 162.6315789473684, "grad_norm": 1.5410237312316895, "learning_rate": 0.0001, "loss": 0.0214, "step": 24720 }, { "epoch": 162.69736842105263, "grad_norm": 1.4778897762298584, "learning_rate": 0.0001, "loss": 0.0211, "step": 24730 }, { "epoch": 162.76315789473685, "grad_norm": 1.7722923755645752, "learning_rate": 0.0001, "loss": 0.0289, "step": 24740 }, { "epoch": 162.82894736842104, "grad_norm": 1.5543876886367798, "learning_rate": 0.0001, "loss": 0.0251, "step": 24750 }, { "epoch": 162.89473684210526, "grad_norm": 1.6109910011291504, "learning_rate": 0.0001, "loss": 0.0232, "step": 24760 }, { "epoch": 162.96052631578948, "grad_norm": 2.16675066947937, "learning_rate": 0.0001, "loss": 0.0242, "step": 24770 }, { "epoch": 163.02631578947367, "grad_norm": 2.0300378799438477, "learning_rate": 0.0001, "loss": 0.0226, "step": 24780 }, { "epoch": 163.0921052631579, "grad_norm": 1.6694172620773315, "learning_rate": 0.0001, "loss": 0.0193, "step": 24790 }, { "epoch": 163.1578947368421, "grad_norm": 2.136312484741211, "learning_rate": 0.0001, "loss": 0.0245, "step": 24800 }, { "epoch": 163.22368421052633, "grad_norm": 1.7899351119995117, "learning_rate": 0.0001, "loss": 0.0236, "step": 24810 }, { "epoch": 163.28947368421052, "grad_norm": 2.0168893337249756, "learning_rate": 0.0001, "loss": 0.0295, "step": 24820 }, { "epoch": 163.35526315789474, "grad_norm": 1.9441477060317993, "learning_rate": 0.0001, "loss": 0.0216, "step": 24830 }, { "epoch": 163.42105263157896, "grad_norm": 2.2133045196533203, "learning_rate": 0.0001, "loss": 0.0207, "step": 24840 }, { "epoch": 163.48684210526315, "grad_norm": 2.1752119064331055, "learning_rate": 0.0001, "loss": 0.0247, "step": 24850 }, { "epoch": 163.55263157894737, "grad_norm": 1.6687957048416138, "learning_rate": 0.0001, "loss": 0.0224, "step": 24860 }, { "epoch": 163.6184210526316, "grad_norm": 1.7108625173568726, "learning_rate": 0.0001, "loss": 0.0182, "step": 24870 }, { "epoch": 163.68421052631578, "grad_norm": 1.949640154838562, "learning_rate": 0.0001, "loss": 0.0211, "step": 24880 }, { "epoch": 163.75, "grad_norm": 2.0547618865966797, "learning_rate": 0.0001, "loss": 0.0239, "step": 24890 }, { "epoch": 163.81578947368422, "grad_norm": 1.6096420288085938, "learning_rate": 0.0001, "loss": 0.0202, "step": 24900 }, { "epoch": 163.8815789473684, "grad_norm": 1.7185242176055908, "learning_rate": 0.0001, "loss": 0.0219, "step": 24910 }, { "epoch": 163.94736842105263, "grad_norm": 1.8576570749282837, "learning_rate": 0.0001, "loss": 0.0202, "step": 24920 }, { "epoch": 164.01315789473685, "grad_norm": 1.9916502237319946, "learning_rate": 0.0001, "loss": 0.0226, "step": 24930 }, { "epoch": 164.07894736842104, "grad_norm": 1.6580673456192017, "learning_rate": 0.0001, "loss": 0.0211, "step": 24940 }, { "epoch": 164.14473684210526, "grad_norm": 1.2811954021453857, "learning_rate": 0.0001, "loss": 0.0226, "step": 24950 }, { "epoch": 164.21052631578948, "grad_norm": 1.6875170469284058, "learning_rate": 0.0001, "loss": 0.0225, "step": 24960 }, { "epoch": 164.27631578947367, "grad_norm": 1.8426777124404907, "learning_rate": 0.0001, "loss": 0.0255, "step": 24970 }, { "epoch": 164.3421052631579, "grad_norm": 2.3827812671661377, "learning_rate": 0.0001, "loss": 0.0216, "step": 24980 }, { "epoch": 164.4078947368421, "grad_norm": 1.7686107158660889, "learning_rate": 0.0001, "loss": 0.0247, "step": 24990 }, { "epoch": 164.47368421052633, "grad_norm": 1.7262240648269653, "learning_rate": 0.0001, "loss": 0.0197, "step": 25000 }, { "epoch": 164.53947368421052, "grad_norm": 1.6811727285385132, "learning_rate": 0.0001, "loss": 0.0206, "step": 25010 }, { "epoch": 164.60526315789474, "grad_norm": 2.1168859004974365, "learning_rate": 0.0001, "loss": 0.0214, "step": 25020 }, { "epoch": 164.67105263157896, "grad_norm": 2.0979933738708496, "learning_rate": 0.0001, "loss": 0.026, "step": 25030 }, { "epoch": 164.73684210526315, "grad_norm": 1.7728992700576782, "learning_rate": 0.0001, "loss": 0.0227, "step": 25040 }, { "epoch": 164.80263157894737, "grad_norm": 2.154282808303833, "learning_rate": 0.0001, "loss": 0.0217, "step": 25050 }, { "epoch": 164.8684210526316, "grad_norm": 1.865557074546814, "learning_rate": 0.0001, "loss": 0.0204, "step": 25060 }, { "epoch": 164.93421052631578, "grad_norm": 1.9065227508544922, "learning_rate": 0.0001, "loss": 0.0239, "step": 25070 }, { "epoch": 165.0, "grad_norm": 1.865450382232666, "learning_rate": 0.0001, "loss": 0.023, "step": 25080 }, { "epoch": 165.06578947368422, "grad_norm": 1.5958958864212036, "learning_rate": 0.0001, "loss": 0.0241, "step": 25090 }, { "epoch": 165.1315789473684, "grad_norm": 2.103440761566162, "learning_rate": 0.0001, "loss": 0.0214, "step": 25100 }, { "epoch": 165.19736842105263, "grad_norm": 2.1394553184509277, "learning_rate": 0.0001, "loss": 0.0244, "step": 25110 }, { "epoch": 165.26315789473685, "grad_norm": 1.8895655870437622, "learning_rate": 0.0001, "loss": 0.0237, "step": 25120 }, { "epoch": 165.32894736842104, "grad_norm": 1.9759409427642822, "learning_rate": 0.0001, "loss": 0.023, "step": 25130 }, { "epoch": 165.39473684210526, "grad_norm": 1.5826544761657715, "learning_rate": 0.0001, "loss": 0.0222, "step": 25140 }, { "epoch": 165.46052631578948, "grad_norm": 1.8101918697357178, "learning_rate": 0.0001, "loss": 0.0236, "step": 25150 }, { "epoch": 165.52631578947367, "grad_norm": 1.2470104694366455, "learning_rate": 0.0001, "loss": 0.0234, "step": 25160 }, { "epoch": 165.5921052631579, "grad_norm": 1.7871711254119873, "learning_rate": 0.0001, "loss": 0.0189, "step": 25170 }, { "epoch": 165.6578947368421, "grad_norm": 1.746167540550232, "learning_rate": 0.0001, "loss": 0.0255, "step": 25180 }, { "epoch": 165.72368421052633, "grad_norm": 1.7877182960510254, "learning_rate": 0.0001, "loss": 0.0231, "step": 25190 }, { "epoch": 165.78947368421052, "grad_norm": 1.8038177490234375, "learning_rate": 0.0001, "loss": 0.0224, "step": 25200 }, { "epoch": 165.85526315789474, "grad_norm": 1.4833903312683105, "learning_rate": 0.0001, "loss": 0.0217, "step": 25210 }, { "epoch": 165.92105263157896, "grad_norm": 1.5070832967758179, "learning_rate": 0.0001, "loss": 0.0231, "step": 25220 }, { "epoch": 165.98684210526315, "grad_norm": 1.9843478202819824, "learning_rate": 0.0001, "loss": 0.0218, "step": 25230 }, { "epoch": 166.05263157894737, "grad_norm": 2.1306796073913574, "learning_rate": 0.0001, "loss": 0.0257, "step": 25240 }, { "epoch": 166.1184210526316, "grad_norm": 1.44062340259552, "learning_rate": 0.0001, "loss": 0.02, "step": 25250 }, { "epoch": 166.18421052631578, "grad_norm": 1.8091195821762085, "learning_rate": 0.0001, "loss": 0.0216, "step": 25260 }, { "epoch": 166.25, "grad_norm": 1.5930238962173462, "learning_rate": 0.0001, "loss": 0.0244, "step": 25270 }, { "epoch": 166.31578947368422, "grad_norm": 1.6070538759231567, "learning_rate": 0.0001, "loss": 0.0211, "step": 25280 }, { "epoch": 166.3815789473684, "grad_norm": 2.1745870113372803, "learning_rate": 0.0001, "loss": 0.0215, "step": 25290 }, { "epoch": 166.44736842105263, "grad_norm": 2.135207176208496, "learning_rate": 0.0001, "loss": 0.0273, "step": 25300 }, { "epoch": 166.51315789473685, "grad_norm": 2.0317273139953613, "learning_rate": 0.0001, "loss": 0.0202, "step": 25310 }, { "epoch": 166.57894736842104, "grad_norm": 1.974480152130127, "learning_rate": 0.0001, "loss": 0.0222, "step": 25320 }, { "epoch": 166.64473684210526, "grad_norm": 1.630603551864624, "learning_rate": 0.0001, "loss": 0.0203, "step": 25330 }, { "epoch": 166.71052631578948, "grad_norm": 1.7155988216400146, "learning_rate": 0.0001, "loss": 0.0246, "step": 25340 }, { "epoch": 166.77631578947367, "grad_norm": 1.6035009622573853, "learning_rate": 0.0001, "loss": 0.0209, "step": 25350 }, { "epoch": 166.8421052631579, "grad_norm": 1.4180253744125366, "learning_rate": 0.0001, "loss": 0.0251, "step": 25360 }, { "epoch": 166.9078947368421, "grad_norm": 1.7308539152145386, "learning_rate": 0.0001, "loss": 0.0264, "step": 25370 }, { "epoch": 166.97368421052633, "grad_norm": 1.3995314836502075, "learning_rate": 0.0001, "loss": 0.0242, "step": 25380 }, { "epoch": 167.03947368421052, "grad_norm": 1.2770179510116577, "learning_rate": 0.0001, "loss": 0.0202, "step": 25390 }, { "epoch": 167.10526315789474, "grad_norm": 1.6639790534973145, "learning_rate": 0.0001, "loss": 0.0259, "step": 25400 }, { "epoch": 167.17105263157896, "grad_norm": 1.5271854400634766, "learning_rate": 0.0001, "loss": 0.0212, "step": 25410 }, { "epoch": 167.23684210526315, "grad_norm": 1.395559310913086, "learning_rate": 0.0001, "loss": 0.0213, "step": 25420 }, { "epoch": 167.30263157894737, "grad_norm": 2.25486159324646, "learning_rate": 0.0001, "loss": 0.0289, "step": 25430 }, { "epoch": 167.3684210526316, "grad_norm": 1.4598565101623535, "learning_rate": 0.0001, "loss": 0.021, "step": 25440 }, { "epoch": 167.43421052631578, "grad_norm": 1.6802423000335693, "learning_rate": 0.0001, "loss": 0.024, "step": 25450 }, { "epoch": 167.5, "grad_norm": 1.6243155002593994, "learning_rate": 0.0001, "loss": 0.0215, "step": 25460 }, { "epoch": 167.56578947368422, "grad_norm": 1.4182437658309937, "learning_rate": 0.0001, "loss": 0.0249, "step": 25470 }, { "epoch": 167.6315789473684, "grad_norm": 2.063037157058716, "learning_rate": 0.0001, "loss": 0.0204, "step": 25480 }, { "epoch": 167.69736842105263, "grad_norm": 2.4031455516815186, "learning_rate": 0.0001, "loss": 0.021, "step": 25490 }, { "epoch": 167.76315789473685, "grad_norm": 2.103890895843506, "learning_rate": 0.0001, "loss": 0.0241, "step": 25500 }, { "epoch": 167.82894736842104, "grad_norm": 1.659893274307251, "learning_rate": 0.0001, "loss": 0.0227, "step": 25510 }, { "epoch": 167.89473684210526, "grad_norm": 2.0746796131134033, "learning_rate": 0.0001, "loss": 0.0267, "step": 25520 }, { "epoch": 167.96052631578948, "grad_norm": 1.4943183660507202, "learning_rate": 0.0001, "loss": 0.0189, "step": 25530 }, { "epoch": 168.02631578947367, "grad_norm": 1.8996371030807495, "learning_rate": 0.0001, "loss": 0.0224, "step": 25540 }, { "epoch": 168.0921052631579, "grad_norm": 1.9129077196121216, "learning_rate": 0.0001, "loss": 0.0206, "step": 25550 }, { "epoch": 168.1578947368421, "grad_norm": 1.8461135625839233, "learning_rate": 0.0001, "loss": 0.0257, "step": 25560 }, { "epoch": 168.22368421052633, "grad_norm": 2.115773916244507, "learning_rate": 0.0001, "loss": 0.0241, "step": 25570 }, { "epoch": 168.28947368421052, "grad_norm": 2.333953619003296, "learning_rate": 0.0001, "loss": 0.0219, "step": 25580 }, { "epoch": 168.35526315789474, "grad_norm": 1.5396063327789307, "learning_rate": 0.0001, "loss": 0.023, "step": 25590 }, { "epoch": 168.42105263157896, "grad_norm": 2.014495611190796, "learning_rate": 0.0001, "loss": 0.021, "step": 25600 }, { "epoch": 168.48684210526315, "grad_norm": 1.60231351852417, "learning_rate": 0.0001, "loss": 0.0189, "step": 25610 }, { "epoch": 168.55263157894737, "grad_norm": 2.170673131942749, "learning_rate": 0.0001, "loss": 0.0228, "step": 25620 }, { "epoch": 168.6184210526316, "grad_norm": 1.4584606885910034, "learning_rate": 0.0001, "loss": 0.0245, "step": 25630 }, { "epoch": 168.68421052631578, "grad_norm": 1.5162572860717773, "learning_rate": 0.0001, "loss": 0.0212, "step": 25640 }, { "epoch": 168.75, "grad_norm": 1.6019231081008911, "learning_rate": 0.0001, "loss": 0.0225, "step": 25650 }, { "epoch": 168.81578947368422, "grad_norm": 1.6418616771697998, "learning_rate": 0.0001, "loss": 0.0237, "step": 25660 }, { "epoch": 168.8815789473684, "grad_norm": 1.3318489789962769, "learning_rate": 0.0001, "loss": 0.0214, "step": 25670 }, { "epoch": 168.94736842105263, "grad_norm": 1.8107408285140991, "learning_rate": 0.0001, "loss": 0.0234, "step": 25680 }, { "epoch": 169.01315789473685, "grad_norm": 1.7817691564559937, "learning_rate": 0.0001, "loss": 0.0236, "step": 25690 }, { "epoch": 169.07894736842104, "grad_norm": 1.4330850839614868, "learning_rate": 0.0001, "loss": 0.0263, "step": 25700 }, { "epoch": 169.14473684210526, "grad_norm": 1.4552818536758423, "learning_rate": 0.0001, "loss": 0.0209, "step": 25710 }, { "epoch": 169.21052631578948, "grad_norm": 1.5058170557022095, "learning_rate": 0.0001, "loss": 0.0251, "step": 25720 }, { "epoch": 169.27631578947367, "grad_norm": 1.8806143999099731, "learning_rate": 0.0001, "loss": 0.0211, "step": 25730 }, { "epoch": 169.3421052631579, "grad_norm": 2.259068727493286, "learning_rate": 0.0001, "loss": 0.0238, "step": 25740 }, { "epoch": 169.4078947368421, "grad_norm": 2.3348898887634277, "learning_rate": 0.0001, "loss": 0.0203, "step": 25750 }, { "epoch": 169.47368421052633, "grad_norm": 2.3622491359710693, "learning_rate": 0.0001, "loss": 0.0244, "step": 25760 }, { "epoch": 169.53947368421052, "grad_norm": 2.124016523361206, "learning_rate": 0.0001, "loss": 0.0211, "step": 25770 }, { "epoch": 169.60526315789474, "grad_norm": 2.0746822357177734, "learning_rate": 0.0001, "loss": 0.0318, "step": 25780 }, { "epoch": 169.67105263157896, "grad_norm": 1.5856722593307495, "learning_rate": 0.0001, "loss": 0.021, "step": 25790 }, { "epoch": 169.73684210526315, "grad_norm": 1.807373046875, "learning_rate": 0.0001, "loss": 0.0336, "step": 25800 }, { "epoch": 169.80263157894737, "grad_norm": 2.3961710929870605, "learning_rate": 0.0001, "loss": 0.0218, "step": 25810 }, { "epoch": 169.8684210526316, "grad_norm": 2.2760679721832275, "learning_rate": 0.0001, "loss": 0.0222, "step": 25820 }, { "epoch": 169.93421052631578, "grad_norm": 2.251207113265991, "learning_rate": 0.0001, "loss": 0.0201, "step": 25830 }, { "epoch": 170.0, "grad_norm": 1.8260701894760132, "learning_rate": 0.0001, "loss": 0.0218, "step": 25840 }, { "epoch": 170.06578947368422, "grad_norm": 1.8284475803375244, "learning_rate": 0.0001, "loss": 0.0214, "step": 25850 }, { "epoch": 170.1315789473684, "grad_norm": 2.4426116943359375, "learning_rate": 0.0001, "loss": 0.0192, "step": 25860 }, { "epoch": 170.19736842105263, "grad_norm": 2.1333835124969482, "learning_rate": 0.0001, "loss": 0.0243, "step": 25870 }, { "epoch": 170.26315789473685, "grad_norm": 1.8559892177581787, "learning_rate": 0.0001, "loss": 0.0199, "step": 25880 }, { "epoch": 170.32894736842104, "grad_norm": 1.4473143815994263, "learning_rate": 0.0001, "loss": 0.0217, "step": 25890 }, { "epoch": 170.39473684210526, "grad_norm": 2.0138096809387207, "learning_rate": 0.0001, "loss": 0.0177, "step": 25900 }, { "epoch": 170.46052631578948, "grad_norm": 2.234858512878418, "learning_rate": 0.0001, "loss": 0.0203, "step": 25910 }, { "epoch": 170.52631578947367, "grad_norm": 1.7965219020843506, "learning_rate": 0.0001, "loss": 0.019, "step": 25920 }, { "epoch": 170.5921052631579, "grad_norm": 1.8856391906738281, "learning_rate": 0.0001, "loss": 0.025, "step": 25930 }, { "epoch": 170.6578947368421, "grad_norm": 2.1251943111419678, "learning_rate": 0.0001, "loss": 0.0205, "step": 25940 }, { "epoch": 170.72368421052633, "grad_norm": 1.5532712936401367, "learning_rate": 0.0001, "loss": 0.0226, "step": 25950 }, { "epoch": 170.78947368421052, "grad_norm": 1.5193606615066528, "learning_rate": 0.0001, "loss": 0.0215, "step": 25960 }, { "epoch": 170.85526315789474, "grad_norm": 1.47963547706604, "learning_rate": 0.0001, "loss": 0.0222, "step": 25970 }, { "epoch": 170.92105263157896, "grad_norm": 1.4889603853225708, "learning_rate": 0.0001, "loss": 0.0234, "step": 25980 }, { "epoch": 170.98684210526315, "grad_norm": 1.9464101791381836, "learning_rate": 0.0001, "loss": 0.0215, "step": 25990 }, { "epoch": 171.05263157894737, "grad_norm": 1.8738350868225098, "learning_rate": 0.0001, "loss": 0.0208, "step": 26000 }, { "epoch": 171.1184210526316, "grad_norm": 1.815657377243042, "learning_rate": 0.0001, "loss": 0.0238, "step": 26010 }, { "epoch": 171.18421052631578, "grad_norm": 1.604430913925171, "learning_rate": 0.0001, "loss": 0.02, "step": 26020 }, { "epoch": 171.25, "grad_norm": 1.956494688987732, "learning_rate": 0.0001, "loss": 0.019, "step": 26030 }, { "epoch": 171.31578947368422, "grad_norm": 1.962283730506897, "learning_rate": 0.0001, "loss": 0.0248, "step": 26040 }, { "epoch": 171.3815789473684, "grad_norm": 1.5422719717025757, "learning_rate": 0.0001, "loss": 0.0208, "step": 26050 }, { "epoch": 171.44736842105263, "grad_norm": 1.9913711547851562, "learning_rate": 0.0001, "loss": 0.0229, "step": 26060 }, { "epoch": 171.51315789473685, "grad_norm": 1.730075716972351, "learning_rate": 0.0001, "loss": 0.0244, "step": 26070 }, { "epoch": 171.57894736842104, "grad_norm": 1.6983269453048706, "learning_rate": 0.0001, "loss": 0.022, "step": 26080 }, { "epoch": 171.64473684210526, "grad_norm": 1.9188017845153809, "learning_rate": 0.0001, "loss": 0.0226, "step": 26090 }, { "epoch": 171.71052631578948, "grad_norm": 1.8665289878845215, "learning_rate": 0.0001, "loss": 0.0209, "step": 26100 }, { "epoch": 171.77631578947367, "grad_norm": 2.2272634506225586, "learning_rate": 0.0001, "loss": 0.0207, "step": 26110 }, { "epoch": 171.8421052631579, "grad_norm": 2.0777463912963867, "learning_rate": 0.0001, "loss": 0.0216, "step": 26120 }, { "epoch": 171.9078947368421, "grad_norm": 2.4250311851501465, "learning_rate": 0.0001, "loss": 0.0261, "step": 26130 }, { "epoch": 171.97368421052633, "grad_norm": 2.2014386653900146, "learning_rate": 0.0001, "loss": 0.0238, "step": 26140 }, { "epoch": 172.03947368421052, "grad_norm": 1.7425817251205444, "learning_rate": 0.0001, "loss": 0.0198, "step": 26150 }, { "epoch": 172.10526315789474, "grad_norm": 1.7521036863327026, "learning_rate": 0.0001, "loss": 0.0206, "step": 26160 }, { "epoch": 172.17105263157896, "grad_norm": 2.3023085594177246, "learning_rate": 0.0001, "loss": 0.0213, "step": 26170 }, { "epoch": 172.23684210526315, "grad_norm": 1.6948318481445312, "learning_rate": 0.0001, "loss": 0.0194, "step": 26180 }, { "epoch": 172.30263157894737, "grad_norm": 1.9453755617141724, "learning_rate": 0.0001, "loss": 0.0197, "step": 26190 }, { "epoch": 172.3684210526316, "grad_norm": 1.5177639722824097, "learning_rate": 0.0001, "loss": 0.0243, "step": 26200 }, { "epoch": 172.43421052631578, "grad_norm": 1.4456307888031006, "learning_rate": 0.0001, "loss": 0.0226, "step": 26210 }, { "epoch": 172.5, "grad_norm": 2.054652452468872, "learning_rate": 0.0001, "loss": 0.0236, "step": 26220 }, { "epoch": 172.56578947368422, "grad_norm": 1.6973083019256592, "learning_rate": 0.0001, "loss": 0.0239, "step": 26230 }, { "epoch": 172.6315789473684, "grad_norm": 2.4906063079833984, "learning_rate": 0.0001, "loss": 0.0223, "step": 26240 }, { "epoch": 172.69736842105263, "grad_norm": 2.173586368560791, "learning_rate": 0.0001, "loss": 0.0226, "step": 26250 }, { "epoch": 172.76315789473685, "grad_norm": 1.9783223867416382, "learning_rate": 0.0001, "loss": 0.0209, "step": 26260 }, { "epoch": 172.82894736842104, "grad_norm": 2.0747900009155273, "learning_rate": 0.0001, "loss": 0.0199, "step": 26270 }, { "epoch": 172.89473684210526, "grad_norm": 1.5780723094940186, "learning_rate": 0.0001, "loss": 0.0218, "step": 26280 }, { "epoch": 172.96052631578948, "grad_norm": 1.771170973777771, "learning_rate": 0.0001, "loss": 0.0185, "step": 26290 }, { "epoch": 173.02631578947367, "grad_norm": 1.338998556137085, "learning_rate": 0.0001, "loss": 0.027, "step": 26300 }, { "epoch": 173.0921052631579, "grad_norm": 1.5725027322769165, "learning_rate": 0.0001, "loss": 0.0204, "step": 26310 }, { "epoch": 173.1578947368421, "grad_norm": 1.5240291357040405, "learning_rate": 0.0001, "loss": 0.025, "step": 26320 }, { "epoch": 173.22368421052633, "grad_norm": 1.8865097761154175, "learning_rate": 0.0001, "loss": 0.0192, "step": 26330 }, { "epoch": 173.28947368421052, "grad_norm": 1.8996760845184326, "learning_rate": 0.0001, "loss": 0.0242, "step": 26340 }, { "epoch": 173.35526315789474, "grad_norm": 1.5630555152893066, "learning_rate": 0.0001, "loss": 0.0202, "step": 26350 }, { "epoch": 173.42105263157896, "grad_norm": 1.6215249300003052, "learning_rate": 0.0001, "loss": 0.0194, "step": 26360 }, { "epoch": 173.48684210526315, "grad_norm": 1.3086528778076172, "learning_rate": 0.0001, "loss": 0.0262, "step": 26370 }, { "epoch": 173.55263157894737, "grad_norm": 2.32761549949646, "learning_rate": 0.0001, "loss": 0.0212, "step": 26380 }, { "epoch": 173.6184210526316, "grad_norm": 1.6208863258361816, "learning_rate": 0.0001, "loss": 0.0218, "step": 26390 }, { "epoch": 173.68421052631578, "grad_norm": 1.417353868484497, "learning_rate": 0.0001, "loss": 0.0214, "step": 26400 }, { "epoch": 173.75, "grad_norm": 1.602004885673523, "learning_rate": 0.0001, "loss": 0.0238, "step": 26410 }, { "epoch": 173.81578947368422, "grad_norm": 1.7189509868621826, "learning_rate": 0.0001, "loss": 0.0246, "step": 26420 }, { "epoch": 173.8815789473684, "grad_norm": 1.7798396348953247, "learning_rate": 0.0001, "loss": 0.0217, "step": 26430 }, { "epoch": 173.94736842105263, "grad_norm": 1.6465184688568115, "learning_rate": 0.0001, "loss": 0.0265, "step": 26440 }, { "epoch": 174.01315789473685, "grad_norm": 1.5844390392303467, "learning_rate": 0.0001, "loss": 0.0228, "step": 26450 }, { "epoch": 174.07894736842104, "grad_norm": 1.979604721069336, "learning_rate": 0.0001, "loss": 0.0272, "step": 26460 }, { "epoch": 174.14473684210526, "grad_norm": 1.5402264595031738, "learning_rate": 0.0001, "loss": 0.0255, "step": 26470 }, { "epoch": 174.21052631578948, "grad_norm": 1.812268614768982, "learning_rate": 0.0001, "loss": 0.0226, "step": 26480 }, { "epoch": 174.27631578947367, "grad_norm": 1.7953846454620361, "learning_rate": 0.0001, "loss": 0.0235, "step": 26490 }, { "epoch": 174.3421052631579, "grad_norm": 1.4559024572372437, "learning_rate": 0.0001, "loss": 0.02, "step": 26500 }, { "epoch": 174.4078947368421, "grad_norm": 1.2799280881881714, "learning_rate": 0.0001, "loss": 0.0203, "step": 26510 }, { "epoch": 174.47368421052633, "grad_norm": 1.097396731376648, "learning_rate": 0.0001, "loss": 0.0195, "step": 26520 }, { "epoch": 174.53947368421052, "grad_norm": 1.3001881837844849, "learning_rate": 0.0001, "loss": 0.0253, "step": 26530 }, { "epoch": 174.60526315789474, "grad_norm": 1.833054542541504, "learning_rate": 0.0001, "loss": 0.0247, "step": 26540 }, { "epoch": 174.67105263157896, "grad_norm": 1.7093205451965332, "learning_rate": 0.0001, "loss": 0.0209, "step": 26550 }, { "epoch": 174.73684210526315, "grad_norm": 1.809487223625183, "learning_rate": 0.0001, "loss": 0.023, "step": 26560 }, { "epoch": 174.80263157894737, "grad_norm": 1.6241194009780884, "learning_rate": 0.0001, "loss": 0.0211, "step": 26570 }, { "epoch": 174.8684210526316, "grad_norm": 1.616720199584961, "learning_rate": 0.0001, "loss": 0.0245, "step": 26580 }, { "epoch": 174.93421052631578, "grad_norm": 1.684369444847107, "learning_rate": 0.0001, "loss": 0.0228, "step": 26590 }, { "epoch": 175.0, "grad_norm": 1.4860320091247559, "learning_rate": 0.0001, "loss": 0.0225, "step": 26600 }, { "epoch": 175.06578947368422, "grad_norm": 1.473105549812317, "learning_rate": 0.0001, "loss": 0.0219, "step": 26610 }, { "epoch": 175.1315789473684, "grad_norm": 1.3402820825576782, "learning_rate": 0.0001, "loss": 0.022, "step": 26620 }, { "epoch": 175.19736842105263, "grad_norm": 1.9095194339752197, "learning_rate": 0.0001, "loss": 0.025, "step": 26630 }, { "epoch": 175.26315789473685, "grad_norm": 2.3372244834899902, "learning_rate": 0.0001, "loss": 0.0261, "step": 26640 }, { "epoch": 175.32894736842104, "grad_norm": 2.663201332092285, "learning_rate": 0.0001, "loss": 0.0252, "step": 26650 }, { "epoch": 175.39473684210526, "grad_norm": 2.1149675846099854, "learning_rate": 0.0001, "loss": 0.0243, "step": 26660 }, { "epoch": 175.46052631578948, "grad_norm": 2.2278642654418945, "learning_rate": 0.0001, "loss": 0.026, "step": 26670 }, { "epoch": 175.52631578947367, "grad_norm": 1.7145836353302002, "learning_rate": 0.0001, "loss": 0.0211, "step": 26680 }, { "epoch": 175.5921052631579, "grad_norm": 2.237415075302124, "learning_rate": 0.0001, "loss": 0.0229, "step": 26690 }, { "epoch": 175.6578947368421, "grad_norm": 1.7287206649780273, "learning_rate": 0.0001, "loss": 0.0221, "step": 26700 }, { "epoch": 175.72368421052633, "grad_norm": 1.5968941450119019, "learning_rate": 0.0001, "loss": 0.0247, "step": 26710 }, { "epoch": 175.78947368421052, "grad_norm": 1.726304531097412, "learning_rate": 0.0001, "loss": 0.0219, "step": 26720 }, { "epoch": 175.85526315789474, "grad_norm": 1.52772057056427, "learning_rate": 0.0001, "loss": 0.0207, "step": 26730 }, { "epoch": 175.92105263157896, "grad_norm": 1.7848730087280273, "learning_rate": 0.0001, "loss": 0.0195, "step": 26740 }, { "epoch": 175.98684210526315, "grad_norm": 1.7189780473709106, "learning_rate": 0.0001, "loss": 0.0187, "step": 26750 }, { "epoch": 176.05263157894737, "grad_norm": 1.7168396711349487, "learning_rate": 0.0001, "loss": 0.0208, "step": 26760 }, { "epoch": 176.1184210526316, "grad_norm": 1.5791232585906982, "learning_rate": 0.0001, "loss": 0.0214, "step": 26770 }, { "epoch": 176.18421052631578, "grad_norm": 1.6979987621307373, "learning_rate": 0.0001, "loss": 0.0226, "step": 26780 }, { "epoch": 176.25, "grad_norm": 1.8915300369262695, "learning_rate": 0.0001, "loss": 0.0213, "step": 26790 }, { "epoch": 176.31578947368422, "grad_norm": 1.6844674348831177, "learning_rate": 0.0001, "loss": 0.0243, "step": 26800 }, { "epoch": 176.3815789473684, "grad_norm": 1.6832135915756226, "learning_rate": 0.0001, "loss": 0.0221, "step": 26810 }, { "epoch": 176.44736842105263, "grad_norm": 2.555859327316284, "learning_rate": 0.0001, "loss": 0.0223, "step": 26820 }, { "epoch": 176.51315789473685, "grad_norm": 1.6947392225265503, "learning_rate": 0.0001, "loss": 0.0268, "step": 26830 }, { "epoch": 176.57894736842104, "grad_norm": 1.626143455505371, "learning_rate": 0.0001, "loss": 0.0271, "step": 26840 }, { "epoch": 176.64473684210526, "grad_norm": 1.5046982765197754, "learning_rate": 0.0001, "loss": 0.0211, "step": 26850 }, { "epoch": 176.71052631578948, "grad_norm": 1.5275020599365234, "learning_rate": 0.0001, "loss": 0.0214, "step": 26860 }, { "epoch": 176.77631578947367, "grad_norm": 1.693964958190918, "learning_rate": 0.0001, "loss": 0.023, "step": 26870 }, { "epoch": 176.8421052631579, "grad_norm": 2.3514294624328613, "learning_rate": 0.0001, "loss": 0.0229, "step": 26880 }, { "epoch": 176.9078947368421, "grad_norm": 1.6730153560638428, "learning_rate": 0.0001, "loss": 0.0226, "step": 26890 }, { "epoch": 176.97368421052633, "grad_norm": 1.7412693500518799, "learning_rate": 0.0001, "loss": 0.0199, "step": 26900 }, { "epoch": 177.03947368421052, "grad_norm": 1.7152788639068604, "learning_rate": 0.0001, "loss": 0.0342, "step": 26910 }, { "epoch": 177.10526315789474, "grad_norm": 1.9347385168075562, "learning_rate": 0.0001, "loss": 0.023, "step": 26920 }, { "epoch": 177.17105263157896, "grad_norm": 2.177044153213501, "learning_rate": 0.0001, "loss": 0.0209, "step": 26930 }, { "epoch": 177.23684210526315, "grad_norm": 1.5955945253372192, "learning_rate": 0.0001, "loss": 0.0265, "step": 26940 }, { "epoch": 177.30263157894737, "grad_norm": 1.5710334777832031, "learning_rate": 0.0001, "loss": 0.0201, "step": 26950 }, { "epoch": 177.3684210526316, "grad_norm": 1.9074351787567139, "learning_rate": 0.0001, "loss": 0.0222, "step": 26960 }, { "epoch": 177.43421052631578, "grad_norm": 2.0679683685302734, "learning_rate": 0.0001, "loss": 0.0218, "step": 26970 }, { "epoch": 177.5, "grad_norm": 1.8049530982971191, "learning_rate": 0.0001, "loss": 0.0218, "step": 26980 }, { "epoch": 177.56578947368422, "grad_norm": 2.084031820297241, "learning_rate": 0.0001, "loss": 0.0199, "step": 26990 }, { "epoch": 177.6315789473684, "grad_norm": 2.104003429412842, "learning_rate": 0.0001, "loss": 0.0294, "step": 27000 }, { "epoch": 177.69736842105263, "grad_norm": 3.0594112873077393, "learning_rate": 0.0001, "loss": 0.019, "step": 27010 }, { "epoch": 177.76315789473685, "grad_norm": 1.9388364553451538, "learning_rate": 0.0001, "loss": 0.019, "step": 27020 }, { "epoch": 177.82894736842104, "grad_norm": 2.231229782104492, "learning_rate": 0.0001, "loss": 0.022, "step": 27030 }, { "epoch": 177.89473684210526, "grad_norm": 1.946751356124878, "learning_rate": 0.0001, "loss": 0.0189, "step": 27040 }, { "epoch": 177.96052631578948, "grad_norm": 2.079904556274414, "learning_rate": 0.0001, "loss": 0.0226, "step": 27050 }, { "epoch": 178.02631578947367, "grad_norm": 1.8007524013519287, "learning_rate": 0.0001, "loss": 0.023, "step": 27060 }, { "epoch": 178.0921052631579, "grad_norm": 1.852569580078125, "learning_rate": 0.0001, "loss": 0.0206, "step": 27070 }, { "epoch": 178.1578947368421, "grad_norm": 1.696610450744629, "learning_rate": 0.0001, "loss": 0.0214, "step": 27080 }, { "epoch": 178.22368421052633, "grad_norm": 1.8233119249343872, "learning_rate": 0.0001, "loss": 0.0207, "step": 27090 }, { "epoch": 178.28947368421052, "grad_norm": 1.8464583158493042, "learning_rate": 0.0001, "loss": 0.0206, "step": 27100 }, { "epoch": 178.35526315789474, "grad_norm": 2.6388022899627686, "learning_rate": 0.0001, "loss": 0.0225, "step": 27110 }, { "epoch": 178.42105263157896, "grad_norm": 1.932837724685669, "learning_rate": 0.0001, "loss": 0.0216, "step": 27120 }, { "epoch": 178.48684210526315, "grad_norm": 1.7792550325393677, "learning_rate": 0.0001, "loss": 0.0204, "step": 27130 }, { "epoch": 178.55263157894737, "grad_norm": 1.8592194318771362, "learning_rate": 0.0001, "loss": 0.0206, "step": 27140 }, { "epoch": 178.6184210526316, "grad_norm": 2.272883653640747, "learning_rate": 0.0001, "loss": 0.0205, "step": 27150 }, { "epoch": 178.68421052631578, "grad_norm": 1.717185378074646, "learning_rate": 0.0001, "loss": 0.026, "step": 27160 }, { "epoch": 178.75, "grad_norm": 2.217207670211792, "learning_rate": 0.0001, "loss": 0.0217, "step": 27170 }, { "epoch": 178.81578947368422, "grad_norm": 1.5436404943466187, "learning_rate": 0.0001, "loss": 0.0231, "step": 27180 }, { "epoch": 178.8815789473684, "grad_norm": 2.3764970302581787, "learning_rate": 0.0001, "loss": 0.0212, "step": 27190 }, { "epoch": 178.94736842105263, "grad_norm": 1.9373306035995483, "learning_rate": 0.0001, "loss": 0.0212, "step": 27200 }, { "epoch": 179.01315789473685, "grad_norm": 2.300225019454956, "learning_rate": 0.0001, "loss": 0.0256, "step": 27210 }, { "epoch": 179.07894736842104, "grad_norm": 2.676640748977661, "learning_rate": 0.0001, "loss": 0.0212, "step": 27220 }, { "epoch": 179.14473684210526, "grad_norm": 2.5426862239837646, "learning_rate": 0.0001, "loss": 0.0211, "step": 27230 }, { "epoch": 179.21052631578948, "grad_norm": 2.0926120281219482, "learning_rate": 0.0001, "loss": 0.0212, "step": 27240 }, { "epoch": 179.27631578947367, "grad_norm": 2.2013559341430664, "learning_rate": 0.0001, "loss": 0.0224, "step": 27250 }, { "epoch": 179.3421052631579, "grad_norm": 2.4887239933013916, "learning_rate": 0.0001, "loss": 0.0259, "step": 27260 }, { "epoch": 179.4078947368421, "grad_norm": 1.4472169876098633, "learning_rate": 0.0001, "loss": 0.0189, "step": 27270 }, { "epoch": 179.47368421052633, "grad_norm": 3.1190879344940186, "learning_rate": 0.0001, "loss": 0.0222, "step": 27280 }, { "epoch": 179.53947368421052, "grad_norm": 2.074324369430542, "learning_rate": 0.0001, "loss": 0.0209, "step": 27290 }, { "epoch": 179.60526315789474, "grad_norm": 2.1198768615722656, "learning_rate": 0.0001, "loss": 0.0215, "step": 27300 }, { "epoch": 179.67105263157896, "grad_norm": 2.1850316524505615, "learning_rate": 0.0001, "loss": 0.0193, "step": 27310 }, { "epoch": 179.73684210526315, "grad_norm": 1.575083613395691, "learning_rate": 0.0001, "loss": 0.0188, "step": 27320 }, { "epoch": 179.80263157894737, "grad_norm": 1.4868146181106567, "learning_rate": 0.0001, "loss": 0.0191, "step": 27330 }, { "epoch": 179.8684210526316, "grad_norm": 1.7876735925674438, "learning_rate": 0.0001, "loss": 0.0215, "step": 27340 }, { "epoch": 179.93421052631578, "grad_norm": 1.797590970993042, "learning_rate": 0.0001, "loss": 0.0216, "step": 27350 }, { "epoch": 180.0, "grad_norm": 1.7287899255752563, "learning_rate": 0.0001, "loss": 0.0233, "step": 27360 }, { "epoch": 180.06578947368422, "grad_norm": 1.9443573951721191, "learning_rate": 0.0001, "loss": 0.0236, "step": 27370 }, { "epoch": 180.1315789473684, "grad_norm": 1.811497688293457, "learning_rate": 0.0001, "loss": 0.0212, "step": 27380 }, { "epoch": 180.19736842105263, "grad_norm": 2.010878086090088, "learning_rate": 0.0001, "loss": 0.0205, "step": 27390 }, { "epoch": 180.26315789473685, "grad_norm": 2.100095510482788, "learning_rate": 0.0001, "loss": 0.0187, "step": 27400 }, { "epoch": 180.32894736842104, "grad_norm": 1.6829472780227661, "learning_rate": 0.0001, "loss": 0.0237, "step": 27410 }, { "epoch": 180.39473684210526, "grad_norm": 1.5078117847442627, "learning_rate": 0.0001, "loss": 0.0201, "step": 27420 }, { "epoch": 180.46052631578948, "grad_norm": 1.7165087461471558, "learning_rate": 0.0001, "loss": 0.0208, "step": 27430 }, { "epoch": 180.52631578947367, "grad_norm": 1.7594207525253296, "learning_rate": 0.0001, "loss": 0.0211, "step": 27440 }, { "epoch": 180.5921052631579, "grad_norm": 2.005337715148926, "learning_rate": 0.0001, "loss": 0.0231, "step": 27450 }, { "epoch": 180.6578947368421, "grad_norm": 1.8836561441421509, "learning_rate": 0.0001, "loss": 0.021, "step": 27460 }, { "epoch": 180.72368421052633, "grad_norm": 1.9070645570755005, "learning_rate": 0.0001, "loss": 0.0207, "step": 27470 }, { "epoch": 180.78947368421052, "grad_norm": 1.6358156204223633, "learning_rate": 0.0001, "loss": 0.0287, "step": 27480 }, { "epoch": 180.85526315789474, "grad_norm": 1.9038141965866089, "learning_rate": 0.0001, "loss": 0.0221, "step": 27490 }, { "epoch": 180.92105263157896, "grad_norm": 1.928163766860962, "learning_rate": 0.0001, "loss": 0.0209, "step": 27500 }, { "epoch": 180.98684210526315, "grad_norm": 1.953156590461731, "learning_rate": 0.0001, "loss": 0.0189, "step": 27510 }, { "epoch": 181.05263157894737, "grad_norm": 2.108285903930664, "learning_rate": 0.0001, "loss": 0.0209, "step": 27520 }, { "epoch": 181.1184210526316, "grad_norm": 1.850010871887207, "learning_rate": 0.0001, "loss": 0.0202, "step": 27530 }, { "epoch": 181.18421052631578, "grad_norm": 2.28619647026062, "learning_rate": 0.0001, "loss": 0.0233, "step": 27540 }, { "epoch": 181.25, "grad_norm": 1.6853114366531372, "learning_rate": 0.0001, "loss": 0.024, "step": 27550 }, { "epoch": 181.31578947368422, "grad_norm": 1.3880066871643066, "learning_rate": 0.0001, "loss": 0.0195, "step": 27560 }, { "epoch": 181.3815789473684, "grad_norm": 1.857547402381897, "learning_rate": 0.0001, "loss": 0.0247, "step": 27570 }, { "epoch": 181.44736842105263, "grad_norm": 1.727618932723999, "learning_rate": 0.0001, "loss": 0.0235, "step": 27580 }, { "epoch": 181.51315789473685, "grad_norm": 1.5550187826156616, "learning_rate": 0.0001, "loss": 0.0224, "step": 27590 }, { "epoch": 181.57894736842104, "grad_norm": 1.6502233743667603, "learning_rate": 0.0001, "loss": 0.026, "step": 27600 }, { "epoch": 181.64473684210526, "grad_norm": 1.8184963464736938, "learning_rate": 0.0001, "loss": 0.0213, "step": 27610 }, { "epoch": 181.71052631578948, "grad_norm": 2.041795253753662, "learning_rate": 0.0001, "loss": 0.0225, "step": 27620 }, { "epoch": 181.77631578947367, "grad_norm": 1.5650016069412231, "learning_rate": 0.0001, "loss": 0.0225, "step": 27630 }, { "epoch": 181.8421052631579, "grad_norm": 1.9156227111816406, "learning_rate": 0.0001, "loss": 0.0229, "step": 27640 }, { "epoch": 181.9078947368421, "grad_norm": 1.7636873722076416, "learning_rate": 0.0001, "loss": 0.0196, "step": 27650 }, { "epoch": 181.97368421052633, "grad_norm": 1.1472523212432861, "learning_rate": 0.0001, "loss": 0.019, "step": 27660 }, { "epoch": 182.03947368421052, "grad_norm": 1.8197755813598633, "learning_rate": 0.0001, "loss": 0.0205, "step": 27670 }, { "epoch": 182.10526315789474, "grad_norm": 1.660266637802124, "learning_rate": 0.0001, "loss": 0.0193, "step": 27680 }, { "epoch": 182.17105263157896, "grad_norm": 1.5495004653930664, "learning_rate": 0.0001, "loss": 0.021, "step": 27690 }, { "epoch": 182.23684210526315, "grad_norm": 1.915493369102478, "learning_rate": 0.0001, "loss": 0.0216, "step": 27700 }, { "epoch": 182.30263157894737, "grad_norm": 1.771114468574524, "learning_rate": 0.0001, "loss": 0.0256, "step": 27710 }, { "epoch": 182.3684210526316, "grad_norm": 2.213027238845825, "learning_rate": 0.0001, "loss": 0.0214, "step": 27720 }, { "epoch": 182.43421052631578, "grad_norm": 1.5584380626678467, "learning_rate": 0.0001, "loss": 0.0223, "step": 27730 }, { "epoch": 182.5, "grad_norm": 1.6455339193344116, "learning_rate": 0.0001, "loss": 0.022, "step": 27740 }, { "epoch": 182.56578947368422, "grad_norm": 1.6668483018875122, "learning_rate": 0.0001, "loss": 0.0249, "step": 27750 }, { "epoch": 182.6315789473684, "grad_norm": 1.6636048555374146, "learning_rate": 0.0001, "loss": 0.0201, "step": 27760 }, { "epoch": 182.69736842105263, "grad_norm": 1.0746219158172607, "learning_rate": 0.0001, "loss": 0.0224, "step": 27770 }, { "epoch": 182.76315789473685, "grad_norm": 1.3197077512741089, "learning_rate": 0.0001, "loss": 0.0246, "step": 27780 }, { "epoch": 182.82894736842104, "grad_norm": 1.7114886045455933, "learning_rate": 0.0001, "loss": 0.0201, "step": 27790 }, { "epoch": 182.89473684210526, "grad_norm": 1.1970462799072266, "learning_rate": 0.0001, "loss": 0.0238, "step": 27800 }, { "epoch": 182.96052631578948, "grad_norm": 1.5738550424575806, "learning_rate": 0.0001, "loss": 0.0201, "step": 27810 }, { "epoch": 183.02631578947367, "grad_norm": 1.6552011966705322, "learning_rate": 0.0001, "loss": 0.0245, "step": 27820 }, { "epoch": 183.0921052631579, "grad_norm": 1.7810993194580078, "learning_rate": 0.0001, "loss": 0.022, "step": 27830 }, { "epoch": 183.1578947368421, "grad_norm": 1.766648530960083, "learning_rate": 0.0001, "loss": 0.0196, "step": 27840 }, { "epoch": 183.22368421052633, "grad_norm": 1.8874194622039795, "learning_rate": 0.0001, "loss": 0.0249, "step": 27850 }, { "epoch": 183.28947368421052, "grad_norm": 1.6686333417892456, "learning_rate": 0.0001, "loss": 0.0229, "step": 27860 }, { "epoch": 183.35526315789474, "grad_norm": 1.4397567510604858, "learning_rate": 0.0001, "loss": 0.0222, "step": 27870 }, { "epoch": 183.42105263157896, "grad_norm": 1.4104321002960205, "learning_rate": 0.0001, "loss": 0.0183, "step": 27880 }, { "epoch": 183.48684210526315, "grad_norm": 2.2269418239593506, "learning_rate": 0.0001, "loss": 0.0203, "step": 27890 }, { "epoch": 183.55263157894737, "grad_norm": 1.6375118494033813, "learning_rate": 0.0001, "loss": 0.0208, "step": 27900 }, { "epoch": 183.6184210526316, "grad_norm": 1.5520962476730347, "learning_rate": 0.0001, "loss": 0.0256, "step": 27910 }, { "epoch": 183.68421052631578, "grad_norm": 1.8245152235031128, "learning_rate": 0.0001, "loss": 0.0206, "step": 27920 }, { "epoch": 183.75, "grad_norm": 1.2335766553878784, "learning_rate": 0.0001, "loss": 0.019, "step": 27930 }, { "epoch": 183.81578947368422, "grad_norm": 1.649624228477478, "learning_rate": 0.0001, "loss": 0.0223, "step": 27940 }, { "epoch": 183.8815789473684, "grad_norm": 1.3164063692092896, "learning_rate": 0.0001, "loss": 0.0235, "step": 27950 }, { "epoch": 183.94736842105263, "grad_norm": 2.376342296600342, "learning_rate": 0.0001, "loss": 0.0224, "step": 27960 }, { "epoch": 184.01315789473685, "grad_norm": 1.7755391597747803, "learning_rate": 0.0001, "loss": 0.0222, "step": 27970 }, { "epoch": 184.07894736842104, "grad_norm": 2.1354801654815674, "learning_rate": 0.0001, "loss": 0.0195, "step": 27980 }, { "epoch": 184.14473684210526, "grad_norm": 1.846364974975586, "learning_rate": 0.0001, "loss": 0.023, "step": 27990 }, { "epoch": 184.21052631578948, "grad_norm": 1.9973737001419067, "learning_rate": 0.0001, "loss": 0.0177, "step": 28000 }, { "epoch": 184.27631578947367, "grad_norm": 1.8828153610229492, "learning_rate": 0.0001, "loss": 0.0258, "step": 28010 }, { "epoch": 184.3421052631579, "grad_norm": 1.655981183052063, "learning_rate": 0.0001, "loss": 0.0206, "step": 28020 }, { "epoch": 184.4078947368421, "grad_norm": 2.0815064907073975, "learning_rate": 0.0001, "loss": 0.0199, "step": 28030 }, { "epoch": 184.47368421052633, "grad_norm": 1.62791907787323, "learning_rate": 0.0001, "loss": 0.022, "step": 28040 }, { "epoch": 184.53947368421052, "grad_norm": 1.3334391117095947, "learning_rate": 0.0001, "loss": 0.0194, "step": 28050 }, { "epoch": 184.60526315789474, "grad_norm": 1.9450081586837769, "learning_rate": 0.0001, "loss": 0.0204, "step": 28060 }, { "epoch": 184.67105263157896, "grad_norm": 1.709817886352539, "learning_rate": 0.0001, "loss": 0.0211, "step": 28070 }, { "epoch": 184.73684210526315, "grad_norm": 1.836328387260437, "learning_rate": 0.0001, "loss": 0.0213, "step": 28080 }, { "epoch": 184.80263157894737, "grad_norm": 1.2043198347091675, "learning_rate": 0.0001, "loss": 0.0247, "step": 28090 }, { "epoch": 184.8684210526316, "grad_norm": 1.6187876462936401, "learning_rate": 0.0001, "loss": 0.0179, "step": 28100 }, { "epoch": 184.93421052631578, "grad_norm": 1.818486213684082, "learning_rate": 0.0001, "loss": 0.0259, "step": 28110 }, { "epoch": 185.0, "grad_norm": 1.170192003250122, "learning_rate": 0.0001, "loss": 0.0242, "step": 28120 }, { "epoch": 185.06578947368422, "grad_norm": 1.7584054470062256, "learning_rate": 0.0001, "loss": 0.0252, "step": 28130 }, { "epoch": 185.1315789473684, "grad_norm": 1.5260273218154907, "learning_rate": 0.0001, "loss": 0.0195, "step": 28140 }, { "epoch": 185.19736842105263, "grad_norm": 1.2513256072998047, "learning_rate": 0.0001, "loss": 0.018, "step": 28150 }, { "epoch": 185.26315789473685, "grad_norm": 1.6926679611206055, "learning_rate": 0.0001, "loss": 0.0249, "step": 28160 }, { "epoch": 185.32894736842104, "grad_norm": 1.8237426280975342, "learning_rate": 0.0001, "loss": 0.0222, "step": 28170 }, { "epoch": 185.39473684210526, "grad_norm": 1.7703783512115479, "learning_rate": 0.0001, "loss": 0.0229, "step": 28180 }, { "epoch": 185.46052631578948, "grad_norm": 2.102468252182007, "learning_rate": 0.0001, "loss": 0.0251, "step": 28190 }, { "epoch": 185.52631578947367, "grad_norm": 1.8051787614822388, "learning_rate": 0.0001, "loss": 0.0218, "step": 28200 }, { "epoch": 185.5921052631579, "grad_norm": 1.618276834487915, "learning_rate": 0.0001, "loss": 0.0253, "step": 28210 }, { "epoch": 185.6578947368421, "grad_norm": 1.3617651462554932, "learning_rate": 0.0001, "loss": 0.021, "step": 28220 }, { "epoch": 185.72368421052633, "grad_norm": 2.0609161853790283, "learning_rate": 0.0001, "loss": 0.0211, "step": 28230 }, { "epoch": 185.78947368421052, "grad_norm": 1.9133977890014648, "learning_rate": 0.0001, "loss": 0.0212, "step": 28240 }, { "epoch": 185.85526315789474, "grad_norm": 1.110733151435852, "learning_rate": 0.0001, "loss": 0.0226, "step": 28250 }, { "epoch": 185.92105263157896, "grad_norm": 1.5395692586898804, "learning_rate": 0.0001, "loss": 0.02, "step": 28260 }, { "epoch": 185.98684210526315, "grad_norm": 1.55654776096344, "learning_rate": 0.0001, "loss": 0.0192, "step": 28270 }, { "epoch": 186.05263157894737, "grad_norm": 1.7067625522613525, "learning_rate": 0.0001, "loss": 0.0225, "step": 28280 }, { "epoch": 186.1184210526316, "grad_norm": 1.3781365156173706, "learning_rate": 0.0001, "loss": 0.0182, "step": 28290 }, { "epoch": 186.18421052631578, "grad_norm": 1.1878485679626465, "learning_rate": 0.0001, "loss": 0.0244, "step": 28300 }, { "epoch": 186.25, "grad_norm": 1.6011766195297241, "learning_rate": 0.0001, "loss": 0.0242, "step": 28310 }, { "epoch": 186.31578947368422, "grad_norm": 1.594082236289978, "learning_rate": 0.0001, "loss": 0.0263, "step": 28320 }, { "epoch": 186.3815789473684, "grad_norm": 1.2427823543548584, "learning_rate": 0.0001, "loss": 0.0197, "step": 28330 }, { "epoch": 186.44736842105263, "grad_norm": 1.298697829246521, "learning_rate": 0.0001, "loss": 0.0241, "step": 28340 }, { "epoch": 186.51315789473685, "grad_norm": 1.6615734100341797, "learning_rate": 0.0001, "loss": 0.0228, "step": 28350 }, { "epoch": 186.57894736842104, "grad_norm": 1.809204339981079, "learning_rate": 0.0001, "loss": 0.021, "step": 28360 }, { "epoch": 186.64473684210526, "grad_norm": 1.726158857345581, "learning_rate": 0.0001, "loss": 0.0187, "step": 28370 }, { "epoch": 186.71052631578948, "grad_norm": 1.2036124467849731, "learning_rate": 0.0001, "loss": 0.0222, "step": 28380 }, { "epoch": 186.77631578947367, "grad_norm": 1.6681259870529175, "learning_rate": 0.0001, "loss": 0.0252, "step": 28390 }, { "epoch": 186.8421052631579, "grad_norm": 1.6337006092071533, "learning_rate": 0.0001, "loss": 0.0223, "step": 28400 }, { "epoch": 186.9078947368421, "grad_norm": 1.4013375043869019, "learning_rate": 0.0001, "loss": 0.022, "step": 28410 }, { "epoch": 186.97368421052633, "grad_norm": 1.2779196500778198, "learning_rate": 0.0001, "loss": 0.0188, "step": 28420 }, { "epoch": 187.03947368421052, "grad_norm": 1.4654245376586914, "learning_rate": 0.0001, "loss": 0.0195, "step": 28430 }, { "epoch": 187.10526315789474, "grad_norm": 1.775181770324707, "learning_rate": 0.0001, "loss": 0.0203, "step": 28440 }, { "epoch": 187.17105263157896, "grad_norm": 1.5515815019607544, "learning_rate": 0.0001, "loss": 0.0231, "step": 28450 }, { "epoch": 187.23684210526315, "grad_norm": 0.8186315298080444, "learning_rate": 0.0001, "loss": 0.0215, "step": 28460 }, { "epoch": 187.30263157894737, "grad_norm": 2.137362480163574, "learning_rate": 0.0001, "loss": 0.0197, "step": 28470 }, { "epoch": 187.3684210526316, "grad_norm": 1.0924235582351685, "learning_rate": 0.0001, "loss": 0.022, "step": 28480 }, { "epoch": 187.43421052631578, "grad_norm": 1.5934661626815796, "learning_rate": 0.0001, "loss": 0.0243, "step": 28490 }, { "epoch": 187.5, "grad_norm": 1.366613507270813, "learning_rate": 0.0001, "loss": 0.019, "step": 28500 }, { "epoch": 187.56578947368422, "grad_norm": 1.0969226360321045, "learning_rate": 0.0001, "loss": 0.0245, "step": 28510 }, { "epoch": 187.6315789473684, "grad_norm": 1.397963285446167, "learning_rate": 0.0001, "loss": 0.0252, "step": 28520 }, { "epoch": 187.69736842105263, "grad_norm": 1.144210934638977, "learning_rate": 0.0001, "loss": 0.0236, "step": 28530 }, { "epoch": 187.76315789473685, "grad_norm": 1.4225585460662842, "learning_rate": 0.0001, "loss": 0.0224, "step": 28540 }, { "epoch": 187.82894736842104, "grad_norm": 1.3719286918640137, "learning_rate": 0.0001, "loss": 0.0247, "step": 28550 }, { "epoch": 187.89473684210526, "grad_norm": 1.225841760635376, "learning_rate": 0.0001, "loss": 0.0214, "step": 28560 }, { "epoch": 187.96052631578948, "grad_norm": 1.3913520574569702, "learning_rate": 0.0001, "loss": 0.0247, "step": 28570 }, { "epoch": 188.02631578947367, "grad_norm": 1.5756137371063232, "learning_rate": 0.0001, "loss": 0.0265, "step": 28580 }, { "epoch": 188.0921052631579, "grad_norm": 1.7170803546905518, "learning_rate": 0.0001, "loss": 0.0214, "step": 28590 }, { "epoch": 188.1578947368421, "grad_norm": 1.1530786752700806, "learning_rate": 0.0001, "loss": 0.0224, "step": 28600 }, { "epoch": 188.22368421052633, "grad_norm": 2.1069421768188477, "learning_rate": 0.0001, "loss": 0.0228, "step": 28610 }, { "epoch": 188.28947368421052, "grad_norm": 2.2930939197540283, "learning_rate": 0.0001, "loss": 0.0214, "step": 28620 }, { "epoch": 188.35526315789474, "grad_norm": 1.9653240442276, "learning_rate": 0.0001, "loss": 0.0218, "step": 28630 }, { "epoch": 188.42105263157896, "grad_norm": 1.6879068613052368, "learning_rate": 0.0001, "loss": 0.021, "step": 28640 }, { "epoch": 188.48684210526315, "grad_norm": 2.145551919937134, "learning_rate": 0.0001, "loss": 0.0185, "step": 28650 }, { "epoch": 188.55263157894737, "grad_norm": 1.7812880277633667, "learning_rate": 0.0001, "loss": 0.0283, "step": 28660 }, { "epoch": 188.6184210526316, "grad_norm": 1.4547702074050903, "learning_rate": 0.0001, "loss": 0.0227, "step": 28670 }, { "epoch": 188.68421052631578, "grad_norm": 1.667601466178894, "learning_rate": 0.0001, "loss": 0.023, "step": 28680 }, { "epoch": 188.75, "grad_norm": 1.6737927198410034, "learning_rate": 0.0001, "loss": 0.0223, "step": 28690 }, { "epoch": 188.81578947368422, "grad_norm": 2.0257487297058105, "learning_rate": 0.0001, "loss": 0.0222, "step": 28700 }, { "epoch": 188.8815789473684, "grad_norm": 1.5649522542953491, "learning_rate": 0.0001, "loss": 0.022, "step": 28710 }, { "epoch": 188.94736842105263, "grad_norm": 1.368647813796997, "learning_rate": 0.0001, "loss": 0.0191, "step": 28720 }, { "epoch": 189.01315789473685, "grad_norm": 1.5580404996871948, "learning_rate": 0.0001, "loss": 0.0217, "step": 28730 }, { "epoch": 189.07894736842104, "grad_norm": 1.2899588346481323, "learning_rate": 0.0001, "loss": 0.0224, "step": 28740 }, { "epoch": 189.14473684210526, "grad_norm": 1.3944998979568481, "learning_rate": 0.0001, "loss": 0.0217, "step": 28750 }, { "epoch": 189.21052631578948, "grad_norm": 1.623589277267456, "learning_rate": 0.0001, "loss": 0.0202, "step": 28760 }, { "epoch": 189.27631578947367, "grad_norm": 1.6657559871673584, "learning_rate": 0.0001, "loss": 0.0198, "step": 28770 }, { "epoch": 189.3421052631579, "grad_norm": 1.7521365880966187, "learning_rate": 0.0001, "loss": 0.0216, "step": 28780 }, { "epoch": 189.4078947368421, "grad_norm": 1.3185832500457764, "learning_rate": 0.0001, "loss": 0.0193, "step": 28790 }, { "epoch": 189.47368421052633, "grad_norm": 1.5753531455993652, "learning_rate": 0.0001, "loss": 0.0218, "step": 28800 }, { "epoch": 189.53947368421052, "grad_norm": 1.8359376192092896, "learning_rate": 0.0001, "loss": 0.0222, "step": 28810 }, { "epoch": 189.60526315789474, "grad_norm": 1.4978209733963013, "learning_rate": 0.0001, "loss": 0.0254, "step": 28820 }, { "epoch": 189.67105263157896, "grad_norm": 1.289913535118103, "learning_rate": 0.0001, "loss": 0.0203, "step": 28830 }, { "epoch": 189.73684210526315, "grad_norm": 1.6008424758911133, "learning_rate": 0.0001, "loss": 0.02, "step": 28840 }, { "epoch": 189.80263157894737, "grad_norm": 1.1742582321166992, "learning_rate": 0.0001, "loss": 0.0192, "step": 28850 }, { "epoch": 189.8684210526316, "grad_norm": 2.025604724884033, "learning_rate": 0.0001, "loss": 0.0236, "step": 28860 }, { "epoch": 189.93421052631578, "grad_norm": 1.7539658546447754, "learning_rate": 0.0001, "loss": 0.0213, "step": 28870 }, { "epoch": 190.0, "grad_norm": 1.787758708000183, "learning_rate": 0.0001, "loss": 0.0248, "step": 28880 }, { "epoch": 190.06578947368422, "grad_norm": 1.9884114265441895, "learning_rate": 0.0001, "loss": 0.0207, "step": 28890 }, { "epoch": 190.1315789473684, "grad_norm": 2.2127199172973633, "learning_rate": 0.0001, "loss": 0.0175, "step": 28900 }, { "epoch": 190.19736842105263, "grad_norm": 1.865920901298523, "learning_rate": 0.0001, "loss": 0.0253, "step": 28910 }, { "epoch": 190.26315789473685, "grad_norm": 2.050685405731201, "learning_rate": 0.0001, "loss": 0.0251, "step": 28920 }, { "epoch": 190.32894736842104, "grad_norm": 1.7176504135131836, "learning_rate": 0.0001, "loss": 0.0199, "step": 28930 }, { "epoch": 190.39473684210526, "grad_norm": 1.688055396080017, "learning_rate": 0.0001, "loss": 0.0174, "step": 28940 }, { "epoch": 190.46052631578948, "grad_norm": 1.5751014947891235, "learning_rate": 0.0001, "loss": 0.0226, "step": 28950 }, { "epoch": 190.52631578947367, "grad_norm": 1.2568857669830322, "learning_rate": 0.0001, "loss": 0.0181, "step": 28960 }, { "epoch": 190.5921052631579, "grad_norm": 1.9590922594070435, "learning_rate": 0.0001, "loss": 0.0204, "step": 28970 }, { "epoch": 190.6578947368421, "grad_norm": 1.349768042564392, "learning_rate": 0.0001, "loss": 0.0194, "step": 28980 }, { "epoch": 190.72368421052633, "grad_norm": 1.4772484302520752, "learning_rate": 0.0001, "loss": 0.0255, "step": 28990 }, { "epoch": 190.78947368421052, "grad_norm": 1.3791097402572632, "learning_rate": 0.0001, "loss": 0.0228, "step": 29000 }, { "epoch": 190.85526315789474, "grad_norm": 1.8325904607772827, "learning_rate": 0.0001, "loss": 0.0254, "step": 29010 }, { "epoch": 190.92105263157896, "grad_norm": 1.8586369752883911, "learning_rate": 0.0001, "loss": 0.0226, "step": 29020 }, { "epoch": 190.98684210526315, "grad_norm": 1.9696778059005737, "learning_rate": 0.0001, "loss": 0.0183, "step": 29030 }, { "epoch": 191.05263157894737, "grad_norm": 1.7906421422958374, "learning_rate": 0.0001, "loss": 0.0185, "step": 29040 }, { "epoch": 191.1184210526316, "grad_norm": 2.033433437347412, "learning_rate": 0.0001, "loss": 0.0186, "step": 29050 }, { "epoch": 191.18421052631578, "grad_norm": 1.781383991241455, "learning_rate": 0.0001, "loss": 0.0217, "step": 29060 }, { "epoch": 191.25, "grad_norm": 1.7858682870864868, "learning_rate": 0.0001, "loss": 0.0182, "step": 29070 }, { "epoch": 191.31578947368422, "grad_norm": 1.253417730331421, "learning_rate": 0.0001, "loss": 0.0224, "step": 29080 }, { "epoch": 191.3815789473684, "grad_norm": 1.2598531246185303, "learning_rate": 0.0001, "loss": 0.0191, "step": 29090 }, { "epoch": 191.44736842105263, "grad_norm": 1.7701340913772583, "learning_rate": 0.0001, "loss": 0.0198, "step": 29100 }, { "epoch": 191.51315789473685, "grad_norm": 1.278489589691162, "learning_rate": 0.0001, "loss": 0.0225, "step": 29110 }, { "epoch": 191.57894736842104, "grad_norm": 1.708103895187378, "learning_rate": 0.0001, "loss": 0.0205, "step": 29120 }, { "epoch": 191.64473684210526, "grad_norm": 1.9011845588684082, "learning_rate": 0.0001, "loss": 0.0214, "step": 29130 }, { "epoch": 191.71052631578948, "grad_norm": 1.9283276796340942, "learning_rate": 0.0001, "loss": 0.0237, "step": 29140 }, { "epoch": 191.77631578947367, "grad_norm": 2.1835248470306396, "learning_rate": 0.0001, "loss": 0.022, "step": 29150 }, { "epoch": 191.8421052631579, "grad_norm": 1.8641891479492188, "learning_rate": 0.0001, "loss": 0.0214, "step": 29160 }, { "epoch": 191.9078947368421, "grad_norm": 1.6205722093582153, "learning_rate": 0.0001, "loss": 0.0227, "step": 29170 }, { "epoch": 191.97368421052633, "grad_norm": 2.1189794540405273, "learning_rate": 0.0001, "loss": 0.0229, "step": 29180 }, { "epoch": 192.03947368421052, "grad_norm": 1.9623830318450928, "learning_rate": 0.0001, "loss": 0.0304, "step": 29190 }, { "epoch": 192.10526315789474, "grad_norm": 1.7565741539001465, "learning_rate": 0.0001, "loss": 0.0207, "step": 29200 }, { "epoch": 192.17105263157896, "grad_norm": 1.489354133605957, "learning_rate": 0.0001, "loss": 0.0201, "step": 29210 }, { "epoch": 192.23684210526315, "grad_norm": 2.051517963409424, "learning_rate": 0.0001, "loss": 0.0229, "step": 29220 }, { "epoch": 192.30263157894737, "grad_norm": 1.884886622428894, "learning_rate": 0.0001, "loss": 0.0175, "step": 29230 }, { "epoch": 192.3684210526316, "grad_norm": 1.7932251691818237, "learning_rate": 0.0001, "loss": 0.0178, "step": 29240 }, { "epoch": 192.43421052631578, "grad_norm": 1.9630385637283325, "learning_rate": 0.0001, "loss": 0.0207, "step": 29250 }, { "epoch": 192.5, "grad_norm": 1.5972824096679688, "learning_rate": 0.0001, "loss": 0.0206, "step": 29260 }, { "epoch": 192.56578947368422, "grad_norm": 1.5813103914260864, "learning_rate": 0.0001, "loss": 0.0179, "step": 29270 }, { "epoch": 192.6315789473684, "grad_norm": 1.9925544261932373, "learning_rate": 0.0001, "loss": 0.0213, "step": 29280 }, { "epoch": 192.69736842105263, "grad_norm": 1.6847556829452515, "learning_rate": 0.0001, "loss": 0.0213, "step": 29290 }, { "epoch": 192.76315789473685, "grad_norm": 1.776502013206482, "learning_rate": 0.0001, "loss": 0.0174, "step": 29300 }, { "epoch": 192.82894736842104, "grad_norm": 1.8349354267120361, "learning_rate": 0.0001, "loss": 0.0271, "step": 29310 }, { "epoch": 192.89473684210526, "grad_norm": 1.4941068887710571, "learning_rate": 0.0001, "loss": 0.02, "step": 29320 }, { "epoch": 192.96052631578948, "grad_norm": 0.856435239315033, "learning_rate": 0.0001, "loss": 0.0203, "step": 29330 }, { "epoch": 193.02631578947367, "grad_norm": 1.935820460319519, "learning_rate": 0.0001, "loss": 0.0236, "step": 29340 }, { "epoch": 193.0921052631579, "grad_norm": 2.037137985229492, "learning_rate": 0.0001, "loss": 0.0214, "step": 29350 }, { "epoch": 193.1578947368421, "grad_norm": 1.7521394491195679, "learning_rate": 0.0001, "loss": 0.0194, "step": 29360 }, { "epoch": 193.22368421052633, "grad_norm": 1.8772003650665283, "learning_rate": 0.0001, "loss": 0.0264, "step": 29370 }, { "epoch": 193.28947368421052, "grad_norm": 1.8321219682693481, "learning_rate": 0.0001, "loss": 0.0191, "step": 29380 }, { "epoch": 193.35526315789474, "grad_norm": 1.7304871082305908, "learning_rate": 0.0001, "loss": 0.0214, "step": 29390 }, { "epoch": 193.42105263157896, "grad_norm": 1.9163637161254883, "learning_rate": 0.0001, "loss": 0.0239, "step": 29400 }, { "epoch": 193.48684210526315, "grad_norm": 1.906887412071228, "learning_rate": 0.0001, "loss": 0.0213, "step": 29410 }, { "epoch": 193.55263157894737, "grad_norm": 1.7262425422668457, "learning_rate": 0.0001, "loss": 0.0187, "step": 29420 }, { "epoch": 193.6184210526316, "grad_norm": 1.8037112951278687, "learning_rate": 0.0001, "loss": 0.0182, "step": 29430 }, { "epoch": 193.68421052631578, "grad_norm": 1.668190836906433, "learning_rate": 0.0001, "loss": 0.0176, "step": 29440 }, { "epoch": 193.75, "grad_norm": 1.8752731084823608, "learning_rate": 0.0001, "loss": 0.0251, "step": 29450 }, { "epoch": 193.81578947368422, "grad_norm": 1.656114935874939, "learning_rate": 0.0001, "loss": 0.0236, "step": 29460 }, { "epoch": 193.8815789473684, "grad_norm": 1.6296031475067139, "learning_rate": 0.0001, "loss": 0.0195, "step": 29470 }, { "epoch": 193.94736842105263, "grad_norm": 1.9496815204620361, "learning_rate": 0.0001, "loss": 0.0194, "step": 29480 }, { "epoch": 194.01315789473685, "grad_norm": 1.5491927862167358, "learning_rate": 0.0001, "loss": 0.0187, "step": 29490 }, { "epoch": 194.07894736842104, "grad_norm": 1.7801871299743652, "learning_rate": 0.0001, "loss": 0.025, "step": 29500 }, { "epoch": 194.14473684210526, "grad_norm": 1.816591501235962, "learning_rate": 0.0001, "loss": 0.0195, "step": 29510 }, { "epoch": 194.21052631578948, "grad_norm": 1.6459835767745972, "learning_rate": 0.0001, "loss": 0.0191, "step": 29520 }, { "epoch": 194.27631578947367, "grad_norm": 1.2114415168762207, "learning_rate": 0.0001, "loss": 0.0206, "step": 29530 }, { "epoch": 194.3421052631579, "grad_norm": 1.9629594087600708, "learning_rate": 0.0001, "loss": 0.0246, "step": 29540 }, { "epoch": 194.4078947368421, "grad_norm": 1.976059079170227, "learning_rate": 0.0001, "loss": 0.0215, "step": 29550 }, { "epoch": 194.47368421052633, "grad_norm": 1.8450648784637451, "learning_rate": 0.0001, "loss": 0.0197, "step": 29560 }, { "epoch": 194.53947368421052, "grad_norm": 1.7382110357284546, "learning_rate": 0.0001, "loss": 0.0228, "step": 29570 }, { "epoch": 194.60526315789474, "grad_norm": 1.613635778427124, "learning_rate": 0.0001, "loss": 0.0197, "step": 29580 }, { "epoch": 194.67105263157896, "grad_norm": 1.3867443799972534, "learning_rate": 0.0001, "loss": 0.0199, "step": 29590 }, { "epoch": 194.73684210526315, "grad_norm": 1.64284086227417, "learning_rate": 0.0001, "loss": 0.0233, "step": 29600 }, { "epoch": 194.80263157894737, "grad_norm": 1.3933079242706299, "learning_rate": 0.0001, "loss": 0.0201, "step": 29610 }, { "epoch": 194.8684210526316, "grad_norm": 1.570577621459961, "learning_rate": 0.0001, "loss": 0.0233, "step": 29620 }, { "epoch": 194.93421052631578, "grad_norm": 1.3077232837677002, "learning_rate": 0.0001, "loss": 0.0189, "step": 29630 }, { "epoch": 195.0, "grad_norm": 1.6433497667312622, "learning_rate": 0.0001, "loss": 0.0215, "step": 29640 }, { "epoch": 195.06578947368422, "grad_norm": 1.6985344886779785, "learning_rate": 0.0001, "loss": 0.0221, "step": 29650 }, { "epoch": 195.1315789473684, "grad_norm": 1.5853031873703003, "learning_rate": 0.0001, "loss": 0.0197, "step": 29660 }, { "epoch": 195.19736842105263, "grad_norm": 1.3907804489135742, "learning_rate": 0.0001, "loss": 0.0197, "step": 29670 }, { "epoch": 195.26315789473685, "grad_norm": 1.8093420267105103, "learning_rate": 0.0001, "loss": 0.0184, "step": 29680 }, { "epoch": 195.32894736842104, "grad_norm": 1.5977129936218262, "learning_rate": 0.0001, "loss": 0.0223, "step": 29690 }, { "epoch": 195.39473684210526, "grad_norm": 1.5647841691970825, "learning_rate": 0.0001, "loss": 0.019, "step": 29700 }, { "epoch": 195.46052631578948, "grad_norm": 1.8674135208129883, "learning_rate": 0.0001, "loss": 0.0205, "step": 29710 }, { "epoch": 195.52631578947367, "grad_norm": 1.590970754623413, "learning_rate": 0.0001, "loss": 0.0232, "step": 29720 }, { "epoch": 195.5921052631579, "grad_norm": 1.874959111213684, "learning_rate": 0.0001, "loss": 0.0244, "step": 29730 }, { "epoch": 195.6578947368421, "grad_norm": 1.5044567584991455, "learning_rate": 0.0001, "loss": 0.0191, "step": 29740 }, { "epoch": 195.72368421052633, "grad_norm": 1.1230733394622803, "learning_rate": 0.0001, "loss": 0.0223, "step": 29750 }, { "epoch": 195.78947368421052, "grad_norm": 1.7708184719085693, "learning_rate": 0.0001, "loss": 0.0211, "step": 29760 }, { "epoch": 195.85526315789474, "grad_norm": 1.6044379472732544, "learning_rate": 0.0001, "loss": 0.0203, "step": 29770 }, { "epoch": 195.92105263157896, "grad_norm": 2.0074005126953125, "learning_rate": 0.0001, "loss": 0.0225, "step": 29780 }, { "epoch": 195.98684210526315, "grad_norm": 1.288496732711792, "learning_rate": 0.0001, "loss": 0.0235, "step": 29790 }, { "epoch": 196.05263157894737, "grad_norm": 1.9483606815338135, "learning_rate": 0.0001, "loss": 0.0222, "step": 29800 }, { "epoch": 196.1184210526316, "grad_norm": 1.6906429529190063, "learning_rate": 0.0001, "loss": 0.0219, "step": 29810 }, { "epoch": 196.18421052631578, "grad_norm": 1.9194693565368652, "learning_rate": 0.0001, "loss": 0.0236, "step": 29820 }, { "epoch": 196.25, "grad_norm": 2.104372978210449, "learning_rate": 0.0001, "loss": 0.0228, "step": 29830 }, { "epoch": 196.31578947368422, "grad_norm": 1.6090569496154785, "learning_rate": 0.0001, "loss": 0.023, "step": 29840 }, { "epoch": 196.3815789473684, "grad_norm": 2.0302860736846924, "learning_rate": 0.0001, "loss": 0.0223, "step": 29850 }, { "epoch": 196.44736842105263, "grad_norm": 2.013773202896118, "learning_rate": 0.0001, "loss": 0.0201, "step": 29860 }, { "epoch": 196.51315789473685, "grad_norm": 1.5836812257766724, "learning_rate": 0.0001, "loss": 0.0187, "step": 29870 }, { "epoch": 196.57894736842104, "grad_norm": 1.8830434083938599, "learning_rate": 0.0001, "loss": 0.0194, "step": 29880 }, { "epoch": 196.64473684210526, "grad_norm": 1.7520039081573486, "learning_rate": 0.0001, "loss": 0.0198, "step": 29890 }, { "epoch": 196.71052631578948, "grad_norm": 1.7888561487197876, "learning_rate": 0.0001, "loss": 0.0206, "step": 29900 }, { "epoch": 196.77631578947367, "grad_norm": 1.9435513019561768, "learning_rate": 0.0001, "loss": 0.0186, "step": 29910 }, { "epoch": 196.8421052631579, "grad_norm": 1.647741436958313, "learning_rate": 0.0001, "loss": 0.0181, "step": 29920 }, { "epoch": 196.9078947368421, "grad_norm": 1.3881535530090332, "learning_rate": 0.0001, "loss": 0.0232, "step": 29930 }, { "epoch": 196.97368421052633, "grad_norm": 1.3140674829483032, "learning_rate": 0.0001, "loss": 0.0217, "step": 29940 }, { "epoch": 197.03947368421052, "grad_norm": 1.49045991897583, "learning_rate": 0.0001, "loss": 0.0212, "step": 29950 }, { "epoch": 197.10526315789474, "grad_norm": 1.5708675384521484, "learning_rate": 0.0001, "loss": 0.0176, "step": 29960 }, { "epoch": 197.17105263157896, "grad_norm": 1.482925295829773, "learning_rate": 0.0001, "loss": 0.0221, "step": 29970 }, { "epoch": 197.23684210526315, "grad_norm": 1.6646392345428467, "learning_rate": 0.0001, "loss": 0.021, "step": 29980 }, { "epoch": 197.30263157894737, "grad_norm": 2.0031685829162598, "learning_rate": 0.0001, "loss": 0.0204, "step": 29990 }, { "epoch": 197.3684210526316, "grad_norm": 1.0727046728134155, "learning_rate": 0.0001, "loss": 0.0189, "step": 30000 }, { "epoch": 197.43421052631578, "grad_norm": 1.458987832069397, "learning_rate": 0.0001, "loss": 0.0186, "step": 30010 }, { "epoch": 197.5, "grad_norm": 1.4648079872131348, "learning_rate": 0.0001, "loss": 0.0225, "step": 30020 }, { "epoch": 197.56578947368422, "grad_norm": 1.4106168746948242, "learning_rate": 0.0001, "loss": 0.0233, "step": 30030 }, { "epoch": 197.6315789473684, "grad_norm": 1.818960428237915, "learning_rate": 0.0001, "loss": 0.0233, "step": 30040 }, { "epoch": 197.69736842105263, "grad_norm": 1.481907844543457, "learning_rate": 0.0001, "loss": 0.0181, "step": 30050 }, { "epoch": 197.76315789473685, "grad_norm": 1.6936240196228027, "learning_rate": 0.0001, "loss": 0.0263, "step": 30060 }, { "epoch": 197.82894736842104, "grad_norm": 1.5905367136001587, "learning_rate": 0.0001, "loss": 0.0221, "step": 30070 }, { "epoch": 197.89473684210526, "grad_norm": 1.7702603340148926, "learning_rate": 0.0001, "loss": 0.0182, "step": 30080 }, { "epoch": 197.96052631578948, "grad_norm": 2.16865873336792, "learning_rate": 0.0001, "loss": 0.0224, "step": 30090 }, { "epoch": 198.02631578947367, "grad_norm": 1.220324993133545, "learning_rate": 0.0001, "loss": 0.0202, "step": 30100 }, { "epoch": 198.0921052631579, "grad_norm": 1.5375144481658936, "learning_rate": 0.0001, "loss": 0.0234, "step": 30110 }, { "epoch": 198.1578947368421, "grad_norm": 1.597976803779602, "learning_rate": 0.0001, "loss": 0.02, "step": 30120 }, { "epoch": 198.22368421052633, "grad_norm": 1.5262008905410767, "learning_rate": 0.0001, "loss": 0.0233, "step": 30130 }, { "epoch": 198.28947368421052, "grad_norm": 1.7972211837768555, "learning_rate": 0.0001, "loss": 0.0215, "step": 30140 }, { "epoch": 198.35526315789474, "grad_norm": 1.8802919387817383, "learning_rate": 0.0001, "loss": 0.0199, "step": 30150 }, { "epoch": 198.42105263157896, "grad_norm": 1.4194831848144531, "learning_rate": 0.0001, "loss": 0.0205, "step": 30160 }, { "epoch": 198.48684210526315, "grad_norm": 1.5012543201446533, "learning_rate": 0.0001, "loss": 0.0189, "step": 30170 }, { "epoch": 198.55263157894737, "grad_norm": 1.443147897720337, "learning_rate": 0.0001, "loss": 0.0204, "step": 30180 }, { "epoch": 198.6184210526316, "grad_norm": 1.925945520401001, "learning_rate": 0.0001, "loss": 0.0229, "step": 30190 }, { "epoch": 198.68421052631578, "grad_norm": 1.7948559522628784, "learning_rate": 0.0001, "loss": 0.0217, "step": 30200 }, { "epoch": 198.75, "grad_norm": 1.7154260873794556, "learning_rate": 0.0001, "loss": 0.0199, "step": 30210 }, { "epoch": 198.81578947368422, "grad_norm": 1.6892834901809692, "learning_rate": 0.0001, "loss": 0.0219, "step": 30220 }, { "epoch": 198.8815789473684, "grad_norm": 1.819498896598816, "learning_rate": 0.0001, "loss": 0.0244, "step": 30230 }, { "epoch": 198.94736842105263, "grad_norm": 1.8391540050506592, "learning_rate": 0.0001, "loss": 0.023, "step": 30240 }, { "epoch": 199.01315789473685, "grad_norm": 1.1044708490371704, "learning_rate": 0.0001, "loss": 0.0214, "step": 30250 }, { "epoch": 199.07894736842104, "grad_norm": 1.758634090423584, "learning_rate": 0.0001, "loss": 0.0248, "step": 30260 }, { "epoch": 199.14473684210526, "grad_norm": 1.9117934703826904, "learning_rate": 0.0001, "loss": 0.0226, "step": 30270 }, { "epoch": 199.21052631578948, "grad_norm": 1.8559709787368774, "learning_rate": 0.0001, "loss": 0.0175, "step": 30280 }, { "epoch": 199.27631578947367, "grad_norm": 1.6030089855194092, "learning_rate": 0.0001, "loss": 0.0213, "step": 30290 }, { "epoch": 199.3421052631579, "grad_norm": 1.9212050437927246, "learning_rate": 0.0001, "loss": 0.018, "step": 30300 }, { "epoch": 199.4078947368421, "grad_norm": 1.7537636756896973, "learning_rate": 0.0001, "loss": 0.0187, "step": 30310 }, { "epoch": 199.47368421052633, "grad_norm": 1.8607996702194214, "learning_rate": 0.0001, "loss": 0.0227, "step": 30320 }, { "epoch": 199.53947368421052, "grad_norm": 1.989687204360962, "learning_rate": 0.0001, "loss": 0.0223, "step": 30330 }, { "epoch": 199.60526315789474, "grad_norm": 1.3881646394729614, "learning_rate": 0.0001, "loss": 0.0208, "step": 30340 }, { "epoch": 199.67105263157896, "grad_norm": 1.292202353477478, "learning_rate": 0.0001, "loss": 0.0175, "step": 30350 }, { "epoch": 199.73684210526315, "grad_norm": 1.5454723834991455, "learning_rate": 0.0001, "loss": 0.0215, "step": 30360 }, { "epoch": 199.80263157894737, "grad_norm": 1.6735018491744995, "learning_rate": 0.0001, "loss": 0.0195, "step": 30370 }, { "epoch": 199.8684210526316, "grad_norm": 1.2719719409942627, "learning_rate": 0.0001, "loss": 0.0207, "step": 30380 }, { "epoch": 199.93421052631578, "grad_norm": 1.6605944633483887, "learning_rate": 0.0001, "loss": 0.025, "step": 30390 }, { "epoch": 200.0, "grad_norm": 1.5493183135986328, "learning_rate": 0.0001, "loss": 0.026, "step": 30400 }, { "epoch": 200.06578947368422, "grad_norm": 1.61273193359375, "learning_rate": 0.0001, "loss": 0.0229, "step": 30410 }, { "epoch": 200.1315789473684, "grad_norm": 1.472001552581787, "learning_rate": 0.0001, "loss": 0.0236, "step": 30420 }, { "epoch": 200.19736842105263, "grad_norm": 1.6361870765686035, "learning_rate": 0.0001, "loss": 0.0197, "step": 30430 }, { "epoch": 200.26315789473685, "grad_norm": 1.3757432699203491, "learning_rate": 0.0001, "loss": 0.0217, "step": 30440 }, { "epoch": 200.32894736842104, "grad_norm": 1.7093154191970825, "learning_rate": 0.0001, "loss": 0.0217, "step": 30450 }, { "epoch": 200.39473684210526, "grad_norm": 1.4716482162475586, "learning_rate": 0.0001, "loss": 0.0196, "step": 30460 }, { "epoch": 200.46052631578948, "grad_norm": 1.5358667373657227, "learning_rate": 0.0001, "loss": 0.025, "step": 30470 }, { "epoch": 200.52631578947367, "grad_norm": 1.5514477491378784, "learning_rate": 0.0001, "loss": 0.019, "step": 30480 }, { "epoch": 200.5921052631579, "grad_norm": 1.5961191654205322, "learning_rate": 0.0001, "loss": 0.0209, "step": 30490 }, { "epoch": 200.6578947368421, "grad_norm": 1.7440402507781982, "learning_rate": 0.0001, "loss": 0.0218, "step": 30500 }, { "epoch": 200.72368421052633, "grad_norm": 1.709675669670105, "learning_rate": 0.0001, "loss": 0.0199, "step": 30510 }, { "epoch": 200.78947368421052, "grad_norm": 1.9849077463150024, "learning_rate": 0.0001, "loss": 0.0184, "step": 30520 }, { "epoch": 200.85526315789474, "grad_norm": 1.70286226272583, "learning_rate": 0.0001, "loss": 0.0217, "step": 30530 }, { "epoch": 200.92105263157896, "grad_norm": 1.4762600660324097, "learning_rate": 0.0001, "loss": 0.0196, "step": 30540 }, { "epoch": 200.98684210526315, "grad_norm": 1.4608465433120728, "learning_rate": 0.0001, "loss": 0.0212, "step": 30550 }, { "epoch": 201.05263157894737, "grad_norm": 2.105011224746704, "learning_rate": 0.0001, "loss": 0.0206, "step": 30560 }, { "epoch": 201.1184210526316, "grad_norm": 2.1448943614959717, "learning_rate": 0.0001, "loss": 0.0215, "step": 30570 }, { "epoch": 201.18421052631578, "grad_norm": 1.8005919456481934, "learning_rate": 0.0001, "loss": 0.0177, "step": 30580 }, { "epoch": 201.25, "grad_norm": 1.7431254386901855, "learning_rate": 0.0001, "loss": 0.0204, "step": 30590 }, { "epoch": 201.31578947368422, "grad_norm": 2.019214630126953, "learning_rate": 0.0001, "loss": 0.0223, "step": 30600 }, { "epoch": 201.3815789473684, "grad_norm": 1.1416155099868774, "learning_rate": 0.0001, "loss": 0.0187, "step": 30610 }, { "epoch": 201.44736842105263, "grad_norm": 1.8822823762893677, "learning_rate": 0.0001, "loss": 0.0174, "step": 30620 }, { "epoch": 201.51315789473685, "grad_norm": 1.5316903591156006, "learning_rate": 0.0001, "loss": 0.0231, "step": 30630 }, { "epoch": 201.57894736842104, "grad_norm": 1.7801430225372314, "learning_rate": 0.0001, "loss": 0.0193, "step": 30640 }, { "epoch": 201.64473684210526, "grad_norm": 1.3527132272720337, "learning_rate": 0.0001, "loss": 0.0174, "step": 30650 }, { "epoch": 201.71052631578948, "grad_norm": 1.2425373792648315, "learning_rate": 0.0001, "loss": 0.0219, "step": 30660 }, { "epoch": 201.77631578947367, "grad_norm": 1.569126009941101, "learning_rate": 0.0001, "loss": 0.0207, "step": 30670 }, { "epoch": 201.8421052631579, "grad_norm": 1.2191718816757202, "learning_rate": 0.0001, "loss": 0.0221, "step": 30680 }, { "epoch": 201.9078947368421, "grad_norm": 1.8991650342941284, "learning_rate": 0.0001, "loss": 0.0251, "step": 30690 }, { "epoch": 201.97368421052633, "grad_norm": 1.3879530429840088, "learning_rate": 0.0001, "loss": 0.0219, "step": 30700 }, { "epoch": 202.03947368421052, "grad_norm": 1.9551665782928467, "learning_rate": 0.0001, "loss": 0.0215, "step": 30710 }, { "epoch": 202.10526315789474, "grad_norm": 1.9264495372772217, "learning_rate": 0.0001, "loss": 0.0248, "step": 30720 }, { "epoch": 202.17105263157896, "grad_norm": 1.1877387762069702, "learning_rate": 0.0001, "loss": 0.0219, "step": 30730 }, { "epoch": 202.23684210526315, "grad_norm": 1.3885385990142822, "learning_rate": 0.0001, "loss": 0.0182, "step": 30740 }, { "epoch": 202.30263157894737, "grad_norm": 1.178396463394165, "learning_rate": 0.0001, "loss": 0.0192, "step": 30750 }, { "epoch": 202.3684210526316, "grad_norm": 1.757398247718811, "learning_rate": 0.0001, "loss": 0.0196, "step": 30760 }, { "epoch": 202.43421052631578, "grad_norm": 1.768359661102295, "learning_rate": 0.0001, "loss": 0.0219, "step": 30770 }, { "epoch": 202.5, "grad_norm": 1.561580777168274, "learning_rate": 0.0001, "loss": 0.0242, "step": 30780 }, { "epoch": 202.56578947368422, "grad_norm": 1.515398621559143, "learning_rate": 0.0001, "loss": 0.0201, "step": 30790 }, { "epoch": 202.6315789473684, "grad_norm": 1.615315318107605, "learning_rate": 0.0001, "loss": 0.0207, "step": 30800 }, { "epoch": 202.69736842105263, "grad_norm": 1.504817247390747, "learning_rate": 0.0001, "loss": 0.0184, "step": 30810 }, { "epoch": 202.76315789473685, "grad_norm": 1.2672476768493652, "learning_rate": 0.0001, "loss": 0.0199, "step": 30820 }, { "epoch": 202.82894736842104, "grad_norm": 1.5801236629486084, "learning_rate": 0.0001, "loss": 0.0253, "step": 30830 }, { "epoch": 202.89473684210526, "grad_norm": 1.6555253267288208, "learning_rate": 0.0001, "loss": 0.0194, "step": 30840 }, { "epoch": 202.96052631578948, "grad_norm": 1.341805100440979, "learning_rate": 0.0001, "loss": 0.023, "step": 30850 }, { "epoch": 203.02631578947367, "grad_norm": 1.4629896879196167, "learning_rate": 0.0001, "loss": 0.0222, "step": 30860 }, { "epoch": 203.0921052631579, "grad_norm": 1.3491787910461426, "learning_rate": 0.0001, "loss": 0.0212, "step": 30870 }, { "epoch": 203.1578947368421, "grad_norm": 1.1865770816802979, "learning_rate": 0.0001, "loss": 0.019, "step": 30880 }, { "epoch": 203.22368421052633, "grad_norm": 1.6524006128311157, "learning_rate": 0.0001, "loss": 0.0281, "step": 30890 }, { "epoch": 203.28947368421052, "grad_norm": 1.3040587902069092, "learning_rate": 0.0001, "loss": 0.0204, "step": 30900 }, { "epoch": 203.35526315789474, "grad_norm": 1.6916311979293823, "learning_rate": 0.0001, "loss": 0.0237, "step": 30910 }, { "epoch": 203.42105263157896, "grad_norm": 1.4695109128952026, "learning_rate": 0.0001, "loss": 0.0194, "step": 30920 }, { "epoch": 203.48684210526315, "grad_norm": 1.3718390464782715, "learning_rate": 0.0001, "loss": 0.0194, "step": 30930 }, { "epoch": 203.55263157894737, "grad_norm": 1.3557465076446533, "learning_rate": 0.0001, "loss": 0.0191, "step": 30940 }, { "epoch": 203.6184210526316, "grad_norm": 1.696816086769104, "learning_rate": 0.0001, "loss": 0.0176, "step": 30950 }, { "epoch": 203.68421052631578, "grad_norm": 1.7964882850646973, "learning_rate": 0.0001, "loss": 0.0194, "step": 30960 }, { "epoch": 203.75, "grad_norm": 1.567218542098999, "learning_rate": 0.0001, "loss": 0.0218, "step": 30970 }, { "epoch": 203.81578947368422, "grad_norm": 1.6756153106689453, "learning_rate": 0.0001, "loss": 0.0223, "step": 30980 }, { "epoch": 203.8815789473684, "grad_norm": 1.8224167823791504, "learning_rate": 0.0001, "loss": 0.0169, "step": 30990 }, { "epoch": 203.94736842105263, "grad_norm": 1.9230962991714478, "learning_rate": 0.0001, "loss": 0.0203, "step": 31000 }, { "epoch": 204.01315789473685, "grad_norm": 1.8138291835784912, "learning_rate": 0.0001, "loss": 0.0243, "step": 31010 }, { "epoch": 204.07894736842104, "grad_norm": 1.6284130811691284, "learning_rate": 0.0001, "loss": 0.0183, "step": 31020 }, { "epoch": 204.14473684210526, "grad_norm": 1.6433476209640503, "learning_rate": 0.0001, "loss": 0.0238, "step": 31030 }, { "epoch": 204.21052631578948, "grad_norm": 1.628328561782837, "learning_rate": 0.0001, "loss": 0.0192, "step": 31040 }, { "epoch": 204.27631578947367, "grad_norm": 2.0316884517669678, "learning_rate": 0.0001, "loss": 0.018, "step": 31050 }, { "epoch": 204.3421052631579, "grad_norm": 1.375148892402649, "learning_rate": 0.0001, "loss": 0.0196, "step": 31060 }, { "epoch": 204.4078947368421, "grad_norm": 1.7790313959121704, "learning_rate": 0.0001, "loss": 0.0203, "step": 31070 }, { "epoch": 204.47368421052633, "grad_norm": 1.867216944694519, "learning_rate": 0.0001, "loss": 0.0208, "step": 31080 }, { "epoch": 204.53947368421052, "grad_norm": 1.6077388525009155, "learning_rate": 0.0001, "loss": 0.0217, "step": 31090 }, { "epoch": 204.60526315789474, "grad_norm": 1.3491142988204956, "learning_rate": 0.0001, "loss": 0.0204, "step": 31100 }, { "epoch": 204.67105263157896, "grad_norm": 1.6094666719436646, "learning_rate": 0.0001, "loss": 0.0236, "step": 31110 }, { "epoch": 204.73684210526315, "grad_norm": 1.4862449169158936, "learning_rate": 0.0001, "loss": 0.0206, "step": 31120 }, { "epoch": 204.80263157894737, "grad_norm": 1.2085576057434082, "learning_rate": 0.0001, "loss": 0.0228, "step": 31130 }, { "epoch": 204.8684210526316, "grad_norm": 2.0655462741851807, "learning_rate": 0.0001, "loss": 0.0236, "step": 31140 }, { "epoch": 204.93421052631578, "grad_norm": 1.7094095945358276, "learning_rate": 0.0001, "loss": 0.0223, "step": 31150 }, { "epoch": 205.0, "grad_norm": 2.0275492668151855, "learning_rate": 0.0001, "loss": 0.0197, "step": 31160 }, { "epoch": 205.06578947368422, "grad_norm": 2.1301138401031494, "learning_rate": 0.0001, "loss": 0.0174, "step": 31170 }, { "epoch": 205.1315789473684, "grad_norm": 1.7153005599975586, "learning_rate": 0.0001, "loss": 0.0208, "step": 31180 }, { "epoch": 205.19736842105263, "grad_norm": 1.7883518934249878, "learning_rate": 0.0001, "loss": 0.0228, "step": 31190 }, { "epoch": 205.26315789473685, "grad_norm": 1.79774808883667, "learning_rate": 0.0001, "loss": 0.0205, "step": 31200 }, { "epoch": 205.32894736842104, "grad_norm": 1.3827197551727295, "learning_rate": 0.0001, "loss": 0.0211, "step": 31210 }, { "epoch": 205.39473684210526, "grad_norm": 1.0627816915512085, "learning_rate": 0.0001, "loss": 0.0203, "step": 31220 }, { "epoch": 205.46052631578948, "grad_norm": 1.5019917488098145, "learning_rate": 0.0001, "loss": 0.0195, "step": 31230 }, { "epoch": 205.52631578947367, "grad_norm": 1.7205636501312256, "learning_rate": 0.0001, "loss": 0.0217, "step": 31240 }, { "epoch": 205.5921052631579, "grad_norm": 1.1048029661178589, "learning_rate": 0.0001, "loss": 0.0227, "step": 31250 }, { "epoch": 205.6578947368421, "grad_norm": 1.6423404216766357, "learning_rate": 0.0001, "loss": 0.0204, "step": 31260 }, { "epoch": 205.72368421052633, "grad_norm": 1.7275729179382324, "learning_rate": 0.0001, "loss": 0.0179, "step": 31270 }, { "epoch": 205.78947368421052, "grad_norm": 1.9325852394104004, "learning_rate": 0.0001, "loss": 0.0231, "step": 31280 }, { "epoch": 205.85526315789474, "grad_norm": 1.5582325458526611, "learning_rate": 0.0001, "loss": 0.0197, "step": 31290 }, { "epoch": 205.92105263157896, "grad_norm": 1.5213496685028076, "learning_rate": 0.0001, "loss": 0.0207, "step": 31300 }, { "epoch": 205.98684210526315, "grad_norm": 1.0843008756637573, "learning_rate": 0.0001, "loss": 0.0217, "step": 31310 }, { "epoch": 206.05263157894737, "grad_norm": 1.3545747995376587, "learning_rate": 0.0001, "loss": 0.0181, "step": 31320 }, { "epoch": 206.1184210526316, "grad_norm": 1.6205179691314697, "learning_rate": 0.0001, "loss": 0.0197, "step": 31330 }, { "epoch": 206.18421052631578, "grad_norm": 1.5990562438964844, "learning_rate": 0.0001, "loss": 0.0247, "step": 31340 }, { "epoch": 206.25, "grad_norm": 1.8758898973464966, "learning_rate": 0.0001, "loss": 0.0192, "step": 31350 }, { "epoch": 206.31578947368422, "grad_norm": 1.881001591682434, "learning_rate": 0.0001, "loss": 0.0211, "step": 31360 }, { "epoch": 206.3815789473684, "grad_norm": 1.6116578578948975, "learning_rate": 0.0001, "loss": 0.0202, "step": 31370 }, { "epoch": 206.44736842105263, "grad_norm": 1.525825023651123, "learning_rate": 0.0001, "loss": 0.0218, "step": 31380 }, { "epoch": 206.51315789473685, "grad_norm": 1.7037791013717651, "learning_rate": 0.0001, "loss": 0.02, "step": 31390 }, { "epoch": 206.57894736842104, "grad_norm": 1.275388479232788, "learning_rate": 0.0001, "loss": 0.0233, "step": 31400 }, { "epoch": 206.64473684210526, "grad_norm": 1.9510146379470825, "learning_rate": 0.0001, "loss": 0.0192, "step": 31410 }, { "epoch": 206.71052631578948, "grad_norm": 1.9110829830169678, "learning_rate": 0.0001, "loss": 0.0175, "step": 31420 }, { "epoch": 206.77631578947367, "grad_norm": 1.7622538805007935, "learning_rate": 0.0001, "loss": 0.02, "step": 31430 }, { "epoch": 206.8421052631579, "grad_norm": 1.030696153640747, "learning_rate": 0.0001, "loss": 0.0251, "step": 31440 }, { "epoch": 206.9078947368421, "grad_norm": 2.1570944786071777, "learning_rate": 0.0001, "loss": 0.0198, "step": 31450 }, { "epoch": 206.97368421052633, "grad_norm": 1.6286648511886597, "learning_rate": 0.0001, "loss": 0.0201, "step": 31460 }, { "epoch": 207.03947368421052, "grad_norm": 1.047431468963623, "learning_rate": 0.0001, "loss": 0.022, "step": 31470 }, { "epoch": 207.10526315789474, "grad_norm": 1.906747579574585, "learning_rate": 0.0001, "loss": 0.0209, "step": 31480 }, { "epoch": 207.17105263157896, "grad_norm": 1.61786687374115, "learning_rate": 0.0001, "loss": 0.0222, "step": 31490 }, { "epoch": 207.23684210526315, "grad_norm": 1.8046218156814575, "learning_rate": 0.0001, "loss": 0.0205, "step": 31500 }, { "epoch": 207.30263157894737, "grad_norm": 1.8618955612182617, "learning_rate": 0.0001, "loss": 0.0205, "step": 31510 }, { "epoch": 207.3684210526316, "grad_norm": 1.6168882846832275, "learning_rate": 0.0001, "loss": 0.0198, "step": 31520 }, { "epoch": 207.43421052631578, "grad_norm": 2.2711944580078125, "learning_rate": 0.0001, "loss": 0.0217, "step": 31530 }, { "epoch": 207.5, "grad_norm": 2.1742124557495117, "learning_rate": 0.0001, "loss": 0.0185, "step": 31540 }, { "epoch": 207.56578947368422, "grad_norm": 1.9432016611099243, "learning_rate": 0.0001, "loss": 0.022, "step": 31550 }, { "epoch": 207.6315789473684, "grad_norm": 1.82257878780365, "learning_rate": 0.0001, "loss": 0.0195, "step": 31560 }, { "epoch": 207.69736842105263, "grad_norm": 2.324435234069824, "learning_rate": 0.0001, "loss": 0.0235, "step": 31570 }, { "epoch": 207.76315789473685, "grad_norm": 2.51759672164917, "learning_rate": 0.0001, "loss": 0.0192, "step": 31580 }, { "epoch": 207.82894736842104, "grad_norm": 1.7208609580993652, "learning_rate": 0.0001, "loss": 0.0195, "step": 31590 }, { "epoch": 207.89473684210526, "grad_norm": 2.197638511657715, "learning_rate": 0.0001, "loss": 0.0187, "step": 31600 }, { "epoch": 207.96052631578948, "grad_norm": 1.5184706449508667, "learning_rate": 0.0001, "loss": 0.0219, "step": 31610 }, { "epoch": 208.02631578947367, "grad_norm": 1.4848127365112305, "learning_rate": 0.0001, "loss": 0.0163, "step": 31620 }, { "epoch": 208.0921052631579, "grad_norm": 1.901423692703247, "learning_rate": 0.0001, "loss": 0.0195, "step": 31630 }, { "epoch": 208.1578947368421, "grad_norm": 1.7514464855194092, "learning_rate": 0.0001, "loss": 0.0216, "step": 31640 }, { "epoch": 208.22368421052633, "grad_norm": 1.487572431564331, "learning_rate": 0.0001, "loss": 0.0187, "step": 31650 }, { "epoch": 208.28947368421052, "grad_norm": 1.9583408832550049, "learning_rate": 0.0001, "loss": 0.0209, "step": 31660 }, { "epoch": 208.35526315789474, "grad_norm": 1.676891803741455, "learning_rate": 0.0001, "loss": 0.0189, "step": 31670 }, { "epoch": 208.42105263157896, "grad_norm": 1.5929988622665405, "learning_rate": 0.0001, "loss": 0.0188, "step": 31680 }, { "epoch": 208.48684210526315, "grad_norm": 1.2992725372314453, "learning_rate": 0.0001, "loss": 0.0191, "step": 31690 }, { "epoch": 208.55263157894737, "grad_norm": 1.9675555229187012, "learning_rate": 0.0001, "loss": 0.0213, "step": 31700 }, { "epoch": 208.6184210526316, "grad_norm": 1.7783570289611816, "learning_rate": 0.0001, "loss": 0.0224, "step": 31710 }, { "epoch": 208.68421052631578, "grad_norm": 2.063783884048462, "learning_rate": 0.0001, "loss": 0.0196, "step": 31720 }, { "epoch": 208.75, "grad_norm": 2.097982883453369, "learning_rate": 0.0001, "loss": 0.0191, "step": 31730 }, { "epoch": 208.81578947368422, "grad_norm": 1.7320441007614136, "learning_rate": 0.0001, "loss": 0.02, "step": 31740 }, { "epoch": 208.8815789473684, "grad_norm": 1.7295154333114624, "learning_rate": 0.0001, "loss": 0.0227, "step": 31750 }, { "epoch": 208.94736842105263, "grad_norm": 1.3698469400405884, "learning_rate": 0.0001, "loss": 0.0187, "step": 31760 }, { "epoch": 209.01315789473685, "grad_norm": 1.6702486276626587, "learning_rate": 0.0001, "loss": 0.0231, "step": 31770 }, { "epoch": 209.07894736842104, "grad_norm": 1.8951776027679443, "learning_rate": 0.0001, "loss": 0.0219, "step": 31780 }, { "epoch": 209.14473684210526, "grad_norm": 1.7318812608718872, "learning_rate": 0.0001, "loss": 0.0192, "step": 31790 }, { "epoch": 209.21052631578948, "grad_norm": 1.6184018850326538, "learning_rate": 0.0001, "loss": 0.0198, "step": 31800 }, { "epoch": 209.27631578947367, "grad_norm": 1.7655669450759888, "learning_rate": 0.0001, "loss": 0.0198, "step": 31810 }, { "epoch": 209.3421052631579, "grad_norm": 1.685247540473938, "learning_rate": 0.0001, "loss": 0.0177, "step": 31820 }, { "epoch": 209.4078947368421, "grad_norm": 1.2856509685516357, "learning_rate": 0.0001, "loss": 0.0215, "step": 31830 }, { "epoch": 209.47368421052633, "grad_norm": 2.1353418827056885, "learning_rate": 0.0001, "loss": 0.0183, "step": 31840 }, { "epoch": 209.53947368421052, "grad_norm": 1.7726033926010132, "learning_rate": 0.0001, "loss": 0.0194, "step": 31850 }, { "epoch": 209.60526315789474, "grad_norm": 1.8259968757629395, "learning_rate": 0.0001, "loss": 0.019, "step": 31860 }, { "epoch": 209.67105263157896, "grad_norm": 1.9122925996780396, "learning_rate": 0.0001, "loss": 0.0185, "step": 31870 }, { "epoch": 209.73684210526315, "grad_norm": 1.7333896160125732, "learning_rate": 0.0001, "loss": 0.022, "step": 31880 }, { "epoch": 209.80263157894737, "grad_norm": 1.230520248413086, "learning_rate": 0.0001, "loss": 0.0167, "step": 31890 }, { "epoch": 209.8684210526316, "grad_norm": 1.6185321807861328, "learning_rate": 0.0001, "loss": 0.0186, "step": 31900 }, { "epoch": 209.93421052631578, "grad_norm": 1.5841702222824097, "learning_rate": 0.0001, "loss": 0.0286, "step": 31910 }, { "epoch": 210.0, "grad_norm": 2.142430067062378, "learning_rate": 0.0001, "loss": 0.0242, "step": 31920 }, { "epoch": 210.06578947368422, "grad_norm": 1.9651762247085571, "learning_rate": 0.0001, "loss": 0.0208, "step": 31930 }, { "epoch": 210.1315789473684, "grad_norm": 1.5994958877563477, "learning_rate": 0.0001, "loss": 0.0197, "step": 31940 }, { "epoch": 210.19736842105263, "grad_norm": 1.7306599617004395, "learning_rate": 0.0001, "loss": 0.0214, "step": 31950 }, { "epoch": 210.26315789473685, "grad_norm": 1.8339742422103882, "learning_rate": 0.0001, "loss": 0.0197, "step": 31960 }, { "epoch": 210.32894736842104, "grad_norm": 1.587613821029663, "learning_rate": 0.0001, "loss": 0.0212, "step": 31970 }, { "epoch": 210.39473684210526, "grad_norm": 1.5708545446395874, "learning_rate": 0.0001, "loss": 0.0208, "step": 31980 }, { "epoch": 210.46052631578948, "grad_norm": 1.5486080646514893, "learning_rate": 0.0001, "loss": 0.0187, "step": 31990 }, { "epoch": 210.52631578947367, "grad_norm": 1.3415610790252686, "learning_rate": 0.0001, "loss": 0.0195, "step": 32000 }, { "epoch": 210.5921052631579, "grad_norm": 1.4788271188735962, "learning_rate": 0.0001, "loss": 0.0212, "step": 32010 }, { "epoch": 210.6578947368421, "grad_norm": 1.6119464635849, "learning_rate": 0.0001, "loss": 0.0198, "step": 32020 }, { "epoch": 210.72368421052633, "grad_norm": 1.5892401933670044, "learning_rate": 0.0001, "loss": 0.0205, "step": 32030 }, { "epoch": 210.78947368421052, "grad_norm": 2.053013563156128, "learning_rate": 0.0001, "loss": 0.0237, "step": 32040 }, { "epoch": 210.85526315789474, "grad_norm": 2.3230509757995605, "learning_rate": 0.0001, "loss": 0.0236, "step": 32050 }, { "epoch": 210.92105263157896, "grad_norm": 2.06728196144104, "learning_rate": 0.0001, "loss": 0.0205, "step": 32060 }, { "epoch": 210.98684210526315, "grad_norm": 2.00748610496521, "learning_rate": 0.0001, "loss": 0.0199, "step": 32070 }, { "epoch": 211.05263157894737, "grad_norm": 2.3134827613830566, "learning_rate": 0.0001, "loss": 0.02, "step": 32080 }, { "epoch": 211.1184210526316, "grad_norm": 1.4831547737121582, "learning_rate": 0.0001, "loss": 0.0184, "step": 32090 }, { "epoch": 211.18421052631578, "grad_norm": 1.791903018951416, "learning_rate": 0.0001, "loss": 0.0206, "step": 32100 }, { "epoch": 211.25, "grad_norm": 1.9414644241333008, "learning_rate": 0.0001, "loss": 0.0221, "step": 32110 }, { "epoch": 211.31578947368422, "grad_norm": 1.7492371797561646, "learning_rate": 0.0001, "loss": 0.0226, "step": 32120 }, { "epoch": 211.3815789473684, "grad_norm": 2.0842301845550537, "learning_rate": 0.0001, "loss": 0.021, "step": 32130 }, { "epoch": 211.44736842105263, "grad_norm": 1.5090574026107788, "learning_rate": 0.0001, "loss": 0.0204, "step": 32140 }, { "epoch": 211.51315789473685, "grad_norm": 1.7957526445388794, "learning_rate": 0.0001, "loss": 0.0198, "step": 32150 }, { "epoch": 211.57894736842104, "grad_norm": 1.5593161582946777, "learning_rate": 0.0001, "loss": 0.017, "step": 32160 }, { "epoch": 211.64473684210526, "grad_norm": 1.3850765228271484, "learning_rate": 0.0001, "loss": 0.0195, "step": 32170 }, { "epoch": 211.71052631578948, "grad_norm": 1.4125727415084839, "learning_rate": 0.0001, "loss": 0.0196, "step": 32180 }, { "epoch": 211.77631578947367, "grad_norm": 1.6527589559555054, "learning_rate": 0.0001, "loss": 0.0178, "step": 32190 }, { "epoch": 211.8421052631579, "grad_norm": 1.9852145910263062, "learning_rate": 0.0001, "loss": 0.0227, "step": 32200 }, { "epoch": 211.9078947368421, "grad_norm": 1.926606297492981, "learning_rate": 0.0001, "loss": 0.0179, "step": 32210 }, { "epoch": 211.97368421052633, "grad_norm": 1.8793563842773438, "learning_rate": 0.0001, "loss": 0.0271, "step": 32220 }, { "epoch": 212.03947368421052, "grad_norm": 1.575800895690918, "learning_rate": 0.0001, "loss": 0.0177, "step": 32230 }, { "epoch": 212.10526315789474, "grad_norm": 1.6938706636428833, "learning_rate": 0.0001, "loss": 0.0192, "step": 32240 }, { "epoch": 212.17105263157896, "grad_norm": 1.5453282594680786, "learning_rate": 0.0001, "loss": 0.0224, "step": 32250 }, { "epoch": 212.23684210526315, "grad_norm": 2.0789339542388916, "learning_rate": 0.0001, "loss": 0.0189, "step": 32260 }, { "epoch": 212.30263157894737, "grad_norm": 1.279339075088501, "learning_rate": 0.0001, "loss": 0.0214, "step": 32270 }, { "epoch": 212.3684210526316, "grad_norm": 1.7247045040130615, "learning_rate": 0.0001, "loss": 0.0212, "step": 32280 }, { "epoch": 212.43421052631578, "grad_norm": 1.9526416063308716, "learning_rate": 0.0001, "loss": 0.0195, "step": 32290 }, { "epoch": 212.5, "grad_norm": 2.395677089691162, "learning_rate": 0.0001, "loss": 0.0201, "step": 32300 }, { "epoch": 212.56578947368422, "grad_norm": 1.486334204673767, "learning_rate": 0.0001, "loss": 0.0207, "step": 32310 }, { "epoch": 212.6315789473684, "grad_norm": 1.6074014902114868, "learning_rate": 0.0001, "loss": 0.0215, "step": 32320 }, { "epoch": 212.69736842105263, "grad_norm": 1.644374966621399, "learning_rate": 0.0001, "loss": 0.0228, "step": 32330 }, { "epoch": 212.76315789473685, "grad_norm": 2.11665940284729, "learning_rate": 0.0001, "loss": 0.0179, "step": 32340 }, { "epoch": 212.82894736842104, "grad_norm": 1.7390121221542358, "learning_rate": 0.0001, "loss": 0.0211, "step": 32350 }, { "epoch": 212.89473684210526, "grad_norm": 2.237168312072754, "learning_rate": 0.0001, "loss": 0.0222, "step": 32360 }, { "epoch": 212.96052631578948, "grad_norm": 2.0192513465881348, "learning_rate": 0.0001, "loss": 0.0185, "step": 32370 }, { "epoch": 213.02631578947367, "grad_norm": 2.019789457321167, "learning_rate": 0.0001, "loss": 0.0218, "step": 32380 }, { "epoch": 213.0921052631579, "grad_norm": 2.1887383460998535, "learning_rate": 0.0001, "loss": 0.0195, "step": 32390 }, { "epoch": 213.1578947368421, "grad_norm": 1.3362786769866943, "learning_rate": 0.0001, "loss": 0.02, "step": 32400 }, { "epoch": 213.22368421052633, "grad_norm": 1.797851324081421, "learning_rate": 0.0001, "loss": 0.0208, "step": 32410 }, { "epoch": 213.28947368421052, "grad_norm": 1.6571106910705566, "learning_rate": 0.0001, "loss": 0.0193, "step": 32420 }, { "epoch": 213.35526315789474, "grad_norm": 1.5875810384750366, "learning_rate": 0.0001, "loss": 0.0183, "step": 32430 }, { "epoch": 213.42105263157896, "grad_norm": 1.847184419631958, "learning_rate": 0.0001, "loss": 0.0211, "step": 32440 }, { "epoch": 213.48684210526315, "grad_norm": 1.6170673370361328, "learning_rate": 0.0001, "loss": 0.0203, "step": 32450 }, { "epoch": 213.55263157894737, "grad_norm": 1.556815505027771, "learning_rate": 0.0001, "loss": 0.0235, "step": 32460 }, { "epoch": 213.6184210526316, "grad_norm": 1.562529444694519, "learning_rate": 0.0001, "loss": 0.0236, "step": 32470 }, { "epoch": 213.68421052631578, "grad_norm": 1.4311121702194214, "learning_rate": 0.0001, "loss": 0.0229, "step": 32480 }, { "epoch": 213.75, "grad_norm": 1.6788355112075806, "learning_rate": 0.0001, "loss": 0.0197, "step": 32490 }, { "epoch": 213.81578947368422, "grad_norm": 1.266538143157959, "learning_rate": 0.0001, "loss": 0.0208, "step": 32500 }, { "epoch": 213.8815789473684, "grad_norm": 1.5936938524246216, "learning_rate": 0.0001, "loss": 0.0226, "step": 32510 }, { "epoch": 213.94736842105263, "grad_norm": 1.5481919050216675, "learning_rate": 0.0001, "loss": 0.0213, "step": 32520 }, { "epoch": 214.01315789473685, "grad_norm": 1.995795488357544, "learning_rate": 0.0001, "loss": 0.0204, "step": 32530 }, { "epoch": 214.07894736842104, "grad_norm": 1.645315408706665, "learning_rate": 0.0001, "loss": 0.0188, "step": 32540 }, { "epoch": 214.14473684210526, "grad_norm": 1.5984383821487427, "learning_rate": 0.0001, "loss": 0.0197, "step": 32550 }, { "epoch": 214.21052631578948, "grad_norm": 1.7336411476135254, "learning_rate": 0.0001, "loss": 0.024, "step": 32560 }, { "epoch": 214.27631578947367, "grad_norm": 1.6104766130447388, "learning_rate": 0.0001, "loss": 0.022, "step": 32570 }, { "epoch": 214.3421052631579, "grad_norm": 1.4508626461029053, "learning_rate": 0.0001, "loss": 0.018, "step": 32580 }, { "epoch": 214.4078947368421, "grad_norm": 1.6178287267684937, "learning_rate": 0.0001, "loss": 0.0216, "step": 32590 }, { "epoch": 214.47368421052633, "grad_norm": 1.8017045259475708, "learning_rate": 0.0001, "loss": 0.0206, "step": 32600 }, { "epoch": 214.53947368421052, "grad_norm": 1.350999355316162, "learning_rate": 0.0001, "loss": 0.0206, "step": 32610 }, { "epoch": 214.60526315789474, "grad_norm": 1.7437649965286255, "learning_rate": 0.0001, "loss": 0.0225, "step": 32620 }, { "epoch": 214.67105263157896, "grad_norm": 1.7614089250564575, "learning_rate": 0.0001, "loss": 0.0215, "step": 32630 }, { "epoch": 214.73684210526315, "grad_norm": 1.560103416442871, "learning_rate": 0.0001, "loss": 0.0191, "step": 32640 }, { "epoch": 214.80263157894737, "grad_norm": 1.5458365678787231, "learning_rate": 0.0001, "loss": 0.0201, "step": 32650 }, { "epoch": 214.8684210526316, "grad_norm": 1.8855100870132446, "learning_rate": 0.0001, "loss": 0.0193, "step": 32660 }, { "epoch": 214.93421052631578, "grad_norm": 1.780310869216919, "learning_rate": 0.0001, "loss": 0.0222, "step": 32670 }, { "epoch": 215.0, "grad_norm": 2.024651050567627, "learning_rate": 0.0001, "loss": 0.0187, "step": 32680 }, { "epoch": 215.06578947368422, "grad_norm": 1.6653035879135132, "learning_rate": 0.0001, "loss": 0.0184, "step": 32690 }, { "epoch": 215.1315789473684, "grad_norm": 1.8294401168823242, "learning_rate": 0.0001, "loss": 0.0195, "step": 32700 }, { "epoch": 215.19736842105263, "grad_norm": 1.9059126377105713, "learning_rate": 0.0001, "loss": 0.0248, "step": 32710 }, { "epoch": 215.26315789473685, "grad_norm": 1.7675671577453613, "learning_rate": 0.0001, "loss": 0.0198, "step": 32720 }, { "epoch": 215.32894736842104, "grad_norm": 1.75802743434906, "learning_rate": 0.0001, "loss": 0.0172, "step": 32730 }, { "epoch": 215.39473684210526, "grad_norm": 1.5701301097869873, "learning_rate": 0.0001, "loss": 0.018, "step": 32740 }, { "epoch": 215.46052631578948, "grad_norm": 1.50315523147583, "learning_rate": 0.0001, "loss": 0.0168, "step": 32750 }, { "epoch": 215.52631578947367, "grad_norm": 1.7359874248504639, "learning_rate": 0.0001, "loss": 0.0197, "step": 32760 }, { "epoch": 215.5921052631579, "grad_norm": 1.58086359500885, "learning_rate": 0.0001, "loss": 0.0228, "step": 32770 }, { "epoch": 215.6578947368421, "grad_norm": 1.7424448728561401, "learning_rate": 0.0001, "loss": 0.0189, "step": 32780 }, { "epoch": 215.72368421052633, "grad_norm": 1.9194139242172241, "learning_rate": 0.0001, "loss": 0.0187, "step": 32790 }, { "epoch": 215.78947368421052, "grad_norm": 1.5939202308654785, "learning_rate": 0.0001, "loss": 0.0198, "step": 32800 }, { "epoch": 215.85526315789474, "grad_norm": 1.5494540929794312, "learning_rate": 0.0001, "loss": 0.0257, "step": 32810 }, { "epoch": 215.92105263157896, "grad_norm": 1.864775538444519, "learning_rate": 0.0001, "loss": 0.0204, "step": 32820 }, { "epoch": 215.98684210526315, "grad_norm": 1.5371991395950317, "learning_rate": 0.0001, "loss": 0.022, "step": 32830 }, { "epoch": 216.05263157894737, "grad_norm": 1.3041534423828125, "learning_rate": 0.0001, "loss": 0.0202, "step": 32840 }, { "epoch": 216.1184210526316, "grad_norm": 1.9525803327560425, "learning_rate": 0.0001, "loss": 0.0175, "step": 32850 }, { "epoch": 216.18421052631578, "grad_norm": 2.2468392848968506, "learning_rate": 0.0001, "loss": 0.0242, "step": 32860 }, { "epoch": 216.25, "grad_norm": 1.9308664798736572, "learning_rate": 0.0001, "loss": 0.0208, "step": 32870 }, { "epoch": 216.31578947368422, "grad_norm": 2.1628239154815674, "learning_rate": 0.0001, "loss": 0.02, "step": 32880 }, { "epoch": 216.3815789473684, "grad_norm": 1.7279856204986572, "learning_rate": 0.0001, "loss": 0.021, "step": 32890 }, { "epoch": 216.44736842105263, "grad_norm": 1.5904881954193115, "learning_rate": 0.0001, "loss": 0.0182, "step": 32900 }, { "epoch": 216.51315789473685, "grad_norm": 1.3136777877807617, "learning_rate": 0.0001, "loss": 0.0196, "step": 32910 }, { "epoch": 216.57894736842104, "grad_norm": 2.6427407264709473, "learning_rate": 0.0001, "loss": 0.0264, "step": 32920 }, { "epoch": 216.64473684210526, "grad_norm": 1.9672690629959106, "learning_rate": 0.0001, "loss": 0.0189, "step": 32930 }, { "epoch": 216.71052631578948, "grad_norm": 2.135986328125, "learning_rate": 0.0001, "loss": 0.0171, "step": 32940 }, { "epoch": 216.77631578947367, "grad_norm": 2.2665491104125977, "learning_rate": 0.0001, "loss": 0.0191, "step": 32950 }, { "epoch": 216.8421052631579, "grad_norm": 1.7183866500854492, "learning_rate": 0.0001, "loss": 0.0208, "step": 32960 }, { "epoch": 216.9078947368421, "grad_norm": 1.5164421796798706, "learning_rate": 0.0001, "loss": 0.0179, "step": 32970 }, { "epoch": 216.97368421052633, "grad_norm": 1.9096405506134033, "learning_rate": 0.0001, "loss": 0.0182, "step": 32980 }, { "epoch": 217.03947368421052, "grad_norm": 1.7999237775802612, "learning_rate": 0.0001, "loss": 0.0197, "step": 32990 }, { "epoch": 217.10526315789474, "grad_norm": 1.7066471576690674, "learning_rate": 0.0001, "loss": 0.0209, "step": 33000 }, { "epoch": 217.17105263157896, "grad_norm": 1.5361874103546143, "learning_rate": 0.0001, "loss": 0.0212, "step": 33010 }, { "epoch": 217.23684210526315, "grad_norm": 1.6301158666610718, "learning_rate": 0.0001, "loss": 0.0182, "step": 33020 }, { "epoch": 217.30263157894737, "grad_norm": 1.9637690782546997, "learning_rate": 0.0001, "loss": 0.0214, "step": 33030 }, { "epoch": 217.3684210526316, "grad_norm": 1.3986831903457642, "learning_rate": 0.0001, "loss": 0.0202, "step": 33040 }, { "epoch": 217.43421052631578, "grad_norm": 1.584710717201233, "learning_rate": 0.0001, "loss": 0.0211, "step": 33050 }, { "epoch": 217.5, "grad_norm": 1.6587141752243042, "learning_rate": 0.0001, "loss": 0.0231, "step": 33060 }, { "epoch": 217.56578947368422, "grad_norm": 1.2744220495224, "learning_rate": 0.0001, "loss": 0.0169, "step": 33070 }, { "epoch": 217.6315789473684, "grad_norm": 1.3339089155197144, "learning_rate": 0.0001, "loss": 0.018, "step": 33080 }, { "epoch": 217.69736842105263, "grad_norm": 1.8888427019119263, "learning_rate": 0.0001, "loss": 0.0197, "step": 33090 }, { "epoch": 217.76315789473685, "grad_norm": 1.5098087787628174, "learning_rate": 0.0001, "loss": 0.0248, "step": 33100 }, { "epoch": 217.82894736842104, "grad_norm": 1.7567200660705566, "learning_rate": 0.0001, "loss": 0.0199, "step": 33110 }, { "epoch": 217.89473684210526, "grad_norm": 1.6109552383422852, "learning_rate": 0.0001, "loss": 0.0208, "step": 33120 }, { "epoch": 217.96052631578948, "grad_norm": 1.5790889263153076, "learning_rate": 0.0001, "loss": 0.0171, "step": 33130 }, { "epoch": 218.02631578947367, "grad_norm": 1.1049094200134277, "learning_rate": 0.0001, "loss": 0.0195, "step": 33140 }, { "epoch": 218.0921052631579, "grad_norm": 2.148970603942871, "learning_rate": 0.0001, "loss": 0.0193, "step": 33150 }, { "epoch": 218.1578947368421, "grad_norm": 1.596781611442566, "learning_rate": 0.0001, "loss": 0.0215, "step": 33160 }, { "epoch": 218.22368421052633, "grad_norm": 1.7211627960205078, "learning_rate": 0.0001, "loss": 0.0241, "step": 33170 }, { "epoch": 218.28947368421052, "grad_norm": 1.5899893045425415, "learning_rate": 0.0001, "loss": 0.0252, "step": 33180 }, { "epoch": 218.35526315789474, "grad_norm": 1.6731115579605103, "learning_rate": 0.0001, "loss": 0.0197, "step": 33190 }, { "epoch": 218.42105263157896, "grad_norm": 1.3599352836608887, "learning_rate": 0.0001, "loss": 0.0192, "step": 33200 }, { "epoch": 218.48684210526315, "grad_norm": 1.8465315103530884, "learning_rate": 0.0001, "loss": 0.019, "step": 33210 }, { "epoch": 218.55263157894737, "grad_norm": 1.3374749422073364, "learning_rate": 0.0001, "loss": 0.0215, "step": 33220 }, { "epoch": 218.6184210526316, "grad_norm": 1.8097374439239502, "learning_rate": 0.0001, "loss": 0.0203, "step": 33230 }, { "epoch": 218.68421052631578, "grad_norm": 1.7035777568817139, "learning_rate": 0.0001, "loss": 0.021, "step": 33240 }, { "epoch": 218.75, "grad_norm": 1.5182873010635376, "learning_rate": 0.0001, "loss": 0.0217, "step": 33250 }, { "epoch": 218.81578947368422, "grad_norm": 1.3759922981262207, "learning_rate": 0.0001, "loss": 0.0197, "step": 33260 }, { "epoch": 218.8815789473684, "grad_norm": 2.101545572280884, "learning_rate": 0.0001, "loss": 0.0202, "step": 33270 }, { "epoch": 218.94736842105263, "grad_norm": 1.5454655885696411, "learning_rate": 0.0001, "loss": 0.0227, "step": 33280 }, { "epoch": 219.01315789473685, "grad_norm": 2.1696059703826904, "learning_rate": 0.0001, "loss": 0.0199, "step": 33290 }, { "epoch": 219.07894736842104, "grad_norm": 1.7725549936294556, "learning_rate": 0.0001, "loss": 0.0189, "step": 33300 }, { "epoch": 219.14473684210526, "grad_norm": 1.846566915512085, "learning_rate": 0.0001, "loss": 0.0182, "step": 33310 }, { "epoch": 219.21052631578948, "grad_norm": 1.7998697757720947, "learning_rate": 0.0001, "loss": 0.0204, "step": 33320 }, { "epoch": 219.27631578947367, "grad_norm": 1.8119899034500122, "learning_rate": 0.0001, "loss": 0.0173, "step": 33330 }, { "epoch": 219.3421052631579, "grad_norm": 1.77790105342865, "learning_rate": 0.0001, "loss": 0.0202, "step": 33340 }, { "epoch": 219.4078947368421, "grad_norm": 2.147890090942383, "learning_rate": 0.0001, "loss": 0.022, "step": 33350 }, { "epoch": 219.47368421052633, "grad_norm": 1.1822025775909424, "learning_rate": 0.0001, "loss": 0.0211, "step": 33360 }, { "epoch": 219.53947368421052, "grad_norm": 1.7514559030532837, "learning_rate": 0.0001, "loss": 0.0207, "step": 33370 }, { "epoch": 219.60526315789474, "grad_norm": 1.5586379766464233, "learning_rate": 0.0001, "loss": 0.0173, "step": 33380 }, { "epoch": 219.67105263157896, "grad_norm": 1.6250948905944824, "learning_rate": 0.0001, "loss": 0.021, "step": 33390 }, { "epoch": 219.73684210526315, "grad_norm": 1.582009196281433, "learning_rate": 0.0001, "loss": 0.0202, "step": 33400 }, { "epoch": 219.80263157894737, "grad_norm": 1.2273396253585815, "learning_rate": 0.0001, "loss": 0.0221, "step": 33410 }, { "epoch": 219.8684210526316, "grad_norm": 1.5249087810516357, "learning_rate": 0.0001, "loss": 0.0231, "step": 33420 }, { "epoch": 219.93421052631578, "grad_norm": 1.619397521018982, "learning_rate": 0.0001, "loss": 0.0222, "step": 33430 }, { "epoch": 220.0, "grad_norm": 1.7138015031814575, "learning_rate": 0.0001, "loss": 0.024, "step": 33440 }, { "epoch": 220.06578947368422, "grad_norm": 1.2745755910873413, "learning_rate": 0.0001, "loss": 0.0192, "step": 33450 }, { "epoch": 220.1315789473684, "grad_norm": 1.776787519454956, "learning_rate": 0.0001, "loss": 0.0195, "step": 33460 }, { "epoch": 220.19736842105263, "grad_norm": 1.9336074590682983, "learning_rate": 0.0001, "loss": 0.0196, "step": 33470 }, { "epoch": 220.26315789473685, "grad_norm": 1.426430583000183, "learning_rate": 0.0001, "loss": 0.0205, "step": 33480 }, { "epoch": 220.32894736842104, "grad_norm": 1.357064127922058, "learning_rate": 0.0001, "loss": 0.0213, "step": 33490 }, { "epoch": 220.39473684210526, "grad_norm": 1.5292332172393799, "learning_rate": 0.0001, "loss": 0.0199, "step": 33500 }, { "epoch": 220.46052631578948, "grad_norm": 1.6075299978256226, "learning_rate": 0.0001, "loss": 0.0181, "step": 33510 }, { "epoch": 220.52631578947367, "grad_norm": 1.62702214717865, "learning_rate": 0.0001, "loss": 0.019, "step": 33520 }, { "epoch": 220.5921052631579, "grad_norm": 1.994361162185669, "learning_rate": 0.0001, "loss": 0.0184, "step": 33530 }, { "epoch": 220.6578947368421, "grad_norm": 2.0482890605926514, "learning_rate": 0.0001, "loss": 0.0185, "step": 33540 }, { "epoch": 220.72368421052633, "grad_norm": 2.009822130203247, "learning_rate": 0.0001, "loss": 0.0282, "step": 33550 }, { "epoch": 220.78947368421052, "grad_norm": 1.7584009170532227, "learning_rate": 0.0001, "loss": 0.0188, "step": 33560 }, { "epoch": 220.85526315789474, "grad_norm": 1.283975601196289, "learning_rate": 0.0001, "loss": 0.0185, "step": 33570 }, { "epoch": 220.92105263157896, "grad_norm": 1.0745770931243896, "learning_rate": 0.0001, "loss": 0.0201, "step": 33580 }, { "epoch": 220.98684210526315, "grad_norm": 1.8549362421035767, "learning_rate": 0.0001, "loss": 0.0233, "step": 33590 }, { "epoch": 221.05263157894737, "grad_norm": 1.7180960178375244, "learning_rate": 0.0001, "loss": 0.021, "step": 33600 }, { "epoch": 221.1184210526316, "grad_norm": 1.6255396604537964, "learning_rate": 0.0001, "loss": 0.0203, "step": 33610 }, { "epoch": 221.18421052631578, "grad_norm": 1.1422349214553833, "learning_rate": 0.0001, "loss": 0.0214, "step": 33620 }, { "epoch": 221.25, "grad_norm": 1.7692971229553223, "learning_rate": 0.0001, "loss": 0.0196, "step": 33630 }, { "epoch": 221.31578947368422, "grad_norm": 1.6414830684661865, "learning_rate": 0.0001, "loss": 0.0196, "step": 33640 }, { "epoch": 221.3815789473684, "grad_norm": 1.4354485273361206, "learning_rate": 0.0001, "loss": 0.0214, "step": 33650 }, { "epoch": 221.44736842105263, "grad_norm": 1.5618289709091187, "learning_rate": 0.0001, "loss": 0.0199, "step": 33660 }, { "epoch": 221.51315789473685, "grad_norm": 1.7837870121002197, "learning_rate": 0.0001, "loss": 0.0216, "step": 33670 }, { "epoch": 221.57894736842104, "grad_norm": 1.5376571416854858, "learning_rate": 0.0001, "loss": 0.0197, "step": 33680 }, { "epoch": 221.64473684210526, "grad_norm": 1.7738263607025146, "learning_rate": 0.0001, "loss": 0.0222, "step": 33690 }, { "epoch": 221.71052631578948, "grad_norm": 1.8905867338180542, "learning_rate": 0.0001, "loss": 0.022, "step": 33700 }, { "epoch": 221.77631578947367, "grad_norm": 1.5403780937194824, "learning_rate": 0.0001, "loss": 0.0227, "step": 33710 }, { "epoch": 221.8421052631579, "grad_norm": 1.877205729484558, "learning_rate": 0.0001, "loss": 0.0186, "step": 33720 }, { "epoch": 221.9078947368421, "grad_norm": 1.651350498199463, "learning_rate": 0.0001, "loss": 0.0206, "step": 33730 }, { "epoch": 221.97368421052633, "grad_norm": 1.4129379987716675, "learning_rate": 0.0001, "loss": 0.0209, "step": 33740 }, { "epoch": 222.03947368421052, "grad_norm": 1.5535253286361694, "learning_rate": 0.0001, "loss": 0.0163, "step": 33750 }, { "epoch": 222.10526315789474, "grad_norm": 1.443306803703308, "learning_rate": 0.0001, "loss": 0.0245, "step": 33760 }, { "epoch": 222.17105263157896, "grad_norm": 1.7619327306747437, "learning_rate": 0.0001, "loss": 0.0205, "step": 33770 }, { "epoch": 222.23684210526315, "grad_norm": 2.063007116317749, "learning_rate": 0.0001, "loss": 0.0187, "step": 33780 }, { "epoch": 222.30263157894737, "grad_norm": 2.092820644378662, "learning_rate": 0.0001, "loss": 0.0217, "step": 33790 }, { "epoch": 222.3684210526316, "grad_norm": 1.8833330869674683, "learning_rate": 0.0001, "loss": 0.0186, "step": 33800 }, { "epoch": 222.43421052631578, "grad_norm": 2.1867153644561768, "learning_rate": 0.0001, "loss": 0.0191, "step": 33810 }, { "epoch": 222.5, "grad_norm": 2.0753636360168457, "learning_rate": 0.0001, "loss": 0.0213, "step": 33820 }, { "epoch": 222.56578947368422, "grad_norm": 2.269841432571411, "learning_rate": 0.0001, "loss": 0.0181, "step": 33830 }, { "epoch": 222.6315789473684, "grad_norm": 1.7979649305343628, "learning_rate": 0.0001, "loss": 0.0205, "step": 33840 }, { "epoch": 222.69736842105263, "grad_norm": 1.4020638465881348, "learning_rate": 0.0001, "loss": 0.0174, "step": 33850 }, { "epoch": 222.76315789473685, "grad_norm": 1.5162993669509888, "learning_rate": 0.0001, "loss": 0.0257, "step": 33860 }, { "epoch": 222.82894736842104, "grad_norm": 1.892749309539795, "learning_rate": 0.0001, "loss": 0.0184, "step": 33870 }, { "epoch": 222.89473684210526, "grad_norm": 1.7892225980758667, "learning_rate": 0.0001, "loss": 0.0169, "step": 33880 }, { "epoch": 222.96052631578948, "grad_norm": 1.8466029167175293, "learning_rate": 0.0001, "loss": 0.0207, "step": 33890 }, { "epoch": 223.02631578947367, "grad_norm": 1.7184690237045288, "learning_rate": 0.0001, "loss": 0.0202, "step": 33900 }, { "epoch": 223.0921052631579, "grad_norm": 1.6826335191726685, "learning_rate": 0.0001, "loss": 0.0219, "step": 33910 }, { "epoch": 223.1578947368421, "grad_norm": 1.955573558807373, "learning_rate": 0.0001, "loss": 0.0205, "step": 33920 }, { "epoch": 223.22368421052633, "grad_norm": 1.3725812435150146, "learning_rate": 0.0001, "loss": 0.017, "step": 33930 }, { "epoch": 223.28947368421052, "grad_norm": 1.7764763832092285, "learning_rate": 0.0001, "loss": 0.02, "step": 33940 }, { "epoch": 223.35526315789474, "grad_norm": 1.3397630453109741, "learning_rate": 0.0001, "loss": 0.0214, "step": 33950 }, { "epoch": 223.42105263157896, "grad_norm": 1.2110341787338257, "learning_rate": 0.0001, "loss": 0.0193, "step": 33960 }, { "epoch": 223.48684210526315, "grad_norm": 1.3857015371322632, "learning_rate": 0.0001, "loss": 0.0204, "step": 33970 }, { "epoch": 223.55263157894737, "grad_norm": 1.6649104356765747, "learning_rate": 0.0001, "loss": 0.0198, "step": 33980 }, { "epoch": 223.6184210526316, "grad_norm": 1.409175157546997, "learning_rate": 0.0001, "loss": 0.0217, "step": 33990 }, { "epoch": 223.68421052631578, "grad_norm": 1.3439831733703613, "learning_rate": 0.0001, "loss": 0.0164, "step": 34000 }, { "epoch": 223.75, "grad_norm": 1.6543264389038086, "learning_rate": 0.0001, "loss": 0.0228, "step": 34010 }, { "epoch": 223.81578947368422, "grad_norm": 1.4809764623641968, "learning_rate": 0.0001, "loss": 0.0208, "step": 34020 }, { "epoch": 223.8815789473684, "grad_norm": 1.4220598936080933, "learning_rate": 0.0001, "loss": 0.019, "step": 34030 }, { "epoch": 223.94736842105263, "grad_norm": 2.2339022159576416, "learning_rate": 0.0001, "loss": 0.018, "step": 34040 }, { "epoch": 224.01315789473685, "grad_norm": 1.7137222290039062, "learning_rate": 0.0001, "loss": 0.0228, "step": 34050 }, { "epoch": 224.07894736842104, "grad_norm": 1.8791618347167969, "learning_rate": 0.0001, "loss": 0.0194, "step": 34060 }, { "epoch": 224.14473684210526, "grad_norm": 1.6035127639770508, "learning_rate": 0.0001, "loss": 0.0174, "step": 34070 }, { "epoch": 224.21052631578948, "grad_norm": 1.510225772857666, "learning_rate": 0.0001, "loss": 0.0245, "step": 34080 }, { "epoch": 224.27631578947367, "grad_norm": 1.608933687210083, "learning_rate": 0.0001, "loss": 0.0169, "step": 34090 }, { "epoch": 224.3421052631579, "grad_norm": 2.0972506999969482, "learning_rate": 0.0001, "loss": 0.0214, "step": 34100 }, { "epoch": 224.4078947368421, "grad_norm": 1.9651511907577515, "learning_rate": 0.0001, "loss": 0.0171, "step": 34110 }, { "epoch": 224.47368421052633, "grad_norm": 1.8777185678482056, "learning_rate": 0.0001, "loss": 0.0234, "step": 34120 }, { "epoch": 224.53947368421052, "grad_norm": 1.894579529762268, "learning_rate": 0.0001, "loss": 0.0212, "step": 34130 }, { "epoch": 224.60526315789474, "grad_norm": 2.1768038272857666, "learning_rate": 0.0001, "loss": 0.018, "step": 34140 }, { "epoch": 224.67105263157896, "grad_norm": 2.0654332637786865, "learning_rate": 0.0001, "loss": 0.0217, "step": 34150 }, { "epoch": 224.73684210526315, "grad_norm": 1.779296636581421, "learning_rate": 0.0001, "loss": 0.0208, "step": 34160 }, { "epoch": 224.80263157894737, "grad_norm": 1.5683943033218384, "learning_rate": 0.0001, "loss": 0.0173, "step": 34170 }, { "epoch": 224.8684210526316, "grad_norm": 1.8048464059829712, "learning_rate": 0.0001, "loss": 0.0204, "step": 34180 }, { "epoch": 224.93421052631578, "grad_norm": 1.9903167486190796, "learning_rate": 0.0001, "loss": 0.0176, "step": 34190 }, { "epoch": 225.0, "grad_norm": 2.529705286026001, "learning_rate": 0.0001, "loss": 0.0181, "step": 34200 }, { "epoch": 225.06578947368422, "grad_norm": 1.9427369832992554, "learning_rate": 0.0001, "loss": 0.0201, "step": 34210 }, { "epoch": 225.1315789473684, "grad_norm": 1.7296041250228882, "learning_rate": 0.0001, "loss": 0.0182, "step": 34220 }, { "epoch": 225.19736842105263, "grad_norm": 1.4466423988342285, "learning_rate": 0.0001, "loss": 0.019, "step": 34230 }, { "epoch": 225.26315789473685, "grad_norm": 1.7773815393447876, "learning_rate": 0.0001, "loss": 0.0215, "step": 34240 }, { "epoch": 225.32894736842104, "grad_norm": 1.5753165483474731, "learning_rate": 0.0001, "loss": 0.0212, "step": 34250 }, { "epoch": 225.39473684210526, "grad_norm": 1.907732367515564, "learning_rate": 0.0001, "loss": 0.0171, "step": 34260 }, { "epoch": 225.46052631578948, "grad_norm": 1.3668665885925293, "learning_rate": 0.0001, "loss": 0.0203, "step": 34270 }, { "epoch": 225.52631578947367, "grad_norm": 2.070647716522217, "learning_rate": 0.0001, "loss": 0.0194, "step": 34280 }, { "epoch": 225.5921052631579, "grad_norm": 2.2489066123962402, "learning_rate": 0.0001, "loss": 0.0189, "step": 34290 }, { "epoch": 225.6578947368421, "grad_norm": 1.9830833673477173, "learning_rate": 0.0001, "loss": 0.0179, "step": 34300 }, { "epoch": 225.72368421052633, "grad_norm": 1.743638038635254, "learning_rate": 0.0001, "loss": 0.0193, "step": 34310 }, { "epoch": 225.78947368421052, "grad_norm": 1.79070246219635, "learning_rate": 0.0001, "loss": 0.0221, "step": 34320 }, { "epoch": 225.85526315789474, "grad_norm": 1.5934110879898071, "learning_rate": 0.0001, "loss": 0.0185, "step": 34330 }, { "epoch": 225.92105263157896, "grad_norm": 1.5481334924697876, "learning_rate": 0.0001, "loss": 0.0207, "step": 34340 }, { "epoch": 225.98684210526315, "grad_norm": 1.108474850654602, "learning_rate": 0.0001, "loss": 0.0208, "step": 34350 }, { "epoch": 226.05263157894737, "grad_norm": 1.815788745880127, "learning_rate": 0.0001, "loss": 0.022, "step": 34360 }, { "epoch": 226.1184210526316, "grad_norm": 1.5546767711639404, "learning_rate": 0.0001, "loss": 0.0179, "step": 34370 }, { "epoch": 226.18421052631578, "grad_norm": 1.8384535312652588, "learning_rate": 0.0001, "loss": 0.0204, "step": 34380 }, { "epoch": 226.25, "grad_norm": 1.5236188173294067, "learning_rate": 0.0001, "loss": 0.0172, "step": 34390 }, { "epoch": 226.31578947368422, "grad_norm": 1.3626759052276611, "learning_rate": 0.0001, "loss": 0.0189, "step": 34400 }, { "epoch": 226.3815789473684, "grad_norm": 1.2411295175552368, "learning_rate": 0.0001, "loss": 0.0201, "step": 34410 }, { "epoch": 226.44736842105263, "grad_norm": 1.2535643577575684, "learning_rate": 0.0001, "loss": 0.0226, "step": 34420 }, { "epoch": 226.51315789473685, "grad_norm": 2.0178699493408203, "learning_rate": 0.0001, "loss": 0.022, "step": 34430 }, { "epoch": 226.57894736842104, "grad_norm": 1.8125863075256348, "learning_rate": 0.0001, "loss": 0.0182, "step": 34440 }, { "epoch": 226.64473684210526, "grad_norm": 1.4897490739822388, "learning_rate": 0.0001, "loss": 0.0217, "step": 34450 }, { "epoch": 226.71052631578948, "grad_norm": 1.9226298332214355, "learning_rate": 0.0001, "loss": 0.0276, "step": 34460 }, { "epoch": 226.77631578947367, "grad_norm": 1.8428479433059692, "learning_rate": 0.0001, "loss": 0.0194, "step": 34470 }, { "epoch": 226.8421052631579, "grad_norm": 1.8907454013824463, "learning_rate": 0.0001, "loss": 0.0176, "step": 34480 }, { "epoch": 226.9078947368421, "grad_norm": 1.64349365234375, "learning_rate": 0.0001, "loss": 0.0186, "step": 34490 }, { "epoch": 226.97368421052633, "grad_norm": 1.753318190574646, "learning_rate": 0.0001, "loss": 0.0222, "step": 34500 }, { "epoch": 227.03947368421052, "grad_norm": 1.4571988582611084, "learning_rate": 0.0001, "loss": 0.0184, "step": 34510 }, { "epoch": 227.10526315789474, "grad_norm": 1.7508764266967773, "learning_rate": 0.0001, "loss": 0.0177, "step": 34520 }, { "epoch": 227.17105263157896, "grad_norm": 1.7692101001739502, "learning_rate": 0.0001, "loss": 0.018, "step": 34530 }, { "epoch": 227.23684210526315, "grad_norm": 1.489735722541809, "learning_rate": 0.0001, "loss": 0.0193, "step": 34540 }, { "epoch": 227.30263157894737, "grad_norm": 1.6247109174728394, "learning_rate": 0.0001, "loss": 0.0193, "step": 34550 }, { "epoch": 227.3684210526316, "grad_norm": 1.4203635454177856, "learning_rate": 0.0001, "loss": 0.0193, "step": 34560 }, { "epoch": 227.43421052631578, "grad_norm": 1.7807055711746216, "learning_rate": 0.0001, "loss": 0.0223, "step": 34570 }, { "epoch": 227.5, "grad_norm": 1.260437250137329, "learning_rate": 0.0001, "loss": 0.0218, "step": 34580 }, { "epoch": 227.56578947368422, "grad_norm": 1.2050691843032837, "learning_rate": 0.0001, "loss": 0.0221, "step": 34590 }, { "epoch": 227.6315789473684, "grad_norm": 1.566447377204895, "learning_rate": 0.0001, "loss": 0.0198, "step": 34600 }, { "epoch": 227.69736842105263, "grad_norm": 1.1048874855041504, "learning_rate": 0.0001, "loss": 0.021, "step": 34610 }, { "epoch": 227.76315789473685, "grad_norm": 1.4188239574432373, "learning_rate": 0.0001, "loss": 0.0239, "step": 34620 }, { "epoch": 227.82894736842104, "grad_norm": 1.3961814641952515, "learning_rate": 0.0001, "loss": 0.02, "step": 34630 }, { "epoch": 227.89473684210526, "grad_norm": 2.070324182510376, "learning_rate": 0.0001, "loss": 0.0209, "step": 34640 }, { "epoch": 227.96052631578948, "grad_norm": 1.6649295091629028, "learning_rate": 0.0001, "loss": 0.018, "step": 34650 }, { "epoch": 228.02631578947367, "grad_norm": 1.3067505359649658, "learning_rate": 0.0001, "loss": 0.0169, "step": 34660 }, { "epoch": 228.0921052631579, "grad_norm": 1.7417423725128174, "learning_rate": 0.0001, "loss": 0.0203, "step": 34670 }, { "epoch": 228.1578947368421, "grad_norm": 1.8755711317062378, "learning_rate": 0.0001, "loss": 0.0198, "step": 34680 }, { "epoch": 228.22368421052633, "grad_norm": 1.4733589887619019, "learning_rate": 0.0001, "loss": 0.0234, "step": 34690 }, { "epoch": 228.28947368421052, "grad_norm": 1.7222235202789307, "learning_rate": 0.0001, "loss": 0.0199, "step": 34700 }, { "epoch": 228.35526315789474, "grad_norm": 1.607968807220459, "learning_rate": 0.0001, "loss": 0.0172, "step": 34710 }, { "epoch": 228.42105263157896, "grad_norm": 1.6895321607589722, "learning_rate": 0.0001, "loss": 0.0239, "step": 34720 }, { "epoch": 228.48684210526315, "grad_norm": 1.5717540979385376, "learning_rate": 0.0001, "loss": 0.0175, "step": 34730 }, { "epoch": 228.55263157894737, "grad_norm": 1.5048305988311768, "learning_rate": 0.0001, "loss": 0.0174, "step": 34740 }, { "epoch": 228.6184210526316, "grad_norm": 1.6718069314956665, "learning_rate": 0.0001, "loss": 0.0197, "step": 34750 }, { "epoch": 228.68421052631578, "grad_norm": 1.8017715215682983, "learning_rate": 0.0001, "loss": 0.019, "step": 34760 }, { "epoch": 228.75, "grad_norm": 1.9306334257125854, "learning_rate": 0.0001, "loss": 0.0203, "step": 34770 }, { "epoch": 228.81578947368422, "grad_norm": 1.3869895935058594, "learning_rate": 0.0001, "loss": 0.0216, "step": 34780 }, { "epoch": 228.8815789473684, "grad_norm": 1.3939719200134277, "learning_rate": 0.0001, "loss": 0.0228, "step": 34790 }, { "epoch": 228.94736842105263, "grad_norm": 1.5759598016738892, "learning_rate": 0.0001, "loss": 0.0174, "step": 34800 }, { "epoch": 229.01315789473685, "grad_norm": 1.9750230312347412, "learning_rate": 0.0001, "loss": 0.0212, "step": 34810 }, { "epoch": 229.07894736842104, "grad_norm": 1.9965349435806274, "learning_rate": 0.0001, "loss": 0.0183, "step": 34820 }, { "epoch": 229.14473684210526, "grad_norm": 1.5650668144226074, "learning_rate": 0.0001, "loss": 0.0199, "step": 34830 }, { "epoch": 229.21052631578948, "grad_norm": 1.586815595626831, "learning_rate": 0.0001, "loss": 0.02, "step": 34840 }, { "epoch": 229.27631578947367, "grad_norm": 1.8548322916030884, "learning_rate": 0.0001, "loss": 0.0238, "step": 34850 }, { "epoch": 229.3421052631579, "grad_norm": 2.1009128093719482, "learning_rate": 0.0001, "loss": 0.0174, "step": 34860 }, { "epoch": 229.4078947368421, "grad_norm": 2.0951151847839355, "learning_rate": 0.0001, "loss": 0.0203, "step": 34870 }, { "epoch": 229.47368421052633, "grad_norm": 1.7415056228637695, "learning_rate": 0.0001, "loss": 0.0204, "step": 34880 }, { "epoch": 229.53947368421052, "grad_norm": 1.820908784866333, "learning_rate": 0.0001, "loss": 0.0169, "step": 34890 }, { "epoch": 229.60526315789474, "grad_norm": 1.9815922975540161, "learning_rate": 0.0001, "loss": 0.0186, "step": 34900 }, { "epoch": 229.67105263157896, "grad_norm": 2.254439353942871, "learning_rate": 0.0001, "loss": 0.0204, "step": 34910 }, { "epoch": 229.73684210526315, "grad_norm": 1.7427678108215332, "learning_rate": 0.0001, "loss": 0.0201, "step": 34920 }, { "epoch": 229.80263157894737, "grad_norm": 1.9506173133850098, "learning_rate": 0.0001, "loss": 0.0213, "step": 34930 }, { "epoch": 229.8684210526316, "grad_norm": 1.276955008506775, "learning_rate": 0.0001, "loss": 0.0177, "step": 34940 }, { "epoch": 229.93421052631578, "grad_norm": 1.4201771020889282, "learning_rate": 0.0001, "loss": 0.0226, "step": 34950 }, { "epoch": 230.0, "grad_norm": 1.0978769063949585, "learning_rate": 0.0001, "loss": 0.0197, "step": 34960 }, { "epoch": 230.06578947368422, "grad_norm": 1.7985621690750122, "learning_rate": 0.0001, "loss": 0.0172, "step": 34970 }, { "epoch": 230.1315789473684, "grad_norm": 2.303255081176758, "learning_rate": 0.0001, "loss": 0.0237, "step": 34980 }, { "epoch": 230.19736842105263, "grad_norm": 1.9301440715789795, "learning_rate": 0.0001, "loss": 0.0176, "step": 34990 }, { "epoch": 230.26315789473685, "grad_norm": 1.8008877038955688, "learning_rate": 0.0001, "loss": 0.0228, "step": 35000 }, { "epoch": 230.32894736842104, "grad_norm": 1.7015154361724854, "learning_rate": 0.0001, "loss": 0.0225, "step": 35010 }, { "epoch": 230.39473684210526, "grad_norm": 1.7265608310699463, "learning_rate": 0.0001, "loss": 0.0177, "step": 35020 }, { "epoch": 230.46052631578948, "grad_norm": 1.8721678256988525, "learning_rate": 0.0001, "loss": 0.0213, "step": 35030 }, { "epoch": 230.52631578947367, "grad_norm": 2.5582656860351562, "learning_rate": 0.0001, "loss": 0.0167, "step": 35040 }, { "epoch": 230.5921052631579, "grad_norm": 2.3780715465545654, "learning_rate": 0.0001, "loss": 0.0189, "step": 35050 }, { "epoch": 230.6578947368421, "grad_norm": 1.9724581241607666, "learning_rate": 0.0001, "loss": 0.0215, "step": 35060 }, { "epoch": 230.72368421052633, "grad_norm": 2.3002777099609375, "learning_rate": 0.0001, "loss": 0.0199, "step": 35070 }, { "epoch": 230.78947368421052, "grad_norm": 1.492466926574707, "learning_rate": 0.0001, "loss": 0.0213, "step": 35080 }, { "epoch": 230.85526315789474, "grad_norm": 1.9446897506713867, "learning_rate": 0.0001, "loss": 0.0193, "step": 35090 }, { "epoch": 230.92105263157896, "grad_norm": 1.481680154800415, "learning_rate": 0.0001, "loss": 0.0209, "step": 35100 }, { "epoch": 230.98684210526315, "grad_norm": 1.3322769403457642, "learning_rate": 0.0001, "loss": 0.0181, "step": 35110 }, { "epoch": 231.05263157894737, "grad_norm": 1.5927428007125854, "learning_rate": 0.0001, "loss": 0.0215, "step": 35120 }, { "epoch": 231.1184210526316, "grad_norm": 1.9702943563461304, "learning_rate": 0.0001, "loss": 0.0202, "step": 35130 }, { "epoch": 231.18421052631578, "grad_norm": 1.366599678993225, "learning_rate": 0.0001, "loss": 0.0182, "step": 35140 }, { "epoch": 231.25, "grad_norm": 1.729328989982605, "learning_rate": 0.0001, "loss": 0.0189, "step": 35150 }, { "epoch": 231.31578947368422, "grad_norm": 1.4906212091445923, "learning_rate": 0.0001, "loss": 0.0198, "step": 35160 }, { "epoch": 231.3815789473684, "grad_norm": 1.9718831777572632, "learning_rate": 0.0001, "loss": 0.0227, "step": 35170 }, { "epoch": 231.44736842105263, "grad_norm": 1.6352449655532837, "learning_rate": 0.0001, "loss": 0.0179, "step": 35180 }, { "epoch": 231.51315789473685, "grad_norm": 1.765555500984192, "learning_rate": 0.0001, "loss": 0.0178, "step": 35190 }, { "epoch": 231.57894736842104, "grad_norm": 1.2628792524337769, "learning_rate": 0.0001, "loss": 0.0189, "step": 35200 }, { "epoch": 231.64473684210526, "grad_norm": 1.407234787940979, "learning_rate": 0.0001, "loss": 0.0204, "step": 35210 }, { "epoch": 231.71052631578948, "grad_norm": 1.2669035196304321, "learning_rate": 0.0001, "loss": 0.0232, "step": 35220 }, { "epoch": 231.77631578947367, "grad_norm": 1.8195160627365112, "learning_rate": 0.0001, "loss": 0.019, "step": 35230 }, { "epoch": 231.8421052631579, "grad_norm": 1.607657790184021, "learning_rate": 0.0001, "loss": 0.0205, "step": 35240 }, { "epoch": 231.9078947368421, "grad_norm": 1.8546417951583862, "learning_rate": 0.0001, "loss": 0.0217, "step": 35250 }, { "epoch": 231.97368421052633, "grad_norm": 1.5597283840179443, "learning_rate": 0.0001, "loss": 0.0197, "step": 35260 }, { "epoch": 232.03947368421052, "grad_norm": 1.693938136100769, "learning_rate": 0.0001, "loss": 0.0168, "step": 35270 }, { "epoch": 232.10526315789474, "grad_norm": 1.8039636611938477, "learning_rate": 0.0001, "loss": 0.0187, "step": 35280 }, { "epoch": 232.17105263157896, "grad_norm": 1.763353943824768, "learning_rate": 0.0001, "loss": 0.0261, "step": 35290 }, { "epoch": 232.23684210526315, "grad_norm": 1.23368501663208, "learning_rate": 0.0001, "loss": 0.0185, "step": 35300 }, { "epoch": 232.30263157894737, "grad_norm": 0.9977569580078125, "learning_rate": 0.0001, "loss": 0.0187, "step": 35310 }, { "epoch": 232.3684210526316, "grad_norm": 1.7237621545791626, "learning_rate": 0.0001, "loss": 0.0218, "step": 35320 }, { "epoch": 232.43421052631578, "grad_norm": 1.5451558828353882, "learning_rate": 0.0001, "loss": 0.019, "step": 35330 }, { "epoch": 232.5, "grad_norm": 2.183265447616577, "learning_rate": 0.0001, "loss": 0.0184, "step": 35340 }, { "epoch": 232.56578947368422, "grad_norm": 2.0048017501831055, "learning_rate": 0.0001, "loss": 0.019, "step": 35350 }, { "epoch": 232.6315789473684, "grad_norm": 2.2010018825531006, "learning_rate": 0.0001, "loss": 0.022, "step": 35360 }, { "epoch": 232.69736842105263, "grad_norm": 1.5603358745574951, "learning_rate": 0.0001, "loss": 0.019, "step": 35370 }, { "epoch": 232.76315789473685, "grad_norm": 1.8093609809875488, "learning_rate": 0.0001, "loss": 0.022, "step": 35380 }, { "epoch": 232.82894736842104, "grad_norm": 1.452812910079956, "learning_rate": 0.0001, "loss": 0.0218, "step": 35390 }, { "epoch": 232.89473684210526, "grad_norm": 1.4911991357803345, "learning_rate": 0.0001, "loss": 0.0193, "step": 35400 }, { "epoch": 232.96052631578948, "grad_norm": 1.487074613571167, "learning_rate": 0.0001, "loss": 0.0194, "step": 35410 }, { "epoch": 233.02631578947367, "grad_norm": 1.5937689542770386, "learning_rate": 0.0001, "loss": 0.0184, "step": 35420 }, { "epoch": 233.0921052631579, "grad_norm": 1.6360892057418823, "learning_rate": 0.0001, "loss": 0.0198, "step": 35430 }, { "epoch": 233.1578947368421, "grad_norm": 1.5444726943969727, "learning_rate": 0.0001, "loss": 0.0192, "step": 35440 }, { "epoch": 233.22368421052633, "grad_norm": 1.3359811305999756, "learning_rate": 0.0001, "loss": 0.0208, "step": 35450 }, { "epoch": 233.28947368421052, "grad_norm": 1.6842749118804932, "learning_rate": 0.0001, "loss": 0.018, "step": 35460 }, { "epoch": 233.35526315789474, "grad_norm": 1.8622336387634277, "learning_rate": 0.0001, "loss": 0.0197, "step": 35470 }, { "epoch": 233.42105263157896, "grad_norm": 1.9494454860687256, "learning_rate": 0.0001, "loss": 0.0219, "step": 35480 }, { "epoch": 233.48684210526315, "grad_norm": 1.62035071849823, "learning_rate": 0.0001, "loss": 0.0231, "step": 35490 }, { "epoch": 233.55263157894737, "grad_norm": 1.5280808210372925, "learning_rate": 0.0001, "loss": 0.0232, "step": 35500 }, { "epoch": 233.6184210526316, "grad_norm": 1.547860860824585, "learning_rate": 0.0001, "loss": 0.0198, "step": 35510 }, { "epoch": 233.68421052631578, "grad_norm": 1.4683456420898438, "learning_rate": 0.0001, "loss": 0.0213, "step": 35520 }, { "epoch": 233.75, "grad_norm": 1.5079337358474731, "learning_rate": 0.0001, "loss": 0.0187, "step": 35530 }, { "epoch": 233.81578947368422, "grad_norm": 1.9671638011932373, "learning_rate": 0.0001, "loss": 0.0208, "step": 35540 }, { "epoch": 233.8815789473684, "grad_norm": 1.437347173690796, "learning_rate": 0.0001, "loss": 0.0206, "step": 35550 }, { "epoch": 233.94736842105263, "grad_norm": 1.443315863609314, "learning_rate": 0.0001, "loss": 0.0162, "step": 35560 }, { "epoch": 234.01315789473685, "grad_norm": 1.4497140645980835, "learning_rate": 0.0001, "loss": 0.0162, "step": 35570 }, { "epoch": 234.07894736842104, "grad_norm": 1.6332519054412842, "learning_rate": 0.0001, "loss": 0.017, "step": 35580 }, { "epoch": 234.14473684210526, "grad_norm": 2.0122878551483154, "learning_rate": 0.0001, "loss": 0.0244, "step": 35590 }, { "epoch": 234.21052631578948, "grad_norm": 1.391404628753662, "learning_rate": 0.0001, "loss": 0.0246, "step": 35600 }, { "epoch": 234.27631578947367, "grad_norm": 2.107131004333496, "learning_rate": 0.0001, "loss": 0.0194, "step": 35610 }, { "epoch": 234.3421052631579, "grad_norm": 1.4666751623153687, "learning_rate": 0.0001, "loss": 0.0202, "step": 35620 }, { "epoch": 234.4078947368421, "grad_norm": 1.4233797788619995, "learning_rate": 0.0001, "loss": 0.017, "step": 35630 }, { "epoch": 234.47368421052633, "grad_norm": 0.9831743240356445, "learning_rate": 0.0001, "loss": 0.0227, "step": 35640 }, { "epoch": 234.53947368421052, "grad_norm": 1.6714662313461304, "learning_rate": 0.0001, "loss": 0.0174, "step": 35650 }, { "epoch": 234.60526315789474, "grad_norm": 1.7231981754302979, "learning_rate": 0.0001, "loss": 0.0191, "step": 35660 }, { "epoch": 234.67105263157896, "grad_norm": 1.5225272178649902, "learning_rate": 0.0001, "loss": 0.0216, "step": 35670 }, { "epoch": 234.73684210526315, "grad_norm": 1.8967583179473877, "learning_rate": 0.0001, "loss": 0.0179, "step": 35680 }, { "epoch": 234.80263157894737, "grad_norm": 1.339795708656311, "learning_rate": 0.0001, "loss": 0.02, "step": 35690 }, { "epoch": 234.8684210526316, "grad_norm": 1.3654919862747192, "learning_rate": 0.0001, "loss": 0.0157, "step": 35700 }, { "epoch": 234.93421052631578, "grad_norm": 1.0237812995910645, "learning_rate": 0.0001, "loss": 0.0215, "step": 35710 }, { "epoch": 235.0, "grad_norm": 1.646314024925232, "learning_rate": 0.0001, "loss": 0.0215, "step": 35720 }, { "epoch": 235.06578947368422, "grad_norm": 1.2213183641433716, "learning_rate": 0.0001, "loss": 0.0241, "step": 35730 }, { "epoch": 235.1315789473684, "grad_norm": 1.1558490991592407, "learning_rate": 0.0001, "loss": 0.0198, "step": 35740 }, { "epoch": 235.19736842105263, "grad_norm": 1.5273646116256714, "learning_rate": 0.0001, "loss": 0.02, "step": 35750 }, { "epoch": 235.26315789473685, "grad_norm": 1.6789329051971436, "learning_rate": 0.0001, "loss": 0.0211, "step": 35760 }, { "epoch": 235.32894736842104, "grad_norm": 1.4119203090667725, "learning_rate": 0.0001, "loss": 0.0244, "step": 35770 }, { "epoch": 235.39473684210526, "grad_norm": 1.292686939239502, "learning_rate": 0.0001, "loss": 0.0204, "step": 35780 }, { "epoch": 235.46052631578948, "grad_norm": 2.029736280441284, "learning_rate": 0.0001, "loss": 0.019, "step": 35790 }, { "epoch": 235.52631578947367, "grad_norm": 1.6800975799560547, "learning_rate": 0.0001, "loss": 0.0184, "step": 35800 }, { "epoch": 235.5921052631579, "grad_norm": 1.53754723072052, "learning_rate": 0.0001, "loss": 0.0169, "step": 35810 }, { "epoch": 235.6578947368421, "grad_norm": 1.5648635625839233, "learning_rate": 0.0001, "loss": 0.0222, "step": 35820 }, { "epoch": 235.72368421052633, "grad_norm": 1.6135809421539307, "learning_rate": 0.0001, "loss": 0.0208, "step": 35830 }, { "epoch": 235.78947368421052, "grad_norm": 1.421993374824524, "learning_rate": 0.0001, "loss": 0.019, "step": 35840 }, { "epoch": 235.85526315789474, "grad_norm": 1.7172560691833496, "learning_rate": 0.0001, "loss": 0.0203, "step": 35850 }, { "epoch": 235.92105263157896, "grad_norm": 1.5904074907302856, "learning_rate": 0.0001, "loss": 0.0206, "step": 35860 }, { "epoch": 235.98684210526315, "grad_norm": 1.747056484222412, "learning_rate": 0.0001, "loss": 0.0176, "step": 35870 }, { "epoch": 236.05263157894737, "grad_norm": 1.7357476949691772, "learning_rate": 0.0001, "loss": 0.0216, "step": 35880 }, { "epoch": 236.1184210526316, "grad_norm": 1.3433114290237427, "learning_rate": 0.0001, "loss": 0.0176, "step": 35890 }, { "epoch": 236.18421052631578, "grad_norm": 1.740400791168213, "learning_rate": 0.0001, "loss": 0.0185, "step": 35900 }, { "epoch": 236.25, "grad_norm": 1.5921833515167236, "learning_rate": 0.0001, "loss": 0.0179, "step": 35910 }, { "epoch": 236.31578947368422, "grad_norm": 1.8339042663574219, "learning_rate": 0.0001, "loss": 0.0212, "step": 35920 }, { "epoch": 236.3815789473684, "grad_norm": 1.9566082954406738, "learning_rate": 0.0001, "loss": 0.0174, "step": 35930 }, { "epoch": 236.44736842105263, "grad_norm": 1.6766666173934937, "learning_rate": 0.0001, "loss": 0.0188, "step": 35940 }, { "epoch": 236.51315789473685, "grad_norm": 1.6599949598312378, "learning_rate": 0.0001, "loss": 0.0204, "step": 35950 }, { "epoch": 236.57894736842104, "grad_norm": 1.5453583002090454, "learning_rate": 0.0001, "loss": 0.0175, "step": 35960 }, { "epoch": 236.64473684210526, "grad_norm": 1.605971336364746, "learning_rate": 0.0001, "loss": 0.0195, "step": 35970 }, { "epoch": 236.71052631578948, "grad_norm": 1.8286058902740479, "learning_rate": 0.0001, "loss": 0.022, "step": 35980 }, { "epoch": 236.77631578947367, "grad_norm": 2.446091651916504, "learning_rate": 0.0001, "loss": 0.0187, "step": 35990 }, { "epoch": 236.8421052631579, "grad_norm": 1.6725691556930542, "learning_rate": 0.0001, "loss": 0.0209, "step": 36000 }, { "epoch": 236.9078947368421, "grad_norm": 1.7494187355041504, "learning_rate": 0.0001, "loss": 0.0181, "step": 36010 }, { "epoch": 236.97368421052633, "grad_norm": 1.5507603883743286, "learning_rate": 0.0001, "loss": 0.0251, "step": 36020 }, { "epoch": 237.03947368421052, "grad_norm": 1.773564100265503, "learning_rate": 0.0001, "loss": 0.0191, "step": 36030 }, { "epoch": 237.10526315789474, "grad_norm": 1.9002022743225098, "learning_rate": 0.0001, "loss": 0.0235, "step": 36040 }, { "epoch": 237.17105263157896, "grad_norm": 1.314282774925232, "learning_rate": 0.0001, "loss": 0.0172, "step": 36050 }, { "epoch": 237.23684210526315, "grad_norm": 1.4450715780258179, "learning_rate": 0.0001, "loss": 0.0164, "step": 36060 }, { "epoch": 237.30263157894737, "grad_norm": 1.4473515748977661, "learning_rate": 0.0001, "loss": 0.0182, "step": 36070 }, { "epoch": 237.3684210526316, "grad_norm": 1.4523773193359375, "learning_rate": 0.0001, "loss": 0.0186, "step": 36080 }, { "epoch": 237.43421052631578, "grad_norm": 1.133653163909912, "learning_rate": 0.0001, "loss": 0.0202, "step": 36090 }, { "epoch": 237.5, "grad_norm": 0.9999027252197266, "learning_rate": 0.0001, "loss": 0.0216, "step": 36100 }, { "epoch": 237.56578947368422, "grad_norm": 1.2679741382598877, "learning_rate": 0.0001, "loss": 0.0214, "step": 36110 }, { "epoch": 237.6315789473684, "grad_norm": 1.5661972761154175, "learning_rate": 0.0001, "loss": 0.0202, "step": 36120 }, { "epoch": 237.69736842105263, "grad_norm": 1.7564016580581665, "learning_rate": 0.0001, "loss": 0.0197, "step": 36130 }, { "epoch": 237.76315789473685, "grad_norm": 1.9973304271697998, "learning_rate": 0.0001, "loss": 0.0212, "step": 36140 }, { "epoch": 237.82894736842104, "grad_norm": 1.8002113103866577, "learning_rate": 0.0001, "loss": 0.0178, "step": 36150 }, { "epoch": 237.89473684210526, "grad_norm": 1.3267449140548706, "learning_rate": 0.0001, "loss": 0.0192, "step": 36160 }, { "epoch": 237.96052631578948, "grad_norm": 1.3318593502044678, "learning_rate": 0.0001, "loss": 0.0216, "step": 36170 }, { "epoch": 238.02631578947367, "grad_norm": 1.7060714960098267, "learning_rate": 0.0001, "loss": 0.0172, "step": 36180 }, { "epoch": 238.0921052631579, "grad_norm": 1.241672396659851, "learning_rate": 0.0001, "loss": 0.019, "step": 36190 }, { "epoch": 238.1578947368421, "grad_norm": 1.5813618898391724, "learning_rate": 0.0001, "loss": 0.0213, "step": 36200 }, { "epoch": 238.22368421052633, "grad_norm": 1.6591277122497559, "learning_rate": 0.0001, "loss": 0.0217, "step": 36210 }, { "epoch": 238.28947368421052, "grad_norm": 1.7022175788879395, "learning_rate": 0.0001, "loss": 0.0195, "step": 36220 }, { "epoch": 238.35526315789474, "grad_norm": 1.4987492561340332, "learning_rate": 0.0001, "loss": 0.0194, "step": 36230 }, { "epoch": 238.42105263157896, "grad_norm": 1.6174250841140747, "learning_rate": 0.0001, "loss": 0.0187, "step": 36240 }, { "epoch": 238.48684210526315, "grad_norm": 2.0273303985595703, "learning_rate": 0.0001, "loss": 0.0255, "step": 36250 }, { "epoch": 238.55263157894737, "grad_norm": 1.9486943483352661, "learning_rate": 0.0001, "loss": 0.0194, "step": 36260 }, { "epoch": 238.6184210526316, "grad_norm": 1.398805856704712, "learning_rate": 0.0001, "loss": 0.022, "step": 36270 }, { "epoch": 238.68421052631578, "grad_norm": 1.1562875509262085, "learning_rate": 0.0001, "loss": 0.0179, "step": 36280 }, { "epoch": 238.75, "grad_norm": 1.4297064542770386, "learning_rate": 0.0001, "loss": 0.017, "step": 36290 }, { "epoch": 238.81578947368422, "grad_norm": 1.5254102945327759, "learning_rate": 0.0001, "loss": 0.0178, "step": 36300 }, { "epoch": 238.8815789473684, "grad_norm": 1.6473784446716309, "learning_rate": 0.0001, "loss": 0.0174, "step": 36310 }, { "epoch": 238.94736842105263, "grad_norm": 1.6178659200668335, "learning_rate": 0.0001, "loss": 0.0179, "step": 36320 }, { "epoch": 239.01315789473685, "grad_norm": 1.2778416872024536, "learning_rate": 0.0001, "loss": 0.0222, "step": 36330 }, { "epoch": 239.07894736842104, "grad_norm": 1.4653970003128052, "learning_rate": 0.0001, "loss": 0.0182, "step": 36340 }, { "epoch": 239.14473684210526, "grad_norm": 1.8465867042541504, "learning_rate": 0.0001, "loss": 0.0222, "step": 36350 }, { "epoch": 239.21052631578948, "grad_norm": 1.5584577322006226, "learning_rate": 0.0001, "loss": 0.0219, "step": 36360 }, { "epoch": 239.27631578947367, "grad_norm": 1.5461065769195557, "learning_rate": 0.0001, "loss": 0.0194, "step": 36370 }, { "epoch": 239.3421052631579, "grad_norm": 1.6672723293304443, "learning_rate": 0.0001, "loss": 0.0253, "step": 36380 }, { "epoch": 239.4078947368421, "grad_norm": 1.3506889343261719, "learning_rate": 0.0001, "loss": 0.0202, "step": 36390 }, { "epoch": 239.47368421052633, "grad_norm": 1.7351913452148438, "learning_rate": 0.0001, "loss": 0.0182, "step": 36400 }, { "epoch": 239.53947368421052, "grad_norm": 1.3712842464447021, "learning_rate": 0.0001, "loss": 0.0171, "step": 36410 }, { "epoch": 239.60526315789474, "grad_norm": 1.4606026411056519, "learning_rate": 0.0001, "loss": 0.0203, "step": 36420 }, { "epoch": 239.67105263157896, "grad_norm": 1.261122703552246, "learning_rate": 0.0001, "loss": 0.0196, "step": 36430 }, { "epoch": 239.73684210526315, "grad_norm": 1.6301027536392212, "learning_rate": 0.0001, "loss": 0.019, "step": 36440 }, { "epoch": 239.80263157894737, "grad_norm": 1.5210984945297241, "learning_rate": 0.0001, "loss": 0.0184, "step": 36450 }, { "epoch": 239.8684210526316, "grad_norm": 1.7650057077407837, "learning_rate": 0.0001, "loss": 0.0202, "step": 36460 }, { "epoch": 239.93421052631578, "grad_norm": 1.6419023275375366, "learning_rate": 0.0001, "loss": 0.0195, "step": 36470 }, { "epoch": 240.0, "grad_norm": 1.401353359222412, "learning_rate": 0.0001, "loss": 0.0233, "step": 36480 }, { "epoch": 240.06578947368422, "grad_norm": 1.4204853773117065, "learning_rate": 0.0001, "loss": 0.021, "step": 36490 }, { "epoch": 240.1315789473684, "grad_norm": 1.861504316329956, "learning_rate": 0.0001, "loss": 0.0206, "step": 36500 }, { "epoch": 240.19736842105263, "grad_norm": 1.5593534708023071, "learning_rate": 0.0001, "loss": 0.0201, "step": 36510 }, { "epoch": 240.26315789473685, "grad_norm": 1.4203051328659058, "learning_rate": 0.0001, "loss": 0.0184, "step": 36520 }, { "epoch": 240.32894736842104, "grad_norm": 1.3642278909683228, "learning_rate": 0.0001, "loss": 0.0225, "step": 36530 }, { "epoch": 240.39473684210526, "grad_norm": 1.544014811515808, "learning_rate": 0.0001, "loss": 0.0194, "step": 36540 }, { "epoch": 240.46052631578948, "grad_norm": 1.5116010904312134, "learning_rate": 0.0001, "loss": 0.0176, "step": 36550 }, { "epoch": 240.52631578947367, "grad_norm": 1.6601215600967407, "learning_rate": 0.0001, "loss": 0.0184, "step": 36560 }, { "epoch": 240.5921052631579, "grad_norm": 2.3399438858032227, "learning_rate": 0.0001, "loss": 0.0186, "step": 36570 }, { "epoch": 240.6578947368421, "grad_norm": 1.8618615865707397, "learning_rate": 0.0001, "loss": 0.0228, "step": 36580 }, { "epoch": 240.72368421052633, "grad_norm": 1.7668118476867676, "learning_rate": 0.0001, "loss": 0.0198, "step": 36590 }, { "epoch": 240.78947368421052, "grad_norm": 1.797413945198059, "learning_rate": 0.0001, "loss": 0.0196, "step": 36600 }, { "epoch": 240.85526315789474, "grad_norm": 2.0445058345794678, "learning_rate": 0.0001, "loss": 0.0223, "step": 36610 }, { "epoch": 240.92105263157896, "grad_norm": 1.7085247039794922, "learning_rate": 0.0001, "loss": 0.0173, "step": 36620 }, { "epoch": 240.98684210526315, "grad_norm": 1.6667330265045166, "learning_rate": 0.0001, "loss": 0.0185, "step": 36630 }, { "epoch": 241.05263157894737, "grad_norm": 1.99186110496521, "learning_rate": 0.0001, "loss": 0.0214, "step": 36640 }, { "epoch": 241.1184210526316, "grad_norm": 1.7286796569824219, "learning_rate": 0.0001, "loss": 0.0279, "step": 36650 }, { "epoch": 241.18421052631578, "grad_norm": 1.5727570056915283, "learning_rate": 0.0001, "loss": 0.0185, "step": 36660 }, { "epoch": 241.25, "grad_norm": 1.943756341934204, "learning_rate": 0.0001, "loss": 0.0204, "step": 36670 }, { "epoch": 241.31578947368422, "grad_norm": 1.851399302482605, "learning_rate": 0.0001, "loss": 0.0189, "step": 36680 }, { "epoch": 241.3815789473684, "grad_norm": 2.0681545734405518, "learning_rate": 0.0001, "loss": 0.0187, "step": 36690 }, { "epoch": 241.44736842105263, "grad_norm": 1.668223261833191, "learning_rate": 0.0001, "loss": 0.0193, "step": 36700 }, { "epoch": 241.51315789473685, "grad_norm": 2.103315591812134, "learning_rate": 0.0001, "loss": 0.0172, "step": 36710 }, { "epoch": 241.57894736842104, "grad_norm": 1.7065314054489136, "learning_rate": 0.0001, "loss": 0.0193, "step": 36720 }, { "epoch": 241.64473684210526, "grad_norm": 2.0506625175476074, "learning_rate": 0.0001, "loss": 0.0194, "step": 36730 }, { "epoch": 241.71052631578948, "grad_norm": 1.7063671350479126, "learning_rate": 0.0001, "loss": 0.0185, "step": 36740 }, { "epoch": 241.77631578947367, "grad_norm": 2.0404856204986572, "learning_rate": 0.0001, "loss": 0.0176, "step": 36750 }, { "epoch": 241.8421052631579, "grad_norm": 1.656964898109436, "learning_rate": 0.0001, "loss": 0.0168, "step": 36760 }, { "epoch": 241.9078947368421, "grad_norm": 1.346113920211792, "learning_rate": 0.0001, "loss": 0.019, "step": 36770 }, { "epoch": 241.97368421052633, "grad_norm": 1.7544972896575928, "learning_rate": 0.0001, "loss": 0.0196, "step": 36780 }, { "epoch": 242.03947368421052, "grad_norm": 1.2361855506896973, "learning_rate": 0.0001, "loss": 0.0177, "step": 36790 }, { "epoch": 242.10526315789474, "grad_norm": 1.954705834388733, "learning_rate": 0.0001, "loss": 0.017, "step": 36800 }, { "epoch": 242.17105263157896, "grad_norm": 1.804828405380249, "learning_rate": 0.0001, "loss": 0.02, "step": 36810 }, { "epoch": 242.23684210526315, "grad_norm": 1.5054808855056763, "learning_rate": 0.0001, "loss": 0.017, "step": 36820 }, { "epoch": 242.30263157894737, "grad_norm": 1.5822430849075317, "learning_rate": 0.0001, "loss": 0.0188, "step": 36830 }, { "epoch": 242.3684210526316, "grad_norm": 1.531987190246582, "learning_rate": 0.0001, "loss": 0.0232, "step": 36840 }, { "epoch": 242.43421052631578, "grad_norm": 1.875126838684082, "learning_rate": 0.0001, "loss": 0.0197, "step": 36850 }, { "epoch": 242.5, "grad_norm": 1.8741679191589355, "learning_rate": 0.0001, "loss": 0.0203, "step": 36860 }, { "epoch": 242.56578947368422, "grad_norm": 1.6536668539047241, "learning_rate": 0.0001, "loss": 0.0178, "step": 36870 }, { "epoch": 242.6315789473684, "grad_norm": 1.4647576808929443, "learning_rate": 0.0001, "loss": 0.0194, "step": 36880 }, { "epoch": 242.69736842105263, "grad_norm": 1.5335026979446411, "learning_rate": 0.0001, "loss": 0.0186, "step": 36890 }, { "epoch": 242.76315789473685, "grad_norm": 1.6367672681808472, "learning_rate": 0.0001, "loss": 0.0215, "step": 36900 }, { "epoch": 242.82894736842104, "grad_norm": 1.5529890060424805, "learning_rate": 0.0001, "loss": 0.0199, "step": 36910 }, { "epoch": 242.89473684210526, "grad_norm": 1.5713310241699219, "learning_rate": 0.0001, "loss": 0.0211, "step": 36920 }, { "epoch": 242.96052631578948, "grad_norm": 1.6244816780090332, "learning_rate": 0.0001, "loss": 0.0184, "step": 36930 }, { "epoch": 243.02631578947367, "grad_norm": 1.6835148334503174, "learning_rate": 0.0001, "loss": 0.0191, "step": 36940 }, { "epoch": 243.0921052631579, "grad_norm": 1.6512768268585205, "learning_rate": 0.0001, "loss": 0.0198, "step": 36950 }, { "epoch": 243.1578947368421, "grad_norm": 1.3376370668411255, "learning_rate": 0.0001, "loss": 0.0169, "step": 36960 }, { "epoch": 243.22368421052633, "grad_norm": 1.605578064918518, "learning_rate": 0.0001, "loss": 0.0245, "step": 36970 }, { "epoch": 243.28947368421052, "grad_norm": 1.4589523077011108, "learning_rate": 0.0001, "loss": 0.0186, "step": 36980 }, { "epoch": 243.35526315789474, "grad_norm": 1.3555257320404053, "learning_rate": 0.0001, "loss": 0.0232, "step": 36990 }, { "epoch": 243.42105263157896, "grad_norm": 1.6319491863250732, "learning_rate": 0.0001, "loss": 0.0239, "step": 37000 }, { "epoch": 243.48684210526315, "grad_norm": 1.4695425033569336, "learning_rate": 0.0001, "loss": 0.0221, "step": 37010 }, { "epoch": 243.55263157894737, "grad_norm": 1.822950839996338, "learning_rate": 0.0001, "loss": 0.0189, "step": 37020 }, { "epoch": 243.6184210526316, "grad_norm": 1.6778573989868164, "learning_rate": 0.0001, "loss": 0.0161, "step": 37030 }, { "epoch": 243.68421052631578, "grad_norm": 1.3462587594985962, "learning_rate": 0.0001, "loss": 0.021, "step": 37040 }, { "epoch": 243.75, "grad_norm": 1.4053853750228882, "learning_rate": 0.0001, "loss": 0.0211, "step": 37050 }, { "epoch": 243.81578947368422, "grad_norm": 1.0746815204620361, "learning_rate": 0.0001, "loss": 0.0206, "step": 37060 }, { "epoch": 243.8815789473684, "grad_norm": 1.586976170539856, "learning_rate": 0.0001, "loss": 0.016, "step": 37070 }, { "epoch": 243.94736842105263, "grad_norm": 1.2493016719818115, "learning_rate": 0.0001, "loss": 0.016, "step": 37080 }, { "epoch": 244.01315789473685, "grad_norm": 1.6284087896347046, "learning_rate": 0.0001, "loss": 0.0193, "step": 37090 }, { "epoch": 244.07894736842104, "grad_norm": 1.690743327140808, "learning_rate": 0.0001, "loss": 0.0189, "step": 37100 }, { "epoch": 244.14473684210526, "grad_norm": 1.7136486768722534, "learning_rate": 0.0001, "loss": 0.0173, "step": 37110 }, { "epoch": 244.21052631578948, "grad_norm": 2.008579730987549, "learning_rate": 0.0001, "loss": 0.0204, "step": 37120 }, { "epoch": 244.27631578947367, "grad_norm": 1.5340235233306885, "learning_rate": 0.0001, "loss": 0.0185, "step": 37130 }, { "epoch": 244.3421052631579, "grad_norm": 1.1698246002197266, "learning_rate": 0.0001, "loss": 0.0182, "step": 37140 }, { "epoch": 244.4078947368421, "grad_norm": 1.5346896648406982, "learning_rate": 0.0001, "loss": 0.02, "step": 37150 }, { "epoch": 244.47368421052633, "grad_norm": 1.5449200868606567, "learning_rate": 0.0001, "loss": 0.0169, "step": 37160 }, { "epoch": 244.53947368421052, "grad_norm": 1.6691739559173584, "learning_rate": 0.0001, "loss": 0.0216, "step": 37170 }, { "epoch": 244.60526315789474, "grad_norm": 1.4477694034576416, "learning_rate": 0.0001, "loss": 0.0232, "step": 37180 }, { "epoch": 244.67105263157896, "grad_norm": 1.825637698173523, "learning_rate": 0.0001, "loss": 0.0213, "step": 37190 }, { "epoch": 244.73684210526315, "grad_norm": 1.7916767597198486, "learning_rate": 0.0001, "loss": 0.0181, "step": 37200 }, { "epoch": 244.80263157894737, "grad_norm": 1.6274843215942383, "learning_rate": 0.0001, "loss": 0.0186, "step": 37210 }, { "epoch": 244.8684210526316, "grad_norm": 1.9509586095809937, "learning_rate": 0.0001, "loss": 0.018, "step": 37220 }, { "epoch": 244.93421052631578, "grad_norm": 1.5239068269729614, "learning_rate": 0.0001, "loss": 0.0201, "step": 37230 }, { "epoch": 245.0, "grad_norm": 1.7826834917068481, "learning_rate": 0.0001, "loss": 0.0205, "step": 37240 }, { "epoch": 245.06578947368422, "grad_norm": 2.166701078414917, "learning_rate": 0.0001, "loss": 0.0226, "step": 37250 }, { "epoch": 245.1315789473684, "grad_norm": 1.9539148807525635, "learning_rate": 0.0001, "loss": 0.0169, "step": 37260 }, { "epoch": 245.19736842105263, "grad_norm": 2.131498336791992, "learning_rate": 0.0001, "loss": 0.0164, "step": 37270 }, { "epoch": 245.26315789473685, "grad_norm": 1.9524967670440674, "learning_rate": 0.0001, "loss": 0.0198, "step": 37280 }, { "epoch": 245.32894736842104, "grad_norm": 1.4309616088867188, "learning_rate": 0.0001, "loss": 0.0211, "step": 37290 }, { "epoch": 245.39473684210526, "grad_norm": 1.581757664680481, "learning_rate": 0.0001, "loss": 0.0199, "step": 37300 }, { "epoch": 245.46052631578948, "grad_norm": 1.3322498798370361, "learning_rate": 0.0001, "loss": 0.0212, "step": 37310 }, { "epoch": 245.52631578947367, "grad_norm": 1.4357860088348389, "learning_rate": 0.0001, "loss": 0.0192, "step": 37320 }, { "epoch": 245.5921052631579, "grad_norm": 1.7915875911712646, "learning_rate": 0.0001, "loss": 0.0176, "step": 37330 }, { "epoch": 245.6578947368421, "grad_norm": 1.6418904066085815, "learning_rate": 0.0001, "loss": 0.0218, "step": 37340 }, { "epoch": 245.72368421052633, "grad_norm": 1.9321050643920898, "learning_rate": 0.0001, "loss": 0.0214, "step": 37350 }, { "epoch": 245.78947368421052, "grad_norm": 1.4626762866973877, "learning_rate": 0.0001, "loss": 0.016, "step": 37360 }, { "epoch": 245.85526315789474, "grad_norm": 1.123113989830017, "learning_rate": 0.0001, "loss": 0.0199, "step": 37370 }, { "epoch": 245.92105263157896, "grad_norm": 1.561628818511963, "learning_rate": 0.0001, "loss": 0.0212, "step": 37380 }, { "epoch": 245.98684210526315, "grad_norm": 1.6528689861297607, "learning_rate": 0.0001, "loss": 0.0166, "step": 37390 }, { "epoch": 246.05263157894737, "grad_norm": 1.7247921228408813, "learning_rate": 0.0001, "loss": 0.0164, "step": 37400 }, { "epoch": 246.1184210526316, "grad_norm": 2.047614812850952, "learning_rate": 0.0001, "loss": 0.0213, "step": 37410 }, { "epoch": 246.18421052631578, "grad_norm": 1.3849968910217285, "learning_rate": 0.0001, "loss": 0.0233, "step": 37420 }, { "epoch": 246.25, "grad_norm": 1.4099758863449097, "learning_rate": 0.0001, "loss": 0.0176, "step": 37430 }, { "epoch": 246.31578947368422, "grad_norm": 1.2559192180633545, "learning_rate": 0.0001, "loss": 0.0171, "step": 37440 }, { "epoch": 246.3815789473684, "grad_norm": 1.9677481651306152, "learning_rate": 0.0001, "loss": 0.0216, "step": 37450 }, { "epoch": 246.44736842105263, "grad_norm": 1.6152703762054443, "learning_rate": 0.0001, "loss": 0.0195, "step": 37460 }, { "epoch": 246.51315789473685, "grad_norm": 1.3211369514465332, "learning_rate": 0.0001, "loss": 0.0159, "step": 37470 }, { "epoch": 246.57894736842104, "grad_norm": 1.566521406173706, "learning_rate": 0.0001, "loss": 0.0239, "step": 37480 }, { "epoch": 246.64473684210526, "grad_norm": 2.098038911819458, "learning_rate": 0.0001, "loss": 0.0177, "step": 37490 }, { "epoch": 246.71052631578948, "grad_norm": 2.216181755065918, "learning_rate": 0.0001, "loss": 0.0189, "step": 37500 }, { "epoch": 246.77631578947367, "grad_norm": 1.5426771640777588, "learning_rate": 0.0001, "loss": 0.0201, "step": 37510 }, { "epoch": 246.8421052631579, "grad_norm": 1.4075103998184204, "learning_rate": 0.0001, "loss": 0.0178, "step": 37520 }, { "epoch": 246.9078947368421, "grad_norm": 1.8004367351531982, "learning_rate": 0.0001, "loss": 0.0184, "step": 37530 }, { "epoch": 246.97368421052633, "grad_norm": 1.471125841140747, "learning_rate": 0.0001, "loss": 0.0193, "step": 37540 }, { "epoch": 247.03947368421052, "grad_norm": 1.6907250881195068, "learning_rate": 0.0001, "loss": 0.0232, "step": 37550 }, { "epoch": 247.10526315789474, "grad_norm": 1.708284616470337, "learning_rate": 0.0001, "loss": 0.0189, "step": 37560 }, { "epoch": 247.17105263157896, "grad_norm": 1.9512372016906738, "learning_rate": 0.0001, "loss": 0.0196, "step": 37570 }, { "epoch": 247.23684210526315, "grad_norm": 2.062551259994507, "learning_rate": 0.0001, "loss": 0.0199, "step": 37580 }, { "epoch": 247.30263157894737, "grad_norm": 2.493788719177246, "learning_rate": 0.0001, "loss": 0.0186, "step": 37590 }, { "epoch": 247.3684210526316, "grad_norm": 2.395887613296509, "learning_rate": 0.0001, "loss": 0.0207, "step": 37600 }, { "epoch": 247.43421052631578, "grad_norm": 1.8000961542129517, "learning_rate": 0.0001, "loss": 0.0174, "step": 37610 }, { "epoch": 247.5, "grad_norm": 2.3339929580688477, "learning_rate": 0.0001, "loss": 0.019, "step": 37620 }, { "epoch": 247.56578947368422, "grad_norm": 1.2320793867111206, "learning_rate": 0.0001, "loss": 0.0191, "step": 37630 }, { "epoch": 247.6315789473684, "grad_norm": 2.1111836433410645, "learning_rate": 0.0001, "loss": 0.0186, "step": 37640 }, { "epoch": 247.69736842105263, "grad_norm": 1.8467910289764404, "learning_rate": 0.0001, "loss": 0.0173, "step": 37650 }, { "epoch": 247.76315789473685, "grad_norm": 1.8003512620925903, "learning_rate": 0.0001, "loss": 0.0164, "step": 37660 }, { "epoch": 247.82894736842104, "grad_norm": 1.2748942375183105, "learning_rate": 0.0001, "loss": 0.0157, "step": 37670 }, { "epoch": 247.89473684210526, "grad_norm": 1.8734545707702637, "learning_rate": 0.0001, "loss": 0.0214, "step": 37680 }, { "epoch": 247.96052631578948, "grad_norm": 1.6238017082214355, "learning_rate": 0.0001, "loss": 0.022, "step": 37690 }, { "epoch": 248.02631578947367, "grad_norm": 1.3547264337539673, "learning_rate": 0.0001, "loss": 0.0203, "step": 37700 }, { "epoch": 248.0921052631579, "grad_norm": 1.8127341270446777, "learning_rate": 0.0001, "loss": 0.0193, "step": 37710 }, { "epoch": 248.1578947368421, "grad_norm": 1.5590680837631226, "learning_rate": 0.0001, "loss": 0.0197, "step": 37720 }, { "epoch": 248.22368421052633, "grad_norm": 1.8708713054656982, "learning_rate": 0.0001, "loss": 0.0192, "step": 37730 }, { "epoch": 248.28947368421052, "grad_norm": 1.5345643758773804, "learning_rate": 0.0001, "loss": 0.0232, "step": 37740 }, { "epoch": 248.35526315789474, "grad_norm": 1.9259992837905884, "learning_rate": 0.0001, "loss": 0.0226, "step": 37750 }, { "epoch": 248.42105263157896, "grad_norm": 2.0950355529785156, "learning_rate": 0.0001, "loss": 0.0175, "step": 37760 }, { "epoch": 248.48684210526315, "grad_norm": 2.0776095390319824, "learning_rate": 0.0001, "loss": 0.0219, "step": 37770 }, { "epoch": 248.55263157894737, "grad_norm": 1.77462899684906, "learning_rate": 0.0001, "loss": 0.018, "step": 37780 }, { "epoch": 248.6184210526316, "grad_norm": 1.1868139505386353, "learning_rate": 0.0001, "loss": 0.0178, "step": 37790 }, { "epoch": 248.68421052631578, "grad_norm": 1.63905930519104, "learning_rate": 0.0001, "loss": 0.0199, "step": 37800 }, { "epoch": 248.75, "grad_norm": 1.657355785369873, "learning_rate": 0.0001, "loss": 0.0177, "step": 37810 }, { "epoch": 248.81578947368422, "grad_norm": 1.7497563362121582, "learning_rate": 0.0001, "loss": 0.0189, "step": 37820 }, { "epoch": 248.8815789473684, "grad_norm": 1.6221939325332642, "learning_rate": 0.0001, "loss": 0.0185, "step": 37830 }, { "epoch": 248.94736842105263, "grad_norm": 1.7973885536193848, "learning_rate": 0.0001, "loss": 0.0179, "step": 37840 }, { "epoch": 249.01315789473685, "grad_norm": 1.6797751188278198, "learning_rate": 0.0001, "loss": 0.0176, "step": 37850 }, { "epoch": 249.07894736842104, "grad_norm": 1.213742971420288, "learning_rate": 0.0001, "loss": 0.0187, "step": 37860 }, { "epoch": 249.14473684210526, "grad_norm": 1.8710083961486816, "learning_rate": 0.0001, "loss": 0.021, "step": 37870 }, { "epoch": 249.21052631578948, "grad_norm": 1.8814297914505005, "learning_rate": 0.0001, "loss": 0.0158, "step": 37880 }, { "epoch": 249.27631578947367, "grad_norm": 1.4604511260986328, "learning_rate": 0.0001, "loss": 0.0188, "step": 37890 }, { "epoch": 249.3421052631579, "grad_norm": 1.579649806022644, "learning_rate": 0.0001, "loss": 0.0159, "step": 37900 }, { "epoch": 249.4078947368421, "grad_norm": 1.3786972761154175, "learning_rate": 0.0001, "loss": 0.0191, "step": 37910 }, { "epoch": 249.47368421052633, "grad_norm": 1.09312105178833, "learning_rate": 0.0001, "loss": 0.0157, "step": 37920 }, { "epoch": 249.53947368421052, "grad_norm": 1.4080692529678345, "learning_rate": 0.0001, "loss": 0.0219, "step": 37930 }, { "epoch": 249.60526315789474, "grad_norm": 1.6366760730743408, "learning_rate": 0.0001, "loss": 0.021, "step": 37940 }, { "epoch": 249.67105263157896, "grad_norm": 1.7723658084869385, "learning_rate": 0.0001, "loss": 0.0219, "step": 37950 }, { "epoch": 249.73684210526315, "grad_norm": 1.2559068202972412, "learning_rate": 0.0001, "loss": 0.0215, "step": 37960 }, { "epoch": 249.80263157894737, "grad_norm": 1.495743751525879, "learning_rate": 0.0001, "loss": 0.0191, "step": 37970 }, { "epoch": 249.8684210526316, "grad_norm": 1.5780885219573975, "learning_rate": 0.0001, "loss": 0.0226, "step": 37980 }, { "epoch": 249.93421052631578, "grad_norm": 2.0341742038726807, "learning_rate": 0.0001, "loss": 0.0182, "step": 37990 }, { "epoch": 250.0, "grad_norm": 1.7250137329101562, "learning_rate": 0.0001, "loss": 0.0201, "step": 38000 }, { "epoch": 250.06578947368422, "grad_norm": 1.173179268836975, "learning_rate": 0.0001, "loss": 0.0161, "step": 38010 }, { "epoch": 250.1315789473684, "grad_norm": 1.6079232692718506, "learning_rate": 0.0001, "loss": 0.0188, "step": 38020 }, { "epoch": 250.19736842105263, "grad_norm": 1.462537407875061, "learning_rate": 0.0001, "loss": 0.0209, "step": 38030 }, { "epoch": 250.26315789473685, "grad_norm": 1.1659626960754395, "learning_rate": 0.0001, "loss": 0.0234, "step": 38040 }, { "epoch": 250.32894736842104, "grad_norm": 1.6758371591567993, "learning_rate": 0.0001, "loss": 0.0192, "step": 38050 }, { "epoch": 250.39473684210526, "grad_norm": 2.2443792819976807, "learning_rate": 0.0001, "loss": 0.0193, "step": 38060 }, { "epoch": 250.46052631578948, "grad_norm": 2.139822244644165, "learning_rate": 0.0001, "loss": 0.0239, "step": 38070 }, { "epoch": 250.52631578947367, "grad_norm": 1.9330015182495117, "learning_rate": 0.0001, "loss": 0.0208, "step": 38080 }, { "epoch": 250.5921052631579, "grad_norm": 1.6171787977218628, "learning_rate": 0.0001, "loss": 0.0215, "step": 38090 }, { "epoch": 250.6578947368421, "grad_norm": 1.4429484605789185, "learning_rate": 0.0001, "loss": 0.021, "step": 38100 }, { "epoch": 250.72368421052633, "grad_norm": 2.0327658653259277, "learning_rate": 0.0001, "loss": 0.0186, "step": 38110 }, { "epoch": 250.78947368421052, "grad_norm": 1.1517729759216309, "learning_rate": 0.0001, "loss": 0.0183, "step": 38120 }, { "epoch": 250.85526315789474, "grad_norm": 1.8639031648635864, "learning_rate": 0.0001, "loss": 0.0167, "step": 38130 }, { "epoch": 250.92105263157896, "grad_norm": 1.5939135551452637, "learning_rate": 0.0001, "loss": 0.016, "step": 38140 }, { "epoch": 250.98684210526315, "grad_norm": 1.605600118637085, "learning_rate": 0.0001, "loss": 0.0191, "step": 38150 }, { "epoch": 251.05263157894737, "grad_norm": 1.6343367099761963, "learning_rate": 0.0001, "loss": 0.0221, "step": 38160 }, { "epoch": 251.1184210526316, "grad_norm": 2.3073954582214355, "learning_rate": 0.0001, "loss": 0.0173, "step": 38170 }, { "epoch": 251.18421052631578, "grad_norm": 1.7954121828079224, "learning_rate": 0.0001, "loss": 0.0169, "step": 38180 }, { "epoch": 251.25, "grad_norm": 2.028951644897461, "learning_rate": 0.0001, "loss": 0.0196, "step": 38190 }, { "epoch": 251.31578947368422, "grad_norm": 2.0561342239379883, "learning_rate": 0.0001, "loss": 0.0208, "step": 38200 }, { "epoch": 251.3815789473684, "grad_norm": 1.336519718170166, "learning_rate": 0.0001, "loss": 0.0203, "step": 38210 }, { "epoch": 251.44736842105263, "grad_norm": 1.5224111080169678, "learning_rate": 0.0001, "loss": 0.0177, "step": 38220 }, { "epoch": 251.51315789473685, "grad_norm": 1.660630226135254, "learning_rate": 0.0001, "loss": 0.019, "step": 38230 }, { "epoch": 251.57894736842104, "grad_norm": 2.0448014736175537, "learning_rate": 0.0001, "loss": 0.0168, "step": 38240 }, { "epoch": 251.64473684210526, "grad_norm": 1.4108688831329346, "learning_rate": 0.0001, "loss": 0.0188, "step": 38250 }, { "epoch": 251.71052631578948, "grad_norm": 1.8979169130325317, "learning_rate": 0.0001, "loss": 0.0179, "step": 38260 }, { "epoch": 251.77631578947367, "grad_norm": 1.6885842084884644, "learning_rate": 0.0001, "loss": 0.0178, "step": 38270 }, { "epoch": 251.8421052631579, "grad_norm": 2.055607795715332, "learning_rate": 0.0001, "loss": 0.0216, "step": 38280 }, { "epoch": 251.9078947368421, "grad_norm": 1.7068530321121216, "learning_rate": 0.0001, "loss": 0.0152, "step": 38290 }, { "epoch": 251.97368421052633, "grad_norm": 1.5706418752670288, "learning_rate": 0.0001, "loss": 0.0171, "step": 38300 }, { "epoch": 252.03947368421052, "grad_norm": 1.5545454025268555, "learning_rate": 0.0001, "loss": 0.0205, "step": 38310 }, { "epoch": 252.10526315789474, "grad_norm": 1.7795344591140747, "learning_rate": 0.0001, "loss": 0.0201, "step": 38320 }, { "epoch": 252.17105263157896, "grad_norm": 1.9067975282669067, "learning_rate": 0.0001, "loss": 0.0168, "step": 38330 }, { "epoch": 252.23684210526315, "grad_norm": 1.903225302696228, "learning_rate": 0.0001, "loss": 0.0187, "step": 38340 }, { "epoch": 252.30263157894737, "grad_norm": 1.6724716424942017, "learning_rate": 0.0001, "loss": 0.0203, "step": 38350 }, { "epoch": 252.3684210526316, "grad_norm": 1.349522352218628, "learning_rate": 0.0001, "loss": 0.0199, "step": 38360 }, { "epoch": 252.43421052631578, "grad_norm": 1.5031980276107788, "learning_rate": 0.0001, "loss": 0.0175, "step": 38370 }, { "epoch": 252.5, "grad_norm": 1.4347937107086182, "learning_rate": 0.0001, "loss": 0.0171, "step": 38380 }, { "epoch": 252.56578947368422, "grad_norm": 1.7046254873275757, "learning_rate": 0.0001, "loss": 0.0207, "step": 38390 }, { "epoch": 252.6315789473684, "grad_norm": 1.6106045246124268, "learning_rate": 0.0001, "loss": 0.0187, "step": 38400 }, { "epoch": 252.69736842105263, "grad_norm": 2.051778554916382, "learning_rate": 0.0001, "loss": 0.0188, "step": 38410 }, { "epoch": 252.76315789473685, "grad_norm": 2.0746335983276367, "learning_rate": 0.0001, "loss": 0.0196, "step": 38420 }, { "epoch": 252.82894736842104, "grad_norm": 2.1073482036590576, "learning_rate": 0.0001, "loss": 0.0183, "step": 38430 }, { "epoch": 252.89473684210526, "grad_norm": 1.4471052885055542, "learning_rate": 0.0001, "loss": 0.0221, "step": 38440 }, { "epoch": 252.96052631578948, "grad_norm": 1.899704098701477, "learning_rate": 0.0001, "loss": 0.0224, "step": 38450 }, { "epoch": 253.02631578947367, "grad_norm": 1.6013685464859009, "learning_rate": 0.0001, "loss": 0.0154, "step": 38460 }, { "epoch": 253.0921052631579, "grad_norm": 1.2136256694793701, "learning_rate": 0.0001, "loss": 0.0182, "step": 38470 }, { "epoch": 253.1578947368421, "grad_norm": 1.4341799020767212, "learning_rate": 0.0001, "loss": 0.0176, "step": 38480 }, { "epoch": 253.22368421052633, "grad_norm": 1.625044584274292, "learning_rate": 0.0001, "loss": 0.0195, "step": 38490 }, { "epoch": 253.28947368421052, "grad_norm": 1.4188827276229858, "learning_rate": 0.0001, "loss": 0.0238, "step": 38500 }, { "epoch": 253.35526315789474, "grad_norm": 1.1878427267074585, "learning_rate": 0.0001, "loss": 0.0158, "step": 38510 }, { "epoch": 253.42105263157896, "grad_norm": 1.391899585723877, "learning_rate": 0.0001, "loss": 0.0209, "step": 38520 }, { "epoch": 253.48684210526315, "grad_norm": 1.5657166242599487, "learning_rate": 0.0001, "loss": 0.0181, "step": 38530 }, { "epoch": 253.55263157894737, "grad_norm": 1.782982349395752, "learning_rate": 0.0001, "loss": 0.0218, "step": 38540 }, { "epoch": 253.6184210526316, "grad_norm": 1.7438015937805176, "learning_rate": 0.0001, "loss": 0.0197, "step": 38550 }, { "epoch": 253.68421052631578, "grad_norm": 2.1874377727508545, "learning_rate": 0.0001, "loss": 0.0204, "step": 38560 }, { "epoch": 253.75, "grad_norm": 1.9408444166183472, "learning_rate": 0.0001, "loss": 0.0165, "step": 38570 }, { "epoch": 253.81578947368422, "grad_norm": 1.644141435623169, "learning_rate": 0.0001, "loss": 0.0184, "step": 38580 }, { "epoch": 253.8815789473684, "grad_norm": 1.2520182132720947, "learning_rate": 0.0001, "loss": 0.02, "step": 38590 }, { "epoch": 253.94736842105263, "grad_norm": 1.5070645809173584, "learning_rate": 0.0001, "loss": 0.0248, "step": 38600 }, { "epoch": 254.01315789473685, "grad_norm": 1.576908826828003, "learning_rate": 0.0001, "loss": 0.0159, "step": 38610 }, { "epoch": 254.07894736842104, "grad_norm": 1.7721664905548096, "learning_rate": 0.0001, "loss": 0.0196, "step": 38620 }, { "epoch": 254.14473684210526, "grad_norm": 1.382591962814331, "learning_rate": 0.0001, "loss": 0.0201, "step": 38630 }, { "epoch": 254.21052631578948, "grad_norm": 1.5363233089447021, "learning_rate": 0.0001, "loss": 0.0226, "step": 38640 }, { "epoch": 254.27631578947367, "grad_norm": 1.5426580905914307, "learning_rate": 0.0001, "loss": 0.0197, "step": 38650 }, { "epoch": 254.3421052631579, "grad_norm": 1.6102724075317383, "learning_rate": 0.0001, "loss": 0.0185, "step": 38660 }, { "epoch": 254.4078947368421, "grad_norm": 1.3746867179870605, "learning_rate": 0.0001, "loss": 0.0185, "step": 38670 }, { "epoch": 254.47368421052633, "grad_norm": 1.4115369319915771, "learning_rate": 0.0001, "loss": 0.0195, "step": 38680 }, { "epoch": 254.53947368421052, "grad_norm": 1.595976710319519, "learning_rate": 0.0001, "loss": 0.0157, "step": 38690 }, { "epoch": 254.60526315789474, "grad_norm": 0.9186712503433228, "learning_rate": 0.0001, "loss": 0.0191, "step": 38700 }, { "epoch": 254.67105263157896, "grad_norm": 1.7686673402786255, "learning_rate": 0.0001, "loss": 0.0217, "step": 38710 }, { "epoch": 254.73684210526315, "grad_norm": 1.198823094367981, "learning_rate": 0.0001, "loss": 0.0204, "step": 38720 }, { "epoch": 254.80263157894737, "grad_norm": 1.4394563436508179, "learning_rate": 0.0001, "loss": 0.017, "step": 38730 }, { "epoch": 254.8684210526316, "grad_norm": 1.6343473196029663, "learning_rate": 0.0001, "loss": 0.0199, "step": 38740 }, { "epoch": 254.93421052631578, "grad_norm": 1.7809730768203735, "learning_rate": 0.0001, "loss": 0.0203, "step": 38750 }, { "epoch": 255.0, "grad_norm": 1.897514820098877, "learning_rate": 0.0001, "loss": 0.019, "step": 38760 }, { "epoch": 255.06578947368422, "grad_norm": 1.493811011314392, "learning_rate": 0.0001, "loss": 0.0205, "step": 38770 }, { "epoch": 255.1315789473684, "grad_norm": 1.7679293155670166, "learning_rate": 0.0001, "loss": 0.0174, "step": 38780 }, { "epoch": 255.19736842105263, "grad_norm": 1.8167680501937866, "learning_rate": 0.0001, "loss": 0.0175, "step": 38790 }, { "epoch": 255.26315789473685, "grad_norm": 1.5219337940216064, "learning_rate": 0.0001, "loss": 0.0249, "step": 38800 }, { "epoch": 255.32894736842104, "grad_norm": 1.4036918878555298, "learning_rate": 0.0001, "loss": 0.0184, "step": 38810 }, { "epoch": 255.39473684210526, "grad_norm": 1.5437465906143188, "learning_rate": 0.0001, "loss": 0.0214, "step": 38820 }, { "epoch": 255.46052631578948, "grad_norm": 1.6586639881134033, "learning_rate": 0.0001, "loss": 0.0194, "step": 38830 }, { "epoch": 255.52631578947367, "grad_norm": 1.3690851926803589, "learning_rate": 0.0001, "loss": 0.0242, "step": 38840 }, { "epoch": 255.5921052631579, "grad_norm": 1.4520416259765625, "learning_rate": 0.0001, "loss": 0.0165, "step": 38850 }, { "epoch": 255.6578947368421, "grad_norm": 1.979941725730896, "learning_rate": 0.0001, "loss": 0.016, "step": 38860 }, { "epoch": 255.72368421052633, "grad_norm": 1.957465410232544, "learning_rate": 0.0001, "loss": 0.0181, "step": 38870 }, { "epoch": 255.78947368421052, "grad_norm": 1.7558848857879639, "learning_rate": 0.0001, "loss": 0.0221, "step": 38880 }, { "epoch": 255.85526315789474, "grad_norm": 1.682460069656372, "learning_rate": 0.0001, "loss": 0.0195, "step": 38890 }, { "epoch": 255.92105263157896, "grad_norm": 2.1318373680114746, "learning_rate": 0.0001, "loss": 0.0189, "step": 38900 }, { "epoch": 255.98684210526315, "grad_norm": 1.562851071357727, "learning_rate": 0.0001, "loss": 0.0169, "step": 38910 }, { "epoch": 256.05263157894734, "grad_norm": 1.8059675693511963, "learning_rate": 0.0001, "loss": 0.0175, "step": 38920 }, { "epoch": 256.11842105263156, "grad_norm": 2.1569721698760986, "learning_rate": 0.0001, "loss": 0.0168, "step": 38930 }, { "epoch": 256.1842105263158, "grad_norm": 2.12153959274292, "learning_rate": 0.0001, "loss": 0.0174, "step": 38940 }, { "epoch": 256.25, "grad_norm": 1.945932388305664, "learning_rate": 0.0001, "loss": 0.0212, "step": 38950 }, { "epoch": 256.3157894736842, "grad_norm": 1.6950616836547852, "learning_rate": 0.0001, "loss": 0.0186, "step": 38960 }, { "epoch": 256.38157894736844, "grad_norm": 1.9012658596038818, "learning_rate": 0.0001, "loss": 0.0201, "step": 38970 }, { "epoch": 256.44736842105266, "grad_norm": 1.713428020477295, "learning_rate": 0.0001, "loss": 0.0193, "step": 38980 }, { "epoch": 256.5131578947368, "grad_norm": 1.6780660152435303, "learning_rate": 0.0001, "loss": 0.017, "step": 38990 }, { "epoch": 256.57894736842104, "grad_norm": 1.8940799236297607, "learning_rate": 0.0001, "loss": 0.0164, "step": 39000 }, { "epoch": 256.64473684210526, "grad_norm": 1.4310239553451538, "learning_rate": 0.0001, "loss": 0.0203, "step": 39010 }, { "epoch": 256.7105263157895, "grad_norm": 1.7875970602035522, "learning_rate": 0.0001, "loss": 0.0199, "step": 39020 }, { "epoch": 256.7763157894737, "grad_norm": 1.8596012592315674, "learning_rate": 0.0001, "loss": 0.0183, "step": 39030 }, { "epoch": 256.8421052631579, "grad_norm": 1.5417879819869995, "learning_rate": 0.0001, "loss": 0.0227, "step": 39040 }, { "epoch": 256.9078947368421, "grad_norm": 1.8173226118087769, "learning_rate": 0.0001, "loss": 0.0179, "step": 39050 }, { "epoch": 256.9736842105263, "grad_norm": 1.4406777620315552, "learning_rate": 0.0001, "loss": 0.0205, "step": 39060 }, { "epoch": 257.0394736842105, "grad_norm": 1.8053386211395264, "learning_rate": 0.0001, "loss": 0.0173, "step": 39070 }, { "epoch": 257.10526315789474, "grad_norm": 1.3948285579681396, "learning_rate": 0.0001, "loss": 0.0209, "step": 39080 }, { "epoch": 257.17105263157896, "grad_norm": 1.6264697313308716, "learning_rate": 0.0001, "loss": 0.0198, "step": 39090 }, { "epoch": 257.2368421052632, "grad_norm": 1.606439232826233, "learning_rate": 0.0001, "loss": 0.0192, "step": 39100 }, { "epoch": 257.30263157894734, "grad_norm": 1.6731964349746704, "learning_rate": 0.0001, "loss": 0.0199, "step": 39110 }, { "epoch": 257.36842105263156, "grad_norm": 1.6354118585586548, "learning_rate": 0.0001, "loss": 0.0177, "step": 39120 }, { "epoch": 257.4342105263158, "grad_norm": 2.1584279537200928, "learning_rate": 0.0001, "loss": 0.0184, "step": 39130 }, { "epoch": 257.5, "grad_norm": 1.7423194646835327, "learning_rate": 0.0001, "loss": 0.0162, "step": 39140 }, { "epoch": 257.5657894736842, "grad_norm": 1.2725094556808472, "learning_rate": 0.0001, "loss": 0.0209, "step": 39150 }, { "epoch": 257.63157894736844, "grad_norm": 1.66993248462677, "learning_rate": 0.0001, "loss": 0.0206, "step": 39160 }, { "epoch": 257.69736842105266, "grad_norm": 1.305206537246704, "learning_rate": 0.0001, "loss": 0.0245, "step": 39170 }, { "epoch": 257.7631578947368, "grad_norm": 1.756927728652954, "learning_rate": 0.0001, "loss": 0.0194, "step": 39180 }, { "epoch": 257.82894736842104, "grad_norm": 1.831089973449707, "learning_rate": 0.0001, "loss": 0.0165, "step": 39190 }, { "epoch": 257.89473684210526, "grad_norm": 1.3005661964416504, "learning_rate": 0.0001, "loss": 0.0179, "step": 39200 }, { "epoch": 257.9605263157895, "grad_norm": 1.2015342712402344, "learning_rate": 0.0001, "loss": 0.0155, "step": 39210 }, { "epoch": 258.0263157894737, "grad_norm": 1.8697941303253174, "learning_rate": 0.0001, "loss": 0.0204, "step": 39220 }, { "epoch": 258.0921052631579, "grad_norm": 1.4910387992858887, "learning_rate": 0.0001, "loss": 0.0211, "step": 39230 }, { "epoch": 258.1578947368421, "grad_norm": 1.3176043033599854, "learning_rate": 0.0001, "loss": 0.0183, "step": 39240 }, { "epoch": 258.2236842105263, "grad_norm": 1.3578097820281982, "learning_rate": 0.0001, "loss": 0.0219, "step": 39250 }, { "epoch": 258.2894736842105, "grad_norm": 1.6976616382598877, "learning_rate": 0.0001, "loss": 0.0172, "step": 39260 }, { "epoch": 258.35526315789474, "grad_norm": 1.8571430444717407, "learning_rate": 0.0001, "loss": 0.0231, "step": 39270 }, { "epoch": 258.42105263157896, "grad_norm": 1.7738218307495117, "learning_rate": 0.0001, "loss": 0.0196, "step": 39280 }, { "epoch": 258.4868421052632, "grad_norm": 1.5694677829742432, "learning_rate": 0.0001, "loss": 0.0201, "step": 39290 }, { "epoch": 258.55263157894734, "grad_norm": 1.7082417011260986, "learning_rate": 0.0001, "loss": 0.0191, "step": 39300 }, { "epoch": 258.61842105263156, "grad_norm": 1.5695738792419434, "learning_rate": 0.0001, "loss": 0.0205, "step": 39310 }, { "epoch": 258.6842105263158, "grad_norm": 1.5818785429000854, "learning_rate": 0.0001, "loss": 0.0175, "step": 39320 }, { "epoch": 258.75, "grad_norm": 1.5905065536499023, "learning_rate": 0.0001, "loss": 0.02, "step": 39330 }, { "epoch": 258.8157894736842, "grad_norm": 1.5156221389770508, "learning_rate": 0.0001, "loss": 0.0202, "step": 39340 }, { "epoch": 258.88157894736844, "grad_norm": 1.775646448135376, "learning_rate": 0.0001, "loss": 0.0195, "step": 39350 }, { "epoch": 258.94736842105266, "grad_norm": 1.1186273097991943, "learning_rate": 0.0001, "loss": 0.0211, "step": 39360 }, { "epoch": 259.0131578947368, "grad_norm": 1.5344960689544678, "learning_rate": 0.0001, "loss": 0.0163, "step": 39370 }, { "epoch": 259.07894736842104, "grad_norm": 1.6038583517074585, "learning_rate": 0.0001, "loss": 0.0205, "step": 39380 }, { "epoch": 259.14473684210526, "grad_norm": 1.4369868040084839, "learning_rate": 0.0001, "loss": 0.0164, "step": 39390 }, { "epoch": 259.2105263157895, "grad_norm": 1.7847784757614136, "learning_rate": 0.0001, "loss": 0.0211, "step": 39400 }, { "epoch": 259.2763157894737, "grad_norm": 2.05953311920166, "learning_rate": 0.0001, "loss": 0.0271, "step": 39410 }, { "epoch": 259.3421052631579, "grad_norm": 1.6906061172485352, "learning_rate": 0.0001, "loss": 0.0198, "step": 39420 }, { "epoch": 259.4078947368421, "grad_norm": 1.6554229259490967, "learning_rate": 0.0001, "loss": 0.0185, "step": 39430 }, { "epoch": 259.4736842105263, "grad_norm": 1.6874994039535522, "learning_rate": 0.0001, "loss": 0.0166, "step": 39440 }, { "epoch": 259.5394736842105, "grad_norm": 1.7425236701965332, "learning_rate": 0.0001, "loss": 0.0229, "step": 39450 }, { "epoch": 259.60526315789474, "grad_norm": 1.4733233451843262, "learning_rate": 0.0001, "loss": 0.0174, "step": 39460 }, { "epoch": 259.67105263157896, "grad_norm": 1.8148967027664185, "learning_rate": 0.0001, "loss": 0.0183, "step": 39470 }, { "epoch": 259.7368421052632, "grad_norm": 1.5376193523406982, "learning_rate": 0.0001, "loss": 0.0182, "step": 39480 }, { "epoch": 259.80263157894734, "grad_norm": 1.4294403791427612, "learning_rate": 0.0001, "loss": 0.0213, "step": 39490 }, { "epoch": 259.86842105263156, "grad_norm": 1.360865831375122, "learning_rate": 0.0001, "loss": 0.0151, "step": 39500 }, { "epoch": 259.9342105263158, "grad_norm": 1.8409717082977295, "learning_rate": 0.0001, "loss": 0.017, "step": 39510 }, { "epoch": 260.0, "grad_norm": 1.2264381647109985, "learning_rate": 0.0001, "loss": 0.0202, "step": 39520 }, { "epoch": 260.0657894736842, "grad_norm": 1.3339258432388306, "learning_rate": 0.0001, "loss": 0.0179, "step": 39530 }, { "epoch": 260.13157894736844, "grad_norm": 1.607704997062683, "learning_rate": 0.0001, "loss": 0.0206, "step": 39540 }, { "epoch": 260.19736842105266, "grad_norm": 1.5771342515945435, "learning_rate": 0.0001, "loss": 0.0209, "step": 39550 }, { "epoch": 260.2631578947368, "grad_norm": 1.9265590906143188, "learning_rate": 0.0001, "loss": 0.0195, "step": 39560 }, { "epoch": 260.32894736842104, "grad_norm": 1.6826872825622559, "learning_rate": 0.0001, "loss": 0.0178, "step": 39570 }, { "epoch": 260.39473684210526, "grad_norm": 1.3243662118911743, "learning_rate": 0.0001, "loss": 0.0208, "step": 39580 }, { "epoch": 260.4605263157895, "grad_norm": 1.3684234619140625, "learning_rate": 0.0001, "loss": 0.023, "step": 39590 }, { "epoch": 260.5263157894737, "grad_norm": 1.4882593154907227, "learning_rate": 0.0001, "loss": 0.0204, "step": 39600 }, { "epoch": 260.5921052631579, "grad_norm": 1.7429821491241455, "learning_rate": 0.0001, "loss": 0.0198, "step": 39610 }, { "epoch": 260.6578947368421, "grad_norm": 1.5070884227752686, "learning_rate": 0.0001, "loss": 0.0172, "step": 39620 }, { "epoch": 260.7236842105263, "grad_norm": 2.075457811355591, "learning_rate": 0.0001, "loss": 0.0178, "step": 39630 }, { "epoch": 260.7894736842105, "grad_norm": 1.8386484384536743, "learning_rate": 0.0001, "loss": 0.0192, "step": 39640 }, { "epoch": 260.85526315789474, "grad_norm": 1.3382987976074219, "learning_rate": 0.0001, "loss": 0.0176, "step": 39650 }, { "epoch": 260.92105263157896, "grad_norm": 1.5108665227890015, "learning_rate": 0.0001, "loss": 0.0171, "step": 39660 }, { "epoch": 260.9868421052632, "grad_norm": 1.4449321031570435, "learning_rate": 0.0001, "loss": 0.0188, "step": 39670 }, { "epoch": 261.05263157894734, "grad_norm": 1.6972180604934692, "learning_rate": 0.0001, "loss": 0.0171, "step": 39680 }, { "epoch": 261.11842105263156, "grad_norm": 2.1116812229156494, "learning_rate": 0.0001, "loss": 0.0219, "step": 39690 }, { "epoch": 261.1842105263158, "grad_norm": 2.142426013946533, "learning_rate": 0.0001, "loss": 0.0155, "step": 39700 }, { "epoch": 261.25, "grad_norm": 2.0831239223480225, "learning_rate": 0.0001, "loss": 0.0156, "step": 39710 }, { "epoch": 261.3157894736842, "grad_norm": 1.500062346458435, "learning_rate": 0.0001, "loss": 0.0166, "step": 39720 }, { "epoch": 261.38157894736844, "grad_norm": 1.598147988319397, "learning_rate": 0.0001, "loss": 0.0174, "step": 39730 }, { "epoch": 261.44736842105266, "grad_norm": 2.0930604934692383, "learning_rate": 0.0001, "loss": 0.0206, "step": 39740 }, { "epoch": 261.5131578947368, "grad_norm": 2.1327574253082275, "learning_rate": 0.0001, "loss": 0.0204, "step": 39750 }, { "epoch": 261.57894736842104, "grad_norm": 1.1232839822769165, "learning_rate": 0.0001, "loss": 0.0186, "step": 39760 }, { "epoch": 261.64473684210526, "grad_norm": 1.511362075805664, "learning_rate": 0.0001, "loss": 0.0173, "step": 39770 }, { "epoch": 261.7105263157895, "grad_norm": 1.7076692581176758, "learning_rate": 0.0001, "loss": 0.0179, "step": 39780 }, { "epoch": 261.7763157894737, "grad_norm": 1.676173448562622, "learning_rate": 0.0001, "loss": 0.0178, "step": 39790 }, { "epoch": 261.8421052631579, "grad_norm": 1.4135398864746094, "learning_rate": 0.0001, "loss": 0.0208, "step": 39800 }, { "epoch": 261.9078947368421, "grad_norm": 1.3834638595581055, "learning_rate": 0.0001, "loss": 0.0183, "step": 39810 }, { "epoch": 261.9736842105263, "grad_norm": 1.699268102645874, "learning_rate": 0.0001, "loss": 0.0236, "step": 39820 }, { "epoch": 262.0394736842105, "grad_norm": 1.5050941705703735, "learning_rate": 0.0001, "loss": 0.0216, "step": 39830 }, { "epoch": 262.10526315789474, "grad_norm": 1.348667025566101, "learning_rate": 0.0001, "loss": 0.0199, "step": 39840 }, { "epoch": 262.17105263157896, "grad_norm": 1.6351021528244019, "learning_rate": 0.0001, "loss": 0.0194, "step": 39850 }, { "epoch": 262.2368421052632, "grad_norm": 2.0290474891662598, "learning_rate": 0.0001, "loss": 0.0164, "step": 39860 }, { "epoch": 262.30263157894734, "grad_norm": 1.874064564704895, "learning_rate": 0.0001, "loss": 0.0162, "step": 39870 }, { "epoch": 262.36842105263156, "grad_norm": 1.3394337892532349, "learning_rate": 0.0001, "loss": 0.0172, "step": 39880 }, { "epoch": 262.4342105263158, "grad_norm": 1.2875337600708008, "learning_rate": 0.0001, "loss": 0.019, "step": 39890 }, { "epoch": 262.5, "grad_norm": 1.7091710567474365, "learning_rate": 0.0001, "loss": 0.0182, "step": 39900 }, { "epoch": 262.5657894736842, "grad_norm": 1.8082983493804932, "learning_rate": 0.0001, "loss": 0.0161, "step": 39910 }, { "epoch": 262.63157894736844, "grad_norm": 1.8591419458389282, "learning_rate": 0.0001, "loss": 0.0193, "step": 39920 }, { "epoch": 262.69736842105266, "grad_norm": 1.6899263858795166, "learning_rate": 0.0001, "loss": 0.0184, "step": 39930 }, { "epoch": 262.7631578947368, "grad_norm": 1.7574496269226074, "learning_rate": 0.0001, "loss": 0.0245, "step": 39940 }, { "epoch": 262.82894736842104, "grad_norm": 1.955613136291504, "learning_rate": 0.0001, "loss": 0.02, "step": 39950 }, { "epoch": 262.89473684210526, "grad_norm": 2.1217236518859863, "learning_rate": 0.0001, "loss": 0.0183, "step": 39960 }, { "epoch": 262.9605263157895, "grad_norm": 1.9723694324493408, "learning_rate": 0.0001, "loss": 0.0189, "step": 39970 }, { "epoch": 263.0263157894737, "grad_norm": 1.8849053382873535, "learning_rate": 0.0001, "loss": 0.0212, "step": 39980 }, { "epoch": 263.0921052631579, "grad_norm": 2.174670457839966, "learning_rate": 0.0001, "loss": 0.0192, "step": 39990 }, { "epoch": 263.1578947368421, "grad_norm": 2.446721315383911, "learning_rate": 0.0001, "loss": 0.0154, "step": 40000 }, { "epoch": 263.2236842105263, "grad_norm": 2.017679452896118, "learning_rate": 0.0001, "loss": 0.0151, "step": 40010 }, { "epoch": 263.2894736842105, "grad_norm": 2.0108237266540527, "learning_rate": 0.0001, "loss": 0.019, "step": 40020 }, { "epoch": 263.35526315789474, "grad_norm": 1.9936039447784424, "learning_rate": 0.0001, "loss": 0.0162, "step": 40030 }, { "epoch": 263.42105263157896, "grad_norm": 1.3560761213302612, "learning_rate": 0.0001, "loss": 0.0154, "step": 40040 }, { "epoch": 263.4868421052632, "grad_norm": 1.5309953689575195, "learning_rate": 0.0001, "loss": 0.0239, "step": 40050 }, { "epoch": 263.55263157894734, "grad_norm": 1.4847453832626343, "learning_rate": 0.0001, "loss": 0.0188, "step": 40060 }, { "epoch": 263.61842105263156, "grad_norm": 1.6265153884887695, "learning_rate": 0.0001, "loss": 0.0189, "step": 40070 }, { "epoch": 263.6842105263158, "grad_norm": 1.3924559354782104, "learning_rate": 0.0001, "loss": 0.0223, "step": 40080 }, { "epoch": 263.75, "grad_norm": 1.6881301403045654, "learning_rate": 0.0001, "loss": 0.0182, "step": 40090 }, { "epoch": 263.8157894736842, "grad_norm": 1.6361554861068726, "learning_rate": 0.0001, "loss": 0.0192, "step": 40100 }, { "epoch": 263.88157894736844, "grad_norm": 1.711307406425476, "learning_rate": 0.0001, "loss": 0.0199, "step": 40110 }, { "epoch": 263.94736842105266, "grad_norm": 1.5587164163589478, "learning_rate": 0.0001, "loss": 0.0196, "step": 40120 }, { "epoch": 264.0131578947368, "grad_norm": 1.428592324256897, "learning_rate": 0.0001, "loss": 0.0172, "step": 40130 }, { "epoch": 264.07894736842104, "grad_norm": 1.674706220626831, "learning_rate": 0.0001, "loss": 0.0198, "step": 40140 }, { "epoch": 264.14473684210526, "grad_norm": 1.840646505355835, "learning_rate": 0.0001, "loss": 0.0203, "step": 40150 }, { "epoch": 264.2105263157895, "grad_norm": 1.9254913330078125, "learning_rate": 0.0001, "loss": 0.0153, "step": 40160 }, { "epoch": 264.2763157894737, "grad_norm": 1.2348902225494385, "learning_rate": 0.0001, "loss": 0.0192, "step": 40170 }, { "epoch": 264.3421052631579, "grad_norm": 1.1720656156539917, "learning_rate": 0.0001, "loss": 0.017, "step": 40180 }, { "epoch": 264.4078947368421, "grad_norm": 1.3570659160614014, "learning_rate": 0.0001, "loss": 0.0199, "step": 40190 }, { "epoch": 264.4736842105263, "grad_norm": 1.3176871538162231, "learning_rate": 0.0001, "loss": 0.0172, "step": 40200 }, { "epoch": 264.5394736842105, "grad_norm": 1.3044230937957764, "learning_rate": 0.0001, "loss": 0.0193, "step": 40210 }, { "epoch": 264.60526315789474, "grad_norm": 1.8131052255630493, "learning_rate": 0.0001, "loss": 0.0179, "step": 40220 }, { "epoch": 264.67105263157896, "grad_norm": 1.6476603746414185, "learning_rate": 0.0001, "loss": 0.0216, "step": 40230 }, { "epoch": 264.7368421052632, "grad_norm": 1.629779577255249, "learning_rate": 0.0001, "loss": 0.0193, "step": 40240 }, { "epoch": 264.80263157894734, "grad_norm": 1.6341257095336914, "learning_rate": 0.0001, "loss": 0.024, "step": 40250 }, { "epoch": 264.86842105263156, "grad_norm": 1.7007060050964355, "learning_rate": 0.0001, "loss": 0.0188, "step": 40260 }, { "epoch": 264.9342105263158, "grad_norm": 1.7716270685195923, "learning_rate": 0.0001, "loss": 0.0194, "step": 40270 }, { "epoch": 265.0, "grad_norm": 1.6287013292312622, "learning_rate": 0.0001, "loss": 0.0184, "step": 40280 }, { "epoch": 265.0657894736842, "grad_norm": 2.1033124923706055, "learning_rate": 0.0001, "loss": 0.0207, "step": 40290 }, { "epoch": 265.13157894736844, "grad_norm": 2.1983604431152344, "learning_rate": 0.0001, "loss": 0.0182, "step": 40300 }, { "epoch": 265.19736842105266, "grad_norm": 1.9886090755462646, "learning_rate": 0.0001, "loss": 0.0194, "step": 40310 }, { "epoch": 265.2631578947368, "grad_norm": 2.09682297706604, "learning_rate": 0.0001, "loss": 0.0183, "step": 40320 }, { "epoch": 265.32894736842104, "grad_norm": 1.3365137577056885, "learning_rate": 0.0001, "loss": 0.0151, "step": 40330 }, { "epoch": 265.39473684210526, "grad_norm": 1.5782885551452637, "learning_rate": 0.0001, "loss": 0.019, "step": 40340 }, { "epoch": 265.4605263157895, "grad_norm": 1.7036433219909668, "learning_rate": 0.0001, "loss": 0.0189, "step": 40350 }, { "epoch": 265.5263157894737, "grad_norm": 2.279188394546509, "learning_rate": 0.0001, "loss": 0.0186, "step": 40360 }, { "epoch": 265.5921052631579, "grad_norm": 2.1793713569641113, "learning_rate": 0.0001, "loss": 0.0166, "step": 40370 }, { "epoch": 265.6578947368421, "grad_norm": 1.9996507167816162, "learning_rate": 0.0001, "loss": 0.0162, "step": 40380 }, { "epoch": 265.7236842105263, "grad_norm": 1.7126402854919434, "learning_rate": 0.0001, "loss": 0.0168, "step": 40390 }, { "epoch": 265.7894736842105, "grad_norm": 2.0961573123931885, "learning_rate": 0.0001, "loss": 0.0167, "step": 40400 }, { "epoch": 265.85526315789474, "grad_norm": 1.9421353340148926, "learning_rate": 0.0001, "loss": 0.0185, "step": 40410 }, { "epoch": 265.92105263157896, "grad_norm": 1.537495732307434, "learning_rate": 0.0001, "loss": 0.0236, "step": 40420 }, { "epoch": 265.9868421052632, "grad_norm": 1.6581580638885498, "learning_rate": 0.0001, "loss": 0.0241, "step": 40430 }, { "epoch": 266.05263157894734, "grad_norm": 1.596714973449707, "learning_rate": 0.0001, "loss": 0.0213, "step": 40440 }, { "epoch": 266.11842105263156, "grad_norm": 1.9232572317123413, "learning_rate": 0.0001, "loss": 0.0192, "step": 40450 }, { "epoch": 266.1842105263158, "grad_norm": 2.3757705688476562, "learning_rate": 0.0001, "loss": 0.0171, "step": 40460 }, { "epoch": 266.25, "grad_norm": 1.7801306247711182, "learning_rate": 0.0001, "loss": 0.0207, "step": 40470 }, { "epoch": 266.3157894736842, "grad_norm": 2.234375, "learning_rate": 0.0001, "loss": 0.0151, "step": 40480 }, { "epoch": 266.38157894736844, "grad_norm": 1.7447097301483154, "learning_rate": 0.0001, "loss": 0.0186, "step": 40490 }, { "epoch": 266.44736842105266, "grad_norm": 1.826393723487854, "learning_rate": 0.0001, "loss": 0.0153, "step": 40500 }, { "epoch": 266.5131578947368, "grad_norm": 1.7347716093063354, "learning_rate": 0.0001, "loss": 0.0163, "step": 40510 }, { "epoch": 266.57894736842104, "grad_norm": 1.8880516290664673, "learning_rate": 0.0001, "loss": 0.0169, "step": 40520 }, { "epoch": 266.64473684210526, "grad_norm": 1.515757441520691, "learning_rate": 0.0001, "loss": 0.0219, "step": 40530 }, { "epoch": 266.7105263157895, "grad_norm": 1.7228015661239624, "learning_rate": 0.0001, "loss": 0.0178, "step": 40540 }, { "epoch": 266.7763157894737, "grad_norm": 1.3889470100402832, "learning_rate": 0.0001, "loss": 0.017, "step": 40550 }, { "epoch": 266.8421052631579, "grad_norm": 1.4706288576126099, "learning_rate": 0.0001, "loss": 0.0191, "step": 40560 }, { "epoch": 266.9078947368421, "grad_norm": 1.234788179397583, "learning_rate": 0.0001, "loss": 0.0206, "step": 40570 }, { "epoch": 266.9736842105263, "grad_norm": 1.6449663639068604, "learning_rate": 0.0001, "loss": 0.0181, "step": 40580 }, { "epoch": 267.0394736842105, "grad_norm": 1.5770649909973145, "learning_rate": 0.0001, "loss": 0.0212, "step": 40590 }, { "epoch": 267.10526315789474, "grad_norm": 1.950232744216919, "learning_rate": 0.0001, "loss": 0.0168, "step": 40600 }, { "epoch": 267.17105263157896, "grad_norm": 1.7165342569351196, "learning_rate": 0.0001, "loss": 0.0151, "step": 40610 }, { "epoch": 267.2368421052632, "grad_norm": 1.7429927587509155, "learning_rate": 0.0001, "loss": 0.0198, "step": 40620 }, { "epoch": 267.30263157894734, "grad_norm": 1.821451187133789, "learning_rate": 0.0001, "loss": 0.0183, "step": 40630 }, { "epoch": 267.36842105263156, "grad_norm": 1.541358232498169, "learning_rate": 0.0001, "loss": 0.0212, "step": 40640 }, { "epoch": 267.4342105263158, "grad_norm": 1.6618016958236694, "learning_rate": 0.0001, "loss": 0.0195, "step": 40650 }, { "epoch": 267.5, "grad_norm": 1.7775176763534546, "learning_rate": 0.0001, "loss": 0.0204, "step": 40660 }, { "epoch": 267.5657894736842, "grad_norm": 1.8699449300765991, "learning_rate": 0.0001, "loss": 0.0174, "step": 40670 }, { "epoch": 267.63157894736844, "grad_norm": 1.9495797157287598, "learning_rate": 0.0001, "loss": 0.0186, "step": 40680 }, { "epoch": 267.69736842105266, "grad_norm": 1.943674921989441, "learning_rate": 0.0001, "loss": 0.0173, "step": 40690 }, { "epoch": 267.7631578947368, "grad_norm": 1.881866693496704, "learning_rate": 0.0001, "loss": 0.0188, "step": 40700 }, { "epoch": 267.82894736842104, "grad_norm": 1.6626840829849243, "learning_rate": 0.0001, "loss": 0.021, "step": 40710 }, { "epoch": 267.89473684210526, "grad_norm": 2.0168418884277344, "learning_rate": 0.0001, "loss": 0.0163, "step": 40720 }, { "epoch": 267.9605263157895, "grad_norm": 1.6018821001052856, "learning_rate": 0.0001, "loss": 0.021, "step": 40730 }, { "epoch": 268.0263157894737, "grad_norm": 1.69930100440979, "learning_rate": 0.0001, "loss": 0.0205, "step": 40740 }, { "epoch": 268.0921052631579, "grad_norm": 1.5911381244659424, "learning_rate": 0.0001, "loss": 0.0173, "step": 40750 }, { "epoch": 268.1578947368421, "grad_norm": 1.410374641418457, "learning_rate": 0.0001, "loss": 0.0191, "step": 40760 }, { "epoch": 268.2236842105263, "grad_norm": 1.1957463026046753, "learning_rate": 0.0001, "loss": 0.0203, "step": 40770 }, { "epoch": 268.2894736842105, "grad_norm": 1.5440082550048828, "learning_rate": 0.0001, "loss": 0.0179, "step": 40780 }, { "epoch": 268.35526315789474, "grad_norm": 1.4329774379730225, "learning_rate": 0.0001, "loss": 0.0187, "step": 40790 }, { "epoch": 268.42105263157896, "grad_norm": 1.4347599744796753, "learning_rate": 0.0001, "loss": 0.0185, "step": 40800 }, { "epoch": 268.4868421052632, "grad_norm": 1.7393370866775513, "learning_rate": 0.0001, "loss": 0.0203, "step": 40810 }, { "epoch": 268.55263157894734, "grad_norm": 1.4429786205291748, "learning_rate": 0.0001, "loss": 0.0183, "step": 40820 }, { "epoch": 268.61842105263156, "grad_norm": 1.6456552743911743, "learning_rate": 0.0001, "loss": 0.0163, "step": 40830 }, { "epoch": 268.6842105263158, "grad_norm": 2.0974817276000977, "learning_rate": 0.0001, "loss": 0.0222, "step": 40840 }, { "epoch": 268.75, "grad_norm": 1.5846291780471802, "learning_rate": 0.0001, "loss": 0.0211, "step": 40850 }, { "epoch": 268.8157894736842, "grad_norm": 1.2266579866409302, "learning_rate": 0.0001, "loss": 0.022, "step": 40860 }, { "epoch": 268.88157894736844, "grad_norm": 1.3804123401641846, "learning_rate": 0.0001, "loss": 0.0171, "step": 40870 }, { "epoch": 268.94736842105266, "grad_norm": 1.5524760484695435, "learning_rate": 0.0001, "loss": 0.0172, "step": 40880 }, { "epoch": 269.0131578947368, "grad_norm": 1.7440029382705688, "learning_rate": 0.0001, "loss": 0.0162, "step": 40890 }, { "epoch": 269.07894736842104, "grad_norm": 1.4769954681396484, "learning_rate": 0.0001, "loss": 0.0235, "step": 40900 }, { "epoch": 269.14473684210526, "grad_norm": 1.6985127925872803, "learning_rate": 0.0001, "loss": 0.0166, "step": 40910 }, { "epoch": 269.2105263157895, "grad_norm": 1.4418299198150635, "learning_rate": 0.0001, "loss": 0.0178, "step": 40920 }, { "epoch": 269.2763157894737, "grad_norm": 1.5119794607162476, "learning_rate": 0.0001, "loss": 0.0195, "step": 40930 }, { "epoch": 269.3421052631579, "grad_norm": 1.9186110496520996, "learning_rate": 0.0001, "loss": 0.0165, "step": 40940 }, { "epoch": 269.4078947368421, "grad_norm": 1.716105341911316, "learning_rate": 0.0001, "loss": 0.0188, "step": 40950 }, { "epoch": 269.4736842105263, "grad_norm": 2.0301809310913086, "learning_rate": 0.0001, "loss": 0.0167, "step": 40960 }, { "epoch": 269.5394736842105, "grad_norm": 1.503841519355774, "learning_rate": 0.0001, "loss": 0.0227, "step": 40970 }, { "epoch": 269.60526315789474, "grad_norm": 1.6427055597305298, "learning_rate": 0.0001, "loss": 0.0205, "step": 40980 }, { "epoch": 269.67105263157896, "grad_norm": 1.8199440240859985, "learning_rate": 0.0001, "loss": 0.0191, "step": 40990 }, { "epoch": 269.7368421052632, "grad_norm": 1.3589208126068115, "learning_rate": 0.0001, "loss": 0.0195, "step": 41000 }, { "epoch": 269.80263157894734, "grad_norm": 1.839385747909546, "learning_rate": 0.0001, "loss": 0.0168, "step": 41010 }, { "epoch": 269.86842105263156, "grad_norm": 1.3285698890686035, "learning_rate": 0.0001, "loss": 0.0179, "step": 41020 }, { "epoch": 269.9342105263158, "grad_norm": 1.4309489727020264, "learning_rate": 0.0001, "loss": 0.0193, "step": 41030 }, { "epoch": 270.0, "grad_norm": 1.3896076679229736, "learning_rate": 0.0001, "loss": 0.0175, "step": 41040 }, { "epoch": 270.0657894736842, "grad_norm": 1.2361701726913452, "learning_rate": 0.0001, "loss": 0.0205, "step": 41050 }, { "epoch": 270.13157894736844, "grad_norm": 1.8730806112289429, "learning_rate": 0.0001, "loss": 0.0173, "step": 41060 }, { "epoch": 270.19736842105266, "grad_norm": 1.8127845525741577, "learning_rate": 0.0001, "loss": 0.0223, "step": 41070 }, { "epoch": 270.2631578947368, "grad_norm": 1.4464410543441772, "learning_rate": 0.0001, "loss": 0.0172, "step": 41080 }, { "epoch": 270.32894736842104, "grad_norm": 1.8598712682724, "learning_rate": 0.0001, "loss": 0.0201, "step": 41090 }, { "epoch": 270.39473684210526, "grad_norm": 1.3607782125473022, "learning_rate": 0.0001, "loss": 0.021, "step": 41100 }, { "epoch": 270.4605263157895, "grad_norm": 1.326936960220337, "learning_rate": 0.0001, "loss": 0.0174, "step": 41110 }, { "epoch": 270.5263157894737, "grad_norm": 1.6399399042129517, "learning_rate": 0.0001, "loss": 0.0174, "step": 41120 }, { "epoch": 270.5921052631579, "grad_norm": 1.4695460796356201, "learning_rate": 0.0001, "loss": 0.0188, "step": 41130 }, { "epoch": 270.6578947368421, "grad_norm": 1.492980718612671, "learning_rate": 0.0001, "loss": 0.0195, "step": 41140 }, { "epoch": 270.7236842105263, "grad_norm": 1.3097330331802368, "learning_rate": 0.0001, "loss": 0.0203, "step": 41150 }, { "epoch": 270.7894736842105, "grad_norm": 1.6288543939590454, "learning_rate": 0.0001, "loss": 0.0199, "step": 41160 }, { "epoch": 270.85526315789474, "grad_norm": 1.6192128658294678, "learning_rate": 0.0001, "loss": 0.0191, "step": 41170 }, { "epoch": 270.92105263157896, "grad_norm": 1.3173986673355103, "learning_rate": 0.0001, "loss": 0.0188, "step": 41180 }, { "epoch": 270.9868421052632, "grad_norm": 1.3118349313735962, "learning_rate": 0.0001, "loss": 0.0166, "step": 41190 }, { "epoch": 271.05263157894734, "grad_norm": 1.5865185260772705, "learning_rate": 0.0001, "loss": 0.0189, "step": 41200 }, { "epoch": 271.11842105263156, "grad_norm": 1.3960522413253784, "learning_rate": 0.0001, "loss": 0.0226, "step": 41210 }, { "epoch": 271.1842105263158, "grad_norm": 1.5897351503372192, "learning_rate": 0.0001, "loss": 0.0197, "step": 41220 }, { "epoch": 271.25, "grad_norm": 1.694769263267517, "learning_rate": 0.0001, "loss": 0.0208, "step": 41230 }, { "epoch": 271.3157894736842, "grad_norm": 1.566058874130249, "learning_rate": 0.0001, "loss": 0.0197, "step": 41240 }, { "epoch": 271.38157894736844, "grad_norm": 1.272325873374939, "learning_rate": 0.0001, "loss": 0.0176, "step": 41250 }, { "epoch": 271.44736842105266, "grad_norm": 1.6997090578079224, "learning_rate": 0.0001, "loss": 0.0183, "step": 41260 }, { "epoch": 271.5131578947368, "grad_norm": 2.0227890014648438, "learning_rate": 0.0001, "loss": 0.0157, "step": 41270 }, { "epoch": 271.57894736842104, "grad_norm": 1.7674880027770996, "learning_rate": 0.0001, "loss": 0.0189, "step": 41280 }, { "epoch": 271.64473684210526, "grad_norm": 2.0618176460266113, "learning_rate": 0.0001, "loss": 0.0165, "step": 41290 }, { "epoch": 271.7105263157895, "grad_norm": 1.812538981437683, "learning_rate": 0.0001, "loss": 0.02, "step": 41300 }, { "epoch": 271.7763157894737, "grad_norm": 1.3584734201431274, "learning_rate": 0.0001, "loss": 0.0248, "step": 41310 }, { "epoch": 271.8421052631579, "grad_norm": 1.533150553703308, "learning_rate": 0.0001, "loss": 0.0235, "step": 41320 }, { "epoch": 271.9078947368421, "grad_norm": 2.111863613128662, "learning_rate": 0.0001, "loss": 0.0186, "step": 41330 }, { "epoch": 271.9736842105263, "grad_norm": 1.773369550704956, "learning_rate": 0.0001, "loss": 0.0174, "step": 41340 }, { "epoch": 272.0394736842105, "grad_norm": 1.4165172576904297, "learning_rate": 0.0001, "loss": 0.0168, "step": 41350 }, { "epoch": 272.10526315789474, "grad_norm": 2.1870625019073486, "learning_rate": 0.0001, "loss": 0.0183, "step": 41360 }, { "epoch": 272.17105263157896, "grad_norm": 1.8010666370391846, "learning_rate": 0.0001, "loss": 0.0208, "step": 41370 }, { "epoch": 272.2368421052632, "grad_norm": 2.0138986110687256, "learning_rate": 0.0001, "loss": 0.0178, "step": 41380 }, { "epoch": 272.30263157894734, "grad_norm": 1.3216615915298462, "learning_rate": 0.0001, "loss": 0.0173, "step": 41390 }, { "epoch": 272.36842105263156, "grad_norm": 1.4295320510864258, "learning_rate": 0.0001, "loss": 0.018, "step": 41400 }, { "epoch": 272.4342105263158, "grad_norm": 1.4381927251815796, "learning_rate": 0.0001, "loss": 0.021, "step": 41410 }, { "epoch": 272.5, "grad_norm": 1.471834659576416, "learning_rate": 0.0001, "loss": 0.0189, "step": 41420 }, { "epoch": 272.5657894736842, "grad_norm": 1.7069851160049438, "learning_rate": 0.0001, "loss": 0.0172, "step": 41430 }, { "epoch": 272.63157894736844, "grad_norm": 1.603374719619751, "learning_rate": 0.0001, "loss": 0.0196, "step": 41440 }, { "epoch": 272.69736842105266, "grad_norm": 1.5398693084716797, "learning_rate": 0.0001, "loss": 0.0174, "step": 41450 }, { "epoch": 272.7631578947368, "grad_norm": 2.164276361465454, "learning_rate": 0.0001, "loss": 0.0196, "step": 41460 }, { "epoch": 272.82894736842104, "grad_norm": 1.5172971487045288, "learning_rate": 0.0001, "loss": 0.0163, "step": 41470 }, { "epoch": 272.89473684210526, "grad_norm": 1.5797982215881348, "learning_rate": 0.0001, "loss": 0.0228, "step": 41480 }, { "epoch": 272.9605263157895, "grad_norm": 1.5426971912384033, "learning_rate": 0.0001, "loss": 0.0196, "step": 41490 }, { "epoch": 273.0263157894737, "grad_norm": 1.7475494146347046, "learning_rate": 0.0001, "loss": 0.0205, "step": 41500 }, { "epoch": 273.0921052631579, "grad_norm": 1.5791484117507935, "learning_rate": 0.0001, "loss": 0.0248, "step": 41510 }, { "epoch": 273.1578947368421, "grad_norm": 1.6863030195236206, "learning_rate": 0.0001, "loss": 0.019, "step": 41520 }, { "epoch": 273.2236842105263, "grad_norm": 1.7419484853744507, "learning_rate": 0.0001, "loss": 0.0174, "step": 41530 }, { "epoch": 273.2894736842105, "grad_norm": 1.655527114868164, "learning_rate": 0.0001, "loss": 0.0173, "step": 41540 }, { "epoch": 273.35526315789474, "grad_norm": 1.4512851238250732, "learning_rate": 0.0001, "loss": 0.0192, "step": 41550 }, { "epoch": 273.42105263157896, "grad_norm": 1.5540374517440796, "learning_rate": 0.0001, "loss": 0.0204, "step": 41560 }, { "epoch": 273.4868421052632, "grad_norm": 1.2474039793014526, "learning_rate": 0.0001, "loss": 0.017, "step": 41570 }, { "epoch": 273.55263157894734, "grad_norm": 1.6988989114761353, "learning_rate": 0.0001, "loss": 0.017, "step": 41580 }, { "epoch": 273.61842105263156, "grad_norm": 1.5387680530548096, "learning_rate": 0.0001, "loss": 0.0189, "step": 41590 }, { "epoch": 273.6842105263158, "grad_norm": 1.7068233489990234, "learning_rate": 0.0001, "loss": 0.0179, "step": 41600 }, { "epoch": 273.75, "grad_norm": 1.6309770345687866, "learning_rate": 0.0001, "loss": 0.0171, "step": 41610 }, { "epoch": 273.8157894736842, "grad_norm": 1.5186302661895752, "learning_rate": 0.0001, "loss": 0.0192, "step": 41620 }, { "epoch": 273.88157894736844, "grad_norm": 1.5719717741012573, "learning_rate": 0.0001, "loss": 0.0186, "step": 41630 }, { "epoch": 273.94736842105266, "grad_norm": 1.6872901916503906, "learning_rate": 0.0001, "loss": 0.0199, "step": 41640 }, { "epoch": 274.0131578947368, "grad_norm": 1.8771411180496216, "learning_rate": 0.0001, "loss": 0.0197, "step": 41650 }, { "epoch": 274.07894736842104, "grad_norm": 1.4116754531860352, "learning_rate": 0.0001, "loss": 0.0169, "step": 41660 }, { "epoch": 274.14473684210526, "grad_norm": 1.513335108757019, "learning_rate": 0.0001, "loss": 0.0204, "step": 41670 }, { "epoch": 274.2105263157895, "grad_norm": 2.0482001304626465, "learning_rate": 0.0001, "loss": 0.0203, "step": 41680 }, { "epoch": 274.2763157894737, "grad_norm": 1.4509028196334839, "learning_rate": 0.0001, "loss": 0.0199, "step": 41690 }, { "epoch": 274.3421052631579, "grad_norm": 1.8972917795181274, "learning_rate": 0.0001, "loss": 0.0165, "step": 41700 }, { "epoch": 274.4078947368421, "grad_norm": 2.099287509918213, "learning_rate": 0.0001, "loss": 0.0169, "step": 41710 }, { "epoch": 274.4736842105263, "grad_norm": 1.4306877851486206, "learning_rate": 0.0001, "loss": 0.0185, "step": 41720 }, { "epoch": 274.5394736842105, "grad_norm": 1.5213897228240967, "learning_rate": 0.0001, "loss": 0.0185, "step": 41730 }, { "epoch": 274.60526315789474, "grad_norm": 1.7782145738601685, "learning_rate": 0.0001, "loss": 0.0214, "step": 41740 }, { "epoch": 274.67105263157896, "grad_norm": 1.2567702531814575, "learning_rate": 0.0001, "loss": 0.0201, "step": 41750 }, { "epoch": 274.7368421052632, "grad_norm": 1.7306725978851318, "learning_rate": 0.0001, "loss": 0.0173, "step": 41760 }, { "epoch": 274.80263157894734, "grad_norm": 1.3863604068756104, "learning_rate": 0.0001, "loss": 0.0175, "step": 41770 }, { "epoch": 274.86842105263156, "grad_norm": 1.4189739227294922, "learning_rate": 0.0001, "loss": 0.0169, "step": 41780 }, { "epoch": 274.9342105263158, "grad_norm": 2.0104947090148926, "learning_rate": 0.0001, "loss": 0.0208, "step": 41790 }, { "epoch": 275.0, "grad_norm": 1.8437671661376953, "learning_rate": 0.0001, "loss": 0.0179, "step": 41800 }, { "epoch": 275.0657894736842, "grad_norm": 1.338564395904541, "learning_rate": 0.0001, "loss": 0.0175, "step": 41810 }, { "epoch": 275.13157894736844, "grad_norm": 1.8190362453460693, "learning_rate": 0.0001, "loss": 0.0222, "step": 41820 }, { "epoch": 275.19736842105266, "grad_norm": 1.7742242813110352, "learning_rate": 0.0001, "loss": 0.0172, "step": 41830 }, { "epoch": 275.2631578947368, "grad_norm": 1.405239224433899, "learning_rate": 0.0001, "loss": 0.0205, "step": 41840 }, { "epoch": 275.32894736842104, "grad_norm": 1.4163011312484741, "learning_rate": 0.0001, "loss": 0.0192, "step": 41850 }, { "epoch": 275.39473684210526, "grad_norm": 1.3622487783432007, "learning_rate": 0.0001, "loss": 0.0198, "step": 41860 }, { "epoch": 275.4605263157895, "grad_norm": 1.3831439018249512, "learning_rate": 0.0001, "loss": 0.017, "step": 41870 }, { "epoch": 275.5263157894737, "grad_norm": 1.7238457202911377, "learning_rate": 0.0001, "loss": 0.0223, "step": 41880 }, { "epoch": 275.5921052631579, "grad_norm": 1.4246355295181274, "learning_rate": 0.0001, "loss": 0.0188, "step": 41890 }, { "epoch": 275.6578947368421, "grad_norm": 1.4674181938171387, "learning_rate": 0.0001, "loss": 0.0201, "step": 41900 }, { "epoch": 275.7236842105263, "grad_norm": 1.6052546501159668, "learning_rate": 0.0001, "loss": 0.0192, "step": 41910 }, { "epoch": 275.7894736842105, "grad_norm": 2.0202929973602295, "learning_rate": 0.0001, "loss": 0.0186, "step": 41920 }, { "epoch": 275.85526315789474, "grad_norm": 1.7910407781600952, "learning_rate": 0.0001, "loss": 0.0191, "step": 41930 }, { "epoch": 275.92105263157896, "grad_norm": 1.758991003036499, "learning_rate": 0.0001, "loss": 0.0192, "step": 41940 }, { "epoch": 275.9868421052632, "grad_norm": 1.4082955121994019, "learning_rate": 0.0001, "loss": 0.016, "step": 41950 }, { "epoch": 276.05263157894734, "grad_norm": 1.6121453046798706, "learning_rate": 0.0001, "loss": 0.0163, "step": 41960 }, { "epoch": 276.11842105263156, "grad_norm": 1.6925808191299438, "learning_rate": 0.0001, "loss": 0.0223, "step": 41970 }, { "epoch": 276.1842105263158, "grad_norm": 1.3445415496826172, "learning_rate": 0.0001, "loss": 0.0204, "step": 41980 }, { "epoch": 276.25, "grad_norm": 1.1795084476470947, "learning_rate": 0.0001, "loss": 0.0174, "step": 41990 }, { "epoch": 276.3157894736842, "grad_norm": 1.4663439989089966, "learning_rate": 0.0001, "loss": 0.0171, "step": 42000 }, { "epoch": 276.38157894736844, "grad_norm": 1.524259328842163, "learning_rate": 0.0001, "loss": 0.0186, "step": 42010 }, { "epoch": 276.44736842105266, "grad_norm": 1.410319209098816, "learning_rate": 0.0001, "loss": 0.0209, "step": 42020 }, { "epoch": 276.5131578947368, "grad_norm": 1.7675000429153442, "learning_rate": 0.0001, "loss": 0.0186, "step": 42030 }, { "epoch": 276.57894736842104, "grad_norm": 1.8452072143554688, "learning_rate": 0.0001, "loss": 0.0226, "step": 42040 }, { "epoch": 276.64473684210526, "grad_norm": 1.7556827068328857, "learning_rate": 0.0001, "loss": 0.0197, "step": 42050 }, { "epoch": 276.7105263157895, "grad_norm": 1.5641738176345825, "learning_rate": 0.0001, "loss": 0.0181, "step": 42060 }, { "epoch": 276.7763157894737, "grad_norm": 1.5799802541732788, "learning_rate": 0.0001, "loss": 0.0174, "step": 42070 }, { "epoch": 276.8421052631579, "grad_norm": 1.681440830230713, "learning_rate": 0.0001, "loss": 0.017, "step": 42080 }, { "epoch": 276.9078947368421, "grad_norm": 1.4259965419769287, "learning_rate": 0.0001, "loss": 0.0147, "step": 42090 }, { "epoch": 276.9736842105263, "grad_norm": 1.3714890480041504, "learning_rate": 0.0001, "loss": 0.016, "step": 42100 }, { "epoch": 277.0394736842105, "grad_norm": 1.2214322090148926, "learning_rate": 0.0001, "loss": 0.0178, "step": 42110 }, { "epoch": 277.10526315789474, "grad_norm": 1.5009976625442505, "learning_rate": 0.0001, "loss": 0.0213, "step": 42120 }, { "epoch": 277.17105263157896, "grad_norm": 1.2570672035217285, "learning_rate": 0.0001, "loss": 0.0182, "step": 42130 }, { "epoch": 277.2368421052632, "grad_norm": 1.405369520187378, "learning_rate": 0.0001, "loss": 0.0159, "step": 42140 }, { "epoch": 277.30263157894734, "grad_norm": 1.2938100099563599, "learning_rate": 0.0001, "loss": 0.0175, "step": 42150 }, { "epoch": 277.36842105263156, "grad_norm": 1.991845726966858, "learning_rate": 0.0001, "loss": 0.0206, "step": 42160 }, { "epoch": 277.4342105263158, "grad_norm": 1.4616566896438599, "learning_rate": 0.0001, "loss": 0.0191, "step": 42170 }, { "epoch": 277.5, "grad_norm": 1.5656402111053467, "learning_rate": 0.0001, "loss": 0.0199, "step": 42180 }, { "epoch": 277.5657894736842, "grad_norm": 1.735464334487915, "learning_rate": 0.0001, "loss": 0.0176, "step": 42190 }, { "epoch": 277.63157894736844, "grad_norm": 1.7582405805587769, "learning_rate": 0.0001, "loss": 0.0203, "step": 42200 }, { "epoch": 277.69736842105266, "grad_norm": 1.7111985683441162, "learning_rate": 0.0001, "loss": 0.0182, "step": 42210 }, { "epoch": 277.7631578947368, "grad_norm": 1.5147396326065063, "learning_rate": 0.0001, "loss": 0.0179, "step": 42220 }, { "epoch": 277.82894736842104, "grad_norm": 1.5136624574661255, "learning_rate": 0.0001, "loss": 0.0156, "step": 42230 }, { "epoch": 277.89473684210526, "grad_norm": 1.3603137731552124, "learning_rate": 0.0001, "loss": 0.0188, "step": 42240 }, { "epoch": 277.9605263157895, "grad_norm": 1.8029460906982422, "learning_rate": 0.0001, "loss": 0.0192, "step": 42250 }, { "epoch": 278.0263157894737, "grad_norm": 1.6718553304672241, "learning_rate": 0.0001, "loss": 0.0188, "step": 42260 }, { "epoch": 278.0921052631579, "grad_norm": 1.8075721263885498, "learning_rate": 0.0001, "loss": 0.0171, "step": 42270 }, { "epoch": 278.1578947368421, "grad_norm": 1.2390210628509521, "learning_rate": 0.0001, "loss": 0.0185, "step": 42280 }, { "epoch": 278.2236842105263, "grad_norm": 1.3617089986801147, "learning_rate": 0.0001, "loss": 0.0184, "step": 42290 }, { "epoch": 278.2894736842105, "grad_norm": 1.267888069152832, "learning_rate": 0.0001, "loss": 0.0183, "step": 42300 }, { "epoch": 278.35526315789474, "grad_norm": 1.4611672163009644, "learning_rate": 0.0001, "loss": 0.019, "step": 42310 }, { "epoch": 278.42105263157896, "grad_norm": 1.6079182624816895, "learning_rate": 0.0001, "loss": 0.0202, "step": 42320 }, { "epoch": 278.4868421052632, "grad_norm": 1.2225672006607056, "learning_rate": 0.0001, "loss": 0.0232, "step": 42330 }, { "epoch": 278.55263157894734, "grad_norm": 1.4954742193222046, "learning_rate": 0.0001, "loss": 0.0193, "step": 42340 }, { "epoch": 278.61842105263156, "grad_norm": 1.5277025699615479, "learning_rate": 0.0001, "loss": 0.0218, "step": 42350 }, { "epoch": 278.6842105263158, "grad_norm": 1.2259424924850464, "learning_rate": 0.0001, "loss": 0.0192, "step": 42360 }, { "epoch": 278.75, "grad_norm": 1.676186203956604, "learning_rate": 0.0001, "loss": 0.0174, "step": 42370 }, { "epoch": 278.8157894736842, "grad_norm": 1.5923320055007935, "learning_rate": 0.0001, "loss": 0.0155, "step": 42380 }, { "epoch": 278.88157894736844, "grad_norm": 1.4335755109786987, "learning_rate": 0.0001, "loss": 0.0224, "step": 42390 }, { "epoch": 278.94736842105266, "grad_norm": 1.7243903875350952, "learning_rate": 0.0001, "loss": 0.0169, "step": 42400 }, { "epoch": 279.0131578947368, "grad_norm": 1.4780890941619873, "learning_rate": 0.0001, "loss": 0.0201, "step": 42410 }, { "epoch": 279.07894736842104, "grad_norm": 1.8659617900848389, "learning_rate": 0.0001, "loss": 0.019, "step": 42420 }, { "epoch": 279.14473684210526, "grad_norm": 1.9515522718429565, "learning_rate": 0.0001, "loss": 0.0198, "step": 42430 }, { "epoch": 279.2105263157895, "grad_norm": 1.427000641822815, "learning_rate": 0.0001, "loss": 0.0164, "step": 42440 }, { "epoch": 279.2763157894737, "grad_norm": 1.023603081703186, "learning_rate": 0.0001, "loss": 0.0194, "step": 42450 }, { "epoch": 279.3421052631579, "grad_norm": 1.6585417985916138, "learning_rate": 0.0001, "loss": 0.0167, "step": 42460 }, { "epoch": 279.4078947368421, "grad_norm": 1.5060981512069702, "learning_rate": 0.0001, "loss": 0.0203, "step": 42470 }, { "epoch": 279.4736842105263, "grad_norm": 1.6499946117401123, "learning_rate": 0.0001, "loss": 0.0172, "step": 42480 }, { "epoch": 279.5394736842105, "grad_norm": 1.3849895000457764, "learning_rate": 0.0001, "loss": 0.0194, "step": 42490 }, { "epoch": 279.60526315789474, "grad_norm": 1.2116847038269043, "learning_rate": 0.0001, "loss": 0.017, "step": 42500 }, { "epoch": 279.67105263157896, "grad_norm": 1.6651846170425415, "learning_rate": 0.0001, "loss": 0.0195, "step": 42510 }, { "epoch": 279.7368421052632, "grad_norm": 1.468172550201416, "learning_rate": 0.0001, "loss": 0.021, "step": 42520 }, { "epoch": 279.80263157894734, "grad_norm": 1.6992188692092896, "learning_rate": 0.0001, "loss": 0.0185, "step": 42530 }, { "epoch": 279.86842105263156, "grad_norm": 1.3496242761611938, "learning_rate": 0.0001, "loss": 0.0215, "step": 42540 }, { "epoch": 279.9342105263158, "grad_norm": 1.7941639423370361, "learning_rate": 0.0001, "loss": 0.0184, "step": 42550 }, { "epoch": 280.0, "grad_norm": 1.2904306650161743, "learning_rate": 0.0001, "loss": 0.0181, "step": 42560 }, { "epoch": 280.0657894736842, "grad_norm": 1.6431320905685425, "learning_rate": 0.0001, "loss": 0.0184, "step": 42570 }, { "epoch": 280.13157894736844, "grad_norm": 1.4253360033035278, "learning_rate": 0.0001, "loss": 0.0175, "step": 42580 }, { "epoch": 280.19736842105266, "grad_norm": 1.4984064102172852, "learning_rate": 0.0001, "loss": 0.0157, "step": 42590 }, { "epoch": 280.2631578947368, "grad_norm": 1.4248595237731934, "learning_rate": 0.0001, "loss": 0.0169, "step": 42600 }, { "epoch": 280.32894736842104, "grad_norm": 1.793579339981079, "learning_rate": 0.0001, "loss": 0.0206, "step": 42610 }, { "epoch": 280.39473684210526, "grad_norm": 1.4985123872756958, "learning_rate": 0.0001, "loss": 0.0168, "step": 42620 }, { "epoch": 280.4605263157895, "grad_norm": 1.815198302268982, "learning_rate": 0.0001, "loss": 0.0196, "step": 42630 }, { "epoch": 280.5263157894737, "grad_norm": 1.9252785444259644, "learning_rate": 0.0001, "loss": 0.0193, "step": 42640 }, { "epoch": 280.5921052631579, "grad_norm": 1.3112238645553589, "learning_rate": 0.0001, "loss": 0.0174, "step": 42650 }, { "epoch": 280.6578947368421, "grad_norm": 1.4443397521972656, "learning_rate": 0.0001, "loss": 0.0181, "step": 42660 }, { "epoch": 280.7236842105263, "grad_norm": 1.9062793254852295, "learning_rate": 0.0001, "loss": 0.0183, "step": 42670 }, { "epoch": 280.7894736842105, "grad_norm": 1.3313509225845337, "learning_rate": 0.0001, "loss": 0.0199, "step": 42680 }, { "epoch": 280.85526315789474, "grad_norm": 1.7564022541046143, "learning_rate": 0.0001, "loss": 0.0177, "step": 42690 }, { "epoch": 280.92105263157896, "grad_norm": 1.5908268690109253, "learning_rate": 0.0001, "loss": 0.0169, "step": 42700 }, { "epoch": 280.9868421052632, "grad_norm": 1.562179684638977, "learning_rate": 0.0001, "loss": 0.0238, "step": 42710 }, { "epoch": 281.05263157894734, "grad_norm": 1.723075032234192, "learning_rate": 0.0001, "loss": 0.0167, "step": 42720 }, { "epoch": 281.11842105263156, "grad_norm": 1.468133568763733, "learning_rate": 0.0001, "loss": 0.0153, "step": 42730 }, { "epoch": 281.1842105263158, "grad_norm": 1.5075863599777222, "learning_rate": 0.0001, "loss": 0.0174, "step": 42740 }, { "epoch": 281.25, "grad_norm": 1.6657123565673828, "learning_rate": 0.0001, "loss": 0.0197, "step": 42750 }, { "epoch": 281.3157894736842, "grad_norm": 1.198246717453003, "learning_rate": 0.0001, "loss": 0.0208, "step": 42760 }, { "epoch": 281.38157894736844, "grad_norm": 1.4929087162017822, "learning_rate": 0.0001, "loss": 0.0184, "step": 42770 }, { "epoch": 281.44736842105266, "grad_norm": 1.057673454284668, "learning_rate": 0.0001, "loss": 0.0183, "step": 42780 }, { "epoch": 281.5131578947368, "grad_norm": 1.5509480237960815, "learning_rate": 0.0001, "loss": 0.0231, "step": 42790 }, { "epoch": 281.57894736842104, "grad_norm": 1.8988211154937744, "learning_rate": 0.0001, "loss": 0.0179, "step": 42800 }, { "epoch": 281.64473684210526, "grad_norm": 1.7140709161758423, "learning_rate": 0.0001, "loss": 0.0198, "step": 42810 }, { "epoch": 281.7105263157895, "grad_norm": 1.5632435083389282, "learning_rate": 0.0001, "loss": 0.0185, "step": 42820 }, { "epoch": 281.7763157894737, "grad_norm": 2.2306594848632812, "learning_rate": 0.0001, "loss": 0.0164, "step": 42830 }, { "epoch": 281.8421052631579, "grad_norm": 1.6903070211410522, "learning_rate": 0.0001, "loss": 0.0212, "step": 42840 }, { "epoch": 281.9078947368421, "grad_norm": 2.0466508865356445, "learning_rate": 0.0001, "loss": 0.0167, "step": 42850 }, { "epoch": 281.9736842105263, "grad_norm": 1.766066312789917, "learning_rate": 0.0001, "loss": 0.0176, "step": 42860 }, { "epoch": 282.0394736842105, "grad_norm": 1.7298195362091064, "learning_rate": 0.0001, "loss": 0.0228, "step": 42870 }, { "epoch": 282.10526315789474, "grad_norm": 1.642188549041748, "learning_rate": 0.0001, "loss": 0.0193, "step": 42880 }, { "epoch": 282.17105263157896, "grad_norm": 1.3465498685836792, "learning_rate": 0.0001, "loss": 0.017, "step": 42890 }, { "epoch": 282.2368421052632, "grad_norm": 2.0446741580963135, "learning_rate": 0.0001, "loss": 0.019, "step": 42900 }, { "epoch": 282.30263157894734, "grad_norm": 1.3988940715789795, "learning_rate": 0.0001, "loss": 0.0189, "step": 42910 }, { "epoch": 282.36842105263156, "grad_norm": 1.6634869575500488, "learning_rate": 0.0001, "loss": 0.0176, "step": 42920 }, { "epoch": 282.4342105263158, "grad_norm": 1.6209821701049805, "learning_rate": 0.0001, "loss": 0.017, "step": 42930 }, { "epoch": 282.5, "grad_norm": 1.6040728092193604, "learning_rate": 0.0001, "loss": 0.017, "step": 42940 }, { "epoch": 282.5657894736842, "grad_norm": 1.5379741191864014, "learning_rate": 0.0001, "loss": 0.0156, "step": 42950 }, { "epoch": 282.63157894736844, "grad_norm": 1.6668469905853271, "learning_rate": 0.0001, "loss": 0.0185, "step": 42960 }, { "epoch": 282.69736842105266, "grad_norm": 1.523592472076416, "learning_rate": 0.0001, "loss": 0.0182, "step": 42970 }, { "epoch": 282.7631578947368, "grad_norm": 2.1597580909729004, "learning_rate": 0.0001, "loss": 0.0176, "step": 42980 }, { "epoch": 282.82894736842104, "grad_norm": 2.126228094100952, "learning_rate": 0.0001, "loss": 0.0168, "step": 42990 }, { "epoch": 282.89473684210526, "grad_norm": 2.0172858238220215, "learning_rate": 0.0001, "loss": 0.0188, "step": 43000 }, { "epoch": 282.9605263157895, "grad_norm": 1.922538161277771, "learning_rate": 0.0001, "loss": 0.0173, "step": 43010 }, { "epoch": 283.0263157894737, "grad_norm": 1.358132004737854, "learning_rate": 0.0001, "loss": 0.0178, "step": 43020 }, { "epoch": 283.0921052631579, "grad_norm": 2.043970823287964, "learning_rate": 0.0001, "loss": 0.0201, "step": 43030 }, { "epoch": 283.1578947368421, "grad_norm": 1.6649959087371826, "learning_rate": 0.0001, "loss": 0.0152, "step": 43040 }, { "epoch": 283.2236842105263, "grad_norm": 1.644811987876892, "learning_rate": 0.0001, "loss": 0.0171, "step": 43050 }, { "epoch": 283.2894736842105, "grad_norm": 1.5470813512802124, "learning_rate": 0.0001, "loss": 0.0182, "step": 43060 }, { "epoch": 283.35526315789474, "grad_norm": 1.3669248819351196, "learning_rate": 0.0001, "loss": 0.0175, "step": 43070 }, { "epoch": 283.42105263157896, "grad_norm": 1.2725013494491577, "learning_rate": 0.0001, "loss": 0.0165, "step": 43080 }, { "epoch": 283.4868421052632, "grad_norm": 1.4757492542266846, "learning_rate": 0.0001, "loss": 0.0197, "step": 43090 }, { "epoch": 283.55263157894734, "grad_norm": 2.3006818294525146, "learning_rate": 0.0001, "loss": 0.0173, "step": 43100 }, { "epoch": 283.61842105263156, "grad_norm": 1.5249394178390503, "learning_rate": 0.0001, "loss": 0.0201, "step": 43110 }, { "epoch": 283.6842105263158, "grad_norm": 1.660033941268921, "learning_rate": 0.0001, "loss": 0.0163, "step": 43120 }, { "epoch": 283.75, "grad_norm": 1.8573060035705566, "learning_rate": 0.0001, "loss": 0.0191, "step": 43130 }, { "epoch": 283.8157894736842, "grad_norm": 1.7693145275115967, "learning_rate": 0.0001, "loss": 0.015, "step": 43140 }, { "epoch": 283.88157894736844, "grad_norm": 1.4813470840454102, "learning_rate": 0.0001, "loss": 0.0206, "step": 43150 }, { "epoch": 283.94736842105266, "grad_norm": 1.5606073141098022, "learning_rate": 0.0001, "loss": 0.0202, "step": 43160 }, { "epoch": 284.0131578947368, "grad_norm": 1.5274051427841187, "learning_rate": 0.0001, "loss": 0.0162, "step": 43170 }, { "epoch": 284.07894736842104, "grad_norm": 1.828650712966919, "learning_rate": 0.0001, "loss": 0.0168, "step": 43180 }, { "epoch": 284.14473684210526, "grad_norm": 1.4957854747772217, "learning_rate": 0.0001, "loss": 0.0198, "step": 43190 }, { "epoch": 284.2105263157895, "grad_norm": 1.1267924308776855, "learning_rate": 0.0001, "loss": 0.02, "step": 43200 }, { "epoch": 284.2763157894737, "grad_norm": 1.5448977947235107, "learning_rate": 0.0001, "loss": 0.0213, "step": 43210 }, { "epoch": 284.3421052631579, "grad_norm": 1.404921054840088, "learning_rate": 0.0001, "loss": 0.016, "step": 43220 }, { "epoch": 284.4078947368421, "grad_norm": 1.3927404880523682, "learning_rate": 0.0001, "loss": 0.0177, "step": 43230 }, { "epoch": 284.4736842105263, "grad_norm": 1.6406526565551758, "learning_rate": 0.0001, "loss": 0.0192, "step": 43240 }, { "epoch": 284.5394736842105, "grad_norm": 1.327146291732788, "learning_rate": 0.0001, "loss": 0.0196, "step": 43250 }, { "epoch": 284.60526315789474, "grad_norm": 1.2856874465942383, "learning_rate": 0.0001, "loss": 0.0205, "step": 43260 }, { "epoch": 284.67105263157896, "grad_norm": 1.7168580293655396, "learning_rate": 0.0001, "loss": 0.0179, "step": 43270 }, { "epoch": 284.7368421052632, "grad_norm": 1.64984929561615, "learning_rate": 0.0001, "loss": 0.0175, "step": 43280 }, { "epoch": 284.80263157894734, "grad_norm": 1.6568399667739868, "learning_rate": 0.0001, "loss": 0.0187, "step": 43290 }, { "epoch": 284.86842105263156, "grad_norm": 1.4071943759918213, "learning_rate": 0.0001, "loss": 0.0174, "step": 43300 }, { "epoch": 284.9342105263158, "grad_norm": 1.4118492603302002, "learning_rate": 0.0001, "loss": 0.0171, "step": 43310 }, { "epoch": 285.0, "grad_norm": 1.409035325050354, "learning_rate": 0.0001, "loss": 0.0161, "step": 43320 }, { "epoch": 285.0657894736842, "grad_norm": 1.6958695650100708, "learning_rate": 0.0001, "loss": 0.0204, "step": 43330 }, { "epoch": 285.13157894736844, "grad_norm": 1.8841313123703003, "learning_rate": 0.0001, "loss": 0.0203, "step": 43340 }, { "epoch": 285.19736842105266, "grad_norm": 1.9563605785369873, "learning_rate": 0.0001, "loss": 0.0176, "step": 43350 }, { "epoch": 285.2631578947368, "grad_norm": 2.023171901702881, "learning_rate": 0.0001, "loss": 0.0169, "step": 43360 }, { "epoch": 285.32894736842104, "grad_norm": 1.689024567604065, "learning_rate": 0.0001, "loss": 0.0148, "step": 43370 }, { "epoch": 285.39473684210526, "grad_norm": 1.550378441810608, "learning_rate": 0.0001, "loss": 0.0194, "step": 43380 }, { "epoch": 285.4605263157895, "grad_norm": 2.0232954025268555, "learning_rate": 0.0001, "loss": 0.0193, "step": 43390 }, { "epoch": 285.5263157894737, "grad_norm": 1.5384697914123535, "learning_rate": 0.0001, "loss": 0.0178, "step": 43400 }, { "epoch": 285.5921052631579, "grad_norm": 1.6617937088012695, "learning_rate": 0.0001, "loss": 0.0143, "step": 43410 }, { "epoch": 285.6578947368421, "grad_norm": 2.317622423171997, "learning_rate": 0.0001, "loss": 0.0163, "step": 43420 }, { "epoch": 285.7236842105263, "grad_norm": 1.7911573648452759, "learning_rate": 0.0001, "loss": 0.0182, "step": 43430 }, { "epoch": 285.7894736842105, "grad_norm": 1.3899410963058472, "learning_rate": 0.0001, "loss": 0.0192, "step": 43440 }, { "epoch": 285.85526315789474, "grad_norm": 1.441723108291626, "learning_rate": 0.0001, "loss": 0.0222, "step": 43450 }, { "epoch": 285.92105263157896, "grad_norm": 1.5071628093719482, "learning_rate": 0.0001, "loss": 0.0166, "step": 43460 }, { "epoch": 285.9868421052632, "grad_norm": 1.8933814764022827, "learning_rate": 0.0001, "loss": 0.0205, "step": 43470 }, { "epoch": 286.05263157894734, "grad_norm": 1.7016582489013672, "learning_rate": 0.0001, "loss": 0.0175, "step": 43480 }, { "epoch": 286.11842105263156, "grad_norm": 1.6794841289520264, "learning_rate": 0.0001, "loss": 0.0175, "step": 43490 }, { "epoch": 286.1842105263158, "grad_norm": 1.4671120643615723, "learning_rate": 0.0001, "loss": 0.0166, "step": 43500 }, { "epoch": 286.25, "grad_norm": 1.7710142135620117, "learning_rate": 0.0001, "loss": 0.0179, "step": 43510 }, { "epoch": 286.3157894736842, "grad_norm": 1.8625229597091675, "learning_rate": 0.0001, "loss": 0.0219, "step": 43520 }, { "epoch": 286.38157894736844, "grad_norm": 1.502859115600586, "learning_rate": 0.0001, "loss": 0.0222, "step": 43530 }, { "epoch": 286.44736842105266, "grad_norm": 2.0193467140197754, "learning_rate": 0.0001, "loss": 0.0185, "step": 43540 }, { "epoch": 286.5131578947368, "grad_norm": 1.0825546979904175, "learning_rate": 0.0001, "loss": 0.0163, "step": 43550 }, { "epoch": 286.57894736842104, "grad_norm": 2.048198938369751, "learning_rate": 0.0001, "loss": 0.0162, "step": 43560 }, { "epoch": 286.64473684210526, "grad_norm": 1.2707399129867554, "learning_rate": 0.0001, "loss": 0.0183, "step": 43570 }, { "epoch": 286.7105263157895, "grad_norm": 1.6056419610977173, "learning_rate": 0.0001, "loss": 0.0169, "step": 43580 }, { "epoch": 286.7763157894737, "grad_norm": 1.776465654373169, "learning_rate": 0.0001, "loss": 0.02, "step": 43590 }, { "epoch": 286.8421052631579, "grad_norm": 2.0865478515625, "learning_rate": 0.0001, "loss": 0.0181, "step": 43600 }, { "epoch": 286.9078947368421, "grad_norm": 1.7961218357086182, "learning_rate": 0.0001, "loss": 0.0176, "step": 43610 }, { "epoch": 286.9736842105263, "grad_norm": 1.9837439060211182, "learning_rate": 0.0001, "loss": 0.0214, "step": 43620 }, { "epoch": 287.0394736842105, "grad_norm": 1.8464741706848145, "learning_rate": 0.0001, "loss": 0.0179, "step": 43630 }, { "epoch": 287.10526315789474, "grad_norm": 1.3452439308166504, "learning_rate": 0.0001, "loss": 0.0173, "step": 43640 }, { "epoch": 287.17105263157896, "grad_norm": 1.638796091079712, "learning_rate": 0.0001, "loss": 0.0158, "step": 43650 }, { "epoch": 287.2368421052632, "grad_norm": 1.6306103467941284, "learning_rate": 0.0001, "loss": 0.0165, "step": 43660 }, { "epoch": 287.30263157894734, "grad_norm": 1.520134449005127, "learning_rate": 0.0001, "loss": 0.0179, "step": 43670 }, { "epoch": 287.36842105263156, "grad_norm": 1.631089210510254, "learning_rate": 0.0001, "loss": 0.0199, "step": 43680 }, { "epoch": 287.4342105263158, "grad_norm": 1.614147663116455, "learning_rate": 0.0001, "loss": 0.0208, "step": 43690 }, { "epoch": 287.5, "grad_norm": 2.1006462574005127, "learning_rate": 0.0001, "loss": 0.0212, "step": 43700 }, { "epoch": 287.5657894736842, "grad_norm": 2.1098196506500244, "learning_rate": 0.0001, "loss": 0.0179, "step": 43710 }, { "epoch": 287.63157894736844, "grad_norm": 1.7778139114379883, "learning_rate": 0.0001, "loss": 0.0195, "step": 43720 }, { "epoch": 287.69736842105266, "grad_norm": 1.7488070726394653, "learning_rate": 0.0001, "loss": 0.016, "step": 43730 }, { "epoch": 287.7631578947368, "grad_norm": 1.8260210752487183, "learning_rate": 0.0001, "loss": 0.0157, "step": 43740 }, { "epoch": 287.82894736842104, "grad_norm": 1.7219903469085693, "learning_rate": 0.0001, "loss": 0.0186, "step": 43750 }, { "epoch": 287.89473684210526, "grad_norm": 1.8125314712524414, "learning_rate": 0.0001, "loss": 0.0196, "step": 43760 }, { "epoch": 287.9605263157895, "grad_norm": 1.334695816040039, "learning_rate": 0.0001, "loss": 0.0212, "step": 43770 }, { "epoch": 288.0263157894737, "grad_norm": 1.832413673400879, "learning_rate": 0.0001, "loss": 0.0256, "step": 43780 }, { "epoch": 288.0921052631579, "grad_norm": 2.1010866165161133, "learning_rate": 0.0001, "loss": 0.0176, "step": 43790 }, { "epoch": 288.1578947368421, "grad_norm": 1.9179598093032837, "learning_rate": 0.0001, "loss": 0.016, "step": 43800 }, { "epoch": 288.2236842105263, "grad_norm": 1.7713767290115356, "learning_rate": 0.0001, "loss": 0.0171, "step": 43810 }, { "epoch": 288.2894736842105, "grad_norm": 1.8749818801879883, "learning_rate": 0.0001, "loss": 0.0163, "step": 43820 }, { "epoch": 288.35526315789474, "grad_norm": 1.7882792949676514, "learning_rate": 0.0001, "loss": 0.0138, "step": 43830 }, { "epoch": 288.42105263157896, "grad_norm": 2.0325891971588135, "learning_rate": 0.0001, "loss": 0.0217, "step": 43840 }, { "epoch": 288.4868421052632, "grad_norm": 2.1175484657287598, "learning_rate": 0.0001, "loss": 0.0165, "step": 43850 }, { "epoch": 288.55263157894734, "grad_norm": 1.6696783304214478, "learning_rate": 0.0001, "loss": 0.018, "step": 43860 }, { "epoch": 288.61842105263156, "grad_norm": 1.5557706356048584, "learning_rate": 0.0001, "loss": 0.0163, "step": 43870 }, { "epoch": 288.6842105263158, "grad_norm": 1.3059769868850708, "learning_rate": 0.0001, "loss": 0.0216, "step": 43880 }, { "epoch": 288.75, "grad_norm": 1.4682835340499878, "learning_rate": 0.0001, "loss": 0.0176, "step": 43890 }, { "epoch": 288.8157894736842, "grad_norm": 1.6055163145065308, "learning_rate": 0.0001, "loss": 0.02, "step": 43900 }, { "epoch": 288.88157894736844, "grad_norm": 1.7699544429779053, "learning_rate": 0.0001, "loss": 0.0195, "step": 43910 }, { "epoch": 288.94736842105266, "grad_norm": 1.409834861755371, "learning_rate": 0.0001, "loss": 0.0201, "step": 43920 }, { "epoch": 289.0131578947368, "grad_norm": 1.2476541996002197, "learning_rate": 0.0001, "loss": 0.0183, "step": 43930 }, { "epoch": 289.07894736842104, "grad_norm": 1.9086381196975708, "learning_rate": 0.0001, "loss": 0.0182, "step": 43940 }, { "epoch": 289.14473684210526, "grad_norm": 1.912548303604126, "learning_rate": 0.0001, "loss": 0.0255, "step": 43950 }, { "epoch": 289.2105263157895, "grad_norm": 1.7160319089889526, "learning_rate": 0.0001, "loss": 0.0187, "step": 43960 }, { "epoch": 289.2763157894737, "grad_norm": 1.1536784172058105, "learning_rate": 0.0001, "loss": 0.0173, "step": 43970 }, { "epoch": 289.3421052631579, "grad_norm": 1.4616997241973877, "learning_rate": 0.0001, "loss": 0.0201, "step": 43980 }, { "epoch": 289.4078947368421, "grad_norm": 1.5245965719223022, "learning_rate": 0.0001, "loss": 0.018, "step": 43990 }, { "epoch": 289.4736842105263, "grad_norm": 1.7914817333221436, "learning_rate": 0.0001, "loss": 0.0175, "step": 44000 }, { "epoch": 289.5394736842105, "grad_norm": 1.323593258857727, "learning_rate": 0.0001, "loss": 0.0166, "step": 44010 }, { "epoch": 289.60526315789474, "grad_norm": 1.477107286453247, "learning_rate": 0.0001, "loss": 0.0164, "step": 44020 }, { "epoch": 289.67105263157896, "grad_norm": 1.4922282695770264, "learning_rate": 0.0001, "loss": 0.0147, "step": 44030 }, { "epoch": 289.7368421052632, "grad_norm": 1.3073954582214355, "learning_rate": 0.0001, "loss": 0.021, "step": 44040 }, { "epoch": 289.80263157894734, "grad_norm": 1.5141874551773071, "learning_rate": 0.0001, "loss": 0.0198, "step": 44050 }, { "epoch": 289.86842105263156, "grad_norm": 1.474725365638733, "learning_rate": 0.0001, "loss": 0.0196, "step": 44060 }, { "epoch": 289.9342105263158, "grad_norm": 1.4520765542984009, "learning_rate": 0.0001, "loss": 0.0165, "step": 44070 }, { "epoch": 290.0, "grad_norm": 1.3790737390518188, "learning_rate": 0.0001, "loss": 0.0193, "step": 44080 }, { "epoch": 290.0657894736842, "grad_norm": 1.6643176078796387, "learning_rate": 0.0001, "loss": 0.0205, "step": 44090 }, { "epoch": 290.13157894736844, "grad_norm": 1.4819704294204712, "learning_rate": 0.0001, "loss": 0.0163, "step": 44100 }, { "epoch": 290.19736842105266, "grad_norm": 1.8825124502182007, "learning_rate": 0.0001, "loss": 0.0161, "step": 44110 }, { "epoch": 290.2631578947368, "grad_norm": 2.0968852043151855, "learning_rate": 0.0001, "loss": 0.0203, "step": 44120 }, { "epoch": 290.32894736842104, "grad_norm": 1.6471590995788574, "learning_rate": 0.0001, "loss": 0.0153, "step": 44130 }, { "epoch": 290.39473684210526, "grad_norm": 1.4225800037384033, "learning_rate": 0.0001, "loss": 0.0236, "step": 44140 }, { "epoch": 290.4605263157895, "grad_norm": 1.453905701637268, "learning_rate": 0.0001, "loss": 0.017, "step": 44150 }, { "epoch": 290.5263157894737, "grad_norm": 1.6915879249572754, "learning_rate": 0.0001, "loss": 0.0157, "step": 44160 }, { "epoch": 290.5921052631579, "grad_norm": 1.2371293306350708, "learning_rate": 0.0001, "loss": 0.0181, "step": 44170 }, { "epoch": 290.6578947368421, "grad_norm": 1.117315649986267, "learning_rate": 0.0001, "loss": 0.0165, "step": 44180 }, { "epoch": 290.7236842105263, "grad_norm": 1.5877312421798706, "learning_rate": 0.0001, "loss": 0.0196, "step": 44190 }, { "epoch": 290.7894736842105, "grad_norm": 1.6425585746765137, "learning_rate": 0.0001, "loss": 0.018, "step": 44200 }, { "epoch": 290.85526315789474, "grad_norm": 1.5670872926712036, "learning_rate": 0.0001, "loss": 0.0198, "step": 44210 }, { "epoch": 290.92105263157896, "grad_norm": 1.831268548965454, "learning_rate": 0.0001, "loss": 0.0196, "step": 44220 }, { "epoch": 290.9868421052632, "grad_norm": 2.0121357440948486, "learning_rate": 0.0001, "loss": 0.0172, "step": 44230 }, { "epoch": 291.05263157894734, "grad_norm": 1.3398330211639404, "learning_rate": 0.0001, "loss": 0.0224, "step": 44240 }, { "epoch": 291.11842105263156, "grad_norm": 1.5635986328125, "learning_rate": 0.0001, "loss": 0.0157, "step": 44250 }, { "epoch": 291.1842105263158, "grad_norm": 1.5514965057373047, "learning_rate": 0.0001, "loss": 0.0183, "step": 44260 }, { "epoch": 291.25, "grad_norm": 1.2861994504928589, "learning_rate": 0.0001, "loss": 0.0193, "step": 44270 }, { "epoch": 291.3157894736842, "grad_norm": 1.2298833131790161, "learning_rate": 0.0001, "loss": 0.0179, "step": 44280 }, { "epoch": 291.38157894736844, "grad_norm": 1.444681167602539, "learning_rate": 0.0001, "loss": 0.0209, "step": 44290 }, { "epoch": 291.44736842105266, "grad_norm": 1.580941081047058, "learning_rate": 0.0001, "loss": 0.0185, "step": 44300 }, { "epoch": 291.5131578947368, "grad_norm": 1.5559594631195068, "learning_rate": 0.0001, "loss": 0.0197, "step": 44310 }, { "epoch": 291.57894736842104, "grad_norm": 1.7692807912826538, "learning_rate": 0.0001, "loss": 0.0193, "step": 44320 }, { "epoch": 291.64473684210526, "grad_norm": 1.6709803342819214, "learning_rate": 0.0001, "loss": 0.0184, "step": 44330 }, { "epoch": 291.7105263157895, "grad_norm": 1.499103307723999, "learning_rate": 0.0001, "loss": 0.0179, "step": 44340 }, { "epoch": 291.7763157894737, "grad_norm": 1.7680569887161255, "learning_rate": 0.0001, "loss": 0.0175, "step": 44350 }, { "epoch": 291.8421052631579, "grad_norm": 1.5457570552825928, "learning_rate": 0.0001, "loss": 0.0201, "step": 44360 }, { "epoch": 291.9078947368421, "grad_norm": 1.5353564023971558, "learning_rate": 0.0001, "loss": 0.017, "step": 44370 }, { "epoch": 291.9736842105263, "grad_norm": 1.3185111284255981, "learning_rate": 0.0001, "loss": 0.0225, "step": 44380 }, { "epoch": 292.0394736842105, "grad_norm": 1.2502890825271606, "learning_rate": 0.0001, "loss": 0.0169, "step": 44390 }, { "epoch": 292.10526315789474, "grad_norm": 1.2663381099700928, "learning_rate": 0.0001, "loss": 0.0198, "step": 44400 }, { "epoch": 292.17105263157896, "grad_norm": 1.451568603515625, "learning_rate": 0.0001, "loss": 0.0199, "step": 44410 }, { "epoch": 292.2368421052632, "grad_norm": 1.6659729480743408, "learning_rate": 0.0001, "loss": 0.0192, "step": 44420 }, { "epoch": 292.30263157894734, "grad_norm": 1.3059200048446655, "learning_rate": 0.0001, "loss": 0.0204, "step": 44430 }, { "epoch": 292.36842105263156, "grad_norm": 1.2945913076400757, "learning_rate": 0.0001, "loss": 0.0185, "step": 44440 }, { "epoch": 292.4342105263158, "grad_norm": 1.8692278861999512, "learning_rate": 0.0001, "loss": 0.019, "step": 44450 }, { "epoch": 292.5, "grad_norm": 1.557561993598938, "learning_rate": 0.0001, "loss": 0.02, "step": 44460 }, { "epoch": 292.5657894736842, "grad_norm": 2.0155153274536133, "learning_rate": 0.0001, "loss": 0.0212, "step": 44470 }, { "epoch": 292.63157894736844, "grad_norm": 2.0631649494171143, "learning_rate": 0.0001, "loss": 0.0152, "step": 44480 }, { "epoch": 292.69736842105266, "grad_norm": 1.9272711277008057, "learning_rate": 0.0001, "loss": 0.0178, "step": 44490 }, { "epoch": 292.7631578947368, "grad_norm": 1.8898981809616089, "learning_rate": 0.0001, "loss": 0.0163, "step": 44500 }, { "epoch": 292.82894736842104, "grad_norm": 1.6213133335113525, "learning_rate": 0.0001, "loss": 0.0186, "step": 44510 }, { "epoch": 292.89473684210526, "grad_norm": 1.507592797279358, "learning_rate": 0.0001, "loss": 0.0175, "step": 44520 }, { "epoch": 292.9605263157895, "grad_norm": 1.112674593925476, "learning_rate": 0.0001, "loss": 0.0163, "step": 44530 }, { "epoch": 293.0263157894737, "grad_norm": 1.9164493083953857, "learning_rate": 0.0001, "loss": 0.0181, "step": 44540 }, { "epoch": 293.0921052631579, "grad_norm": 1.4526554346084595, "learning_rate": 0.0001, "loss": 0.0171, "step": 44550 }, { "epoch": 293.1578947368421, "grad_norm": 1.4854985475540161, "learning_rate": 0.0001, "loss": 0.0163, "step": 44560 }, { "epoch": 293.2236842105263, "grad_norm": 1.5252107381820679, "learning_rate": 0.0001, "loss": 0.0187, "step": 44570 }, { "epoch": 293.2894736842105, "grad_norm": 2.3504488468170166, "learning_rate": 0.0001, "loss": 0.0189, "step": 44580 }, { "epoch": 293.35526315789474, "grad_norm": 2.0031487941741943, "learning_rate": 0.0001, "loss": 0.0162, "step": 44590 }, { "epoch": 293.42105263157896, "grad_norm": 1.6736921072006226, "learning_rate": 0.0001, "loss": 0.0228, "step": 44600 }, { "epoch": 293.4868421052632, "grad_norm": 1.2613401412963867, "learning_rate": 0.0001, "loss": 0.0202, "step": 44610 }, { "epoch": 293.55263157894734, "grad_norm": 1.2305114269256592, "learning_rate": 0.0001, "loss": 0.0149, "step": 44620 }, { "epoch": 293.61842105263156, "grad_norm": 1.4174754619598389, "learning_rate": 0.0001, "loss": 0.0181, "step": 44630 }, { "epoch": 293.6842105263158, "grad_norm": 1.2429598569869995, "learning_rate": 0.0001, "loss": 0.0173, "step": 44640 }, { "epoch": 293.75, "grad_norm": 1.5666090250015259, "learning_rate": 0.0001, "loss": 0.0187, "step": 44650 }, { "epoch": 293.8157894736842, "grad_norm": 1.5768183469772339, "learning_rate": 0.0001, "loss": 0.0196, "step": 44660 }, { "epoch": 293.88157894736844, "grad_norm": 1.9521734714508057, "learning_rate": 0.0001, "loss": 0.0201, "step": 44670 }, { "epoch": 293.94736842105266, "grad_norm": 1.6867706775665283, "learning_rate": 0.0001, "loss": 0.0158, "step": 44680 }, { "epoch": 294.0131578947368, "grad_norm": 1.7406214475631714, "learning_rate": 0.0001, "loss": 0.0187, "step": 44690 }, { "epoch": 294.07894736842104, "grad_norm": 1.8545516729354858, "learning_rate": 0.0001, "loss": 0.0213, "step": 44700 }, { "epoch": 294.14473684210526, "grad_norm": 1.685750961303711, "learning_rate": 0.0001, "loss": 0.0169, "step": 44710 }, { "epoch": 294.2105263157895, "grad_norm": 2.1211752891540527, "learning_rate": 0.0001, "loss": 0.0145, "step": 44720 }, { "epoch": 294.2763157894737, "grad_norm": 1.4206418991088867, "learning_rate": 0.0001, "loss": 0.0162, "step": 44730 }, { "epoch": 294.3421052631579, "grad_norm": 1.6397886276245117, "learning_rate": 0.0001, "loss": 0.0201, "step": 44740 }, { "epoch": 294.4078947368421, "grad_norm": 1.698840856552124, "learning_rate": 0.0001, "loss": 0.0177, "step": 44750 }, { "epoch": 294.4736842105263, "grad_norm": 1.7627681493759155, "learning_rate": 0.0001, "loss": 0.0158, "step": 44760 }, { "epoch": 294.5394736842105, "grad_norm": 1.7153760194778442, "learning_rate": 0.0001, "loss": 0.0197, "step": 44770 }, { "epoch": 294.60526315789474, "grad_norm": 1.5447648763656616, "learning_rate": 0.0001, "loss": 0.0186, "step": 44780 }, { "epoch": 294.67105263157896, "grad_norm": 1.8452551364898682, "learning_rate": 0.0001, "loss": 0.0197, "step": 44790 }, { "epoch": 294.7368421052632, "grad_norm": 1.492409110069275, "learning_rate": 0.0001, "loss": 0.0172, "step": 44800 }, { "epoch": 294.80263157894734, "grad_norm": 2.0421950817108154, "learning_rate": 0.0001, "loss": 0.0184, "step": 44810 }, { "epoch": 294.86842105263156, "grad_norm": 1.720566749572754, "learning_rate": 0.0001, "loss": 0.0189, "step": 44820 }, { "epoch": 294.9342105263158, "grad_norm": 1.232724666595459, "learning_rate": 0.0001, "loss": 0.0181, "step": 44830 }, { "epoch": 295.0, "grad_norm": 1.1229352951049805, "learning_rate": 0.0001, "loss": 0.0172, "step": 44840 }, { "epoch": 295.0657894736842, "grad_norm": 1.508644938468933, "learning_rate": 0.0001, "loss": 0.0183, "step": 44850 }, { "epoch": 295.13157894736844, "grad_norm": 1.1894662380218506, "learning_rate": 0.0001, "loss": 0.0186, "step": 44860 }, { "epoch": 295.19736842105266, "grad_norm": 1.9445680379867554, "learning_rate": 0.0001, "loss": 0.0175, "step": 44870 }, { "epoch": 295.2631578947368, "grad_norm": 1.5969231128692627, "learning_rate": 0.0001, "loss": 0.0181, "step": 44880 }, { "epoch": 295.32894736842104, "grad_norm": 1.6168392896652222, "learning_rate": 0.0001, "loss": 0.0199, "step": 44890 }, { "epoch": 295.39473684210526, "grad_norm": 1.7197731733322144, "learning_rate": 0.0001, "loss": 0.0144, "step": 44900 }, { "epoch": 295.4605263157895, "grad_norm": 1.581508994102478, "learning_rate": 0.0001, "loss": 0.0162, "step": 44910 }, { "epoch": 295.5263157894737, "grad_norm": 1.5625157356262207, "learning_rate": 0.0001, "loss": 0.0171, "step": 44920 }, { "epoch": 295.5921052631579, "grad_norm": 1.6876004934310913, "learning_rate": 0.0001, "loss": 0.0182, "step": 44930 }, { "epoch": 295.6578947368421, "grad_norm": 1.7796951532363892, "learning_rate": 0.0001, "loss": 0.0192, "step": 44940 }, { "epoch": 295.7236842105263, "grad_norm": 1.4066557884216309, "learning_rate": 0.0001, "loss": 0.0178, "step": 44950 }, { "epoch": 295.7894736842105, "grad_norm": 1.5315219163894653, "learning_rate": 0.0001, "loss": 0.0174, "step": 44960 }, { "epoch": 295.85526315789474, "grad_norm": 1.3101402521133423, "learning_rate": 0.0001, "loss": 0.0205, "step": 44970 }, { "epoch": 295.92105263157896, "grad_norm": 1.2518103122711182, "learning_rate": 0.0001, "loss": 0.0186, "step": 44980 }, { "epoch": 295.9868421052632, "grad_norm": 1.4019862413406372, "learning_rate": 0.0001, "loss": 0.0207, "step": 44990 }, { "epoch": 296.05263157894734, "grad_norm": 1.570473074913025, "learning_rate": 0.0001, "loss": 0.0155, "step": 45000 }, { "epoch": 296.11842105263156, "grad_norm": 1.6050716638565063, "learning_rate": 0.0001, "loss": 0.0207, "step": 45010 }, { "epoch": 296.1842105263158, "grad_norm": 1.5037024021148682, "learning_rate": 0.0001, "loss": 0.0211, "step": 45020 }, { "epoch": 296.25, "grad_norm": 1.5674915313720703, "learning_rate": 0.0001, "loss": 0.0161, "step": 45030 }, { "epoch": 296.3157894736842, "grad_norm": 1.5826584100723267, "learning_rate": 0.0001, "loss": 0.0181, "step": 45040 }, { "epoch": 296.38157894736844, "grad_norm": 1.5869287252426147, "learning_rate": 0.0001, "loss": 0.019, "step": 45050 }, { "epoch": 296.44736842105266, "grad_norm": 1.1685420274734497, "learning_rate": 0.0001, "loss": 0.0168, "step": 45060 }, { "epoch": 296.5131578947368, "grad_norm": 2.0506043434143066, "learning_rate": 0.0001, "loss": 0.0187, "step": 45070 }, { "epoch": 296.57894736842104, "grad_norm": 1.339653730392456, "learning_rate": 0.0001, "loss": 0.0163, "step": 45080 }, { "epoch": 296.64473684210526, "grad_norm": 1.2440121173858643, "learning_rate": 0.0001, "loss": 0.021, "step": 45090 }, { "epoch": 296.7105263157895, "grad_norm": 1.5392978191375732, "learning_rate": 0.0001, "loss": 0.0226, "step": 45100 }, { "epoch": 296.7763157894737, "grad_norm": 1.4259546995162964, "learning_rate": 0.0001, "loss": 0.0156, "step": 45110 }, { "epoch": 296.8421052631579, "grad_norm": 1.6736934185028076, "learning_rate": 0.0001, "loss": 0.0235, "step": 45120 }, { "epoch": 296.9078947368421, "grad_norm": 1.1300886869430542, "learning_rate": 0.0001, "loss": 0.0189, "step": 45130 }, { "epoch": 296.9736842105263, "grad_norm": 1.6193512678146362, "learning_rate": 0.0001, "loss": 0.0199, "step": 45140 }, { "epoch": 297.0394736842105, "grad_norm": 1.2433977127075195, "learning_rate": 0.0001, "loss": 0.0173, "step": 45150 }, { "epoch": 297.10526315789474, "grad_norm": 1.5035532712936401, "learning_rate": 0.0001, "loss": 0.0211, "step": 45160 }, { "epoch": 297.17105263157896, "grad_norm": 1.2870287895202637, "learning_rate": 0.0001, "loss": 0.0185, "step": 45170 }, { "epoch": 297.2368421052632, "grad_norm": 1.3958863019943237, "learning_rate": 0.0001, "loss": 0.0165, "step": 45180 }, { "epoch": 297.30263157894734, "grad_norm": 1.6416631937026978, "learning_rate": 0.0001, "loss": 0.015, "step": 45190 }, { "epoch": 297.36842105263156, "grad_norm": 2.1068778038024902, "learning_rate": 0.0001, "loss": 0.0211, "step": 45200 }, { "epoch": 297.4342105263158, "grad_norm": 1.3050501346588135, "learning_rate": 0.0001, "loss": 0.0167, "step": 45210 }, { "epoch": 297.5, "grad_norm": 1.8583145141601562, "learning_rate": 0.0001, "loss": 0.0172, "step": 45220 }, { "epoch": 297.5657894736842, "grad_norm": 1.560758113861084, "learning_rate": 0.0001, "loss": 0.0175, "step": 45230 }, { "epoch": 297.63157894736844, "grad_norm": 1.2892173528671265, "learning_rate": 0.0001, "loss": 0.0188, "step": 45240 }, { "epoch": 297.69736842105266, "grad_norm": 1.364734172821045, "learning_rate": 0.0001, "loss": 0.0163, "step": 45250 }, { "epoch": 297.7631578947368, "grad_norm": 1.2296735048294067, "learning_rate": 0.0001, "loss": 0.0146, "step": 45260 }, { "epoch": 297.82894736842104, "grad_norm": 1.721154808998108, "learning_rate": 0.0001, "loss": 0.0217, "step": 45270 }, { "epoch": 297.89473684210526, "grad_norm": 1.360379934310913, "learning_rate": 0.0001, "loss": 0.02, "step": 45280 }, { "epoch": 297.9605263157895, "grad_norm": 1.3491019010543823, "learning_rate": 0.0001, "loss": 0.0204, "step": 45290 }, { "epoch": 298.0263157894737, "grad_norm": 1.5295279026031494, "learning_rate": 0.0001, "loss": 0.0156, "step": 45300 }, { "epoch": 298.0921052631579, "grad_norm": 1.5967841148376465, "learning_rate": 0.0001, "loss": 0.0183, "step": 45310 }, { "epoch": 298.1578947368421, "grad_norm": 1.700802206993103, "learning_rate": 0.0001, "loss": 0.0163, "step": 45320 }, { "epoch": 298.2236842105263, "grad_norm": 1.4748497009277344, "learning_rate": 0.0001, "loss": 0.0174, "step": 45330 }, { "epoch": 298.2894736842105, "grad_norm": 1.526371717453003, "learning_rate": 0.0001, "loss": 0.0203, "step": 45340 }, { "epoch": 298.35526315789474, "grad_norm": 1.975524663925171, "learning_rate": 0.0001, "loss": 0.0192, "step": 45350 }, { "epoch": 298.42105263157896, "grad_norm": 1.5346332788467407, "learning_rate": 0.0001, "loss": 0.0199, "step": 45360 }, { "epoch": 298.4868421052632, "grad_norm": 1.9206433296203613, "learning_rate": 0.0001, "loss": 0.0163, "step": 45370 }, { "epoch": 298.55263157894734, "grad_norm": 1.5912476778030396, "learning_rate": 0.0001, "loss": 0.0208, "step": 45380 }, { "epoch": 298.61842105263156, "grad_norm": 1.7080267667770386, "learning_rate": 0.0001, "loss": 0.017, "step": 45390 }, { "epoch": 298.6842105263158, "grad_norm": 1.765561819076538, "learning_rate": 0.0001, "loss": 0.0162, "step": 45400 }, { "epoch": 298.75, "grad_norm": 1.2894277572631836, "learning_rate": 0.0001, "loss": 0.0182, "step": 45410 }, { "epoch": 298.8157894736842, "grad_norm": 1.4892938137054443, "learning_rate": 0.0001, "loss": 0.0209, "step": 45420 }, { "epoch": 298.88157894736844, "grad_norm": 1.4608488082885742, "learning_rate": 0.0001, "loss": 0.0188, "step": 45430 }, { "epoch": 298.94736842105266, "grad_norm": 1.4883030652999878, "learning_rate": 0.0001, "loss": 0.018, "step": 45440 }, { "epoch": 299.0131578947368, "grad_norm": 1.299932599067688, "learning_rate": 0.0001, "loss": 0.0213, "step": 45450 }, { "epoch": 299.07894736842104, "grad_norm": 1.2396427392959595, "learning_rate": 0.0001, "loss": 0.0197, "step": 45460 }, { "epoch": 299.14473684210526, "grad_norm": 1.3617972135543823, "learning_rate": 0.0001, "loss": 0.0186, "step": 45470 }, { "epoch": 299.2105263157895, "grad_norm": 1.4647009372711182, "learning_rate": 0.0001, "loss": 0.0187, "step": 45480 }, { "epoch": 299.2763157894737, "grad_norm": 1.3870558738708496, "learning_rate": 0.0001, "loss": 0.0156, "step": 45490 }, { "epoch": 299.3421052631579, "grad_norm": 1.3921622037887573, "learning_rate": 0.0001, "loss": 0.0172, "step": 45500 }, { "epoch": 299.4078947368421, "grad_norm": 1.637829303741455, "learning_rate": 0.0001, "loss": 0.02, "step": 45510 }, { "epoch": 299.4736842105263, "grad_norm": 1.720557451248169, "learning_rate": 0.0001, "loss": 0.0174, "step": 45520 }, { "epoch": 299.5394736842105, "grad_norm": 1.7584632635116577, "learning_rate": 0.0001, "loss": 0.0217, "step": 45530 }, { "epoch": 299.60526315789474, "grad_norm": 2.013603448867798, "learning_rate": 0.0001, "loss": 0.0192, "step": 45540 }, { "epoch": 299.67105263157896, "grad_norm": 1.510361671447754, "learning_rate": 0.0001, "loss": 0.0174, "step": 45550 }, { "epoch": 299.7368421052632, "grad_norm": 1.1737828254699707, "learning_rate": 0.0001, "loss": 0.0201, "step": 45560 }, { "epoch": 299.80263157894734, "grad_norm": 1.782639741897583, "learning_rate": 0.0001, "loss": 0.0222, "step": 45570 }, { "epoch": 299.86842105263156, "grad_norm": 1.5502687692642212, "learning_rate": 0.0001, "loss": 0.0158, "step": 45580 }, { "epoch": 299.9342105263158, "grad_norm": 1.1964201927185059, "learning_rate": 0.0001, "loss": 0.0184, "step": 45590 }, { "epoch": 300.0, "grad_norm": 1.6554977893829346, "learning_rate": 0.0001, "loss": 0.0156, "step": 45600 }, { "epoch": 300.0657894736842, "grad_norm": 1.739046335220337, "learning_rate": 0.0001, "loss": 0.0191, "step": 45610 }, { "epoch": 300.13157894736844, "grad_norm": 1.5695098638534546, "learning_rate": 0.0001, "loss": 0.0162, "step": 45620 }, { "epoch": 300.19736842105266, "grad_norm": 1.36410391330719, "learning_rate": 0.0001, "loss": 0.016, "step": 45630 }, { "epoch": 300.2631578947368, "grad_norm": 1.3706581592559814, "learning_rate": 0.0001, "loss": 0.0215, "step": 45640 }, { "epoch": 300.32894736842104, "grad_norm": 1.5091278553009033, "learning_rate": 0.0001, "loss": 0.0194, "step": 45650 }, { "epoch": 300.39473684210526, "grad_norm": 1.5270977020263672, "learning_rate": 0.0001, "loss": 0.0203, "step": 45660 }, { "epoch": 300.4605263157895, "grad_norm": 1.6984201669692993, "learning_rate": 0.0001, "loss": 0.0174, "step": 45670 }, { "epoch": 300.5263157894737, "grad_norm": 1.065918207168579, "learning_rate": 0.0001, "loss": 0.0201, "step": 45680 }, { "epoch": 300.5921052631579, "grad_norm": 1.7855254411697388, "learning_rate": 0.0001, "loss": 0.0187, "step": 45690 }, { "epoch": 300.6578947368421, "grad_norm": 1.3063669204711914, "learning_rate": 0.0001, "loss": 0.0182, "step": 45700 }, { "epoch": 300.7236842105263, "grad_norm": 1.5801082849502563, "learning_rate": 0.0001, "loss": 0.0183, "step": 45710 }, { "epoch": 300.7894736842105, "grad_norm": 1.4478912353515625, "learning_rate": 0.0001, "loss": 0.015, "step": 45720 }, { "epoch": 300.85526315789474, "grad_norm": 1.5584310293197632, "learning_rate": 0.0001, "loss": 0.0186, "step": 45730 }, { "epoch": 300.92105263157896, "grad_norm": 2.0109875202178955, "learning_rate": 0.0001, "loss": 0.0189, "step": 45740 }, { "epoch": 300.9868421052632, "grad_norm": 2.062621831893921, "learning_rate": 0.0001, "loss": 0.0166, "step": 45750 }, { "epoch": 301.05263157894734, "grad_norm": 1.5105104446411133, "learning_rate": 0.0001, "loss": 0.0181, "step": 45760 }, { "epoch": 301.11842105263156, "grad_norm": 1.7521532773971558, "learning_rate": 0.0001, "loss": 0.0197, "step": 45770 }, { "epoch": 301.1842105263158, "grad_norm": 1.700806975364685, "learning_rate": 0.0001, "loss": 0.0186, "step": 45780 }, { "epoch": 301.25, "grad_norm": 1.5102571249008179, "learning_rate": 0.0001, "loss": 0.0198, "step": 45790 }, { "epoch": 301.3157894736842, "grad_norm": 1.5459164381027222, "learning_rate": 0.0001, "loss": 0.0167, "step": 45800 }, { "epoch": 301.38157894736844, "grad_norm": 1.627331256866455, "learning_rate": 0.0001, "loss": 0.0164, "step": 45810 }, { "epoch": 301.44736842105266, "grad_norm": 1.4326850175857544, "learning_rate": 0.0001, "loss": 0.0182, "step": 45820 }, { "epoch": 301.5131578947368, "grad_norm": 1.634329915046692, "learning_rate": 0.0001, "loss": 0.0171, "step": 45830 }, { "epoch": 301.57894736842104, "grad_norm": 1.4096146821975708, "learning_rate": 0.0001, "loss": 0.017, "step": 45840 }, { "epoch": 301.64473684210526, "grad_norm": 1.7806950807571411, "learning_rate": 0.0001, "loss": 0.0158, "step": 45850 }, { "epoch": 301.7105263157895, "grad_norm": 1.4719057083129883, "learning_rate": 0.0001, "loss": 0.0182, "step": 45860 }, { "epoch": 301.7763157894737, "grad_norm": 1.3032773733139038, "learning_rate": 0.0001, "loss": 0.017, "step": 45870 }, { "epoch": 301.8421052631579, "grad_norm": 1.476094126701355, "learning_rate": 0.0001, "loss": 0.0213, "step": 45880 }, { "epoch": 301.9078947368421, "grad_norm": 1.6893688440322876, "learning_rate": 0.0001, "loss": 0.0183, "step": 45890 }, { "epoch": 301.9736842105263, "grad_norm": 1.6166623830795288, "learning_rate": 0.0001, "loss": 0.0205, "step": 45900 }, { "epoch": 302.0394736842105, "grad_norm": 1.420366883277893, "learning_rate": 0.0001, "loss": 0.0193, "step": 45910 }, { "epoch": 302.10526315789474, "grad_norm": 1.887187123298645, "learning_rate": 0.0001, "loss": 0.0191, "step": 45920 }, { "epoch": 302.17105263157896, "grad_norm": 1.80022394657135, "learning_rate": 0.0001, "loss": 0.0202, "step": 45930 }, { "epoch": 302.2368421052632, "grad_norm": 1.7181084156036377, "learning_rate": 0.0001, "loss": 0.0168, "step": 45940 }, { "epoch": 302.30263157894734, "grad_norm": 1.1719276905059814, "learning_rate": 0.0001, "loss": 0.016, "step": 45950 }, { "epoch": 302.36842105263156, "grad_norm": 1.8969125747680664, "learning_rate": 0.0001, "loss": 0.018, "step": 45960 }, { "epoch": 302.4342105263158, "grad_norm": 2.235708236694336, "learning_rate": 0.0001, "loss": 0.0197, "step": 45970 }, { "epoch": 302.5, "grad_norm": 1.4146760702133179, "learning_rate": 0.0001, "loss": 0.0159, "step": 45980 }, { "epoch": 302.5657894736842, "grad_norm": 1.4588083028793335, "learning_rate": 0.0001, "loss": 0.0192, "step": 45990 }, { "epoch": 302.63157894736844, "grad_norm": 1.2821359634399414, "learning_rate": 0.0001, "loss": 0.0181, "step": 46000 }, { "epoch": 302.69736842105266, "grad_norm": 1.3734441995620728, "learning_rate": 0.0001, "loss": 0.0204, "step": 46010 }, { "epoch": 302.7631578947368, "grad_norm": 1.1819570064544678, "learning_rate": 0.0001, "loss": 0.0173, "step": 46020 }, { "epoch": 302.82894736842104, "grad_norm": 1.596516728401184, "learning_rate": 0.0001, "loss": 0.0177, "step": 46030 }, { "epoch": 302.89473684210526, "grad_norm": 1.4742130041122437, "learning_rate": 0.0001, "loss": 0.0176, "step": 46040 }, { "epoch": 302.9605263157895, "grad_norm": 1.2302465438842773, "learning_rate": 0.0001, "loss": 0.0179, "step": 46050 }, { "epoch": 303.0263157894737, "grad_norm": 1.5319193601608276, "learning_rate": 0.0001, "loss": 0.0208, "step": 46060 }, { "epoch": 303.0921052631579, "grad_norm": 1.2074185609817505, "learning_rate": 0.0001, "loss": 0.0191, "step": 46070 }, { "epoch": 303.1578947368421, "grad_norm": 1.5465320348739624, "learning_rate": 0.0001, "loss": 0.0167, "step": 46080 }, { "epoch": 303.2236842105263, "grad_norm": 1.8388768434524536, "learning_rate": 0.0001, "loss": 0.0168, "step": 46090 }, { "epoch": 303.2894736842105, "grad_norm": 2.0239415168762207, "learning_rate": 0.0001, "loss": 0.0214, "step": 46100 }, { "epoch": 303.35526315789474, "grad_norm": 1.9379454851150513, "learning_rate": 0.0001, "loss": 0.0163, "step": 46110 }, { "epoch": 303.42105263157896, "grad_norm": 1.5877773761749268, "learning_rate": 0.0001, "loss": 0.0205, "step": 46120 }, { "epoch": 303.4868421052632, "grad_norm": 1.928288221359253, "learning_rate": 0.0001, "loss": 0.0221, "step": 46130 }, { "epoch": 303.55263157894734, "grad_norm": 1.5093241930007935, "learning_rate": 0.0001, "loss": 0.0175, "step": 46140 }, { "epoch": 303.61842105263156, "grad_norm": 1.3065271377563477, "learning_rate": 0.0001, "loss": 0.0186, "step": 46150 }, { "epoch": 303.6842105263158, "grad_norm": 1.8183802366256714, "learning_rate": 0.0001, "loss": 0.0142, "step": 46160 }, { "epoch": 303.75, "grad_norm": 1.0036966800689697, "learning_rate": 0.0001, "loss": 0.0139, "step": 46170 }, { "epoch": 303.8157894736842, "grad_norm": 2.083498954772949, "learning_rate": 0.0001, "loss": 0.018, "step": 46180 }, { "epoch": 303.88157894736844, "grad_norm": 1.5529634952545166, "learning_rate": 0.0001, "loss": 0.0186, "step": 46190 }, { "epoch": 303.94736842105266, "grad_norm": 1.6897246837615967, "learning_rate": 0.0001, "loss": 0.0195, "step": 46200 }, { "epoch": 304.0131578947368, "grad_norm": 2.2425012588500977, "learning_rate": 0.0001, "loss": 0.0165, "step": 46210 }, { "epoch": 304.07894736842104, "grad_norm": 1.6090149879455566, "learning_rate": 0.0001, "loss": 0.0193, "step": 46220 }, { "epoch": 304.14473684210526, "grad_norm": 1.5996177196502686, "learning_rate": 0.0001, "loss": 0.0181, "step": 46230 }, { "epoch": 304.2105263157895, "grad_norm": 1.1910287141799927, "learning_rate": 0.0001, "loss": 0.0162, "step": 46240 }, { "epoch": 304.2763157894737, "grad_norm": 1.113649845123291, "learning_rate": 0.0001, "loss": 0.0173, "step": 46250 }, { "epoch": 304.3421052631579, "grad_norm": 1.9819141626358032, "learning_rate": 0.0001, "loss": 0.0196, "step": 46260 }, { "epoch": 304.4078947368421, "grad_norm": 1.7157220840454102, "learning_rate": 0.0001, "loss": 0.017, "step": 46270 }, { "epoch": 304.4736842105263, "grad_norm": 1.8065358400344849, "learning_rate": 0.0001, "loss": 0.0193, "step": 46280 }, { "epoch": 304.5394736842105, "grad_norm": 1.6105537414550781, "learning_rate": 0.0001, "loss": 0.0154, "step": 46290 }, { "epoch": 304.60526315789474, "grad_norm": 1.5964257717132568, "learning_rate": 0.0001, "loss": 0.0173, "step": 46300 }, { "epoch": 304.67105263157896, "grad_norm": 1.528689980506897, "learning_rate": 0.0001, "loss": 0.0198, "step": 46310 }, { "epoch": 304.7368421052632, "grad_norm": 1.7941625118255615, "learning_rate": 0.0001, "loss": 0.0207, "step": 46320 }, { "epoch": 304.80263157894734, "grad_norm": 1.3337374925613403, "learning_rate": 0.0001, "loss": 0.0187, "step": 46330 }, { "epoch": 304.86842105263156, "grad_norm": 1.4397844076156616, "learning_rate": 0.0001, "loss": 0.0182, "step": 46340 }, { "epoch": 304.9342105263158, "grad_norm": 1.6499816179275513, "learning_rate": 0.0001, "loss": 0.0197, "step": 46350 }, { "epoch": 305.0, "grad_norm": 1.6768479347229004, "learning_rate": 0.0001, "loss": 0.0146, "step": 46360 }, { "epoch": 305.0657894736842, "grad_norm": 1.6572400331497192, "learning_rate": 0.0001, "loss": 0.0194, "step": 46370 }, { "epoch": 305.13157894736844, "grad_norm": 1.9143335819244385, "learning_rate": 0.0001, "loss": 0.02, "step": 46380 }, { "epoch": 305.19736842105266, "grad_norm": 1.3053088188171387, "learning_rate": 0.0001, "loss": 0.0174, "step": 46390 }, { "epoch": 305.2631578947368, "grad_norm": 1.6381217241287231, "learning_rate": 0.0001, "loss": 0.0181, "step": 46400 }, { "epoch": 305.32894736842104, "grad_norm": 1.3597675561904907, "learning_rate": 0.0001, "loss": 0.0183, "step": 46410 }, { "epoch": 305.39473684210526, "grad_norm": 1.5664676427841187, "learning_rate": 0.0001, "loss": 0.0211, "step": 46420 }, { "epoch": 305.4605263157895, "grad_norm": 1.866611123085022, "learning_rate": 0.0001, "loss": 0.0152, "step": 46430 }, { "epoch": 305.5263157894737, "grad_norm": 1.6158077716827393, "learning_rate": 0.0001, "loss": 0.0191, "step": 46440 }, { "epoch": 305.5921052631579, "grad_norm": 1.7493629455566406, "learning_rate": 0.0001, "loss": 0.0149, "step": 46450 }, { "epoch": 305.6578947368421, "grad_norm": 1.6350884437561035, "learning_rate": 0.0001, "loss": 0.019, "step": 46460 }, { "epoch": 305.7236842105263, "grad_norm": 1.8543176651000977, "learning_rate": 0.0001, "loss": 0.0162, "step": 46470 }, { "epoch": 305.7894736842105, "grad_norm": 1.8132977485656738, "learning_rate": 0.0001, "loss": 0.0221, "step": 46480 }, { "epoch": 305.85526315789474, "grad_norm": 1.8832110166549683, "learning_rate": 0.0001, "loss": 0.0198, "step": 46490 }, { "epoch": 305.92105263157896, "grad_norm": 2.2764670848846436, "learning_rate": 0.0001, "loss": 0.0168, "step": 46500 }, { "epoch": 305.9868421052632, "grad_norm": 1.4446336030960083, "learning_rate": 0.0001, "loss": 0.018, "step": 46510 }, { "epoch": 306.05263157894734, "grad_norm": 1.6488699913024902, "learning_rate": 0.0001, "loss": 0.0153, "step": 46520 }, { "epoch": 306.11842105263156, "grad_norm": 1.5506492853164673, "learning_rate": 0.0001, "loss": 0.0161, "step": 46530 }, { "epoch": 306.1842105263158, "grad_norm": 1.5354551076889038, "learning_rate": 0.0001, "loss": 0.017, "step": 46540 }, { "epoch": 306.25, "grad_norm": 1.286345362663269, "learning_rate": 0.0001, "loss": 0.017, "step": 46550 }, { "epoch": 306.3157894736842, "grad_norm": 1.5891623497009277, "learning_rate": 0.0001, "loss": 0.0156, "step": 46560 }, { "epoch": 306.38157894736844, "grad_norm": 1.6933952569961548, "learning_rate": 0.0001, "loss": 0.0195, "step": 46570 }, { "epoch": 306.44736842105266, "grad_norm": 1.5327366590499878, "learning_rate": 0.0001, "loss": 0.0187, "step": 46580 }, { "epoch": 306.5131578947368, "grad_norm": 2.1112375259399414, "learning_rate": 0.0001, "loss": 0.0152, "step": 46590 }, { "epoch": 306.57894736842104, "grad_norm": 1.4599958658218384, "learning_rate": 0.0001, "loss": 0.0183, "step": 46600 }, { "epoch": 306.64473684210526, "grad_norm": 1.5321639776229858, "learning_rate": 0.0001, "loss": 0.0179, "step": 46610 }, { "epoch": 306.7105263157895, "grad_norm": 1.4437108039855957, "learning_rate": 0.0001, "loss": 0.0168, "step": 46620 }, { "epoch": 306.7763157894737, "grad_norm": 1.426668405532837, "learning_rate": 0.0001, "loss": 0.0171, "step": 46630 }, { "epoch": 306.8421052631579, "grad_norm": 1.6601283550262451, "learning_rate": 0.0001, "loss": 0.0176, "step": 46640 }, { "epoch": 306.9078947368421, "grad_norm": 1.4039828777313232, "learning_rate": 0.0001, "loss": 0.0208, "step": 46650 }, { "epoch": 306.9736842105263, "grad_norm": 1.0846819877624512, "learning_rate": 0.0001, "loss": 0.0191, "step": 46660 }, { "epoch": 307.0394736842105, "grad_norm": 1.954553246498108, "learning_rate": 0.0001, "loss": 0.0226, "step": 46670 }, { "epoch": 307.10526315789474, "grad_norm": 1.5716629028320312, "learning_rate": 0.0001, "loss": 0.0227, "step": 46680 }, { "epoch": 307.17105263157896, "grad_norm": 1.418899655342102, "learning_rate": 0.0001, "loss": 0.0176, "step": 46690 }, { "epoch": 307.2368421052632, "grad_norm": 1.668788194656372, "learning_rate": 0.0001, "loss": 0.0151, "step": 46700 }, { "epoch": 307.30263157894734, "grad_norm": 1.6482064723968506, "learning_rate": 0.0001, "loss": 0.0173, "step": 46710 }, { "epoch": 307.36842105263156, "grad_norm": 1.5387554168701172, "learning_rate": 0.0001, "loss": 0.018, "step": 46720 }, { "epoch": 307.4342105263158, "grad_norm": 1.238232970237732, "learning_rate": 0.0001, "loss": 0.0217, "step": 46730 }, { "epoch": 307.5, "grad_norm": 1.337867021560669, "learning_rate": 0.0001, "loss": 0.0202, "step": 46740 }, { "epoch": 307.5657894736842, "grad_norm": 1.411401391029358, "learning_rate": 0.0001, "loss": 0.0184, "step": 46750 }, { "epoch": 307.63157894736844, "grad_norm": 1.6010618209838867, "learning_rate": 0.0001, "loss": 0.0171, "step": 46760 }, { "epoch": 307.69736842105266, "grad_norm": 1.4427157640457153, "learning_rate": 0.0001, "loss": 0.0159, "step": 46770 }, { "epoch": 307.7631578947368, "grad_norm": 1.4797656536102295, "learning_rate": 0.0001, "loss": 0.018, "step": 46780 }, { "epoch": 307.82894736842104, "grad_norm": 1.1329435110092163, "learning_rate": 0.0001, "loss": 0.0156, "step": 46790 }, { "epoch": 307.89473684210526, "grad_norm": 1.344834327697754, "learning_rate": 0.0001, "loss": 0.0204, "step": 46800 }, { "epoch": 307.9605263157895, "grad_norm": 1.581467628479004, "learning_rate": 0.0001, "loss": 0.0167, "step": 46810 }, { "epoch": 308.0263157894737, "grad_norm": 1.5287977457046509, "learning_rate": 0.0001, "loss": 0.0201, "step": 46820 }, { "epoch": 308.0921052631579, "grad_norm": 1.5047463178634644, "learning_rate": 0.0001, "loss": 0.0208, "step": 46830 }, { "epoch": 308.1578947368421, "grad_norm": 1.9370300769805908, "learning_rate": 0.0001, "loss": 0.0192, "step": 46840 }, { "epoch": 308.2236842105263, "grad_norm": 1.7529603242874146, "learning_rate": 0.0001, "loss": 0.0224, "step": 46850 }, { "epoch": 308.2894736842105, "grad_norm": 1.5577030181884766, "learning_rate": 0.0001, "loss": 0.0181, "step": 46860 }, { "epoch": 308.35526315789474, "grad_norm": 1.366290807723999, "learning_rate": 0.0001, "loss": 0.0161, "step": 46870 }, { "epoch": 308.42105263157896, "grad_norm": 1.671356201171875, "learning_rate": 0.0001, "loss": 0.0171, "step": 46880 }, { "epoch": 308.4868421052632, "grad_norm": 1.8231550455093384, "learning_rate": 0.0001, "loss": 0.0176, "step": 46890 }, { "epoch": 308.55263157894734, "grad_norm": 1.8373165130615234, "learning_rate": 0.0001, "loss": 0.017, "step": 46900 }, { "epoch": 308.61842105263156, "grad_norm": 1.5715163946151733, "learning_rate": 0.0001, "loss": 0.0184, "step": 46910 }, { "epoch": 308.6842105263158, "grad_norm": 1.5759971141815186, "learning_rate": 0.0001, "loss": 0.0195, "step": 46920 }, { "epoch": 308.75, "grad_norm": 1.2129548788070679, "learning_rate": 0.0001, "loss": 0.0182, "step": 46930 }, { "epoch": 308.8157894736842, "grad_norm": 1.5991771221160889, "learning_rate": 0.0001, "loss": 0.0179, "step": 46940 }, { "epoch": 308.88157894736844, "grad_norm": 1.6049580574035645, "learning_rate": 0.0001, "loss": 0.018, "step": 46950 }, { "epoch": 308.94736842105266, "grad_norm": 1.6151291131973267, "learning_rate": 0.0001, "loss": 0.0144, "step": 46960 }, { "epoch": 309.0131578947368, "grad_norm": 1.5033934116363525, "learning_rate": 0.0001, "loss": 0.0178, "step": 46970 }, { "epoch": 309.07894736842104, "grad_norm": 1.4705886840820312, "learning_rate": 0.0001, "loss": 0.0183, "step": 46980 }, { "epoch": 309.14473684210526, "grad_norm": 1.5265287160873413, "learning_rate": 0.0001, "loss": 0.0209, "step": 46990 }, { "epoch": 309.2105263157895, "grad_norm": 1.4632219076156616, "learning_rate": 0.0001, "loss": 0.0189, "step": 47000 }, { "epoch": 309.2763157894737, "grad_norm": 1.5056467056274414, "learning_rate": 0.0001, "loss": 0.0192, "step": 47010 }, { "epoch": 309.3421052631579, "grad_norm": 1.3841480016708374, "learning_rate": 0.0001, "loss": 0.0222, "step": 47020 }, { "epoch": 309.4078947368421, "grad_norm": 1.4890334606170654, "learning_rate": 0.0001, "loss": 0.0161, "step": 47030 }, { "epoch": 309.4736842105263, "grad_norm": 1.6313532590866089, "learning_rate": 0.0001, "loss": 0.0143, "step": 47040 }, { "epoch": 309.5394736842105, "grad_norm": 1.5217766761779785, "learning_rate": 0.0001, "loss": 0.0161, "step": 47050 }, { "epoch": 309.60526315789474, "grad_norm": 1.4616382122039795, "learning_rate": 0.0001, "loss": 0.0195, "step": 47060 }, { "epoch": 309.67105263157896, "grad_norm": 1.5641740560531616, "learning_rate": 0.0001, "loss": 0.016, "step": 47070 }, { "epoch": 309.7368421052632, "grad_norm": 1.499773383140564, "learning_rate": 0.0001, "loss": 0.0199, "step": 47080 }, { "epoch": 309.80263157894734, "grad_norm": 1.743351697921753, "learning_rate": 0.0001, "loss": 0.0176, "step": 47090 }, { "epoch": 309.86842105263156, "grad_norm": 1.1967742443084717, "learning_rate": 0.0001, "loss": 0.0164, "step": 47100 }, { "epoch": 309.9342105263158, "grad_norm": 1.4715564250946045, "learning_rate": 0.0001, "loss": 0.0158, "step": 47110 }, { "epoch": 310.0, "grad_norm": 1.5104857683181763, "learning_rate": 0.0001, "loss": 0.0202, "step": 47120 }, { "epoch": 310.0657894736842, "grad_norm": 1.484756588935852, "learning_rate": 0.0001, "loss": 0.0202, "step": 47130 }, { "epoch": 310.13157894736844, "grad_norm": 1.7109286785125732, "learning_rate": 0.0001, "loss": 0.0181, "step": 47140 }, { "epoch": 310.19736842105266, "grad_norm": 1.6897801160812378, "learning_rate": 0.0001, "loss": 0.0179, "step": 47150 }, { "epoch": 310.2631578947368, "grad_norm": 1.6113938093185425, "learning_rate": 0.0001, "loss": 0.0175, "step": 47160 }, { "epoch": 310.32894736842104, "grad_norm": 1.829939603805542, "learning_rate": 0.0001, "loss": 0.0166, "step": 47170 }, { "epoch": 310.39473684210526, "grad_norm": 1.8619146347045898, "learning_rate": 0.0001, "loss": 0.0169, "step": 47180 }, { "epoch": 310.4605263157895, "grad_norm": 1.8610225915908813, "learning_rate": 0.0001, "loss": 0.0154, "step": 47190 }, { "epoch": 310.5263157894737, "grad_norm": 1.8118908405303955, "learning_rate": 0.0001, "loss": 0.0186, "step": 47200 }, { "epoch": 310.5921052631579, "grad_norm": 1.8977290391921997, "learning_rate": 0.0001, "loss": 0.0167, "step": 47210 }, { "epoch": 310.6578947368421, "grad_norm": 2.0439157485961914, "learning_rate": 0.0001, "loss": 0.0164, "step": 47220 }, { "epoch": 310.7236842105263, "grad_norm": 1.7725433111190796, "learning_rate": 0.0001, "loss": 0.0185, "step": 47230 }, { "epoch": 310.7894736842105, "grad_norm": 2.053192615509033, "learning_rate": 0.0001, "loss": 0.0232, "step": 47240 }, { "epoch": 310.85526315789474, "grad_norm": 1.72048020362854, "learning_rate": 0.0001, "loss": 0.0189, "step": 47250 }, { "epoch": 310.92105263157896, "grad_norm": 1.865236759185791, "learning_rate": 0.0001, "loss": 0.0154, "step": 47260 }, { "epoch": 310.9868421052632, "grad_norm": 1.9846317768096924, "learning_rate": 0.0001, "loss": 0.0155, "step": 47270 }, { "epoch": 311.05263157894734, "grad_norm": 1.2775719165802002, "learning_rate": 0.0001, "loss": 0.0136, "step": 47280 }, { "epoch": 311.11842105263156, "grad_norm": 1.6498456001281738, "learning_rate": 0.0001, "loss": 0.0178, "step": 47290 }, { "epoch": 311.1842105263158, "grad_norm": 1.9948612451553345, "learning_rate": 0.0001, "loss": 0.0181, "step": 47300 }, { "epoch": 311.25, "grad_norm": 1.696533203125, "learning_rate": 0.0001, "loss": 0.021, "step": 47310 }, { "epoch": 311.3157894736842, "grad_norm": 1.9568681716918945, "learning_rate": 0.0001, "loss": 0.0155, "step": 47320 }, { "epoch": 311.38157894736844, "grad_norm": 2.0786991119384766, "learning_rate": 0.0001, "loss": 0.0197, "step": 47330 }, { "epoch": 311.44736842105266, "grad_norm": 1.9696881771087646, "learning_rate": 0.0001, "loss": 0.0158, "step": 47340 }, { "epoch": 311.5131578947368, "grad_norm": 1.9240788221359253, "learning_rate": 0.0001, "loss": 0.0181, "step": 47350 }, { "epoch": 311.57894736842104, "grad_norm": 1.348659873008728, "learning_rate": 0.0001, "loss": 0.0211, "step": 47360 }, { "epoch": 311.64473684210526, "grad_norm": 1.5219076871871948, "learning_rate": 0.0001, "loss": 0.0161, "step": 47370 }, { "epoch": 311.7105263157895, "grad_norm": 1.2764948606491089, "learning_rate": 0.0001, "loss": 0.0149, "step": 47380 }, { "epoch": 311.7763157894737, "grad_norm": 1.5306261777877808, "learning_rate": 0.0001, "loss": 0.0183, "step": 47390 }, { "epoch": 311.8421052631579, "grad_norm": 1.505691647529602, "learning_rate": 0.0001, "loss": 0.0191, "step": 47400 }, { "epoch": 311.9078947368421, "grad_norm": 1.5061795711517334, "learning_rate": 0.0001, "loss": 0.0157, "step": 47410 }, { "epoch": 311.9736842105263, "grad_norm": 1.6898895502090454, "learning_rate": 0.0001, "loss": 0.0164, "step": 47420 }, { "epoch": 312.0394736842105, "grad_norm": 1.3336178064346313, "learning_rate": 0.0001, "loss": 0.0146, "step": 47430 }, { "epoch": 312.10526315789474, "grad_norm": 1.888869285583496, "learning_rate": 0.0001, "loss": 0.0193, "step": 47440 }, { "epoch": 312.17105263157896, "grad_norm": 2.0602428913116455, "learning_rate": 0.0001, "loss": 0.0175, "step": 47450 }, { "epoch": 312.2368421052632, "grad_norm": 1.5631532669067383, "learning_rate": 0.0001, "loss": 0.0171, "step": 47460 }, { "epoch": 312.30263157894734, "grad_norm": 1.9029909372329712, "learning_rate": 0.0001, "loss": 0.0169, "step": 47470 }, { "epoch": 312.36842105263156, "grad_norm": 1.6733516454696655, "learning_rate": 0.0001, "loss": 0.0184, "step": 47480 }, { "epoch": 312.4342105263158, "grad_norm": 1.8071483373641968, "learning_rate": 0.0001, "loss": 0.0152, "step": 47490 }, { "epoch": 312.5, "grad_norm": 1.6405636072158813, "learning_rate": 0.0001, "loss": 0.0178, "step": 47500 }, { "epoch": 312.5657894736842, "grad_norm": 1.1614441871643066, "learning_rate": 0.0001, "loss": 0.0185, "step": 47510 }, { "epoch": 312.63157894736844, "grad_norm": 1.6518096923828125, "learning_rate": 0.0001, "loss": 0.0238, "step": 47520 }, { "epoch": 312.69736842105266, "grad_norm": 1.8148397207260132, "learning_rate": 0.0001, "loss": 0.0173, "step": 47530 }, { "epoch": 312.7631578947368, "grad_norm": 1.6083933115005493, "learning_rate": 0.0001, "loss": 0.0219, "step": 47540 }, { "epoch": 312.82894736842104, "grad_norm": 1.463565468788147, "learning_rate": 0.0001, "loss": 0.0145, "step": 47550 }, { "epoch": 312.89473684210526, "grad_norm": 1.495111346244812, "learning_rate": 0.0001, "loss": 0.0159, "step": 47560 }, { "epoch": 312.9605263157895, "grad_norm": 1.51217520236969, "learning_rate": 0.0001, "loss": 0.019, "step": 47570 }, { "epoch": 313.0263157894737, "grad_norm": 1.3511768579483032, "learning_rate": 0.0001, "loss": 0.0161, "step": 47580 }, { "epoch": 313.0921052631579, "grad_norm": 1.7560585737228394, "learning_rate": 0.0001, "loss": 0.0167, "step": 47590 }, { "epoch": 313.1578947368421, "grad_norm": 2.0053718090057373, "learning_rate": 0.0001, "loss": 0.017, "step": 47600 }, { "epoch": 313.2236842105263, "grad_norm": 1.5399121046066284, "learning_rate": 0.0001, "loss": 0.0153, "step": 47610 }, { "epoch": 313.2894736842105, "grad_norm": 1.5151206254959106, "learning_rate": 0.0001, "loss": 0.0178, "step": 47620 }, { "epoch": 313.35526315789474, "grad_norm": 1.1411833763122559, "learning_rate": 0.0001, "loss": 0.0145, "step": 47630 }, { "epoch": 313.42105263157896, "grad_norm": 1.580096960067749, "learning_rate": 0.0001, "loss": 0.0245, "step": 47640 }, { "epoch": 313.4868421052632, "grad_norm": 1.390628695487976, "learning_rate": 0.0001, "loss": 0.0159, "step": 47650 }, { "epoch": 313.55263157894734, "grad_norm": 1.365640640258789, "learning_rate": 0.0001, "loss": 0.016, "step": 47660 }, { "epoch": 313.61842105263156, "grad_norm": 1.6940094232559204, "learning_rate": 0.0001, "loss": 0.0168, "step": 47670 }, { "epoch": 313.6842105263158, "grad_norm": 1.4630076885223389, "learning_rate": 0.0001, "loss": 0.0189, "step": 47680 }, { "epoch": 313.75, "grad_norm": 1.2837198972702026, "learning_rate": 0.0001, "loss": 0.0184, "step": 47690 }, { "epoch": 313.8157894736842, "grad_norm": 1.7907344102859497, "learning_rate": 0.0001, "loss": 0.0185, "step": 47700 }, { "epoch": 313.88157894736844, "grad_norm": 1.8062968254089355, "learning_rate": 0.0001, "loss": 0.0177, "step": 47710 }, { "epoch": 313.94736842105266, "grad_norm": 1.3758307695388794, "learning_rate": 0.0001, "loss": 0.0223, "step": 47720 }, { "epoch": 314.0131578947368, "grad_norm": 1.2518939971923828, "learning_rate": 0.0001, "loss": 0.0174, "step": 47730 }, { "epoch": 314.07894736842104, "grad_norm": 1.4460244178771973, "learning_rate": 0.0001, "loss": 0.0167, "step": 47740 }, { "epoch": 314.14473684210526, "grad_norm": 1.2487008571624756, "learning_rate": 0.0001, "loss": 0.0212, "step": 47750 }, { "epoch": 314.2105263157895, "grad_norm": 1.369040846824646, "learning_rate": 0.0001, "loss": 0.0213, "step": 47760 }, { "epoch": 314.2763157894737, "grad_norm": 1.0697288513183594, "learning_rate": 0.0001, "loss": 0.0157, "step": 47770 }, { "epoch": 314.3421052631579, "grad_norm": 1.3890222311019897, "learning_rate": 0.0001, "loss": 0.0177, "step": 47780 }, { "epoch": 314.4078947368421, "grad_norm": 1.600687861442566, "learning_rate": 0.0001, "loss": 0.0239, "step": 47790 }, { "epoch": 314.4736842105263, "grad_norm": 1.7080296277999878, "learning_rate": 0.0001, "loss": 0.0195, "step": 47800 }, { "epoch": 314.5394736842105, "grad_norm": 1.7477803230285645, "learning_rate": 0.0001, "loss": 0.0194, "step": 47810 }, { "epoch": 314.60526315789474, "grad_norm": 1.875421404838562, "learning_rate": 0.0001, "loss": 0.0183, "step": 47820 }, { "epoch": 314.67105263157896, "grad_norm": 1.961398959159851, "learning_rate": 0.0001, "loss": 0.0194, "step": 47830 }, { "epoch": 314.7368421052632, "grad_norm": 1.6751363277435303, "learning_rate": 0.0001, "loss": 0.0153, "step": 47840 }, { "epoch": 314.80263157894734, "grad_norm": 2.1440680027008057, "learning_rate": 0.0001, "loss": 0.0155, "step": 47850 }, { "epoch": 314.86842105263156, "grad_norm": 1.7369657754898071, "learning_rate": 0.0001, "loss": 0.0174, "step": 47860 }, { "epoch": 314.9342105263158, "grad_norm": 1.5105646848678589, "learning_rate": 0.0001, "loss": 0.0157, "step": 47870 }, { "epoch": 315.0, "grad_norm": 1.651357889175415, "learning_rate": 0.0001, "loss": 0.0169, "step": 47880 }, { "epoch": 315.0657894736842, "grad_norm": 1.3023316860198975, "learning_rate": 0.0001, "loss": 0.0189, "step": 47890 }, { "epoch": 315.13157894736844, "grad_norm": 1.3813222646713257, "learning_rate": 0.0001, "loss": 0.0168, "step": 47900 }, { "epoch": 315.19736842105266, "grad_norm": 1.9526652097702026, "learning_rate": 0.0001, "loss": 0.0192, "step": 47910 }, { "epoch": 315.2631578947368, "grad_norm": 1.5487395524978638, "learning_rate": 0.0001, "loss": 0.0162, "step": 47920 }, { "epoch": 315.32894736842104, "grad_norm": 1.6921972036361694, "learning_rate": 0.0001, "loss": 0.0175, "step": 47930 }, { "epoch": 315.39473684210526, "grad_norm": 1.7141655683517456, "learning_rate": 0.0001, "loss": 0.0156, "step": 47940 }, { "epoch": 315.4605263157895, "grad_norm": 1.3464572429656982, "learning_rate": 0.0001, "loss": 0.0158, "step": 47950 }, { "epoch": 315.5263157894737, "grad_norm": 1.668638825416565, "learning_rate": 0.0001, "loss": 0.0144, "step": 47960 }, { "epoch": 315.5921052631579, "grad_norm": 2.001143217086792, "learning_rate": 0.0001, "loss": 0.0186, "step": 47970 }, { "epoch": 315.6578947368421, "grad_norm": 1.4042088985443115, "learning_rate": 0.0001, "loss": 0.0191, "step": 47980 }, { "epoch": 315.7236842105263, "grad_norm": 1.7563093900680542, "learning_rate": 0.0001, "loss": 0.0205, "step": 47990 }, { "epoch": 315.7894736842105, "grad_norm": 2.059664249420166, "learning_rate": 0.0001, "loss": 0.0184, "step": 48000 }, { "epoch": 315.85526315789474, "grad_norm": 1.6638269424438477, "learning_rate": 0.0001, "loss": 0.0193, "step": 48010 }, { "epoch": 315.92105263157896, "grad_norm": 1.4503939151763916, "learning_rate": 0.0001, "loss": 0.0157, "step": 48020 }, { "epoch": 315.9868421052632, "grad_norm": 1.4845973253250122, "learning_rate": 0.0001, "loss": 0.0182, "step": 48030 }, { "epoch": 316.05263157894734, "grad_norm": 1.565382719039917, "learning_rate": 0.0001, "loss": 0.017, "step": 48040 }, { "epoch": 316.11842105263156, "grad_norm": 1.381007194519043, "learning_rate": 0.0001, "loss": 0.0164, "step": 48050 }, { "epoch": 316.1842105263158, "grad_norm": 1.9344784021377563, "learning_rate": 0.0001, "loss": 0.0212, "step": 48060 }, { "epoch": 316.25, "grad_norm": 1.4835454225540161, "learning_rate": 0.0001, "loss": 0.0144, "step": 48070 }, { "epoch": 316.3157894736842, "grad_norm": 1.4066892862319946, "learning_rate": 0.0001, "loss": 0.0145, "step": 48080 }, { "epoch": 316.38157894736844, "grad_norm": 1.839747667312622, "learning_rate": 0.0001, "loss": 0.0161, "step": 48090 }, { "epoch": 316.44736842105266, "grad_norm": 1.7092664241790771, "learning_rate": 0.0001, "loss": 0.0191, "step": 48100 }, { "epoch": 316.5131578947368, "grad_norm": 1.771615982055664, "learning_rate": 0.0001, "loss": 0.0203, "step": 48110 }, { "epoch": 316.57894736842104, "grad_norm": 1.5654197931289673, "learning_rate": 0.0001, "loss": 0.0168, "step": 48120 }, { "epoch": 316.64473684210526, "grad_norm": 1.7810639142990112, "learning_rate": 0.0001, "loss": 0.0176, "step": 48130 }, { "epoch": 316.7105263157895, "grad_norm": 1.6459170579910278, "learning_rate": 0.0001, "loss": 0.0154, "step": 48140 }, { "epoch": 316.7763157894737, "grad_norm": 1.5702918767929077, "learning_rate": 0.0001, "loss": 0.0182, "step": 48150 }, { "epoch": 316.8421052631579, "grad_norm": 1.2241885662078857, "learning_rate": 0.0001, "loss": 0.0179, "step": 48160 }, { "epoch": 316.9078947368421, "grad_norm": 1.945556879043579, "learning_rate": 0.0001, "loss": 0.0192, "step": 48170 }, { "epoch": 316.9736842105263, "grad_norm": 1.7209275960922241, "learning_rate": 0.0001, "loss": 0.0205, "step": 48180 }, { "epoch": 317.0394736842105, "grad_norm": 1.578973650932312, "learning_rate": 0.0001, "loss": 0.0202, "step": 48190 }, { "epoch": 317.10526315789474, "grad_norm": 1.2033087015151978, "learning_rate": 0.0001, "loss": 0.0217, "step": 48200 }, { "epoch": 317.17105263157896, "grad_norm": 1.9005202054977417, "learning_rate": 0.0001, "loss": 0.0203, "step": 48210 }, { "epoch": 317.2368421052632, "grad_norm": 1.4481886625289917, "learning_rate": 0.0001, "loss": 0.0154, "step": 48220 }, { "epoch": 317.30263157894734, "grad_norm": 1.6201293468475342, "learning_rate": 0.0001, "loss": 0.0202, "step": 48230 }, { "epoch": 317.36842105263156, "grad_norm": 1.1915428638458252, "learning_rate": 0.0001, "loss": 0.0189, "step": 48240 }, { "epoch": 317.4342105263158, "grad_norm": 1.3044307231903076, "learning_rate": 0.0001, "loss": 0.0145, "step": 48250 }, { "epoch": 317.5, "grad_norm": 1.959741473197937, "learning_rate": 0.0001, "loss": 0.0153, "step": 48260 }, { "epoch": 317.5657894736842, "grad_norm": 1.3962808847427368, "learning_rate": 0.0001, "loss": 0.0207, "step": 48270 }, { "epoch": 317.63157894736844, "grad_norm": 1.8543192148208618, "learning_rate": 0.0001, "loss": 0.0182, "step": 48280 }, { "epoch": 317.69736842105266, "grad_norm": 2.1572604179382324, "learning_rate": 0.0001, "loss": 0.0161, "step": 48290 }, { "epoch": 317.7631578947368, "grad_norm": 1.6483392715454102, "learning_rate": 0.0001, "loss": 0.0178, "step": 48300 }, { "epoch": 317.82894736842104, "grad_norm": 1.66492760181427, "learning_rate": 0.0001, "loss": 0.0168, "step": 48310 }, { "epoch": 317.89473684210526, "grad_norm": 1.7164217233657837, "learning_rate": 0.0001, "loss": 0.0153, "step": 48320 }, { "epoch": 317.9605263157895, "grad_norm": 1.0019901990890503, "learning_rate": 0.0001, "loss": 0.0166, "step": 48330 }, { "epoch": 318.0263157894737, "grad_norm": 1.4462822675704956, "learning_rate": 0.0001, "loss": 0.0173, "step": 48340 }, { "epoch": 318.0921052631579, "grad_norm": 1.3309669494628906, "learning_rate": 0.0001, "loss": 0.0186, "step": 48350 }, { "epoch": 318.1578947368421, "grad_norm": 1.4818209409713745, "learning_rate": 0.0001, "loss": 0.0173, "step": 48360 }, { "epoch": 318.2236842105263, "grad_norm": 1.4894031286239624, "learning_rate": 0.0001, "loss": 0.0163, "step": 48370 }, { "epoch": 318.2894736842105, "grad_norm": 1.6278996467590332, "learning_rate": 0.0001, "loss": 0.0152, "step": 48380 }, { "epoch": 318.35526315789474, "grad_norm": 1.426228642463684, "learning_rate": 0.0001, "loss": 0.0173, "step": 48390 }, { "epoch": 318.42105263157896, "grad_norm": 1.5620708465576172, "learning_rate": 0.0001, "loss": 0.0209, "step": 48400 }, { "epoch": 318.4868421052632, "grad_norm": 1.7392812967300415, "learning_rate": 0.0001, "loss": 0.0207, "step": 48410 }, { "epoch": 318.55263157894734, "grad_norm": 1.8093184232711792, "learning_rate": 0.0001, "loss": 0.0246, "step": 48420 }, { "epoch": 318.61842105263156, "grad_norm": 1.7236640453338623, "learning_rate": 0.0001, "loss": 0.0156, "step": 48430 }, { "epoch": 318.6842105263158, "grad_norm": 1.7047078609466553, "learning_rate": 0.0001, "loss": 0.0166, "step": 48440 }, { "epoch": 318.75, "grad_norm": 1.678489089012146, "learning_rate": 0.0001, "loss": 0.0196, "step": 48450 }, { "epoch": 318.8157894736842, "grad_norm": 1.5369623899459839, "learning_rate": 0.0001, "loss": 0.0155, "step": 48460 }, { "epoch": 318.88157894736844, "grad_norm": 1.6265389919281006, "learning_rate": 0.0001, "loss": 0.0184, "step": 48470 }, { "epoch": 318.94736842105266, "grad_norm": 1.7916585206985474, "learning_rate": 0.0001, "loss": 0.0158, "step": 48480 }, { "epoch": 319.0131578947368, "grad_norm": 1.6092939376831055, "learning_rate": 0.0001, "loss": 0.0161, "step": 48490 }, { "epoch": 319.07894736842104, "grad_norm": 1.4534757137298584, "learning_rate": 0.0001, "loss": 0.0175, "step": 48500 }, { "epoch": 319.14473684210526, "grad_norm": 1.3481777906417847, "learning_rate": 0.0001, "loss": 0.0187, "step": 48510 }, { "epoch": 319.2105263157895, "grad_norm": 2.0076191425323486, "learning_rate": 0.0001, "loss": 0.0171, "step": 48520 }, { "epoch": 319.2763157894737, "grad_norm": 1.6637908220291138, "learning_rate": 0.0001, "loss": 0.0214, "step": 48530 }, { "epoch": 319.3421052631579, "grad_norm": 1.6308497190475464, "learning_rate": 0.0001, "loss": 0.017, "step": 48540 }, { "epoch": 319.4078947368421, "grad_norm": 1.7615281343460083, "learning_rate": 0.0001, "loss": 0.0154, "step": 48550 }, { "epoch": 319.4736842105263, "grad_norm": 0.9261285066604614, "learning_rate": 0.0001, "loss": 0.0157, "step": 48560 }, { "epoch": 319.5394736842105, "grad_norm": 1.476094365119934, "learning_rate": 0.0001, "loss": 0.0189, "step": 48570 }, { "epoch": 319.60526315789474, "grad_norm": 1.3211424350738525, "learning_rate": 0.0001, "loss": 0.0177, "step": 48580 }, { "epoch": 319.67105263157896, "grad_norm": 1.2656217813491821, "learning_rate": 0.0001, "loss": 0.0202, "step": 48590 }, { "epoch": 319.7368421052632, "grad_norm": 1.4323935508728027, "learning_rate": 0.0001, "loss": 0.0152, "step": 48600 }, { "epoch": 319.80263157894734, "grad_norm": 1.9711366891860962, "learning_rate": 0.0001, "loss": 0.0156, "step": 48610 }, { "epoch": 319.86842105263156, "grad_norm": 1.4336167573928833, "learning_rate": 0.0001, "loss": 0.0167, "step": 48620 }, { "epoch": 319.9342105263158, "grad_norm": 1.425201177597046, "learning_rate": 0.0001, "loss": 0.0173, "step": 48630 }, { "epoch": 320.0, "grad_norm": 1.6155577898025513, "learning_rate": 0.0001, "loss": 0.0165, "step": 48640 }, { "epoch": 320.0657894736842, "grad_norm": 1.7498080730438232, "learning_rate": 0.0001, "loss": 0.0153, "step": 48650 }, { "epoch": 320.13157894736844, "grad_norm": 2.3127691745758057, "learning_rate": 0.0001, "loss": 0.0203, "step": 48660 }, { "epoch": 320.19736842105266, "grad_norm": 1.4791053533554077, "learning_rate": 0.0001, "loss": 0.0191, "step": 48670 }, { "epoch": 320.2631578947368, "grad_norm": 1.5200303792953491, "learning_rate": 0.0001, "loss": 0.0171, "step": 48680 }, { "epoch": 320.32894736842104, "grad_norm": 1.225582242012024, "learning_rate": 0.0001, "loss": 0.0187, "step": 48690 }, { "epoch": 320.39473684210526, "grad_norm": 1.4129056930541992, "learning_rate": 0.0001, "loss": 0.0236, "step": 48700 }, { "epoch": 320.4605263157895, "grad_norm": 1.4802333116531372, "learning_rate": 0.0001, "loss": 0.0143, "step": 48710 }, { "epoch": 320.5263157894737, "grad_norm": 1.772744059562683, "learning_rate": 0.0001, "loss": 0.0175, "step": 48720 }, { "epoch": 320.5921052631579, "grad_norm": 1.6795697212219238, "learning_rate": 0.0001, "loss": 0.0198, "step": 48730 }, { "epoch": 320.6578947368421, "grad_norm": 1.2418519258499146, "learning_rate": 0.0001, "loss": 0.0172, "step": 48740 }, { "epoch": 320.7236842105263, "grad_norm": 1.7296746969223022, "learning_rate": 0.0001, "loss": 0.0168, "step": 48750 }, { "epoch": 320.7894736842105, "grad_norm": 1.5501439571380615, "learning_rate": 0.0001, "loss": 0.0164, "step": 48760 }, { "epoch": 320.85526315789474, "grad_norm": 1.7785251140594482, "learning_rate": 0.0001, "loss": 0.0154, "step": 48770 }, { "epoch": 320.92105263157896, "grad_norm": 1.583870530128479, "learning_rate": 0.0001, "loss": 0.0181, "step": 48780 }, { "epoch": 320.9868421052632, "grad_norm": 1.6623904705047607, "learning_rate": 0.0001, "loss": 0.0181, "step": 48790 }, { "epoch": 321.05263157894734, "grad_norm": 2.007920265197754, "learning_rate": 0.0001, "loss": 0.0186, "step": 48800 }, { "epoch": 321.11842105263156, "grad_norm": 1.4419032335281372, "learning_rate": 0.0001, "loss": 0.0203, "step": 48810 }, { "epoch": 321.1842105263158, "grad_norm": 1.7184045314788818, "learning_rate": 0.0001, "loss": 0.0144, "step": 48820 }, { "epoch": 321.25, "grad_norm": 1.3130673170089722, "learning_rate": 0.0001, "loss": 0.0157, "step": 48830 }, { "epoch": 321.3157894736842, "grad_norm": 1.3326605558395386, "learning_rate": 0.0001, "loss": 0.0212, "step": 48840 }, { "epoch": 321.38157894736844, "grad_norm": 1.63133704662323, "learning_rate": 0.0001, "loss": 0.0187, "step": 48850 }, { "epoch": 321.44736842105266, "grad_norm": 1.9965230226516724, "learning_rate": 0.0001, "loss": 0.0174, "step": 48860 }, { "epoch": 321.5131578947368, "grad_norm": 1.4701114892959595, "learning_rate": 0.0001, "loss": 0.0191, "step": 48870 }, { "epoch": 321.57894736842104, "grad_norm": 1.658754587173462, "learning_rate": 0.0001, "loss": 0.0175, "step": 48880 }, { "epoch": 321.64473684210526, "grad_norm": 1.4717308282852173, "learning_rate": 0.0001, "loss": 0.0167, "step": 48890 }, { "epoch": 321.7105263157895, "grad_norm": 1.7615923881530762, "learning_rate": 0.0001, "loss": 0.0159, "step": 48900 }, { "epoch": 321.7763157894737, "grad_norm": 1.7523994445800781, "learning_rate": 0.0001, "loss": 0.0138, "step": 48910 }, { "epoch": 321.8421052631579, "grad_norm": 1.5202969312667847, "learning_rate": 0.0001, "loss": 0.0207, "step": 48920 }, { "epoch": 321.9078947368421, "grad_norm": 1.8567513227462769, "learning_rate": 0.0001, "loss": 0.0172, "step": 48930 }, { "epoch": 321.9736842105263, "grad_norm": 1.1869208812713623, "learning_rate": 0.0001, "loss": 0.0179, "step": 48940 }, { "epoch": 322.0394736842105, "grad_norm": 1.4417260885238647, "learning_rate": 0.0001, "loss": 0.019, "step": 48950 }, { "epoch": 322.10526315789474, "grad_norm": 1.325147271156311, "learning_rate": 0.0001, "loss": 0.0182, "step": 48960 }, { "epoch": 322.17105263157896, "grad_norm": 1.3198597431182861, "learning_rate": 0.0001, "loss": 0.017, "step": 48970 }, { "epoch": 322.2368421052632, "grad_norm": 1.5524911880493164, "learning_rate": 0.0001, "loss": 0.0175, "step": 48980 }, { "epoch": 322.30263157894734, "grad_norm": 1.1822201013565063, "learning_rate": 0.0001, "loss": 0.0178, "step": 48990 }, { "epoch": 322.36842105263156, "grad_norm": 1.511824369430542, "learning_rate": 0.0001, "loss": 0.0156, "step": 49000 }, { "epoch": 322.4342105263158, "grad_norm": 1.4333122968673706, "learning_rate": 0.0001, "loss": 0.0174, "step": 49010 }, { "epoch": 322.5, "grad_norm": 1.9054112434387207, "learning_rate": 0.0001, "loss": 0.0145, "step": 49020 }, { "epoch": 322.5657894736842, "grad_norm": 1.5622810125350952, "learning_rate": 0.0001, "loss": 0.0139, "step": 49030 }, { "epoch": 322.63157894736844, "grad_norm": 1.9579776525497437, "learning_rate": 0.0001, "loss": 0.0203, "step": 49040 }, { "epoch": 322.69736842105266, "grad_norm": 1.7661747932434082, "learning_rate": 0.0001, "loss": 0.0172, "step": 49050 }, { "epoch": 322.7631578947368, "grad_norm": 1.8376582860946655, "learning_rate": 0.0001, "loss": 0.0178, "step": 49060 }, { "epoch": 322.82894736842104, "grad_norm": 1.6864091157913208, "learning_rate": 0.0001, "loss": 0.0215, "step": 49070 }, { "epoch": 322.89473684210526, "grad_norm": 1.6482384204864502, "learning_rate": 0.0001, "loss": 0.019, "step": 49080 }, { "epoch": 322.9605263157895, "grad_norm": 1.803493618965149, "learning_rate": 0.0001, "loss": 0.0187, "step": 49090 }, { "epoch": 323.0263157894737, "grad_norm": 1.8552426099777222, "learning_rate": 0.0001, "loss": 0.0159, "step": 49100 }, { "epoch": 323.0921052631579, "grad_norm": 1.4646451473236084, "learning_rate": 0.0001, "loss": 0.0181, "step": 49110 }, { "epoch": 323.1578947368421, "grad_norm": 1.3899227380752563, "learning_rate": 0.0001, "loss": 0.0167, "step": 49120 }, { "epoch": 323.2236842105263, "grad_norm": 1.5662225484848022, "learning_rate": 0.0001, "loss": 0.0172, "step": 49130 }, { "epoch": 323.2894736842105, "grad_norm": 1.6541731357574463, "learning_rate": 0.0001, "loss": 0.0177, "step": 49140 }, { "epoch": 323.35526315789474, "grad_norm": 1.616887092590332, "learning_rate": 0.0001, "loss": 0.0206, "step": 49150 }, { "epoch": 323.42105263157896, "grad_norm": 2.0656375885009766, "learning_rate": 0.0001, "loss": 0.0199, "step": 49160 }, { "epoch": 323.4868421052632, "grad_norm": 1.6425933837890625, "learning_rate": 0.0001, "loss": 0.0185, "step": 49170 }, { "epoch": 323.55263157894734, "grad_norm": 1.6146291494369507, "learning_rate": 0.0001, "loss": 0.0156, "step": 49180 }, { "epoch": 323.61842105263156, "grad_norm": 1.837920069694519, "learning_rate": 0.0001, "loss": 0.0173, "step": 49190 }, { "epoch": 323.6842105263158, "grad_norm": 2.2478556632995605, "learning_rate": 0.0001, "loss": 0.0133, "step": 49200 }, { "epoch": 323.75, "grad_norm": 1.764725685119629, "learning_rate": 0.0001, "loss": 0.0159, "step": 49210 }, { "epoch": 323.8157894736842, "grad_norm": 1.6534205675125122, "learning_rate": 0.0001, "loss": 0.0214, "step": 49220 }, { "epoch": 323.88157894736844, "grad_norm": 1.8020668029785156, "learning_rate": 0.0001, "loss": 0.0151, "step": 49230 }, { "epoch": 323.94736842105266, "grad_norm": 1.7826063632965088, "learning_rate": 0.0001, "loss": 0.0181, "step": 49240 }, { "epoch": 324.0131578947368, "grad_norm": 1.5395160913467407, "learning_rate": 0.0001, "loss": 0.0167, "step": 49250 }, { "epoch": 324.07894736842104, "grad_norm": 1.9046974182128906, "learning_rate": 0.0001, "loss": 0.0188, "step": 49260 }, { "epoch": 324.14473684210526, "grad_norm": 1.0891339778900146, "learning_rate": 0.0001, "loss": 0.0186, "step": 49270 }, { "epoch": 324.2105263157895, "grad_norm": 1.4977309703826904, "learning_rate": 0.0001, "loss": 0.014, "step": 49280 }, { "epoch": 324.2763157894737, "grad_norm": 1.2762622833251953, "learning_rate": 0.0001, "loss": 0.0199, "step": 49290 }, { "epoch": 324.3421052631579, "grad_norm": 1.3813307285308838, "learning_rate": 0.0001, "loss": 0.0168, "step": 49300 }, { "epoch": 324.4078947368421, "grad_norm": 1.2041089534759521, "learning_rate": 0.0001, "loss": 0.0177, "step": 49310 }, { "epoch": 324.4736842105263, "grad_norm": 1.3826967477798462, "learning_rate": 0.0001, "loss": 0.0155, "step": 49320 }, { "epoch": 324.5394736842105, "grad_norm": 1.373543620109558, "learning_rate": 0.0001, "loss": 0.0179, "step": 49330 }, { "epoch": 324.60526315789474, "grad_norm": 1.2343955039978027, "learning_rate": 0.0001, "loss": 0.0196, "step": 49340 }, { "epoch": 324.67105263157896, "grad_norm": 1.6421723365783691, "learning_rate": 0.0001, "loss": 0.016, "step": 49350 }, { "epoch": 324.7368421052632, "grad_norm": 1.5644961595535278, "learning_rate": 0.0001, "loss": 0.0169, "step": 49360 }, { "epoch": 324.80263157894734, "grad_norm": 1.3648821115493774, "learning_rate": 0.0001, "loss": 0.0204, "step": 49370 }, { "epoch": 324.86842105263156, "grad_norm": 1.2508383989334106, "learning_rate": 0.0001, "loss": 0.0215, "step": 49380 }, { "epoch": 324.9342105263158, "grad_norm": 1.5548263788223267, "learning_rate": 0.0001, "loss": 0.0151, "step": 49390 }, { "epoch": 325.0, "grad_norm": 1.6710079908370972, "learning_rate": 0.0001, "loss": 0.017, "step": 49400 }, { "epoch": 325.0657894736842, "grad_norm": 1.2708319425582886, "learning_rate": 0.0001, "loss": 0.0199, "step": 49410 }, { "epoch": 325.13157894736844, "grad_norm": 1.57786226272583, "learning_rate": 0.0001, "loss": 0.0194, "step": 49420 }, { "epoch": 325.19736842105266, "grad_norm": 1.7470730543136597, "learning_rate": 0.0001, "loss": 0.0155, "step": 49430 }, { "epoch": 325.2631578947368, "grad_norm": 1.2231848239898682, "learning_rate": 0.0001, "loss": 0.0185, "step": 49440 }, { "epoch": 325.32894736842104, "grad_norm": 1.3433468341827393, "learning_rate": 0.0001, "loss": 0.0181, "step": 49450 }, { "epoch": 325.39473684210526, "grad_norm": 1.3862559795379639, "learning_rate": 0.0001, "loss": 0.0163, "step": 49460 }, { "epoch": 325.4605263157895, "grad_norm": 1.5070297718048096, "learning_rate": 0.0001, "loss": 0.0188, "step": 49470 }, { "epoch": 325.5263157894737, "grad_norm": 1.4452400207519531, "learning_rate": 0.0001, "loss": 0.0176, "step": 49480 }, { "epoch": 325.5921052631579, "grad_norm": 1.7987565994262695, "learning_rate": 0.0001, "loss": 0.0155, "step": 49490 }, { "epoch": 325.6578947368421, "grad_norm": 1.7784428596496582, "learning_rate": 0.0001, "loss": 0.014, "step": 49500 }, { "epoch": 325.7236842105263, "grad_norm": 1.7121187448501587, "learning_rate": 0.0001, "loss": 0.0188, "step": 49510 }, { "epoch": 325.7894736842105, "grad_norm": 1.708999752998352, "learning_rate": 0.0001, "loss": 0.0161, "step": 49520 }, { "epoch": 325.85526315789474, "grad_norm": 1.8820278644561768, "learning_rate": 0.0001, "loss": 0.0168, "step": 49530 }, { "epoch": 325.92105263157896, "grad_norm": 1.6973618268966675, "learning_rate": 0.0001, "loss": 0.0177, "step": 49540 }, { "epoch": 325.9868421052632, "grad_norm": 1.3779315948486328, "learning_rate": 0.0001, "loss": 0.0208, "step": 49550 }, { "epoch": 326.05263157894734, "grad_norm": 1.1803700923919678, "learning_rate": 0.0001, "loss": 0.0167, "step": 49560 }, { "epoch": 326.11842105263156, "grad_norm": 1.5487933158874512, "learning_rate": 0.0001, "loss": 0.0182, "step": 49570 }, { "epoch": 326.1842105263158, "grad_norm": 1.0748469829559326, "learning_rate": 0.0001, "loss": 0.0185, "step": 49580 }, { "epoch": 326.25, "grad_norm": 1.2242127656936646, "learning_rate": 0.0001, "loss": 0.0192, "step": 49590 }, { "epoch": 326.3157894736842, "grad_norm": 1.767220377922058, "learning_rate": 0.0001, "loss": 0.014, "step": 49600 }, { "epoch": 326.38157894736844, "grad_norm": 1.457562804222107, "learning_rate": 0.0001, "loss": 0.0182, "step": 49610 }, { "epoch": 326.44736842105266, "grad_norm": 1.473036527633667, "learning_rate": 0.0001, "loss": 0.0184, "step": 49620 }, { "epoch": 326.5131578947368, "grad_norm": 1.3551653623580933, "learning_rate": 0.0001, "loss": 0.0194, "step": 49630 }, { "epoch": 326.57894736842104, "grad_norm": 1.4046863317489624, "learning_rate": 0.0001, "loss": 0.0194, "step": 49640 }, { "epoch": 326.64473684210526, "grad_norm": 1.1363285779953003, "learning_rate": 0.0001, "loss": 0.0193, "step": 49650 }, { "epoch": 326.7105263157895, "grad_norm": 1.347383737564087, "learning_rate": 0.0001, "loss": 0.0182, "step": 49660 }, { "epoch": 326.7763157894737, "grad_norm": 1.333208680152893, "learning_rate": 0.0001, "loss": 0.0149, "step": 49670 }, { "epoch": 326.8421052631579, "grad_norm": 1.6053721904754639, "learning_rate": 0.0001, "loss": 0.0156, "step": 49680 }, { "epoch": 326.9078947368421, "grad_norm": 1.3222782611846924, "learning_rate": 0.0001, "loss": 0.015, "step": 49690 }, { "epoch": 326.9736842105263, "grad_norm": 1.866740345954895, "learning_rate": 0.0001, "loss": 0.0184, "step": 49700 }, { "epoch": 327.0394736842105, "grad_norm": 2.228929281234741, "learning_rate": 0.0001, "loss": 0.0224, "step": 49710 }, { "epoch": 327.10526315789474, "grad_norm": 1.4728108644485474, "learning_rate": 0.0001, "loss": 0.0189, "step": 49720 }, { "epoch": 327.17105263157896, "grad_norm": 1.4685639142990112, "learning_rate": 0.0001, "loss": 0.0183, "step": 49730 }, { "epoch": 327.2368421052632, "grad_norm": 1.181406855583191, "learning_rate": 0.0001, "loss": 0.0165, "step": 49740 }, { "epoch": 327.30263157894734, "grad_norm": 1.33230721950531, "learning_rate": 0.0001, "loss": 0.0187, "step": 49750 }, { "epoch": 327.36842105263156, "grad_norm": 1.1247506141662598, "learning_rate": 0.0001, "loss": 0.0169, "step": 49760 }, { "epoch": 327.4342105263158, "grad_norm": 1.2707046270370483, "learning_rate": 0.0001, "loss": 0.0165, "step": 49770 }, { "epoch": 327.5, "grad_norm": 1.761496901512146, "learning_rate": 0.0001, "loss": 0.0169, "step": 49780 }, { "epoch": 327.5657894736842, "grad_norm": 2.2601282596588135, "learning_rate": 0.0001, "loss": 0.0143, "step": 49790 }, { "epoch": 327.63157894736844, "grad_norm": 1.9710009098052979, "learning_rate": 0.0001, "loss": 0.0183, "step": 49800 }, { "epoch": 327.69736842105266, "grad_norm": 2.1033053398132324, "learning_rate": 0.0001, "loss": 0.0175, "step": 49810 }, { "epoch": 327.7631578947368, "grad_norm": 2.205354928970337, "learning_rate": 0.0001, "loss": 0.0173, "step": 49820 }, { "epoch": 327.82894736842104, "grad_norm": 1.7871246337890625, "learning_rate": 0.0001, "loss": 0.0157, "step": 49830 }, { "epoch": 327.89473684210526, "grad_norm": 1.7474578619003296, "learning_rate": 0.0001, "loss": 0.0217, "step": 49840 }, { "epoch": 327.9605263157895, "grad_norm": 1.4927176237106323, "learning_rate": 0.0001, "loss": 0.0184, "step": 49850 }, { "epoch": 328.0263157894737, "grad_norm": 1.1468238830566406, "learning_rate": 0.0001, "loss": 0.0148, "step": 49860 }, { "epoch": 328.0921052631579, "grad_norm": 1.5920541286468506, "learning_rate": 0.0001, "loss": 0.0171, "step": 49870 }, { "epoch": 328.1578947368421, "grad_norm": 1.6532256603240967, "learning_rate": 0.0001, "loss": 0.0211, "step": 49880 }, { "epoch": 328.2236842105263, "grad_norm": 1.1406004428863525, "learning_rate": 0.0001, "loss": 0.019, "step": 49890 }, { "epoch": 328.2894736842105, "grad_norm": 1.5382630825042725, "learning_rate": 0.0001, "loss": 0.023, "step": 49900 }, { "epoch": 328.35526315789474, "grad_norm": 1.7311320304870605, "learning_rate": 0.0001, "loss": 0.017, "step": 49910 }, { "epoch": 328.42105263157896, "grad_norm": 1.5328649282455444, "learning_rate": 0.0001, "loss": 0.017, "step": 49920 }, { "epoch": 328.4868421052632, "grad_norm": 1.3142627477645874, "learning_rate": 0.0001, "loss": 0.0191, "step": 49930 }, { "epoch": 328.55263157894734, "grad_norm": 1.5801326036453247, "learning_rate": 0.0001, "loss": 0.0162, "step": 49940 }, { "epoch": 328.61842105263156, "grad_norm": 1.879833459854126, "learning_rate": 0.0001, "loss": 0.0188, "step": 49950 }, { "epoch": 328.6842105263158, "grad_norm": 1.3264657258987427, "learning_rate": 0.0001, "loss": 0.0156, "step": 49960 }, { "epoch": 328.75, "grad_norm": 1.1201131343841553, "learning_rate": 0.0001, "loss": 0.0153, "step": 49970 }, { "epoch": 328.8157894736842, "grad_norm": 1.578759789466858, "learning_rate": 0.0001, "loss": 0.0157, "step": 49980 }, { "epoch": 328.88157894736844, "grad_norm": 1.6154576539993286, "learning_rate": 0.0001, "loss": 0.0162, "step": 49990 }, { "epoch": 328.94736842105266, "grad_norm": 1.2068408727645874, "learning_rate": 0.0001, "loss": 0.0177, "step": 50000 }, { "epoch": 329.0131578947368, "grad_norm": 1.32369863986969, "learning_rate": 0.0001, "loss": 0.016, "step": 50010 }, { "epoch": 329.07894736842104, "grad_norm": 1.9599692821502686, "learning_rate": 0.0001, "loss": 0.0177, "step": 50020 }, { "epoch": 329.14473684210526, "grad_norm": 1.6888089179992676, "learning_rate": 0.0001, "loss": 0.0205, "step": 50030 }, { "epoch": 329.2105263157895, "grad_norm": 2.0224502086639404, "learning_rate": 0.0001, "loss": 0.016, "step": 50040 }, { "epoch": 329.2763157894737, "grad_norm": 1.980678915977478, "learning_rate": 0.0001, "loss": 0.0148, "step": 50050 }, { "epoch": 329.3421052631579, "grad_norm": 1.4726537466049194, "learning_rate": 0.0001, "loss": 0.0194, "step": 50060 }, { "epoch": 329.4078947368421, "grad_norm": 1.5812183618545532, "learning_rate": 0.0001, "loss": 0.017, "step": 50070 }, { "epoch": 329.4736842105263, "grad_norm": 1.3412189483642578, "learning_rate": 0.0001, "loss": 0.0185, "step": 50080 }, { "epoch": 329.5394736842105, "grad_norm": 1.7207973003387451, "learning_rate": 0.0001, "loss": 0.0162, "step": 50090 }, { "epoch": 329.60526315789474, "grad_norm": 1.673624873161316, "learning_rate": 0.0001, "loss": 0.0203, "step": 50100 }, { "epoch": 329.67105263157896, "grad_norm": 1.1052238941192627, "learning_rate": 0.0001, "loss": 0.0158, "step": 50110 }, { "epoch": 329.7368421052632, "grad_norm": 1.8208651542663574, "learning_rate": 0.0001, "loss": 0.0163, "step": 50120 }, { "epoch": 329.80263157894734, "grad_norm": 1.1571009159088135, "learning_rate": 0.0001, "loss": 0.0184, "step": 50130 }, { "epoch": 329.86842105263156, "grad_norm": 1.4418903589248657, "learning_rate": 0.0001, "loss": 0.0188, "step": 50140 }, { "epoch": 329.9342105263158, "grad_norm": 1.6973131895065308, "learning_rate": 0.0001, "loss": 0.0206, "step": 50150 }, { "epoch": 330.0, "grad_norm": 1.7565433979034424, "learning_rate": 0.0001, "loss": 0.0139, "step": 50160 }, { "epoch": 330.0657894736842, "grad_norm": 1.4966249465942383, "learning_rate": 0.0001, "loss": 0.0167, "step": 50170 }, { "epoch": 330.13157894736844, "grad_norm": 1.4898492097854614, "learning_rate": 0.0001, "loss": 0.0186, "step": 50180 }, { "epoch": 330.19736842105266, "grad_norm": 1.8785943984985352, "learning_rate": 0.0001, "loss": 0.0178, "step": 50190 }, { "epoch": 330.2631578947368, "grad_norm": 2.1176297664642334, "learning_rate": 0.0001, "loss": 0.0161, "step": 50200 }, { "epoch": 330.32894736842104, "grad_norm": 2.038410186767578, "learning_rate": 0.0001, "loss": 0.0189, "step": 50210 }, { "epoch": 330.39473684210526, "grad_norm": 1.7745918035507202, "learning_rate": 0.0001, "loss": 0.0139, "step": 50220 }, { "epoch": 330.4605263157895, "grad_norm": 1.2108184099197388, "learning_rate": 0.0001, "loss": 0.0145, "step": 50230 }, { "epoch": 330.5263157894737, "grad_norm": 1.5322394371032715, "learning_rate": 0.0001, "loss": 0.0198, "step": 50240 }, { "epoch": 330.5921052631579, "grad_norm": 1.4127014875411987, "learning_rate": 0.0001, "loss": 0.0223, "step": 50250 }, { "epoch": 330.6578947368421, "grad_norm": 2.205624580383301, "learning_rate": 0.0001, "loss": 0.0195, "step": 50260 }, { "epoch": 330.7236842105263, "grad_norm": 1.7481518983840942, "learning_rate": 0.0001, "loss": 0.017, "step": 50270 }, { "epoch": 330.7894736842105, "grad_norm": 1.6351635456085205, "learning_rate": 0.0001, "loss": 0.0192, "step": 50280 }, { "epoch": 330.85526315789474, "grad_norm": 1.2573740482330322, "learning_rate": 0.0001, "loss": 0.0172, "step": 50290 }, { "epoch": 330.92105263157896, "grad_norm": 1.5620614290237427, "learning_rate": 0.0001, "loss": 0.0136, "step": 50300 }, { "epoch": 330.9868421052632, "grad_norm": 1.5056235790252686, "learning_rate": 0.0001, "loss": 0.0135, "step": 50310 }, { "epoch": 331.05263157894734, "grad_norm": 1.0200046300888062, "learning_rate": 0.0001, "loss": 0.0182, "step": 50320 }, { "epoch": 331.11842105263156, "grad_norm": 1.54885995388031, "learning_rate": 0.0001, "loss": 0.0185, "step": 50330 }, { "epoch": 331.1842105263158, "grad_norm": 1.8004060983657837, "learning_rate": 0.0001, "loss": 0.0179, "step": 50340 }, { "epoch": 331.25, "grad_norm": 1.4381927251815796, "learning_rate": 0.0001, "loss": 0.0181, "step": 50350 }, { "epoch": 331.3157894736842, "grad_norm": 1.0231952667236328, "learning_rate": 0.0001, "loss": 0.0144, "step": 50360 }, { "epoch": 331.38157894736844, "grad_norm": 1.3526601791381836, "learning_rate": 0.0001, "loss": 0.0198, "step": 50370 }, { "epoch": 331.44736842105266, "grad_norm": 1.239790678024292, "learning_rate": 0.0001, "loss": 0.0177, "step": 50380 }, { "epoch": 331.5131578947368, "grad_norm": 1.6661639213562012, "learning_rate": 0.0001, "loss": 0.018, "step": 50390 }, { "epoch": 331.57894736842104, "grad_norm": 1.348581314086914, "learning_rate": 0.0001, "loss": 0.0166, "step": 50400 }, { "epoch": 331.64473684210526, "grad_norm": 1.3693301677703857, "learning_rate": 0.0001, "loss": 0.0197, "step": 50410 }, { "epoch": 331.7105263157895, "grad_norm": 1.5074303150177002, "learning_rate": 0.0001, "loss": 0.0202, "step": 50420 }, { "epoch": 331.7763157894737, "grad_norm": 1.549525260925293, "learning_rate": 0.0001, "loss": 0.0187, "step": 50430 }, { "epoch": 331.8421052631579, "grad_norm": 1.2087990045547485, "learning_rate": 0.0001, "loss": 0.0155, "step": 50440 }, { "epoch": 331.9078947368421, "grad_norm": 1.4949390888214111, "learning_rate": 0.0001, "loss": 0.0165, "step": 50450 }, { "epoch": 331.9736842105263, "grad_norm": 1.3481495380401611, "learning_rate": 0.0001, "loss": 0.0157, "step": 50460 }, { "epoch": 332.0394736842105, "grad_norm": 1.6495991945266724, "learning_rate": 0.0001, "loss": 0.0151, "step": 50470 }, { "epoch": 332.10526315789474, "grad_norm": 1.6227983236312866, "learning_rate": 0.0001, "loss": 0.0209, "step": 50480 }, { "epoch": 332.17105263157896, "grad_norm": 1.6646394729614258, "learning_rate": 0.0001, "loss": 0.0165, "step": 50490 }, { "epoch": 332.2368421052632, "grad_norm": 1.6508678197860718, "learning_rate": 0.0001, "loss": 0.0208, "step": 50500 }, { "epoch": 332.30263157894734, "grad_norm": 1.832358479499817, "learning_rate": 0.0001, "loss": 0.021, "step": 50510 }, { "epoch": 332.36842105263156, "grad_norm": 1.8391079902648926, "learning_rate": 0.0001, "loss": 0.0167, "step": 50520 }, { "epoch": 332.4342105263158, "grad_norm": 1.237004041671753, "learning_rate": 0.0001, "loss": 0.0157, "step": 50530 }, { "epoch": 332.5, "grad_norm": 1.701847791671753, "learning_rate": 0.0001, "loss": 0.0178, "step": 50540 }, { "epoch": 332.5657894736842, "grad_norm": 1.5481677055358887, "learning_rate": 0.0001, "loss": 0.0169, "step": 50550 }, { "epoch": 332.63157894736844, "grad_norm": 1.499887228012085, "learning_rate": 0.0001, "loss": 0.0185, "step": 50560 }, { "epoch": 332.69736842105266, "grad_norm": 1.5893709659576416, "learning_rate": 0.0001, "loss": 0.0159, "step": 50570 }, { "epoch": 332.7631578947368, "grad_norm": 1.222592830657959, "learning_rate": 0.0001, "loss": 0.0162, "step": 50580 }, { "epoch": 332.82894736842104, "grad_norm": 1.3975499868392944, "learning_rate": 0.0001, "loss": 0.0195, "step": 50590 }, { "epoch": 332.89473684210526, "grad_norm": 1.3164385557174683, "learning_rate": 0.0001, "loss": 0.0154, "step": 50600 }, { "epoch": 332.9605263157895, "grad_norm": 1.1323045492172241, "learning_rate": 0.0001, "loss": 0.0159, "step": 50610 }, { "epoch": 333.0263157894737, "grad_norm": 1.5475482940673828, "learning_rate": 0.0001, "loss": 0.017, "step": 50620 }, { "epoch": 333.0921052631579, "grad_norm": 1.4617913961410522, "learning_rate": 0.0001, "loss": 0.0192, "step": 50630 }, { "epoch": 333.1578947368421, "grad_norm": 1.2158633470535278, "learning_rate": 0.0001, "loss": 0.0169, "step": 50640 }, { "epoch": 333.2236842105263, "grad_norm": 1.3829114437103271, "learning_rate": 0.0001, "loss": 0.0161, "step": 50650 }, { "epoch": 333.2894736842105, "grad_norm": 1.0775151252746582, "learning_rate": 0.0001, "loss": 0.0152, "step": 50660 }, { "epoch": 333.35526315789474, "grad_norm": 1.007922649383545, "learning_rate": 0.0001, "loss": 0.0184, "step": 50670 }, { "epoch": 333.42105263157896, "grad_norm": 1.413653016090393, "learning_rate": 0.0001, "loss": 0.0164, "step": 50680 }, { "epoch": 333.4868421052632, "grad_norm": 1.2965213060379028, "learning_rate": 0.0001, "loss": 0.0174, "step": 50690 }, { "epoch": 333.55263157894734, "grad_norm": 1.6396592855453491, "learning_rate": 0.0001, "loss": 0.0179, "step": 50700 }, { "epoch": 333.61842105263156, "grad_norm": 1.5623513460159302, "learning_rate": 0.0001, "loss": 0.0208, "step": 50710 }, { "epoch": 333.6842105263158, "grad_norm": 1.3039321899414062, "learning_rate": 0.0001, "loss": 0.0261, "step": 50720 }, { "epoch": 333.75, "grad_norm": 1.5634112358093262, "learning_rate": 0.0001, "loss": 0.0179, "step": 50730 }, { "epoch": 333.8157894736842, "grad_norm": 1.557224154472351, "learning_rate": 0.0001, "loss": 0.0192, "step": 50740 }, { "epoch": 333.88157894736844, "grad_norm": 1.3779804706573486, "learning_rate": 0.0001, "loss": 0.0175, "step": 50750 }, { "epoch": 333.94736842105266, "grad_norm": 1.5962541103363037, "learning_rate": 0.0001, "loss": 0.0153, "step": 50760 }, { "epoch": 334.0131578947368, "grad_norm": 1.1872817277908325, "learning_rate": 0.0001, "loss": 0.0143, "step": 50770 }, { "epoch": 334.07894736842104, "grad_norm": 1.0767461061477661, "learning_rate": 0.0001, "loss": 0.0201, "step": 50780 }, { "epoch": 334.14473684210526, "grad_norm": 1.9626834392547607, "learning_rate": 0.0001, "loss": 0.0162, "step": 50790 }, { "epoch": 334.2105263157895, "grad_norm": 1.5404410362243652, "learning_rate": 0.0001, "loss": 0.0179, "step": 50800 }, { "epoch": 334.2763157894737, "grad_norm": 1.800630807876587, "learning_rate": 0.0001, "loss": 0.0165, "step": 50810 }, { "epoch": 334.3421052631579, "grad_norm": 1.9512990713119507, "learning_rate": 0.0001, "loss": 0.0156, "step": 50820 }, { "epoch": 334.4078947368421, "grad_norm": 2.0595147609710693, "learning_rate": 0.0001, "loss": 0.0174, "step": 50830 }, { "epoch": 334.4736842105263, "grad_norm": 2.2933340072631836, "learning_rate": 0.0001, "loss": 0.0159, "step": 50840 }, { "epoch": 334.5394736842105, "grad_norm": 1.544209361076355, "learning_rate": 0.0001, "loss": 0.018, "step": 50850 }, { "epoch": 334.60526315789474, "grad_norm": 1.7276822328567505, "learning_rate": 0.0001, "loss": 0.0188, "step": 50860 }, { "epoch": 334.67105263157896, "grad_norm": 1.7578703165054321, "learning_rate": 0.0001, "loss": 0.0171, "step": 50870 }, { "epoch": 334.7368421052632, "grad_norm": 2.024874210357666, "learning_rate": 0.0001, "loss": 0.0176, "step": 50880 }, { "epoch": 334.80263157894734, "grad_norm": 1.7303212881088257, "learning_rate": 0.0001, "loss": 0.0189, "step": 50890 }, { "epoch": 334.86842105263156, "grad_norm": 1.6379870176315308, "learning_rate": 0.0001, "loss": 0.0197, "step": 50900 }, { "epoch": 334.9342105263158, "grad_norm": 2.06603741645813, "learning_rate": 0.0001, "loss": 0.0148, "step": 50910 }, { "epoch": 335.0, "grad_norm": 1.707995057106018, "learning_rate": 0.0001, "loss": 0.0172, "step": 50920 }, { "epoch": 335.0657894736842, "grad_norm": 1.8973135948181152, "learning_rate": 0.0001, "loss": 0.0149, "step": 50930 }, { "epoch": 335.13157894736844, "grad_norm": 1.579654335975647, "learning_rate": 0.0001, "loss": 0.0156, "step": 50940 }, { "epoch": 335.19736842105266, "grad_norm": 1.4766969680786133, "learning_rate": 0.0001, "loss": 0.017, "step": 50950 }, { "epoch": 335.2631578947368, "grad_norm": 1.6880054473876953, "learning_rate": 0.0001, "loss": 0.0175, "step": 50960 }, { "epoch": 335.32894736842104, "grad_norm": 1.548804521560669, "learning_rate": 0.0001, "loss": 0.0152, "step": 50970 }, { "epoch": 335.39473684210526, "grad_norm": 1.783152461051941, "learning_rate": 0.0001, "loss": 0.0156, "step": 50980 }, { "epoch": 335.4605263157895, "grad_norm": 1.447503924369812, "learning_rate": 0.0001, "loss": 0.0172, "step": 50990 }, { "epoch": 335.5263157894737, "grad_norm": 1.391955018043518, "learning_rate": 0.0001, "loss": 0.0171, "step": 51000 }, { "epoch": 335.5921052631579, "grad_norm": 1.6602952480316162, "learning_rate": 0.0001, "loss": 0.0179, "step": 51010 }, { "epoch": 335.6578947368421, "grad_norm": 1.6140282154083252, "learning_rate": 0.0001, "loss": 0.0164, "step": 51020 }, { "epoch": 335.7236842105263, "grad_norm": 1.5680221319198608, "learning_rate": 0.0001, "loss": 0.019, "step": 51030 }, { "epoch": 335.7894736842105, "grad_norm": 1.744137167930603, "learning_rate": 0.0001, "loss": 0.0157, "step": 51040 }, { "epoch": 335.85526315789474, "grad_norm": 1.432529091835022, "learning_rate": 0.0001, "loss": 0.0212, "step": 51050 }, { "epoch": 335.92105263157896, "grad_norm": 1.653479814529419, "learning_rate": 0.0001, "loss": 0.0189, "step": 51060 }, { "epoch": 335.9868421052632, "grad_norm": 1.744955062866211, "learning_rate": 0.0001, "loss": 0.017, "step": 51070 }, { "epoch": 336.05263157894734, "grad_norm": 1.948224663734436, "learning_rate": 0.0001, "loss": 0.0156, "step": 51080 }, { "epoch": 336.11842105263156, "grad_norm": 1.4903290271759033, "learning_rate": 0.0001, "loss": 0.0185, "step": 51090 }, { "epoch": 336.1842105263158, "grad_norm": 1.384574294090271, "learning_rate": 0.0001, "loss": 0.0175, "step": 51100 }, { "epoch": 336.25, "grad_norm": 1.612889051437378, "learning_rate": 0.0001, "loss": 0.0187, "step": 51110 }, { "epoch": 336.3157894736842, "grad_norm": 1.5862077474594116, "learning_rate": 0.0001, "loss": 0.0141, "step": 51120 }, { "epoch": 336.38157894736844, "grad_norm": 1.398527979850769, "learning_rate": 0.0001, "loss": 0.0147, "step": 51130 }, { "epoch": 336.44736842105266, "grad_norm": 1.9477105140686035, "learning_rate": 0.0001, "loss": 0.0188, "step": 51140 }, { "epoch": 336.5131578947368, "grad_norm": 2.0223135948181152, "learning_rate": 0.0001, "loss": 0.0179, "step": 51150 }, { "epoch": 336.57894736842104, "grad_norm": 1.4757431745529175, "learning_rate": 0.0001, "loss": 0.0159, "step": 51160 }, { "epoch": 336.64473684210526, "grad_norm": 1.6595724821090698, "learning_rate": 0.0001, "loss": 0.0202, "step": 51170 }, { "epoch": 336.7105263157895, "grad_norm": 1.2984915971755981, "learning_rate": 0.0001, "loss": 0.0179, "step": 51180 }, { "epoch": 336.7763157894737, "grad_norm": 1.6158089637756348, "learning_rate": 0.0001, "loss": 0.0175, "step": 51190 }, { "epoch": 336.8421052631579, "grad_norm": 1.8986071348190308, "learning_rate": 0.0001, "loss": 0.0155, "step": 51200 }, { "epoch": 336.9078947368421, "grad_norm": 1.667414903640747, "learning_rate": 0.0001, "loss": 0.0196, "step": 51210 }, { "epoch": 336.9736842105263, "grad_norm": 1.5115540027618408, "learning_rate": 0.0001, "loss": 0.0179, "step": 51220 }, { "epoch": 337.0394736842105, "grad_norm": 1.8492456674575806, "learning_rate": 0.0001, "loss": 0.0148, "step": 51230 }, { "epoch": 337.10526315789474, "grad_norm": 1.511405348777771, "learning_rate": 0.0001, "loss": 0.0181, "step": 51240 }, { "epoch": 337.17105263157896, "grad_norm": 1.2180713415145874, "learning_rate": 0.0001, "loss": 0.0197, "step": 51250 }, { "epoch": 337.2368421052632, "grad_norm": 1.2909826040267944, "learning_rate": 0.0001, "loss": 0.0159, "step": 51260 }, { "epoch": 337.30263157894734, "grad_norm": 1.5798841714859009, "learning_rate": 0.0001, "loss": 0.0217, "step": 51270 }, { "epoch": 337.36842105263156, "grad_norm": 1.6049503087997437, "learning_rate": 0.0001, "loss": 0.0151, "step": 51280 }, { "epoch": 337.4342105263158, "grad_norm": 1.1024662256240845, "learning_rate": 0.0001, "loss": 0.0212, "step": 51290 }, { "epoch": 337.5, "grad_norm": 1.9166431427001953, "learning_rate": 0.0001, "loss": 0.0145, "step": 51300 }, { "epoch": 337.5657894736842, "grad_norm": 1.3561145067214966, "learning_rate": 0.0001, "loss": 0.0167, "step": 51310 }, { "epoch": 337.63157894736844, "grad_norm": 1.3745429515838623, "learning_rate": 0.0001, "loss": 0.0201, "step": 51320 }, { "epoch": 337.69736842105266, "grad_norm": 1.7602213621139526, "learning_rate": 0.0001, "loss": 0.0145, "step": 51330 }, { "epoch": 337.7631578947368, "grad_norm": 1.5345189571380615, "learning_rate": 0.0001, "loss": 0.0147, "step": 51340 }, { "epoch": 337.82894736842104, "grad_norm": 1.4823939800262451, "learning_rate": 0.0001, "loss": 0.022, "step": 51350 }, { "epoch": 337.89473684210526, "grad_norm": 1.6742459535598755, "learning_rate": 0.0001, "loss": 0.0184, "step": 51360 }, { "epoch": 337.9605263157895, "grad_norm": 1.2646760940551758, "learning_rate": 0.0001, "loss": 0.0182, "step": 51370 }, { "epoch": 338.0263157894737, "grad_norm": 1.6676275730133057, "learning_rate": 0.0001, "loss": 0.0166, "step": 51380 }, { "epoch": 338.0921052631579, "grad_norm": 1.7669587135314941, "learning_rate": 0.0001, "loss": 0.017, "step": 51390 }, { "epoch": 338.1578947368421, "grad_norm": 1.6379611492156982, "learning_rate": 0.0001, "loss": 0.0211, "step": 51400 }, { "epoch": 338.2236842105263, "grad_norm": 1.2648626565933228, "learning_rate": 0.0001, "loss": 0.0183, "step": 51410 }, { "epoch": 338.2894736842105, "grad_norm": 1.6611772775650024, "learning_rate": 0.0001, "loss": 0.0215, "step": 51420 }, { "epoch": 338.35526315789474, "grad_norm": 1.8010985851287842, "learning_rate": 0.0001, "loss": 0.0163, "step": 51430 }, { "epoch": 338.42105263157896, "grad_norm": 1.4374860525131226, "learning_rate": 0.0001, "loss": 0.0162, "step": 51440 }, { "epoch": 338.4868421052632, "grad_norm": 1.4846618175506592, "learning_rate": 0.0001, "loss": 0.0187, "step": 51450 }, { "epoch": 338.55263157894734, "grad_norm": 1.6310421228408813, "learning_rate": 0.0001, "loss": 0.0195, "step": 51460 }, { "epoch": 338.61842105263156, "grad_norm": 1.6322647333145142, "learning_rate": 0.0001, "loss": 0.0162, "step": 51470 }, { "epoch": 338.6842105263158, "grad_norm": 1.7295249700546265, "learning_rate": 0.0001, "loss": 0.016, "step": 51480 }, { "epoch": 338.75, "grad_norm": 1.2862756252288818, "learning_rate": 0.0001, "loss": 0.0141, "step": 51490 }, { "epoch": 338.8157894736842, "grad_norm": 1.81109619140625, "learning_rate": 0.0001, "loss": 0.0152, "step": 51500 }, { "epoch": 338.88157894736844, "grad_norm": 1.881306767463684, "learning_rate": 0.0001, "loss": 0.0159, "step": 51510 }, { "epoch": 338.94736842105266, "grad_norm": 2.1282460689544678, "learning_rate": 0.0001, "loss": 0.0159, "step": 51520 }, { "epoch": 339.0131578947368, "grad_norm": 1.7379229068756104, "learning_rate": 0.0001, "loss": 0.0219, "step": 51530 }, { "epoch": 339.07894736842104, "grad_norm": 1.9948829412460327, "learning_rate": 0.0001, "loss": 0.0202, "step": 51540 }, { "epoch": 339.14473684210526, "grad_norm": 1.640419840812683, "learning_rate": 0.0001, "loss": 0.018, "step": 51550 }, { "epoch": 339.2105263157895, "grad_norm": 1.595686674118042, "learning_rate": 0.0001, "loss": 0.0155, "step": 51560 }, { "epoch": 339.2763157894737, "grad_norm": 1.9579678773880005, "learning_rate": 0.0001, "loss": 0.0202, "step": 51570 }, { "epoch": 339.3421052631579, "grad_norm": 1.7150522470474243, "learning_rate": 0.0001, "loss": 0.0166, "step": 51580 }, { "epoch": 339.4078947368421, "grad_norm": 1.4936598539352417, "learning_rate": 0.0001, "loss": 0.0137, "step": 51590 }, { "epoch": 339.4736842105263, "grad_norm": 1.473556637763977, "learning_rate": 0.0001, "loss": 0.0169, "step": 51600 }, { "epoch": 339.5394736842105, "grad_norm": 1.75022554397583, "learning_rate": 0.0001, "loss": 0.0177, "step": 51610 }, { "epoch": 339.60526315789474, "grad_norm": 1.6410417556762695, "learning_rate": 0.0001, "loss": 0.0187, "step": 51620 }, { "epoch": 339.67105263157896, "grad_norm": 1.6581151485443115, "learning_rate": 0.0001, "loss": 0.0193, "step": 51630 }, { "epoch": 339.7368421052632, "grad_norm": 1.6499223709106445, "learning_rate": 0.0001, "loss": 0.0168, "step": 51640 }, { "epoch": 339.80263157894734, "grad_norm": 1.2184064388275146, "learning_rate": 0.0001, "loss": 0.0174, "step": 51650 }, { "epoch": 339.86842105263156, "grad_norm": 1.548829197883606, "learning_rate": 0.0001, "loss": 0.016, "step": 51660 }, { "epoch": 339.9342105263158, "grad_norm": 1.4529180526733398, "learning_rate": 0.0001, "loss": 0.0154, "step": 51670 }, { "epoch": 340.0, "grad_norm": 1.3122798204421997, "learning_rate": 0.0001, "loss": 0.0164, "step": 51680 }, { "epoch": 340.0657894736842, "grad_norm": 1.529796838760376, "learning_rate": 0.0001, "loss": 0.02, "step": 51690 }, { "epoch": 340.13157894736844, "grad_norm": 1.542589783668518, "learning_rate": 0.0001, "loss": 0.0159, "step": 51700 }, { "epoch": 340.19736842105266, "grad_norm": 1.520903468132019, "learning_rate": 0.0001, "loss": 0.0187, "step": 51710 }, { "epoch": 340.2631578947368, "grad_norm": 1.8382049798965454, "learning_rate": 0.0001, "loss": 0.0166, "step": 51720 }, { "epoch": 340.32894736842104, "grad_norm": 1.5764766931533813, "learning_rate": 0.0001, "loss": 0.018, "step": 51730 }, { "epoch": 340.39473684210526, "grad_norm": 1.6254268884658813, "learning_rate": 0.0001, "loss": 0.0146, "step": 51740 }, { "epoch": 340.4605263157895, "grad_norm": 2.3691673278808594, "learning_rate": 0.0001, "loss": 0.0185, "step": 51750 }, { "epoch": 340.5263157894737, "grad_norm": 1.8091293573379517, "learning_rate": 0.0001, "loss": 0.0147, "step": 51760 }, { "epoch": 340.5921052631579, "grad_norm": 1.7112027406692505, "learning_rate": 0.0001, "loss": 0.0193, "step": 51770 }, { "epoch": 340.6578947368421, "grad_norm": 1.441784143447876, "learning_rate": 0.0001, "loss": 0.0153, "step": 51780 }, { "epoch": 340.7236842105263, "grad_norm": 1.4256178140640259, "learning_rate": 0.0001, "loss": 0.02, "step": 51790 }, { "epoch": 340.7894736842105, "grad_norm": 1.5311036109924316, "learning_rate": 0.0001, "loss": 0.0167, "step": 51800 }, { "epoch": 340.85526315789474, "grad_norm": 1.5943217277526855, "learning_rate": 0.0001, "loss": 0.0183, "step": 51810 }, { "epoch": 340.92105263157896, "grad_norm": 1.3652406930923462, "learning_rate": 0.0001, "loss": 0.0192, "step": 51820 }, { "epoch": 340.9868421052632, "grad_norm": 1.137133002281189, "learning_rate": 0.0001, "loss": 0.0193, "step": 51830 }, { "epoch": 341.05263157894734, "grad_norm": 1.3104150295257568, "learning_rate": 0.0001, "loss": 0.0173, "step": 51840 }, { "epoch": 341.11842105263156, "grad_norm": 1.50453782081604, "learning_rate": 0.0001, "loss": 0.0219, "step": 51850 }, { "epoch": 341.1842105263158, "grad_norm": 1.3333038091659546, "learning_rate": 0.0001, "loss": 0.0196, "step": 51860 }, { "epoch": 341.25, "grad_norm": 1.612069845199585, "learning_rate": 0.0001, "loss": 0.0167, "step": 51870 }, { "epoch": 341.3157894736842, "grad_norm": 1.5777206420898438, "learning_rate": 0.0001, "loss": 0.0162, "step": 51880 }, { "epoch": 341.38157894736844, "grad_norm": 0.9976874589920044, "learning_rate": 0.0001, "loss": 0.0155, "step": 51890 }, { "epoch": 341.44736842105266, "grad_norm": 1.3445706367492676, "learning_rate": 0.0001, "loss": 0.0181, "step": 51900 }, { "epoch": 341.5131578947368, "grad_norm": 1.4868800640106201, "learning_rate": 0.0001, "loss": 0.0174, "step": 51910 }, { "epoch": 341.57894736842104, "grad_norm": 1.4125052690505981, "learning_rate": 0.0001, "loss": 0.0181, "step": 51920 }, { "epoch": 341.64473684210526, "grad_norm": 1.7078778743743896, "learning_rate": 0.0001, "loss": 0.0179, "step": 51930 }, { "epoch": 341.7105263157895, "grad_norm": 2.347681999206543, "learning_rate": 0.0001, "loss": 0.017, "step": 51940 }, { "epoch": 341.7763157894737, "grad_norm": 1.570023536682129, "learning_rate": 0.0001, "loss": 0.022, "step": 51950 }, { "epoch": 341.8421052631579, "grad_norm": 1.6024404764175415, "learning_rate": 0.0001, "loss": 0.0173, "step": 51960 }, { "epoch": 341.9078947368421, "grad_norm": 1.779620885848999, "learning_rate": 0.0001, "loss": 0.0182, "step": 51970 }, { "epoch": 341.9736842105263, "grad_norm": 1.7854881286621094, "learning_rate": 0.0001, "loss": 0.0158, "step": 51980 }, { "epoch": 342.0394736842105, "grad_norm": 1.5622003078460693, "learning_rate": 0.0001, "loss": 0.0141, "step": 51990 }, { "epoch": 342.10526315789474, "grad_norm": 1.595339298248291, "learning_rate": 0.0001, "loss": 0.0189, "step": 52000 }, { "epoch": 342.17105263157896, "grad_norm": 1.3777744770050049, "learning_rate": 0.0001, "loss": 0.0167, "step": 52010 }, { "epoch": 342.2368421052632, "grad_norm": 1.5905792713165283, "learning_rate": 0.0001, "loss": 0.0176, "step": 52020 }, { "epoch": 342.30263157894734, "grad_norm": 1.9138753414154053, "learning_rate": 0.0001, "loss": 0.0228, "step": 52030 }, { "epoch": 342.36842105263156, "grad_norm": 1.2997560501098633, "learning_rate": 0.0001, "loss": 0.0191, "step": 52040 }, { "epoch": 342.4342105263158, "grad_norm": 1.375681757926941, "learning_rate": 0.0001, "loss": 0.0182, "step": 52050 }, { "epoch": 342.5, "grad_norm": 1.3562076091766357, "learning_rate": 0.0001, "loss": 0.0174, "step": 52060 }, { "epoch": 342.5657894736842, "grad_norm": 1.5499968528747559, "learning_rate": 0.0001, "loss": 0.0145, "step": 52070 }, { "epoch": 342.63157894736844, "grad_norm": 1.456803560256958, "learning_rate": 0.0001, "loss": 0.0149, "step": 52080 }, { "epoch": 342.69736842105266, "grad_norm": 1.667575716972351, "learning_rate": 0.0001, "loss": 0.0164, "step": 52090 }, { "epoch": 342.7631578947368, "grad_norm": 1.5519905090332031, "learning_rate": 0.0001, "loss": 0.014, "step": 52100 }, { "epoch": 342.82894736842104, "grad_norm": 1.769260048866272, "learning_rate": 0.0001, "loss": 0.0157, "step": 52110 }, { "epoch": 342.89473684210526, "grad_norm": 1.4615331888198853, "learning_rate": 0.0001, "loss": 0.0174, "step": 52120 }, { "epoch": 342.9605263157895, "grad_norm": 1.3458714485168457, "learning_rate": 0.0001, "loss": 0.0175, "step": 52130 }, { "epoch": 343.0263157894737, "grad_norm": 1.6742737293243408, "learning_rate": 0.0001, "loss": 0.0141, "step": 52140 }, { "epoch": 343.0921052631579, "grad_norm": 2.0775375366210938, "learning_rate": 0.0001, "loss": 0.0183, "step": 52150 }, { "epoch": 343.1578947368421, "grad_norm": 1.315415382385254, "learning_rate": 0.0001, "loss": 0.0194, "step": 52160 }, { "epoch": 343.2236842105263, "grad_norm": 1.170437216758728, "learning_rate": 0.0001, "loss": 0.0217, "step": 52170 }, { "epoch": 343.2894736842105, "grad_norm": 1.5081045627593994, "learning_rate": 0.0001, "loss": 0.0153, "step": 52180 }, { "epoch": 343.35526315789474, "grad_norm": 1.7693700790405273, "learning_rate": 0.0001, "loss": 0.0198, "step": 52190 }, { "epoch": 343.42105263157896, "grad_norm": 1.5500675439834595, "learning_rate": 0.0001, "loss": 0.0177, "step": 52200 }, { "epoch": 343.4868421052632, "grad_norm": 1.3110986948013306, "learning_rate": 0.0001, "loss": 0.0134, "step": 52210 }, { "epoch": 343.55263157894734, "grad_norm": 1.3209624290466309, "learning_rate": 0.0001, "loss": 0.0184, "step": 52220 }, { "epoch": 343.61842105263156, "grad_norm": 1.1719533205032349, "learning_rate": 0.0001, "loss": 0.018, "step": 52230 }, { "epoch": 343.6842105263158, "grad_norm": 1.2764602899551392, "learning_rate": 0.0001, "loss": 0.0151, "step": 52240 }, { "epoch": 343.75, "grad_norm": 2.0908312797546387, "learning_rate": 0.0001, "loss": 0.0152, "step": 52250 }, { "epoch": 343.8157894736842, "grad_norm": 1.630867600440979, "learning_rate": 0.0001, "loss": 0.0178, "step": 52260 }, { "epoch": 343.88157894736844, "grad_norm": 1.5652029514312744, "learning_rate": 0.0001, "loss": 0.0172, "step": 52270 }, { "epoch": 343.94736842105266, "grad_norm": 1.832592248916626, "learning_rate": 0.0001, "loss": 0.0193, "step": 52280 }, { "epoch": 344.0131578947368, "grad_norm": 1.6471961736679077, "learning_rate": 0.0001, "loss": 0.0162, "step": 52290 }, { "epoch": 344.07894736842104, "grad_norm": 1.886033296585083, "learning_rate": 0.0001, "loss": 0.0195, "step": 52300 }, { "epoch": 344.14473684210526, "grad_norm": 2.0268843173980713, "learning_rate": 0.0001, "loss": 0.0156, "step": 52310 }, { "epoch": 344.2105263157895, "grad_norm": 2.103236436843872, "learning_rate": 0.0001, "loss": 0.0178, "step": 52320 }, { "epoch": 344.2763157894737, "grad_norm": 2.149763345718384, "learning_rate": 0.0001, "loss": 0.0185, "step": 52330 }, { "epoch": 344.3421052631579, "grad_norm": 1.9198371171951294, "learning_rate": 0.0001, "loss": 0.0144, "step": 52340 }, { "epoch": 344.4078947368421, "grad_norm": 2.107261896133423, "learning_rate": 0.0001, "loss": 0.0163, "step": 52350 }, { "epoch": 344.4736842105263, "grad_norm": 1.41112220287323, "learning_rate": 0.0001, "loss": 0.0162, "step": 52360 }, { "epoch": 344.5394736842105, "grad_norm": 1.7074733972549438, "learning_rate": 0.0001, "loss": 0.0183, "step": 52370 }, { "epoch": 344.60526315789474, "grad_norm": 1.567258596420288, "learning_rate": 0.0001, "loss": 0.0199, "step": 52380 }, { "epoch": 344.67105263157896, "grad_norm": 1.7985203266143799, "learning_rate": 0.0001, "loss": 0.0174, "step": 52390 }, { "epoch": 344.7368421052632, "grad_norm": 1.4545480012893677, "learning_rate": 0.0001, "loss": 0.0153, "step": 52400 }, { "epoch": 344.80263157894734, "grad_norm": 1.2189040184020996, "learning_rate": 0.0001, "loss": 0.0148, "step": 52410 }, { "epoch": 344.86842105263156, "grad_norm": 1.489028811454773, "learning_rate": 0.0001, "loss": 0.0156, "step": 52420 }, { "epoch": 344.9342105263158, "grad_norm": 1.1683820486068726, "learning_rate": 0.0001, "loss": 0.0214, "step": 52430 }, { "epoch": 345.0, "grad_norm": 1.419479250907898, "learning_rate": 0.0001, "loss": 0.0212, "step": 52440 }, { "epoch": 345.0657894736842, "grad_norm": 1.3743946552276611, "learning_rate": 0.0001, "loss": 0.0181, "step": 52450 }, { "epoch": 345.13157894736844, "grad_norm": 1.7872297763824463, "learning_rate": 0.0001, "loss": 0.0156, "step": 52460 }, { "epoch": 345.19736842105266, "grad_norm": 1.4258666038513184, "learning_rate": 0.0001, "loss": 0.0206, "step": 52470 }, { "epoch": 345.2631578947368, "grad_norm": 1.6574995517730713, "learning_rate": 0.0001, "loss": 0.0164, "step": 52480 }, { "epoch": 345.32894736842104, "grad_norm": 1.307613730430603, "learning_rate": 0.0001, "loss": 0.019, "step": 52490 }, { "epoch": 345.39473684210526, "grad_norm": 1.4577488899230957, "learning_rate": 0.0001, "loss": 0.0156, "step": 52500 }, { "epoch": 345.4605263157895, "grad_norm": 1.665987253189087, "learning_rate": 0.0001, "loss": 0.0181, "step": 52510 }, { "epoch": 345.5263157894737, "grad_norm": 1.448827862739563, "learning_rate": 0.0001, "loss": 0.0175, "step": 52520 }, { "epoch": 345.5921052631579, "grad_norm": 1.637852430343628, "learning_rate": 0.0001, "loss": 0.0184, "step": 52530 }, { "epoch": 345.6578947368421, "grad_norm": 1.6160212755203247, "learning_rate": 0.0001, "loss": 0.0189, "step": 52540 }, { "epoch": 345.7236842105263, "grad_norm": 1.4112215042114258, "learning_rate": 0.0001, "loss": 0.0177, "step": 52550 }, { "epoch": 345.7894736842105, "grad_norm": 1.3016849756240845, "learning_rate": 0.0001, "loss": 0.0157, "step": 52560 }, { "epoch": 345.85526315789474, "grad_norm": 1.4107558727264404, "learning_rate": 0.0001, "loss": 0.0163, "step": 52570 }, { "epoch": 345.92105263157896, "grad_norm": 1.233899712562561, "learning_rate": 0.0001, "loss": 0.0164, "step": 52580 }, { "epoch": 345.9868421052632, "grad_norm": 1.341089129447937, "learning_rate": 0.0001, "loss": 0.017, "step": 52590 }, { "epoch": 346.05263157894734, "grad_norm": 1.3046245574951172, "learning_rate": 0.0001, "loss": 0.0141, "step": 52600 }, { "epoch": 346.11842105263156, "grad_norm": 1.3325328826904297, "learning_rate": 0.0001, "loss": 0.0226, "step": 52610 }, { "epoch": 346.1842105263158, "grad_norm": 1.3425108194351196, "learning_rate": 0.0001, "loss": 0.0208, "step": 52620 }, { "epoch": 346.25, "grad_norm": 1.5380154848098755, "learning_rate": 0.0001, "loss": 0.0172, "step": 52630 }, { "epoch": 346.3157894736842, "grad_norm": 1.46867036819458, "learning_rate": 0.0001, "loss": 0.0185, "step": 52640 }, { "epoch": 346.38157894736844, "grad_norm": 2.2133572101593018, "learning_rate": 0.0001, "loss": 0.0186, "step": 52650 }, { "epoch": 346.44736842105266, "grad_norm": 1.223158597946167, "learning_rate": 0.0001, "loss": 0.019, "step": 52660 }, { "epoch": 346.5131578947368, "grad_norm": 1.178155541419983, "learning_rate": 0.0001, "loss": 0.0168, "step": 52670 }, { "epoch": 346.57894736842104, "grad_norm": 2.0482337474823, "learning_rate": 0.0001, "loss": 0.0153, "step": 52680 }, { "epoch": 346.64473684210526, "grad_norm": 1.6093649864196777, "learning_rate": 0.0001, "loss": 0.0193, "step": 52690 }, { "epoch": 346.7105263157895, "grad_norm": 1.375891923904419, "learning_rate": 0.0001, "loss": 0.0156, "step": 52700 }, { "epoch": 346.7763157894737, "grad_norm": 1.7444010972976685, "learning_rate": 0.0001, "loss": 0.017, "step": 52710 }, { "epoch": 346.8421052631579, "grad_norm": 1.5895987749099731, "learning_rate": 0.0001, "loss": 0.0192, "step": 52720 }, { "epoch": 346.9078947368421, "grad_norm": 1.575553059577942, "learning_rate": 0.0001, "loss": 0.0144, "step": 52730 }, { "epoch": 346.9736842105263, "grad_norm": 1.5973013639450073, "learning_rate": 0.0001, "loss": 0.0204, "step": 52740 }, { "epoch": 347.0394736842105, "grad_norm": 1.6227422952651978, "learning_rate": 0.0001, "loss": 0.0161, "step": 52750 }, { "epoch": 347.10526315789474, "grad_norm": 1.4470375776290894, "learning_rate": 0.0001, "loss": 0.0145, "step": 52760 }, { "epoch": 347.17105263157896, "grad_norm": 1.5237330198287964, "learning_rate": 0.0001, "loss": 0.0146, "step": 52770 }, { "epoch": 347.2368421052632, "grad_norm": 1.3602750301361084, "learning_rate": 0.0001, "loss": 0.0154, "step": 52780 }, { "epoch": 347.30263157894734, "grad_norm": 1.568346619606018, "learning_rate": 0.0001, "loss": 0.0179, "step": 52790 }, { "epoch": 347.36842105263156, "grad_norm": 1.6449992656707764, "learning_rate": 0.0001, "loss": 0.0201, "step": 52800 }, { "epoch": 347.4342105263158, "grad_norm": 1.1939854621887207, "learning_rate": 0.0001, "loss": 0.0174, "step": 52810 }, { "epoch": 347.5, "grad_norm": 1.797753930091858, "learning_rate": 0.0001, "loss": 0.0204, "step": 52820 }, { "epoch": 347.5657894736842, "grad_norm": 1.4464123249053955, "learning_rate": 0.0001, "loss": 0.0175, "step": 52830 }, { "epoch": 347.63157894736844, "grad_norm": 1.3992868661880493, "learning_rate": 0.0001, "loss": 0.0143, "step": 52840 }, { "epoch": 347.69736842105266, "grad_norm": 1.4135956764221191, "learning_rate": 0.0001, "loss": 0.02, "step": 52850 }, { "epoch": 347.7631578947368, "grad_norm": 1.7355220317840576, "learning_rate": 0.0001, "loss": 0.0224, "step": 52860 }, { "epoch": 347.82894736842104, "grad_norm": 1.6030656099319458, "learning_rate": 0.0001, "loss": 0.0201, "step": 52870 }, { "epoch": 347.89473684210526, "grad_norm": 1.7347732782363892, "learning_rate": 0.0001, "loss": 0.018, "step": 52880 }, { "epoch": 347.9605263157895, "grad_norm": 1.3370827436447144, "learning_rate": 0.0001, "loss": 0.0166, "step": 52890 }, { "epoch": 348.0263157894737, "grad_norm": 2.1268653869628906, "learning_rate": 0.0001, "loss": 0.0208, "step": 52900 }, { "epoch": 348.0921052631579, "grad_norm": 1.7019654512405396, "learning_rate": 0.0001, "loss": 0.021, "step": 52910 }, { "epoch": 348.1578947368421, "grad_norm": 1.509648084640503, "learning_rate": 0.0001, "loss": 0.0197, "step": 52920 }, { "epoch": 348.2236842105263, "grad_norm": 1.5241304636001587, "learning_rate": 0.0001, "loss": 0.0175, "step": 52930 }, { "epoch": 348.2894736842105, "grad_norm": 1.7394347190856934, "learning_rate": 0.0001, "loss": 0.0188, "step": 52940 }, { "epoch": 348.35526315789474, "grad_norm": 1.9092488288879395, "learning_rate": 0.0001, "loss": 0.0155, "step": 52950 }, { "epoch": 348.42105263157896, "grad_norm": 1.4568127393722534, "learning_rate": 0.0001, "loss": 0.0186, "step": 52960 }, { "epoch": 348.4868421052632, "grad_norm": 1.2990052700042725, "learning_rate": 0.0001, "loss": 0.019, "step": 52970 }, { "epoch": 348.55263157894734, "grad_norm": 1.3388007879257202, "learning_rate": 0.0001, "loss": 0.0165, "step": 52980 }, { "epoch": 348.61842105263156, "grad_norm": 1.449815273284912, "learning_rate": 0.0001, "loss": 0.0193, "step": 52990 }, { "epoch": 348.6842105263158, "grad_norm": 2.151838779449463, "learning_rate": 0.0001, "loss": 0.0142, "step": 53000 }, { "epoch": 348.75, "grad_norm": 1.5878361463546753, "learning_rate": 0.0001, "loss": 0.0187, "step": 53010 }, { "epoch": 348.8157894736842, "grad_norm": 1.7307326793670654, "learning_rate": 0.0001, "loss": 0.0153, "step": 53020 }, { "epoch": 348.88157894736844, "grad_norm": 1.274813175201416, "learning_rate": 0.0001, "loss": 0.0172, "step": 53030 }, { "epoch": 348.94736842105266, "grad_norm": 1.1736615896224976, "learning_rate": 0.0001, "loss": 0.0197, "step": 53040 }, { "epoch": 349.0131578947368, "grad_norm": 1.3871982097625732, "learning_rate": 0.0001, "loss": 0.0194, "step": 53050 }, { "epoch": 349.07894736842104, "grad_norm": 1.5947190523147583, "learning_rate": 0.0001, "loss": 0.0186, "step": 53060 }, { "epoch": 349.14473684210526, "grad_norm": 1.7364543676376343, "learning_rate": 0.0001, "loss": 0.0245, "step": 53070 }, { "epoch": 349.2105263157895, "grad_norm": 1.636128306388855, "learning_rate": 0.0001, "loss": 0.0175, "step": 53080 }, { "epoch": 349.2763157894737, "grad_norm": 1.3738986253738403, "learning_rate": 0.0001, "loss": 0.0175, "step": 53090 }, { "epoch": 349.3421052631579, "grad_norm": 1.3704839944839478, "learning_rate": 0.0001, "loss": 0.0186, "step": 53100 }, { "epoch": 349.4078947368421, "grad_norm": 1.4195218086242676, "learning_rate": 0.0001, "loss": 0.0228, "step": 53110 }, { "epoch": 349.4736842105263, "grad_norm": 1.3942301273345947, "learning_rate": 0.0001, "loss": 0.0178, "step": 53120 }, { "epoch": 349.5394736842105, "grad_norm": 1.607375144958496, "learning_rate": 0.0001, "loss": 0.0186, "step": 53130 }, { "epoch": 349.60526315789474, "grad_norm": 1.568157434463501, "learning_rate": 0.0001, "loss": 0.0175, "step": 53140 }, { "epoch": 349.67105263157896, "grad_norm": 1.8145006895065308, "learning_rate": 0.0001, "loss": 0.0177, "step": 53150 }, { "epoch": 349.7368421052632, "grad_norm": 1.7517499923706055, "learning_rate": 0.0001, "loss": 0.0228, "step": 53160 }, { "epoch": 349.80263157894734, "grad_norm": 1.6664791107177734, "learning_rate": 0.0001, "loss": 0.0187, "step": 53170 }, { "epoch": 349.86842105263156, "grad_norm": 1.6837363243103027, "learning_rate": 0.0001, "loss": 0.0191, "step": 53180 }, { "epoch": 349.9342105263158, "grad_norm": 1.3247390985488892, "learning_rate": 0.0001, "loss": 0.0176, "step": 53190 }, { "epoch": 350.0, "grad_norm": 1.359748363494873, "learning_rate": 0.0001, "loss": 0.0152, "step": 53200 }, { "epoch": 350.0657894736842, "grad_norm": 1.3597776889801025, "learning_rate": 0.0001, "loss": 0.0193, "step": 53210 }, { "epoch": 350.13157894736844, "grad_norm": 1.5875743627548218, "learning_rate": 0.0001, "loss": 0.0175, "step": 53220 }, { "epoch": 350.19736842105266, "grad_norm": 1.5038963556289673, "learning_rate": 0.0001, "loss": 0.0165, "step": 53230 }, { "epoch": 350.2631578947368, "grad_norm": 1.6913342475891113, "learning_rate": 0.0001, "loss": 0.0183, "step": 53240 }, { "epoch": 350.32894736842104, "grad_norm": 1.1982073783874512, "learning_rate": 0.0001, "loss": 0.0183, "step": 53250 }, { "epoch": 350.39473684210526, "grad_norm": 1.0400593280792236, "learning_rate": 0.0001, "loss": 0.0165, "step": 53260 }, { "epoch": 350.4605263157895, "grad_norm": 1.7618499994277954, "learning_rate": 0.0001, "loss": 0.0151, "step": 53270 }, { "epoch": 350.5263157894737, "grad_norm": 1.5031535625457764, "learning_rate": 0.0001, "loss": 0.0191, "step": 53280 }, { "epoch": 350.5921052631579, "grad_norm": 1.4638627767562866, "learning_rate": 0.0001, "loss": 0.0169, "step": 53290 }, { "epoch": 350.6578947368421, "grad_norm": 1.304284930229187, "learning_rate": 0.0001, "loss": 0.0177, "step": 53300 }, { "epoch": 350.7236842105263, "grad_norm": 1.792237401008606, "learning_rate": 0.0001, "loss": 0.0168, "step": 53310 }, { "epoch": 350.7894736842105, "grad_norm": 1.7908965349197388, "learning_rate": 0.0001, "loss": 0.0163, "step": 53320 }, { "epoch": 350.85526315789474, "grad_norm": 1.566118836402893, "learning_rate": 0.0001, "loss": 0.0173, "step": 53330 }, { "epoch": 350.92105263157896, "grad_norm": 1.5671510696411133, "learning_rate": 0.0001, "loss": 0.0163, "step": 53340 }, { "epoch": 350.9868421052632, "grad_norm": 1.519830346107483, "learning_rate": 0.0001, "loss": 0.0228, "step": 53350 }, { "epoch": 351.05263157894734, "grad_norm": 1.7119441032409668, "learning_rate": 0.0001, "loss": 0.0158, "step": 53360 }, { "epoch": 351.11842105263156, "grad_norm": 1.7932323217391968, "learning_rate": 0.0001, "loss": 0.0157, "step": 53370 }, { "epoch": 351.1842105263158, "grad_norm": 1.7462435960769653, "learning_rate": 0.0001, "loss": 0.0179, "step": 53380 }, { "epoch": 351.25, "grad_norm": 1.3173012733459473, "learning_rate": 0.0001, "loss": 0.0197, "step": 53390 }, { "epoch": 351.3157894736842, "grad_norm": 1.1698476076126099, "learning_rate": 0.0001, "loss": 0.0138, "step": 53400 }, { "epoch": 351.38157894736844, "grad_norm": 1.3106000423431396, "learning_rate": 0.0001, "loss": 0.0182, "step": 53410 }, { "epoch": 351.44736842105266, "grad_norm": 1.9883373975753784, "learning_rate": 0.0001, "loss": 0.018, "step": 53420 }, { "epoch": 351.5131578947368, "grad_norm": 1.458177089691162, "learning_rate": 0.0001, "loss": 0.0142, "step": 53430 }, { "epoch": 351.57894736842104, "grad_norm": 1.5631513595581055, "learning_rate": 0.0001, "loss": 0.0174, "step": 53440 }, { "epoch": 351.64473684210526, "grad_norm": 1.1956521272659302, "learning_rate": 0.0001, "loss": 0.0186, "step": 53450 }, { "epoch": 351.7105263157895, "grad_norm": 1.5008502006530762, "learning_rate": 0.0001, "loss": 0.0188, "step": 53460 }, { "epoch": 351.7763157894737, "grad_norm": 1.5151830911636353, "learning_rate": 0.0001, "loss": 0.0179, "step": 53470 }, { "epoch": 351.8421052631579, "grad_norm": 1.7532782554626465, "learning_rate": 0.0001, "loss": 0.016, "step": 53480 }, { "epoch": 351.9078947368421, "grad_norm": 1.8671541213989258, "learning_rate": 0.0001, "loss": 0.0158, "step": 53490 }, { "epoch": 351.9736842105263, "grad_norm": 1.647079348564148, "learning_rate": 0.0001, "loss": 0.0233, "step": 53500 }, { "epoch": 352.0394736842105, "grad_norm": 1.489392638206482, "learning_rate": 0.0001, "loss": 0.016, "step": 53510 }, { "epoch": 352.10526315789474, "grad_norm": 1.7595579624176025, "learning_rate": 0.0001, "loss": 0.0155, "step": 53520 }, { "epoch": 352.17105263157896, "grad_norm": 1.6749757528305054, "learning_rate": 0.0001, "loss": 0.0133, "step": 53530 }, { "epoch": 352.2368421052632, "grad_norm": 1.6539443731307983, "learning_rate": 0.0001, "loss": 0.0158, "step": 53540 }, { "epoch": 352.30263157894734, "grad_norm": 1.7292064428329468, "learning_rate": 0.0001, "loss": 0.0133, "step": 53550 }, { "epoch": 352.36842105263156, "grad_norm": 1.6728757619857788, "learning_rate": 0.0001, "loss": 0.0196, "step": 53560 }, { "epoch": 352.4342105263158, "grad_norm": 1.31680166721344, "learning_rate": 0.0001, "loss": 0.0175, "step": 53570 }, { "epoch": 352.5, "grad_norm": 1.567442774772644, "learning_rate": 0.0001, "loss": 0.017, "step": 53580 }, { "epoch": 352.5657894736842, "grad_norm": 1.3933629989624023, "learning_rate": 0.0001, "loss": 0.0191, "step": 53590 }, { "epoch": 352.63157894736844, "grad_norm": 1.4603288173675537, "learning_rate": 0.0001, "loss": 0.0169, "step": 53600 }, { "epoch": 352.69736842105266, "grad_norm": 1.3647547960281372, "learning_rate": 0.0001, "loss": 0.0143, "step": 53610 }, { "epoch": 352.7631578947368, "grad_norm": 1.8089128732681274, "learning_rate": 0.0001, "loss": 0.018, "step": 53620 }, { "epoch": 352.82894736842104, "grad_norm": 1.3791919946670532, "learning_rate": 0.0001, "loss": 0.018, "step": 53630 }, { "epoch": 352.89473684210526, "grad_norm": 1.826276421546936, "learning_rate": 0.0001, "loss": 0.0181, "step": 53640 }, { "epoch": 352.9605263157895, "grad_norm": 1.6626293659210205, "learning_rate": 0.0001, "loss": 0.02, "step": 53650 }, { "epoch": 353.0263157894737, "grad_norm": 1.3007980585098267, "learning_rate": 0.0001, "loss": 0.0196, "step": 53660 }, { "epoch": 353.0921052631579, "grad_norm": 1.5163323879241943, "learning_rate": 0.0001, "loss": 0.0187, "step": 53670 }, { "epoch": 353.1578947368421, "grad_norm": 1.7377357482910156, "learning_rate": 0.0001, "loss": 0.0154, "step": 53680 }, { "epoch": 353.2236842105263, "grad_norm": 1.4449639320373535, "learning_rate": 0.0001, "loss": 0.0193, "step": 53690 }, { "epoch": 353.2894736842105, "grad_norm": 1.6061534881591797, "learning_rate": 0.0001, "loss": 0.0151, "step": 53700 }, { "epoch": 353.35526315789474, "grad_norm": 1.4378858804702759, "learning_rate": 0.0001, "loss": 0.0152, "step": 53710 }, { "epoch": 353.42105263157896, "grad_norm": 1.6257272958755493, "learning_rate": 0.0001, "loss": 0.015, "step": 53720 }, { "epoch": 353.4868421052632, "grad_norm": 1.5565361976623535, "learning_rate": 0.0001, "loss": 0.0178, "step": 53730 }, { "epoch": 353.55263157894734, "grad_norm": 1.6655941009521484, "learning_rate": 0.0001, "loss": 0.0165, "step": 53740 }, { "epoch": 353.61842105263156, "grad_norm": 1.5499531030654907, "learning_rate": 0.0001, "loss": 0.0171, "step": 53750 }, { "epoch": 353.6842105263158, "grad_norm": 1.0223147869110107, "learning_rate": 0.0001, "loss": 0.0162, "step": 53760 }, { "epoch": 353.75, "grad_norm": 1.7939010858535767, "learning_rate": 0.0001, "loss": 0.0197, "step": 53770 }, { "epoch": 353.8157894736842, "grad_norm": 1.5748565196990967, "learning_rate": 0.0001, "loss": 0.0175, "step": 53780 }, { "epoch": 353.88157894736844, "grad_norm": 1.5822476148605347, "learning_rate": 0.0001, "loss": 0.0151, "step": 53790 }, { "epoch": 353.94736842105266, "grad_norm": 1.2422901391983032, "learning_rate": 0.0001, "loss": 0.02, "step": 53800 }, { "epoch": 354.0131578947368, "grad_norm": 1.3518717288970947, "learning_rate": 0.0001, "loss": 0.0178, "step": 53810 }, { "epoch": 354.07894736842104, "grad_norm": 1.3904623985290527, "learning_rate": 0.0001, "loss": 0.018, "step": 53820 }, { "epoch": 354.14473684210526, "grad_norm": 1.446066975593567, "learning_rate": 0.0001, "loss": 0.0158, "step": 53830 }, { "epoch": 354.2105263157895, "grad_norm": 1.3815151453018188, "learning_rate": 0.0001, "loss": 0.0168, "step": 53840 }, { "epoch": 354.2763157894737, "grad_norm": 1.225319266319275, "learning_rate": 0.0001, "loss": 0.0186, "step": 53850 }, { "epoch": 354.3421052631579, "grad_norm": 1.3360308408737183, "learning_rate": 0.0001, "loss": 0.02, "step": 53860 }, { "epoch": 354.4078947368421, "grad_norm": 1.6317639350891113, "learning_rate": 0.0001, "loss": 0.0148, "step": 53870 }, { "epoch": 354.4736842105263, "grad_norm": 1.737660527229309, "learning_rate": 0.0001, "loss": 0.0139, "step": 53880 }, { "epoch": 354.5394736842105, "grad_norm": 1.7762559652328491, "learning_rate": 0.0001, "loss": 0.0188, "step": 53890 }, { "epoch": 354.60526315789474, "grad_norm": 1.1597551107406616, "learning_rate": 0.0001, "loss": 0.0174, "step": 53900 }, { "epoch": 354.67105263157896, "grad_norm": 1.3271461725234985, "learning_rate": 0.0001, "loss": 0.0196, "step": 53910 }, { "epoch": 354.7368421052632, "grad_norm": 1.7926801443099976, "learning_rate": 0.0001, "loss": 0.0137, "step": 53920 }, { "epoch": 354.80263157894734, "grad_norm": 1.1399463415145874, "learning_rate": 0.0001, "loss": 0.016, "step": 53930 }, { "epoch": 354.86842105263156, "grad_norm": 1.0066908597946167, "learning_rate": 0.0001, "loss": 0.0177, "step": 53940 }, { "epoch": 354.9342105263158, "grad_norm": 1.5075775384902954, "learning_rate": 0.0001, "loss": 0.0166, "step": 53950 }, { "epoch": 355.0, "grad_norm": 1.197961449623108, "learning_rate": 0.0001, "loss": 0.0214, "step": 53960 }, { "epoch": 355.0657894736842, "grad_norm": 1.384781837463379, "learning_rate": 0.0001, "loss": 0.0188, "step": 53970 }, { "epoch": 355.13157894736844, "grad_norm": 1.3845198154449463, "learning_rate": 0.0001, "loss": 0.0177, "step": 53980 }, { "epoch": 355.19736842105266, "grad_norm": 2.0317320823669434, "learning_rate": 0.0001, "loss": 0.017, "step": 53990 }, { "epoch": 355.2631578947368, "grad_norm": 1.97770094871521, "learning_rate": 0.0001, "loss": 0.0162, "step": 54000 }, { "epoch": 355.32894736842104, "grad_norm": 1.2944014072418213, "learning_rate": 0.0001, "loss": 0.0173, "step": 54010 }, { "epoch": 355.39473684210526, "grad_norm": 1.7163012027740479, "learning_rate": 0.0001, "loss": 0.0175, "step": 54020 }, { "epoch": 355.4605263157895, "grad_norm": 1.4748003482818604, "learning_rate": 0.0001, "loss": 0.0163, "step": 54030 }, { "epoch": 355.5263157894737, "grad_norm": 1.5227689743041992, "learning_rate": 0.0001, "loss": 0.017, "step": 54040 }, { "epoch": 355.5921052631579, "grad_norm": 1.7188392877578735, "learning_rate": 0.0001, "loss": 0.0185, "step": 54050 }, { "epoch": 355.6578947368421, "grad_norm": 1.4615504741668701, "learning_rate": 0.0001, "loss": 0.0141, "step": 54060 }, { "epoch": 355.7236842105263, "grad_norm": 1.96817147731781, "learning_rate": 0.0001, "loss": 0.0203, "step": 54070 }, { "epoch": 355.7894736842105, "grad_norm": 1.8353638648986816, "learning_rate": 0.0001, "loss": 0.0179, "step": 54080 }, { "epoch": 355.85526315789474, "grad_norm": 1.4609136581420898, "learning_rate": 0.0001, "loss": 0.0176, "step": 54090 }, { "epoch": 355.92105263157896, "grad_norm": 1.2733691930770874, "learning_rate": 0.0001, "loss": 0.0158, "step": 54100 }, { "epoch": 355.9868421052632, "grad_norm": 1.6316425800323486, "learning_rate": 0.0001, "loss": 0.0148, "step": 54110 }, { "epoch": 356.05263157894734, "grad_norm": 1.172689437866211, "learning_rate": 0.0001, "loss": 0.0159, "step": 54120 }, { "epoch": 356.11842105263156, "grad_norm": 1.7594672441482544, "learning_rate": 0.0001, "loss": 0.0136, "step": 54130 }, { "epoch": 356.1842105263158, "grad_norm": 1.6723624467849731, "learning_rate": 0.0001, "loss": 0.0195, "step": 54140 }, { "epoch": 356.25, "grad_norm": 1.7583671808242798, "learning_rate": 0.0001, "loss": 0.0163, "step": 54150 }, { "epoch": 356.3157894736842, "grad_norm": 1.3056718111038208, "learning_rate": 0.0001, "loss": 0.0147, "step": 54160 }, { "epoch": 356.38157894736844, "grad_norm": 1.488175630569458, "learning_rate": 0.0001, "loss": 0.016, "step": 54170 }, { "epoch": 356.44736842105266, "grad_norm": 1.4702892303466797, "learning_rate": 0.0001, "loss": 0.0153, "step": 54180 }, { "epoch": 356.5131578947368, "grad_norm": 1.3429276943206787, "learning_rate": 0.0001, "loss": 0.0156, "step": 54190 }, { "epoch": 356.57894736842104, "grad_norm": 1.1535762548446655, "learning_rate": 0.0001, "loss": 0.0182, "step": 54200 }, { "epoch": 356.64473684210526, "grad_norm": 1.4470537900924683, "learning_rate": 0.0001, "loss": 0.0174, "step": 54210 }, { "epoch": 356.7105263157895, "grad_norm": 1.5333619117736816, "learning_rate": 0.0001, "loss": 0.0145, "step": 54220 }, { "epoch": 356.7763157894737, "grad_norm": 1.6268476247787476, "learning_rate": 0.0001, "loss": 0.0252, "step": 54230 }, { "epoch": 356.8421052631579, "grad_norm": 1.5271681547164917, "learning_rate": 0.0001, "loss": 0.0185, "step": 54240 }, { "epoch": 356.9078947368421, "grad_norm": 1.8361806869506836, "learning_rate": 0.0001, "loss": 0.0149, "step": 54250 }, { "epoch": 356.9736842105263, "grad_norm": 1.914542555809021, "learning_rate": 0.0001, "loss": 0.019, "step": 54260 }, { "epoch": 357.0394736842105, "grad_norm": 1.7695201635360718, "learning_rate": 0.0001, "loss": 0.0141, "step": 54270 }, { "epoch": 357.10526315789474, "grad_norm": 1.668940782546997, "learning_rate": 0.0001, "loss": 0.0159, "step": 54280 }, { "epoch": 357.17105263157896, "grad_norm": 1.4443022012710571, "learning_rate": 0.0001, "loss": 0.0151, "step": 54290 }, { "epoch": 357.2368421052632, "grad_norm": 1.5046727657318115, "learning_rate": 0.0001, "loss": 0.0185, "step": 54300 }, { "epoch": 357.30263157894734, "grad_norm": 1.5256441831588745, "learning_rate": 0.0001, "loss": 0.0168, "step": 54310 }, { "epoch": 357.36842105263156, "grad_norm": 1.6720316410064697, "learning_rate": 0.0001, "loss": 0.0167, "step": 54320 }, { "epoch": 357.4342105263158, "grad_norm": 1.623060703277588, "learning_rate": 0.0001, "loss": 0.0176, "step": 54330 }, { "epoch": 357.5, "grad_norm": 1.6064754724502563, "learning_rate": 0.0001, "loss": 0.0152, "step": 54340 }, { "epoch": 357.5657894736842, "grad_norm": 1.5602153539657593, "learning_rate": 0.0001, "loss": 0.0204, "step": 54350 }, { "epoch": 357.63157894736844, "grad_norm": 1.5169596672058105, "learning_rate": 0.0001, "loss": 0.0195, "step": 54360 }, { "epoch": 357.69736842105266, "grad_norm": 1.2743903398513794, "learning_rate": 0.0001, "loss": 0.0138, "step": 54370 }, { "epoch": 357.7631578947368, "grad_norm": 1.8714123964309692, "learning_rate": 0.0001, "loss": 0.0159, "step": 54380 }, { "epoch": 357.82894736842104, "grad_norm": 1.7210842370986938, "learning_rate": 0.0001, "loss": 0.0175, "step": 54390 }, { "epoch": 357.89473684210526, "grad_norm": 1.8364267349243164, "learning_rate": 0.0001, "loss": 0.0162, "step": 54400 }, { "epoch": 357.9605263157895, "grad_norm": 0.982020378112793, "learning_rate": 0.0001, "loss": 0.0155, "step": 54410 }, { "epoch": 358.0263157894737, "grad_norm": 1.4782559871673584, "learning_rate": 0.0001, "loss": 0.0156, "step": 54420 }, { "epoch": 358.0921052631579, "grad_norm": 1.690915584564209, "learning_rate": 0.0001, "loss": 0.0142, "step": 54430 }, { "epoch": 358.1578947368421, "grad_norm": 1.6649941205978394, "learning_rate": 0.0001, "loss": 0.0151, "step": 54440 }, { "epoch": 358.2236842105263, "grad_norm": 1.3062998056411743, "learning_rate": 0.0001, "loss": 0.0192, "step": 54450 }, { "epoch": 358.2894736842105, "grad_norm": 1.495348334312439, "learning_rate": 0.0001, "loss": 0.015, "step": 54460 }, { "epoch": 358.35526315789474, "grad_norm": 1.5280390977859497, "learning_rate": 0.0001, "loss": 0.0177, "step": 54470 }, { "epoch": 358.42105263157896, "grad_norm": 1.4045240879058838, "learning_rate": 0.0001, "loss": 0.0175, "step": 54480 }, { "epoch": 358.4868421052632, "grad_norm": 1.7877984046936035, "learning_rate": 0.0001, "loss": 0.0164, "step": 54490 }, { "epoch": 358.55263157894734, "grad_norm": 1.6511558294296265, "learning_rate": 0.0001, "loss": 0.0171, "step": 54500 }, { "epoch": 358.61842105263156, "grad_norm": 1.1793476343154907, "learning_rate": 0.0001, "loss": 0.0215, "step": 54510 }, { "epoch": 358.6842105263158, "grad_norm": 1.2287132740020752, "learning_rate": 0.0001, "loss": 0.0137, "step": 54520 }, { "epoch": 358.75, "grad_norm": 1.2868337631225586, "learning_rate": 0.0001, "loss": 0.0177, "step": 54530 }, { "epoch": 358.8157894736842, "grad_norm": 1.49101722240448, "learning_rate": 0.0001, "loss": 0.0184, "step": 54540 }, { "epoch": 358.88157894736844, "grad_norm": 1.5189485549926758, "learning_rate": 0.0001, "loss": 0.0177, "step": 54550 }, { "epoch": 358.94736842105266, "grad_norm": 1.4180183410644531, "learning_rate": 0.0001, "loss": 0.0148, "step": 54560 }, { "epoch": 359.0131578947368, "grad_norm": 1.3924213647842407, "learning_rate": 0.0001, "loss": 0.0157, "step": 54570 }, { "epoch": 359.07894736842104, "grad_norm": 1.3996708393096924, "learning_rate": 0.0001, "loss": 0.0186, "step": 54580 }, { "epoch": 359.14473684210526, "grad_norm": 1.3990732431411743, "learning_rate": 0.0001, "loss": 0.0246, "step": 54590 }, { "epoch": 359.2105263157895, "grad_norm": 1.6883550882339478, "learning_rate": 0.0001, "loss": 0.0181, "step": 54600 }, { "epoch": 359.2763157894737, "grad_norm": 1.23037850856781, "learning_rate": 0.0001, "loss": 0.0151, "step": 54610 }, { "epoch": 359.3421052631579, "grad_norm": 1.411527156829834, "learning_rate": 0.0001, "loss": 0.0137, "step": 54620 }, { "epoch": 359.4078947368421, "grad_norm": 1.4202873706817627, "learning_rate": 0.0001, "loss": 0.0174, "step": 54630 }, { "epoch": 359.4736842105263, "grad_norm": 1.3478974103927612, "learning_rate": 0.0001, "loss": 0.0192, "step": 54640 }, { "epoch": 359.5394736842105, "grad_norm": 1.3498014211654663, "learning_rate": 0.0001, "loss": 0.0201, "step": 54650 }, { "epoch": 359.60526315789474, "grad_norm": 1.432437777519226, "learning_rate": 0.0001, "loss": 0.0156, "step": 54660 }, { "epoch": 359.67105263157896, "grad_norm": 1.9507393836975098, "learning_rate": 0.0001, "loss": 0.017, "step": 54670 }, { "epoch": 359.7368421052632, "grad_norm": 1.5138309001922607, "learning_rate": 0.0001, "loss": 0.015, "step": 54680 }, { "epoch": 359.80263157894734, "grad_norm": 1.5159190893173218, "learning_rate": 0.0001, "loss": 0.0147, "step": 54690 }, { "epoch": 359.86842105263156, "grad_norm": 1.677233338356018, "learning_rate": 0.0001, "loss": 0.0153, "step": 54700 }, { "epoch": 359.9342105263158, "grad_norm": 1.8931002616882324, "learning_rate": 0.0001, "loss": 0.0161, "step": 54710 }, { "epoch": 360.0, "grad_norm": 1.7007734775543213, "learning_rate": 0.0001, "loss": 0.0154, "step": 54720 }, { "epoch": 360.0657894736842, "grad_norm": 1.8214675188064575, "learning_rate": 0.0001, "loss": 0.0155, "step": 54730 }, { "epoch": 360.13157894736844, "grad_norm": 1.8008934259414673, "learning_rate": 0.0001, "loss": 0.0183, "step": 54740 }, { "epoch": 360.19736842105266, "grad_norm": 1.6172151565551758, "learning_rate": 0.0001, "loss": 0.0199, "step": 54750 }, { "epoch": 360.2631578947368, "grad_norm": 1.5457102060317993, "learning_rate": 0.0001, "loss": 0.0179, "step": 54760 }, { "epoch": 360.32894736842104, "grad_norm": 1.4878647327423096, "learning_rate": 0.0001, "loss": 0.0191, "step": 54770 }, { "epoch": 360.39473684210526, "grad_norm": 1.3301146030426025, "learning_rate": 0.0001, "loss": 0.0152, "step": 54780 }, { "epoch": 360.4605263157895, "grad_norm": 1.3799364566802979, "learning_rate": 0.0001, "loss": 0.0216, "step": 54790 }, { "epoch": 360.5263157894737, "grad_norm": 1.4053195714950562, "learning_rate": 0.0001, "loss": 0.0163, "step": 54800 }, { "epoch": 360.5921052631579, "grad_norm": 1.4237667322158813, "learning_rate": 0.0001, "loss": 0.0184, "step": 54810 }, { "epoch": 360.6578947368421, "grad_norm": 1.1489841938018799, "learning_rate": 0.0001, "loss": 0.0138, "step": 54820 }, { "epoch": 360.7236842105263, "grad_norm": 1.199424147605896, "learning_rate": 0.0001, "loss": 0.0136, "step": 54830 }, { "epoch": 360.7894736842105, "grad_norm": 1.0899407863616943, "learning_rate": 0.0001, "loss": 0.0165, "step": 54840 }, { "epoch": 360.85526315789474, "grad_norm": 1.5247890949249268, "learning_rate": 0.0001, "loss": 0.0151, "step": 54850 }, { "epoch": 360.92105263157896, "grad_norm": 1.5132031440734863, "learning_rate": 0.0001, "loss": 0.017, "step": 54860 }, { "epoch": 360.9868421052632, "grad_norm": 1.4976085424423218, "learning_rate": 0.0001, "loss": 0.0157, "step": 54870 }, { "epoch": 361.05263157894734, "grad_norm": 1.3895379304885864, "learning_rate": 0.0001, "loss": 0.0159, "step": 54880 }, { "epoch": 361.11842105263156, "grad_norm": 1.502540946006775, "learning_rate": 0.0001, "loss": 0.0138, "step": 54890 }, { "epoch": 361.1842105263158, "grad_norm": 1.3664751052856445, "learning_rate": 0.0001, "loss": 0.0162, "step": 54900 }, { "epoch": 361.25, "grad_norm": 1.746593952178955, "learning_rate": 0.0001, "loss": 0.0184, "step": 54910 }, { "epoch": 361.3157894736842, "grad_norm": 1.6427803039550781, "learning_rate": 0.0001, "loss": 0.0168, "step": 54920 }, { "epoch": 361.38157894736844, "grad_norm": 1.5909993648529053, "learning_rate": 0.0001, "loss": 0.0157, "step": 54930 }, { "epoch": 361.44736842105266, "grad_norm": 1.80038321018219, "learning_rate": 0.0001, "loss": 0.0212, "step": 54940 }, { "epoch": 361.5131578947368, "grad_norm": 1.1862848997116089, "learning_rate": 0.0001, "loss": 0.021, "step": 54950 }, { "epoch": 361.57894736842104, "grad_norm": 1.6216156482696533, "learning_rate": 0.0001, "loss": 0.0169, "step": 54960 }, { "epoch": 361.64473684210526, "grad_norm": 1.4194923639297485, "learning_rate": 0.0001, "loss": 0.0133, "step": 54970 }, { "epoch": 361.7105263157895, "grad_norm": 1.5602408647537231, "learning_rate": 0.0001, "loss": 0.0183, "step": 54980 }, { "epoch": 361.7763157894737, "grad_norm": 1.1574900150299072, "learning_rate": 0.0001, "loss": 0.014, "step": 54990 }, { "epoch": 361.8421052631579, "grad_norm": 1.4207656383514404, "learning_rate": 0.0001, "loss": 0.0172, "step": 55000 }, { "epoch": 361.9078947368421, "grad_norm": 1.775161862373352, "learning_rate": 0.0001, "loss": 0.0164, "step": 55010 }, { "epoch": 361.9736842105263, "grad_norm": 1.7277536392211914, "learning_rate": 0.0001, "loss": 0.0181, "step": 55020 }, { "epoch": 362.0394736842105, "grad_norm": 1.4736621379852295, "learning_rate": 0.0001, "loss": 0.0175, "step": 55030 }, { "epoch": 362.10526315789474, "grad_norm": 1.6103055477142334, "learning_rate": 0.0001, "loss": 0.0155, "step": 55040 }, { "epoch": 362.17105263157896, "grad_norm": 1.8865047693252563, "learning_rate": 0.0001, "loss": 0.0148, "step": 55050 }, { "epoch": 362.2368421052632, "grad_norm": 1.8925142288208008, "learning_rate": 0.0001, "loss": 0.0194, "step": 55060 }, { "epoch": 362.30263157894734, "grad_norm": 1.8739464282989502, "learning_rate": 0.0001, "loss": 0.016, "step": 55070 }, { "epoch": 362.36842105263156, "grad_norm": 1.3018338680267334, "learning_rate": 0.0001, "loss": 0.0207, "step": 55080 }, { "epoch": 362.4342105263158, "grad_norm": 1.6159543991088867, "learning_rate": 0.0001, "loss": 0.0166, "step": 55090 }, { "epoch": 362.5, "grad_norm": 1.722641944885254, "learning_rate": 0.0001, "loss": 0.017, "step": 55100 }, { "epoch": 362.5657894736842, "grad_norm": 1.6091501712799072, "learning_rate": 0.0001, "loss": 0.0163, "step": 55110 }, { "epoch": 362.63157894736844, "grad_norm": 1.780150294303894, "learning_rate": 0.0001, "loss": 0.0142, "step": 55120 }, { "epoch": 362.69736842105266, "grad_norm": 1.696168303489685, "learning_rate": 0.0001, "loss": 0.0188, "step": 55130 }, { "epoch": 362.7631578947368, "grad_norm": 1.6023074388504028, "learning_rate": 0.0001, "loss": 0.0162, "step": 55140 }, { "epoch": 362.82894736842104, "grad_norm": 1.3187391757965088, "learning_rate": 0.0001, "loss": 0.0156, "step": 55150 }, { "epoch": 362.89473684210526, "grad_norm": 1.6923558712005615, "learning_rate": 0.0001, "loss": 0.0132, "step": 55160 }, { "epoch": 362.9605263157895, "grad_norm": 1.6201618909835815, "learning_rate": 0.0001, "loss": 0.0178, "step": 55170 }, { "epoch": 363.0263157894737, "grad_norm": 2.062826633453369, "learning_rate": 0.0001, "loss": 0.0156, "step": 55180 }, { "epoch": 363.0921052631579, "grad_norm": 1.5040078163146973, "learning_rate": 0.0001, "loss": 0.0157, "step": 55190 }, { "epoch": 363.1578947368421, "grad_norm": 1.5867013931274414, "learning_rate": 0.0001, "loss": 0.0181, "step": 55200 }, { "epoch": 363.2236842105263, "grad_norm": 1.1805633306503296, "learning_rate": 0.0001, "loss": 0.0199, "step": 55210 }, { "epoch": 363.2894736842105, "grad_norm": 1.522652268409729, "learning_rate": 0.0001, "loss": 0.0162, "step": 55220 }, { "epoch": 363.35526315789474, "grad_norm": 1.8778797388076782, "learning_rate": 0.0001, "loss": 0.0155, "step": 55230 }, { "epoch": 363.42105263157896, "grad_norm": 1.8945499658584595, "learning_rate": 0.0001, "loss": 0.0183, "step": 55240 }, { "epoch": 363.4868421052632, "grad_norm": 1.4544740915298462, "learning_rate": 0.0001, "loss": 0.0181, "step": 55250 }, { "epoch": 363.55263157894734, "grad_norm": 1.7297407388687134, "learning_rate": 0.0001, "loss": 0.0135, "step": 55260 }, { "epoch": 363.61842105263156, "grad_norm": 1.8414150476455688, "learning_rate": 0.0001, "loss": 0.0208, "step": 55270 }, { "epoch": 363.6842105263158, "grad_norm": 1.8849809169769287, "learning_rate": 0.0001, "loss": 0.0149, "step": 55280 }, { "epoch": 363.75, "grad_norm": 1.7708572149276733, "learning_rate": 0.0001, "loss": 0.0132, "step": 55290 }, { "epoch": 363.8157894736842, "grad_norm": 1.4237287044525146, "learning_rate": 0.0001, "loss": 0.0149, "step": 55300 }, { "epoch": 363.88157894736844, "grad_norm": 2.067613363265991, "learning_rate": 0.0001, "loss": 0.0178, "step": 55310 }, { "epoch": 363.94736842105266, "grad_norm": 1.9970831871032715, "learning_rate": 0.0001, "loss": 0.0178, "step": 55320 }, { "epoch": 364.0131578947368, "grad_norm": 1.6204793453216553, "learning_rate": 0.0001, "loss": 0.0163, "step": 55330 }, { "epoch": 364.07894736842104, "grad_norm": 1.4929988384246826, "learning_rate": 0.0001, "loss": 0.0151, "step": 55340 }, { "epoch": 364.14473684210526, "grad_norm": 1.4646345376968384, "learning_rate": 0.0001, "loss": 0.0165, "step": 55350 }, { "epoch": 364.2105263157895, "grad_norm": 2.0637028217315674, "learning_rate": 0.0001, "loss": 0.0182, "step": 55360 }, { "epoch": 364.2763157894737, "grad_norm": 1.4336882829666138, "learning_rate": 0.0001, "loss": 0.0153, "step": 55370 }, { "epoch": 364.3421052631579, "grad_norm": 2.100595474243164, "learning_rate": 0.0001, "loss": 0.0178, "step": 55380 }, { "epoch": 364.4078947368421, "grad_norm": 1.4184706211090088, "learning_rate": 0.0001, "loss": 0.0207, "step": 55390 }, { "epoch": 364.4736842105263, "grad_norm": 1.3103786706924438, "learning_rate": 0.0001, "loss": 0.0154, "step": 55400 }, { "epoch": 364.5394736842105, "grad_norm": 1.4150391817092896, "learning_rate": 0.0001, "loss": 0.0148, "step": 55410 }, { "epoch": 364.60526315789474, "grad_norm": 1.4981722831726074, "learning_rate": 0.0001, "loss": 0.0152, "step": 55420 }, { "epoch": 364.67105263157896, "grad_norm": 1.1429510116577148, "learning_rate": 0.0001, "loss": 0.0189, "step": 55430 }, { "epoch": 364.7368421052632, "grad_norm": 1.5171343088150024, "learning_rate": 0.0001, "loss": 0.0176, "step": 55440 }, { "epoch": 364.80263157894734, "grad_norm": 1.8238722085952759, "learning_rate": 0.0001, "loss": 0.0151, "step": 55450 }, { "epoch": 364.86842105263156, "grad_norm": 1.579099178314209, "learning_rate": 0.0001, "loss": 0.0177, "step": 55460 }, { "epoch": 364.9342105263158, "grad_norm": 2.2285594940185547, "learning_rate": 0.0001, "loss": 0.0164, "step": 55470 }, { "epoch": 365.0, "grad_norm": 1.3911491632461548, "learning_rate": 0.0001, "loss": 0.017, "step": 55480 }, { "epoch": 365.0657894736842, "grad_norm": 1.2788976430892944, "learning_rate": 0.0001, "loss": 0.0218, "step": 55490 }, { "epoch": 365.13157894736844, "grad_norm": 1.835127592086792, "learning_rate": 0.0001, "loss": 0.0147, "step": 55500 }, { "epoch": 365.19736842105266, "grad_norm": 1.8282177448272705, "learning_rate": 0.0001, "loss": 0.0157, "step": 55510 }, { "epoch": 365.2631578947368, "grad_norm": 1.4996572732925415, "learning_rate": 0.0001, "loss": 0.0151, "step": 55520 }, { "epoch": 365.32894736842104, "grad_norm": 1.261529564857483, "learning_rate": 0.0001, "loss": 0.0145, "step": 55530 }, { "epoch": 365.39473684210526, "grad_norm": 1.4566069841384888, "learning_rate": 0.0001, "loss": 0.0137, "step": 55540 }, { "epoch": 365.4605263157895, "grad_norm": 1.70866858959198, "learning_rate": 0.0001, "loss": 0.0163, "step": 55550 }, { "epoch": 365.5263157894737, "grad_norm": 1.934308409690857, "learning_rate": 0.0001, "loss": 0.0172, "step": 55560 }, { "epoch": 365.5921052631579, "grad_norm": 1.2779927253723145, "learning_rate": 0.0001, "loss": 0.0146, "step": 55570 }, { "epoch": 365.6578947368421, "grad_norm": 1.8539220094680786, "learning_rate": 0.0001, "loss": 0.0192, "step": 55580 }, { "epoch": 365.7236842105263, "grad_norm": 1.6937264204025269, "learning_rate": 0.0001, "loss": 0.0231, "step": 55590 }, { "epoch": 365.7894736842105, "grad_norm": 1.2888002395629883, "learning_rate": 0.0001, "loss": 0.0146, "step": 55600 }, { "epoch": 365.85526315789474, "grad_norm": 1.325484275817871, "learning_rate": 0.0001, "loss": 0.0151, "step": 55610 }, { "epoch": 365.92105263157896, "grad_norm": 1.8668959140777588, "learning_rate": 0.0001, "loss": 0.015, "step": 55620 }, { "epoch": 365.9868421052632, "grad_norm": 1.6742079257965088, "learning_rate": 0.0001, "loss": 0.0221, "step": 55630 }, { "epoch": 366.05263157894734, "grad_norm": 1.2880260944366455, "learning_rate": 0.0001, "loss": 0.0185, "step": 55640 }, { "epoch": 366.11842105263156, "grad_norm": 1.3981151580810547, "learning_rate": 0.0001, "loss": 0.0135, "step": 55650 }, { "epoch": 366.1842105263158, "grad_norm": 1.8298863172531128, "learning_rate": 0.0001, "loss": 0.0195, "step": 55660 }, { "epoch": 366.25, "grad_norm": 1.3886628150939941, "learning_rate": 0.0001, "loss": 0.0194, "step": 55670 }, { "epoch": 366.3157894736842, "grad_norm": 1.5217036008834839, "learning_rate": 0.0001, "loss": 0.0195, "step": 55680 }, { "epoch": 366.38157894736844, "grad_norm": 1.2379776239395142, "learning_rate": 0.0001, "loss": 0.0168, "step": 55690 }, { "epoch": 366.44736842105266, "grad_norm": 1.6097928285598755, "learning_rate": 0.0001, "loss": 0.0161, "step": 55700 }, { "epoch": 366.5131578947368, "grad_norm": 1.7318476438522339, "learning_rate": 0.0001, "loss": 0.0167, "step": 55710 }, { "epoch": 366.57894736842104, "grad_norm": 1.2666572332382202, "learning_rate": 0.0001, "loss": 0.0175, "step": 55720 }, { "epoch": 366.64473684210526, "grad_norm": 1.3376952409744263, "learning_rate": 0.0001, "loss": 0.0182, "step": 55730 }, { "epoch": 366.7105263157895, "grad_norm": 1.424842357635498, "learning_rate": 0.0001, "loss": 0.015, "step": 55740 }, { "epoch": 366.7763157894737, "grad_norm": 0.9805811047554016, "learning_rate": 0.0001, "loss": 0.0139, "step": 55750 }, { "epoch": 366.8421052631579, "grad_norm": 1.780051827430725, "learning_rate": 0.0001, "loss": 0.0178, "step": 55760 }, { "epoch": 366.9078947368421, "grad_norm": 1.4231534004211426, "learning_rate": 0.0001, "loss": 0.0151, "step": 55770 }, { "epoch": 366.9736842105263, "grad_norm": 1.4782001972198486, "learning_rate": 0.0001, "loss": 0.0181, "step": 55780 }, { "epoch": 367.0394736842105, "grad_norm": 1.940183401107788, "learning_rate": 0.0001, "loss": 0.0165, "step": 55790 }, { "epoch": 367.10526315789474, "grad_norm": 1.7892380952835083, "learning_rate": 0.0001, "loss": 0.02, "step": 55800 }, { "epoch": 367.17105263157896, "grad_norm": 1.7469680309295654, "learning_rate": 0.0001, "loss": 0.0146, "step": 55810 }, { "epoch": 367.2368421052632, "grad_norm": 1.5351835489273071, "learning_rate": 0.0001, "loss": 0.0178, "step": 55820 }, { "epoch": 367.30263157894734, "grad_norm": 2.105440855026245, "learning_rate": 0.0001, "loss": 0.0167, "step": 55830 }, { "epoch": 367.36842105263156, "grad_norm": 1.6571506261825562, "learning_rate": 0.0001, "loss": 0.0168, "step": 55840 }, { "epoch": 367.4342105263158, "grad_norm": 1.7915575504302979, "learning_rate": 0.0001, "loss": 0.0187, "step": 55850 }, { "epoch": 367.5, "grad_norm": 1.4990015029907227, "learning_rate": 0.0001, "loss": 0.0197, "step": 55860 }, { "epoch": 367.5657894736842, "grad_norm": 1.651869535446167, "learning_rate": 0.0001, "loss": 0.0147, "step": 55870 }, { "epoch": 367.63157894736844, "grad_norm": 2.068398952484131, "learning_rate": 0.0001, "loss": 0.0131, "step": 55880 }, { "epoch": 367.69736842105266, "grad_norm": 1.9691094160079956, "learning_rate": 0.0001, "loss": 0.0147, "step": 55890 }, { "epoch": 367.7631578947368, "grad_norm": 1.5697758197784424, "learning_rate": 0.0001, "loss": 0.0169, "step": 55900 }, { "epoch": 367.82894736842104, "grad_norm": 1.5825053453445435, "learning_rate": 0.0001, "loss": 0.0144, "step": 55910 }, { "epoch": 367.89473684210526, "grad_norm": 1.507236123085022, "learning_rate": 0.0001, "loss": 0.0189, "step": 55920 }, { "epoch": 367.9605263157895, "grad_norm": 1.316042423248291, "learning_rate": 0.0001, "loss": 0.0136, "step": 55930 }, { "epoch": 368.0263157894737, "grad_norm": 1.5521786212921143, "learning_rate": 0.0001, "loss": 0.0206, "step": 55940 }, { "epoch": 368.0921052631579, "grad_norm": 1.1794860363006592, "learning_rate": 0.0001, "loss": 0.0173, "step": 55950 }, { "epoch": 368.1578947368421, "grad_norm": 1.5819438695907593, "learning_rate": 0.0001, "loss": 0.0166, "step": 55960 }, { "epoch": 368.2236842105263, "grad_norm": 1.4329899549484253, "learning_rate": 0.0001, "loss": 0.0153, "step": 55970 }, { "epoch": 368.2894736842105, "grad_norm": 1.6929794549942017, "learning_rate": 0.0001, "loss": 0.0157, "step": 55980 }, { "epoch": 368.35526315789474, "grad_norm": 1.758679747581482, "learning_rate": 0.0001, "loss": 0.0181, "step": 55990 }, { "epoch": 368.42105263157896, "grad_norm": 1.4872119426727295, "learning_rate": 0.0001, "loss": 0.0196, "step": 56000 }, { "epoch": 368.4868421052632, "grad_norm": 1.5771743059158325, "learning_rate": 0.0001, "loss": 0.016, "step": 56010 }, { "epoch": 368.55263157894734, "grad_norm": 1.4324814081192017, "learning_rate": 0.0001, "loss": 0.0159, "step": 56020 }, { "epoch": 368.61842105263156, "grad_norm": 1.561370849609375, "learning_rate": 0.0001, "loss": 0.016, "step": 56030 }, { "epoch": 368.6842105263158, "grad_norm": 1.3135547637939453, "learning_rate": 0.0001, "loss": 0.0133, "step": 56040 }, { "epoch": 368.75, "grad_norm": 1.4865998029708862, "learning_rate": 0.0001, "loss": 0.0134, "step": 56050 }, { "epoch": 368.8157894736842, "grad_norm": 1.4046169519424438, "learning_rate": 0.0001, "loss": 0.0175, "step": 56060 }, { "epoch": 368.88157894736844, "grad_norm": 1.3707423210144043, "learning_rate": 0.0001, "loss": 0.0179, "step": 56070 }, { "epoch": 368.94736842105266, "grad_norm": 1.663061261177063, "learning_rate": 0.0001, "loss": 0.0171, "step": 56080 }, { "epoch": 369.0131578947368, "grad_norm": 1.3331360816955566, "learning_rate": 0.0001, "loss": 0.0174, "step": 56090 }, { "epoch": 369.07894736842104, "grad_norm": 1.3722054958343506, "learning_rate": 0.0001, "loss": 0.0189, "step": 56100 }, { "epoch": 369.14473684210526, "grad_norm": 1.4471009969711304, "learning_rate": 0.0001, "loss": 0.0172, "step": 56110 }, { "epoch": 369.2105263157895, "grad_norm": 1.2976864576339722, "learning_rate": 0.0001, "loss": 0.0202, "step": 56120 }, { "epoch": 369.2763157894737, "grad_norm": 1.3319823741912842, "learning_rate": 0.0001, "loss": 0.0169, "step": 56130 }, { "epoch": 369.3421052631579, "grad_norm": 1.368345856666565, "learning_rate": 0.0001, "loss": 0.0168, "step": 56140 }, { "epoch": 369.4078947368421, "grad_norm": 1.5703082084655762, "learning_rate": 0.0001, "loss": 0.0159, "step": 56150 }, { "epoch": 369.4736842105263, "grad_norm": 1.62283456325531, "learning_rate": 0.0001, "loss": 0.0144, "step": 56160 }, { "epoch": 369.5394736842105, "grad_norm": 1.7567051649093628, "learning_rate": 0.0001, "loss": 0.0177, "step": 56170 }, { "epoch": 369.60526315789474, "grad_norm": 1.0963680744171143, "learning_rate": 0.0001, "loss": 0.0161, "step": 56180 }, { "epoch": 369.67105263157896, "grad_norm": 1.5022265911102295, "learning_rate": 0.0001, "loss": 0.0157, "step": 56190 }, { "epoch": 369.7368421052632, "grad_norm": 1.0984909534454346, "learning_rate": 0.0001, "loss": 0.0192, "step": 56200 }, { "epoch": 369.80263157894734, "grad_norm": 1.3718883991241455, "learning_rate": 0.0001, "loss": 0.0191, "step": 56210 }, { "epoch": 369.86842105263156, "grad_norm": 1.697741150856018, "learning_rate": 0.0001, "loss": 0.0163, "step": 56220 }, { "epoch": 369.9342105263158, "grad_norm": 1.3996537923812866, "learning_rate": 0.0001, "loss": 0.0202, "step": 56230 }, { "epoch": 370.0, "grad_norm": 1.489426851272583, "learning_rate": 0.0001, "loss": 0.016, "step": 56240 }, { "epoch": 370.0657894736842, "grad_norm": 1.5883547067642212, "learning_rate": 0.0001, "loss": 0.0155, "step": 56250 }, { "epoch": 370.13157894736844, "grad_norm": 1.3936411142349243, "learning_rate": 0.0001, "loss": 0.0142, "step": 56260 }, { "epoch": 370.19736842105266, "grad_norm": 1.5481468439102173, "learning_rate": 0.0001, "loss": 0.0193, "step": 56270 }, { "epoch": 370.2631578947368, "grad_norm": 1.6796091794967651, "learning_rate": 0.0001, "loss": 0.0154, "step": 56280 }, { "epoch": 370.32894736842104, "grad_norm": 1.201174259185791, "learning_rate": 0.0001, "loss": 0.0175, "step": 56290 }, { "epoch": 370.39473684210526, "grad_norm": 1.5033103227615356, "learning_rate": 0.0001, "loss": 0.0158, "step": 56300 }, { "epoch": 370.4605263157895, "grad_norm": 1.9066828489303589, "learning_rate": 0.0001, "loss": 0.0168, "step": 56310 }, { "epoch": 370.5263157894737, "grad_norm": 1.587959885597229, "learning_rate": 0.0001, "loss": 0.0197, "step": 56320 }, { "epoch": 370.5921052631579, "grad_norm": 1.514899492263794, "learning_rate": 0.0001, "loss": 0.0141, "step": 56330 }, { "epoch": 370.6578947368421, "grad_norm": 1.7559579610824585, "learning_rate": 0.0001, "loss": 0.0153, "step": 56340 }, { "epoch": 370.7236842105263, "grad_norm": 1.6296696662902832, "learning_rate": 0.0001, "loss": 0.017, "step": 56350 }, { "epoch": 370.7894736842105, "grad_norm": 1.7876832485198975, "learning_rate": 0.0001, "loss": 0.0186, "step": 56360 }, { "epoch": 370.85526315789474, "grad_norm": 1.5987083911895752, "learning_rate": 0.0001, "loss": 0.0194, "step": 56370 }, { "epoch": 370.92105263157896, "grad_norm": 1.3100143671035767, "learning_rate": 0.0001, "loss": 0.0165, "step": 56380 }, { "epoch": 370.9868421052632, "grad_norm": 1.822366714477539, "learning_rate": 0.0001, "loss": 0.019, "step": 56390 }, { "epoch": 371.05263157894734, "grad_norm": 1.7780680656433105, "learning_rate": 0.0001, "loss": 0.017, "step": 56400 }, { "epoch": 371.11842105263156, "grad_norm": 1.7282249927520752, "learning_rate": 0.0001, "loss": 0.0172, "step": 56410 }, { "epoch": 371.1842105263158, "grad_norm": 1.7132377624511719, "learning_rate": 0.0001, "loss": 0.0173, "step": 56420 }, { "epoch": 371.25, "grad_norm": 1.563104510307312, "learning_rate": 0.0001, "loss": 0.0171, "step": 56430 }, { "epoch": 371.3157894736842, "grad_norm": 1.5265289545059204, "learning_rate": 0.0001, "loss": 0.017, "step": 56440 }, { "epoch": 371.38157894736844, "grad_norm": 1.5163369178771973, "learning_rate": 0.0001, "loss": 0.015, "step": 56450 }, { "epoch": 371.44736842105266, "grad_norm": 1.7325938940048218, "learning_rate": 0.0001, "loss": 0.0135, "step": 56460 }, { "epoch": 371.5131578947368, "grad_norm": 1.5913820266723633, "learning_rate": 0.0001, "loss": 0.0134, "step": 56470 }, { "epoch": 371.57894736842104, "grad_norm": 1.0634406805038452, "learning_rate": 0.0001, "loss": 0.0157, "step": 56480 }, { "epoch": 371.64473684210526, "grad_norm": 1.2740205526351929, "learning_rate": 0.0001, "loss": 0.0145, "step": 56490 }, { "epoch": 371.7105263157895, "grad_norm": 1.365240454673767, "learning_rate": 0.0001, "loss": 0.0212, "step": 56500 }, { "epoch": 371.7763157894737, "grad_norm": 1.7161428928375244, "learning_rate": 0.0001, "loss": 0.0173, "step": 56510 }, { "epoch": 371.8421052631579, "grad_norm": 1.4610790014266968, "learning_rate": 0.0001, "loss": 0.0207, "step": 56520 }, { "epoch": 371.9078947368421, "grad_norm": 1.866560697555542, "learning_rate": 0.0001, "loss": 0.0195, "step": 56530 }, { "epoch": 371.9736842105263, "grad_norm": 1.1497993469238281, "learning_rate": 0.0001, "loss": 0.0147, "step": 56540 }, { "epoch": 372.0394736842105, "grad_norm": 1.3922438621520996, "learning_rate": 0.0001, "loss": 0.0182, "step": 56550 }, { "epoch": 372.10526315789474, "grad_norm": 1.599608063697815, "learning_rate": 0.0001, "loss": 0.0206, "step": 56560 }, { "epoch": 372.17105263157896, "grad_norm": 1.649513840675354, "learning_rate": 0.0001, "loss": 0.0148, "step": 56570 }, { "epoch": 372.2368421052632, "grad_norm": 1.6656485795974731, "learning_rate": 0.0001, "loss": 0.0142, "step": 56580 }, { "epoch": 372.30263157894734, "grad_norm": 1.162455439567566, "learning_rate": 0.0001, "loss": 0.0167, "step": 56590 }, { "epoch": 372.36842105263156, "grad_norm": 1.7360057830810547, "learning_rate": 0.0001, "loss": 0.0133, "step": 56600 }, { "epoch": 372.4342105263158, "grad_norm": 1.4405936002731323, "learning_rate": 0.0001, "loss": 0.0154, "step": 56610 }, { "epoch": 372.5, "grad_norm": 1.887589693069458, "learning_rate": 0.0001, "loss": 0.0171, "step": 56620 }, { "epoch": 372.5657894736842, "grad_norm": 1.7270365953445435, "learning_rate": 0.0001, "loss": 0.0162, "step": 56630 }, { "epoch": 372.63157894736844, "grad_norm": 1.9033633470535278, "learning_rate": 0.0001, "loss": 0.0201, "step": 56640 }, { "epoch": 372.69736842105266, "grad_norm": 1.7245503664016724, "learning_rate": 0.0001, "loss": 0.0194, "step": 56650 }, { "epoch": 372.7631578947368, "grad_norm": 1.1773473024368286, "learning_rate": 0.0001, "loss": 0.0196, "step": 56660 }, { "epoch": 372.82894736842104, "grad_norm": 1.9694554805755615, "learning_rate": 0.0001, "loss": 0.0166, "step": 56670 }, { "epoch": 372.89473684210526, "grad_norm": 2.014662504196167, "learning_rate": 0.0001, "loss": 0.0204, "step": 56680 }, { "epoch": 372.9605263157895, "grad_norm": 1.3459968566894531, "learning_rate": 0.0001, "loss": 0.0148, "step": 56690 }, { "epoch": 373.0263157894737, "grad_norm": 1.9738984107971191, "learning_rate": 0.0001, "loss": 0.0164, "step": 56700 }, { "epoch": 373.0921052631579, "grad_norm": 1.6188879013061523, "learning_rate": 0.0001, "loss": 0.0187, "step": 56710 }, { "epoch": 373.1578947368421, "grad_norm": 2.2790749073028564, "learning_rate": 0.0001, "loss": 0.0158, "step": 56720 }, { "epoch": 373.2236842105263, "grad_norm": 1.3855371475219727, "learning_rate": 0.0001, "loss": 0.0178, "step": 56730 }, { "epoch": 373.2894736842105, "grad_norm": 1.5415394306182861, "learning_rate": 0.0001, "loss": 0.0166, "step": 56740 }, { "epoch": 373.35526315789474, "grad_norm": 1.4844014644622803, "learning_rate": 0.0001, "loss": 0.0148, "step": 56750 }, { "epoch": 373.42105263157896, "grad_norm": 1.2579971551895142, "learning_rate": 0.0001, "loss": 0.0171, "step": 56760 }, { "epoch": 373.4868421052632, "grad_norm": 1.497989535331726, "learning_rate": 0.0001, "loss": 0.0163, "step": 56770 }, { "epoch": 373.55263157894734, "grad_norm": 1.1008915901184082, "learning_rate": 0.0001, "loss": 0.0173, "step": 56780 }, { "epoch": 373.61842105263156, "grad_norm": 1.5318387746810913, "learning_rate": 0.0001, "loss": 0.0179, "step": 56790 }, { "epoch": 373.6842105263158, "grad_norm": 1.6518226861953735, "learning_rate": 0.0001, "loss": 0.0175, "step": 56800 }, { "epoch": 373.75, "grad_norm": 1.4945586919784546, "learning_rate": 0.0001, "loss": 0.0187, "step": 56810 }, { "epoch": 373.8157894736842, "grad_norm": 1.2810075283050537, "learning_rate": 0.0001, "loss": 0.0171, "step": 56820 }, { "epoch": 373.88157894736844, "grad_norm": 1.5451793670654297, "learning_rate": 0.0001, "loss": 0.0162, "step": 56830 }, { "epoch": 373.94736842105266, "grad_norm": 1.9597653150558472, "learning_rate": 0.0001, "loss": 0.0191, "step": 56840 }, { "epoch": 374.0131578947368, "grad_norm": 1.3497482538223267, "learning_rate": 0.0001, "loss": 0.0179, "step": 56850 }, { "epoch": 374.07894736842104, "grad_norm": 1.3304482698440552, "learning_rate": 0.0001, "loss": 0.0186, "step": 56860 }, { "epoch": 374.14473684210526, "grad_norm": 1.3576395511627197, "learning_rate": 0.0001, "loss": 0.0163, "step": 56870 }, { "epoch": 374.2105263157895, "grad_norm": 1.5674774646759033, "learning_rate": 0.0001, "loss": 0.0168, "step": 56880 }, { "epoch": 374.2763157894737, "grad_norm": 1.3509973287582397, "learning_rate": 0.0001, "loss": 0.0139, "step": 56890 }, { "epoch": 374.3421052631579, "grad_norm": 1.4317675828933716, "learning_rate": 0.0001, "loss": 0.016, "step": 56900 }, { "epoch": 374.4078947368421, "grad_norm": 1.6174479722976685, "learning_rate": 0.0001, "loss": 0.015, "step": 56910 }, { "epoch": 374.4736842105263, "grad_norm": 1.8508890867233276, "learning_rate": 0.0001, "loss": 0.0159, "step": 56920 }, { "epoch": 374.5394736842105, "grad_norm": 1.5931288003921509, "learning_rate": 0.0001, "loss": 0.0212, "step": 56930 }, { "epoch": 374.60526315789474, "grad_norm": 1.8496499061584473, "learning_rate": 0.0001, "loss": 0.0161, "step": 56940 }, { "epoch": 374.67105263157896, "grad_norm": 1.5312625169754028, "learning_rate": 0.0001, "loss": 0.018, "step": 56950 }, { "epoch": 374.7368421052632, "grad_norm": 1.6904314756393433, "learning_rate": 0.0001, "loss": 0.0205, "step": 56960 }, { "epoch": 374.80263157894734, "grad_norm": 1.7160310745239258, "learning_rate": 0.0001, "loss": 0.0139, "step": 56970 }, { "epoch": 374.86842105263156, "grad_norm": 1.5058043003082275, "learning_rate": 0.0001, "loss": 0.0165, "step": 56980 }, { "epoch": 374.9342105263158, "grad_norm": 1.7677322626113892, "learning_rate": 0.0001, "loss": 0.0183, "step": 56990 }, { "epoch": 375.0, "grad_norm": 2.198329210281372, "learning_rate": 0.0001, "loss": 0.0185, "step": 57000 }, { "epoch": 375.0657894736842, "grad_norm": 2.2105376720428467, "learning_rate": 0.0001, "loss": 0.0197, "step": 57010 }, { "epoch": 375.13157894736844, "grad_norm": 1.9187871217727661, "learning_rate": 0.0001, "loss": 0.0193, "step": 57020 }, { "epoch": 375.19736842105266, "grad_norm": 1.796032428741455, "learning_rate": 0.0001, "loss": 0.0146, "step": 57030 }, { "epoch": 375.2631578947368, "grad_norm": 1.355418086051941, "learning_rate": 0.0001, "loss": 0.0144, "step": 57040 }, { "epoch": 375.32894736842104, "grad_norm": 1.5528067350387573, "learning_rate": 0.0001, "loss": 0.0161, "step": 57050 }, { "epoch": 375.39473684210526, "grad_norm": 1.557374119758606, "learning_rate": 0.0001, "loss": 0.0153, "step": 57060 }, { "epoch": 375.4605263157895, "grad_norm": 1.4118678569793701, "learning_rate": 0.0001, "loss": 0.0192, "step": 57070 }, { "epoch": 375.5263157894737, "grad_norm": 1.6859291791915894, "learning_rate": 0.0001, "loss": 0.0159, "step": 57080 }, { "epoch": 375.5921052631579, "grad_norm": 1.7062078714370728, "learning_rate": 0.0001, "loss": 0.0196, "step": 57090 }, { "epoch": 375.6578947368421, "grad_norm": 1.7608461380004883, "learning_rate": 0.0001, "loss": 0.0156, "step": 57100 }, { "epoch": 375.7236842105263, "grad_norm": 1.4480544328689575, "learning_rate": 0.0001, "loss": 0.0139, "step": 57110 }, { "epoch": 375.7894736842105, "grad_norm": 1.4341135025024414, "learning_rate": 0.0001, "loss": 0.0149, "step": 57120 }, { "epoch": 375.85526315789474, "grad_norm": 1.7648075819015503, "learning_rate": 0.0001, "loss": 0.0157, "step": 57130 }, { "epoch": 375.92105263157896, "grad_norm": 1.3970409631729126, "learning_rate": 0.0001, "loss": 0.0171, "step": 57140 }, { "epoch": 375.9868421052632, "grad_norm": 1.9337774515151978, "learning_rate": 0.0001, "loss": 0.017, "step": 57150 }, { "epoch": 376.05263157894734, "grad_norm": 1.7101097106933594, "learning_rate": 0.0001, "loss": 0.0138, "step": 57160 }, { "epoch": 376.11842105263156, "grad_norm": 1.6418572664260864, "learning_rate": 0.0001, "loss": 0.0142, "step": 57170 }, { "epoch": 376.1842105263158, "grad_norm": 2.0075223445892334, "learning_rate": 0.0001, "loss": 0.0221, "step": 57180 }, { "epoch": 376.25, "grad_norm": 1.3241665363311768, "learning_rate": 0.0001, "loss": 0.0164, "step": 57190 }, { "epoch": 376.3157894736842, "grad_norm": 2.0469918251037598, "learning_rate": 0.0001, "loss": 0.0152, "step": 57200 }, { "epoch": 376.38157894736844, "grad_norm": 1.3470739126205444, "learning_rate": 0.0001, "loss": 0.0165, "step": 57210 }, { "epoch": 376.44736842105266, "grad_norm": 1.595441222190857, "learning_rate": 0.0001, "loss": 0.0158, "step": 57220 }, { "epoch": 376.5131578947368, "grad_norm": 1.9005584716796875, "learning_rate": 0.0001, "loss": 0.0172, "step": 57230 }, { "epoch": 376.57894736842104, "grad_norm": 1.4663439989089966, "learning_rate": 0.0001, "loss": 0.0171, "step": 57240 }, { "epoch": 376.64473684210526, "grad_norm": 1.6988157033920288, "learning_rate": 0.0001, "loss": 0.0149, "step": 57250 }, { "epoch": 376.7105263157895, "grad_norm": 1.7026116847991943, "learning_rate": 0.0001, "loss": 0.0179, "step": 57260 }, { "epoch": 376.7763157894737, "grad_norm": 1.922040343284607, "learning_rate": 0.0001, "loss": 0.0161, "step": 57270 }, { "epoch": 376.8421052631579, "grad_norm": 1.8122014999389648, "learning_rate": 0.0001, "loss": 0.0131, "step": 57280 }, { "epoch": 376.9078947368421, "grad_norm": 1.9087917804718018, "learning_rate": 0.0001, "loss": 0.0199, "step": 57290 }, { "epoch": 376.9736842105263, "grad_norm": 1.6369001865386963, "learning_rate": 0.0001, "loss": 0.0166, "step": 57300 }, { "epoch": 377.0394736842105, "grad_norm": 1.436479926109314, "learning_rate": 0.0001, "loss": 0.0159, "step": 57310 }, { "epoch": 377.10526315789474, "grad_norm": 1.756460189819336, "learning_rate": 0.0001, "loss": 0.0147, "step": 57320 }, { "epoch": 377.17105263157896, "grad_norm": 1.9270203113555908, "learning_rate": 0.0001, "loss": 0.0148, "step": 57330 }, { "epoch": 377.2368421052632, "grad_norm": 1.7920743227005005, "learning_rate": 0.0001, "loss": 0.0165, "step": 57340 }, { "epoch": 377.30263157894734, "grad_norm": 1.7337141036987305, "learning_rate": 0.0001, "loss": 0.0146, "step": 57350 }, { "epoch": 377.36842105263156, "grad_norm": 2.1096582412719727, "learning_rate": 0.0001, "loss": 0.0177, "step": 57360 }, { "epoch": 377.4342105263158, "grad_norm": 1.449084997177124, "learning_rate": 0.0001, "loss": 0.0232, "step": 57370 }, { "epoch": 377.5, "grad_norm": 1.754794716835022, "learning_rate": 0.0001, "loss": 0.0156, "step": 57380 }, { "epoch": 377.5657894736842, "grad_norm": 1.5197498798370361, "learning_rate": 0.0001, "loss": 0.015, "step": 57390 }, { "epoch": 377.63157894736844, "grad_norm": 1.1955915689468384, "learning_rate": 0.0001, "loss": 0.0153, "step": 57400 }, { "epoch": 377.69736842105266, "grad_norm": 1.7993227243423462, "learning_rate": 0.0001, "loss": 0.0159, "step": 57410 }, { "epoch": 377.7631578947368, "grad_norm": 1.7082363367080688, "learning_rate": 0.0001, "loss": 0.0156, "step": 57420 }, { "epoch": 377.82894736842104, "grad_norm": 1.7360738515853882, "learning_rate": 0.0001, "loss": 0.0175, "step": 57430 }, { "epoch": 377.89473684210526, "grad_norm": 1.4912428855895996, "learning_rate": 0.0001, "loss": 0.0172, "step": 57440 }, { "epoch": 377.9605263157895, "grad_norm": 1.5086647272109985, "learning_rate": 0.0001, "loss": 0.0197, "step": 57450 }, { "epoch": 378.0263157894737, "grad_norm": 1.8252780437469482, "learning_rate": 0.0001, "loss": 0.0142, "step": 57460 }, { "epoch": 378.0921052631579, "grad_norm": 1.5524814128875732, "learning_rate": 0.0001, "loss": 0.0197, "step": 57470 }, { "epoch": 378.1578947368421, "grad_norm": 1.3498573303222656, "learning_rate": 0.0001, "loss": 0.0134, "step": 57480 }, { "epoch": 378.2236842105263, "grad_norm": 1.65605890750885, "learning_rate": 0.0001, "loss": 0.0161, "step": 57490 }, { "epoch": 378.2894736842105, "grad_norm": 1.49755859375, "learning_rate": 0.0001, "loss": 0.0178, "step": 57500 }, { "epoch": 378.35526315789474, "grad_norm": 1.9108785390853882, "learning_rate": 0.0001, "loss": 0.0142, "step": 57510 }, { "epoch": 378.42105263157896, "grad_norm": 1.5260653495788574, "learning_rate": 0.0001, "loss": 0.0207, "step": 57520 }, { "epoch": 378.4868421052632, "grad_norm": 1.457156777381897, "learning_rate": 0.0001, "loss": 0.0152, "step": 57530 }, { "epoch": 378.55263157894734, "grad_norm": 1.4983795881271362, "learning_rate": 0.0001, "loss": 0.0209, "step": 57540 }, { "epoch": 378.61842105263156, "grad_norm": 1.5460437536239624, "learning_rate": 0.0001, "loss": 0.0159, "step": 57550 }, { "epoch": 378.6842105263158, "grad_norm": 1.7939168214797974, "learning_rate": 0.0001, "loss": 0.0145, "step": 57560 }, { "epoch": 378.75, "grad_norm": 1.6734007596969604, "learning_rate": 0.0001, "loss": 0.0214, "step": 57570 }, { "epoch": 378.8157894736842, "grad_norm": 1.4379606246948242, "learning_rate": 0.0001, "loss": 0.0161, "step": 57580 }, { "epoch": 378.88157894736844, "grad_norm": 1.4519842863082886, "learning_rate": 0.0001, "loss": 0.0144, "step": 57590 }, { "epoch": 378.94736842105266, "grad_norm": 1.4190927743911743, "learning_rate": 0.0001, "loss": 0.0151, "step": 57600 }, { "epoch": 379.0131578947368, "grad_norm": 2.2164344787597656, "learning_rate": 0.0001, "loss": 0.0183, "step": 57610 }, { "epoch": 379.07894736842104, "grad_norm": 1.372009515762329, "learning_rate": 0.0001, "loss": 0.0132, "step": 57620 }, { "epoch": 379.14473684210526, "grad_norm": 1.4539772272109985, "learning_rate": 0.0001, "loss": 0.0154, "step": 57630 }, { "epoch": 379.2105263157895, "grad_norm": 1.3346238136291504, "learning_rate": 0.0001, "loss": 0.0195, "step": 57640 }, { "epoch": 379.2763157894737, "grad_norm": 1.5026804208755493, "learning_rate": 0.0001, "loss": 0.0157, "step": 57650 }, { "epoch": 379.3421052631579, "grad_norm": 1.4035135507583618, "learning_rate": 0.0001, "loss": 0.0205, "step": 57660 }, { "epoch": 379.4078947368421, "grad_norm": 1.6304047107696533, "learning_rate": 0.0001, "loss": 0.0178, "step": 57670 }, { "epoch": 379.4736842105263, "grad_norm": 1.824974536895752, "learning_rate": 0.0001, "loss": 0.0159, "step": 57680 }, { "epoch": 379.5394736842105, "grad_norm": 1.7349411249160767, "learning_rate": 0.0001, "loss": 0.0165, "step": 57690 }, { "epoch": 379.60526315789474, "grad_norm": 1.541090965270996, "learning_rate": 0.0001, "loss": 0.0169, "step": 57700 }, { "epoch": 379.67105263157896, "grad_norm": 1.3484915494918823, "learning_rate": 0.0001, "loss": 0.0193, "step": 57710 }, { "epoch": 379.7368421052632, "grad_norm": 1.5281263589859009, "learning_rate": 0.0001, "loss": 0.0172, "step": 57720 }, { "epoch": 379.80263157894734, "grad_norm": 1.2851496934890747, "learning_rate": 0.0001, "loss": 0.0162, "step": 57730 }, { "epoch": 379.86842105263156, "grad_norm": 1.7152314186096191, "learning_rate": 0.0001, "loss": 0.0178, "step": 57740 }, { "epoch": 379.9342105263158, "grad_norm": 1.1442804336547852, "learning_rate": 0.0001, "loss": 0.0137, "step": 57750 }, { "epoch": 380.0, "grad_norm": 1.9151694774627686, "learning_rate": 0.0001, "loss": 0.0166, "step": 57760 }, { "epoch": 380.0657894736842, "grad_norm": 1.5069445371627808, "learning_rate": 0.0001, "loss": 0.0146, "step": 57770 }, { "epoch": 380.13157894736844, "grad_norm": 1.7224416732788086, "learning_rate": 0.0001, "loss": 0.0165, "step": 57780 }, { "epoch": 380.19736842105266, "grad_norm": 1.059928059577942, "learning_rate": 0.0001, "loss": 0.0165, "step": 57790 }, { "epoch": 380.2631578947368, "grad_norm": 1.500221848487854, "learning_rate": 0.0001, "loss": 0.0183, "step": 57800 }, { "epoch": 380.32894736842104, "grad_norm": 1.8348262310028076, "learning_rate": 0.0001, "loss": 0.0152, "step": 57810 }, { "epoch": 380.39473684210526, "grad_norm": 1.0506701469421387, "learning_rate": 0.0001, "loss": 0.0152, "step": 57820 }, { "epoch": 380.4605263157895, "grad_norm": 1.6688697338104248, "learning_rate": 0.0001, "loss": 0.0161, "step": 57830 }, { "epoch": 380.5263157894737, "grad_norm": 1.3527562618255615, "learning_rate": 0.0001, "loss": 0.0175, "step": 57840 }, { "epoch": 380.5921052631579, "grad_norm": 2.266925096511841, "learning_rate": 0.0001, "loss": 0.0191, "step": 57850 }, { "epoch": 380.6578947368421, "grad_norm": 1.7431800365447998, "learning_rate": 0.0001, "loss": 0.0181, "step": 57860 }, { "epoch": 380.7236842105263, "grad_norm": 1.2353934049606323, "learning_rate": 0.0001, "loss": 0.0135, "step": 57870 }, { "epoch": 380.7894736842105, "grad_norm": 1.5652155876159668, "learning_rate": 0.0001, "loss": 0.0169, "step": 57880 }, { "epoch": 380.85526315789474, "grad_norm": 1.7672584056854248, "learning_rate": 0.0001, "loss": 0.0166, "step": 57890 }, { "epoch": 380.92105263157896, "grad_norm": 1.827952265739441, "learning_rate": 0.0001, "loss": 0.0158, "step": 57900 }, { "epoch": 380.9868421052632, "grad_norm": 1.7426458597183228, "learning_rate": 0.0001, "loss": 0.0213, "step": 57910 }, { "epoch": 381.05263157894734, "grad_norm": 1.7591052055358887, "learning_rate": 0.0001, "loss": 0.0165, "step": 57920 }, { "epoch": 381.11842105263156, "grad_norm": 2.0776402950286865, "learning_rate": 0.0001, "loss": 0.0153, "step": 57930 }, { "epoch": 381.1842105263158, "grad_norm": 1.9292232990264893, "learning_rate": 0.0001, "loss": 0.0153, "step": 57940 }, { "epoch": 381.25, "grad_norm": 2.040409803390503, "learning_rate": 0.0001, "loss": 0.0171, "step": 57950 }, { "epoch": 381.3157894736842, "grad_norm": 1.7776579856872559, "learning_rate": 0.0001, "loss": 0.0157, "step": 57960 }, { "epoch": 381.38157894736844, "grad_norm": 1.782340407371521, "learning_rate": 0.0001, "loss": 0.0155, "step": 57970 }, { "epoch": 381.44736842105266, "grad_norm": 1.8805038928985596, "learning_rate": 0.0001, "loss": 0.0158, "step": 57980 }, { "epoch": 381.5131578947368, "grad_norm": 2.32952618598938, "learning_rate": 0.0001, "loss": 0.0177, "step": 57990 }, { "epoch": 381.57894736842104, "grad_norm": 1.947108268737793, "learning_rate": 0.0001, "loss": 0.0178, "step": 58000 }, { "epoch": 381.64473684210526, "grad_norm": 1.7467193603515625, "learning_rate": 0.0001, "loss": 0.0172, "step": 58010 }, { "epoch": 381.7105263157895, "grad_norm": 1.8632075786590576, "learning_rate": 0.0001, "loss": 0.0146, "step": 58020 }, { "epoch": 381.7763157894737, "grad_norm": 1.4259037971496582, "learning_rate": 0.0001, "loss": 0.0168, "step": 58030 }, { "epoch": 381.8421052631579, "grad_norm": 1.769444465637207, "learning_rate": 0.0001, "loss": 0.0196, "step": 58040 }, { "epoch": 381.9078947368421, "grad_norm": 1.8818155527114868, "learning_rate": 0.0001, "loss": 0.0166, "step": 58050 }, { "epoch": 381.9736842105263, "grad_norm": 1.9061728715896606, "learning_rate": 0.0001, "loss": 0.0134, "step": 58060 }, { "epoch": 382.0394736842105, "grad_norm": 2.009661912918091, "learning_rate": 0.0001, "loss": 0.0156, "step": 58070 }, { "epoch": 382.10526315789474, "grad_norm": 1.6985048055648804, "learning_rate": 0.0001, "loss": 0.02, "step": 58080 }, { "epoch": 382.17105263157896, "grad_norm": 2.1017725467681885, "learning_rate": 0.0001, "loss": 0.0324, "step": 58090 }, { "epoch": 382.2368421052632, "grad_norm": 2.079895496368408, "learning_rate": 0.0001, "loss": 0.0183, "step": 58100 }, { "epoch": 382.30263157894734, "grad_norm": 1.8875467777252197, "learning_rate": 0.0001, "loss": 0.0152, "step": 58110 }, { "epoch": 382.36842105263156, "grad_norm": 2.0238850116729736, "learning_rate": 0.0001, "loss": 0.015, "step": 58120 }, { "epoch": 382.4342105263158, "grad_norm": 2.09415602684021, "learning_rate": 0.0001, "loss": 0.0142, "step": 58130 }, { "epoch": 382.5, "grad_norm": 2.215057373046875, "learning_rate": 0.0001, "loss": 0.0168, "step": 58140 }, { "epoch": 382.5657894736842, "grad_norm": 1.9320073127746582, "learning_rate": 0.0001, "loss": 0.0147, "step": 58150 }, { "epoch": 382.63157894736844, "grad_norm": 1.497176170349121, "learning_rate": 0.0001, "loss": 0.0156, "step": 58160 }, { "epoch": 382.69736842105266, "grad_norm": 1.5587283372879028, "learning_rate": 0.0001, "loss": 0.0158, "step": 58170 }, { "epoch": 382.7631578947368, "grad_norm": 1.5144479274749756, "learning_rate": 0.0001, "loss": 0.0143, "step": 58180 }, { "epoch": 382.82894736842104, "grad_norm": 1.3788102865219116, "learning_rate": 0.0001, "loss": 0.0199, "step": 58190 }, { "epoch": 382.89473684210526, "grad_norm": 1.3870658874511719, "learning_rate": 0.0001, "loss": 0.0159, "step": 58200 }, { "epoch": 382.9605263157895, "grad_norm": 1.5789995193481445, "learning_rate": 0.0001, "loss": 0.0144, "step": 58210 }, { "epoch": 383.0263157894737, "grad_norm": 1.2747658491134644, "learning_rate": 0.0001, "loss": 0.0166, "step": 58220 }, { "epoch": 383.0921052631579, "grad_norm": 1.8504140377044678, "learning_rate": 0.0001, "loss": 0.0182, "step": 58230 }, { "epoch": 383.1578947368421, "grad_norm": 1.2231191396713257, "learning_rate": 0.0001, "loss": 0.0141, "step": 58240 }, { "epoch": 383.2236842105263, "grad_norm": 1.8377608060836792, "learning_rate": 0.0001, "loss": 0.0158, "step": 58250 }, { "epoch": 383.2894736842105, "grad_norm": 1.4137144088745117, "learning_rate": 0.0001, "loss": 0.018, "step": 58260 }, { "epoch": 383.35526315789474, "grad_norm": 1.4083014726638794, "learning_rate": 0.0001, "loss": 0.015, "step": 58270 }, { "epoch": 383.42105263157896, "grad_norm": 1.683040976524353, "learning_rate": 0.0001, "loss": 0.0167, "step": 58280 }, { "epoch": 383.4868421052632, "grad_norm": 1.581738829612732, "learning_rate": 0.0001, "loss": 0.0194, "step": 58290 }, { "epoch": 383.55263157894734, "grad_norm": 1.343382477760315, "learning_rate": 0.0001, "loss": 0.0166, "step": 58300 }, { "epoch": 383.61842105263156, "grad_norm": 1.9534549713134766, "learning_rate": 0.0001, "loss": 0.0151, "step": 58310 }, { "epoch": 383.6842105263158, "grad_norm": 1.6073013544082642, "learning_rate": 0.0001, "loss": 0.0128, "step": 58320 }, { "epoch": 383.75, "grad_norm": 1.9737210273742676, "learning_rate": 0.0001, "loss": 0.0221, "step": 58330 }, { "epoch": 383.8157894736842, "grad_norm": 1.8089182376861572, "learning_rate": 0.0001, "loss": 0.0158, "step": 58340 }, { "epoch": 383.88157894736844, "grad_norm": 1.3024438619613647, "learning_rate": 0.0001, "loss": 0.0191, "step": 58350 }, { "epoch": 383.94736842105266, "grad_norm": 1.6362946033477783, "learning_rate": 0.0001, "loss": 0.0171, "step": 58360 }, { "epoch": 384.0131578947368, "grad_norm": 1.5838582515716553, "learning_rate": 0.0001, "loss": 0.0186, "step": 58370 }, { "epoch": 384.07894736842104, "grad_norm": 1.3764060735702515, "learning_rate": 0.0001, "loss": 0.0168, "step": 58380 }, { "epoch": 384.14473684210526, "grad_norm": 2.1885087490081787, "learning_rate": 0.0001, "loss": 0.018, "step": 58390 }, { "epoch": 384.2105263157895, "grad_norm": 1.7094752788543701, "learning_rate": 0.0001, "loss": 0.0197, "step": 58400 }, { "epoch": 384.2763157894737, "grad_norm": 1.771224856376648, "learning_rate": 0.0001, "loss": 0.0145, "step": 58410 }, { "epoch": 384.3421052631579, "grad_norm": 1.4747278690338135, "learning_rate": 0.0001, "loss": 0.018, "step": 58420 }, { "epoch": 384.4078947368421, "grad_norm": 1.2324275970458984, "learning_rate": 0.0001, "loss": 0.0128, "step": 58430 }, { "epoch": 384.4736842105263, "grad_norm": 1.443075180053711, "learning_rate": 0.0001, "loss": 0.015, "step": 58440 }, { "epoch": 384.5394736842105, "grad_norm": 1.4868911504745483, "learning_rate": 0.0001, "loss": 0.014, "step": 58450 }, { "epoch": 384.60526315789474, "grad_norm": 1.4817228317260742, "learning_rate": 0.0001, "loss": 0.0191, "step": 58460 }, { "epoch": 384.67105263157896, "grad_norm": 1.6893751621246338, "learning_rate": 0.0001, "loss": 0.0145, "step": 58470 }, { "epoch": 384.7368421052632, "grad_norm": 1.4369590282440186, "learning_rate": 0.0001, "loss": 0.0172, "step": 58480 }, { "epoch": 384.80263157894734, "grad_norm": 1.9162358045578003, "learning_rate": 0.0001, "loss": 0.0187, "step": 58490 }, { "epoch": 384.86842105263156, "grad_norm": 1.6799229383468628, "learning_rate": 0.0001, "loss": 0.0174, "step": 58500 }, { "epoch": 384.9342105263158, "grad_norm": 1.2860440015792847, "learning_rate": 0.0001, "loss": 0.0179, "step": 58510 }, { "epoch": 385.0, "grad_norm": 1.801576018333435, "learning_rate": 0.0001, "loss": 0.016, "step": 58520 }, { "epoch": 385.0657894736842, "grad_norm": 1.618019938468933, "learning_rate": 0.0001, "loss": 0.0176, "step": 58530 }, { "epoch": 385.13157894736844, "grad_norm": 1.9756108522415161, "learning_rate": 0.0001, "loss": 0.0139, "step": 58540 }, { "epoch": 385.19736842105266, "grad_norm": 1.7363574504852295, "learning_rate": 0.0001, "loss": 0.0158, "step": 58550 }, { "epoch": 385.2631578947368, "grad_norm": 1.5208163261413574, "learning_rate": 0.0001, "loss": 0.0164, "step": 58560 }, { "epoch": 385.32894736842104, "grad_norm": 1.5577750205993652, "learning_rate": 0.0001, "loss": 0.0178, "step": 58570 }, { "epoch": 385.39473684210526, "grad_norm": 1.4558173418045044, "learning_rate": 0.0001, "loss": 0.0167, "step": 58580 }, { "epoch": 385.4605263157895, "grad_norm": 1.2576158046722412, "learning_rate": 0.0001, "loss": 0.0138, "step": 58590 }, { "epoch": 385.5263157894737, "grad_norm": 1.2318916320800781, "learning_rate": 0.0001, "loss": 0.0167, "step": 58600 }, { "epoch": 385.5921052631579, "grad_norm": 1.3317286968231201, "learning_rate": 0.0001, "loss": 0.0146, "step": 58610 }, { "epoch": 385.6578947368421, "grad_norm": 1.077431559562683, "learning_rate": 0.0001, "loss": 0.0173, "step": 58620 }, { "epoch": 385.7236842105263, "grad_norm": 1.0527769327163696, "learning_rate": 0.0001, "loss": 0.0184, "step": 58630 }, { "epoch": 385.7894736842105, "grad_norm": 1.1147502660751343, "learning_rate": 0.0001, "loss": 0.0174, "step": 58640 }, { "epoch": 385.85526315789474, "grad_norm": 1.4258216619491577, "learning_rate": 0.0001, "loss": 0.0232, "step": 58650 }, { "epoch": 385.92105263157896, "grad_norm": 1.2621009349822998, "learning_rate": 0.0001, "loss": 0.0173, "step": 58660 }, { "epoch": 385.9868421052632, "grad_norm": 1.601749062538147, "learning_rate": 0.0001, "loss": 0.0168, "step": 58670 }, { "epoch": 386.05263157894734, "grad_norm": 1.5798461437225342, "learning_rate": 0.0001, "loss": 0.0191, "step": 58680 }, { "epoch": 386.11842105263156, "grad_norm": 1.8758537769317627, "learning_rate": 0.0001, "loss": 0.0193, "step": 58690 }, { "epoch": 386.1842105263158, "grad_norm": 1.5866549015045166, "learning_rate": 0.0001, "loss": 0.0199, "step": 58700 }, { "epoch": 386.25, "grad_norm": 1.3787760734558105, "learning_rate": 0.0001, "loss": 0.0142, "step": 58710 }, { "epoch": 386.3157894736842, "grad_norm": 1.3960944414138794, "learning_rate": 0.0001, "loss": 0.0145, "step": 58720 }, { "epoch": 386.38157894736844, "grad_norm": 1.3719767332077026, "learning_rate": 0.0001, "loss": 0.0162, "step": 58730 }, { "epoch": 386.44736842105266, "grad_norm": 1.1479511260986328, "learning_rate": 0.0001, "loss": 0.0199, "step": 58740 }, { "epoch": 386.5131578947368, "grad_norm": 1.3123550415039062, "learning_rate": 0.0001, "loss": 0.0168, "step": 58750 }, { "epoch": 386.57894736842104, "grad_norm": 1.8702305555343628, "learning_rate": 0.0001, "loss": 0.0146, "step": 58760 }, { "epoch": 386.64473684210526, "grad_norm": 1.3136160373687744, "learning_rate": 0.0001, "loss": 0.0139, "step": 58770 }, { "epoch": 386.7105263157895, "grad_norm": 1.598852276802063, "learning_rate": 0.0001, "loss": 0.0164, "step": 58780 }, { "epoch": 386.7763157894737, "grad_norm": 1.8933569192886353, "learning_rate": 0.0001, "loss": 0.0194, "step": 58790 }, { "epoch": 386.8421052631579, "grad_norm": 1.7293550968170166, "learning_rate": 0.0001, "loss": 0.0145, "step": 58800 }, { "epoch": 386.9078947368421, "grad_norm": 1.6560256481170654, "learning_rate": 0.0001, "loss": 0.0176, "step": 58810 }, { "epoch": 386.9736842105263, "grad_norm": 1.7142913341522217, "learning_rate": 0.0001, "loss": 0.0218, "step": 58820 }, { "epoch": 387.0394736842105, "grad_norm": 1.5965625047683716, "learning_rate": 0.0001, "loss": 0.0135, "step": 58830 }, { "epoch": 387.10526315789474, "grad_norm": 1.4859086275100708, "learning_rate": 0.0001, "loss": 0.0157, "step": 58840 }, { "epoch": 387.17105263157896, "grad_norm": 1.2255446910858154, "learning_rate": 0.0001, "loss": 0.0175, "step": 58850 }, { "epoch": 387.2368421052632, "grad_norm": 1.3645042181015015, "learning_rate": 0.0001, "loss": 0.0158, "step": 58860 }, { "epoch": 387.30263157894734, "grad_norm": 1.2049075365066528, "learning_rate": 0.0001, "loss": 0.0138, "step": 58870 }, { "epoch": 387.36842105263156, "grad_norm": 1.429610252380371, "learning_rate": 0.0001, "loss": 0.0186, "step": 58880 }, { "epoch": 387.4342105263158, "grad_norm": 1.5063523054122925, "learning_rate": 0.0001, "loss": 0.0161, "step": 58890 }, { "epoch": 387.5, "grad_norm": 1.2452653646469116, "learning_rate": 0.0001, "loss": 0.0197, "step": 58900 }, { "epoch": 387.5657894736842, "grad_norm": 1.2786093950271606, "learning_rate": 0.0001, "loss": 0.0177, "step": 58910 }, { "epoch": 387.63157894736844, "grad_norm": 1.6849726438522339, "learning_rate": 0.0001, "loss": 0.0179, "step": 58920 }, { "epoch": 387.69736842105266, "grad_norm": 1.762026309967041, "learning_rate": 0.0001, "loss": 0.0172, "step": 58930 }, { "epoch": 387.7631578947368, "grad_norm": 1.8201987743377686, "learning_rate": 0.0001, "loss": 0.0185, "step": 58940 }, { "epoch": 387.82894736842104, "grad_norm": 1.292797327041626, "learning_rate": 0.0001, "loss": 0.0147, "step": 58950 }, { "epoch": 387.89473684210526, "grad_norm": 1.5284539461135864, "learning_rate": 0.0001, "loss": 0.0148, "step": 58960 }, { "epoch": 387.9605263157895, "grad_norm": 1.1682329177856445, "learning_rate": 0.0001, "loss": 0.0245, "step": 58970 }, { "epoch": 388.0263157894737, "grad_norm": 1.7847888469696045, "learning_rate": 0.0001, "loss": 0.0147, "step": 58980 }, { "epoch": 388.0921052631579, "grad_norm": 1.1924080848693848, "learning_rate": 0.0001, "loss": 0.0155, "step": 58990 }, { "epoch": 388.1578947368421, "grad_norm": 1.5760828256607056, "learning_rate": 0.0001, "loss": 0.0152, "step": 59000 }, { "epoch": 388.2236842105263, "grad_norm": 1.2211408615112305, "learning_rate": 0.0001, "loss": 0.0178, "step": 59010 }, { "epoch": 388.2894736842105, "grad_norm": 1.615585446357727, "learning_rate": 0.0001, "loss": 0.0157, "step": 59020 }, { "epoch": 388.35526315789474, "grad_norm": 1.6204023361206055, "learning_rate": 0.0001, "loss": 0.0194, "step": 59030 }, { "epoch": 388.42105263157896, "grad_norm": 1.8089137077331543, "learning_rate": 0.0001, "loss": 0.023, "step": 59040 }, { "epoch": 388.4868421052632, "grad_norm": 1.2941768169403076, "learning_rate": 0.0001, "loss": 0.0203, "step": 59050 }, { "epoch": 388.55263157894734, "grad_norm": 1.6487293243408203, "learning_rate": 0.0001, "loss": 0.0155, "step": 59060 }, { "epoch": 388.61842105263156, "grad_norm": 2.188629627227783, "learning_rate": 0.0001, "loss": 0.0162, "step": 59070 }, { "epoch": 388.6842105263158, "grad_norm": 1.2836459875106812, "learning_rate": 0.0001, "loss": 0.0142, "step": 59080 }, { "epoch": 388.75, "grad_norm": 1.559304118156433, "learning_rate": 0.0001, "loss": 0.0165, "step": 59090 }, { "epoch": 388.8157894736842, "grad_norm": 2.0348615646362305, "learning_rate": 0.0001, "loss": 0.0171, "step": 59100 }, { "epoch": 388.88157894736844, "grad_norm": 2.109220027923584, "learning_rate": 0.0001, "loss": 0.0189, "step": 59110 }, { "epoch": 388.94736842105266, "grad_norm": 1.9955267906188965, "learning_rate": 0.0001, "loss": 0.0182, "step": 59120 }, { "epoch": 389.0131578947368, "grad_norm": 2.1808249950408936, "learning_rate": 0.0001, "loss": 0.0187, "step": 59130 }, { "epoch": 389.07894736842104, "grad_norm": 2.1614835262298584, "learning_rate": 0.0001, "loss": 0.0167, "step": 59140 }, { "epoch": 389.14473684210526, "grad_norm": 1.761969804763794, "learning_rate": 0.0001, "loss": 0.0168, "step": 59150 }, { "epoch": 389.2105263157895, "grad_norm": 1.7717534303665161, "learning_rate": 0.0001, "loss": 0.0166, "step": 59160 }, { "epoch": 389.2763157894737, "grad_norm": 1.5961408615112305, "learning_rate": 0.0001, "loss": 0.0187, "step": 59170 }, { "epoch": 389.3421052631579, "grad_norm": 1.6860178709030151, "learning_rate": 0.0001, "loss": 0.0162, "step": 59180 }, { "epoch": 389.4078947368421, "grad_norm": 1.3039644956588745, "learning_rate": 0.0001, "loss": 0.0142, "step": 59190 }, { "epoch": 389.4736842105263, "grad_norm": 1.637966275215149, "learning_rate": 0.0001, "loss": 0.0131, "step": 59200 }, { "epoch": 389.5394736842105, "grad_norm": 1.3296597003936768, "learning_rate": 0.0001, "loss": 0.0202, "step": 59210 }, { "epoch": 389.60526315789474, "grad_norm": 1.5591238737106323, "learning_rate": 0.0001, "loss": 0.0155, "step": 59220 }, { "epoch": 389.67105263157896, "grad_norm": 1.3358923196792603, "learning_rate": 0.0001, "loss": 0.0177, "step": 59230 }, { "epoch": 389.7368421052632, "grad_norm": 1.151726245880127, "learning_rate": 0.0001, "loss": 0.0205, "step": 59240 }, { "epoch": 389.80263157894734, "grad_norm": 1.5059947967529297, "learning_rate": 0.0001, "loss": 0.0199, "step": 59250 }, { "epoch": 389.86842105263156, "grad_norm": 1.8767880201339722, "learning_rate": 0.0001, "loss": 0.0174, "step": 59260 }, { "epoch": 389.9342105263158, "grad_norm": 1.4376119375228882, "learning_rate": 0.0001, "loss": 0.0148, "step": 59270 }, { "epoch": 390.0, "grad_norm": 1.3685617446899414, "learning_rate": 0.0001, "loss": 0.0145, "step": 59280 }, { "epoch": 390.0657894736842, "grad_norm": 1.5241363048553467, "learning_rate": 0.0001, "loss": 0.0146, "step": 59290 }, { "epoch": 390.13157894736844, "grad_norm": 1.58831787109375, "learning_rate": 0.0001, "loss": 0.0172, "step": 59300 }, { "epoch": 390.19736842105266, "grad_norm": 1.7011619806289673, "learning_rate": 0.0001, "loss": 0.0188, "step": 59310 }, { "epoch": 390.2631578947368, "grad_norm": 1.8328841924667358, "learning_rate": 0.0001, "loss": 0.0184, "step": 59320 }, { "epoch": 390.32894736842104, "grad_norm": 1.7263926267623901, "learning_rate": 0.0001, "loss": 0.0194, "step": 59330 }, { "epoch": 390.39473684210526, "grad_norm": 1.3946986198425293, "learning_rate": 0.0001, "loss": 0.019, "step": 59340 }, { "epoch": 390.4605263157895, "grad_norm": 1.3497649431228638, "learning_rate": 0.0001, "loss": 0.0176, "step": 59350 }, { "epoch": 390.5263157894737, "grad_norm": 1.7646758556365967, "learning_rate": 0.0001, "loss": 0.0179, "step": 59360 }, { "epoch": 390.5921052631579, "grad_norm": 1.830429196357727, "learning_rate": 0.0001, "loss": 0.0162, "step": 59370 }, { "epoch": 390.6578947368421, "grad_norm": 1.964322805404663, "learning_rate": 0.0001, "loss": 0.0166, "step": 59380 }, { "epoch": 390.7236842105263, "grad_norm": 1.653138518333435, "learning_rate": 0.0001, "loss": 0.0136, "step": 59390 }, { "epoch": 390.7894736842105, "grad_norm": 1.35392427444458, "learning_rate": 0.0001, "loss": 0.015, "step": 59400 }, { "epoch": 390.85526315789474, "grad_norm": 1.493067979812622, "learning_rate": 0.0001, "loss": 0.0159, "step": 59410 }, { "epoch": 390.92105263157896, "grad_norm": 1.5267997980117798, "learning_rate": 0.0001, "loss": 0.0134, "step": 59420 }, { "epoch": 390.9868421052632, "grad_norm": 1.7080415487289429, "learning_rate": 0.0001, "loss": 0.0202, "step": 59430 }, { "epoch": 391.05263157894734, "grad_norm": 1.8717631101608276, "learning_rate": 0.0001, "loss": 0.0175, "step": 59440 }, { "epoch": 391.11842105263156, "grad_norm": 1.6515084505081177, "learning_rate": 0.0001, "loss": 0.0176, "step": 59450 }, { "epoch": 391.1842105263158, "grad_norm": 1.7376562356948853, "learning_rate": 0.0001, "loss": 0.0174, "step": 59460 }, { "epoch": 391.25, "grad_norm": 1.957140564918518, "learning_rate": 0.0001, "loss": 0.0174, "step": 59470 }, { "epoch": 391.3157894736842, "grad_norm": 1.658299446105957, "learning_rate": 0.0001, "loss": 0.0214, "step": 59480 }, { "epoch": 391.38157894736844, "grad_norm": 1.937890648841858, "learning_rate": 0.0001, "loss": 0.0155, "step": 59490 }, { "epoch": 391.44736842105266, "grad_norm": 2.2066712379455566, "learning_rate": 0.0001, "loss": 0.0141, "step": 59500 }, { "epoch": 391.5131578947368, "grad_norm": 1.6082115173339844, "learning_rate": 0.0001, "loss": 0.0152, "step": 59510 }, { "epoch": 391.57894736842104, "grad_norm": 1.3813443183898926, "learning_rate": 0.0001, "loss": 0.0145, "step": 59520 }, { "epoch": 391.64473684210526, "grad_norm": 1.709933876991272, "learning_rate": 0.0001, "loss": 0.0192, "step": 59530 }, { "epoch": 391.7105263157895, "grad_norm": 1.5498384237289429, "learning_rate": 0.0001, "loss": 0.0138, "step": 59540 }, { "epoch": 391.7763157894737, "grad_norm": 1.5508391857147217, "learning_rate": 0.0001, "loss": 0.0149, "step": 59550 }, { "epoch": 391.8421052631579, "grad_norm": 1.4611496925354004, "learning_rate": 0.0001, "loss": 0.018, "step": 59560 }, { "epoch": 391.9078947368421, "grad_norm": 1.470881700515747, "learning_rate": 0.0001, "loss": 0.0165, "step": 59570 }, { "epoch": 391.9736842105263, "grad_norm": 1.5723544359207153, "learning_rate": 0.0001, "loss": 0.0178, "step": 59580 }, { "epoch": 392.0394736842105, "grad_norm": 1.1668025255203247, "learning_rate": 0.0001, "loss": 0.0139, "step": 59590 }, { "epoch": 392.10526315789474, "grad_norm": 1.74624764919281, "learning_rate": 0.0001, "loss": 0.0139, "step": 59600 }, { "epoch": 392.17105263157896, "grad_norm": 1.4784506559371948, "learning_rate": 0.0001, "loss": 0.0154, "step": 59610 }, { "epoch": 392.2368421052632, "grad_norm": 1.463438630104065, "learning_rate": 0.0001, "loss": 0.0143, "step": 59620 }, { "epoch": 392.30263157894734, "grad_norm": 1.506483554840088, "learning_rate": 0.0001, "loss": 0.0213, "step": 59630 }, { "epoch": 392.36842105263156, "grad_norm": 1.6697335243225098, "learning_rate": 0.0001, "loss": 0.017, "step": 59640 }, { "epoch": 392.4342105263158, "grad_norm": 1.2310736179351807, "learning_rate": 0.0001, "loss": 0.0139, "step": 59650 }, { "epoch": 392.5, "grad_norm": 1.5641720294952393, "learning_rate": 0.0001, "loss": 0.0168, "step": 59660 }, { "epoch": 392.5657894736842, "grad_norm": 1.141155481338501, "learning_rate": 0.0001, "loss": 0.0196, "step": 59670 }, { "epoch": 392.63157894736844, "grad_norm": 1.3405312299728394, "learning_rate": 0.0001, "loss": 0.0154, "step": 59680 }, { "epoch": 392.69736842105266, "grad_norm": 1.6082600355148315, "learning_rate": 0.0001, "loss": 0.0222, "step": 59690 }, { "epoch": 392.7631578947368, "grad_norm": 1.618727207183838, "learning_rate": 0.0001, "loss": 0.0185, "step": 59700 }, { "epoch": 392.82894736842104, "grad_norm": 1.7246311902999878, "learning_rate": 0.0001, "loss": 0.0128, "step": 59710 }, { "epoch": 392.89473684210526, "grad_norm": 1.877476692199707, "learning_rate": 0.0001, "loss": 0.0195, "step": 59720 }, { "epoch": 392.9605263157895, "grad_norm": 1.5914465188980103, "learning_rate": 0.0001, "loss": 0.0138, "step": 59730 }, { "epoch": 393.0263157894737, "grad_norm": 1.3455697298049927, "learning_rate": 0.0001, "loss": 0.0168, "step": 59740 }, { "epoch": 393.0921052631579, "grad_norm": 1.3786636590957642, "learning_rate": 0.0001, "loss": 0.0145, "step": 59750 }, { "epoch": 393.1578947368421, "grad_norm": 1.4100984334945679, "learning_rate": 0.0001, "loss": 0.019, "step": 59760 }, { "epoch": 393.2236842105263, "grad_norm": 1.719107747077942, "learning_rate": 0.0001, "loss": 0.014, "step": 59770 }, { "epoch": 393.2894736842105, "grad_norm": 1.8582677841186523, "learning_rate": 0.0001, "loss": 0.0179, "step": 59780 }, { "epoch": 393.35526315789474, "grad_norm": 2.109654664993286, "learning_rate": 0.0001, "loss": 0.0151, "step": 59790 }, { "epoch": 393.42105263157896, "grad_norm": 1.5900132656097412, "learning_rate": 0.0001, "loss": 0.017, "step": 59800 }, { "epoch": 393.4868421052632, "grad_norm": 1.7334014177322388, "learning_rate": 0.0001, "loss": 0.0138, "step": 59810 }, { "epoch": 393.55263157894734, "grad_norm": 1.2726595401763916, "learning_rate": 0.0001, "loss": 0.0189, "step": 59820 }, { "epoch": 393.61842105263156, "grad_norm": 1.0277211666107178, "learning_rate": 0.0001, "loss": 0.0148, "step": 59830 }, { "epoch": 393.6842105263158, "grad_norm": 1.2605518102645874, "learning_rate": 0.0001, "loss": 0.0189, "step": 59840 }, { "epoch": 393.75, "grad_norm": 1.4972224235534668, "learning_rate": 0.0001, "loss": 0.0168, "step": 59850 }, { "epoch": 393.8157894736842, "grad_norm": 1.714581847190857, "learning_rate": 0.0001, "loss": 0.0166, "step": 59860 }, { "epoch": 393.88157894736844, "grad_norm": 1.6199212074279785, "learning_rate": 0.0001, "loss": 0.0167, "step": 59870 }, { "epoch": 393.94736842105266, "grad_norm": 1.388965368270874, "learning_rate": 0.0001, "loss": 0.0162, "step": 59880 }, { "epoch": 394.0131578947368, "grad_norm": 1.9019564390182495, "learning_rate": 0.0001, "loss": 0.0196, "step": 59890 }, { "epoch": 394.07894736842104, "grad_norm": 1.431050181388855, "learning_rate": 0.0001, "loss": 0.0218, "step": 59900 }, { "epoch": 394.14473684210526, "grad_norm": 1.661454677581787, "learning_rate": 0.0001, "loss": 0.0142, "step": 59910 }, { "epoch": 394.2105263157895, "grad_norm": 1.7477384805679321, "learning_rate": 0.0001, "loss": 0.0187, "step": 59920 }, { "epoch": 394.2763157894737, "grad_norm": 1.9761806726455688, "learning_rate": 0.0001, "loss": 0.0145, "step": 59930 }, { "epoch": 394.3421052631579, "grad_norm": 1.7675068378448486, "learning_rate": 0.0001, "loss": 0.0183, "step": 59940 }, { "epoch": 394.4078947368421, "grad_norm": 1.867546558380127, "learning_rate": 0.0001, "loss": 0.0171, "step": 59950 }, { "epoch": 394.4736842105263, "grad_norm": 1.616050124168396, "learning_rate": 0.0001, "loss": 0.0145, "step": 59960 }, { "epoch": 394.5394736842105, "grad_norm": 1.1974283456802368, "learning_rate": 0.0001, "loss": 0.0192, "step": 59970 }, { "epoch": 394.60526315789474, "grad_norm": 1.2197151184082031, "learning_rate": 0.0001, "loss": 0.0132, "step": 59980 }, { "epoch": 394.67105263157896, "grad_norm": 1.6291935443878174, "learning_rate": 0.0001, "loss": 0.0183, "step": 59990 }, { "epoch": 394.7368421052632, "grad_norm": 1.3612477779388428, "learning_rate": 0.0001, "loss": 0.0161, "step": 60000 }, { "epoch": 394.80263157894734, "grad_norm": 2.1272788047790527, "learning_rate": 0.0001, "loss": 0.0128, "step": 60010 }, { "epoch": 394.86842105263156, "grad_norm": 1.7800475358963013, "learning_rate": 0.0001, "loss": 0.0144, "step": 60020 }, { "epoch": 394.9342105263158, "grad_norm": 1.6284003257751465, "learning_rate": 0.0001, "loss": 0.0153, "step": 60030 }, { "epoch": 395.0, "grad_norm": 1.663114070892334, "learning_rate": 0.0001, "loss": 0.0163, "step": 60040 }, { "epoch": 395.0657894736842, "grad_norm": 1.409057855606079, "learning_rate": 0.0001, "loss": 0.0158, "step": 60050 }, { "epoch": 395.13157894736844, "grad_norm": 1.5438445806503296, "learning_rate": 0.0001, "loss": 0.0138, "step": 60060 }, { "epoch": 395.19736842105266, "grad_norm": 1.68976891040802, "learning_rate": 0.0001, "loss": 0.0169, "step": 60070 }, { "epoch": 395.2631578947368, "grad_norm": 1.401330590248108, "learning_rate": 0.0001, "loss": 0.0142, "step": 60080 }, { "epoch": 395.32894736842104, "grad_norm": 1.766840934753418, "learning_rate": 0.0001, "loss": 0.0149, "step": 60090 }, { "epoch": 395.39473684210526, "grad_norm": 1.4699451923370361, "learning_rate": 0.0001, "loss": 0.0191, "step": 60100 }, { "epoch": 395.4605263157895, "grad_norm": 1.6313817501068115, "learning_rate": 0.0001, "loss": 0.0131, "step": 60110 }, { "epoch": 395.5263157894737, "grad_norm": 1.9954255819320679, "learning_rate": 0.0001, "loss": 0.0142, "step": 60120 }, { "epoch": 395.5921052631579, "grad_norm": 1.238875389099121, "learning_rate": 0.0001, "loss": 0.0149, "step": 60130 }, { "epoch": 395.6578947368421, "grad_norm": 1.6091055870056152, "learning_rate": 0.0001, "loss": 0.0245, "step": 60140 }, { "epoch": 395.7236842105263, "grad_norm": 1.2470272779464722, "learning_rate": 0.0001, "loss": 0.015, "step": 60150 }, { "epoch": 395.7894736842105, "grad_norm": 1.727116584777832, "learning_rate": 0.0001, "loss": 0.0173, "step": 60160 }, { "epoch": 395.85526315789474, "grad_norm": 1.9549281597137451, "learning_rate": 0.0001, "loss": 0.0182, "step": 60170 }, { "epoch": 395.92105263157896, "grad_norm": 1.7942619323730469, "learning_rate": 0.0001, "loss": 0.016, "step": 60180 }, { "epoch": 395.9868421052632, "grad_norm": 1.2370905876159668, "learning_rate": 0.0001, "loss": 0.0197, "step": 60190 }, { "epoch": 396.05263157894734, "grad_norm": 1.5245704650878906, "learning_rate": 0.0001, "loss": 0.0146, "step": 60200 }, { "epoch": 396.11842105263156, "grad_norm": 1.4753719568252563, "learning_rate": 0.0001, "loss": 0.0132, "step": 60210 }, { "epoch": 396.1842105263158, "grad_norm": 1.4890739917755127, "learning_rate": 0.0001, "loss": 0.0184, "step": 60220 }, { "epoch": 396.25, "grad_norm": 1.7678086757659912, "learning_rate": 0.0001, "loss": 0.0184, "step": 60230 }, { "epoch": 396.3157894736842, "grad_norm": 1.9464613199234009, "learning_rate": 0.0001, "loss": 0.0157, "step": 60240 }, { "epoch": 396.38157894736844, "grad_norm": 1.29495370388031, "learning_rate": 0.0001, "loss": 0.0167, "step": 60250 }, { "epoch": 396.44736842105266, "grad_norm": 1.2438706159591675, "learning_rate": 0.0001, "loss": 0.0139, "step": 60260 }, { "epoch": 396.5131578947368, "grad_norm": 1.3620085716247559, "learning_rate": 0.0001, "loss": 0.0177, "step": 60270 }, { "epoch": 396.57894736842104, "grad_norm": 1.5268805027008057, "learning_rate": 0.0001, "loss": 0.0139, "step": 60280 }, { "epoch": 396.64473684210526, "grad_norm": 1.542681336402893, "learning_rate": 0.0001, "loss": 0.0174, "step": 60290 }, { "epoch": 396.7105263157895, "grad_norm": 1.5515742301940918, "learning_rate": 0.0001, "loss": 0.0176, "step": 60300 }, { "epoch": 396.7763157894737, "grad_norm": 1.9082658290863037, "learning_rate": 0.0001, "loss": 0.0175, "step": 60310 }, { "epoch": 396.8421052631579, "grad_norm": 1.8479769229888916, "learning_rate": 0.0001, "loss": 0.0221, "step": 60320 }, { "epoch": 396.9078947368421, "grad_norm": 1.52317476272583, "learning_rate": 0.0001, "loss": 0.0169, "step": 60330 }, { "epoch": 396.9736842105263, "grad_norm": 1.4275017976760864, "learning_rate": 0.0001, "loss": 0.0159, "step": 60340 }, { "epoch": 397.0394736842105, "grad_norm": 1.649312973022461, "learning_rate": 0.0001, "loss": 0.0172, "step": 60350 }, { "epoch": 397.10526315789474, "grad_norm": 1.9322015047073364, "learning_rate": 0.0001, "loss": 0.0157, "step": 60360 }, { "epoch": 397.17105263157896, "grad_norm": 1.4919013977050781, "learning_rate": 0.0001, "loss": 0.016, "step": 60370 }, { "epoch": 397.2368421052632, "grad_norm": 1.4330700635910034, "learning_rate": 0.0001, "loss": 0.0164, "step": 60380 }, { "epoch": 397.30263157894734, "grad_norm": 1.5765502452850342, "learning_rate": 0.0001, "loss": 0.0161, "step": 60390 }, { "epoch": 397.36842105263156, "grad_norm": 1.579158902168274, "learning_rate": 0.0001, "loss": 0.0184, "step": 60400 }, { "epoch": 397.4342105263158, "grad_norm": 1.6341392993927002, "learning_rate": 0.0001, "loss": 0.018, "step": 60410 }, { "epoch": 397.5, "grad_norm": 1.4362459182739258, "learning_rate": 0.0001, "loss": 0.017, "step": 60420 }, { "epoch": 397.5657894736842, "grad_norm": 1.8199599981307983, "learning_rate": 0.0001, "loss": 0.0164, "step": 60430 }, { "epoch": 397.63157894736844, "grad_norm": 1.9932130575180054, "learning_rate": 0.0001, "loss": 0.014, "step": 60440 }, { "epoch": 397.69736842105266, "grad_norm": 1.7099984884262085, "learning_rate": 0.0001, "loss": 0.0185, "step": 60450 }, { "epoch": 397.7631578947368, "grad_norm": 1.7268365621566772, "learning_rate": 0.0001, "loss": 0.0148, "step": 60460 }, { "epoch": 397.82894736842104, "grad_norm": 1.69930899143219, "learning_rate": 0.0001, "loss": 0.016, "step": 60470 }, { "epoch": 397.89473684210526, "grad_norm": 1.8428137302398682, "learning_rate": 0.0001, "loss": 0.0141, "step": 60480 }, { "epoch": 397.9605263157895, "grad_norm": 1.4585182666778564, "learning_rate": 0.0001, "loss": 0.0179, "step": 60490 }, { "epoch": 398.0263157894737, "grad_norm": 1.375370740890503, "learning_rate": 0.0001, "loss": 0.0195, "step": 60500 }, { "epoch": 398.0921052631579, "grad_norm": 1.7586466073989868, "learning_rate": 0.0001, "loss": 0.0139, "step": 60510 }, { "epoch": 398.1578947368421, "grad_norm": 1.2273732423782349, "learning_rate": 0.0001, "loss": 0.0179, "step": 60520 }, { "epoch": 398.2236842105263, "grad_norm": 1.4478251934051514, "learning_rate": 0.0001, "loss": 0.0184, "step": 60530 }, { "epoch": 398.2894736842105, "grad_norm": 1.568872332572937, "learning_rate": 0.0001, "loss": 0.0171, "step": 60540 }, { "epoch": 398.35526315789474, "grad_norm": 1.6995569467544556, "learning_rate": 0.0001, "loss": 0.0141, "step": 60550 }, { "epoch": 398.42105263157896, "grad_norm": 1.346839189529419, "learning_rate": 0.0001, "loss": 0.0142, "step": 60560 }, { "epoch": 398.4868421052632, "grad_norm": 1.2711660861968994, "learning_rate": 0.0001, "loss": 0.0168, "step": 60570 }, { "epoch": 398.55263157894734, "grad_norm": 1.683348298072815, "learning_rate": 0.0001, "loss": 0.0157, "step": 60580 }, { "epoch": 398.61842105263156, "grad_norm": 1.1494979858398438, "learning_rate": 0.0001, "loss": 0.0171, "step": 60590 }, { "epoch": 398.6842105263158, "grad_norm": 1.071856141090393, "learning_rate": 0.0001, "loss": 0.0173, "step": 60600 }, { "epoch": 398.75, "grad_norm": 1.5299055576324463, "learning_rate": 0.0001, "loss": 0.0208, "step": 60610 }, { "epoch": 398.8157894736842, "grad_norm": 1.5153446197509766, "learning_rate": 0.0001, "loss": 0.0172, "step": 60620 }, { "epoch": 398.88157894736844, "grad_norm": 1.461734414100647, "learning_rate": 0.0001, "loss": 0.0165, "step": 60630 }, { "epoch": 398.94736842105266, "grad_norm": 1.4818834066390991, "learning_rate": 0.0001, "loss": 0.013, "step": 60640 }, { "epoch": 399.0131578947368, "grad_norm": 2.012721538543701, "learning_rate": 0.0001, "loss": 0.0179, "step": 60650 }, { "epoch": 399.07894736842104, "grad_norm": 1.4953534603118896, "learning_rate": 0.0001, "loss": 0.0145, "step": 60660 }, { "epoch": 399.14473684210526, "grad_norm": 2.196293830871582, "learning_rate": 0.0001, "loss": 0.017, "step": 60670 }, { "epoch": 399.2105263157895, "grad_norm": 2.102480888366699, "learning_rate": 0.0001, "loss": 0.0153, "step": 60680 }, { "epoch": 399.2763157894737, "grad_norm": 2.0571651458740234, "learning_rate": 0.0001, "loss": 0.0168, "step": 60690 }, { "epoch": 399.3421052631579, "grad_norm": 1.7616114616394043, "learning_rate": 0.0001, "loss": 0.0185, "step": 60700 }, { "epoch": 399.4078947368421, "grad_norm": 1.4777860641479492, "learning_rate": 0.0001, "loss": 0.0161, "step": 60710 }, { "epoch": 399.4736842105263, "grad_norm": 1.6037324666976929, "learning_rate": 0.0001, "loss": 0.0179, "step": 60720 }, { "epoch": 399.5394736842105, "grad_norm": 1.7269688844680786, "learning_rate": 0.0001, "loss": 0.0158, "step": 60730 }, { "epoch": 399.60526315789474, "grad_norm": 1.293197751045227, "learning_rate": 0.0001, "loss": 0.0127, "step": 60740 }, { "epoch": 399.67105263157896, "grad_norm": 1.1974250078201294, "learning_rate": 0.0001, "loss": 0.0171, "step": 60750 }, { "epoch": 399.7368421052632, "grad_norm": 1.589356541633606, "learning_rate": 0.0001, "loss": 0.0188, "step": 60760 }, { "epoch": 399.80263157894734, "grad_norm": 1.4017289876937866, "learning_rate": 0.0001, "loss": 0.0178, "step": 60770 }, { "epoch": 399.86842105263156, "grad_norm": 0.9516300559043884, "learning_rate": 0.0001, "loss": 0.0192, "step": 60780 }, { "epoch": 399.9342105263158, "grad_norm": 1.5163041353225708, "learning_rate": 0.0001, "loss": 0.0157, "step": 60790 }, { "epoch": 400.0, "grad_norm": 1.6798067092895508, "learning_rate": 0.0001, "loss": 0.015, "step": 60800 }, { "epoch": 400.0657894736842, "grad_norm": 1.6041947603225708, "learning_rate": 0.0001, "loss": 0.0172, "step": 60810 }, { "epoch": 400.13157894736844, "grad_norm": 1.721752405166626, "learning_rate": 0.0001, "loss": 0.0173, "step": 60820 }, { "epoch": 400.19736842105266, "grad_norm": 1.5954129695892334, "learning_rate": 0.0001, "loss": 0.0166, "step": 60830 }, { "epoch": 400.2631578947368, "grad_norm": 1.552899956703186, "learning_rate": 0.0001, "loss": 0.0172, "step": 60840 }, { "epoch": 400.32894736842104, "grad_norm": 1.9305822849273682, "learning_rate": 0.0001, "loss": 0.0135, "step": 60850 }, { "epoch": 400.39473684210526, "grad_norm": 1.472434163093567, "learning_rate": 0.0001, "loss": 0.016, "step": 60860 }, { "epoch": 400.4605263157895, "grad_norm": 1.8141403198242188, "learning_rate": 0.0001, "loss": 0.0183, "step": 60870 }, { "epoch": 400.5263157894737, "grad_norm": 1.4808552265167236, "learning_rate": 0.0001, "loss": 0.0162, "step": 60880 }, { "epoch": 400.5921052631579, "grad_norm": 1.6950610876083374, "learning_rate": 0.0001, "loss": 0.0167, "step": 60890 }, { "epoch": 400.6578947368421, "grad_norm": 1.7159079313278198, "learning_rate": 0.0001, "loss": 0.0133, "step": 60900 }, { "epoch": 400.7236842105263, "grad_norm": 1.8958297967910767, "learning_rate": 0.0001, "loss": 0.015, "step": 60910 }, { "epoch": 400.7894736842105, "grad_norm": 2.1310229301452637, "learning_rate": 0.0001, "loss": 0.0167, "step": 60920 }, { "epoch": 400.85526315789474, "grad_norm": 2.051835536956787, "learning_rate": 0.0001, "loss": 0.0145, "step": 60930 }, { "epoch": 400.92105263157896, "grad_norm": 1.639346718788147, "learning_rate": 0.0001, "loss": 0.0172, "step": 60940 }, { "epoch": 400.9868421052632, "grad_norm": 1.800190806388855, "learning_rate": 0.0001, "loss": 0.0189, "step": 60950 }, { "epoch": 401.05263157894734, "grad_norm": 2.0080623626708984, "learning_rate": 0.0001, "loss": 0.0187, "step": 60960 }, { "epoch": 401.11842105263156, "grad_norm": 1.4645057916641235, "learning_rate": 0.0001, "loss": 0.0147, "step": 60970 }, { "epoch": 401.1842105263158, "grad_norm": 1.4520797729492188, "learning_rate": 0.0001, "loss": 0.0149, "step": 60980 }, { "epoch": 401.25, "grad_norm": 1.4408619403839111, "learning_rate": 0.0001, "loss": 0.0165, "step": 60990 }, { "epoch": 401.3157894736842, "grad_norm": 1.6279431581497192, "learning_rate": 0.0001, "loss": 0.019, "step": 61000 }, { "epoch": 401.38157894736844, "grad_norm": 1.411065697669983, "learning_rate": 0.0001, "loss": 0.019, "step": 61010 }, { "epoch": 401.44736842105266, "grad_norm": 1.6873530149459839, "learning_rate": 0.0001, "loss": 0.0163, "step": 61020 }, { "epoch": 401.5131578947368, "grad_norm": 1.229059100151062, "learning_rate": 0.0001, "loss": 0.0189, "step": 61030 }, { "epoch": 401.57894736842104, "grad_norm": 1.4696850776672363, "learning_rate": 0.0001, "loss": 0.0153, "step": 61040 }, { "epoch": 401.64473684210526, "grad_norm": 1.2842934131622314, "learning_rate": 0.0001, "loss": 0.0153, "step": 61050 }, { "epoch": 401.7105263157895, "grad_norm": 1.2318668365478516, "learning_rate": 0.0001, "loss": 0.0132, "step": 61060 }, { "epoch": 401.7763157894737, "grad_norm": 1.5535794496536255, "learning_rate": 0.0001, "loss": 0.0131, "step": 61070 }, { "epoch": 401.8421052631579, "grad_norm": 1.5659184455871582, "learning_rate": 0.0001, "loss": 0.0135, "step": 61080 }, { "epoch": 401.9078947368421, "grad_norm": 1.5950560569763184, "learning_rate": 0.0001, "loss": 0.0169, "step": 61090 }, { "epoch": 401.9736842105263, "grad_norm": 1.655729055404663, "learning_rate": 0.0001, "loss": 0.0149, "step": 61100 }, { "epoch": 402.0394736842105, "grad_norm": 1.4422119855880737, "learning_rate": 0.0001, "loss": 0.0207, "step": 61110 }, { "epoch": 402.10526315789474, "grad_norm": 1.8937193155288696, "learning_rate": 0.0001, "loss": 0.0149, "step": 61120 }, { "epoch": 402.17105263157896, "grad_norm": 1.287742018699646, "learning_rate": 0.0001, "loss": 0.0152, "step": 61130 }, { "epoch": 402.2368421052632, "grad_norm": 1.6795521974563599, "learning_rate": 0.0001, "loss": 0.0155, "step": 61140 }, { "epoch": 402.30263157894734, "grad_norm": 1.9510471820831299, "learning_rate": 0.0001, "loss": 0.0147, "step": 61150 }, { "epoch": 402.36842105263156, "grad_norm": 2.061443567276001, "learning_rate": 0.0001, "loss": 0.013, "step": 61160 }, { "epoch": 402.4342105263158, "grad_norm": 1.5852148532867432, "learning_rate": 0.0001, "loss": 0.0159, "step": 61170 }, { "epoch": 402.5, "grad_norm": 1.3113123178482056, "learning_rate": 0.0001, "loss": 0.0203, "step": 61180 }, { "epoch": 402.5657894736842, "grad_norm": 1.545493245124817, "learning_rate": 0.0001, "loss": 0.0175, "step": 61190 }, { "epoch": 402.63157894736844, "grad_norm": 1.3503739833831787, "learning_rate": 0.0001, "loss": 0.0162, "step": 61200 }, { "epoch": 402.69736842105266, "grad_norm": 1.4477505683898926, "learning_rate": 0.0001, "loss": 0.0184, "step": 61210 }, { "epoch": 402.7631578947368, "grad_norm": 1.9566593170166016, "learning_rate": 0.0001, "loss": 0.0156, "step": 61220 }, { "epoch": 402.82894736842104, "grad_norm": 1.983458399772644, "learning_rate": 0.0001, "loss": 0.0152, "step": 61230 }, { "epoch": 402.89473684210526, "grad_norm": 1.3763656616210938, "learning_rate": 0.0001, "loss": 0.0171, "step": 61240 }, { "epoch": 402.9605263157895, "grad_norm": 1.6158992052078247, "learning_rate": 0.0001, "loss": 0.0153, "step": 61250 }, { "epoch": 403.0263157894737, "grad_norm": 1.2295292615890503, "learning_rate": 0.0001, "loss": 0.0187, "step": 61260 }, { "epoch": 403.0921052631579, "grad_norm": 1.3811053037643433, "learning_rate": 0.0001, "loss": 0.0191, "step": 61270 }, { "epoch": 403.1578947368421, "grad_norm": 1.2631210088729858, "learning_rate": 0.0001, "loss": 0.014, "step": 61280 }, { "epoch": 403.2236842105263, "grad_norm": 1.0089226961135864, "learning_rate": 0.0001, "loss": 0.0173, "step": 61290 }, { "epoch": 403.2894736842105, "grad_norm": 1.0188719034194946, "learning_rate": 0.0001, "loss": 0.0188, "step": 61300 }, { "epoch": 403.35526315789474, "grad_norm": 1.355810523033142, "learning_rate": 0.0001, "loss": 0.0128, "step": 61310 }, { "epoch": 403.42105263157896, "grad_norm": 1.250390887260437, "learning_rate": 0.0001, "loss": 0.018, "step": 61320 }, { "epoch": 403.4868421052632, "grad_norm": 1.270668864250183, "learning_rate": 0.0001, "loss": 0.0164, "step": 61330 }, { "epoch": 403.55263157894734, "grad_norm": 1.4754079580307007, "learning_rate": 0.0001, "loss": 0.0212, "step": 61340 }, { "epoch": 403.61842105263156, "grad_norm": 1.4692509174346924, "learning_rate": 0.0001, "loss": 0.0161, "step": 61350 }, { "epoch": 403.6842105263158, "grad_norm": 1.7973846197128296, "learning_rate": 0.0001, "loss": 0.0142, "step": 61360 }, { "epoch": 403.75, "grad_norm": 1.8146002292633057, "learning_rate": 0.0001, "loss": 0.0146, "step": 61370 }, { "epoch": 403.8157894736842, "grad_norm": 1.4827961921691895, "learning_rate": 0.0001, "loss": 0.0149, "step": 61380 }, { "epoch": 403.88157894736844, "grad_norm": 1.1058157682418823, "learning_rate": 0.0001, "loss": 0.0176, "step": 61390 }, { "epoch": 403.94736842105266, "grad_norm": 1.3886042833328247, "learning_rate": 0.0001, "loss": 0.0153, "step": 61400 }, { "epoch": 404.0131578947368, "grad_norm": 1.1767812967300415, "learning_rate": 0.0001, "loss": 0.0182, "step": 61410 }, { "epoch": 404.07894736842104, "grad_norm": 1.5207698345184326, "learning_rate": 0.0001, "loss": 0.0171, "step": 61420 }, { "epoch": 404.14473684210526, "grad_norm": 1.5510616302490234, "learning_rate": 0.0001, "loss": 0.016, "step": 61430 }, { "epoch": 404.2105263157895, "grad_norm": 1.731972336769104, "learning_rate": 0.0001, "loss": 0.0189, "step": 61440 }, { "epoch": 404.2763157894737, "grad_norm": 1.635833978652954, "learning_rate": 0.0001, "loss": 0.017, "step": 61450 }, { "epoch": 404.3421052631579, "grad_norm": 1.6997956037521362, "learning_rate": 0.0001, "loss": 0.0151, "step": 61460 }, { "epoch": 404.4078947368421, "grad_norm": 1.8460444211959839, "learning_rate": 0.0001, "loss": 0.015, "step": 61470 }, { "epoch": 404.4736842105263, "grad_norm": 1.3068957328796387, "learning_rate": 0.0001, "loss": 0.0177, "step": 61480 }, { "epoch": 404.5394736842105, "grad_norm": 1.178355097770691, "learning_rate": 0.0001, "loss": 0.0178, "step": 61490 }, { "epoch": 404.60526315789474, "grad_norm": 1.9634543657302856, "learning_rate": 0.0001, "loss": 0.016, "step": 61500 }, { "epoch": 404.67105263157896, "grad_norm": 1.6977757215499878, "learning_rate": 0.0001, "loss": 0.0191, "step": 61510 }, { "epoch": 404.7368421052632, "grad_norm": 1.4159157276153564, "learning_rate": 0.0001, "loss": 0.0171, "step": 61520 }, { "epoch": 404.80263157894734, "grad_norm": 1.920867681503296, "learning_rate": 0.0001, "loss": 0.0156, "step": 61530 }, { "epoch": 404.86842105263156, "grad_norm": 1.484824776649475, "learning_rate": 0.0001, "loss": 0.014, "step": 61540 }, { "epoch": 404.9342105263158, "grad_norm": 1.6300387382507324, "learning_rate": 0.0001, "loss": 0.0142, "step": 61550 }, { "epoch": 405.0, "grad_norm": 1.6232409477233887, "learning_rate": 0.0001, "loss": 0.0162, "step": 61560 }, { "epoch": 405.0657894736842, "grad_norm": 1.5431249141693115, "learning_rate": 0.0001, "loss": 0.0185, "step": 61570 }, { "epoch": 405.13157894736844, "grad_norm": 1.4794772863388062, "learning_rate": 0.0001, "loss": 0.0179, "step": 61580 }, { "epoch": 405.19736842105266, "grad_norm": 1.476025104522705, "learning_rate": 0.0001, "loss": 0.0162, "step": 61590 }, { "epoch": 405.2631578947368, "grad_norm": 1.3814473152160645, "learning_rate": 0.0001, "loss": 0.0169, "step": 61600 }, { "epoch": 405.32894736842104, "grad_norm": 1.0683940649032593, "learning_rate": 0.0001, "loss": 0.0171, "step": 61610 }, { "epoch": 405.39473684210526, "grad_norm": 1.7210352420806885, "learning_rate": 0.0001, "loss": 0.0174, "step": 61620 }, { "epoch": 405.4605263157895, "grad_norm": 1.781412124633789, "learning_rate": 0.0001, "loss": 0.0137, "step": 61630 }, { "epoch": 405.5263157894737, "grad_norm": 1.6453654766082764, "learning_rate": 0.0001, "loss": 0.0165, "step": 61640 }, { "epoch": 405.5921052631579, "grad_norm": 1.3703323602676392, "learning_rate": 0.0001, "loss": 0.0169, "step": 61650 }, { "epoch": 405.6578947368421, "grad_norm": 1.410240888595581, "learning_rate": 0.0001, "loss": 0.0146, "step": 61660 }, { "epoch": 405.7236842105263, "grad_norm": 1.8176219463348389, "learning_rate": 0.0001, "loss": 0.016, "step": 61670 }, { "epoch": 405.7894736842105, "grad_norm": 1.556492567062378, "learning_rate": 0.0001, "loss": 0.0174, "step": 61680 }, { "epoch": 405.85526315789474, "grad_norm": 1.2990763187408447, "learning_rate": 0.0001, "loss": 0.0167, "step": 61690 }, { "epoch": 405.92105263157896, "grad_norm": 1.557777762413025, "learning_rate": 0.0001, "loss": 0.0157, "step": 61700 }, { "epoch": 405.9868421052632, "grad_norm": 1.245428442955017, "learning_rate": 0.0001, "loss": 0.0147, "step": 61710 }, { "epoch": 406.05263157894734, "grad_norm": 1.3901150226593018, "learning_rate": 0.0001, "loss": 0.0171, "step": 61720 }, { "epoch": 406.11842105263156, "grad_norm": 1.6716729402542114, "learning_rate": 0.0001, "loss": 0.0158, "step": 61730 }, { "epoch": 406.1842105263158, "grad_norm": 1.6375890970230103, "learning_rate": 0.0001, "loss": 0.0188, "step": 61740 }, { "epoch": 406.25, "grad_norm": 1.1532267332077026, "learning_rate": 0.0001, "loss": 0.0154, "step": 61750 }, { "epoch": 406.3157894736842, "grad_norm": 1.727380633354187, "learning_rate": 0.0001, "loss": 0.0135, "step": 61760 }, { "epoch": 406.38157894736844, "grad_norm": 1.5960321426391602, "learning_rate": 0.0001, "loss": 0.0143, "step": 61770 }, { "epoch": 406.44736842105266, "grad_norm": 1.9580872058868408, "learning_rate": 0.0001, "loss": 0.0177, "step": 61780 }, { "epoch": 406.5131578947368, "grad_norm": 1.6212024688720703, "learning_rate": 0.0001, "loss": 0.0175, "step": 61790 }, { "epoch": 406.57894736842104, "grad_norm": 1.9959917068481445, "learning_rate": 0.0001, "loss": 0.0141, "step": 61800 }, { "epoch": 406.64473684210526, "grad_norm": 1.7839710712432861, "learning_rate": 0.0001, "loss": 0.0173, "step": 61810 }, { "epoch": 406.7105263157895, "grad_norm": 1.4765949249267578, "learning_rate": 0.0001, "loss": 0.0155, "step": 61820 }, { "epoch": 406.7763157894737, "grad_norm": 1.6528723239898682, "learning_rate": 0.0001, "loss": 0.0176, "step": 61830 }, { "epoch": 406.8421052631579, "grad_norm": 1.869626760482788, "learning_rate": 0.0001, "loss": 0.0138, "step": 61840 }, { "epoch": 406.9078947368421, "grad_norm": 1.2340826988220215, "learning_rate": 0.0001, "loss": 0.0158, "step": 61850 }, { "epoch": 406.9736842105263, "grad_norm": 1.5291471481323242, "learning_rate": 0.0001, "loss": 0.0187, "step": 61860 }, { "epoch": 407.0394736842105, "grad_norm": 1.7433491945266724, "learning_rate": 0.0001, "loss": 0.0177, "step": 61870 }, { "epoch": 407.10526315789474, "grad_norm": 1.385090947151184, "learning_rate": 0.0001, "loss": 0.0147, "step": 61880 }, { "epoch": 407.17105263157896, "grad_norm": 1.394263505935669, "learning_rate": 0.0001, "loss": 0.0156, "step": 61890 }, { "epoch": 407.2368421052632, "grad_norm": 1.3490864038467407, "learning_rate": 0.0001, "loss": 0.0156, "step": 61900 }, { "epoch": 407.30263157894734, "grad_norm": 1.2991925477981567, "learning_rate": 0.0001, "loss": 0.0149, "step": 61910 }, { "epoch": 407.36842105263156, "grad_norm": 1.5982964038848877, "learning_rate": 0.0001, "loss": 0.0181, "step": 61920 }, { "epoch": 407.4342105263158, "grad_norm": 1.2437869310379028, "learning_rate": 0.0001, "loss": 0.0172, "step": 61930 }, { "epoch": 407.5, "grad_norm": 1.2866312265396118, "learning_rate": 0.0001, "loss": 0.0182, "step": 61940 }, { "epoch": 407.5657894736842, "grad_norm": 1.3152005672454834, "learning_rate": 0.0001, "loss": 0.0152, "step": 61950 }, { "epoch": 407.63157894736844, "grad_norm": 1.057481050491333, "learning_rate": 0.0001, "loss": 0.0222, "step": 61960 }, { "epoch": 407.69736842105266, "grad_norm": 0.9167497158050537, "learning_rate": 0.0001, "loss": 0.016, "step": 61970 }, { "epoch": 407.7631578947368, "grad_norm": 1.717684268951416, "learning_rate": 0.0001, "loss": 0.016, "step": 61980 }, { "epoch": 407.82894736842104, "grad_norm": 1.536014437675476, "learning_rate": 0.0001, "loss": 0.0133, "step": 61990 }, { "epoch": 407.89473684210526, "grad_norm": 1.0581995248794556, "learning_rate": 0.0001, "loss": 0.0142, "step": 62000 }, { "epoch": 407.9605263157895, "grad_norm": 1.3955776691436768, "learning_rate": 0.0001, "loss": 0.0153, "step": 62010 }, { "epoch": 408.0263157894737, "grad_norm": 1.5683144330978394, "learning_rate": 0.0001, "loss": 0.0193, "step": 62020 }, { "epoch": 408.0921052631579, "grad_norm": 1.1734751462936401, "learning_rate": 0.0001, "loss": 0.0166, "step": 62030 }, { "epoch": 408.1578947368421, "grad_norm": 1.4005085229873657, "learning_rate": 0.0001, "loss": 0.013, "step": 62040 }, { "epoch": 408.2236842105263, "grad_norm": 1.7002780437469482, "learning_rate": 0.0001, "loss": 0.0186, "step": 62050 }, { "epoch": 408.2894736842105, "grad_norm": 1.3642637729644775, "learning_rate": 0.0001, "loss": 0.0188, "step": 62060 }, { "epoch": 408.35526315789474, "grad_norm": 1.3153959512710571, "learning_rate": 0.0001, "loss": 0.0152, "step": 62070 }, { "epoch": 408.42105263157896, "grad_norm": 1.9570293426513672, "learning_rate": 0.0001, "loss": 0.0165, "step": 62080 }, { "epoch": 408.4868421052632, "grad_norm": 1.9395501613616943, "learning_rate": 0.0001, "loss": 0.0145, "step": 62090 }, { "epoch": 408.55263157894734, "grad_norm": 1.704514503479004, "learning_rate": 0.0001, "loss": 0.0145, "step": 62100 }, { "epoch": 408.61842105263156, "grad_norm": 1.728179693222046, "learning_rate": 0.0001, "loss": 0.0156, "step": 62110 }, { "epoch": 408.6842105263158, "grad_norm": 1.6075469255447388, "learning_rate": 0.0001, "loss": 0.016, "step": 62120 }, { "epoch": 408.75, "grad_norm": 1.402157187461853, "learning_rate": 0.0001, "loss": 0.0195, "step": 62130 }, { "epoch": 408.8157894736842, "grad_norm": 1.443162202835083, "learning_rate": 0.0001, "loss": 0.0145, "step": 62140 }, { "epoch": 408.88157894736844, "grad_norm": 1.5358211994171143, "learning_rate": 0.0001, "loss": 0.0216, "step": 62150 }, { "epoch": 408.94736842105266, "grad_norm": 1.7905914783477783, "learning_rate": 0.0001, "loss": 0.0157, "step": 62160 }, { "epoch": 409.0131578947368, "grad_norm": 1.6250230073928833, "learning_rate": 0.0001, "loss": 0.0183, "step": 62170 }, { "epoch": 409.07894736842104, "grad_norm": 1.5282878875732422, "learning_rate": 0.0001, "loss": 0.0149, "step": 62180 }, { "epoch": 409.14473684210526, "grad_norm": 1.7915515899658203, "learning_rate": 0.0001, "loss": 0.0161, "step": 62190 }, { "epoch": 409.2105263157895, "grad_norm": 1.3905689716339111, "learning_rate": 0.0001, "loss": 0.0147, "step": 62200 }, { "epoch": 409.2763157894737, "grad_norm": 1.1597661972045898, "learning_rate": 0.0001, "loss": 0.015, "step": 62210 }, { "epoch": 409.3421052631579, "grad_norm": 1.163612723350525, "learning_rate": 0.0001, "loss": 0.0189, "step": 62220 }, { "epoch": 409.4078947368421, "grad_norm": 1.5609731674194336, "learning_rate": 0.0001, "loss": 0.0156, "step": 62230 }, { "epoch": 409.4736842105263, "grad_norm": 0.9844961166381836, "learning_rate": 0.0001, "loss": 0.0168, "step": 62240 }, { "epoch": 409.5394736842105, "grad_norm": 1.2960361242294312, "learning_rate": 0.0001, "loss": 0.0203, "step": 62250 }, { "epoch": 409.60526315789474, "grad_norm": 1.5479884147644043, "learning_rate": 0.0001, "loss": 0.0153, "step": 62260 }, { "epoch": 409.67105263157896, "grad_norm": 1.724444031715393, "learning_rate": 0.0001, "loss": 0.0185, "step": 62270 }, { "epoch": 409.7368421052632, "grad_norm": 1.4411687850952148, "learning_rate": 0.0001, "loss": 0.0132, "step": 62280 }, { "epoch": 409.80263157894734, "grad_norm": 1.2761766910552979, "learning_rate": 0.0001, "loss": 0.0186, "step": 62290 }, { "epoch": 409.86842105263156, "grad_norm": 1.255781888961792, "learning_rate": 0.0001, "loss": 0.0185, "step": 62300 }, { "epoch": 409.9342105263158, "grad_norm": 1.6690516471862793, "learning_rate": 0.0001, "loss": 0.0132, "step": 62310 }, { "epoch": 410.0, "grad_norm": 1.1566661596298218, "learning_rate": 0.0001, "loss": 0.0156, "step": 62320 }, { "epoch": 410.0657894736842, "grad_norm": 0.8921098113059998, "learning_rate": 0.0001, "loss": 0.0137, "step": 62330 }, { "epoch": 410.13157894736844, "grad_norm": 1.3300411701202393, "learning_rate": 0.0001, "loss": 0.0176, "step": 62340 }, { "epoch": 410.19736842105266, "grad_norm": 1.8864973783493042, "learning_rate": 0.0001, "loss": 0.0144, "step": 62350 }, { "epoch": 410.2631578947368, "grad_norm": 1.4038581848144531, "learning_rate": 0.0001, "loss": 0.019, "step": 62360 }, { "epoch": 410.32894736842104, "grad_norm": 1.9443747997283936, "learning_rate": 0.0001, "loss": 0.0154, "step": 62370 }, { "epoch": 410.39473684210526, "grad_norm": 1.8783713579177856, "learning_rate": 0.0001, "loss": 0.0147, "step": 62380 }, { "epoch": 410.4605263157895, "grad_norm": 1.8441202640533447, "learning_rate": 0.0001, "loss": 0.014, "step": 62390 }, { "epoch": 410.5263157894737, "grad_norm": 1.1969586610794067, "learning_rate": 0.0001, "loss": 0.0178, "step": 62400 }, { "epoch": 410.5921052631579, "grad_norm": 0.9968204498291016, "learning_rate": 0.0001, "loss": 0.0159, "step": 62410 }, { "epoch": 410.6578947368421, "grad_norm": 1.5199106931686401, "learning_rate": 0.0001, "loss": 0.0143, "step": 62420 }, { "epoch": 410.7236842105263, "grad_norm": 1.8477798700332642, "learning_rate": 0.0001, "loss": 0.0138, "step": 62430 }, { "epoch": 410.7894736842105, "grad_norm": 1.6514055728912354, "learning_rate": 0.0001, "loss": 0.0212, "step": 62440 }, { "epoch": 410.85526315789474, "grad_norm": 1.2797083854675293, "learning_rate": 0.0001, "loss": 0.0195, "step": 62450 }, { "epoch": 410.92105263157896, "grad_norm": 1.3411712646484375, "learning_rate": 0.0001, "loss": 0.0192, "step": 62460 }, { "epoch": 410.9868421052632, "grad_norm": 1.707116961479187, "learning_rate": 0.0001, "loss": 0.0169, "step": 62470 }, { "epoch": 411.05263157894734, "grad_norm": 1.4658856391906738, "learning_rate": 0.0001, "loss": 0.0142, "step": 62480 }, { "epoch": 411.11842105263156, "grad_norm": 1.2052992582321167, "learning_rate": 0.0001, "loss": 0.0163, "step": 62490 }, { "epoch": 411.1842105263158, "grad_norm": 1.38649320602417, "learning_rate": 0.0001, "loss": 0.021, "step": 62500 }, { "epoch": 411.25, "grad_norm": 1.6783323287963867, "learning_rate": 0.0001, "loss": 0.0148, "step": 62510 }, { "epoch": 411.3157894736842, "grad_norm": 1.0747283697128296, "learning_rate": 0.0001, "loss": 0.0163, "step": 62520 }, { "epoch": 411.38157894736844, "grad_norm": 1.860534429550171, "learning_rate": 0.0001, "loss": 0.0151, "step": 62530 }, { "epoch": 411.44736842105266, "grad_norm": 1.5785454511642456, "learning_rate": 0.0001, "loss": 0.0208, "step": 62540 }, { "epoch": 411.5131578947368, "grad_norm": 1.7143791913986206, "learning_rate": 0.0001, "loss": 0.0133, "step": 62550 }, { "epoch": 411.57894736842104, "grad_norm": 1.5946755409240723, "learning_rate": 0.0001, "loss": 0.0141, "step": 62560 }, { "epoch": 411.64473684210526, "grad_norm": 1.5457898378372192, "learning_rate": 0.0001, "loss": 0.0172, "step": 62570 }, { "epoch": 411.7105263157895, "grad_norm": 1.5310314893722534, "learning_rate": 0.0001, "loss": 0.0163, "step": 62580 }, { "epoch": 411.7763157894737, "grad_norm": 1.2983052730560303, "learning_rate": 0.0001, "loss": 0.016, "step": 62590 }, { "epoch": 411.8421052631579, "grad_norm": 1.7219791412353516, "learning_rate": 0.0001, "loss": 0.0157, "step": 62600 }, { "epoch": 411.9078947368421, "grad_norm": 1.5697929859161377, "learning_rate": 0.0001, "loss": 0.0156, "step": 62610 }, { "epoch": 411.9736842105263, "grad_norm": 1.9293185472488403, "learning_rate": 0.0001, "loss": 0.0163, "step": 62620 }, { "epoch": 412.0394736842105, "grad_norm": 1.6472351551055908, "learning_rate": 0.0001, "loss": 0.0169, "step": 62630 }, { "epoch": 412.10526315789474, "grad_norm": 1.6138583421707153, "learning_rate": 0.0001, "loss": 0.0177, "step": 62640 }, { "epoch": 412.17105263157896, "grad_norm": 1.7507137060165405, "learning_rate": 0.0001, "loss": 0.0158, "step": 62650 }, { "epoch": 412.2368421052632, "grad_norm": 1.3270719051361084, "learning_rate": 0.0001, "loss": 0.018, "step": 62660 }, { "epoch": 412.30263157894734, "grad_norm": 1.4697394371032715, "learning_rate": 0.0001, "loss": 0.0146, "step": 62670 }, { "epoch": 412.36842105263156, "grad_norm": 1.7865115404129028, "learning_rate": 0.0001, "loss": 0.0195, "step": 62680 }, { "epoch": 412.4342105263158, "grad_norm": 1.6922149658203125, "learning_rate": 0.0001, "loss": 0.0145, "step": 62690 }, { "epoch": 412.5, "grad_norm": 1.178017258644104, "learning_rate": 0.0001, "loss": 0.0163, "step": 62700 }, { "epoch": 412.5657894736842, "grad_norm": 1.538726806640625, "learning_rate": 0.0001, "loss": 0.0155, "step": 62710 }, { "epoch": 412.63157894736844, "grad_norm": 1.8118677139282227, "learning_rate": 0.0001, "loss": 0.0161, "step": 62720 }, { "epoch": 412.69736842105266, "grad_norm": 1.5993560552597046, "learning_rate": 0.0001, "loss": 0.0126, "step": 62730 }, { "epoch": 412.7631578947368, "grad_norm": 1.357114315032959, "learning_rate": 0.0001, "loss": 0.0184, "step": 62740 }, { "epoch": 412.82894736842104, "grad_norm": 1.3791629076004028, "learning_rate": 0.0001, "loss": 0.0185, "step": 62750 }, { "epoch": 412.89473684210526, "grad_norm": 1.5014142990112305, "learning_rate": 0.0001, "loss": 0.0165, "step": 62760 }, { "epoch": 412.9605263157895, "grad_norm": 1.3612048625946045, "learning_rate": 0.0001, "loss": 0.0133, "step": 62770 }, { "epoch": 413.0263157894737, "grad_norm": 1.640514850616455, "learning_rate": 0.0001, "loss": 0.0157, "step": 62780 }, { "epoch": 413.0921052631579, "grad_norm": 2.3110105991363525, "learning_rate": 0.0001, "loss": 0.017, "step": 62790 }, { "epoch": 413.1578947368421, "grad_norm": 2.0111260414123535, "learning_rate": 0.0001, "loss": 0.018, "step": 62800 }, { "epoch": 413.2236842105263, "grad_norm": 1.8927607536315918, "learning_rate": 0.0001, "loss": 0.0151, "step": 62810 }, { "epoch": 413.2894736842105, "grad_norm": 1.9397964477539062, "learning_rate": 0.0001, "loss": 0.0184, "step": 62820 }, { "epoch": 413.35526315789474, "grad_norm": 2.1443281173706055, "learning_rate": 0.0001, "loss": 0.02, "step": 62830 }, { "epoch": 413.42105263157896, "grad_norm": 2.2852604389190674, "learning_rate": 0.0001, "loss": 0.0177, "step": 62840 }, { "epoch": 413.4868421052632, "grad_norm": 2.3656539916992188, "learning_rate": 0.0001, "loss": 0.0155, "step": 62850 }, { "epoch": 413.55263157894734, "grad_norm": 1.791249394416809, "learning_rate": 0.0001, "loss": 0.0131, "step": 62860 }, { "epoch": 413.61842105263156, "grad_norm": 1.8008053302764893, "learning_rate": 0.0001, "loss": 0.0151, "step": 62870 }, { "epoch": 413.6842105263158, "grad_norm": 1.9694030284881592, "learning_rate": 0.0001, "loss": 0.0147, "step": 62880 }, { "epoch": 413.75, "grad_norm": 1.900864839553833, "learning_rate": 0.0001, "loss": 0.0148, "step": 62890 }, { "epoch": 413.8157894736842, "grad_norm": 2.8073508739471436, "learning_rate": 0.0001, "loss": 0.0173, "step": 62900 }, { "epoch": 413.88157894736844, "grad_norm": 2.9215245246887207, "learning_rate": 0.0001, "loss": 0.0254, "step": 62910 }, { "epoch": 413.94736842105266, "grad_norm": 3.2239766120910645, "learning_rate": 0.0001, "loss": 0.0166, "step": 62920 }, { "epoch": 414.0131578947368, "grad_norm": 2.145555257797241, "learning_rate": 0.0001, "loss": 0.0149, "step": 62930 }, { "epoch": 414.07894736842104, "grad_norm": 1.7443833351135254, "learning_rate": 0.0001, "loss": 0.0162, "step": 62940 }, { "epoch": 414.14473684210526, "grad_norm": 1.8400439023971558, "learning_rate": 0.0001, "loss": 0.0154, "step": 62950 }, { "epoch": 414.2105263157895, "grad_norm": 2.006207227706909, "learning_rate": 0.0001, "loss": 0.0165, "step": 62960 }, { "epoch": 414.2763157894737, "grad_norm": 1.7218602895736694, "learning_rate": 0.0001, "loss": 0.0123, "step": 62970 }, { "epoch": 414.3421052631579, "grad_norm": 1.787772297859192, "learning_rate": 0.0001, "loss": 0.0145, "step": 62980 }, { "epoch": 414.4078947368421, "grad_norm": 1.6856647729873657, "learning_rate": 0.0001, "loss": 0.0122, "step": 62990 }, { "epoch": 414.4736842105263, "grad_norm": 1.6430188417434692, "learning_rate": 0.0001, "loss": 0.0154, "step": 63000 }, { "epoch": 414.5394736842105, "grad_norm": 1.5815410614013672, "learning_rate": 0.0001, "loss": 0.0188, "step": 63010 }, { "epoch": 414.60526315789474, "grad_norm": 1.3896393775939941, "learning_rate": 0.0001, "loss": 0.0129, "step": 63020 }, { "epoch": 414.67105263157896, "grad_norm": 1.8069902658462524, "learning_rate": 0.0001, "loss": 0.0183, "step": 63030 }, { "epoch": 414.7368421052632, "grad_norm": 1.4232251644134521, "learning_rate": 0.0001, "loss": 0.0183, "step": 63040 }, { "epoch": 414.80263157894734, "grad_norm": 1.7451257705688477, "learning_rate": 0.0001, "loss": 0.0183, "step": 63050 }, { "epoch": 414.86842105263156, "grad_norm": 1.3285224437713623, "learning_rate": 0.0001, "loss": 0.0163, "step": 63060 }, { "epoch": 414.9342105263158, "grad_norm": 1.404557466506958, "learning_rate": 0.0001, "loss": 0.0137, "step": 63070 }, { "epoch": 415.0, "grad_norm": 1.3166865110397339, "learning_rate": 0.0001, "loss": 0.0153, "step": 63080 }, { "epoch": 415.0657894736842, "grad_norm": 1.8484137058258057, "learning_rate": 0.0001, "loss": 0.0158, "step": 63090 }, { "epoch": 415.13157894736844, "grad_norm": 1.4470107555389404, "learning_rate": 0.0001, "loss": 0.0141, "step": 63100 }, { "epoch": 415.19736842105266, "grad_norm": 1.888924479484558, "learning_rate": 0.0001, "loss": 0.0193, "step": 63110 }, { "epoch": 415.2631578947368, "grad_norm": 1.7259339094161987, "learning_rate": 0.0001, "loss": 0.0146, "step": 63120 }, { "epoch": 415.32894736842104, "grad_norm": 1.0650333166122437, "learning_rate": 0.0001, "loss": 0.0158, "step": 63130 }, { "epoch": 415.39473684210526, "grad_norm": 1.2168301343917847, "learning_rate": 0.0001, "loss": 0.0176, "step": 63140 }, { "epoch": 415.4605263157895, "grad_norm": 1.6890838146209717, "learning_rate": 0.0001, "loss": 0.0183, "step": 63150 }, { "epoch": 415.5263157894737, "grad_norm": 1.6978299617767334, "learning_rate": 0.0001, "loss": 0.0162, "step": 63160 }, { "epoch": 415.5921052631579, "grad_norm": 1.5776081085205078, "learning_rate": 0.0001, "loss": 0.0176, "step": 63170 }, { "epoch": 415.6578947368421, "grad_norm": 1.9599169492721558, "learning_rate": 0.0001, "loss": 0.0127, "step": 63180 }, { "epoch": 415.7236842105263, "grad_norm": 2.0171291828155518, "learning_rate": 0.0001, "loss": 0.015, "step": 63190 }, { "epoch": 415.7894736842105, "grad_norm": 1.4216233491897583, "learning_rate": 0.0001, "loss": 0.0194, "step": 63200 }, { "epoch": 415.85526315789474, "grad_norm": 1.6998047828674316, "learning_rate": 0.0001, "loss": 0.0161, "step": 63210 }, { "epoch": 415.92105263157896, "grad_norm": 1.0946565866470337, "learning_rate": 0.0001, "loss": 0.0134, "step": 63220 }, { "epoch": 415.9868421052632, "grad_norm": 1.664526343345642, "learning_rate": 0.0001, "loss": 0.0168, "step": 63230 }, { "epoch": 416.05263157894734, "grad_norm": 1.67960786819458, "learning_rate": 0.0001, "loss": 0.0202, "step": 63240 }, { "epoch": 416.11842105263156, "grad_norm": 1.3671214580535889, "learning_rate": 0.0001, "loss": 0.0165, "step": 63250 }, { "epoch": 416.1842105263158, "grad_norm": 1.7965575456619263, "learning_rate": 0.0001, "loss": 0.0194, "step": 63260 }, { "epoch": 416.25, "grad_norm": 1.4739915132522583, "learning_rate": 0.0001, "loss": 0.0156, "step": 63270 }, { "epoch": 416.3157894736842, "grad_norm": 1.5647729635238647, "learning_rate": 0.0001, "loss": 0.0139, "step": 63280 }, { "epoch": 416.38157894736844, "grad_norm": 1.6288295984268188, "learning_rate": 0.0001, "loss": 0.0186, "step": 63290 }, { "epoch": 416.44736842105266, "grad_norm": 1.720248818397522, "learning_rate": 0.0001, "loss": 0.0135, "step": 63300 }, { "epoch": 416.5131578947368, "grad_norm": 1.8412213325500488, "learning_rate": 0.0001, "loss": 0.0144, "step": 63310 }, { "epoch": 416.57894736842104, "grad_norm": 1.7498761415481567, "learning_rate": 0.0001, "loss": 0.0137, "step": 63320 }, { "epoch": 416.64473684210526, "grad_norm": 1.3032280206680298, "learning_rate": 0.0001, "loss": 0.0186, "step": 63330 }, { "epoch": 416.7105263157895, "grad_norm": 1.4191755056381226, "learning_rate": 0.0001, "loss": 0.0159, "step": 63340 }, { "epoch": 416.7763157894737, "grad_norm": 1.3986945152282715, "learning_rate": 0.0001, "loss": 0.0191, "step": 63350 }, { "epoch": 416.8421052631579, "grad_norm": 1.3450937271118164, "learning_rate": 0.0001, "loss": 0.0169, "step": 63360 }, { "epoch": 416.9078947368421, "grad_norm": 1.6356093883514404, "learning_rate": 0.0001, "loss": 0.0149, "step": 63370 }, { "epoch": 416.9736842105263, "grad_norm": 1.9113044738769531, "learning_rate": 0.0001, "loss": 0.0136, "step": 63380 }, { "epoch": 417.0394736842105, "grad_norm": 1.9376078844070435, "learning_rate": 0.0001, "loss": 0.0188, "step": 63390 }, { "epoch": 417.10526315789474, "grad_norm": 1.7542396783828735, "learning_rate": 0.0001, "loss": 0.0161, "step": 63400 }, { "epoch": 417.17105263157896, "grad_norm": 1.5175611972808838, "learning_rate": 0.0001, "loss": 0.0198, "step": 63410 }, { "epoch": 417.2368421052632, "grad_norm": 1.713961124420166, "learning_rate": 0.0001, "loss": 0.018, "step": 63420 }, { "epoch": 417.30263157894734, "grad_norm": 1.7228977680206299, "learning_rate": 0.0001, "loss": 0.0157, "step": 63430 }, { "epoch": 417.36842105263156, "grad_norm": 1.5526574850082397, "learning_rate": 0.0001, "loss": 0.0182, "step": 63440 }, { "epoch": 417.4342105263158, "grad_norm": 1.717462420463562, "learning_rate": 0.0001, "loss": 0.0126, "step": 63450 }, { "epoch": 417.5, "grad_norm": 1.8381917476654053, "learning_rate": 0.0001, "loss": 0.0123, "step": 63460 }, { "epoch": 417.5657894736842, "grad_norm": 1.9904139041900635, "learning_rate": 0.0001, "loss": 0.0155, "step": 63470 }, { "epoch": 417.63157894736844, "grad_norm": 1.4172883033752441, "learning_rate": 0.0001, "loss": 0.0153, "step": 63480 }, { "epoch": 417.69736842105266, "grad_norm": 1.5187526941299438, "learning_rate": 0.0001, "loss": 0.0148, "step": 63490 }, { "epoch": 417.7631578947368, "grad_norm": 1.4445191621780396, "learning_rate": 0.0001, "loss": 0.0169, "step": 63500 }, { "epoch": 417.82894736842104, "grad_norm": 1.4951692819595337, "learning_rate": 0.0001, "loss": 0.0155, "step": 63510 }, { "epoch": 417.89473684210526, "grad_norm": 1.154722809791565, "learning_rate": 0.0001, "loss": 0.0161, "step": 63520 }, { "epoch": 417.9605263157895, "grad_norm": 1.2413243055343628, "learning_rate": 0.0001, "loss": 0.0207, "step": 63530 }, { "epoch": 418.0263157894737, "grad_norm": 2.1120665073394775, "learning_rate": 0.0001, "loss": 0.0166, "step": 63540 }, { "epoch": 418.0921052631579, "grad_norm": 1.3730825185775757, "learning_rate": 0.0001, "loss": 0.0164, "step": 63550 }, { "epoch": 418.1578947368421, "grad_norm": 1.0504766702651978, "learning_rate": 0.0001, "loss": 0.0134, "step": 63560 }, { "epoch": 418.2236842105263, "grad_norm": 1.8158509731292725, "learning_rate": 0.0001, "loss": 0.0146, "step": 63570 }, { "epoch": 418.2894736842105, "grad_norm": 1.8428857326507568, "learning_rate": 0.0001, "loss": 0.0186, "step": 63580 }, { "epoch": 418.35526315789474, "grad_norm": 1.0720571279525757, "learning_rate": 0.0001, "loss": 0.0126, "step": 63590 }, { "epoch": 418.42105263157896, "grad_norm": 1.271567463874817, "learning_rate": 0.0001, "loss": 0.013, "step": 63600 }, { "epoch": 418.4868421052632, "grad_norm": 1.4448227882385254, "learning_rate": 0.0001, "loss": 0.0173, "step": 63610 }, { "epoch": 418.55263157894734, "grad_norm": 1.6283619403839111, "learning_rate": 0.0001, "loss": 0.0141, "step": 63620 }, { "epoch": 418.61842105263156, "grad_norm": 1.3944379091262817, "learning_rate": 0.0001, "loss": 0.0138, "step": 63630 }, { "epoch": 418.6842105263158, "grad_norm": 1.8141411542892456, "learning_rate": 0.0001, "loss": 0.0143, "step": 63640 }, { "epoch": 418.75, "grad_norm": 1.719080924987793, "learning_rate": 0.0001, "loss": 0.02, "step": 63650 }, { "epoch": 418.8157894736842, "grad_norm": 1.5627846717834473, "learning_rate": 0.0001, "loss": 0.0192, "step": 63660 }, { "epoch": 418.88157894736844, "grad_norm": 1.568260669708252, "learning_rate": 0.0001, "loss": 0.0202, "step": 63670 }, { "epoch": 418.94736842105266, "grad_norm": 1.8063846826553345, "learning_rate": 0.0001, "loss": 0.0187, "step": 63680 }, { "epoch": 419.0131578947368, "grad_norm": 1.3876664638519287, "learning_rate": 0.0001, "loss": 0.0191, "step": 63690 }, { "epoch": 419.07894736842104, "grad_norm": 1.4397938251495361, "learning_rate": 0.0001, "loss": 0.0177, "step": 63700 }, { "epoch": 419.14473684210526, "grad_norm": 1.2299034595489502, "learning_rate": 0.0001, "loss": 0.0147, "step": 63710 }, { "epoch": 419.2105263157895, "grad_norm": 1.4465097188949585, "learning_rate": 0.0001, "loss": 0.0153, "step": 63720 }, { "epoch": 419.2763157894737, "grad_norm": 1.4938328266143799, "learning_rate": 0.0001, "loss": 0.0155, "step": 63730 }, { "epoch": 419.3421052631579, "grad_norm": 1.4760140180587769, "learning_rate": 0.0001, "loss": 0.0147, "step": 63740 }, { "epoch": 419.4078947368421, "grad_norm": 2.1524505615234375, "learning_rate": 0.0001, "loss": 0.0189, "step": 63750 }, { "epoch": 419.4736842105263, "grad_norm": 1.5775305032730103, "learning_rate": 0.0001, "loss": 0.0192, "step": 63760 }, { "epoch": 419.5394736842105, "grad_norm": 1.3649039268493652, "learning_rate": 0.0001, "loss": 0.017, "step": 63770 }, { "epoch": 419.60526315789474, "grad_norm": 1.585667371749878, "learning_rate": 0.0001, "loss": 0.0149, "step": 63780 }, { "epoch": 419.67105263157896, "grad_norm": 1.8861331939697266, "learning_rate": 0.0001, "loss": 0.0173, "step": 63790 }, { "epoch": 419.7368421052632, "grad_norm": 1.5499446392059326, "learning_rate": 0.0001, "loss": 0.0172, "step": 63800 }, { "epoch": 419.80263157894734, "grad_norm": 1.6205402612686157, "learning_rate": 0.0001, "loss": 0.0152, "step": 63810 }, { "epoch": 419.86842105263156, "grad_norm": 1.4799197912216187, "learning_rate": 0.0001, "loss": 0.013, "step": 63820 }, { "epoch": 419.9342105263158, "grad_norm": 0.9829177856445312, "learning_rate": 0.0001, "loss": 0.0183, "step": 63830 }, { "epoch": 420.0, "grad_norm": 1.3836320638656616, "learning_rate": 0.0001, "loss": 0.0135, "step": 63840 }, { "epoch": 420.0657894736842, "grad_norm": 1.2085894346237183, "learning_rate": 0.0001, "loss": 0.0156, "step": 63850 }, { "epoch": 420.13157894736844, "grad_norm": 1.2817407846450806, "learning_rate": 0.0001, "loss": 0.0145, "step": 63860 }, { "epoch": 420.19736842105266, "grad_norm": 2.0856711864471436, "learning_rate": 0.0001, "loss": 0.0171, "step": 63870 }, { "epoch": 420.2631578947368, "grad_norm": 1.6064790487289429, "learning_rate": 0.0001, "loss": 0.019, "step": 63880 }, { "epoch": 420.32894736842104, "grad_norm": 1.3463363647460938, "learning_rate": 0.0001, "loss": 0.0186, "step": 63890 }, { "epoch": 420.39473684210526, "grad_norm": 1.5096755027770996, "learning_rate": 0.0001, "loss": 0.018, "step": 63900 }, { "epoch": 420.4605263157895, "grad_norm": 1.7054377794265747, "learning_rate": 0.0001, "loss": 0.0191, "step": 63910 }, { "epoch": 420.5263157894737, "grad_norm": 1.6377859115600586, "learning_rate": 0.0001, "loss": 0.0157, "step": 63920 }, { "epoch": 420.5921052631579, "grad_norm": 1.8262252807617188, "learning_rate": 0.0001, "loss": 0.0165, "step": 63930 }, { "epoch": 420.6578947368421, "grad_norm": 2.035616636276245, "learning_rate": 0.0001, "loss": 0.0161, "step": 63940 }, { "epoch": 420.7236842105263, "grad_norm": 1.294040322303772, "learning_rate": 0.0001, "loss": 0.0154, "step": 63950 }, { "epoch": 420.7894736842105, "grad_norm": 1.5879147052764893, "learning_rate": 0.0001, "loss": 0.0151, "step": 63960 }, { "epoch": 420.85526315789474, "grad_norm": 1.703218698501587, "learning_rate": 0.0001, "loss": 0.0131, "step": 63970 }, { "epoch": 420.92105263157896, "grad_norm": 1.6879507303237915, "learning_rate": 0.0001, "loss": 0.0163, "step": 63980 }, { "epoch": 420.9868421052632, "grad_norm": 1.6945265531539917, "learning_rate": 0.0001, "loss": 0.0139, "step": 63990 }, { "epoch": 421.05263157894734, "grad_norm": 1.5152192115783691, "learning_rate": 0.0001, "loss": 0.0186, "step": 64000 }, { "epoch": 421.11842105263156, "grad_norm": 1.570601224899292, "learning_rate": 0.0001, "loss": 0.0154, "step": 64010 }, { "epoch": 421.1842105263158, "grad_norm": 1.3559678792953491, "learning_rate": 0.0001, "loss": 0.0184, "step": 64020 }, { "epoch": 421.25, "grad_norm": 1.1002789735794067, "learning_rate": 0.0001, "loss": 0.0148, "step": 64030 }, { "epoch": 421.3157894736842, "grad_norm": 1.324841022491455, "learning_rate": 0.0001, "loss": 0.0158, "step": 64040 }, { "epoch": 421.38157894736844, "grad_norm": 1.8018330335617065, "learning_rate": 0.0001, "loss": 0.014, "step": 64050 }, { "epoch": 421.44736842105266, "grad_norm": 2.0746383666992188, "learning_rate": 0.0001, "loss": 0.0192, "step": 64060 }, { "epoch": 421.5131578947368, "grad_norm": 2.1002423763275146, "learning_rate": 0.0001, "loss": 0.019, "step": 64070 }, { "epoch": 421.57894736842104, "grad_norm": 2.000267267227173, "learning_rate": 0.0001, "loss": 0.0159, "step": 64080 }, { "epoch": 421.64473684210526, "grad_norm": 1.4135088920593262, "learning_rate": 0.0001, "loss": 0.0165, "step": 64090 }, { "epoch": 421.7105263157895, "grad_norm": 1.5995800495147705, "learning_rate": 0.0001, "loss": 0.0173, "step": 64100 }, { "epoch": 421.7763157894737, "grad_norm": 1.6131948232650757, "learning_rate": 0.0001, "loss": 0.0171, "step": 64110 }, { "epoch": 421.8421052631579, "grad_norm": 1.410381555557251, "learning_rate": 0.0001, "loss": 0.017, "step": 64120 }, { "epoch": 421.9078947368421, "grad_norm": 1.4388413429260254, "learning_rate": 0.0001, "loss": 0.0173, "step": 64130 }, { "epoch": 421.9736842105263, "grad_norm": 1.7016321420669556, "learning_rate": 0.0001, "loss": 0.0151, "step": 64140 }, { "epoch": 422.0394736842105, "grad_norm": 1.7607389688491821, "learning_rate": 0.0001, "loss": 0.0141, "step": 64150 }, { "epoch": 422.10526315789474, "grad_norm": 1.3278615474700928, "learning_rate": 0.0001, "loss": 0.0169, "step": 64160 }, { "epoch": 422.17105263157896, "grad_norm": 2.1720523834228516, "learning_rate": 0.0001, "loss": 0.0212, "step": 64170 }, { "epoch": 422.2368421052632, "grad_norm": 1.7395631074905396, "learning_rate": 0.0001, "loss": 0.0162, "step": 64180 }, { "epoch": 422.30263157894734, "grad_norm": 1.9752016067504883, "learning_rate": 0.0001, "loss": 0.014, "step": 64190 }, { "epoch": 422.36842105263156, "grad_norm": 1.3927799463272095, "learning_rate": 0.0001, "loss": 0.0184, "step": 64200 }, { "epoch": 422.4342105263158, "grad_norm": 1.4149481058120728, "learning_rate": 0.0001, "loss": 0.0148, "step": 64210 }, { "epoch": 422.5, "grad_norm": 1.8036061525344849, "learning_rate": 0.0001, "loss": 0.0168, "step": 64220 }, { "epoch": 422.5657894736842, "grad_norm": 1.2087947130203247, "learning_rate": 0.0001, "loss": 0.0158, "step": 64230 }, { "epoch": 422.63157894736844, "grad_norm": 1.474379539489746, "learning_rate": 0.0001, "loss": 0.0172, "step": 64240 }, { "epoch": 422.69736842105266, "grad_norm": 1.4560306072235107, "learning_rate": 0.0001, "loss": 0.0137, "step": 64250 }, { "epoch": 422.7631578947368, "grad_norm": 1.2156238555908203, "learning_rate": 0.0001, "loss": 0.0142, "step": 64260 }, { "epoch": 422.82894736842104, "grad_norm": 1.808613657951355, "learning_rate": 0.0001, "loss": 0.0198, "step": 64270 }, { "epoch": 422.89473684210526, "grad_norm": 1.8355381488800049, "learning_rate": 0.0001, "loss": 0.0149, "step": 64280 }, { "epoch": 422.9605263157895, "grad_norm": 1.1951372623443604, "learning_rate": 0.0001, "loss": 0.0177, "step": 64290 }, { "epoch": 423.0263157894737, "grad_norm": 1.2463740110397339, "learning_rate": 0.0001, "loss": 0.0143, "step": 64300 }, { "epoch": 423.0921052631579, "grad_norm": 1.3471531867980957, "learning_rate": 0.0001, "loss": 0.0154, "step": 64310 }, { "epoch": 423.1578947368421, "grad_norm": 1.5846507549285889, "learning_rate": 0.0001, "loss": 0.0147, "step": 64320 }, { "epoch": 423.2236842105263, "grad_norm": 1.3310906887054443, "learning_rate": 0.0001, "loss": 0.0146, "step": 64330 }, { "epoch": 423.2894736842105, "grad_norm": 1.593396544456482, "learning_rate": 0.0001, "loss": 0.0225, "step": 64340 }, { "epoch": 423.35526315789474, "grad_norm": 1.4776394367218018, "learning_rate": 0.0001, "loss": 0.0189, "step": 64350 }, { "epoch": 423.42105263157896, "grad_norm": 1.2386908531188965, "learning_rate": 0.0001, "loss": 0.0174, "step": 64360 }, { "epoch": 423.4868421052632, "grad_norm": 1.6903738975524902, "learning_rate": 0.0001, "loss": 0.0191, "step": 64370 }, { "epoch": 423.55263157894734, "grad_norm": 1.6692569255828857, "learning_rate": 0.0001, "loss": 0.0151, "step": 64380 }, { "epoch": 423.61842105263156, "grad_norm": 1.7691304683685303, "learning_rate": 0.0001, "loss": 0.0142, "step": 64390 }, { "epoch": 423.6842105263158, "grad_norm": 1.7677404880523682, "learning_rate": 0.0001, "loss": 0.0166, "step": 64400 }, { "epoch": 423.75, "grad_norm": 1.765934944152832, "learning_rate": 0.0001, "loss": 0.0154, "step": 64410 }, { "epoch": 423.8157894736842, "grad_norm": 1.2203518152236938, "learning_rate": 0.0001, "loss": 0.014, "step": 64420 }, { "epoch": 423.88157894736844, "grad_norm": 1.197089672088623, "learning_rate": 0.0001, "loss": 0.0142, "step": 64430 }, { "epoch": 423.94736842105266, "grad_norm": 1.3283225297927856, "learning_rate": 0.0001, "loss": 0.018, "step": 64440 }, { "epoch": 424.0131578947368, "grad_norm": 1.435594081878662, "learning_rate": 0.0001, "loss": 0.019, "step": 64450 }, { "epoch": 424.07894736842104, "grad_norm": 1.5702917575836182, "learning_rate": 0.0001, "loss": 0.0173, "step": 64460 }, { "epoch": 424.14473684210526, "grad_norm": 1.454546570777893, "learning_rate": 0.0001, "loss": 0.0139, "step": 64470 }, { "epoch": 424.2105263157895, "grad_norm": 1.7806897163391113, "learning_rate": 0.0001, "loss": 0.0178, "step": 64480 }, { "epoch": 424.2763157894737, "grad_norm": 1.7681492567062378, "learning_rate": 0.0001, "loss": 0.0187, "step": 64490 }, { "epoch": 424.3421052631579, "grad_norm": 1.246836543083191, "learning_rate": 0.0001, "loss": 0.0181, "step": 64500 }, { "epoch": 424.4078947368421, "grad_norm": 1.400008201599121, "learning_rate": 0.0001, "loss": 0.0156, "step": 64510 }, { "epoch": 424.4736842105263, "grad_norm": 1.3594013452529907, "learning_rate": 0.0001, "loss": 0.015, "step": 64520 }, { "epoch": 424.5394736842105, "grad_norm": 1.5883020162582397, "learning_rate": 0.0001, "loss": 0.0172, "step": 64530 }, { "epoch": 424.60526315789474, "grad_norm": 1.6715621948242188, "learning_rate": 0.0001, "loss": 0.0194, "step": 64540 }, { "epoch": 424.67105263157896, "grad_norm": 1.6885555982589722, "learning_rate": 0.0001, "loss": 0.0188, "step": 64550 }, { "epoch": 424.7368421052632, "grad_norm": 1.7978986501693726, "learning_rate": 0.0001, "loss": 0.0137, "step": 64560 }, { "epoch": 424.80263157894734, "grad_norm": 1.9899424314498901, "learning_rate": 0.0001, "loss": 0.0131, "step": 64570 }, { "epoch": 424.86842105263156, "grad_norm": 1.4670534133911133, "learning_rate": 0.0001, "loss": 0.0184, "step": 64580 }, { "epoch": 424.9342105263158, "grad_norm": 1.5755852460861206, "learning_rate": 0.0001, "loss": 0.0151, "step": 64590 }, { "epoch": 425.0, "grad_norm": 1.4673627614974976, "learning_rate": 0.0001, "loss": 0.0157, "step": 64600 }, { "epoch": 425.0657894736842, "grad_norm": 1.3896477222442627, "learning_rate": 0.0001, "loss": 0.0145, "step": 64610 }, { "epoch": 425.13157894736844, "grad_norm": 1.398645043373108, "learning_rate": 0.0001, "loss": 0.0138, "step": 64620 }, { "epoch": 425.19736842105266, "grad_norm": 1.0140247344970703, "learning_rate": 0.0001, "loss": 0.019, "step": 64630 }, { "epoch": 425.2631578947368, "grad_norm": 1.3572274446487427, "learning_rate": 0.0001, "loss": 0.0164, "step": 64640 }, { "epoch": 425.32894736842104, "grad_norm": 1.7875494956970215, "learning_rate": 0.0001, "loss": 0.016, "step": 64650 }, { "epoch": 425.39473684210526, "grad_norm": 1.6056700944900513, "learning_rate": 0.0001, "loss": 0.0197, "step": 64660 }, { "epoch": 425.4605263157895, "grad_norm": 1.3455713987350464, "learning_rate": 0.0001, "loss": 0.014, "step": 64670 }, { "epoch": 425.5263157894737, "grad_norm": 2.0016653537750244, "learning_rate": 0.0001, "loss": 0.0189, "step": 64680 }, { "epoch": 425.5921052631579, "grad_norm": 1.534142017364502, "learning_rate": 0.0001, "loss": 0.015, "step": 64690 }, { "epoch": 425.6578947368421, "grad_norm": 1.3810735940933228, "learning_rate": 0.0001, "loss": 0.0157, "step": 64700 }, { "epoch": 425.7236842105263, "grad_norm": 1.7681186199188232, "learning_rate": 0.0001, "loss": 0.0189, "step": 64710 }, { "epoch": 425.7894736842105, "grad_norm": 1.6734064817428589, "learning_rate": 0.0001, "loss": 0.0145, "step": 64720 }, { "epoch": 425.85526315789474, "grad_norm": 1.1050207614898682, "learning_rate": 0.0001, "loss": 0.0144, "step": 64730 }, { "epoch": 425.92105263157896, "grad_norm": 1.344814658164978, "learning_rate": 0.0001, "loss": 0.0209, "step": 64740 }, { "epoch": 425.9868421052632, "grad_norm": 1.3301384449005127, "learning_rate": 0.0001, "loss": 0.0141, "step": 64750 }, { "epoch": 426.05263157894734, "grad_norm": 1.2036571502685547, "learning_rate": 0.0001, "loss": 0.0152, "step": 64760 }, { "epoch": 426.11842105263156, "grad_norm": 1.2922011613845825, "learning_rate": 0.0001, "loss": 0.0173, "step": 64770 }, { "epoch": 426.1842105263158, "grad_norm": 1.3609540462493896, "learning_rate": 0.0001, "loss": 0.0155, "step": 64780 }, { "epoch": 426.25, "grad_norm": 1.389357089996338, "learning_rate": 0.0001, "loss": 0.0186, "step": 64790 }, { "epoch": 426.3157894736842, "grad_norm": 1.6004573106765747, "learning_rate": 0.0001, "loss": 0.0144, "step": 64800 }, { "epoch": 426.38157894736844, "grad_norm": 1.6282579898834229, "learning_rate": 0.0001, "loss": 0.0144, "step": 64810 }, { "epoch": 426.44736842105266, "grad_norm": 1.549273133277893, "learning_rate": 0.0001, "loss": 0.0154, "step": 64820 }, { "epoch": 426.5131578947368, "grad_norm": 1.6747955083847046, "learning_rate": 0.0001, "loss": 0.0168, "step": 64830 }, { "epoch": 426.57894736842104, "grad_norm": 2.0185251235961914, "learning_rate": 0.0001, "loss": 0.0135, "step": 64840 }, { "epoch": 426.64473684210526, "grad_norm": 1.6905179023742676, "learning_rate": 0.0001, "loss": 0.0162, "step": 64850 }, { "epoch": 426.7105263157895, "grad_norm": 1.4801172018051147, "learning_rate": 0.0001, "loss": 0.0204, "step": 64860 }, { "epoch": 426.7763157894737, "grad_norm": 1.0608218908309937, "learning_rate": 0.0001, "loss": 0.0147, "step": 64870 }, { "epoch": 426.8421052631579, "grad_norm": 1.4725103378295898, "learning_rate": 0.0001, "loss": 0.021, "step": 64880 }, { "epoch": 426.9078947368421, "grad_norm": 1.0222264528274536, "learning_rate": 0.0001, "loss": 0.0202, "step": 64890 }, { "epoch": 426.9736842105263, "grad_norm": 1.4334882497787476, "learning_rate": 0.0001, "loss": 0.0143, "step": 64900 }, { "epoch": 427.0394736842105, "grad_norm": 1.2743608951568604, "learning_rate": 0.0001, "loss": 0.016, "step": 64910 }, { "epoch": 427.10526315789474, "grad_norm": 1.2196449041366577, "learning_rate": 0.0001, "loss": 0.0151, "step": 64920 }, { "epoch": 427.17105263157896, "grad_norm": 1.4641720056533813, "learning_rate": 0.0001, "loss": 0.0161, "step": 64930 }, { "epoch": 427.2368421052632, "grad_norm": 1.3911988735198975, "learning_rate": 0.0001, "loss": 0.0172, "step": 64940 }, { "epoch": 427.30263157894734, "grad_norm": 1.537190556526184, "learning_rate": 0.0001, "loss": 0.0151, "step": 64950 }, { "epoch": 427.36842105263156, "grad_norm": 1.2917835712432861, "learning_rate": 0.0001, "loss": 0.0158, "step": 64960 }, { "epoch": 427.4342105263158, "grad_norm": 1.6999305486679077, "learning_rate": 0.0001, "loss": 0.0194, "step": 64970 }, { "epoch": 427.5, "grad_norm": 1.5439519882202148, "learning_rate": 0.0001, "loss": 0.0162, "step": 64980 }, { "epoch": 427.5657894736842, "grad_norm": 1.658549189567566, "learning_rate": 0.0001, "loss": 0.0163, "step": 64990 }, { "epoch": 427.63157894736844, "grad_norm": 1.871266484260559, "learning_rate": 0.0001, "loss": 0.0197, "step": 65000 }, { "epoch": 427.69736842105266, "grad_norm": 1.4750127792358398, "learning_rate": 0.0001, "loss": 0.0167, "step": 65010 }, { "epoch": 427.7631578947368, "grad_norm": 1.5806095600128174, "learning_rate": 0.0001, "loss": 0.0139, "step": 65020 }, { "epoch": 427.82894736842104, "grad_norm": 1.3511626720428467, "learning_rate": 0.0001, "loss": 0.0177, "step": 65030 }, { "epoch": 427.89473684210526, "grad_norm": 1.7514052391052246, "learning_rate": 0.0001, "loss": 0.0154, "step": 65040 }, { "epoch": 427.9605263157895, "grad_norm": 1.4906256198883057, "learning_rate": 0.0001, "loss": 0.0161, "step": 65050 }, { "epoch": 428.0263157894737, "grad_norm": 1.6505775451660156, "learning_rate": 0.0001, "loss": 0.0179, "step": 65060 }, { "epoch": 428.0921052631579, "grad_norm": 1.4694875478744507, "learning_rate": 0.0001, "loss": 0.0174, "step": 65070 }, { "epoch": 428.1578947368421, "grad_norm": 1.804674506187439, "learning_rate": 0.0001, "loss": 0.0181, "step": 65080 }, { "epoch": 428.2236842105263, "grad_norm": 1.313333511352539, "learning_rate": 0.0001, "loss": 0.0156, "step": 65090 }, { "epoch": 428.2894736842105, "grad_norm": 2.211235761642456, "learning_rate": 0.0001, "loss": 0.0174, "step": 65100 }, { "epoch": 428.35526315789474, "grad_norm": 1.7940059900283813, "learning_rate": 0.0001, "loss": 0.0153, "step": 65110 }, { "epoch": 428.42105263157896, "grad_norm": 1.9414535760879517, "learning_rate": 0.0001, "loss": 0.014, "step": 65120 }, { "epoch": 428.4868421052632, "grad_norm": 1.0843182802200317, "learning_rate": 0.0001, "loss": 0.0179, "step": 65130 }, { "epoch": 428.55263157894734, "grad_norm": 1.606815218925476, "learning_rate": 0.0001, "loss": 0.0166, "step": 65140 }, { "epoch": 428.61842105263156, "grad_norm": 2.1725103855133057, "learning_rate": 0.0001, "loss": 0.016, "step": 65150 }, { "epoch": 428.6842105263158, "grad_norm": 1.553497552871704, "learning_rate": 0.0001, "loss": 0.0185, "step": 65160 }, { "epoch": 428.75, "grad_norm": 1.926210641860962, "learning_rate": 0.0001, "loss": 0.0132, "step": 65170 }, { "epoch": 428.8157894736842, "grad_norm": 1.8815821409225464, "learning_rate": 0.0001, "loss": 0.0144, "step": 65180 }, { "epoch": 428.88157894736844, "grad_norm": 1.9079499244689941, "learning_rate": 0.0001, "loss": 0.0117, "step": 65190 }, { "epoch": 428.94736842105266, "grad_norm": 1.4385358095169067, "learning_rate": 0.0001, "loss": 0.0144, "step": 65200 }, { "epoch": 429.0131578947368, "grad_norm": 1.5687540769577026, "learning_rate": 0.0001, "loss": 0.016, "step": 65210 }, { "epoch": 429.07894736842104, "grad_norm": 1.6752949953079224, "learning_rate": 0.0001, "loss": 0.0171, "step": 65220 }, { "epoch": 429.14473684210526, "grad_norm": 1.0942200422286987, "learning_rate": 0.0001, "loss": 0.014, "step": 65230 }, { "epoch": 429.2105263157895, "grad_norm": 1.5730403661727905, "learning_rate": 0.0001, "loss": 0.0147, "step": 65240 }, { "epoch": 429.2763157894737, "grad_norm": 1.6450098752975464, "learning_rate": 0.0001, "loss": 0.0179, "step": 65250 }, { "epoch": 429.3421052631579, "grad_norm": 1.530624270439148, "learning_rate": 0.0001, "loss": 0.0179, "step": 65260 }, { "epoch": 429.4078947368421, "grad_norm": 1.5592809915542603, "learning_rate": 0.0001, "loss": 0.0172, "step": 65270 }, { "epoch": 429.4736842105263, "grad_norm": 2.033545970916748, "learning_rate": 0.0001, "loss": 0.0151, "step": 65280 }, { "epoch": 429.5394736842105, "grad_norm": 1.7441858053207397, "learning_rate": 0.0001, "loss": 0.0157, "step": 65290 }, { "epoch": 429.60526315789474, "grad_norm": 1.7947709560394287, "learning_rate": 0.0001, "loss": 0.0149, "step": 65300 }, { "epoch": 429.67105263157896, "grad_norm": 1.6988414525985718, "learning_rate": 0.0001, "loss": 0.0169, "step": 65310 }, { "epoch": 429.7368421052632, "grad_norm": 1.4352039098739624, "learning_rate": 0.0001, "loss": 0.0129, "step": 65320 }, { "epoch": 429.80263157894734, "grad_norm": 1.307625651359558, "learning_rate": 0.0001, "loss": 0.0163, "step": 65330 }, { "epoch": 429.86842105263156, "grad_norm": 1.0806955099105835, "learning_rate": 0.0001, "loss": 0.0162, "step": 65340 }, { "epoch": 429.9342105263158, "grad_norm": 1.5672444105148315, "learning_rate": 0.0001, "loss": 0.0185, "step": 65350 }, { "epoch": 430.0, "grad_norm": 1.5125333070755005, "learning_rate": 0.0001, "loss": 0.0154, "step": 65360 }, { "epoch": 430.0657894736842, "grad_norm": 1.8699471950531006, "learning_rate": 0.0001, "loss": 0.0148, "step": 65370 }, { "epoch": 430.13157894736844, "grad_norm": 1.8611268997192383, "learning_rate": 0.0001, "loss": 0.0174, "step": 65380 }, { "epoch": 430.19736842105266, "grad_norm": 1.5545696020126343, "learning_rate": 0.0001, "loss": 0.0149, "step": 65390 }, { "epoch": 430.2631578947368, "grad_norm": 1.7504551410675049, "learning_rate": 0.0001, "loss": 0.0147, "step": 65400 }, { "epoch": 430.32894736842104, "grad_norm": 1.4761931896209717, "learning_rate": 0.0001, "loss": 0.0124, "step": 65410 }, { "epoch": 430.39473684210526, "grad_norm": 1.6099004745483398, "learning_rate": 0.0001, "loss": 0.0198, "step": 65420 }, { "epoch": 430.4605263157895, "grad_norm": 1.7601474523544312, "learning_rate": 0.0001, "loss": 0.0146, "step": 65430 }, { "epoch": 430.5263157894737, "grad_norm": 2.0692808628082275, "learning_rate": 0.0001, "loss": 0.0182, "step": 65440 }, { "epoch": 430.5921052631579, "grad_norm": 1.9419091939926147, "learning_rate": 0.0001, "loss": 0.0158, "step": 65450 }, { "epoch": 430.6578947368421, "grad_norm": 2.00838041305542, "learning_rate": 0.0001, "loss": 0.0139, "step": 65460 }, { "epoch": 430.7236842105263, "grad_norm": 1.6085525751113892, "learning_rate": 0.0001, "loss": 0.0137, "step": 65470 }, { "epoch": 430.7894736842105, "grad_norm": 1.8091028928756714, "learning_rate": 0.0001, "loss": 0.0173, "step": 65480 }, { "epoch": 430.85526315789474, "grad_norm": 1.746740460395813, "learning_rate": 0.0001, "loss": 0.0144, "step": 65490 }, { "epoch": 430.92105263157896, "grad_norm": 1.4116277694702148, "learning_rate": 0.0001, "loss": 0.0196, "step": 65500 }, { "epoch": 430.9868421052632, "grad_norm": 1.7578072547912598, "learning_rate": 0.0001, "loss": 0.018, "step": 65510 }, { "epoch": 431.05263157894734, "grad_norm": 1.8480395078659058, "learning_rate": 0.0001, "loss": 0.0164, "step": 65520 }, { "epoch": 431.11842105263156, "grad_norm": 1.5719242095947266, "learning_rate": 0.0001, "loss": 0.0157, "step": 65530 }, { "epoch": 431.1842105263158, "grad_norm": 1.4312275648117065, "learning_rate": 0.0001, "loss": 0.0142, "step": 65540 }, { "epoch": 431.25, "grad_norm": 1.6855942010879517, "learning_rate": 0.0001, "loss": 0.0159, "step": 65550 }, { "epoch": 431.3157894736842, "grad_norm": 1.4500163793563843, "learning_rate": 0.0001, "loss": 0.0145, "step": 65560 }, { "epoch": 431.38157894736844, "grad_norm": 1.3627002239227295, "learning_rate": 0.0001, "loss": 0.019, "step": 65570 }, { "epoch": 431.44736842105266, "grad_norm": 1.5152374505996704, "learning_rate": 0.0001, "loss": 0.0165, "step": 65580 }, { "epoch": 431.5131578947368, "grad_norm": 1.727562427520752, "learning_rate": 0.0001, "loss": 0.0128, "step": 65590 }, { "epoch": 431.57894736842104, "grad_norm": 1.2641851902008057, "learning_rate": 0.0001, "loss": 0.0168, "step": 65600 }, { "epoch": 431.64473684210526, "grad_norm": 1.9601101875305176, "learning_rate": 0.0001, "loss": 0.0174, "step": 65610 }, { "epoch": 431.7105263157895, "grad_norm": 1.6302005052566528, "learning_rate": 0.0001, "loss": 0.013, "step": 65620 }, { "epoch": 431.7763157894737, "grad_norm": 1.8203566074371338, "learning_rate": 0.0001, "loss": 0.0183, "step": 65630 }, { "epoch": 431.8421052631579, "grad_norm": 1.9533419609069824, "learning_rate": 0.0001, "loss": 0.0172, "step": 65640 }, { "epoch": 431.9078947368421, "grad_norm": 2.0709121227264404, "learning_rate": 0.0001, "loss": 0.0179, "step": 65650 }, { "epoch": 431.9736842105263, "grad_norm": 1.6884535551071167, "learning_rate": 0.0001, "loss": 0.0142, "step": 65660 }, { "epoch": 432.0394736842105, "grad_norm": 1.6033779382705688, "learning_rate": 0.0001, "loss": 0.0145, "step": 65670 }, { "epoch": 432.10526315789474, "grad_norm": 1.6959598064422607, "learning_rate": 0.0001, "loss": 0.0153, "step": 65680 }, { "epoch": 432.17105263157896, "grad_norm": 1.8398752212524414, "learning_rate": 0.0001, "loss": 0.0127, "step": 65690 }, { "epoch": 432.2368421052632, "grad_norm": 2.3185994625091553, "learning_rate": 0.0001, "loss": 0.0163, "step": 65700 }, { "epoch": 432.30263157894734, "grad_norm": 2.441807985305786, "learning_rate": 0.0001, "loss": 0.0156, "step": 65710 }, { "epoch": 432.36842105263156, "grad_norm": 1.5897974967956543, "learning_rate": 0.0001, "loss": 0.0166, "step": 65720 }, { "epoch": 432.4342105263158, "grad_norm": 1.8068225383758545, "learning_rate": 0.0001, "loss": 0.0198, "step": 65730 }, { "epoch": 432.5, "grad_norm": 1.6991547346115112, "learning_rate": 0.0001, "loss": 0.0171, "step": 65740 }, { "epoch": 432.5657894736842, "grad_norm": 1.818811058998108, "learning_rate": 0.0001, "loss": 0.014, "step": 65750 }, { "epoch": 432.63157894736844, "grad_norm": 1.7648192644119263, "learning_rate": 0.0001, "loss": 0.014, "step": 65760 }, { "epoch": 432.69736842105266, "grad_norm": 1.426168441772461, "learning_rate": 0.0001, "loss": 0.0153, "step": 65770 }, { "epoch": 432.7631578947368, "grad_norm": 1.5382457971572876, "learning_rate": 0.0001, "loss": 0.015, "step": 65780 }, { "epoch": 432.82894736842104, "grad_norm": 1.8785645961761475, "learning_rate": 0.0001, "loss": 0.0121, "step": 65790 }, { "epoch": 432.89473684210526, "grad_norm": 1.8295351266860962, "learning_rate": 0.0001, "loss": 0.0153, "step": 65800 }, { "epoch": 432.9605263157895, "grad_norm": 1.9943040609359741, "learning_rate": 0.0001, "loss": 0.02, "step": 65810 }, { "epoch": 433.0263157894737, "grad_norm": 1.4237312078475952, "learning_rate": 0.0001, "loss": 0.0146, "step": 65820 }, { "epoch": 433.0921052631579, "grad_norm": 1.5380693674087524, "learning_rate": 0.0001, "loss": 0.0142, "step": 65830 }, { "epoch": 433.1578947368421, "grad_norm": 1.6690596342086792, "learning_rate": 0.0001, "loss": 0.0138, "step": 65840 }, { "epoch": 433.2236842105263, "grad_norm": 1.628563404083252, "learning_rate": 0.0001, "loss": 0.0157, "step": 65850 }, { "epoch": 433.2894736842105, "grad_norm": 1.521632194519043, "learning_rate": 0.0001, "loss": 0.0166, "step": 65860 }, { "epoch": 433.35526315789474, "grad_norm": 1.5600249767303467, "learning_rate": 0.0001, "loss": 0.014, "step": 65870 }, { "epoch": 433.42105263157896, "grad_norm": 1.7062968015670776, "learning_rate": 0.0001, "loss": 0.0147, "step": 65880 }, { "epoch": 433.4868421052632, "grad_norm": 1.845317006111145, "learning_rate": 0.0001, "loss": 0.0168, "step": 65890 }, { "epoch": 433.55263157894734, "grad_norm": 1.6420001983642578, "learning_rate": 0.0001, "loss": 0.0147, "step": 65900 }, { "epoch": 433.61842105263156, "grad_norm": 1.5226026773452759, "learning_rate": 0.0001, "loss": 0.0161, "step": 65910 }, { "epoch": 433.6842105263158, "grad_norm": 1.8679405450820923, "learning_rate": 0.0001, "loss": 0.0197, "step": 65920 }, { "epoch": 433.75, "grad_norm": 1.6490278244018555, "learning_rate": 0.0001, "loss": 0.0183, "step": 65930 }, { "epoch": 433.8157894736842, "grad_norm": 1.6731834411621094, "learning_rate": 0.0001, "loss": 0.0153, "step": 65940 }, { "epoch": 433.88157894736844, "grad_norm": 1.7430604696273804, "learning_rate": 0.0001, "loss": 0.0127, "step": 65950 }, { "epoch": 433.94736842105266, "grad_norm": 1.4350908994674683, "learning_rate": 0.0001, "loss": 0.0163, "step": 65960 }, { "epoch": 434.0131578947368, "grad_norm": 1.4795188903808594, "learning_rate": 0.0001, "loss": 0.0187, "step": 65970 }, { "epoch": 434.07894736842104, "grad_norm": 1.4325557947158813, "learning_rate": 0.0001, "loss": 0.017, "step": 65980 }, { "epoch": 434.14473684210526, "grad_norm": 1.9462008476257324, "learning_rate": 0.0001, "loss": 0.015, "step": 65990 }, { "epoch": 434.2105263157895, "grad_norm": 1.6799287796020508, "learning_rate": 0.0001, "loss": 0.0173, "step": 66000 }, { "epoch": 434.2763157894737, "grad_norm": 1.4300123453140259, "learning_rate": 0.0001, "loss": 0.0159, "step": 66010 }, { "epoch": 434.3421052631579, "grad_norm": 1.710886836051941, "learning_rate": 0.0001, "loss": 0.015, "step": 66020 }, { "epoch": 434.4078947368421, "grad_norm": 1.5077048540115356, "learning_rate": 0.0001, "loss": 0.0155, "step": 66030 }, { "epoch": 434.4736842105263, "grad_norm": 1.3419674634933472, "learning_rate": 0.0001, "loss": 0.0166, "step": 66040 }, { "epoch": 434.5394736842105, "grad_norm": 1.4665526151657104, "learning_rate": 0.0001, "loss": 0.0148, "step": 66050 }, { "epoch": 434.60526315789474, "grad_norm": 1.766516089439392, "learning_rate": 0.0001, "loss": 0.0149, "step": 66060 }, { "epoch": 434.67105263157896, "grad_norm": 1.357021450996399, "learning_rate": 0.0001, "loss": 0.0149, "step": 66070 }, { "epoch": 434.7368421052632, "grad_norm": 1.6754416227340698, "learning_rate": 0.0001, "loss": 0.016, "step": 66080 }, { "epoch": 434.80263157894734, "grad_norm": 1.7080883979797363, "learning_rate": 0.0001, "loss": 0.0181, "step": 66090 }, { "epoch": 434.86842105263156, "grad_norm": 1.534162163734436, "learning_rate": 0.0001, "loss": 0.0181, "step": 66100 }, { "epoch": 434.9342105263158, "grad_norm": 1.3521064519882202, "learning_rate": 0.0001, "loss": 0.0147, "step": 66110 }, { "epoch": 435.0, "grad_norm": 1.1515756845474243, "learning_rate": 0.0001, "loss": 0.0172, "step": 66120 }, { "epoch": 435.0657894736842, "grad_norm": 1.5999515056610107, "learning_rate": 0.0001, "loss": 0.0184, "step": 66130 }, { "epoch": 435.13157894736844, "grad_norm": 1.4286339282989502, "learning_rate": 0.0001, "loss": 0.0205, "step": 66140 }, { "epoch": 435.19736842105266, "grad_norm": 1.6917093992233276, "learning_rate": 0.0001, "loss": 0.014, "step": 66150 }, { "epoch": 435.2631578947368, "grad_norm": 1.3654884099960327, "learning_rate": 0.0001, "loss": 0.0175, "step": 66160 }, { "epoch": 435.32894736842104, "grad_norm": 1.607859492301941, "learning_rate": 0.0001, "loss": 0.0163, "step": 66170 }, { "epoch": 435.39473684210526, "grad_norm": 2.299489974975586, "learning_rate": 0.0001, "loss": 0.0188, "step": 66180 }, { "epoch": 435.4605263157895, "grad_norm": 1.443752408027649, "learning_rate": 0.0001, "loss": 0.0156, "step": 66190 }, { "epoch": 435.5263157894737, "grad_norm": 1.3764832019805908, "learning_rate": 0.0001, "loss": 0.0148, "step": 66200 }, { "epoch": 435.5921052631579, "grad_norm": 1.1112139225006104, "learning_rate": 0.0001, "loss": 0.0145, "step": 66210 }, { "epoch": 435.6578947368421, "grad_norm": 1.1696738004684448, "learning_rate": 0.0001, "loss": 0.0144, "step": 66220 }, { "epoch": 435.7236842105263, "grad_norm": 1.1646664142608643, "learning_rate": 0.0001, "loss": 0.0152, "step": 66230 }, { "epoch": 435.7894736842105, "grad_norm": 1.264693021774292, "learning_rate": 0.0001, "loss": 0.015, "step": 66240 }, { "epoch": 435.85526315789474, "grad_norm": 1.1505508422851562, "learning_rate": 0.0001, "loss": 0.0134, "step": 66250 }, { "epoch": 435.92105263157896, "grad_norm": 1.2841137647628784, "learning_rate": 0.0001, "loss": 0.0148, "step": 66260 }, { "epoch": 435.9868421052632, "grad_norm": 1.361230731010437, "learning_rate": 0.0001, "loss": 0.017, "step": 66270 }, { "epoch": 436.05263157894734, "grad_norm": 1.2665225267410278, "learning_rate": 0.0001, "loss": 0.0175, "step": 66280 }, { "epoch": 436.11842105263156, "grad_norm": 1.6301605701446533, "learning_rate": 0.0001, "loss": 0.0155, "step": 66290 }, { "epoch": 436.1842105263158, "grad_norm": 1.4133784770965576, "learning_rate": 0.0001, "loss": 0.0171, "step": 66300 }, { "epoch": 436.25, "grad_norm": 1.3436528444290161, "learning_rate": 0.0001, "loss": 0.0161, "step": 66310 }, { "epoch": 436.3157894736842, "grad_norm": 1.4818278551101685, "learning_rate": 0.0001, "loss": 0.0171, "step": 66320 }, { "epoch": 436.38157894736844, "grad_norm": 1.3692187070846558, "learning_rate": 0.0001, "loss": 0.017, "step": 66330 }, { "epoch": 436.44736842105266, "grad_norm": 1.459871530532837, "learning_rate": 0.0001, "loss": 0.0152, "step": 66340 }, { "epoch": 436.5131578947368, "grad_norm": 1.4473750591278076, "learning_rate": 0.0001, "loss": 0.0193, "step": 66350 }, { "epoch": 436.57894736842104, "grad_norm": 1.4416097402572632, "learning_rate": 0.0001, "loss": 0.0141, "step": 66360 }, { "epoch": 436.64473684210526, "grad_norm": 1.9907071590423584, "learning_rate": 0.0001, "loss": 0.0161, "step": 66370 }, { "epoch": 436.7105263157895, "grad_norm": 1.5464571714401245, "learning_rate": 0.0001, "loss": 0.0159, "step": 66380 }, { "epoch": 436.7763157894737, "grad_norm": 1.7256735563278198, "learning_rate": 0.0001, "loss": 0.0159, "step": 66390 }, { "epoch": 436.8421052631579, "grad_norm": 1.0477362871170044, "learning_rate": 0.0001, "loss": 0.0176, "step": 66400 }, { "epoch": 436.9078947368421, "grad_norm": 1.8013197183609009, "learning_rate": 0.0001, "loss": 0.0165, "step": 66410 }, { "epoch": 436.9736842105263, "grad_norm": 1.8752796649932861, "learning_rate": 0.0001, "loss": 0.0211, "step": 66420 }, { "epoch": 437.0394736842105, "grad_norm": 1.2635549306869507, "learning_rate": 0.0001, "loss": 0.0192, "step": 66430 }, { "epoch": 437.10526315789474, "grad_norm": 1.2891181707382202, "learning_rate": 0.0001, "loss": 0.0124, "step": 66440 }, { "epoch": 437.17105263157896, "grad_norm": 1.6430362462997437, "learning_rate": 0.0001, "loss": 0.0152, "step": 66450 }, { "epoch": 437.2368421052632, "grad_norm": 1.5804061889648438, "learning_rate": 0.0001, "loss": 0.0156, "step": 66460 }, { "epoch": 437.30263157894734, "grad_norm": 1.690047025680542, "learning_rate": 0.0001, "loss": 0.0151, "step": 66470 }, { "epoch": 437.36842105263156, "grad_norm": 1.6171338558197021, "learning_rate": 0.0001, "loss": 0.0183, "step": 66480 }, { "epoch": 437.4342105263158, "grad_norm": 1.4990118741989136, "learning_rate": 0.0001, "loss": 0.0175, "step": 66490 }, { "epoch": 437.5, "grad_norm": 1.620448112487793, "learning_rate": 0.0001, "loss": 0.0154, "step": 66500 }, { "epoch": 437.5657894736842, "grad_norm": 1.2104620933532715, "learning_rate": 0.0001, "loss": 0.017, "step": 66510 }, { "epoch": 437.63157894736844, "grad_norm": 1.381149411201477, "learning_rate": 0.0001, "loss": 0.0189, "step": 66520 }, { "epoch": 437.69736842105266, "grad_norm": 1.6095219850540161, "learning_rate": 0.0001, "loss": 0.0159, "step": 66530 }, { "epoch": 437.7631578947368, "grad_norm": 1.8365843296051025, "learning_rate": 0.0001, "loss": 0.0149, "step": 66540 }, { "epoch": 437.82894736842104, "grad_norm": 1.6798007488250732, "learning_rate": 0.0001, "loss": 0.0143, "step": 66550 }, { "epoch": 437.89473684210526, "grad_norm": 2.0232887268066406, "learning_rate": 0.0001, "loss": 0.0165, "step": 66560 }, { "epoch": 437.9605263157895, "grad_norm": 1.6396117210388184, "learning_rate": 0.0001, "loss": 0.0121, "step": 66570 }, { "epoch": 438.0263157894737, "grad_norm": 1.4945775270462036, "learning_rate": 0.0001, "loss": 0.0184, "step": 66580 }, { "epoch": 438.0921052631579, "grad_norm": 1.5572675466537476, "learning_rate": 0.0001, "loss": 0.017, "step": 66590 }, { "epoch": 438.1578947368421, "grad_norm": 1.4689875841140747, "learning_rate": 0.0001, "loss": 0.0173, "step": 66600 }, { "epoch": 438.2236842105263, "grad_norm": 1.9798469543457031, "learning_rate": 0.0001, "loss": 0.0148, "step": 66610 }, { "epoch": 438.2894736842105, "grad_norm": 1.9612364768981934, "learning_rate": 0.0001, "loss": 0.0167, "step": 66620 }, { "epoch": 438.35526315789474, "grad_norm": 1.885369896888733, "learning_rate": 0.0001, "loss": 0.0159, "step": 66630 }, { "epoch": 438.42105263157896, "grad_norm": 1.9020441770553589, "learning_rate": 0.0001, "loss": 0.0192, "step": 66640 }, { "epoch": 438.4868421052632, "grad_norm": 1.5715254545211792, "learning_rate": 0.0001, "loss": 0.0213, "step": 66650 }, { "epoch": 438.55263157894734, "grad_norm": 1.5852208137512207, "learning_rate": 0.0001, "loss": 0.0146, "step": 66660 }, { "epoch": 438.61842105263156, "grad_norm": 1.188326120376587, "learning_rate": 0.0001, "loss": 0.016, "step": 66670 }, { "epoch": 438.6842105263158, "grad_norm": 1.3778369426727295, "learning_rate": 0.0001, "loss": 0.0145, "step": 66680 }, { "epoch": 438.75, "grad_norm": 1.5934265851974487, "learning_rate": 0.0001, "loss": 0.0122, "step": 66690 }, { "epoch": 438.8157894736842, "grad_norm": 1.5935447216033936, "learning_rate": 0.0001, "loss": 0.014, "step": 66700 }, { "epoch": 438.88157894736844, "grad_norm": 1.5707346200942993, "learning_rate": 0.0001, "loss": 0.016, "step": 66710 }, { "epoch": 438.94736842105266, "grad_norm": 1.4899011850357056, "learning_rate": 0.0001, "loss": 0.0154, "step": 66720 }, { "epoch": 439.0131578947368, "grad_norm": 1.4844484329223633, "learning_rate": 0.0001, "loss": 0.0153, "step": 66730 }, { "epoch": 439.07894736842104, "grad_norm": 1.7988895177841187, "learning_rate": 0.0001, "loss": 0.0142, "step": 66740 }, { "epoch": 439.14473684210526, "grad_norm": 1.65980863571167, "learning_rate": 0.0001, "loss": 0.0185, "step": 66750 }, { "epoch": 439.2105263157895, "grad_norm": 1.9215619564056396, "learning_rate": 0.0001, "loss": 0.0134, "step": 66760 }, { "epoch": 439.2763157894737, "grad_norm": 1.7377233505249023, "learning_rate": 0.0001, "loss": 0.0147, "step": 66770 }, { "epoch": 439.3421052631579, "grad_norm": 1.3525060415267944, "learning_rate": 0.0001, "loss": 0.022, "step": 66780 }, { "epoch": 439.4078947368421, "grad_norm": 1.844903588294983, "learning_rate": 0.0001, "loss": 0.0191, "step": 66790 }, { "epoch": 439.4736842105263, "grad_norm": 1.3906168937683105, "learning_rate": 0.0001, "loss": 0.015, "step": 66800 }, { "epoch": 439.5394736842105, "grad_norm": 1.594735026359558, "learning_rate": 0.0001, "loss": 0.0174, "step": 66810 }, { "epoch": 439.60526315789474, "grad_norm": 1.2511659860610962, "learning_rate": 0.0001, "loss": 0.0142, "step": 66820 }, { "epoch": 439.67105263157896, "grad_norm": 1.2243714332580566, "learning_rate": 0.0001, "loss": 0.0133, "step": 66830 }, { "epoch": 439.7368421052632, "grad_norm": 1.559815764427185, "learning_rate": 0.0001, "loss": 0.014, "step": 66840 }, { "epoch": 439.80263157894734, "grad_norm": 1.041866660118103, "learning_rate": 0.0001, "loss": 0.0174, "step": 66850 }, { "epoch": 439.86842105263156, "grad_norm": 1.612639307975769, "learning_rate": 0.0001, "loss": 0.0125, "step": 66860 }, { "epoch": 439.9342105263158, "grad_norm": 1.6874133348464966, "learning_rate": 0.0001, "loss": 0.0158, "step": 66870 }, { "epoch": 440.0, "grad_norm": 1.38831627368927, "learning_rate": 0.0001, "loss": 0.0173, "step": 66880 }, { "epoch": 440.0657894736842, "grad_norm": 1.7818228006362915, "learning_rate": 0.0001, "loss": 0.0146, "step": 66890 }, { "epoch": 440.13157894736844, "grad_norm": 1.4446629285812378, "learning_rate": 0.0001, "loss": 0.017, "step": 66900 }, { "epoch": 440.19736842105266, "grad_norm": 1.9599292278289795, "learning_rate": 0.0001, "loss": 0.0157, "step": 66910 }, { "epoch": 440.2631578947368, "grad_norm": 1.8100590705871582, "learning_rate": 0.0001, "loss": 0.0147, "step": 66920 }, { "epoch": 440.32894736842104, "grad_norm": 1.391022801399231, "learning_rate": 0.0001, "loss": 0.0178, "step": 66930 }, { "epoch": 440.39473684210526, "grad_norm": 1.4607990980148315, "learning_rate": 0.0001, "loss": 0.0173, "step": 66940 }, { "epoch": 440.4605263157895, "grad_norm": 1.6694928407669067, "learning_rate": 0.0001, "loss": 0.0171, "step": 66950 }, { "epoch": 440.5263157894737, "grad_norm": 1.474492073059082, "learning_rate": 0.0001, "loss": 0.0163, "step": 66960 }, { "epoch": 440.5921052631579, "grad_norm": 1.43230140209198, "learning_rate": 0.0001, "loss": 0.0163, "step": 66970 }, { "epoch": 440.6578947368421, "grad_norm": 1.173116683959961, "learning_rate": 0.0001, "loss": 0.0176, "step": 66980 }, { "epoch": 440.7236842105263, "grad_norm": 1.4485670328140259, "learning_rate": 0.0001, "loss": 0.0193, "step": 66990 }, { "epoch": 440.7894736842105, "grad_norm": 1.5735095739364624, "learning_rate": 0.0001, "loss": 0.0142, "step": 67000 }, { "epoch": 440.85526315789474, "grad_norm": 1.4150785207748413, "learning_rate": 0.0001, "loss": 0.0163, "step": 67010 }, { "epoch": 440.92105263157896, "grad_norm": 1.0509610176086426, "learning_rate": 0.0001, "loss": 0.0135, "step": 67020 }, { "epoch": 440.9868421052632, "grad_norm": 1.3418278694152832, "learning_rate": 0.0001, "loss": 0.0141, "step": 67030 }, { "epoch": 441.05263157894734, "grad_norm": 1.600019931793213, "learning_rate": 0.0001, "loss": 0.0164, "step": 67040 }, { "epoch": 441.11842105263156, "grad_norm": 1.2452374696731567, "learning_rate": 0.0001, "loss": 0.0206, "step": 67050 }, { "epoch": 441.1842105263158, "grad_norm": 1.9107017517089844, "learning_rate": 0.0001, "loss": 0.0167, "step": 67060 }, { "epoch": 441.25, "grad_norm": 1.5733647346496582, "learning_rate": 0.0001, "loss": 0.0131, "step": 67070 }, { "epoch": 441.3157894736842, "grad_norm": 1.6333765983581543, "learning_rate": 0.0001, "loss": 0.014, "step": 67080 }, { "epoch": 441.38157894736844, "grad_norm": 1.375079870223999, "learning_rate": 0.0001, "loss": 0.0144, "step": 67090 }, { "epoch": 441.44736842105266, "grad_norm": 1.1596455574035645, "learning_rate": 0.0001, "loss": 0.021, "step": 67100 }, { "epoch": 441.5131578947368, "grad_norm": 1.6202712059020996, "learning_rate": 0.0001, "loss": 0.0142, "step": 67110 }, { "epoch": 441.57894736842104, "grad_norm": 1.7057219743728638, "learning_rate": 0.0001, "loss": 0.0177, "step": 67120 }, { "epoch": 441.64473684210526, "grad_norm": 1.4627302885055542, "learning_rate": 0.0001, "loss": 0.018, "step": 67130 }, { "epoch": 441.7105263157895, "grad_norm": 1.276390552520752, "learning_rate": 0.0001, "loss": 0.0161, "step": 67140 }, { "epoch": 441.7763157894737, "grad_norm": 1.6434766054153442, "learning_rate": 0.0001, "loss": 0.0147, "step": 67150 }, { "epoch": 441.8421052631579, "grad_norm": 1.705259084701538, "learning_rate": 0.0001, "loss": 0.0122, "step": 67160 }, { "epoch": 441.9078947368421, "grad_norm": 1.8847898244857788, "learning_rate": 0.0001, "loss": 0.013, "step": 67170 }, { "epoch": 441.9736842105263, "grad_norm": 2.2770981788635254, "learning_rate": 0.0001, "loss": 0.0156, "step": 67180 }, { "epoch": 442.0394736842105, "grad_norm": 1.726339340209961, "learning_rate": 0.0001, "loss": 0.0166, "step": 67190 }, { "epoch": 442.10526315789474, "grad_norm": 1.6961181163787842, "learning_rate": 0.0001, "loss": 0.0153, "step": 67200 }, { "epoch": 442.17105263157896, "grad_norm": 1.3408693075180054, "learning_rate": 0.0001, "loss": 0.0178, "step": 67210 }, { "epoch": 442.2368421052632, "grad_norm": 1.7456254959106445, "learning_rate": 0.0001, "loss": 0.0162, "step": 67220 }, { "epoch": 442.30263157894734, "grad_norm": 2.027292251586914, "learning_rate": 0.0001, "loss": 0.0158, "step": 67230 }, { "epoch": 442.36842105263156, "grad_norm": 1.4629685878753662, "learning_rate": 0.0001, "loss": 0.0165, "step": 67240 }, { "epoch": 442.4342105263158, "grad_norm": 1.6028162240982056, "learning_rate": 0.0001, "loss": 0.0187, "step": 67250 }, { "epoch": 442.5, "grad_norm": 1.6421170234680176, "learning_rate": 0.0001, "loss": 0.0134, "step": 67260 }, { "epoch": 442.5657894736842, "grad_norm": 1.603140950202942, "learning_rate": 0.0001, "loss": 0.0161, "step": 67270 }, { "epoch": 442.63157894736844, "grad_norm": 1.4958165884017944, "learning_rate": 0.0001, "loss": 0.0149, "step": 67280 }, { "epoch": 442.69736842105266, "grad_norm": 1.6890358924865723, "learning_rate": 0.0001, "loss": 0.0158, "step": 67290 }, { "epoch": 442.7631578947368, "grad_norm": 1.6768217086791992, "learning_rate": 0.0001, "loss": 0.0158, "step": 67300 }, { "epoch": 442.82894736842104, "grad_norm": 0.9974784255027771, "learning_rate": 0.0001, "loss": 0.0138, "step": 67310 }, { "epoch": 442.89473684210526, "grad_norm": 1.8166464567184448, "learning_rate": 0.0001, "loss": 0.0145, "step": 67320 }, { "epoch": 442.9605263157895, "grad_norm": 1.3122309446334839, "learning_rate": 0.0001, "loss": 0.0157, "step": 67330 }, { "epoch": 443.0263157894737, "grad_norm": 1.2060108184814453, "learning_rate": 0.0001, "loss": 0.0169, "step": 67340 }, { "epoch": 443.0921052631579, "grad_norm": 1.5836529731750488, "learning_rate": 0.0001, "loss": 0.016, "step": 67350 }, { "epoch": 443.1578947368421, "grad_norm": 1.2851907014846802, "learning_rate": 0.0001, "loss": 0.0123, "step": 67360 }, { "epoch": 443.2236842105263, "grad_norm": 1.2322843074798584, "learning_rate": 0.0001, "loss": 0.0164, "step": 67370 }, { "epoch": 443.2894736842105, "grad_norm": 1.3180180788040161, "learning_rate": 0.0001, "loss": 0.0149, "step": 67380 }, { "epoch": 443.35526315789474, "grad_norm": 1.445618987083435, "learning_rate": 0.0001, "loss": 0.0174, "step": 67390 }, { "epoch": 443.42105263157896, "grad_norm": 1.4589817523956299, "learning_rate": 0.0001, "loss": 0.0142, "step": 67400 }, { "epoch": 443.4868421052632, "grad_norm": 1.496898889541626, "learning_rate": 0.0001, "loss": 0.0168, "step": 67410 }, { "epoch": 443.55263157894734, "grad_norm": 1.7086408138275146, "learning_rate": 0.0001, "loss": 0.0177, "step": 67420 }, { "epoch": 443.61842105263156, "grad_norm": 1.7592344284057617, "learning_rate": 0.0001, "loss": 0.0137, "step": 67430 }, { "epoch": 443.6842105263158, "grad_norm": 1.8497931957244873, "learning_rate": 0.0001, "loss": 0.0162, "step": 67440 }, { "epoch": 443.75, "grad_norm": 1.7580186128616333, "learning_rate": 0.0001, "loss": 0.0178, "step": 67450 }, { "epoch": 443.8157894736842, "grad_norm": 1.7724099159240723, "learning_rate": 0.0001, "loss": 0.0186, "step": 67460 }, { "epoch": 443.88157894736844, "grad_norm": 1.302243709564209, "learning_rate": 0.0001, "loss": 0.0156, "step": 67470 }, { "epoch": 443.94736842105266, "grad_norm": 1.6950867176055908, "learning_rate": 0.0001, "loss": 0.0168, "step": 67480 }, { "epoch": 444.0131578947368, "grad_norm": 1.4520251750946045, "learning_rate": 0.0001, "loss": 0.016, "step": 67490 }, { "epoch": 444.07894736842104, "grad_norm": 1.2725820541381836, "learning_rate": 0.0001, "loss": 0.0165, "step": 67500 }, { "epoch": 444.14473684210526, "grad_norm": 1.6092497110366821, "learning_rate": 0.0001, "loss": 0.0169, "step": 67510 }, { "epoch": 444.2105263157895, "grad_norm": 2.0164332389831543, "learning_rate": 0.0001, "loss": 0.0158, "step": 67520 }, { "epoch": 444.2763157894737, "grad_norm": 1.911678433418274, "learning_rate": 0.0001, "loss": 0.0169, "step": 67530 }, { "epoch": 444.3421052631579, "grad_norm": 1.7298485040664673, "learning_rate": 0.0001, "loss": 0.0153, "step": 67540 }, { "epoch": 444.4078947368421, "grad_norm": 1.919305682182312, "learning_rate": 0.0001, "loss": 0.0124, "step": 67550 }, { "epoch": 444.4736842105263, "grad_norm": 1.903716802597046, "learning_rate": 0.0001, "loss": 0.0143, "step": 67560 }, { "epoch": 444.5394736842105, "grad_norm": 1.965874195098877, "learning_rate": 0.0001, "loss": 0.0172, "step": 67570 }, { "epoch": 444.60526315789474, "grad_norm": 1.860333800315857, "learning_rate": 0.0001, "loss": 0.0151, "step": 67580 }, { "epoch": 444.67105263157896, "grad_norm": 1.3845722675323486, "learning_rate": 0.0001, "loss": 0.0138, "step": 67590 }, { "epoch": 444.7368421052632, "grad_norm": 1.5346081256866455, "learning_rate": 0.0001, "loss": 0.0171, "step": 67600 }, { "epoch": 444.80263157894734, "grad_norm": 1.570194125175476, "learning_rate": 0.0001, "loss": 0.0217, "step": 67610 }, { "epoch": 444.86842105263156, "grad_norm": 2.3313844203948975, "learning_rate": 0.0001, "loss": 0.0145, "step": 67620 }, { "epoch": 444.9342105263158, "grad_norm": 1.9984580278396606, "learning_rate": 0.0001, "loss": 0.0127, "step": 67630 }, { "epoch": 445.0, "grad_norm": 1.4391192197799683, "learning_rate": 0.0001, "loss": 0.015, "step": 67640 }, { "epoch": 445.0657894736842, "grad_norm": 1.5455493927001953, "learning_rate": 0.0001, "loss": 0.0134, "step": 67650 }, { "epoch": 445.13157894736844, "grad_norm": 1.6250615119934082, "learning_rate": 0.0001, "loss": 0.0177, "step": 67660 }, { "epoch": 445.19736842105266, "grad_norm": 1.651536226272583, "learning_rate": 0.0001, "loss": 0.015, "step": 67670 }, { "epoch": 445.2631578947368, "grad_norm": 1.5108317136764526, "learning_rate": 0.0001, "loss": 0.0169, "step": 67680 }, { "epoch": 445.32894736842104, "grad_norm": 1.6559478044509888, "learning_rate": 0.0001, "loss": 0.015, "step": 67690 }, { "epoch": 445.39473684210526, "grad_norm": 1.725618600845337, "learning_rate": 0.0001, "loss": 0.0137, "step": 67700 }, { "epoch": 445.4605263157895, "grad_norm": 1.2527648210525513, "learning_rate": 0.0001, "loss": 0.0141, "step": 67710 }, { "epoch": 445.5263157894737, "grad_norm": 1.8772752285003662, "learning_rate": 0.0001, "loss": 0.0156, "step": 67720 }, { "epoch": 445.5921052631579, "grad_norm": 1.9443391561508179, "learning_rate": 0.0001, "loss": 0.0144, "step": 67730 }, { "epoch": 445.6578947368421, "grad_norm": 1.9219636917114258, "learning_rate": 0.0001, "loss": 0.017, "step": 67740 }, { "epoch": 445.7236842105263, "grad_norm": 1.704880714416504, "learning_rate": 0.0001, "loss": 0.0182, "step": 67750 }, { "epoch": 445.7894736842105, "grad_norm": 1.4242959022521973, "learning_rate": 0.0001, "loss": 0.0156, "step": 67760 }, { "epoch": 445.85526315789474, "grad_norm": 1.2560526132583618, "learning_rate": 0.0001, "loss": 0.0162, "step": 67770 }, { "epoch": 445.92105263157896, "grad_norm": 1.1371699571609497, "learning_rate": 0.0001, "loss": 0.0206, "step": 67780 }, { "epoch": 445.9868421052632, "grad_norm": 1.3195013999938965, "learning_rate": 0.0001, "loss": 0.0129, "step": 67790 }, { "epoch": 446.05263157894734, "grad_norm": 1.2765334844589233, "learning_rate": 0.0001, "loss": 0.0174, "step": 67800 }, { "epoch": 446.11842105263156, "grad_norm": 1.3807088136672974, "learning_rate": 0.0001, "loss": 0.0176, "step": 67810 }, { "epoch": 446.1842105263158, "grad_norm": 1.4846590757369995, "learning_rate": 0.0001, "loss": 0.0141, "step": 67820 }, { "epoch": 446.25, "grad_norm": 2.1906211376190186, "learning_rate": 0.0001, "loss": 0.0157, "step": 67830 }, { "epoch": 446.3157894736842, "grad_norm": 1.8868906497955322, "learning_rate": 0.0001, "loss": 0.0164, "step": 67840 }, { "epoch": 446.38157894736844, "grad_norm": 1.2865748405456543, "learning_rate": 0.0001, "loss": 0.0152, "step": 67850 }, { "epoch": 446.44736842105266, "grad_norm": 1.0954068899154663, "learning_rate": 0.0001, "loss": 0.0189, "step": 67860 }, { "epoch": 446.5131578947368, "grad_norm": 1.421001672744751, "learning_rate": 0.0001, "loss": 0.0141, "step": 67870 }, { "epoch": 446.57894736842104, "grad_norm": 1.685901165008545, "learning_rate": 0.0001, "loss": 0.0166, "step": 67880 }, { "epoch": 446.64473684210526, "grad_norm": 1.60703444480896, "learning_rate": 0.0001, "loss": 0.0159, "step": 67890 }, { "epoch": 446.7105263157895, "grad_norm": 2.00750470161438, "learning_rate": 0.0001, "loss": 0.013, "step": 67900 }, { "epoch": 446.7763157894737, "grad_norm": 1.8372867107391357, "learning_rate": 0.0001, "loss": 0.0183, "step": 67910 }, { "epoch": 446.8421052631579, "grad_norm": 1.6554441452026367, "learning_rate": 0.0001, "loss": 0.0136, "step": 67920 }, { "epoch": 446.9078947368421, "grad_norm": 1.7708173990249634, "learning_rate": 0.0001, "loss": 0.0166, "step": 67930 }, { "epoch": 446.9736842105263, "grad_norm": 1.8595088720321655, "learning_rate": 0.0001, "loss": 0.0184, "step": 67940 }, { "epoch": 447.0394736842105, "grad_norm": 1.9030386209487915, "learning_rate": 0.0001, "loss": 0.0126, "step": 67950 }, { "epoch": 447.10526315789474, "grad_norm": 1.5728614330291748, "learning_rate": 0.0001, "loss": 0.0163, "step": 67960 }, { "epoch": 447.17105263157896, "grad_norm": 1.3208955526351929, "learning_rate": 0.0001, "loss": 0.0198, "step": 67970 }, { "epoch": 447.2368421052632, "grad_norm": 1.9942539930343628, "learning_rate": 0.0001, "loss": 0.0147, "step": 67980 }, { "epoch": 447.30263157894734, "grad_norm": 1.5109418630599976, "learning_rate": 0.0001, "loss": 0.0126, "step": 67990 }, { "epoch": 447.36842105263156, "grad_norm": 1.4384342432022095, "learning_rate": 0.0001, "loss": 0.0195, "step": 68000 }, { "epoch": 447.4342105263158, "grad_norm": 1.6358622312545776, "learning_rate": 0.0001, "loss": 0.0132, "step": 68010 }, { "epoch": 447.5, "grad_norm": 1.142863392829895, "learning_rate": 0.0001, "loss": 0.0142, "step": 68020 }, { "epoch": 447.5657894736842, "grad_norm": 1.6500952243804932, "learning_rate": 0.0001, "loss": 0.0174, "step": 68030 }, { "epoch": 447.63157894736844, "grad_norm": 1.7925487756729126, "learning_rate": 0.0001, "loss": 0.0178, "step": 68040 }, { "epoch": 447.69736842105266, "grad_norm": 1.9732260704040527, "learning_rate": 0.0001, "loss": 0.0133, "step": 68050 }, { "epoch": 447.7631578947368, "grad_norm": 1.6831268072128296, "learning_rate": 0.0001, "loss": 0.0155, "step": 68060 }, { "epoch": 447.82894736842104, "grad_norm": 1.7109929323196411, "learning_rate": 0.0001, "loss": 0.0192, "step": 68070 }, { "epoch": 447.89473684210526, "grad_norm": 1.2701226472854614, "learning_rate": 0.0001, "loss": 0.0131, "step": 68080 }, { "epoch": 447.9605263157895, "grad_norm": 1.818865418434143, "learning_rate": 0.0001, "loss": 0.0162, "step": 68090 }, { "epoch": 448.0263157894737, "grad_norm": 1.3173214197158813, "learning_rate": 0.0001, "loss": 0.0168, "step": 68100 }, { "epoch": 448.0921052631579, "grad_norm": 1.7740166187286377, "learning_rate": 0.0001, "loss": 0.0147, "step": 68110 }, { "epoch": 448.1578947368421, "grad_norm": 1.795584797859192, "learning_rate": 0.0001, "loss": 0.0151, "step": 68120 }, { "epoch": 448.2236842105263, "grad_norm": 1.9565315246582031, "learning_rate": 0.0001, "loss": 0.0177, "step": 68130 }, { "epoch": 448.2894736842105, "grad_norm": 1.6367372274398804, "learning_rate": 0.0001, "loss": 0.0134, "step": 68140 }, { "epoch": 448.35526315789474, "grad_norm": 1.5651240348815918, "learning_rate": 0.0001, "loss": 0.0215, "step": 68150 }, { "epoch": 448.42105263157896, "grad_norm": 1.3883031606674194, "learning_rate": 0.0001, "loss": 0.0162, "step": 68160 }, { "epoch": 448.4868421052632, "grad_norm": 1.6750556230545044, "learning_rate": 0.0001, "loss": 0.0175, "step": 68170 }, { "epoch": 448.55263157894734, "grad_norm": 1.098543643951416, "learning_rate": 0.0001, "loss": 0.0156, "step": 68180 }, { "epoch": 448.61842105263156, "grad_norm": 1.4670321941375732, "learning_rate": 0.0001, "loss": 0.0133, "step": 68190 }, { "epoch": 448.6842105263158, "grad_norm": 1.6408122777938843, "learning_rate": 0.0001, "loss": 0.0166, "step": 68200 }, { "epoch": 448.75, "grad_norm": 1.438441276550293, "learning_rate": 0.0001, "loss": 0.0157, "step": 68210 }, { "epoch": 448.8157894736842, "grad_norm": 1.270943284034729, "learning_rate": 0.0001, "loss": 0.0159, "step": 68220 }, { "epoch": 448.88157894736844, "grad_norm": 1.6163471937179565, "learning_rate": 0.0001, "loss": 0.0153, "step": 68230 }, { "epoch": 448.94736842105266, "grad_norm": 2.0782713890075684, "learning_rate": 0.0001, "loss": 0.017, "step": 68240 }, { "epoch": 449.0131578947368, "grad_norm": 1.7491602897644043, "learning_rate": 0.0001, "loss": 0.0126, "step": 68250 }, { "epoch": 449.07894736842104, "grad_norm": 1.4846762418746948, "learning_rate": 0.0001, "loss": 0.0144, "step": 68260 }, { "epoch": 449.14473684210526, "grad_norm": 1.141202449798584, "learning_rate": 0.0001, "loss": 0.0149, "step": 68270 }, { "epoch": 449.2105263157895, "grad_norm": 1.1649725437164307, "learning_rate": 0.0001, "loss": 0.0178, "step": 68280 }, { "epoch": 449.2763157894737, "grad_norm": 1.628136157989502, "learning_rate": 0.0001, "loss": 0.0197, "step": 68290 }, { "epoch": 449.3421052631579, "grad_norm": 1.7594140768051147, "learning_rate": 0.0001, "loss": 0.0195, "step": 68300 }, { "epoch": 449.4078947368421, "grad_norm": 1.4278372526168823, "learning_rate": 0.0001, "loss": 0.0182, "step": 68310 }, { "epoch": 449.4736842105263, "grad_norm": 1.6866015195846558, "learning_rate": 0.0001, "loss": 0.0209, "step": 68320 }, { "epoch": 449.5394736842105, "grad_norm": 1.348793387413025, "learning_rate": 0.0001, "loss": 0.0157, "step": 68330 }, { "epoch": 449.60526315789474, "grad_norm": 1.7368462085723877, "learning_rate": 0.0001, "loss": 0.0151, "step": 68340 }, { "epoch": 449.67105263157896, "grad_norm": 1.2351951599121094, "learning_rate": 0.0001, "loss": 0.0155, "step": 68350 }, { "epoch": 449.7368421052632, "grad_norm": 1.6246947050094604, "learning_rate": 0.0001, "loss": 0.0177, "step": 68360 }, { "epoch": 449.80263157894734, "grad_norm": 1.5875359773635864, "learning_rate": 0.0001, "loss": 0.0181, "step": 68370 }, { "epoch": 449.86842105263156, "grad_norm": 1.6101961135864258, "learning_rate": 0.0001, "loss": 0.0172, "step": 68380 }, { "epoch": 449.9342105263158, "grad_norm": 1.8390727043151855, "learning_rate": 0.0001, "loss": 0.0146, "step": 68390 }, { "epoch": 450.0, "grad_norm": 1.9508402347564697, "learning_rate": 0.0001, "loss": 0.0169, "step": 68400 }, { "epoch": 450.0657894736842, "grad_norm": 1.7610273361206055, "learning_rate": 0.0001, "loss": 0.019, "step": 68410 }, { "epoch": 450.13157894736844, "grad_norm": 1.9748998880386353, "learning_rate": 0.0001, "loss": 0.0191, "step": 68420 }, { "epoch": 450.19736842105266, "grad_norm": 1.4825643301010132, "learning_rate": 0.0001, "loss": 0.0148, "step": 68430 }, { "epoch": 450.2631578947368, "grad_norm": 1.3443365097045898, "learning_rate": 0.0001, "loss": 0.0184, "step": 68440 }, { "epoch": 450.32894736842104, "grad_norm": 1.5289788246154785, "learning_rate": 0.0001, "loss": 0.0208, "step": 68450 }, { "epoch": 450.39473684210526, "grad_norm": 1.6673589944839478, "learning_rate": 0.0001, "loss": 0.0149, "step": 68460 }, { "epoch": 450.4605263157895, "grad_norm": 1.944858193397522, "learning_rate": 0.0001, "loss": 0.0168, "step": 68470 }, { "epoch": 450.5263157894737, "grad_norm": 1.4937046766281128, "learning_rate": 0.0001, "loss": 0.0154, "step": 68480 }, { "epoch": 450.5921052631579, "grad_norm": 1.673138976097107, "learning_rate": 0.0001, "loss": 0.0161, "step": 68490 }, { "epoch": 450.6578947368421, "grad_norm": 1.6593061685562134, "learning_rate": 0.0001, "loss": 0.0174, "step": 68500 }, { "epoch": 450.7236842105263, "grad_norm": 1.6945701837539673, "learning_rate": 0.0001, "loss": 0.0178, "step": 68510 }, { "epoch": 450.7894736842105, "grad_norm": 1.419158697128296, "learning_rate": 0.0001, "loss": 0.0142, "step": 68520 }, { "epoch": 450.85526315789474, "grad_norm": 1.1687541007995605, "learning_rate": 0.0001, "loss": 0.0145, "step": 68530 }, { "epoch": 450.92105263157896, "grad_norm": 1.3740487098693848, "learning_rate": 0.0001, "loss": 0.0132, "step": 68540 }, { "epoch": 450.9868421052632, "grad_norm": 1.081101894378662, "learning_rate": 0.0001, "loss": 0.0189, "step": 68550 }, { "epoch": 451.05263157894734, "grad_norm": 1.3532030582427979, "learning_rate": 0.0001, "loss": 0.0151, "step": 68560 }, { "epoch": 451.11842105263156, "grad_norm": 1.6785584688186646, "learning_rate": 0.0001, "loss": 0.0169, "step": 68570 }, { "epoch": 451.1842105263158, "grad_norm": 1.5640218257904053, "learning_rate": 0.0001, "loss": 0.0181, "step": 68580 }, { "epoch": 451.25, "grad_norm": 1.6060552597045898, "learning_rate": 0.0001, "loss": 0.0174, "step": 68590 }, { "epoch": 451.3157894736842, "grad_norm": 1.6774439811706543, "learning_rate": 0.0001, "loss": 0.0166, "step": 68600 }, { "epoch": 451.38157894736844, "grad_norm": 1.2663160562515259, "learning_rate": 0.0001, "loss": 0.0197, "step": 68610 }, { "epoch": 451.44736842105266, "grad_norm": 1.4851584434509277, "learning_rate": 0.0001, "loss": 0.0151, "step": 68620 }, { "epoch": 451.5131578947368, "grad_norm": 1.9051936864852905, "learning_rate": 0.0001, "loss": 0.017, "step": 68630 }, { "epoch": 451.57894736842104, "grad_norm": 1.5444802045822144, "learning_rate": 0.0001, "loss": 0.0206, "step": 68640 }, { "epoch": 451.64473684210526, "grad_norm": 1.6425232887268066, "learning_rate": 0.0001, "loss": 0.0152, "step": 68650 }, { "epoch": 451.7105263157895, "grad_norm": 1.753028154373169, "learning_rate": 0.0001, "loss": 0.0162, "step": 68660 }, { "epoch": 451.7763157894737, "grad_norm": 1.4641366004943848, "learning_rate": 0.0001, "loss": 0.0166, "step": 68670 }, { "epoch": 451.8421052631579, "grad_norm": 1.5968050956726074, "learning_rate": 0.0001, "loss": 0.0129, "step": 68680 }, { "epoch": 451.9078947368421, "grad_norm": 1.3469767570495605, "learning_rate": 0.0001, "loss": 0.0146, "step": 68690 }, { "epoch": 451.9736842105263, "grad_norm": 1.5844801664352417, "learning_rate": 0.0001, "loss": 0.0147, "step": 68700 }, { "epoch": 452.0394736842105, "grad_norm": 1.8394261598587036, "learning_rate": 0.0001, "loss": 0.0177, "step": 68710 }, { "epoch": 452.10526315789474, "grad_norm": 1.6954989433288574, "learning_rate": 0.0001, "loss": 0.0161, "step": 68720 }, { "epoch": 452.17105263157896, "grad_norm": 1.3250858783721924, "learning_rate": 0.0001, "loss": 0.0197, "step": 68730 }, { "epoch": 452.2368421052632, "grad_norm": 1.2990448474884033, "learning_rate": 0.0001, "loss": 0.0131, "step": 68740 }, { "epoch": 452.30263157894734, "grad_norm": 1.6927416324615479, "learning_rate": 0.0001, "loss": 0.0188, "step": 68750 }, { "epoch": 452.36842105263156, "grad_norm": 1.3942703008651733, "learning_rate": 0.0001, "loss": 0.0196, "step": 68760 }, { "epoch": 452.4342105263158, "grad_norm": 1.2769826650619507, "learning_rate": 0.0001, "loss": 0.0161, "step": 68770 }, { "epoch": 452.5, "grad_norm": 1.5621038675308228, "learning_rate": 0.0001, "loss": 0.0165, "step": 68780 }, { "epoch": 452.5657894736842, "grad_norm": 1.6862101554870605, "learning_rate": 0.0001, "loss": 0.018, "step": 68790 }, { "epoch": 452.63157894736844, "grad_norm": 1.5763345956802368, "learning_rate": 0.0001, "loss": 0.0165, "step": 68800 }, { "epoch": 452.69736842105266, "grad_norm": 1.450648307800293, "learning_rate": 0.0001, "loss": 0.0137, "step": 68810 }, { "epoch": 452.7631578947368, "grad_norm": 1.8747830390930176, "learning_rate": 0.0001, "loss": 0.0149, "step": 68820 }, { "epoch": 452.82894736842104, "grad_norm": 1.3091591596603394, "learning_rate": 0.0001, "loss": 0.0149, "step": 68830 }, { "epoch": 452.89473684210526, "grad_norm": 1.536516785621643, "learning_rate": 0.0001, "loss": 0.0145, "step": 68840 }, { "epoch": 452.9605263157895, "grad_norm": 1.351621389389038, "learning_rate": 0.0001, "loss": 0.0181, "step": 68850 }, { "epoch": 453.0263157894737, "grad_norm": 1.764398455619812, "learning_rate": 0.0001, "loss": 0.0171, "step": 68860 }, { "epoch": 453.0921052631579, "grad_norm": 1.6063152551651, "learning_rate": 0.0001, "loss": 0.0165, "step": 68870 }, { "epoch": 453.1578947368421, "grad_norm": 1.7573987245559692, "learning_rate": 0.0001, "loss": 0.019, "step": 68880 }, { "epoch": 453.2236842105263, "grad_norm": 1.1043437719345093, "learning_rate": 0.0001, "loss": 0.016, "step": 68890 }, { "epoch": 453.2894736842105, "grad_norm": 1.3764774799346924, "learning_rate": 0.0001, "loss": 0.0171, "step": 68900 }, { "epoch": 453.35526315789474, "grad_norm": 1.7119014263153076, "learning_rate": 0.0001, "loss": 0.0122, "step": 68910 }, { "epoch": 453.42105263157896, "grad_norm": 1.59248948097229, "learning_rate": 0.0001, "loss": 0.0141, "step": 68920 }, { "epoch": 453.4868421052632, "grad_norm": 1.3202654123306274, "learning_rate": 0.0001, "loss": 0.0142, "step": 68930 }, { "epoch": 453.55263157894734, "grad_norm": 1.557739496231079, "learning_rate": 0.0001, "loss": 0.0134, "step": 68940 }, { "epoch": 453.61842105263156, "grad_norm": 1.587322473526001, "learning_rate": 0.0001, "loss": 0.0179, "step": 68950 }, { "epoch": 453.6842105263158, "grad_norm": 1.7527823448181152, "learning_rate": 0.0001, "loss": 0.0137, "step": 68960 }, { "epoch": 453.75, "grad_norm": 1.3328200578689575, "learning_rate": 0.0001, "loss": 0.0155, "step": 68970 }, { "epoch": 453.8157894736842, "grad_norm": 1.61650550365448, "learning_rate": 0.0001, "loss": 0.0161, "step": 68980 }, { "epoch": 453.88157894736844, "grad_norm": 1.3458311557769775, "learning_rate": 0.0001, "loss": 0.016, "step": 68990 }, { "epoch": 453.94736842105266, "grad_norm": 1.7100414037704468, "learning_rate": 0.0001, "loss": 0.0146, "step": 69000 }, { "epoch": 454.0131578947368, "grad_norm": 1.4378361701965332, "learning_rate": 0.0001, "loss": 0.0214, "step": 69010 }, { "epoch": 454.07894736842104, "grad_norm": 1.6323425769805908, "learning_rate": 0.0001, "loss": 0.0166, "step": 69020 }, { "epoch": 454.14473684210526, "grad_norm": 1.430550217628479, "learning_rate": 0.0001, "loss": 0.0178, "step": 69030 }, { "epoch": 454.2105263157895, "grad_norm": 1.3909461498260498, "learning_rate": 0.0001, "loss": 0.0163, "step": 69040 }, { "epoch": 454.2763157894737, "grad_norm": 1.163329005241394, "learning_rate": 0.0001, "loss": 0.0163, "step": 69050 }, { "epoch": 454.3421052631579, "grad_norm": 1.4179999828338623, "learning_rate": 0.0001, "loss": 0.0164, "step": 69060 }, { "epoch": 454.4078947368421, "grad_norm": 1.2851765155792236, "learning_rate": 0.0001, "loss": 0.0144, "step": 69070 }, { "epoch": 454.4736842105263, "grad_norm": 1.7366690635681152, "learning_rate": 0.0001, "loss": 0.0156, "step": 69080 }, { "epoch": 454.5394736842105, "grad_norm": 1.386594533920288, "learning_rate": 0.0001, "loss": 0.015, "step": 69090 }, { "epoch": 454.60526315789474, "grad_norm": 1.173795461654663, "learning_rate": 0.0001, "loss": 0.014, "step": 69100 }, { "epoch": 454.67105263157896, "grad_norm": 1.3249881267547607, "learning_rate": 0.0001, "loss": 0.0174, "step": 69110 }, { "epoch": 454.7368421052632, "grad_norm": 1.6791611909866333, "learning_rate": 0.0001, "loss": 0.0136, "step": 69120 }, { "epoch": 454.80263157894734, "grad_norm": 1.954138159751892, "learning_rate": 0.0001, "loss": 0.0181, "step": 69130 }, { "epoch": 454.86842105263156, "grad_norm": 1.6696093082427979, "learning_rate": 0.0001, "loss": 0.0156, "step": 69140 }, { "epoch": 454.9342105263158, "grad_norm": 1.5230001211166382, "learning_rate": 0.0001, "loss": 0.0134, "step": 69150 }, { "epoch": 455.0, "grad_norm": 1.2234083414077759, "learning_rate": 0.0001, "loss": 0.0144, "step": 69160 }, { "epoch": 455.0657894736842, "grad_norm": 1.3259953260421753, "learning_rate": 0.0001, "loss": 0.0127, "step": 69170 }, { "epoch": 455.13157894736844, "grad_norm": 1.524163842201233, "learning_rate": 0.0001, "loss": 0.0121, "step": 69180 }, { "epoch": 455.19736842105266, "grad_norm": 1.5877163410186768, "learning_rate": 0.0001, "loss": 0.0194, "step": 69190 }, { "epoch": 455.2631578947368, "grad_norm": 1.683769941329956, "learning_rate": 0.0001, "loss": 0.0129, "step": 69200 }, { "epoch": 455.32894736842104, "grad_norm": 1.5395351648330688, "learning_rate": 0.0001, "loss": 0.0164, "step": 69210 }, { "epoch": 455.39473684210526, "grad_norm": 1.2807444334030151, "learning_rate": 0.0001, "loss": 0.0174, "step": 69220 }, { "epoch": 455.4605263157895, "grad_norm": 1.4594813585281372, "learning_rate": 0.0001, "loss": 0.0159, "step": 69230 }, { "epoch": 455.5263157894737, "grad_norm": 1.808986783027649, "learning_rate": 0.0001, "loss": 0.0152, "step": 69240 }, { "epoch": 455.5921052631579, "grad_norm": 1.213382363319397, "learning_rate": 0.0001, "loss": 0.0151, "step": 69250 }, { "epoch": 455.6578947368421, "grad_norm": 1.392394781112671, "learning_rate": 0.0001, "loss": 0.0191, "step": 69260 }, { "epoch": 455.7236842105263, "grad_norm": 1.9672471284866333, "learning_rate": 0.0001, "loss": 0.0164, "step": 69270 }, { "epoch": 455.7894736842105, "grad_norm": 1.3133021593093872, "learning_rate": 0.0001, "loss": 0.0203, "step": 69280 }, { "epoch": 455.85526315789474, "grad_norm": 1.3617335557937622, "learning_rate": 0.0001, "loss": 0.0136, "step": 69290 }, { "epoch": 455.92105263157896, "grad_norm": 1.5029066801071167, "learning_rate": 0.0001, "loss": 0.0135, "step": 69300 }, { "epoch": 455.9868421052632, "grad_norm": 1.6852145195007324, "learning_rate": 0.0001, "loss": 0.012, "step": 69310 }, { "epoch": 456.05263157894734, "grad_norm": 1.796532392501831, "learning_rate": 0.0001, "loss": 0.0263, "step": 69320 }, { "epoch": 456.11842105263156, "grad_norm": 1.6068540811538696, "learning_rate": 0.0001, "loss": 0.0144, "step": 69330 }, { "epoch": 456.1842105263158, "grad_norm": 1.6590403318405151, "learning_rate": 0.0001, "loss": 0.0154, "step": 69340 }, { "epoch": 456.25, "grad_norm": 1.3348864316940308, "learning_rate": 0.0001, "loss": 0.0154, "step": 69350 }, { "epoch": 456.3157894736842, "grad_norm": 1.48292076587677, "learning_rate": 0.0001, "loss": 0.0146, "step": 69360 }, { "epoch": 456.38157894736844, "grad_norm": 1.3870041370391846, "learning_rate": 0.0001, "loss": 0.0127, "step": 69370 }, { "epoch": 456.44736842105266, "grad_norm": 1.1907392740249634, "learning_rate": 0.0001, "loss": 0.013, "step": 69380 }, { "epoch": 456.5131578947368, "grad_norm": 1.6586958169937134, "learning_rate": 0.0001, "loss": 0.0132, "step": 69390 }, { "epoch": 456.57894736842104, "grad_norm": 1.5206295251846313, "learning_rate": 0.0001, "loss": 0.015, "step": 69400 }, { "epoch": 456.64473684210526, "grad_norm": 1.1371376514434814, "learning_rate": 0.0001, "loss": 0.0166, "step": 69410 }, { "epoch": 456.7105263157895, "grad_norm": 1.4733757972717285, "learning_rate": 0.0001, "loss": 0.0162, "step": 69420 }, { "epoch": 456.7763157894737, "grad_norm": 1.4185104370117188, "learning_rate": 0.0001, "loss": 0.0134, "step": 69430 }, { "epoch": 456.8421052631579, "grad_norm": 1.3083539009094238, "learning_rate": 0.0001, "loss": 0.0154, "step": 69440 }, { "epoch": 456.9078947368421, "grad_norm": 0.9358235597610474, "learning_rate": 0.0001, "loss": 0.0223, "step": 69450 }, { "epoch": 456.9736842105263, "grad_norm": 1.6654810905456543, "learning_rate": 0.0001, "loss": 0.0138, "step": 69460 }, { "epoch": 457.0394736842105, "grad_norm": 1.5073693990707397, "learning_rate": 0.0001, "loss": 0.0164, "step": 69470 }, { "epoch": 457.10526315789474, "grad_norm": 1.4735900163650513, "learning_rate": 0.0001, "loss": 0.0169, "step": 69480 }, { "epoch": 457.17105263157896, "grad_norm": 1.573425054550171, "learning_rate": 0.0001, "loss": 0.0159, "step": 69490 }, { "epoch": 457.2368421052632, "grad_norm": 1.5380914211273193, "learning_rate": 0.0001, "loss": 0.0132, "step": 69500 }, { "epoch": 457.30263157894734, "grad_norm": 1.975436806678772, "learning_rate": 0.0001, "loss": 0.0149, "step": 69510 }, { "epoch": 457.36842105263156, "grad_norm": 1.6175874471664429, "learning_rate": 0.0001, "loss": 0.0176, "step": 69520 }, { "epoch": 457.4342105263158, "grad_norm": 1.9178597927093506, "learning_rate": 0.0001, "loss": 0.0128, "step": 69530 }, { "epoch": 457.5, "grad_norm": 1.7674823999404907, "learning_rate": 0.0001, "loss": 0.0163, "step": 69540 }, { "epoch": 457.5657894736842, "grad_norm": 1.650315761566162, "learning_rate": 0.0001, "loss": 0.0164, "step": 69550 }, { "epoch": 457.63157894736844, "grad_norm": 1.8698525428771973, "learning_rate": 0.0001, "loss": 0.0162, "step": 69560 }, { "epoch": 457.69736842105266, "grad_norm": 1.7850409746170044, "learning_rate": 0.0001, "loss": 0.0141, "step": 69570 }, { "epoch": 457.7631578947368, "grad_norm": 1.7477065324783325, "learning_rate": 0.0001, "loss": 0.0154, "step": 69580 }, { "epoch": 457.82894736842104, "grad_norm": 1.2485586404800415, "learning_rate": 0.0001, "loss": 0.0172, "step": 69590 }, { "epoch": 457.89473684210526, "grad_norm": 1.621709942817688, "learning_rate": 0.0001, "loss": 0.0127, "step": 69600 }, { "epoch": 457.9605263157895, "grad_norm": 1.371073842048645, "learning_rate": 0.0001, "loss": 0.0158, "step": 69610 }, { "epoch": 458.0263157894737, "grad_norm": 1.932740330696106, "learning_rate": 0.0001, "loss": 0.0151, "step": 69620 }, { "epoch": 458.0921052631579, "grad_norm": 1.6770697832107544, "learning_rate": 0.0001, "loss": 0.0137, "step": 69630 }, { "epoch": 458.1578947368421, "grad_norm": 1.2540737390518188, "learning_rate": 0.0001, "loss": 0.0128, "step": 69640 }, { "epoch": 458.2236842105263, "grad_norm": 1.351926565170288, "learning_rate": 0.0001, "loss": 0.0164, "step": 69650 }, { "epoch": 458.2894736842105, "grad_norm": 1.332767128944397, "learning_rate": 0.0001, "loss": 0.0182, "step": 69660 }, { "epoch": 458.35526315789474, "grad_norm": 1.3239047527313232, "learning_rate": 0.0001, "loss": 0.0197, "step": 69670 }, { "epoch": 458.42105263157896, "grad_norm": 1.260873794555664, "learning_rate": 0.0001, "loss": 0.0149, "step": 69680 }, { "epoch": 458.4868421052632, "grad_norm": 1.7345510721206665, "learning_rate": 0.0001, "loss": 0.0152, "step": 69690 }, { "epoch": 458.55263157894734, "grad_norm": 1.683102011680603, "learning_rate": 0.0001, "loss": 0.0149, "step": 69700 }, { "epoch": 458.61842105263156, "grad_norm": 1.6005388498306274, "learning_rate": 0.0001, "loss": 0.015, "step": 69710 }, { "epoch": 458.6842105263158, "grad_norm": 1.4819605350494385, "learning_rate": 0.0001, "loss": 0.0151, "step": 69720 }, { "epoch": 458.75, "grad_norm": 1.3727751970291138, "learning_rate": 0.0001, "loss": 0.016, "step": 69730 }, { "epoch": 458.8157894736842, "grad_norm": 1.5933035612106323, "learning_rate": 0.0001, "loss": 0.0124, "step": 69740 }, { "epoch": 458.88157894736844, "grad_norm": 1.6349194049835205, "learning_rate": 0.0001, "loss": 0.0218, "step": 69750 }, { "epoch": 458.94736842105266, "grad_norm": 1.7266526222229004, "learning_rate": 0.0001, "loss": 0.0132, "step": 69760 }, { "epoch": 459.0131578947368, "grad_norm": 1.557104468345642, "learning_rate": 0.0001, "loss": 0.0144, "step": 69770 }, { "epoch": 459.07894736842104, "grad_norm": 1.8013153076171875, "learning_rate": 0.0001, "loss": 0.0166, "step": 69780 }, { "epoch": 459.14473684210526, "grad_norm": 1.4524097442626953, "learning_rate": 0.0001, "loss": 0.0171, "step": 69790 }, { "epoch": 459.2105263157895, "grad_norm": 1.5467981100082397, "learning_rate": 0.0001, "loss": 0.0208, "step": 69800 }, { "epoch": 459.2763157894737, "grad_norm": 1.8655962944030762, "learning_rate": 0.0001, "loss": 0.0126, "step": 69810 }, { "epoch": 459.3421052631579, "grad_norm": 1.88294517993927, "learning_rate": 0.0001, "loss": 0.0149, "step": 69820 }, { "epoch": 459.4078947368421, "grad_norm": 1.8175716400146484, "learning_rate": 0.0001, "loss": 0.014, "step": 69830 }, { "epoch": 459.4736842105263, "grad_norm": 1.261814832687378, "learning_rate": 0.0001, "loss": 0.0155, "step": 69840 }, { "epoch": 459.5394736842105, "grad_norm": 1.6733351945877075, "learning_rate": 0.0001, "loss": 0.0173, "step": 69850 }, { "epoch": 459.60526315789474, "grad_norm": 1.7308354377746582, "learning_rate": 0.0001, "loss": 0.0158, "step": 69860 }, { "epoch": 459.67105263157896, "grad_norm": 1.4445691108703613, "learning_rate": 0.0001, "loss": 0.0119, "step": 69870 }, { "epoch": 459.7368421052632, "grad_norm": 1.46225106716156, "learning_rate": 0.0001, "loss": 0.0143, "step": 69880 }, { "epoch": 459.80263157894734, "grad_norm": 1.7080320119857788, "learning_rate": 0.0001, "loss": 0.0151, "step": 69890 }, { "epoch": 459.86842105263156, "grad_norm": 2.0960159301757812, "learning_rate": 0.0001, "loss": 0.0175, "step": 69900 }, { "epoch": 459.9342105263158, "grad_norm": 1.8708678483963013, "learning_rate": 0.0001, "loss": 0.0179, "step": 69910 }, { "epoch": 460.0, "grad_norm": 1.7453155517578125, "learning_rate": 0.0001, "loss": 0.0134, "step": 69920 }, { "epoch": 460.0657894736842, "grad_norm": 1.1936016082763672, "learning_rate": 0.0001, "loss": 0.0162, "step": 69930 }, { "epoch": 460.13157894736844, "grad_norm": 1.393593430519104, "learning_rate": 0.0001, "loss": 0.0188, "step": 69940 }, { "epoch": 460.19736842105266, "grad_norm": 1.4417222738265991, "learning_rate": 0.0001, "loss": 0.0133, "step": 69950 }, { "epoch": 460.2631578947368, "grad_norm": 1.9801045656204224, "learning_rate": 0.0001, "loss": 0.017, "step": 69960 }, { "epoch": 460.32894736842104, "grad_norm": 1.7555561065673828, "learning_rate": 0.0001, "loss": 0.0152, "step": 69970 }, { "epoch": 460.39473684210526, "grad_norm": 1.2609838247299194, "learning_rate": 0.0001, "loss": 0.0146, "step": 69980 }, { "epoch": 460.4605263157895, "grad_norm": 1.1191706657409668, "learning_rate": 0.0001, "loss": 0.0155, "step": 69990 }, { "epoch": 460.5263157894737, "grad_norm": 1.4808310270309448, "learning_rate": 0.0001, "loss": 0.0154, "step": 70000 }, { "epoch": 460.5921052631579, "grad_norm": 1.4580432176589966, "learning_rate": 0.0001, "loss": 0.0132, "step": 70010 }, { "epoch": 460.6578947368421, "grad_norm": 1.1449720859527588, "learning_rate": 0.0001, "loss": 0.0146, "step": 70020 }, { "epoch": 460.7236842105263, "grad_norm": 1.5376418828964233, "learning_rate": 0.0001, "loss": 0.0162, "step": 70030 }, { "epoch": 460.7894736842105, "grad_norm": 1.3791627883911133, "learning_rate": 0.0001, "loss": 0.0122, "step": 70040 }, { "epoch": 460.85526315789474, "grad_norm": 1.2019579410552979, "learning_rate": 0.0001, "loss": 0.0179, "step": 70050 }, { "epoch": 460.92105263157896, "grad_norm": 1.9968510866165161, "learning_rate": 0.0001, "loss": 0.0185, "step": 70060 }, { "epoch": 460.9868421052632, "grad_norm": 1.0512560606002808, "learning_rate": 0.0001, "loss": 0.0187, "step": 70070 }, { "epoch": 461.05263157894734, "grad_norm": 1.6900826692581177, "learning_rate": 0.0001, "loss": 0.0173, "step": 70080 }, { "epoch": 461.11842105263156, "grad_norm": 1.7631200551986694, "learning_rate": 0.0001, "loss": 0.0143, "step": 70090 }, { "epoch": 461.1842105263158, "grad_norm": 1.6366294622421265, "learning_rate": 0.0001, "loss": 0.0173, "step": 70100 }, { "epoch": 461.25, "grad_norm": 1.4653241634368896, "learning_rate": 0.0001, "loss": 0.0166, "step": 70110 }, { "epoch": 461.3157894736842, "grad_norm": 1.9392138719558716, "learning_rate": 0.0001, "loss": 0.0164, "step": 70120 }, { "epoch": 461.38157894736844, "grad_norm": 1.8768231868743896, "learning_rate": 0.0001, "loss": 0.0147, "step": 70130 }, { "epoch": 461.44736842105266, "grad_norm": 1.2898213863372803, "learning_rate": 0.0001, "loss": 0.0167, "step": 70140 }, { "epoch": 461.5131578947368, "grad_norm": 1.3720935583114624, "learning_rate": 0.0001, "loss": 0.0183, "step": 70150 }, { "epoch": 461.57894736842104, "grad_norm": 1.7546051740646362, "learning_rate": 0.0001, "loss": 0.0163, "step": 70160 }, { "epoch": 461.64473684210526, "grad_norm": 1.4036643505096436, "learning_rate": 0.0001, "loss": 0.0163, "step": 70170 }, { "epoch": 461.7105263157895, "grad_norm": 1.3117611408233643, "learning_rate": 0.0001, "loss": 0.014, "step": 70180 }, { "epoch": 461.7763157894737, "grad_norm": 1.7968146800994873, "learning_rate": 0.0001, "loss": 0.0153, "step": 70190 }, { "epoch": 461.8421052631579, "grad_norm": 1.6296019554138184, "learning_rate": 0.0001, "loss": 0.0145, "step": 70200 }, { "epoch": 461.9078947368421, "grad_norm": 1.9190239906311035, "learning_rate": 0.0001, "loss": 0.0152, "step": 70210 }, { "epoch": 461.9736842105263, "grad_norm": 1.7490113973617554, "learning_rate": 0.0001, "loss": 0.0142, "step": 70220 }, { "epoch": 462.0394736842105, "grad_norm": 1.8932032585144043, "learning_rate": 0.0001, "loss": 0.0155, "step": 70230 }, { "epoch": 462.10526315789474, "grad_norm": 2.041884660720825, "learning_rate": 0.0001, "loss": 0.0183, "step": 70240 }, { "epoch": 462.17105263157896, "grad_norm": 1.6981117725372314, "learning_rate": 0.0001, "loss": 0.0156, "step": 70250 }, { "epoch": 462.2368421052632, "grad_norm": 1.5204780101776123, "learning_rate": 0.0001, "loss": 0.0151, "step": 70260 }, { "epoch": 462.30263157894734, "grad_norm": 1.572152018547058, "learning_rate": 0.0001, "loss": 0.0152, "step": 70270 }, { "epoch": 462.36842105263156, "grad_norm": 1.266072392463684, "learning_rate": 0.0001, "loss": 0.0156, "step": 70280 }, { "epoch": 462.4342105263158, "grad_norm": 1.5004706382751465, "learning_rate": 0.0001, "loss": 0.0178, "step": 70290 }, { "epoch": 462.5, "grad_norm": 1.319403052330017, "learning_rate": 0.0001, "loss": 0.0138, "step": 70300 }, { "epoch": 462.5657894736842, "grad_norm": 1.5849862098693848, "learning_rate": 0.0001, "loss": 0.0142, "step": 70310 }, { "epoch": 462.63157894736844, "grad_norm": 1.354051947593689, "learning_rate": 0.0001, "loss": 0.0161, "step": 70320 }, { "epoch": 462.69736842105266, "grad_norm": 1.4037859439849854, "learning_rate": 0.0001, "loss": 0.0155, "step": 70330 }, { "epoch": 462.7631578947368, "grad_norm": 1.4438475370407104, "learning_rate": 0.0001, "loss": 0.0164, "step": 70340 }, { "epoch": 462.82894736842104, "grad_norm": 1.2555265426635742, "learning_rate": 0.0001, "loss": 0.0162, "step": 70350 }, { "epoch": 462.89473684210526, "grad_norm": 1.3591375350952148, "learning_rate": 0.0001, "loss": 0.0166, "step": 70360 }, { "epoch": 462.9605263157895, "grad_norm": 1.551222562789917, "learning_rate": 0.0001, "loss": 0.0135, "step": 70370 }, { "epoch": 463.0263157894737, "grad_norm": 1.2671229839324951, "learning_rate": 0.0001, "loss": 0.0145, "step": 70380 }, { "epoch": 463.0921052631579, "grad_norm": 1.7678836584091187, "learning_rate": 0.0001, "loss": 0.0148, "step": 70390 }, { "epoch": 463.1578947368421, "grad_norm": 1.8516225814819336, "learning_rate": 0.0001, "loss": 0.0166, "step": 70400 }, { "epoch": 463.2236842105263, "grad_norm": 1.7282814979553223, "learning_rate": 0.0001, "loss": 0.0197, "step": 70410 }, { "epoch": 463.2894736842105, "grad_norm": 1.5039981603622437, "learning_rate": 0.0001, "loss": 0.0132, "step": 70420 }, { "epoch": 463.35526315789474, "grad_norm": 1.068317174911499, "learning_rate": 0.0001, "loss": 0.0123, "step": 70430 }, { "epoch": 463.42105263157896, "grad_norm": 1.733543872833252, "learning_rate": 0.0001, "loss": 0.0145, "step": 70440 }, { "epoch": 463.4868421052632, "grad_norm": 1.938082218170166, "learning_rate": 0.0001, "loss": 0.0159, "step": 70450 }, { "epoch": 463.55263157894734, "grad_norm": 1.7242599725723267, "learning_rate": 0.0001, "loss": 0.0175, "step": 70460 }, { "epoch": 463.61842105263156, "grad_norm": 1.6068545579910278, "learning_rate": 0.0001, "loss": 0.0175, "step": 70470 }, { "epoch": 463.6842105263158, "grad_norm": 1.404812216758728, "learning_rate": 0.0001, "loss": 0.0159, "step": 70480 }, { "epoch": 463.75, "grad_norm": 1.1177066564559937, "learning_rate": 0.0001, "loss": 0.0162, "step": 70490 }, { "epoch": 463.8157894736842, "grad_norm": 1.2271690368652344, "learning_rate": 0.0001, "loss": 0.0112, "step": 70500 }, { "epoch": 463.88157894736844, "grad_norm": 1.7706197500228882, "learning_rate": 0.0001, "loss": 0.0183, "step": 70510 }, { "epoch": 463.94736842105266, "grad_norm": 1.633400797843933, "learning_rate": 0.0001, "loss": 0.0138, "step": 70520 }, { "epoch": 464.0131578947368, "grad_norm": 0.8890622854232788, "learning_rate": 0.0001, "loss": 0.0156, "step": 70530 }, { "epoch": 464.07894736842104, "grad_norm": 1.4249227046966553, "learning_rate": 0.0001, "loss": 0.0173, "step": 70540 }, { "epoch": 464.14473684210526, "grad_norm": 1.3524497747421265, "learning_rate": 0.0001, "loss": 0.0146, "step": 70550 }, { "epoch": 464.2105263157895, "grad_norm": 1.0431554317474365, "learning_rate": 0.0001, "loss": 0.0162, "step": 70560 }, { "epoch": 464.2763157894737, "grad_norm": 1.4330523014068604, "learning_rate": 0.0001, "loss": 0.0181, "step": 70570 }, { "epoch": 464.3421052631579, "grad_norm": 1.5023703575134277, "learning_rate": 0.0001, "loss": 0.0194, "step": 70580 }, { "epoch": 464.4078947368421, "grad_norm": 1.6279706954956055, "learning_rate": 0.0001, "loss": 0.0155, "step": 70590 }, { "epoch": 464.4736842105263, "grad_norm": 1.4923616647720337, "learning_rate": 0.0001, "loss": 0.0175, "step": 70600 }, { "epoch": 464.5394736842105, "grad_norm": 1.269059181213379, "learning_rate": 0.0001, "loss": 0.0176, "step": 70610 }, { "epoch": 464.60526315789474, "grad_norm": 1.3820507526397705, "learning_rate": 0.0001, "loss": 0.0126, "step": 70620 }, { "epoch": 464.67105263157896, "grad_norm": 1.588585376739502, "learning_rate": 0.0001, "loss": 0.0149, "step": 70630 }, { "epoch": 464.7368421052632, "grad_norm": 1.403121829032898, "learning_rate": 0.0001, "loss": 0.0144, "step": 70640 }, { "epoch": 464.80263157894734, "grad_norm": 1.740326166152954, "learning_rate": 0.0001, "loss": 0.0153, "step": 70650 }, { "epoch": 464.86842105263156, "grad_norm": 1.4435845613479614, "learning_rate": 0.0001, "loss": 0.0141, "step": 70660 }, { "epoch": 464.9342105263158, "grad_norm": 2.1043689250946045, "learning_rate": 0.0001, "loss": 0.0138, "step": 70670 }, { "epoch": 465.0, "grad_norm": 1.7511683702468872, "learning_rate": 0.0001, "loss": 0.0134, "step": 70680 }, { "epoch": 465.0657894736842, "grad_norm": 1.4779539108276367, "learning_rate": 0.0001, "loss": 0.0169, "step": 70690 }, { "epoch": 465.13157894736844, "grad_norm": 1.6760350465774536, "learning_rate": 0.0001, "loss": 0.0177, "step": 70700 }, { "epoch": 465.19736842105266, "grad_norm": 1.0465861558914185, "learning_rate": 0.0001, "loss": 0.0155, "step": 70710 }, { "epoch": 465.2631578947368, "grad_norm": 1.4910246133804321, "learning_rate": 0.0001, "loss": 0.0158, "step": 70720 }, { "epoch": 465.32894736842104, "grad_norm": 1.2839100360870361, "learning_rate": 0.0001, "loss": 0.016, "step": 70730 }, { "epoch": 465.39473684210526, "grad_norm": 1.44446861743927, "learning_rate": 0.0001, "loss": 0.0139, "step": 70740 }, { "epoch": 465.4605263157895, "grad_norm": 1.4013389348983765, "learning_rate": 0.0001, "loss": 0.0122, "step": 70750 }, { "epoch": 465.5263157894737, "grad_norm": 1.4317208528518677, "learning_rate": 0.0001, "loss": 0.0169, "step": 70760 }, { "epoch": 465.5921052631579, "grad_norm": 1.5502283573150635, "learning_rate": 0.0001, "loss": 0.0142, "step": 70770 }, { "epoch": 465.6578947368421, "grad_norm": 1.3934698104858398, "learning_rate": 0.0001, "loss": 0.0158, "step": 70780 }, { "epoch": 465.7236842105263, "grad_norm": 1.5977383852005005, "learning_rate": 0.0001, "loss": 0.016, "step": 70790 }, { "epoch": 465.7894736842105, "grad_norm": 1.3795751333236694, "learning_rate": 0.0001, "loss": 0.0225, "step": 70800 }, { "epoch": 465.85526315789474, "grad_norm": 1.3938792943954468, "learning_rate": 0.0001, "loss": 0.0125, "step": 70810 }, { "epoch": 465.92105263157896, "grad_norm": 1.9437862634658813, "learning_rate": 0.0001, "loss": 0.012, "step": 70820 }, { "epoch": 465.9868421052632, "grad_norm": 1.9054239988327026, "learning_rate": 0.0001, "loss": 0.0176, "step": 70830 }, { "epoch": 466.05263157894734, "grad_norm": 1.6017059087753296, "learning_rate": 0.0001, "loss": 0.0131, "step": 70840 }, { "epoch": 466.11842105263156, "grad_norm": 1.5471240282058716, "learning_rate": 0.0001, "loss": 0.0154, "step": 70850 }, { "epoch": 466.1842105263158, "grad_norm": 1.7671669721603394, "learning_rate": 0.0001, "loss": 0.0128, "step": 70860 }, { "epoch": 466.25, "grad_norm": 1.0443124771118164, "learning_rate": 0.0001, "loss": 0.0175, "step": 70870 }, { "epoch": 466.3157894736842, "grad_norm": 1.8501709699630737, "learning_rate": 0.0001, "loss": 0.0136, "step": 70880 }, { "epoch": 466.38157894736844, "grad_norm": 1.6644883155822754, "learning_rate": 0.0001, "loss": 0.0183, "step": 70890 }, { "epoch": 466.44736842105266, "grad_norm": 1.740768551826477, "learning_rate": 0.0001, "loss": 0.0224, "step": 70900 }, { "epoch": 466.5131578947368, "grad_norm": 1.730405569076538, "learning_rate": 0.0001, "loss": 0.0128, "step": 70910 }, { "epoch": 466.57894736842104, "grad_norm": 1.7452621459960938, "learning_rate": 0.0001, "loss": 0.0149, "step": 70920 }, { "epoch": 466.64473684210526, "grad_norm": 1.865479826927185, "learning_rate": 0.0001, "loss": 0.0159, "step": 70930 }, { "epoch": 466.7105263157895, "grad_norm": 1.187947392463684, "learning_rate": 0.0001, "loss": 0.0151, "step": 70940 }, { "epoch": 466.7763157894737, "grad_norm": 1.688268780708313, "learning_rate": 0.0001, "loss": 0.0163, "step": 70950 }, { "epoch": 466.8421052631579, "grad_norm": 2.028414011001587, "learning_rate": 0.0001, "loss": 0.0138, "step": 70960 }, { "epoch": 466.9078947368421, "grad_norm": 1.7195439338684082, "learning_rate": 0.0001, "loss": 0.0131, "step": 70970 }, { "epoch": 466.9736842105263, "grad_norm": 1.3745405673980713, "learning_rate": 0.0001, "loss": 0.0138, "step": 70980 }, { "epoch": 467.0394736842105, "grad_norm": 1.5170868635177612, "learning_rate": 0.0001, "loss": 0.0179, "step": 70990 }, { "epoch": 467.10526315789474, "grad_norm": 2.164524555206299, "learning_rate": 0.0001, "loss": 0.0194, "step": 71000 }, { "epoch": 467.17105263157896, "grad_norm": 1.6909769773483276, "learning_rate": 0.0001, "loss": 0.0145, "step": 71010 }, { "epoch": 467.2368421052632, "grad_norm": 1.3761460781097412, "learning_rate": 0.0001, "loss": 0.0146, "step": 71020 }, { "epoch": 467.30263157894734, "grad_norm": 1.1385833024978638, "learning_rate": 0.0001, "loss": 0.0124, "step": 71030 }, { "epoch": 467.36842105263156, "grad_norm": 1.454524278640747, "learning_rate": 0.0001, "loss": 0.0141, "step": 71040 }, { "epoch": 467.4342105263158, "grad_norm": 1.3806633949279785, "learning_rate": 0.0001, "loss": 0.0165, "step": 71050 }, { "epoch": 467.5, "grad_norm": 1.8850992918014526, "learning_rate": 0.0001, "loss": 0.0151, "step": 71060 }, { "epoch": 467.5657894736842, "grad_norm": 1.8595985174179077, "learning_rate": 0.0001, "loss": 0.0155, "step": 71070 }, { "epoch": 467.63157894736844, "grad_norm": 1.4198901653289795, "learning_rate": 0.0001, "loss": 0.014, "step": 71080 }, { "epoch": 467.69736842105266, "grad_norm": 1.6048731803894043, "learning_rate": 0.0001, "loss": 0.013, "step": 71090 }, { "epoch": 467.7631578947368, "grad_norm": 1.532891035079956, "learning_rate": 0.0001, "loss": 0.0197, "step": 71100 }, { "epoch": 467.82894736842104, "grad_norm": 1.0774173736572266, "learning_rate": 0.0001, "loss": 0.0178, "step": 71110 }, { "epoch": 467.89473684210526, "grad_norm": 1.2318171262741089, "learning_rate": 0.0001, "loss": 0.0146, "step": 71120 }, { "epoch": 467.9605263157895, "grad_norm": 1.4100641012191772, "learning_rate": 0.0001, "loss": 0.0141, "step": 71130 }, { "epoch": 468.0263157894737, "grad_norm": 1.2289830446243286, "learning_rate": 0.0001, "loss": 0.0145, "step": 71140 }, { "epoch": 468.0921052631579, "grad_norm": 1.5142031908035278, "learning_rate": 0.0001, "loss": 0.016, "step": 71150 }, { "epoch": 468.1578947368421, "grad_norm": 1.4365257024765015, "learning_rate": 0.0001, "loss": 0.0179, "step": 71160 }, { "epoch": 468.2236842105263, "grad_norm": 1.6656630039215088, "learning_rate": 0.0001, "loss": 0.0161, "step": 71170 }, { "epoch": 468.2894736842105, "grad_norm": 1.7788236141204834, "learning_rate": 0.0001, "loss": 0.0157, "step": 71180 }, { "epoch": 468.35526315789474, "grad_norm": 1.8691142797470093, "learning_rate": 0.0001, "loss": 0.0189, "step": 71190 }, { "epoch": 468.42105263157896, "grad_norm": 1.6484986543655396, "learning_rate": 0.0001, "loss": 0.0162, "step": 71200 }, { "epoch": 468.4868421052632, "grad_norm": 1.7149133682250977, "learning_rate": 0.0001, "loss": 0.0145, "step": 71210 }, { "epoch": 468.55263157894734, "grad_norm": 1.6760655641555786, "learning_rate": 0.0001, "loss": 0.0149, "step": 71220 }, { "epoch": 468.61842105263156, "grad_norm": 1.2820948362350464, "learning_rate": 0.0001, "loss": 0.0145, "step": 71230 }, { "epoch": 468.6842105263158, "grad_norm": 1.2294707298278809, "learning_rate": 0.0001, "loss": 0.0195, "step": 71240 }, { "epoch": 468.75, "grad_norm": 1.2735177278518677, "learning_rate": 0.0001, "loss": 0.0135, "step": 71250 }, { "epoch": 468.8157894736842, "grad_norm": 1.370228886604309, "learning_rate": 0.0001, "loss": 0.0145, "step": 71260 }, { "epoch": 468.88157894736844, "grad_norm": 1.4931796789169312, "learning_rate": 0.0001, "loss": 0.02, "step": 71270 }, { "epoch": 468.94736842105266, "grad_norm": 1.314003586769104, "learning_rate": 0.0001, "loss": 0.0128, "step": 71280 }, { "epoch": 469.0131578947368, "grad_norm": 1.414503812789917, "learning_rate": 0.0001, "loss": 0.0143, "step": 71290 }, { "epoch": 469.07894736842104, "grad_norm": 1.697017788887024, "learning_rate": 0.0001, "loss": 0.0125, "step": 71300 }, { "epoch": 469.14473684210526, "grad_norm": 1.5583628416061401, "learning_rate": 0.0001, "loss": 0.0158, "step": 71310 }, { "epoch": 469.2105263157895, "grad_norm": 1.448448896408081, "learning_rate": 0.0001, "loss": 0.0133, "step": 71320 }, { "epoch": 469.2763157894737, "grad_norm": 1.5050476789474487, "learning_rate": 0.0001, "loss": 0.0155, "step": 71330 }, { "epoch": 469.3421052631579, "grad_norm": 1.8165885210037231, "learning_rate": 0.0001, "loss": 0.0155, "step": 71340 }, { "epoch": 469.4078947368421, "grad_norm": 1.5809986591339111, "learning_rate": 0.0001, "loss": 0.0136, "step": 71350 }, { "epoch": 469.4736842105263, "grad_norm": 1.3418208360671997, "learning_rate": 0.0001, "loss": 0.0157, "step": 71360 }, { "epoch": 469.5394736842105, "grad_norm": 1.3505558967590332, "learning_rate": 0.0001, "loss": 0.0161, "step": 71370 }, { "epoch": 469.60526315789474, "grad_norm": 1.643871545791626, "learning_rate": 0.0001, "loss": 0.0171, "step": 71380 }, { "epoch": 469.67105263157896, "grad_norm": 1.3109872341156006, "learning_rate": 0.0001, "loss": 0.0183, "step": 71390 }, { "epoch": 469.7368421052632, "grad_norm": 1.6277658939361572, "learning_rate": 0.0001, "loss": 0.0132, "step": 71400 }, { "epoch": 469.80263157894734, "grad_norm": 1.4948947429656982, "learning_rate": 0.0001, "loss": 0.0133, "step": 71410 }, { "epoch": 469.86842105263156, "grad_norm": 1.741771936416626, "learning_rate": 0.0001, "loss": 0.017, "step": 71420 }, { "epoch": 469.9342105263158, "grad_norm": 1.3590710163116455, "learning_rate": 0.0001, "loss": 0.0193, "step": 71430 }, { "epoch": 470.0, "grad_norm": 1.5735094547271729, "learning_rate": 0.0001, "loss": 0.0191, "step": 71440 }, { "epoch": 470.0657894736842, "grad_norm": 1.6384754180908203, "learning_rate": 0.0001, "loss": 0.019, "step": 71450 }, { "epoch": 470.13157894736844, "grad_norm": 1.5427271127700806, "learning_rate": 0.0001, "loss": 0.0158, "step": 71460 }, { "epoch": 470.19736842105266, "grad_norm": 1.3202970027923584, "learning_rate": 0.0001, "loss": 0.0179, "step": 71470 }, { "epoch": 470.2631578947368, "grad_norm": 1.4436639547348022, "learning_rate": 0.0001, "loss": 0.0153, "step": 71480 }, { "epoch": 470.32894736842104, "grad_norm": 1.6033507585525513, "learning_rate": 0.0001, "loss": 0.0125, "step": 71490 }, { "epoch": 470.39473684210526, "grad_norm": 1.5431034564971924, "learning_rate": 0.0001, "loss": 0.0128, "step": 71500 }, { "epoch": 470.4605263157895, "grad_norm": 1.5073713064193726, "learning_rate": 0.0001, "loss": 0.014, "step": 71510 }, { "epoch": 470.5263157894737, "grad_norm": 1.0566364526748657, "learning_rate": 0.0001, "loss": 0.0161, "step": 71520 }, { "epoch": 470.5921052631579, "grad_norm": 1.2565081119537354, "learning_rate": 0.0001, "loss": 0.0138, "step": 71530 }, { "epoch": 470.6578947368421, "grad_norm": 1.2019505500793457, "learning_rate": 0.0001, "loss": 0.0169, "step": 71540 }, { "epoch": 470.7236842105263, "grad_norm": 1.453676462173462, "learning_rate": 0.0001, "loss": 0.0146, "step": 71550 }, { "epoch": 470.7894736842105, "grad_norm": 1.2191977500915527, "learning_rate": 0.0001, "loss": 0.0155, "step": 71560 }, { "epoch": 470.85526315789474, "grad_norm": 1.5858066082000732, "learning_rate": 0.0001, "loss": 0.0137, "step": 71570 }, { "epoch": 470.92105263157896, "grad_norm": 1.281145453453064, "learning_rate": 0.0001, "loss": 0.0143, "step": 71580 }, { "epoch": 470.9868421052632, "grad_norm": 0.9527329206466675, "learning_rate": 0.0001, "loss": 0.017, "step": 71590 }, { "epoch": 471.05263157894734, "grad_norm": 1.3117713928222656, "learning_rate": 0.0001, "loss": 0.0187, "step": 71600 }, { "epoch": 471.11842105263156, "grad_norm": 1.706547737121582, "learning_rate": 0.0001, "loss": 0.0175, "step": 71610 }, { "epoch": 471.1842105263158, "grad_norm": 1.8807752132415771, "learning_rate": 0.0001, "loss": 0.0177, "step": 71620 }, { "epoch": 471.25, "grad_norm": 1.5237853527069092, "learning_rate": 0.0001, "loss": 0.0159, "step": 71630 }, { "epoch": 471.3157894736842, "grad_norm": 1.7118357419967651, "learning_rate": 0.0001, "loss": 0.0129, "step": 71640 }, { "epoch": 471.38157894736844, "grad_norm": 1.8359836339950562, "learning_rate": 0.0001, "loss": 0.0153, "step": 71650 }, { "epoch": 471.44736842105266, "grad_norm": 1.5200464725494385, "learning_rate": 0.0001, "loss": 0.0127, "step": 71660 }, { "epoch": 471.5131578947368, "grad_norm": 1.002292513847351, "learning_rate": 0.0001, "loss": 0.0149, "step": 71670 }, { "epoch": 471.57894736842104, "grad_norm": 1.5470978021621704, "learning_rate": 0.0001, "loss": 0.0144, "step": 71680 }, { "epoch": 471.64473684210526, "grad_norm": 1.2576056718826294, "learning_rate": 0.0001, "loss": 0.0175, "step": 71690 }, { "epoch": 471.7105263157895, "grad_norm": 1.6599750518798828, "learning_rate": 0.0001, "loss": 0.0118, "step": 71700 }, { "epoch": 471.7763157894737, "grad_norm": 1.5976037979125977, "learning_rate": 0.0001, "loss": 0.0143, "step": 71710 }, { "epoch": 471.8421052631579, "grad_norm": 1.902833342552185, "learning_rate": 0.0001, "loss": 0.021, "step": 71720 }, { "epoch": 471.9078947368421, "grad_norm": 1.739035964012146, "learning_rate": 0.0001, "loss": 0.0172, "step": 71730 }, { "epoch": 471.9736842105263, "grad_norm": 1.2280492782592773, "learning_rate": 0.0001, "loss": 0.0142, "step": 71740 }, { "epoch": 472.0394736842105, "grad_norm": 1.434783697128296, "learning_rate": 0.0001, "loss": 0.0134, "step": 71750 }, { "epoch": 472.10526315789474, "grad_norm": 1.740842580795288, "learning_rate": 0.0001, "loss": 0.013, "step": 71760 }, { "epoch": 472.17105263157896, "grad_norm": 1.7777490615844727, "learning_rate": 0.0001, "loss": 0.016, "step": 71770 }, { "epoch": 472.2368421052632, "grad_norm": 1.3075305223464966, "learning_rate": 0.0001, "loss": 0.0163, "step": 71780 }, { "epoch": 472.30263157894734, "grad_norm": 1.2506290674209595, "learning_rate": 0.0001, "loss": 0.0197, "step": 71790 }, { "epoch": 472.36842105263156, "grad_norm": 1.4435471296310425, "learning_rate": 0.0001, "loss": 0.0137, "step": 71800 }, { "epoch": 472.4342105263158, "grad_norm": 1.5599722862243652, "learning_rate": 0.0001, "loss": 0.016, "step": 71810 }, { "epoch": 472.5, "grad_norm": 1.6186202764511108, "learning_rate": 0.0001, "loss": 0.0135, "step": 71820 }, { "epoch": 472.5657894736842, "grad_norm": 1.5431476831436157, "learning_rate": 0.0001, "loss": 0.0151, "step": 71830 }, { "epoch": 472.63157894736844, "grad_norm": 1.4118298292160034, "learning_rate": 0.0001, "loss": 0.0147, "step": 71840 }, { "epoch": 472.69736842105266, "grad_norm": 1.4307887554168701, "learning_rate": 0.0001, "loss": 0.0146, "step": 71850 }, { "epoch": 472.7631578947368, "grad_norm": 1.421120524406433, "learning_rate": 0.0001, "loss": 0.0154, "step": 71860 }, { "epoch": 472.82894736842104, "grad_norm": 1.8035629987716675, "learning_rate": 0.0001, "loss": 0.0146, "step": 71870 }, { "epoch": 472.89473684210526, "grad_norm": 1.819551944732666, "learning_rate": 0.0001, "loss": 0.017, "step": 71880 }, { "epoch": 472.9605263157895, "grad_norm": 1.2584269046783447, "learning_rate": 0.0001, "loss": 0.0185, "step": 71890 }, { "epoch": 473.0263157894737, "grad_norm": 1.374531626701355, "learning_rate": 0.0001, "loss": 0.0176, "step": 71900 }, { "epoch": 473.0921052631579, "grad_norm": 1.364597201347351, "learning_rate": 0.0001, "loss": 0.0154, "step": 71910 }, { "epoch": 473.1578947368421, "grad_norm": 1.917728066444397, "learning_rate": 0.0001, "loss": 0.0159, "step": 71920 }, { "epoch": 473.2236842105263, "grad_norm": 1.3913052082061768, "learning_rate": 0.0001, "loss": 0.015, "step": 71930 }, { "epoch": 473.2894736842105, "grad_norm": 1.1730279922485352, "learning_rate": 0.0001, "loss": 0.015, "step": 71940 }, { "epoch": 473.35526315789474, "grad_norm": 1.4338197708129883, "learning_rate": 0.0001, "loss": 0.0158, "step": 71950 }, { "epoch": 473.42105263157896, "grad_norm": 1.4859975576400757, "learning_rate": 0.0001, "loss": 0.0151, "step": 71960 }, { "epoch": 473.4868421052632, "grad_norm": 1.8870073556900024, "learning_rate": 0.0001, "loss": 0.0138, "step": 71970 }, { "epoch": 473.55263157894734, "grad_norm": 1.611767292022705, "learning_rate": 0.0001, "loss": 0.0161, "step": 71980 }, { "epoch": 473.61842105263156, "grad_norm": 1.1482802629470825, "learning_rate": 0.0001, "loss": 0.0149, "step": 71990 }, { "epoch": 473.6842105263158, "grad_norm": 1.5529899597167969, "learning_rate": 0.0001, "loss": 0.0135, "step": 72000 }, { "epoch": 473.75, "grad_norm": 1.8564552068710327, "learning_rate": 0.0001, "loss": 0.0172, "step": 72010 }, { "epoch": 473.8157894736842, "grad_norm": 1.599692463874817, "learning_rate": 0.0001, "loss": 0.0189, "step": 72020 }, { "epoch": 473.88157894736844, "grad_norm": 1.3397053480148315, "learning_rate": 0.0001, "loss": 0.0149, "step": 72030 }, { "epoch": 473.94736842105266, "grad_norm": 1.2365679740905762, "learning_rate": 0.0001, "loss": 0.0191, "step": 72040 }, { "epoch": 474.0131578947368, "grad_norm": 1.364189624786377, "learning_rate": 0.0001, "loss": 0.0149, "step": 72050 }, { "epoch": 474.07894736842104, "grad_norm": 1.4554461240768433, "learning_rate": 0.0001, "loss": 0.0165, "step": 72060 }, { "epoch": 474.14473684210526, "grad_norm": 1.8293308019638062, "learning_rate": 0.0001, "loss": 0.0135, "step": 72070 }, { "epoch": 474.2105263157895, "grad_norm": 1.4347549676895142, "learning_rate": 0.0001, "loss": 0.0157, "step": 72080 }, { "epoch": 474.2763157894737, "grad_norm": 1.4232851266860962, "learning_rate": 0.0001, "loss": 0.0149, "step": 72090 }, { "epoch": 474.3421052631579, "grad_norm": 1.5057817697525024, "learning_rate": 0.0001, "loss": 0.0121, "step": 72100 }, { "epoch": 474.4078947368421, "grad_norm": 1.6094924211502075, "learning_rate": 0.0001, "loss": 0.0154, "step": 72110 }, { "epoch": 474.4736842105263, "grad_norm": 1.7998003959655762, "learning_rate": 0.0001, "loss": 0.018, "step": 72120 }, { "epoch": 474.5394736842105, "grad_norm": 1.8872114419937134, "learning_rate": 0.0001, "loss": 0.0136, "step": 72130 }, { "epoch": 474.60526315789474, "grad_norm": 1.7537007331848145, "learning_rate": 0.0001, "loss": 0.015, "step": 72140 }, { "epoch": 474.67105263157896, "grad_norm": 1.5925770998001099, "learning_rate": 0.0001, "loss": 0.0128, "step": 72150 }, { "epoch": 474.7368421052632, "grad_norm": 1.769515037536621, "learning_rate": 0.0001, "loss": 0.0176, "step": 72160 }, { "epoch": 474.80263157894734, "grad_norm": 1.7562705278396606, "learning_rate": 0.0001, "loss": 0.02, "step": 72170 }, { "epoch": 474.86842105263156, "grad_norm": 1.7212797403335571, "learning_rate": 0.0001, "loss": 0.017, "step": 72180 }, { "epoch": 474.9342105263158, "grad_norm": 1.6153982877731323, "learning_rate": 0.0001, "loss": 0.0154, "step": 72190 }, { "epoch": 475.0, "grad_norm": 1.648604154586792, "learning_rate": 0.0001, "loss": 0.0154, "step": 72200 }, { "epoch": 475.0657894736842, "grad_norm": 1.6120530366897583, "learning_rate": 0.0001, "loss": 0.016, "step": 72210 }, { "epoch": 475.13157894736844, "grad_norm": 2.152064800262451, "learning_rate": 0.0001, "loss": 0.0156, "step": 72220 }, { "epoch": 475.19736842105266, "grad_norm": 2.0019941329956055, "learning_rate": 0.0001, "loss": 0.0145, "step": 72230 }, { "epoch": 475.2631578947368, "grad_norm": 1.7622014284133911, "learning_rate": 0.0001, "loss": 0.019, "step": 72240 }, { "epoch": 475.32894736842104, "grad_norm": 1.5692740678787231, "learning_rate": 0.0001, "loss": 0.0172, "step": 72250 }, { "epoch": 475.39473684210526, "grad_norm": 1.982250690460205, "learning_rate": 0.0001, "loss": 0.0131, "step": 72260 }, { "epoch": 475.4605263157895, "grad_norm": 1.72684645652771, "learning_rate": 0.0001, "loss": 0.0155, "step": 72270 }, { "epoch": 475.5263157894737, "grad_norm": 1.4640594720840454, "learning_rate": 0.0001, "loss": 0.017, "step": 72280 }, { "epoch": 475.5921052631579, "grad_norm": 1.762850046157837, "learning_rate": 0.0001, "loss": 0.0127, "step": 72290 }, { "epoch": 475.6578947368421, "grad_norm": 1.4820760488510132, "learning_rate": 0.0001, "loss": 0.0119, "step": 72300 }, { "epoch": 475.7236842105263, "grad_norm": 1.2893441915512085, "learning_rate": 0.0001, "loss": 0.0197, "step": 72310 }, { "epoch": 475.7894736842105, "grad_norm": 0.8915643095970154, "learning_rate": 0.0001, "loss": 0.0181, "step": 72320 }, { "epoch": 475.85526315789474, "grad_norm": 1.7238891124725342, "learning_rate": 0.0001, "loss": 0.0116, "step": 72330 }, { "epoch": 475.92105263157896, "grad_norm": 1.6755975484848022, "learning_rate": 0.0001, "loss": 0.0164, "step": 72340 }, { "epoch": 475.9868421052632, "grad_norm": 1.59906804561615, "learning_rate": 0.0001, "loss": 0.0137, "step": 72350 }, { "epoch": 476.05263157894734, "grad_norm": 1.586000919342041, "learning_rate": 0.0001, "loss": 0.0163, "step": 72360 }, { "epoch": 476.11842105263156, "grad_norm": 1.713797688484192, "learning_rate": 0.0001, "loss": 0.0141, "step": 72370 }, { "epoch": 476.1842105263158, "grad_norm": 1.498211145401001, "learning_rate": 0.0001, "loss": 0.0134, "step": 72380 }, { "epoch": 476.25, "grad_norm": 1.6731606721878052, "learning_rate": 0.0001, "loss": 0.016, "step": 72390 }, { "epoch": 476.3157894736842, "grad_norm": 1.6435304880142212, "learning_rate": 0.0001, "loss": 0.0133, "step": 72400 }, { "epoch": 476.38157894736844, "grad_norm": 1.753783106803894, "learning_rate": 0.0001, "loss": 0.0152, "step": 72410 }, { "epoch": 476.44736842105266, "grad_norm": 1.5169423818588257, "learning_rate": 0.0001, "loss": 0.0149, "step": 72420 }, { "epoch": 476.5131578947368, "grad_norm": 1.420357584953308, "learning_rate": 0.0001, "loss": 0.015, "step": 72430 }, { "epoch": 476.57894736842104, "grad_norm": 1.2480303049087524, "learning_rate": 0.0001, "loss": 0.0146, "step": 72440 }, { "epoch": 476.64473684210526, "grad_norm": 1.2801772356033325, "learning_rate": 0.0001, "loss": 0.021, "step": 72450 }, { "epoch": 476.7105263157895, "grad_norm": 1.8527448177337646, "learning_rate": 0.0001, "loss": 0.0143, "step": 72460 }, { "epoch": 476.7763157894737, "grad_norm": 1.598344326019287, "learning_rate": 0.0001, "loss": 0.0121, "step": 72470 }, { "epoch": 476.8421052631579, "grad_norm": 0.9705625772476196, "learning_rate": 0.0001, "loss": 0.0175, "step": 72480 }, { "epoch": 476.9078947368421, "grad_norm": 1.6487805843353271, "learning_rate": 0.0001, "loss": 0.0188, "step": 72490 }, { "epoch": 476.9736842105263, "grad_norm": 1.3463419675827026, "learning_rate": 0.0001, "loss": 0.0151, "step": 72500 }, { "epoch": 477.0394736842105, "grad_norm": 1.5116993188858032, "learning_rate": 0.0001, "loss": 0.0139, "step": 72510 }, { "epoch": 477.10526315789474, "grad_norm": 1.2813403606414795, "learning_rate": 0.0001, "loss": 0.0155, "step": 72520 }, { "epoch": 477.17105263157896, "grad_norm": 1.3496750593185425, "learning_rate": 0.0001, "loss": 0.0154, "step": 72530 }, { "epoch": 477.2368421052632, "grad_norm": 1.3964858055114746, "learning_rate": 0.0001, "loss": 0.0128, "step": 72540 }, { "epoch": 477.30263157894734, "grad_norm": 1.7896513938903809, "learning_rate": 0.0001, "loss": 0.0147, "step": 72550 }, { "epoch": 477.36842105263156, "grad_norm": 2.0585856437683105, "learning_rate": 0.0001, "loss": 0.015, "step": 72560 }, { "epoch": 477.4342105263158, "grad_norm": 1.9837523698806763, "learning_rate": 0.0001, "loss": 0.0169, "step": 72570 }, { "epoch": 477.5, "grad_norm": 1.705234408378601, "learning_rate": 0.0001, "loss": 0.0137, "step": 72580 }, { "epoch": 477.5657894736842, "grad_norm": 1.795784831047058, "learning_rate": 0.0001, "loss": 0.017, "step": 72590 }, { "epoch": 477.63157894736844, "grad_norm": 1.452804446220398, "learning_rate": 0.0001, "loss": 0.0144, "step": 72600 }, { "epoch": 477.69736842105266, "grad_norm": 1.7960149049758911, "learning_rate": 0.0001, "loss": 0.0192, "step": 72610 }, { "epoch": 477.7631578947368, "grad_norm": 1.5105633735656738, "learning_rate": 0.0001, "loss": 0.0158, "step": 72620 }, { "epoch": 477.82894736842104, "grad_norm": 1.6311664581298828, "learning_rate": 0.0001, "loss": 0.0119, "step": 72630 }, { "epoch": 477.89473684210526, "grad_norm": 1.8633275032043457, "learning_rate": 0.0001, "loss": 0.0182, "step": 72640 }, { "epoch": 477.9605263157895, "grad_norm": 1.364715576171875, "learning_rate": 0.0001, "loss": 0.0179, "step": 72650 }, { "epoch": 478.0263157894737, "grad_norm": 1.4199533462524414, "learning_rate": 0.0001, "loss": 0.0154, "step": 72660 }, { "epoch": 478.0921052631579, "grad_norm": 1.3692951202392578, "learning_rate": 0.0001, "loss": 0.0156, "step": 72670 }, { "epoch": 478.1578947368421, "grad_norm": 1.4855948686599731, "learning_rate": 0.0001, "loss": 0.0133, "step": 72680 }, { "epoch": 478.2236842105263, "grad_norm": 1.533079981803894, "learning_rate": 0.0001, "loss": 0.0168, "step": 72690 }, { "epoch": 478.2894736842105, "grad_norm": 1.191662311553955, "learning_rate": 0.0001, "loss": 0.0186, "step": 72700 }, { "epoch": 478.35526315789474, "grad_norm": 1.2179467678070068, "learning_rate": 0.0001, "loss": 0.016, "step": 72710 }, { "epoch": 478.42105263157896, "grad_norm": 1.9005252122879028, "learning_rate": 0.0001, "loss": 0.0181, "step": 72720 }, { "epoch": 478.4868421052632, "grad_norm": 1.7308893203735352, "learning_rate": 0.0001, "loss": 0.0164, "step": 72730 }, { "epoch": 478.55263157894734, "grad_norm": 1.8332786560058594, "learning_rate": 0.0001, "loss": 0.0132, "step": 72740 }, { "epoch": 478.61842105263156, "grad_norm": 1.3487495183944702, "learning_rate": 0.0001, "loss": 0.0144, "step": 72750 }, { "epoch": 478.6842105263158, "grad_norm": 1.6505239009857178, "learning_rate": 0.0001, "loss": 0.016, "step": 72760 }, { "epoch": 478.75, "grad_norm": 1.148978590965271, "learning_rate": 0.0001, "loss": 0.0173, "step": 72770 }, { "epoch": 478.8157894736842, "grad_norm": 1.1190227270126343, "learning_rate": 0.0001, "loss": 0.0157, "step": 72780 }, { "epoch": 478.88157894736844, "grad_norm": 1.6515069007873535, "learning_rate": 0.0001, "loss": 0.0165, "step": 72790 }, { "epoch": 478.94736842105266, "grad_norm": 1.6388260126113892, "learning_rate": 0.0001, "loss": 0.0117, "step": 72800 }, { "epoch": 479.0131578947368, "grad_norm": 1.8408125638961792, "learning_rate": 0.0001, "loss": 0.0162, "step": 72810 }, { "epoch": 479.07894736842104, "grad_norm": 1.2837852239608765, "learning_rate": 0.0001, "loss": 0.0128, "step": 72820 }, { "epoch": 479.14473684210526, "grad_norm": 1.7073440551757812, "learning_rate": 0.0001, "loss": 0.0131, "step": 72830 }, { "epoch": 479.2105263157895, "grad_norm": 1.3732340335845947, "learning_rate": 0.0001, "loss": 0.014, "step": 72840 }, { "epoch": 479.2763157894737, "grad_norm": 1.466614007949829, "learning_rate": 0.0001, "loss": 0.0185, "step": 72850 }, { "epoch": 479.3421052631579, "grad_norm": 1.3512035608291626, "learning_rate": 0.0001, "loss": 0.0151, "step": 72860 }, { "epoch": 479.4078947368421, "grad_norm": 1.2232993841171265, "learning_rate": 0.0001, "loss": 0.0194, "step": 72870 }, { "epoch": 479.4736842105263, "grad_norm": 1.1553601026535034, "learning_rate": 0.0001, "loss": 0.0151, "step": 72880 }, { "epoch": 479.5394736842105, "grad_norm": 1.198777675628662, "learning_rate": 0.0001, "loss": 0.0154, "step": 72890 }, { "epoch": 479.60526315789474, "grad_norm": 1.5431034564971924, "learning_rate": 0.0001, "loss": 0.0137, "step": 72900 }, { "epoch": 479.67105263157896, "grad_norm": 1.4319827556610107, "learning_rate": 0.0001, "loss": 0.017, "step": 72910 }, { "epoch": 479.7368421052632, "grad_norm": 1.3184876441955566, "learning_rate": 0.0001, "loss": 0.0149, "step": 72920 }, { "epoch": 479.80263157894734, "grad_norm": 1.6336610317230225, "learning_rate": 0.0001, "loss": 0.0201, "step": 72930 }, { "epoch": 479.86842105263156, "grad_norm": 1.6287697553634644, "learning_rate": 0.0001, "loss": 0.017, "step": 72940 }, { "epoch": 479.9342105263158, "grad_norm": 1.4460896253585815, "learning_rate": 0.0001, "loss": 0.0119, "step": 72950 }, { "epoch": 480.0, "grad_norm": 1.6723976135253906, "learning_rate": 0.0001, "loss": 0.0139, "step": 72960 }, { "epoch": 480.0657894736842, "grad_norm": 1.6785616874694824, "learning_rate": 0.0001, "loss": 0.0141, "step": 72970 }, { "epoch": 480.13157894736844, "grad_norm": 1.366433024406433, "learning_rate": 0.0001, "loss": 0.014, "step": 72980 }, { "epoch": 480.19736842105266, "grad_norm": 1.5112102031707764, "learning_rate": 0.0001, "loss": 0.0157, "step": 72990 }, { "epoch": 480.2631578947368, "grad_norm": 1.2236528396606445, "learning_rate": 0.0001, "loss": 0.0171, "step": 73000 }, { "epoch": 480.32894736842104, "grad_norm": 1.4023321866989136, "learning_rate": 0.0001, "loss": 0.0189, "step": 73010 }, { "epoch": 480.39473684210526, "grad_norm": 1.421924114227295, "learning_rate": 0.0001, "loss": 0.0148, "step": 73020 }, { "epoch": 480.4605263157895, "grad_norm": 1.5252985954284668, "learning_rate": 0.0001, "loss": 0.017, "step": 73030 }, { "epoch": 480.5263157894737, "grad_norm": 1.310792088508606, "learning_rate": 0.0001, "loss": 0.015, "step": 73040 }, { "epoch": 480.5921052631579, "grad_norm": 1.8150792121887207, "learning_rate": 0.0001, "loss": 0.0188, "step": 73050 }, { "epoch": 480.6578947368421, "grad_norm": 1.5174299478530884, "learning_rate": 0.0001, "loss": 0.0176, "step": 73060 }, { "epoch": 480.7236842105263, "grad_norm": 1.1313353776931763, "learning_rate": 0.0001, "loss": 0.015, "step": 73070 }, { "epoch": 480.7894736842105, "grad_norm": 1.755873441696167, "learning_rate": 0.0001, "loss": 0.0134, "step": 73080 }, { "epoch": 480.85526315789474, "grad_norm": 1.6593451499938965, "learning_rate": 0.0001, "loss": 0.0164, "step": 73090 }, { "epoch": 480.92105263157896, "grad_norm": 1.6174206733703613, "learning_rate": 0.0001, "loss": 0.0204, "step": 73100 }, { "epoch": 480.9868421052632, "grad_norm": 1.1830601692199707, "learning_rate": 0.0001, "loss": 0.0153, "step": 73110 }, { "epoch": 481.05263157894734, "grad_norm": 1.277011752128601, "learning_rate": 0.0001, "loss": 0.0143, "step": 73120 }, { "epoch": 481.11842105263156, "grad_norm": 1.8538713455200195, "learning_rate": 0.0001, "loss": 0.0151, "step": 73130 }, { "epoch": 481.1842105263158, "grad_norm": 1.4536818265914917, "learning_rate": 0.0001, "loss": 0.0181, "step": 73140 }, { "epoch": 481.25, "grad_norm": 1.793221354484558, "learning_rate": 0.0001, "loss": 0.0144, "step": 73150 }, { "epoch": 481.3157894736842, "grad_norm": 1.2435450553894043, "learning_rate": 0.0001, "loss": 0.0166, "step": 73160 }, { "epoch": 481.38157894736844, "grad_norm": 1.6364669799804688, "learning_rate": 0.0001, "loss": 0.0162, "step": 73170 }, { "epoch": 481.44736842105266, "grad_norm": 1.262495756149292, "learning_rate": 0.0001, "loss": 0.0162, "step": 73180 }, { "epoch": 481.5131578947368, "grad_norm": 1.8581310510635376, "learning_rate": 0.0001, "loss": 0.0178, "step": 73190 }, { "epoch": 481.57894736842104, "grad_norm": 1.57065749168396, "learning_rate": 0.0001, "loss": 0.0155, "step": 73200 }, { "epoch": 481.64473684210526, "grad_norm": 1.3801536560058594, "learning_rate": 0.0001, "loss": 0.0147, "step": 73210 }, { "epoch": 481.7105263157895, "grad_norm": 1.5393670797348022, "learning_rate": 0.0001, "loss": 0.0162, "step": 73220 }, { "epoch": 481.7763157894737, "grad_norm": 1.504785180091858, "learning_rate": 0.0001, "loss": 0.017, "step": 73230 }, { "epoch": 481.8421052631579, "grad_norm": 1.5978022813796997, "learning_rate": 0.0001, "loss": 0.0146, "step": 73240 }, { "epoch": 481.9078947368421, "grad_norm": 1.4341341257095337, "learning_rate": 0.0001, "loss": 0.0201, "step": 73250 }, { "epoch": 481.9736842105263, "grad_norm": 1.20071280002594, "learning_rate": 0.0001, "loss": 0.0143, "step": 73260 }, { "epoch": 482.0394736842105, "grad_norm": 1.3888726234436035, "learning_rate": 0.0001, "loss": 0.0123, "step": 73270 }, { "epoch": 482.10526315789474, "grad_norm": 1.9207711219787598, "learning_rate": 0.0001, "loss": 0.0152, "step": 73280 }, { "epoch": 482.17105263157896, "grad_norm": 1.7440704107284546, "learning_rate": 0.0001, "loss": 0.0158, "step": 73290 }, { "epoch": 482.2368421052632, "grad_norm": 1.8095762729644775, "learning_rate": 0.0001, "loss": 0.0148, "step": 73300 }, { "epoch": 482.30263157894734, "grad_norm": 1.6204136610031128, "learning_rate": 0.0001, "loss": 0.0152, "step": 73310 }, { "epoch": 482.36842105263156, "grad_norm": 1.8396586179733276, "learning_rate": 0.0001, "loss": 0.0153, "step": 73320 }, { "epoch": 482.4342105263158, "grad_norm": 1.6253526210784912, "learning_rate": 0.0001, "loss": 0.0144, "step": 73330 }, { "epoch": 482.5, "grad_norm": 1.2977607250213623, "learning_rate": 0.0001, "loss": 0.0167, "step": 73340 }, { "epoch": 482.5657894736842, "grad_norm": 1.4482314586639404, "learning_rate": 0.0001, "loss": 0.0136, "step": 73350 }, { "epoch": 482.63157894736844, "grad_norm": 1.5853208303451538, "learning_rate": 0.0001, "loss": 0.0217, "step": 73360 }, { "epoch": 482.69736842105266, "grad_norm": 1.4333887100219727, "learning_rate": 0.0001, "loss": 0.0154, "step": 73370 }, { "epoch": 482.7631578947368, "grad_norm": 1.3682780265808105, "learning_rate": 0.0001, "loss": 0.016, "step": 73380 }, { "epoch": 482.82894736842104, "grad_norm": 1.8372102975845337, "learning_rate": 0.0001, "loss": 0.0164, "step": 73390 }, { "epoch": 482.89473684210526, "grad_norm": 1.5939040184020996, "learning_rate": 0.0001, "loss": 0.0136, "step": 73400 }, { "epoch": 482.9605263157895, "grad_norm": 1.7103286981582642, "learning_rate": 0.0001, "loss": 0.0156, "step": 73410 }, { "epoch": 483.0263157894737, "grad_norm": 1.6189241409301758, "learning_rate": 0.0001, "loss": 0.0156, "step": 73420 }, { "epoch": 483.0921052631579, "grad_norm": 2.1173691749572754, "learning_rate": 0.0001, "loss": 0.0119, "step": 73430 }, { "epoch": 483.1578947368421, "grad_norm": 1.9115917682647705, "learning_rate": 0.0001, "loss": 0.0183, "step": 73440 }, { "epoch": 483.2236842105263, "grad_norm": 1.5806829929351807, "learning_rate": 0.0001, "loss": 0.0178, "step": 73450 }, { "epoch": 483.2894736842105, "grad_norm": 1.8070560693740845, "learning_rate": 0.0001, "loss": 0.0126, "step": 73460 }, { "epoch": 483.35526315789474, "grad_norm": 1.395277738571167, "learning_rate": 0.0001, "loss": 0.0141, "step": 73470 }, { "epoch": 483.42105263157896, "grad_norm": 1.5196993350982666, "learning_rate": 0.0001, "loss": 0.0165, "step": 73480 }, { "epoch": 483.4868421052632, "grad_norm": 1.5830446481704712, "learning_rate": 0.0001, "loss": 0.0155, "step": 73490 }, { "epoch": 483.55263157894734, "grad_norm": 1.784831166267395, "learning_rate": 0.0001, "loss": 0.0119, "step": 73500 }, { "epoch": 483.61842105263156, "grad_norm": 1.6125684976577759, "learning_rate": 0.0001, "loss": 0.0151, "step": 73510 }, { "epoch": 483.6842105263158, "grad_norm": 1.2988429069519043, "learning_rate": 0.0001, "loss": 0.0169, "step": 73520 }, { "epoch": 483.75, "grad_norm": 1.5137778520584106, "learning_rate": 0.0001, "loss": 0.0168, "step": 73530 }, { "epoch": 483.8157894736842, "grad_norm": 1.7422313690185547, "learning_rate": 0.0001, "loss": 0.017, "step": 73540 }, { "epoch": 483.88157894736844, "grad_norm": 1.551801323890686, "learning_rate": 0.0001, "loss": 0.0144, "step": 73550 }, { "epoch": 483.94736842105266, "grad_norm": 1.544216275215149, "learning_rate": 0.0001, "loss": 0.0135, "step": 73560 }, { "epoch": 484.0131578947368, "grad_norm": 1.495015263557434, "learning_rate": 0.0001, "loss": 0.0187, "step": 73570 }, { "epoch": 484.07894736842104, "grad_norm": 1.5922025442123413, "learning_rate": 0.0001, "loss": 0.0173, "step": 73580 }, { "epoch": 484.14473684210526, "grad_norm": 1.1577439308166504, "learning_rate": 0.0001, "loss": 0.0157, "step": 73590 }, { "epoch": 484.2105263157895, "grad_norm": 1.4392797946929932, "learning_rate": 0.0001, "loss": 0.0145, "step": 73600 }, { "epoch": 484.2763157894737, "grad_norm": 1.6966114044189453, "learning_rate": 0.0001, "loss": 0.0166, "step": 73610 }, { "epoch": 484.3421052631579, "grad_norm": 1.2808177471160889, "learning_rate": 0.0001, "loss": 0.0121, "step": 73620 }, { "epoch": 484.4078947368421, "grad_norm": 1.343335747718811, "learning_rate": 0.0001, "loss": 0.0167, "step": 73630 }, { "epoch": 484.4736842105263, "grad_norm": 1.6293078660964966, "learning_rate": 0.0001, "loss": 0.0164, "step": 73640 }, { "epoch": 484.5394736842105, "grad_norm": 1.8852710723876953, "learning_rate": 0.0001, "loss": 0.0178, "step": 73650 }, { "epoch": 484.60526315789474, "grad_norm": 1.956536889076233, "learning_rate": 0.0001, "loss": 0.0147, "step": 73660 }, { "epoch": 484.67105263157896, "grad_norm": 1.4900034666061401, "learning_rate": 0.0001, "loss": 0.0176, "step": 73670 }, { "epoch": 484.7368421052632, "grad_norm": 1.689527153968811, "learning_rate": 0.0001, "loss": 0.0146, "step": 73680 }, { "epoch": 484.80263157894734, "grad_norm": 1.257413625717163, "learning_rate": 0.0001, "loss": 0.0167, "step": 73690 }, { "epoch": 484.86842105263156, "grad_norm": 1.6924216747283936, "learning_rate": 0.0001, "loss": 0.0116, "step": 73700 }, { "epoch": 484.9342105263158, "grad_norm": 1.860607624053955, "learning_rate": 0.0001, "loss": 0.0128, "step": 73710 }, { "epoch": 485.0, "grad_norm": 1.6869357824325562, "learning_rate": 0.0001, "loss": 0.018, "step": 73720 }, { "epoch": 485.0657894736842, "grad_norm": 1.4959484338760376, "learning_rate": 0.0001, "loss": 0.0183, "step": 73730 }, { "epoch": 485.13157894736844, "grad_norm": 2.3734138011932373, "learning_rate": 0.0001, "loss": 0.0117, "step": 73740 }, { "epoch": 485.19736842105266, "grad_norm": 1.9835890531539917, "learning_rate": 0.0001, "loss": 0.0176, "step": 73750 }, { "epoch": 485.2631578947368, "grad_norm": 1.8593541383743286, "learning_rate": 0.0001, "loss": 0.0177, "step": 73760 }, { "epoch": 485.32894736842104, "grad_norm": 1.6677038669586182, "learning_rate": 0.0001, "loss": 0.0192, "step": 73770 }, { "epoch": 485.39473684210526, "grad_norm": 1.7722278833389282, "learning_rate": 0.0001, "loss": 0.014, "step": 73780 }, { "epoch": 485.4605263157895, "grad_norm": 1.7572386264801025, "learning_rate": 0.0001, "loss": 0.0131, "step": 73790 }, { "epoch": 485.5263157894737, "grad_norm": 1.6819255352020264, "learning_rate": 0.0001, "loss": 0.0166, "step": 73800 }, { "epoch": 485.5921052631579, "grad_norm": 1.85662841796875, "learning_rate": 0.0001, "loss": 0.0136, "step": 73810 }, { "epoch": 485.6578947368421, "grad_norm": 1.5615159273147583, "learning_rate": 0.0001, "loss": 0.0115, "step": 73820 }, { "epoch": 485.7236842105263, "grad_norm": 1.045891523361206, "learning_rate": 0.0001, "loss": 0.0199, "step": 73830 }, { "epoch": 485.7894736842105, "grad_norm": 1.7202205657958984, "learning_rate": 0.0001, "loss": 0.0126, "step": 73840 }, { "epoch": 485.85526315789474, "grad_norm": 1.438513159751892, "learning_rate": 0.0001, "loss": 0.0135, "step": 73850 }, { "epoch": 485.92105263157896, "grad_norm": 1.4965465068817139, "learning_rate": 0.0001, "loss": 0.0144, "step": 73860 }, { "epoch": 485.9868421052632, "grad_norm": 1.6399815082550049, "learning_rate": 0.0001, "loss": 0.0116, "step": 73870 }, { "epoch": 486.05263157894734, "grad_norm": 1.4363659620285034, "learning_rate": 0.0001, "loss": 0.0162, "step": 73880 }, { "epoch": 486.11842105263156, "grad_norm": 2.0631067752838135, "learning_rate": 0.0001, "loss": 0.0154, "step": 73890 }, { "epoch": 486.1842105263158, "grad_norm": 1.910330891609192, "learning_rate": 0.0001, "loss": 0.0137, "step": 73900 }, { "epoch": 486.25, "grad_norm": 1.6987998485565186, "learning_rate": 0.0001, "loss": 0.0119, "step": 73910 }, { "epoch": 486.3157894736842, "grad_norm": 1.3558193445205688, "learning_rate": 0.0001, "loss": 0.0145, "step": 73920 }, { "epoch": 486.38157894736844, "grad_norm": 1.4514920711517334, "learning_rate": 0.0001, "loss": 0.0136, "step": 73930 }, { "epoch": 486.44736842105266, "grad_norm": 1.5962271690368652, "learning_rate": 0.0001, "loss": 0.0132, "step": 73940 }, { "epoch": 486.5131578947368, "grad_norm": 1.413343071937561, "learning_rate": 0.0001, "loss": 0.0155, "step": 73950 }, { "epoch": 486.57894736842104, "grad_norm": 1.4175866842269897, "learning_rate": 0.0001, "loss": 0.0182, "step": 73960 }, { "epoch": 486.64473684210526, "grad_norm": 1.6496278047561646, "learning_rate": 0.0001, "loss": 0.0153, "step": 73970 }, { "epoch": 486.7105263157895, "grad_norm": 1.5183906555175781, "learning_rate": 0.0001, "loss": 0.0194, "step": 73980 }, { "epoch": 486.7763157894737, "grad_norm": 1.7516525983810425, "learning_rate": 0.0001, "loss": 0.0167, "step": 73990 }, { "epoch": 486.8421052631579, "grad_norm": 1.57106351852417, "learning_rate": 0.0001, "loss": 0.0148, "step": 74000 }, { "epoch": 486.9078947368421, "grad_norm": 1.2284294366836548, "learning_rate": 0.0001, "loss": 0.0165, "step": 74010 }, { "epoch": 486.9736842105263, "grad_norm": 1.71976900100708, "learning_rate": 0.0001, "loss": 0.0151, "step": 74020 }, { "epoch": 487.0394736842105, "grad_norm": 1.5367405414581299, "learning_rate": 0.0001, "loss": 0.0154, "step": 74030 }, { "epoch": 487.10526315789474, "grad_norm": 1.4705289602279663, "learning_rate": 0.0001, "loss": 0.0162, "step": 74040 }, { "epoch": 487.17105263157896, "grad_norm": 1.2708576917648315, "learning_rate": 0.0001, "loss": 0.0129, "step": 74050 }, { "epoch": 487.2368421052632, "grad_norm": 1.1667615175247192, "learning_rate": 0.0001, "loss": 0.0163, "step": 74060 }, { "epoch": 487.30263157894734, "grad_norm": 1.7003848552703857, "learning_rate": 0.0001, "loss": 0.0163, "step": 74070 }, { "epoch": 487.36842105263156, "grad_norm": 1.5277069807052612, "learning_rate": 0.0001, "loss": 0.0186, "step": 74080 }, { "epoch": 487.4342105263158, "grad_norm": 1.4692274332046509, "learning_rate": 0.0001, "loss": 0.0149, "step": 74090 }, { "epoch": 487.5, "grad_norm": 1.213343858718872, "learning_rate": 0.0001, "loss": 0.0166, "step": 74100 }, { "epoch": 487.5657894736842, "grad_norm": 1.2715420722961426, "learning_rate": 0.0001, "loss": 0.012, "step": 74110 }, { "epoch": 487.63157894736844, "grad_norm": 1.616267204284668, "learning_rate": 0.0001, "loss": 0.0201, "step": 74120 }, { "epoch": 487.69736842105266, "grad_norm": 1.7908202409744263, "learning_rate": 0.0001, "loss": 0.0182, "step": 74130 }, { "epoch": 487.7631578947368, "grad_norm": 1.760057806968689, "learning_rate": 0.0001, "loss": 0.0135, "step": 74140 }, { "epoch": 487.82894736842104, "grad_norm": 1.4815711975097656, "learning_rate": 0.0001, "loss": 0.0139, "step": 74150 }, { "epoch": 487.89473684210526, "grad_norm": 1.6620450019836426, "learning_rate": 0.0001, "loss": 0.0167, "step": 74160 }, { "epoch": 487.9605263157895, "grad_norm": 1.5381675958633423, "learning_rate": 0.0001, "loss": 0.0132, "step": 74170 }, { "epoch": 488.0263157894737, "grad_norm": 1.4252058267593384, "learning_rate": 0.0001, "loss": 0.0135, "step": 74180 }, { "epoch": 488.0921052631579, "grad_norm": 1.7052509784698486, "learning_rate": 0.0001, "loss": 0.0141, "step": 74190 }, { "epoch": 488.1578947368421, "grad_norm": 1.7025905847549438, "learning_rate": 0.0001, "loss": 0.015, "step": 74200 }, { "epoch": 488.2236842105263, "grad_norm": 1.9770759344100952, "learning_rate": 0.0001, "loss": 0.0147, "step": 74210 }, { "epoch": 488.2894736842105, "grad_norm": 1.3426848649978638, "learning_rate": 0.0001, "loss": 0.0155, "step": 74220 }, { "epoch": 488.35526315789474, "grad_norm": 1.637891173362732, "learning_rate": 0.0001, "loss": 0.0141, "step": 74230 }, { "epoch": 488.42105263157896, "grad_norm": 1.3777875900268555, "learning_rate": 0.0001, "loss": 0.0126, "step": 74240 }, { "epoch": 488.4868421052632, "grad_norm": 1.7839299440383911, "learning_rate": 0.0001, "loss": 0.0171, "step": 74250 }, { "epoch": 488.55263157894734, "grad_norm": 1.648047924041748, "learning_rate": 0.0001, "loss": 0.0136, "step": 74260 }, { "epoch": 488.61842105263156, "grad_norm": 1.7340471744537354, "learning_rate": 0.0001, "loss": 0.0186, "step": 74270 }, { "epoch": 488.6842105263158, "grad_norm": 1.614567756652832, "learning_rate": 0.0001, "loss": 0.014, "step": 74280 }, { "epoch": 488.75, "grad_norm": 1.3855961561203003, "learning_rate": 0.0001, "loss": 0.02, "step": 74290 }, { "epoch": 488.8157894736842, "grad_norm": 1.6390423774719238, "learning_rate": 0.0001, "loss": 0.013, "step": 74300 }, { "epoch": 488.88157894736844, "grad_norm": 1.5941063165664673, "learning_rate": 0.0001, "loss": 0.0154, "step": 74310 }, { "epoch": 488.94736842105266, "grad_norm": 1.630723237991333, "learning_rate": 0.0001, "loss": 0.0192, "step": 74320 }, { "epoch": 489.0131578947368, "grad_norm": 1.8886736631393433, "learning_rate": 0.0001, "loss": 0.0134, "step": 74330 }, { "epoch": 489.07894736842104, "grad_norm": 0.9564910531044006, "learning_rate": 0.0001, "loss": 0.0153, "step": 74340 }, { "epoch": 489.14473684210526, "grad_norm": 1.5263104438781738, "learning_rate": 0.0001, "loss": 0.0188, "step": 74350 }, { "epoch": 489.2105263157895, "grad_norm": 1.4359550476074219, "learning_rate": 0.0001, "loss": 0.0158, "step": 74360 }, { "epoch": 489.2763157894737, "grad_norm": 1.662703037261963, "learning_rate": 0.0001, "loss": 0.0172, "step": 74370 }, { "epoch": 489.3421052631579, "grad_norm": 1.1771249771118164, "learning_rate": 0.0001, "loss": 0.0135, "step": 74380 }, { "epoch": 489.4078947368421, "grad_norm": 1.5129754543304443, "learning_rate": 0.0001, "loss": 0.0139, "step": 74390 }, { "epoch": 489.4736842105263, "grad_norm": 1.480371117591858, "learning_rate": 0.0001, "loss": 0.0165, "step": 74400 }, { "epoch": 489.5394736842105, "grad_norm": 1.366495966911316, "learning_rate": 0.0001, "loss": 0.0199, "step": 74410 }, { "epoch": 489.60526315789474, "grad_norm": 1.578455924987793, "learning_rate": 0.0001, "loss": 0.0176, "step": 74420 }, { "epoch": 489.67105263157896, "grad_norm": 1.7333837747573853, "learning_rate": 0.0001, "loss": 0.0131, "step": 74430 }, { "epoch": 489.7368421052632, "grad_norm": 1.5983977317810059, "learning_rate": 0.0001, "loss": 0.0127, "step": 74440 }, { "epoch": 489.80263157894734, "grad_norm": 1.6500701904296875, "learning_rate": 0.0001, "loss": 0.0152, "step": 74450 }, { "epoch": 489.86842105263156, "grad_norm": 1.628440022468567, "learning_rate": 0.0001, "loss": 0.0145, "step": 74460 }, { "epoch": 489.9342105263158, "grad_norm": 1.2045156955718994, "learning_rate": 0.0001, "loss": 0.0163, "step": 74470 }, { "epoch": 490.0, "grad_norm": 2.0960516929626465, "learning_rate": 0.0001, "loss": 0.012, "step": 74480 }, { "epoch": 490.0657894736842, "grad_norm": 1.7199608087539673, "learning_rate": 0.0001, "loss": 0.0137, "step": 74490 }, { "epoch": 490.13157894736844, "grad_norm": 1.5395195484161377, "learning_rate": 0.0001, "loss": 0.0128, "step": 74500 }, { "epoch": 490.19736842105266, "grad_norm": 1.5113719701766968, "learning_rate": 0.0001, "loss": 0.0152, "step": 74510 }, { "epoch": 490.2631578947368, "grad_norm": 1.6536701917648315, "learning_rate": 0.0001, "loss": 0.0153, "step": 74520 }, { "epoch": 490.32894736842104, "grad_norm": 1.7509065866470337, "learning_rate": 0.0001, "loss": 0.0177, "step": 74530 }, { "epoch": 490.39473684210526, "grad_norm": 1.5526535511016846, "learning_rate": 0.0001, "loss": 0.0129, "step": 74540 }, { "epoch": 490.4605263157895, "grad_norm": 1.6670259237289429, "learning_rate": 0.0001, "loss": 0.0149, "step": 74550 }, { "epoch": 490.5263157894737, "grad_norm": 1.5559817552566528, "learning_rate": 0.0001, "loss": 0.0145, "step": 74560 }, { "epoch": 490.5921052631579, "grad_norm": 1.9505417346954346, "learning_rate": 0.0001, "loss": 0.0149, "step": 74570 }, { "epoch": 490.6578947368421, "grad_norm": 1.8257253170013428, "learning_rate": 0.0001, "loss": 0.014, "step": 74580 }, { "epoch": 490.7236842105263, "grad_norm": 1.800431728363037, "learning_rate": 0.0001, "loss": 0.018, "step": 74590 }, { "epoch": 490.7894736842105, "grad_norm": 1.736080288887024, "learning_rate": 0.0001, "loss": 0.0206, "step": 74600 }, { "epoch": 490.85526315789474, "grad_norm": 1.107716679573059, "learning_rate": 0.0001, "loss": 0.0115, "step": 74610 }, { "epoch": 490.92105263157896, "grad_norm": 1.5515798330307007, "learning_rate": 0.0001, "loss": 0.0181, "step": 74620 }, { "epoch": 490.9868421052632, "grad_norm": 1.2995723485946655, "learning_rate": 0.0001, "loss": 0.0161, "step": 74630 }, { "epoch": 491.05263157894734, "grad_norm": 1.0372647047042847, "learning_rate": 0.0001, "loss": 0.0148, "step": 74640 }, { "epoch": 491.11842105263156, "grad_norm": 1.0404351949691772, "learning_rate": 0.0001, "loss": 0.0146, "step": 74650 }, { "epoch": 491.1842105263158, "grad_norm": 2.045642137527466, "learning_rate": 0.0001, "loss": 0.0157, "step": 74660 }, { "epoch": 491.25, "grad_norm": 1.944337010383606, "learning_rate": 0.0001, "loss": 0.0177, "step": 74670 }, { "epoch": 491.3157894736842, "grad_norm": 1.5520919561386108, "learning_rate": 0.0001, "loss": 0.0158, "step": 74680 }, { "epoch": 491.38157894736844, "grad_norm": 1.458781361579895, "learning_rate": 0.0001, "loss": 0.0139, "step": 74690 }, { "epoch": 491.44736842105266, "grad_norm": 1.490908145904541, "learning_rate": 0.0001, "loss": 0.0139, "step": 74700 }, { "epoch": 491.5131578947368, "grad_norm": 1.6765751838684082, "learning_rate": 0.0001, "loss": 0.0151, "step": 74710 }, { "epoch": 491.57894736842104, "grad_norm": 1.8491967916488647, "learning_rate": 0.0001, "loss": 0.0192, "step": 74720 }, { "epoch": 491.64473684210526, "grad_norm": 1.5434801578521729, "learning_rate": 0.0001, "loss": 0.0192, "step": 74730 }, { "epoch": 491.7105263157895, "grad_norm": 1.6810177564620972, "learning_rate": 0.0001, "loss": 0.0157, "step": 74740 }, { "epoch": 491.7763157894737, "grad_norm": 1.2771492004394531, "learning_rate": 0.0001, "loss": 0.0133, "step": 74750 }, { "epoch": 491.8421052631579, "grad_norm": 1.643244981765747, "learning_rate": 0.0001, "loss": 0.0139, "step": 74760 }, { "epoch": 491.9078947368421, "grad_norm": 1.4065173864364624, "learning_rate": 0.0001, "loss": 0.0156, "step": 74770 }, { "epoch": 491.9736842105263, "grad_norm": 1.7784180641174316, "learning_rate": 0.0001, "loss": 0.0148, "step": 74780 }, { "epoch": 492.0394736842105, "grad_norm": 1.5924729108810425, "learning_rate": 0.0001, "loss": 0.0136, "step": 74790 }, { "epoch": 492.10526315789474, "grad_norm": 1.5205471515655518, "learning_rate": 0.0001, "loss": 0.014, "step": 74800 }, { "epoch": 492.17105263157896, "grad_norm": 1.8967278003692627, "learning_rate": 0.0001, "loss": 0.0143, "step": 74810 }, { "epoch": 492.2368421052632, "grad_norm": 1.4468446969985962, "learning_rate": 0.0001, "loss": 0.0146, "step": 74820 }, { "epoch": 492.30263157894734, "grad_norm": 1.3297902345657349, "learning_rate": 0.0001, "loss": 0.0119, "step": 74830 }, { "epoch": 492.36842105263156, "grad_norm": 1.3253377676010132, "learning_rate": 0.0001, "loss": 0.0183, "step": 74840 }, { "epoch": 492.4342105263158, "grad_norm": 1.7172260284423828, "learning_rate": 0.0001, "loss": 0.0153, "step": 74850 }, { "epoch": 492.5, "grad_norm": 1.1522328853607178, "learning_rate": 0.0001, "loss": 0.0197, "step": 74860 }, { "epoch": 492.5657894736842, "grad_norm": 1.2597742080688477, "learning_rate": 0.0001, "loss": 0.0144, "step": 74870 }, { "epoch": 492.63157894736844, "grad_norm": 1.4998412132263184, "learning_rate": 0.0001, "loss": 0.0153, "step": 74880 }, { "epoch": 492.69736842105266, "grad_norm": 1.416616678237915, "learning_rate": 0.0001, "loss": 0.015, "step": 74890 }, { "epoch": 492.7631578947368, "grad_norm": 1.4358317852020264, "learning_rate": 0.0001, "loss": 0.0172, "step": 74900 }, { "epoch": 492.82894736842104, "grad_norm": 1.3453574180603027, "learning_rate": 0.0001, "loss": 0.0138, "step": 74910 }, { "epoch": 492.89473684210526, "grad_norm": 1.2535940408706665, "learning_rate": 0.0001, "loss": 0.016, "step": 74920 }, { "epoch": 492.9605263157895, "grad_norm": 1.0607192516326904, "learning_rate": 0.0001, "loss": 0.0183, "step": 74930 }, { "epoch": 493.0263157894737, "grad_norm": 1.349910855293274, "learning_rate": 0.0001, "loss": 0.0132, "step": 74940 }, { "epoch": 493.0921052631579, "grad_norm": 1.2419925928115845, "learning_rate": 0.0001, "loss": 0.0188, "step": 74950 }, { "epoch": 493.1578947368421, "grad_norm": 1.5120632648468018, "learning_rate": 0.0001, "loss": 0.0151, "step": 74960 }, { "epoch": 493.2236842105263, "grad_norm": 1.5862847566604614, "learning_rate": 0.0001, "loss": 0.0166, "step": 74970 }, { "epoch": 493.2894736842105, "grad_norm": 1.613669753074646, "learning_rate": 0.0001, "loss": 0.0205, "step": 74980 }, { "epoch": 493.35526315789474, "grad_norm": 1.9215439558029175, "learning_rate": 0.0001, "loss": 0.0145, "step": 74990 }, { "epoch": 493.42105263157896, "grad_norm": 1.3301177024841309, "learning_rate": 0.0001, "loss": 0.0134, "step": 75000 }, { "epoch": 493.4868421052632, "grad_norm": 1.736956238746643, "learning_rate": 0.0001, "loss": 0.0136, "step": 75010 }, { "epoch": 493.55263157894734, "grad_norm": 1.471624732017517, "learning_rate": 0.0001, "loss": 0.0173, "step": 75020 }, { "epoch": 493.61842105263156, "grad_norm": 1.189897060394287, "learning_rate": 0.0001, "loss": 0.013, "step": 75030 }, { "epoch": 493.6842105263158, "grad_norm": 1.9770426750183105, "learning_rate": 0.0001, "loss": 0.0143, "step": 75040 }, { "epoch": 493.75, "grad_norm": 1.684238076210022, "learning_rate": 0.0001, "loss": 0.0148, "step": 75050 }, { "epoch": 493.8157894736842, "grad_norm": 1.358088493347168, "learning_rate": 0.0001, "loss": 0.0187, "step": 75060 }, { "epoch": 493.88157894736844, "grad_norm": 1.4946000576019287, "learning_rate": 0.0001, "loss": 0.0151, "step": 75070 }, { "epoch": 493.94736842105266, "grad_norm": 1.6851356029510498, "learning_rate": 0.0001, "loss": 0.015, "step": 75080 }, { "epoch": 494.0131578947368, "grad_norm": 1.579123854637146, "learning_rate": 0.0001, "loss": 0.0187, "step": 75090 }, { "epoch": 494.07894736842104, "grad_norm": 2.0007808208465576, "learning_rate": 0.0001, "loss": 0.0174, "step": 75100 }, { "epoch": 494.14473684210526, "grad_norm": 1.2310092449188232, "learning_rate": 0.0001, "loss": 0.0169, "step": 75110 }, { "epoch": 494.2105263157895, "grad_norm": 1.1292651891708374, "learning_rate": 0.0001, "loss": 0.0121, "step": 75120 }, { "epoch": 494.2763157894737, "grad_norm": 1.214745044708252, "learning_rate": 0.0001, "loss": 0.0137, "step": 75130 }, { "epoch": 494.3421052631579, "grad_norm": 2.045175075531006, "learning_rate": 0.0001, "loss": 0.0158, "step": 75140 }, { "epoch": 494.4078947368421, "grad_norm": 1.5874539613723755, "learning_rate": 0.0001, "loss": 0.015, "step": 75150 }, { "epoch": 494.4736842105263, "grad_norm": 1.5616523027420044, "learning_rate": 0.0001, "loss": 0.0142, "step": 75160 }, { "epoch": 494.5394736842105, "grad_norm": 1.327771782875061, "learning_rate": 0.0001, "loss": 0.0167, "step": 75170 }, { "epoch": 494.60526315789474, "grad_norm": 1.2547054290771484, "learning_rate": 0.0001, "loss": 0.0163, "step": 75180 }, { "epoch": 494.67105263157896, "grad_norm": 1.4321614503860474, "learning_rate": 0.0001, "loss": 0.0127, "step": 75190 }, { "epoch": 494.7368421052632, "grad_norm": 1.1247665882110596, "learning_rate": 0.0001, "loss": 0.0129, "step": 75200 }, { "epoch": 494.80263157894734, "grad_norm": 1.4643479585647583, "learning_rate": 0.0001, "loss": 0.0166, "step": 75210 }, { "epoch": 494.86842105263156, "grad_norm": 1.4726155996322632, "learning_rate": 0.0001, "loss": 0.0181, "step": 75220 }, { "epoch": 494.9342105263158, "grad_norm": 1.717370629310608, "learning_rate": 0.0001, "loss": 0.0164, "step": 75230 }, { "epoch": 495.0, "grad_norm": 1.2008554935455322, "learning_rate": 0.0001, "loss": 0.0165, "step": 75240 }, { "epoch": 495.0657894736842, "grad_norm": 1.626676082611084, "learning_rate": 0.0001, "loss": 0.0153, "step": 75250 }, { "epoch": 495.13157894736844, "grad_norm": 1.2546825408935547, "learning_rate": 0.0001, "loss": 0.014, "step": 75260 }, { "epoch": 495.19736842105266, "grad_norm": 1.684253454208374, "learning_rate": 0.0001, "loss": 0.0135, "step": 75270 }, { "epoch": 495.2631578947368, "grad_norm": 1.557003378868103, "learning_rate": 0.0001, "loss": 0.0142, "step": 75280 }, { "epoch": 495.32894736842104, "grad_norm": 1.562964916229248, "learning_rate": 0.0001, "loss": 0.0132, "step": 75290 }, { "epoch": 495.39473684210526, "grad_norm": 1.8066328763961792, "learning_rate": 0.0001, "loss": 0.0157, "step": 75300 }, { "epoch": 495.4605263157895, "grad_norm": 1.7218683958053589, "learning_rate": 0.0001, "loss": 0.0141, "step": 75310 }, { "epoch": 495.5263157894737, "grad_norm": 1.7601563930511475, "learning_rate": 0.0001, "loss": 0.0147, "step": 75320 }, { "epoch": 495.5921052631579, "grad_norm": 1.6939051151275635, "learning_rate": 0.0001, "loss": 0.0139, "step": 75330 }, { "epoch": 495.6578947368421, "grad_norm": 1.522874355316162, "learning_rate": 0.0001, "loss": 0.0193, "step": 75340 }, { "epoch": 495.7236842105263, "grad_norm": 1.4833590984344482, "learning_rate": 0.0001, "loss": 0.0146, "step": 75350 }, { "epoch": 495.7894736842105, "grad_norm": 1.4060044288635254, "learning_rate": 0.0001, "loss": 0.0176, "step": 75360 }, { "epoch": 495.85526315789474, "grad_norm": 1.2887039184570312, "learning_rate": 0.0001, "loss": 0.0165, "step": 75370 }, { "epoch": 495.92105263157896, "grad_norm": 1.5669200420379639, "learning_rate": 0.0001, "loss": 0.018, "step": 75380 }, { "epoch": 495.9868421052632, "grad_norm": 1.886147379875183, "learning_rate": 0.0001, "loss": 0.0156, "step": 75390 }, { "epoch": 496.05263157894734, "grad_norm": 1.2757431268692017, "learning_rate": 0.0001, "loss": 0.0219, "step": 75400 }, { "epoch": 496.11842105263156, "grad_norm": 1.5884013175964355, "learning_rate": 0.0001, "loss": 0.0153, "step": 75410 }, { "epoch": 496.1842105263158, "grad_norm": 1.2938841581344604, "learning_rate": 0.0001, "loss": 0.0167, "step": 75420 }, { "epoch": 496.25, "grad_norm": 1.3004776239395142, "learning_rate": 0.0001, "loss": 0.0146, "step": 75430 }, { "epoch": 496.3157894736842, "grad_norm": 1.4195659160614014, "learning_rate": 0.0001, "loss": 0.0144, "step": 75440 }, { "epoch": 496.38157894736844, "grad_norm": 1.3218244314193726, "learning_rate": 0.0001, "loss": 0.0145, "step": 75450 }, { "epoch": 496.44736842105266, "grad_norm": 1.7006980180740356, "learning_rate": 0.0001, "loss": 0.0138, "step": 75460 }, { "epoch": 496.5131578947368, "grad_norm": 1.3617141246795654, "learning_rate": 0.0001, "loss": 0.0134, "step": 75470 }, { "epoch": 496.57894736842104, "grad_norm": 1.5790252685546875, "learning_rate": 0.0001, "loss": 0.0131, "step": 75480 }, { "epoch": 496.64473684210526, "grad_norm": 1.7572280168533325, "learning_rate": 0.0001, "loss": 0.0138, "step": 75490 }, { "epoch": 496.7105263157895, "grad_norm": 1.2066361904144287, "learning_rate": 0.0001, "loss": 0.0153, "step": 75500 }, { "epoch": 496.7763157894737, "grad_norm": 1.1958571672439575, "learning_rate": 0.0001, "loss": 0.0187, "step": 75510 }, { "epoch": 496.8421052631579, "grad_norm": 0.9217551350593567, "learning_rate": 0.0001, "loss": 0.0123, "step": 75520 }, { "epoch": 496.9078947368421, "grad_norm": 1.7868154048919678, "learning_rate": 0.0001, "loss": 0.0156, "step": 75530 }, { "epoch": 496.9736842105263, "grad_norm": 1.5625473260879517, "learning_rate": 0.0001, "loss": 0.0171, "step": 75540 }, { "epoch": 497.0394736842105, "grad_norm": 1.7580294609069824, "learning_rate": 0.0001, "loss": 0.0193, "step": 75550 }, { "epoch": 497.10526315789474, "grad_norm": 1.1691160202026367, "learning_rate": 0.0001, "loss": 0.0149, "step": 75560 }, { "epoch": 497.17105263157896, "grad_norm": 1.3096272945404053, "learning_rate": 0.0001, "loss": 0.0142, "step": 75570 }, { "epoch": 497.2368421052632, "grad_norm": 1.4808069467544556, "learning_rate": 0.0001, "loss": 0.0148, "step": 75580 }, { "epoch": 497.30263157894734, "grad_norm": 1.4883862733840942, "learning_rate": 0.0001, "loss": 0.0121, "step": 75590 }, { "epoch": 497.36842105263156, "grad_norm": 1.0892980098724365, "learning_rate": 0.0001, "loss": 0.0199, "step": 75600 }, { "epoch": 497.4342105263158, "grad_norm": 1.52272367477417, "learning_rate": 0.0001, "loss": 0.0162, "step": 75610 }, { "epoch": 497.5, "grad_norm": 1.5838699340820312, "learning_rate": 0.0001, "loss": 0.0129, "step": 75620 }, { "epoch": 497.5657894736842, "grad_norm": 1.3396941423416138, "learning_rate": 0.0001, "loss": 0.0195, "step": 75630 }, { "epoch": 497.63157894736844, "grad_norm": 1.606084942817688, "learning_rate": 0.0001, "loss": 0.0178, "step": 75640 }, { "epoch": 497.69736842105266, "grad_norm": 1.52082359790802, "learning_rate": 0.0001, "loss": 0.0151, "step": 75650 }, { "epoch": 497.7631578947368, "grad_norm": 1.4945443868637085, "learning_rate": 0.0001, "loss": 0.0168, "step": 75660 }, { "epoch": 497.82894736842104, "grad_norm": 2.0107929706573486, "learning_rate": 0.0001, "loss": 0.0172, "step": 75670 }, { "epoch": 497.89473684210526, "grad_norm": 1.730396032333374, "learning_rate": 0.0001, "loss": 0.012, "step": 75680 }, { "epoch": 497.9605263157895, "grad_norm": 1.540014624595642, "learning_rate": 0.0001, "loss": 0.0121, "step": 75690 }, { "epoch": 498.0263157894737, "grad_norm": 1.4945517778396606, "learning_rate": 0.0001, "loss": 0.0142, "step": 75700 }, { "epoch": 498.0921052631579, "grad_norm": 1.7161483764648438, "learning_rate": 0.0001, "loss": 0.0149, "step": 75710 }, { "epoch": 498.1578947368421, "grad_norm": 2.0245625972747803, "learning_rate": 0.0001, "loss": 0.0157, "step": 75720 }, { "epoch": 498.2236842105263, "grad_norm": 1.6228915452957153, "learning_rate": 0.0001, "loss": 0.0166, "step": 75730 }, { "epoch": 498.2894736842105, "grad_norm": 1.533990740776062, "learning_rate": 0.0001, "loss": 0.0152, "step": 75740 }, { "epoch": 498.35526315789474, "grad_norm": 1.2881989479064941, "learning_rate": 0.0001, "loss": 0.0175, "step": 75750 }, { "epoch": 498.42105263157896, "grad_norm": 1.6370660066604614, "learning_rate": 0.0001, "loss": 0.0138, "step": 75760 }, { "epoch": 498.4868421052632, "grad_norm": 1.209816813468933, "learning_rate": 0.0001, "loss": 0.016, "step": 75770 }, { "epoch": 498.55263157894734, "grad_norm": 1.2758313417434692, "learning_rate": 0.0001, "loss": 0.0137, "step": 75780 }, { "epoch": 498.61842105263156, "grad_norm": 1.7474883794784546, "learning_rate": 0.0001, "loss": 0.0169, "step": 75790 }, { "epoch": 498.6842105263158, "grad_norm": 1.9390380382537842, "learning_rate": 0.0001, "loss": 0.015, "step": 75800 }, { "epoch": 498.75, "grad_norm": 1.5253591537475586, "learning_rate": 0.0001, "loss": 0.0164, "step": 75810 }, { "epoch": 498.8157894736842, "grad_norm": 1.4320752620697021, "learning_rate": 0.0001, "loss": 0.0133, "step": 75820 }, { "epoch": 498.88157894736844, "grad_norm": 1.578909158706665, "learning_rate": 0.0001, "loss": 0.0141, "step": 75830 }, { "epoch": 498.94736842105266, "grad_norm": 1.8057433366775513, "learning_rate": 0.0001, "loss": 0.0154, "step": 75840 }, { "epoch": 499.0131578947368, "grad_norm": 1.8929871320724487, "learning_rate": 0.0001, "loss": 0.016, "step": 75850 }, { "epoch": 499.07894736842104, "grad_norm": 1.6886101961135864, "learning_rate": 0.0001, "loss": 0.0158, "step": 75860 }, { "epoch": 499.14473684210526, "grad_norm": 1.4030511379241943, "learning_rate": 0.0001, "loss": 0.0144, "step": 75870 }, { "epoch": 499.2105263157895, "grad_norm": 1.2224236726760864, "learning_rate": 0.0001, "loss": 0.0174, "step": 75880 }, { "epoch": 499.2763157894737, "grad_norm": 1.0661137104034424, "learning_rate": 0.0001, "loss": 0.0114, "step": 75890 }, { "epoch": 499.3421052631579, "grad_norm": 1.7457972764968872, "learning_rate": 0.0001, "loss": 0.0151, "step": 75900 }, { "epoch": 499.4078947368421, "grad_norm": 1.3644424676895142, "learning_rate": 0.0001, "loss": 0.0133, "step": 75910 }, { "epoch": 499.4736842105263, "grad_norm": 1.6266237497329712, "learning_rate": 0.0001, "loss": 0.0194, "step": 75920 }, { "epoch": 499.5394736842105, "grad_norm": 1.3405017852783203, "learning_rate": 0.0001, "loss": 0.0136, "step": 75930 }, { "epoch": 499.60526315789474, "grad_norm": 1.7841978073120117, "learning_rate": 0.0001, "loss": 0.0178, "step": 75940 }, { "epoch": 499.67105263157896, "grad_norm": 2.229814052581787, "learning_rate": 0.0001, "loss": 0.0203, "step": 75950 }, { "epoch": 499.7368421052632, "grad_norm": 2.0990922451019287, "learning_rate": 0.0001, "loss": 0.0129, "step": 75960 }, { "epoch": 499.80263157894734, "grad_norm": 2.226536750793457, "learning_rate": 0.0001, "loss": 0.0148, "step": 75970 }, { "epoch": 499.86842105263156, "grad_norm": 2.0536763668060303, "learning_rate": 0.0001, "loss": 0.0115, "step": 75980 }, { "epoch": 499.9342105263158, "grad_norm": 1.987932562828064, "learning_rate": 0.0001, "loss": 0.0133, "step": 75990 }, { "epoch": 500.0, "grad_norm": 1.7613911628723145, "learning_rate": 0.0001, "loss": 0.0144, "step": 76000 }, { "epoch": 500.0657894736842, "grad_norm": 1.4743658304214478, "learning_rate": 0.0001, "loss": 0.0131, "step": 76010 }, { "epoch": 500.13157894736844, "grad_norm": 1.5464051961898804, "learning_rate": 0.0001, "loss": 0.0116, "step": 76020 }, { "epoch": 500.19736842105266, "grad_norm": 1.8309727907180786, "learning_rate": 0.0001, "loss": 0.0185, "step": 76030 }, { "epoch": 500.2631578947368, "grad_norm": 1.7634416818618774, "learning_rate": 0.0001, "loss": 0.017, "step": 76040 }, { "epoch": 500.32894736842104, "grad_norm": 1.3458037376403809, "learning_rate": 0.0001, "loss": 0.0139, "step": 76050 }, { "epoch": 500.39473684210526, "grad_norm": 1.545896291732788, "learning_rate": 0.0001, "loss": 0.0148, "step": 76060 }, { "epoch": 500.4605263157895, "grad_norm": 1.5607752799987793, "learning_rate": 0.0001, "loss": 0.0174, "step": 76070 }, { "epoch": 500.5263157894737, "grad_norm": 1.4145301580429077, "learning_rate": 0.0001, "loss": 0.0126, "step": 76080 }, { "epoch": 500.5921052631579, "grad_norm": 1.5525330305099487, "learning_rate": 0.0001, "loss": 0.0168, "step": 76090 }, { "epoch": 500.6578947368421, "grad_norm": 1.2276066541671753, "learning_rate": 0.0001, "loss": 0.0128, "step": 76100 }, { "epoch": 500.7236842105263, "grad_norm": 1.1514543294906616, "learning_rate": 0.0001, "loss": 0.0155, "step": 76110 }, { "epoch": 500.7894736842105, "grad_norm": 1.066955327987671, "learning_rate": 0.0001, "loss": 0.0183, "step": 76120 }, { "epoch": 500.85526315789474, "grad_norm": 1.2278119325637817, "learning_rate": 0.0001, "loss": 0.0172, "step": 76130 }, { "epoch": 500.92105263157896, "grad_norm": 1.279248595237732, "learning_rate": 0.0001, "loss": 0.0161, "step": 76140 }, { "epoch": 500.9868421052632, "grad_norm": 1.2112776041030884, "learning_rate": 0.0001, "loss": 0.0129, "step": 76150 }, { "epoch": 501.05263157894734, "grad_norm": 1.5649547576904297, "learning_rate": 0.0001, "loss": 0.0138, "step": 76160 }, { "epoch": 501.11842105263156, "grad_norm": 1.557420015335083, "learning_rate": 0.0001, "loss": 0.0163, "step": 76170 }, { "epoch": 501.1842105263158, "grad_norm": 1.5765289068222046, "learning_rate": 0.0001, "loss": 0.0157, "step": 76180 }, { "epoch": 501.25, "grad_norm": 1.548924446105957, "learning_rate": 0.0001, "loss": 0.0171, "step": 76190 }, { "epoch": 501.3157894736842, "grad_norm": 1.146908164024353, "learning_rate": 0.0001, "loss": 0.0156, "step": 76200 }, { "epoch": 501.38157894736844, "grad_norm": 1.448534607887268, "learning_rate": 0.0001, "loss": 0.0128, "step": 76210 }, { "epoch": 501.44736842105266, "grad_norm": 1.4075782299041748, "learning_rate": 0.0001, "loss": 0.0126, "step": 76220 }, { "epoch": 501.5131578947368, "grad_norm": 1.5880481004714966, "learning_rate": 0.0001, "loss": 0.0167, "step": 76230 }, { "epoch": 501.57894736842104, "grad_norm": 1.6421693563461304, "learning_rate": 0.0001, "loss": 0.0165, "step": 76240 }, { "epoch": 501.64473684210526, "grad_norm": 1.777201771736145, "learning_rate": 0.0001, "loss": 0.0134, "step": 76250 }, { "epoch": 501.7105263157895, "grad_norm": 1.222374439239502, "learning_rate": 0.0001, "loss": 0.0163, "step": 76260 }, { "epoch": 501.7763157894737, "grad_norm": 1.4947500228881836, "learning_rate": 0.0001, "loss": 0.0152, "step": 76270 }, { "epoch": 501.8421052631579, "grad_norm": 1.794013261795044, "learning_rate": 0.0001, "loss": 0.012, "step": 76280 }, { "epoch": 501.9078947368421, "grad_norm": 1.6305394172668457, "learning_rate": 0.0001, "loss": 0.0225, "step": 76290 }, { "epoch": 501.9736842105263, "grad_norm": 1.3636395931243896, "learning_rate": 0.0001, "loss": 0.013, "step": 76300 }, { "epoch": 502.0394736842105, "grad_norm": 1.324034333229065, "learning_rate": 0.0001, "loss": 0.0138, "step": 76310 }, { "epoch": 502.10526315789474, "grad_norm": 1.8349496126174927, "learning_rate": 0.0001, "loss": 0.0166, "step": 76320 }, { "epoch": 502.17105263157896, "grad_norm": 1.7131320238113403, "learning_rate": 0.0001, "loss": 0.0163, "step": 76330 }, { "epoch": 502.2368421052632, "grad_norm": 1.7274248600006104, "learning_rate": 0.0001, "loss": 0.0132, "step": 76340 }, { "epoch": 502.30263157894734, "grad_norm": 1.5870102643966675, "learning_rate": 0.0001, "loss": 0.0159, "step": 76350 }, { "epoch": 502.36842105263156, "grad_norm": 1.5715090036392212, "learning_rate": 0.0001, "loss": 0.0163, "step": 76360 }, { "epoch": 502.4342105263158, "grad_norm": 1.556337833404541, "learning_rate": 0.0001, "loss": 0.0163, "step": 76370 }, { "epoch": 502.5, "grad_norm": 1.1685328483581543, "learning_rate": 0.0001, "loss": 0.0121, "step": 76380 }, { "epoch": 502.5657894736842, "grad_norm": 1.2846142053604126, "learning_rate": 0.0001, "loss": 0.0139, "step": 76390 }, { "epoch": 502.63157894736844, "grad_norm": 1.265857219696045, "learning_rate": 0.0001, "loss": 0.0136, "step": 76400 }, { "epoch": 502.69736842105266, "grad_norm": 1.8013732433319092, "learning_rate": 0.0001, "loss": 0.0158, "step": 76410 }, { "epoch": 502.7631578947368, "grad_norm": 1.6494481563568115, "learning_rate": 0.0001, "loss": 0.0155, "step": 76420 }, { "epoch": 502.82894736842104, "grad_norm": 1.6062067747116089, "learning_rate": 0.0001, "loss": 0.0182, "step": 76430 }, { "epoch": 502.89473684210526, "grad_norm": 1.4816275835037231, "learning_rate": 0.0001, "loss": 0.0198, "step": 76440 }, { "epoch": 502.9605263157895, "grad_norm": 1.3154710531234741, "learning_rate": 0.0001, "loss": 0.0132, "step": 76450 }, { "epoch": 503.0263157894737, "grad_norm": 1.7331410646438599, "learning_rate": 0.0001, "loss": 0.0128, "step": 76460 }, { "epoch": 503.0921052631579, "grad_norm": 1.8145204782485962, "learning_rate": 0.0001, "loss": 0.0152, "step": 76470 }, { "epoch": 503.1578947368421, "grad_norm": 1.8147236108779907, "learning_rate": 0.0001, "loss": 0.0201, "step": 76480 }, { "epoch": 503.2236842105263, "grad_norm": 1.5386651754379272, "learning_rate": 0.0001, "loss": 0.0206, "step": 76490 }, { "epoch": 503.2894736842105, "grad_norm": 1.5711113214492798, "learning_rate": 0.0001, "loss": 0.0145, "step": 76500 }, { "epoch": 503.35526315789474, "grad_norm": 1.3862504959106445, "learning_rate": 0.0001, "loss": 0.0146, "step": 76510 }, { "epoch": 503.42105263157896, "grad_norm": 1.2290573120117188, "learning_rate": 0.0001, "loss": 0.0123, "step": 76520 }, { "epoch": 503.4868421052632, "grad_norm": 1.6941728591918945, "learning_rate": 0.0001, "loss": 0.0149, "step": 76530 }, { "epoch": 503.55263157894734, "grad_norm": 1.4830228090286255, "learning_rate": 0.0001, "loss": 0.0138, "step": 76540 }, { "epoch": 503.61842105263156, "grad_norm": 1.6203079223632812, "learning_rate": 0.0001, "loss": 0.0147, "step": 76550 }, { "epoch": 503.6842105263158, "grad_norm": 1.6575158834457397, "learning_rate": 0.0001, "loss": 0.0149, "step": 76560 }, { "epoch": 503.75, "grad_norm": 2.018319606781006, "learning_rate": 0.0001, "loss": 0.0129, "step": 76570 }, { "epoch": 503.8157894736842, "grad_norm": 1.8110536336898804, "learning_rate": 0.0001, "loss": 0.0151, "step": 76580 }, { "epoch": 503.88157894736844, "grad_norm": 1.7123920917510986, "learning_rate": 0.0001, "loss": 0.0137, "step": 76590 }, { "epoch": 503.94736842105266, "grad_norm": 1.4634456634521484, "learning_rate": 0.0001, "loss": 0.0161, "step": 76600 }, { "epoch": 504.0131578947368, "grad_norm": 1.4903208017349243, "learning_rate": 0.0001, "loss": 0.0164, "step": 76610 }, { "epoch": 504.07894736842104, "grad_norm": 1.8763118982315063, "learning_rate": 0.0001, "loss": 0.0137, "step": 76620 }, { "epoch": 504.14473684210526, "grad_norm": 1.7086598873138428, "learning_rate": 0.0001, "loss": 0.0126, "step": 76630 }, { "epoch": 504.2105263157895, "grad_norm": 1.9782443046569824, "learning_rate": 0.0001, "loss": 0.0152, "step": 76640 }, { "epoch": 504.2763157894737, "grad_norm": 1.469672679901123, "learning_rate": 0.0001, "loss": 0.0169, "step": 76650 }, { "epoch": 504.3421052631579, "grad_norm": 1.766440510749817, "learning_rate": 0.0001, "loss": 0.0166, "step": 76660 }, { "epoch": 504.4078947368421, "grad_norm": 1.877692699432373, "learning_rate": 0.0001, "loss": 0.017, "step": 76670 }, { "epoch": 504.4736842105263, "grad_norm": 1.5706199407577515, "learning_rate": 0.0001, "loss": 0.0138, "step": 76680 }, { "epoch": 504.5394736842105, "grad_norm": 1.4606688022613525, "learning_rate": 0.0001, "loss": 0.0137, "step": 76690 }, { "epoch": 504.60526315789474, "grad_norm": 1.3683446645736694, "learning_rate": 0.0001, "loss": 0.0179, "step": 76700 }, { "epoch": 504.67105263157896, "grad_norm": 1.5420887470245361, "learning_rate": 0.0001, "loss": 0.0169, "step": 76710 }, { "epoch": 504.7368421052632, "grad_norm": 1.891044020652771, "learning_rate": 0.0001, "loss": 0.0148, "step": 76720 }, { "epoch": 504.80263157894734, "grad_norm": 1.364005208015442, "learning_rate": 0.0001, "loss": 0.0111, "step": 76730 }, { "epoch": 504.86842105263156, "grad_norm": 1.434608817100525, "learning_rate": 0.0001, "loss": 0.0134, "step": 76740 }, { "epoch": 504.9342105263158, "grad_norm": 1.3712475299835205, "learning_rate": 0.0001, "loss": 0.0184, "step": 76750 }, { "epoch": 505.0, "grad_norm": 1.4252753257751465, "learning_rate": 0.0001, "loss": 0.0144, "step": 76760 }, { "epoch": 505.0657894736842, "grad_norm": 1.1719422340393066, "learning_rate": 0.0001, "loss": 0.0179, "step": 76770 }, { "epoch": 505.13157894736844, "grad_norm": 1.4199156761169434, "learning_rate": 0.0001, "loss": 0.0155, "step": 76780 }, { "epoch": 505.19736842105266, "grad_norm": 1.6136332750320435, "learning_rate": 0.0001, "loss": 0.0122, "step": 76790 }, { "epoch": 505.2631578947368, "grad_norm": 1.73476243019104, "learning_rate": 0.0001, "loss": 0.016, "step": 76800 }, { "epoch": 505.32894736842104, "grad_norm": 2.0110623836517334, "learning_rate": 0.0001, "loss": 0.0155, "step": 76810 }, { "epoch": 505.39473684210526, "grad_norm": 2.109638214111328, "learning_rate": 0.0001, "loss": 0.0149, "step": 76820 }, { "epoch": 505.4605263157895, "grad_norm": 1.7814531326293945, "learning_rate": 0.0001, "loss": 0.0108, "step": 76830 }, { "epoch": 505.5263157894737, "grad_norm": 1.3284722566604614, "learning_rate": 0.0001, "loss": 0.0129, "step": 76840 }, { "epoch": 505.5921052631579, "grad_norm": 1.4390125274658203, "learning_rate": 0.0001, "loss": 0.0131, "step": 76850 }, { "epoch": 505.6578947368421, "grad_norm": 1.6426329612731934, "learning_rate": 0.0001, "loss": 0.0166, "step": 76860 }, { "epoch": 505.7236842105263, "grad_norm": 1.408854365348816, "learning_rate": 0.0001, "loss": 0.0164, "step": 76870 }, { "epoch": 505.7894736842105, "grad_norm": 1.204198956489563, "learning_rate": 0.0001, "loss": 0.0177, "step": 76880 }, { "epoch": 505.85526315789474, "grad_norm": 1.339617133140564, "learning_rate": 0.0001, "loss": 0.0138, "step": 76890 }, { "epoch": 505.92105263157896, "grad_norm": 1.4974995851516724, "learning_rate": 0.0001, "loss": 0.0172, "step": 76900 }, { "epoch": 505.9868421052632, "grad_norm": 1.629817008972168, "learning_rate": 0.0001, "loss": 0.0184, "step": 76910 }, { "epoch": 506.05263157894734, "grad_norm": 1.0922902822494507, "learning_rate": 0.0001, "loss": 0.0136, "step": 76920 }, { "epoch": 506.11842105263156, "grad_norm": 1.5583748817443848, "learning_rate": 0.0001, "loss": 0.016, "step": 76930 }, { "epoch": 506.1842105263158, "grad_norm": 1.8364464044570923, "learning_rate": 0.0001, "loss": 0.0139, "step": 76940 }, { "epoch": 506.25, "grad_norm": 1.6445331573486328, "learning_rate": 0.0001, "loss": 0.0163, "step": 76950 }, { "epoch": 506.3157894736842, "grad_norm": 1.558959722518921, "learning_rate": 0.0001, "loss": 0.0146, "step": 76960 }, { "epoch": 506.38157894736844, "grad_norm": 2.0076141357421875, "learning_rate": 0.0001, "loss": 0.0149, "step": 76970 }, { "epoch": 506.44736842105266, "grad_norm": 1.3191195726394653, "learning_rate": 0.0001, "loss": 0.0175, "step": 76980 }, { "epoch": 506.5131578947368, "grad_norm": 1.8769149780273438, "learning_rate": 0.0001, "loss": 0.0195, "step": 76990 }, { "epoch": 506.57894736842104, "grad_norm": 1.2733697891235352, "learning_rate": 0.0001, "loss": 0.0136, "step": 77000 }, { "epoch": 506.64473684210526, "grad_norm": 1.367136001586914, "learning_rate": 0.0001, "loss": 0.0168, "step": 77010 }, { "epoch": 506.7105263157895, "grad_norm": 1.1979756355285645, "learning_rate": 0.0001, "loss": 0.0125, "step": 77020 }, { "epoch": 506.7763157894737, "grad_norm": 1.4339121580123901, "learning_rate": 0.0001, "loss": 0.0175, "step": 77030 }, { "epoch": 506.8421052631579, "grad_norm": 1.590680718421936, "learning_rate": 0.0001, "loss": 0.0208, "step": 77040 }, { "epoch": 506.9078947368421, "grad_norm": 1.8360675573349, "learning_rate": 0.0001, "loss": 0.0153, "step": 77050 }, { "epoch": 506.9736842105263, "grad_norm": 1.664429783821106, "learning_rate": 0.0001, "loss": 0.0177, "step": 77060 }, { "epoch": 507.0394736842105, "grad_norm": 1.5647484064102173, "learning_rate": 0.0001, "loss": 0.0162, "step": 77070 }, { "epoch": 507.10526315789474, "grad_norm": 1.3811861276626587, "learning_rate": 0.0001, "loss": 0.0134, "step": 77080 }, { "epoch": 507.17105263157896, "grad_norm": 2.0667150020599365, "learning_rate": 0.0001, "loss": 0.0166, "step": 77090 }, { "epoch": 507.2368421052632, "grad_norm": 1.3923759460449219, "learning_rate": 0.0001, "loss": 0.0151, "step": 77100 }, { "epoch": 507.30263157894734, "grad_norm": 1.4544848203659058, "learning_rate": 0.0001, "loss": 0.0176, "step": 77110 }, { "epoch": 507.36842105263156, "grad_norm": 1.4646955728530884, "learning_rate": 0.0001, "loss": 0.0154, "step": 77120 }, { "epoch": 507.4342105263158, "grad_norm": 1.508428931236267, "learning_rate": 0.0001, "loss": 0.0182, "step": 77130 }, { "epoch": 507.5, "grad_norm": 1.4568811655044556, "learning_rate": 0.0001, "loss": 0.0175, "step": 77140 }, { "epoch": 507.5657894736842, "grad_norm": 1.3912413120269775, "learning_rate": 0.0001, "loss": 0.0206, "step": 77150 }, { "epoch": 507.63157894736844, "grad_norm": 1.5328011512756348, "learning_rate": 0.0001, "loss": 0.0191, "step": 77160 }, { "epoch": 507.69736842105266, "grad_norm": 1.9439849853515625, "learning_rate": 0.0001, "loss": 0.0192, "step": 77170 }, { "epoch": 507.7631578947368, "grad_norm": 1.1428592205047607, "learning_rate": 0.0001, "loss": 0.0193, "step": 77180 }, { "epoch": 507.82894736842104, "grad_norm": 1.5593217611312866, "learning_rate": 0.0001, "loss": 0.0159, "step": 77190 }, { "epoch": 507.89473684210526, "grad_norm": 1.7237592935562134, "learning_rate": 0.0001, "loss": 0.0171, "step": 77200 }, { "epoch": 507.9605263157895, "grad_norm": 1.7297126054763794, "learning_rate": 0.0001, "loss": 0.0141, "step": 77210 }, { "epoch": 508.0263157894737, "grad_norm": 1.0659903287887573, "learning_rate": 0.0001, "loss": 0.0165, "step": 77220 }, { "epoch": 508.0921052631579, "grad_norm": 1.518301010131836, "learning_rate": 0.0001, "loss": 0.0167, "step": 77230 }, { "epoch": 508.1578947368421, "grad_norm": 1.4003773927688599, "learning_rate": 0.0001, "loss": 0.0159, "step": 77240 }, { "epoch": 508.2236842105263, "grad_norm": 1.694061040878296, "learning_rate": 0.0001, "loss": 0.0178, "step": 77250 }, { "epoch": 508.2894736842105, "grad_norm": 1.6392738819122314, "learning_rate": 0.0001, "loss": 0.016, "step": 77260 }, { "epoch": 508.35526315789474, "grad_norm": 1.9456757307052612, "learning_rate": 0.0001, "loss": 0.0129, "step": 77270 }, { "epoch": 508.42105263157896, "grad_norm": 1.5068894624710083, "learning_rate": 0.0001, "loss": 0.0184, "step": 77280 }, { "epoch": 508.4868421052632, "grad_norm": 1.308677077293396, "learning_rate": 0.0001, "loss": 0.0195, "step": 77290 }, { "epoch": 508.55263157894734, "grad_norm": 1.6808819770812988, "learning_rate": 0.0001, "loss": 0.0164, "step": 77300 }, { "epoch": 508.61842105263156, "grad_norm": 1.3213839530944824, "learning_rate": 0.0001, "loss": 0.0151, "step": 77310 }, { "epoch": 508.6842105263158, "grad_norm": 1.5215811729431152, "learning_rate": 0.0001, "loss": 0.0132, "step": 77320 }, { "epoch": 508.75, "grad_norm": 1.8284014463424683, "learning_rate": 0.0001, "loss": 0.0197, "step": 77330 }, { "epoch": 508.8157894736842, "grad_norm": 1.3556766510009766, "learning_rate": 0.0001, "loss": 0.0186, "step": 77340 }, { "epoch": 508.88157894736844, "grad_norm": 1.1228227615356445, "learning_rate": 0.0001, "loss": 0.0177, "step": 77350 }, { "epoch": 508.94736842105266, "grad_norm": 1.715314507484436, "learning_rate": 0.0001, "loss": 0.0155, "step": 77360 }, { "epoch": 509.0131578947368, "grad_norm": 1.8598716259002686, "learning_rate": 0.0001, "loss": 0.0151, "step": 77370 }, { "epoch": 509.07894736842104, "grad_norm": 1.8754881620407104, "learning_rate": 0.0001, "loss": 0.0164, "step": 77380 }, { "epoch": 509.14473684210526, "grad_norm": 1.4238076210021973, "learning_rate": 0.0001, "loss": 0.0168, "step": 77390 }, { "epoch": 509.2105263157895, "grad_norm": 1.2830214500427246, "learning_rate": 0.0001, "loss": 0.017, "step": 77400 }, { "epoch": 509.2763157894737, "grad_norm": 1.6974343061447144, "learning_rate": 0.0001, "loss": 0.0135, "step": 77410 }, { "epoch": 509.3421052631579, "grad_norm": 1.572974443435669, "learning_rate": 0.0001, "loss": 0.0141, "step": 77420 }, { "epoch": 509.4078947368421, "grad_norm": 1.2885088920593262, "learning_rate": 0.0001, "loss": 0.0187, "step": 77430 }, { "epoch": 509.4736842105263, "grad_norm": 1.6441909074783325, "learning_rate": 0.0001, "loss": 0.0121, "step": 77440 }, { "epoch": 509.5394736842105, "grad_norm": 1.3734464645385742, "learning_rate": 0.0001, "loss": 0.0147, "step": 77450 }, { "epoch": 509.60526315789474, "grad_norm": 1.5201541185379028, "learning_rate": 0.0001, "loss": 0.0191, "step": 77460 }, { "epoch": 509.67105263157896, "grad_norm": 1.0554879903793335, "learning_rate": 0.0001, "loss": 0.0215, "step": 77470 }, { "epoch": 509.7368421052632, "grad_norm": 1.7521145343780518, "learning_rate": 0.0001, "loss": 0.0153, "step": 77480 }, { "epoch": 509.80263157894734, "grad_norm": 1.8063573837280273, "learning_rate": 0.0001, "loss": 0.0148, "step": 77490 }, { "epoch": 509.86842105263156, "grad_norm": 1.758393406867981, "learning_rate": 0.0001, "loss": 0.0189, "step": 77500 }, { "epoch": 509.9342105263158, "grad_norm": 1.442395567893982, "learning_rate": 0.0001, "loss": 0.0166, "step": 77510 }, { "epoch": 510.0, "grad_norm": 2.1094565391540527, "learning_rate": 0.0001, "loss": 0.0126, "step": 77520 }, { "epoch": 510.0657894736842, "grad_norm": 1.763189435005188, "learning_rate": 0.0001, "loss": 0.0129, "step": 77530 }, { "epoch": 510.13157894736844, "grad_norm": 1.8948922157287598, "learning_rate": 0.0001, "loss": 0.0155, "step": 77540 }, { "epoch": 510.19736842105266, "grad_norm": 1.5151996612548828, "learning_rate": 0.0001, "loss": 0.0153, "step": 77550 }, { "epoch": 510.2631578947368, "grad_norm": 1.9426159858703613, "learning_rate": 0.0001, "loss": 0.0182, "step": 77560 }, { "epoch": 510.32894736842104, "grad_norm": 1.9744888544082642, "learning_rate": 0.0001, "loss": 0.0134, "step": 77570 }, { "epoch": 510.39473684210526, "grad_norm": 1.5750515460968018, "learning_rate": 0.0001, "loss": 0.012, "step": 77580 }, { "epoch": 510.4605263157895, "grad_norm": 1.8997995853424072, "learning_rate": 0.0001, "loss": 0.016, "step": 77590 }, { "epoch": 510.5263157894737, "grad_norm": 1.8158766031265259, "learning_rate": 0.0001, "loss": 0.0191, "step": 77600 }, { "epoch": 510.5921052631579, "grad_norm": 1.7237852811813354, "learning_rate": 0.0001, "loss": 0.015, "step": 77610 }, { "epoch": 510.6578947368421, "grad_norm": 1.7467293739318848, "learning_rate": 0.0001, "loss": 0.0137, "step": 77620 }, { "epoch": 510.7236842105263, "grad_norm": 1.800412893295288, "learning_rate": 0.0001, "loss": 0.0163, "step": 77630 }, { "epoch": 510.7894736842105, "grad_norm": 1.3875648975372314, "learning_rate": 0.0001, "loss": 0.0142, "step": 77640 }, { "epoch": 510.85526315789474, "grad_norm": 1.7114397287368774, "learning_rate": 0.0001, "loss": 0.0162, "step": 77650 }, { "epoch": 510.92105263157896, "grad_norm": 1.4041330814361572, "learning_rate": 0.0001, "loss": 0.0188, "step": 77660 }, { "epoch": 510.9868421052632, "grad_norm": 1.2582380771636963, "learning_rate": 0.0001, "loss": 0.0149, "step": 77670 }, { "epoch": 511.05263157894734, "grad_norm": 1.632538080215454, "learning_rate": 0.0001, "loss": 0.0169, "step": 77680 }, { "epoch": 511.11842105263156, "grad_norm": 1.2210134267807007, "learning_rate": 0.0001, "loss": 0.0128, "step": 77690 }, { "epoch": 511.1842105263158, "grad_norm": 1.1994540691375732, "learning_rate": 0.0001, "loss": 0.0148, "step": 77700 }, { "epoch": 511.25, "grad_norm": 1.9242167472839355, "learning_rate": 0.0001, "loss": 0.0149, "step": 77710 }, { "epoch": 511.3157894736842, "grad_norm": 1.5822391510009766, "learning_rate": 0.0001, "loss": 0.0133, "step": 77720 }, { "epoch": 511.38157894736844, "grad_norm": 1.1216644048690796, "learning_rate": 0.0001, "loss": 0.016, "step": 77730 }, { "epoch": 511.44736842105266, "grad_norm": 1.693570613861084, "learning_rate": 0.0001, "loss": 0.0188, "step": 77740 }, { "epoch": 511.5131578947368, "grad_norm": 1.6481693983078003, "learning_rate": 0.0001, "loss": 0.0146, "step": 77750 }, { "epoch": 511.57894736842104, "grad_norm": 1.5308988094329834, "learning_rate": 0.0001, "loss": 0.0143, "step": 77760 }, { "epoch": 511.64473684210526, "grad_norm": 1.50753915309906, "learning_rate": 0.0001, "loss": 0.0174, "step": 77770 }, { "epoch": 511.7105263157895, "grad_norm": 1.2912451028823853, "learning_rate": 0.0001, "loss": 0.0134, "step": 77780 }, { "epoch": 511.7763157894737, "grad_norm": 1.2288223505020142, "learning_rate": 0.0001, "loss": 0.0145, "step": 77790 }, { "epoch": 511.8421052631579, "grad_norm": 1.74163818359375, "learning_rate": 0.0001, "loss": 0.0171, "step": 77800 }, { "epoch": 511.9078947368421, "grad_norm": 1.6143931150436401, "learning_rate": 0.0001, "loss": 0.013, "step": 77810 }, { "epoch": 511.9736842105263, "grad_norm": 1.641135573387146, "learning_rate": 0.0001, "loss": 0.0154, "step": 77820 }, { "epoch": 512.0394736842105, "grad_norm": 1.3185769319534302, "learning_rate": 0.0001, "loss": 0.0141, "step": 77830 }, { "epoch": 512.1052631578947, "grad_norm": 1.3028333187103271, "learning_rate": 0.0001, "loss": 0.0152, "step": 77840 }, { "epoch": 512.171052631579, "grad_norm": 1.6586172580718994, "learning_rate": 0.0001, "loss": 0.0126, "step": 77850 }, { "epoch": 512.2368421052631, "grad_norm": 1.4883986711502075, "learning_rate": 0.0001, "loss": 0.0166, "step": 77860 }, { "epoch": 512.3026315789474, "grad_norm": 0.9735058546066284, "learning_rate": 0.0001, "loss": 0.0124, "step": 77870 }, { "epoch": 512.3684210526316, "grad_norm": 1.5856417417526245, "learning_rate": 0.0001, "loss": 0.0156, "step": 77880 }, { "epoch": 512.4342105263158, "grad_norm": 1.389802098274231, "learning_rate": 0.0001, "loss": 0.0129, "step": 77890 }, { "epoch": 512.5, "grad_norm": 1.6556857824325562, "learning_rate": 0.0001, "loss": 0.0177, "step": 77900 }, { "epoch": 512.5657894736842, "grad_norm": 1.2825406789779663, "learning_rate": 0.0001, "loss": 0.0142, "step": 77910 }, { "epoch": 512.6315789473684, "grad_norm": 1.813141107559204, "learning_rate": 0.0001, "loss": 0.0156, "step": 77920 }, { "epoch": 512.6973684210526, "grad_norm": 1.6931178569793701, "learning_rate": 0.0001, "loss": 0.0159, "step": 77930 }, { "epoch": 512.7631578947369, "grad_norm": 1.3170137405395508, "learning_rate": 0.0001, "loss": 0.0125, "step": 77940 }, { "epoch": 512.828947368421, "grad_norm": 1.395915150642395, "learning_rate": 0.0001, "loss": 0.014, "step": 77950 }, { "epoch": 512.8947368421053, "grad_norm": 1.4704581499099731, "learning_rate": 0.0001, "loss": 0.0181, "step": 77960 }, { "epoch": 512.9605263157895, "grad_norm": 1.1529408693313599, "learning_rate": 0.0001, "loss": 0.016, "step": 77970 }, { "epoch": 513.0263157894736, "grad_norm": 1.7060043811798096, "learning_rate": 0.0001, "loss": 0.0168, "step": 77980 }, { "epoch": 513.0921052631579, "grad_norm": 1.321708083152771, "learning_rate": 0.0001, "loss": 0.0126, "step": 77990 }, { "epoch": 513.1578947368421, "grad_norm": 1.2095351219177246, "learning_rate": 0.0001, "loss": 0.0189, "step": 78000 }, { "epoch": 513.2236842105264, "grad_norm": 0.825718879699707, "learning_rate": 0.0001, "loss": 0.015, "step": 78010 }, { "epoch": 513.2894736842105, "grad_norm": 1.4805757999420166, "learning_rate": 0.0001, "loss": 0.0164, "step": 78020 }, { "epoch": 513.3552631578947, "grad_norm": 1.5771158933639526, "learning_rate": 0.0001, "loss": 0.0133, "step": 78030 }, { "epoch": 513.421052631579, "grad_norm": 1.4778016805648804, "learning_rate": 0.0001, "loss": 0.0148, "step": 78040 }, { "epoch": 513.4868421052631, "grad_norm": 1.5813030004501343, "learning_rate": 0.0001, "loss": 0.02, "step": 78050 }, { "epoch": 513.5526315789474, "grad_norm": 1.7232757806777954, "learning_rate": 0.0001, "loss": 0.0121, "step": 78060 }, { "epoch": 513.6184210526316, "grad_norm": 1.2340940237045288, "learning_rate": 0.0001, "loss": 0.0139, "step": 78070 }, { "epoch": 513.6842105263158, "grad_norm": 1.2930692434310913, "learning_rate": 0.0001, "loss": 0.0133, "step": 78080 }, { "epoch": 513.75, "grad_norm": 1.5621823072433472, "learning_rate": 0.0001, "loss": 0.0147, "step": 78090 }, { "epoch": 513.8157894736842, "grad_norm": 1.4232274293899536, "learning_rate": 0.0001, "loss": 0.0117, "step": 78100 }, { "epoch": 513.8815789473684, "grad_norm": 1.8806426525115967, "learning_rate": 0.0001, "loss": 0.0208, "step": 78110 }, { "epoch": 513.9473684210526, "grad_norm": 1.553922414779663, "learning_rate": 0.0001, "loss": 0.019, "step": 78120 }, { "epoch": 514.0131578947369, "grad_norm": 1.5312786102294922, "learning_rate": 0.0001, "loss": 0.0121, "step": 78130 }, { "epoch": 514.078947368421, "grad_norm": 1.7677679061889648, "learning_rate": 0.0001, "loss": 0.0179, "step": 78140 }, { "epoch": 514.1447368421053, "grad_norm": 1.6004903316497803, "learning_rate": 0.0001, "loss": 0.0141, "step": 78150 }, { "epoch": 514.2105263157895, "grad_norm": 1.257819652557373, "learning_rate": 0.0001, "loss": 0.0178, "step": 78160 }, { "epoch": 514.2763157894736, "grad_norm": 1.4945974349975586, "learning_rate": 0.0001, "loss": 0.0141, "step": 78170 }, { "epoch": 514.3421052631579, "grad_norm": 1.6964620351791382, "learning_rate": 0.0001, "loss": 0.015, "step": 78180 }, { "epoch": 514.4078947368421, "grad_norm": 1.3544543981552124, "learning_rate": 0.0001, "loss": 0.0167, "step": 78190 }, { "epoch": 514.4736842105264, "grad_norm": 1.7583383321762085, "learning_rate": 0.0001, "loss": 0.0142, "step": 78200 }, { "epoch": 514.5394736842105, "grad_norm": 1.5257833003997803, "learning_rate": 0.0001, "loss": 0.0158, "step": 78210 }, { "epoch": 514.6052631578947, "grad_norm": 0.9430021047592163, "learning_rate": 0.0001, "loss": 0.0149, "step": 78220 }, { "epoch": 514.671052631579, "grad_norm": 1.6034228801727295, "learning_rate": 0.0001, "loss": 0.0121, "step": 78230 }, { "epoch": 514.7368421052631, "grad_norm": 1.1656447649002075, "learning_rate": 0.0001, "loss": 0.0185, "step": 78240 }, { "epoch": 514.8026315789474, "grad_norm": 1.039567470550537, "learning_rate": 0.0001, "loss": 0.013, "step": 78250 }, { "epoch": 514.8684210526316, "grad_norm": 1.444722294807434, "learning_rate": 0.0001, "loss": 0.0119, "step": 78260 }, { "epoch": 514.9342105263158, "grad_norm": 1.4351261854171753, "learning_rate": 0.0001, "loss": 0.0135, "step": 78270 }, { "epoch": 515.0, "grad_norm": 1.3331912755966187, "learning_rate": 0.0001, "loss": 0.0166, "step": 78280 }, { "epoch": 515.0657894736842, "grad_norm": 1.417351245880127, "learning_rate": 0.0001, "loss": 0.0141, "step": 78290 }, { "epoch": 515.1315789473684, "grad_norm": 1.4987530708312988, "learning_rate": 0.0001, "loss": 0.0145, "step": 78300 }, { "epoch": 515.1973684210526, "grad_norm": 1.7775640487670898, "learning_rate": 0.0001, "loss": 0.0171, "step": 78310 }, { "epoch": 515.2631578947369, "grad_norm": 1.4931070804595947, "learning_rate": 0.0001, "loss": 0.0145, "step": 78320 }, { "epoch": 515.328947368421, "grad_norm": 1.3183127641677856, "learning_rate": 0.0001, "loss": 0.0157, "step": 78330 }, { "epoch": 515.3947368421053, "grad_norm": 1.618046760559082, "learning_rate": 0.0001, "loss": 0.0158, "step": 78340 }, { "epoch": 515.4605263157895, "grad_norm": 1.7723746299743652, "learning_rate": 0.0001, "loss": 0.0145, "step": 78350 }, { "epoch": 515.5263157894736, "grad_norm": 1.3359922170639038, "learning_rate": 0.0001, "loss": 0.0164, "step": 78360 }, { "epoch": 515.5921052631579, "grad_norm": 1.2634923458099365, "learning_rate": 0.0001, "loss": 0.012, "step": 78370 }, { "epoch": 515.6578947368421, "grad_norm": 1.2991467714309692, "learning_rate": 0.0001, "loss": 0.0188, "step": 78380 }, { "epoch": 515.7236842105264, "grad_norm": 1.5693758726119995, "learning_rate": 0.0001, "loss": 0.0181, "step": 78390 }, { "epoch": 515.7894736842105, "grad_norm": 1.7329325675964355, "learning_rate": 0.0001, "loss": 0.0136, "step": 78400 }, { "epoch": 515.8552631578947, "grad_norm": 1.576362133026123, "learning_rate": 0.0001, "loss": 0.0145, "step": 78410 }, { "epoch": 515.921052631579, "grad_norm": 1.469512939453125, "learning_rate": 0.0001, "loss": 0.016, "step": 78420 }, { "epoch": 515.9868421052631, "grad_norm": 1.2047951221466064, "learning_rate": 0.0001, "loss": 0.0137, "step": 78430 }, { "epoch": 516.0526315789474, "grad_norm": 1.7149769067764282, "learning_rate": 0.0001, "loss": 0.0131, "step": 78440 }, { "epoch": 516.1184210526316, "grad_norm": 1.8351999521255493, "learning_rate": 0.0001, "loss": 0.0138, "step": 78450 }, { "epoch": 516.1842105263158, "grad_norm": 1.1972216367721558, "learning_rate": 0.0001, "loss": 0.018, "step": 78460 }, { "epoch": 516.25, "grad_norm": 1.5160845518112183, "learning_rate": 0.0001, "loss": 0.0131, "step": 78470 }, { "epoch": 516.3157894736842, "grad_norm": 1.7287389039993286, "learning_rate": 0.0001, "loss": 0.0136, "step": 78480 }, { "epoch": 516.3815789473684, "grad_norm": 1.2624974250793457, "learning_rate": 0.0001, "loss": 0.015, "step": 78490 }, { "epoch": 516.4473684210526, "grad_norm": 1.6984004974365234, "learning_rate": 0.0001, "loss": 0.0148, "step": 78500 }, { "epoch": 516.5131578947369, "grad_norm": 1.6915675401687622, "learning_rate": 0.0001, "loss": 0.0181, "step": 78510 }, { "epoch": 516.578947368421, "grad_norm": 1.0260801315307617, "learning_rate": 0.0001, "loss": 0.0166, "step": 78520 }, { "epoch": 516.6447368421053, "grad_norm": 1.4905065298080444, "learning_rate": 0.0001, "loss": 0.0165, "step": 78530 }, { "epoch": 516.7105263157895, "grad_norm": 1.804322361946106, "learning_rate": 0.0001, "loss": 0.0148, "step": 78540 }, { "epoch": 516.7763157894736, "grad_norm": 1.8529058694839478, "learning_rate": 0.0001, "loss": 0.0129, "step": 78550 }, { "epoch": 516.8421052631579, "grad_norm": 1.7282814979553223, "learning_rate": 0.0001, "loss": 0.016, "step": 78560 }, { "epoch": 516.9078947368421, "grad_norm": 1.2927968502044678, "learning_rate": 0.0001, "loss": 0.0139, "step": 78570 }, { "epoch": 516.9736842105264, "grad_norm": 1.2695995569229126, "learning_rate": 0.0001, "loss": 0.0181, "step": 78580 }, { "epoch": 517.0394736842105, "grad_norm": 1.7084729671478271, "learning_rate": 0.0001, "loss": 0.0155, "step": 78590 }, { "epoch": 517.1052631578947, "grad_norm": 1.3217990398406982, "learning_rate": 0.0001, "loss": 0.0143, "step": 78600 }, { "epoch": 517.171052631579, "grad_norm": 1.2165368795394897, "learning_rate": 0.0001, "loss": 0.0159, "step": 78610 }, { "epoch": 517.2368421052631, "grad_norm": 1.5522578954696655, "learning_rate": 0.0001, "loss": 0.0136, "step": 78620 }, { "epoch": 517.3026315789474, "grad_norm": 1.510021686553955, "learning_rate": 0.0001, "loss": 0.0114, "step": 78630 }, { "epoch": 517.3684210526316, "grad_norm": 1.4730907678604126, "learning_rate": 0.0001, "loss": 0.0156, "step": 78640 }, { "epoch": 517.4342105263158, "grad_norm": 1.7008671760559082, "learning_rate": 0.0001, "loss": 0.0151, "step": 78650 }, { "epoch": 517.5, "grad_norm": 1.6367347240447998, "learning_rate": 0.0001, "loss": 0.0165, "step": 78660 }, { "epoch": 517.5657894736842, "grad_norm": 1.8011568784713745, "learning_rate": 0.0001, "loss": 0.016, "step": 78670 }, { "epoch": 517.6315789473684, "grad_norm": 2.3476805686950684, "learning_rate": 0.0001, "loss": 0.0135, "step": 78680 }, { "epoch": 517.6973684210526, "grad_norm": 1.8247379064559937, "learning_rate": 0.0001, "loss": 0.0119, "step": 78690 }, { "epoch": 517.7631578947369, "grad_norm": 1.588510513305664, "learning_rate": 0.0001, "loss": 0.0179, "step": 78700 }, { "epoch": 517.828947368421, "grad_norm": 1.7647888660430908, "learning_rate": 0.0001, "loss": 0.0169, "step": 78710 }, { "epoch": 517.8947368421053, "grad_norm": 1.7572252750396729, "learning_rate": 0.0001, "loss": 0.0128, "step": 78720 }, { "epoch": 517.9605263157895, "grad_norm": 1.57066810131073, "learning_rate": 0.0001, "loss": 0.0175, "step": 78730 }, { "epoch": 518.0263157894736, "grad_norm": 1.6701877117156982, "learning_rate": 0.0001, "loss": 0.0133, "step": 78740 }, { "epoch": 518.0921052631579, "grad_norm": 1.5915749073028564, "learning_rate": 0.0001, "loss": 0.0141, "step": 78750 }, { "epoch": 518.1578947368421, "grad_norm": 1.3961418867111206, "learning_rate": 0.0001, "loss": 0.0135, "step": 78760 }, { "epoch": 518.2236842105264, "grad_norm": 0.999305009841919, "learning_rate": 0.0001, "loss": 0.0153, "step": 78770 }, { "epoch": 518.2894736842105, "grad_norm": 1.8741719722747803, "learning_rate": 0.0001, "loss": 0.0157, "step": 78780 }, { "epoch": 518.3552631578947, "grad_norm": 1.2114322185516357, "learning_rate": 0.0001, "loss": 0.0148, "step": 78790 }, { "epoch": 518.421052631579, "grad_norm": 1.4081777334213257, "learning_rate": 0.0001, "loss": 0.0129, "step": 78800 }, { "epoch": 518.4868421052631, "grad_norm": 1.865637183189392, "learning_rate": 0.0001, "loss": 0.0136, "step": 78810 }, { "epoch": 518.5526315789474, "grad_norm": 1.3998806476593018, "learning_rate": 0.0001, "loss": 0.0169, "step": 78820 }, { "epoch": 518.6184210526316, "grad_norm": 2.003396511077881, "learning_rate": 0.0001, "loss": 0.0147, "step": 78830 }, { "epoch": 518.6842105263158, "grad_norm": 0.8156841397285461, "learning_rate": 0.0001, "loss": 0.0149, "step": 78840 }, { "epoch": 518.75, "grad_norm": 1.863356590270996, "learning_rate": 0.0001, "loss": 0.0166, "step": 78850 }, { "epoch": 518.8157894736842, "grad_norm": 1.6926389932632446, "learning_rate": 0.0001, "loss": 0.0151, "step": 78860 }, { "epoch": 518.8815789473684, "grad_norm": 1.7204402685165405, "learning_rate": 0.0001, "loss": 0.0171, "step": 78870 }, { "epoch": 518.9473684210526, "grad_norm": 2.2538721561431885, "learning_rate": 0.0001, "loss": 0.0128, "step": 78880 }, { "epoch": 519.0131578947369, "grad_norm": 1.6247926950454712, "learning_rate": 0.0001, "loss": 0.0149, "step": 78890 }, { "epoch": 519.078947368421, "grad_norm": 1.237427830696106, "learning_rate": 0.0001, "loss": 0.0119, "step": 78900 }, { "epoch": 519.1447368421053, "grad_norm": 1.5956469774246216, "learning_rate": 0.0001, "loss": 0.0161, "step": 78910 }, { "epoch": 519.2105263157895, "grad_norm": 1.262107491493225, "learning_rate": 0.0001, "loss": 0.0132, "step": 78920 }, { "epoch": 519.2763157894736, "grad_norm": 1.3959956169128418, "learning_rate": 0.0001, "loss": 0.0186, "step": 78930 }, { "epoch": 519.3421052631579, "grad_norm": 1.2872241735458374, "learning_rate": 0.0001, "loss": 0.0146, "step": 78940 }, { "epoch": 519.4078947368421, "grad_norm": 1.4573534727096558, "learning_rate": 0.0001, "loss": 0.0142, "step": 78950 }, { "epoch": 519.4736842105264, "grad_norm": 1.8440661430358887, "learning_rate": 0.0001, "loss": 0.0183, "step": 78960 }, { "epoch": 519.5394736842105, "grad_norm": 1.487473487854004, "learning_rate": 0.0001, "loss": 0.0165, "step": 78970 }, { "epoch": 519.6052631578947, "grad_norm": 1.6062387228012085, "learning_rate": 0.0001, "loss": 0.0171, "step": 78980 }, { "epoch": 519.671052631579, "grad_norm": 1.520719051361084, "learning_rate": 0.0001, "loss": 0.0116, "step": 78990 }, { "epoch": 519.7368421052631, "grad_norm": 1.3740997314453125, "learning_rate": 0.0001, "loss": 0.0118, "step": 79000 }, { "epoch": 519.8026315789474, "grad_norm": 1.838800311088562, "learning_rate": 0.0001, "loss": 0.0154, "step": 79010 }, { "epoch": 519.8684210526316, "grad_norm": 1.7863680124282837, "learning_rate": 0.0001, "loss": 0.0136, "step": 79020 }, { "epoch": 519.9342105263158, "grad_norm": 1.4240894317626953, "learning_rate": 0.0001, "loss": 0.0158, "step": 79030 }, { "epoch": 520.0, "grad_norm": 1.7241368293762207, "learning_rate": 0.0001, "loss": 0.0158, "step": 79040 }, { "epoch": 520.0657894736842, "grad_norm": 1.4255616664886475, "learning_rate": 0.0001, "loss": 0.0117, "step": 79050 }, { "epoch": 520.1315789473684, "grad_norm": 2.0090198516845703, "learning_rate": 0.0001, "loss": 0.0146, "step": 79060 }, { "epoch": 520.1973684210526, "grad_norm": 1.6994414329528809, "learning_rate": 0.0001, "loss": 0.0118, "step": 79070 }, { "epoch": 520.2631578947369, "grad_norm": 1.3389440774917603, "learning_rate": 0.0001, "loss": 0.0129, "step": 79080 }, { "epoch": 520.328947368421, "grad_norm": 1.7537122964859009, "learning_rate": 0.0001, "loss": 0.0144, "step": 79090 }, { "epoch": 520.3947368421053, "grad_norm": 1.399065375328064, "learning_rate": 0.0001, "loss": 0.0144, "step": 79100 }, { "epoch": 520.4605263157895, "grad_norm": 1.6914067268371582, "learning_rate": 0.0001, "loss": 0.0143, "step": 79110 }, { "epoch": 520.5263157894736, "grad_norm": 1.4344407320022583, "learning_rate": 0.0001, "loss": 0.0223, "step": 79120 }, { "epoch": 520.5921052631579, "grad_norm": 1.7404720783233643, "learning_rate": 0.0001, "loss": 0.0157, "step": 79130 }, { "epoch": 520.6578947368421, "grad_norm": 1.7119760513305664, "learning_rate": 0.0001, "loss": 0.0173, "step": 79140 }, { "epoch": 520.7236842105264, "grad_norm": 1.2749295234680176, "learning_rate": 0.0001, "loss": 0.0126, "step": 79150 }, { "epoch": 520.7894736842105, "grad_norm": 1.586730718612671, "learning_rate": 0.0001, "loss": 0.0153, "step": 79160 }, { "epoch": 520.8552631578947, "grad_norm": 1.7551357746124268, "learning_rate": 0.0001, "loss": 0.0154, "step": 79170 }, { "epoch": 520.921052631579, "grad_norm": 1.7971206903457642, "learning_rate": 0.0001, "loss": 0.013, "step": 79180 }, { "epoch": 520.9868421052631, "grad_norm": 1.3483809232711792, "learning_rate": 0.0001, "loss": 0.017, "step": 79190 }, { "epoch": 521.0526315789474, "grad_norm": 1.5869762897491455, "learning_rate": 0.0001, "loss": 0.0143, "step": 79200 }, { "epoch": 521.1184210526316, "grad_norm": 1.521907091140747, "learning_rate": 0.0001, "loss": 0.0167, "step": 79210 }, { "epoch": 521.1842105263158, "grad_norm": 1.4516512155532837, "learning_rate": 0.0001, "loss": 0.0203, "step": 79220 }, { "epoch": 521.25, "grad_norm": 1.7292051315307617, "learning_rate": 0.0001, "loss": 0.0148, "step": 79230 }, { "epoch": 521.3157894736842, "grad_norm": 1.3430010080337524, "learning_rate": 0.0001, "loss": 0.015, "step": 79240 }, { "epoch": 521.3815789473684, "grad_norm": 1.5055731534957886, "learning_rate": 0.0001, "loss": 0.0128, "step": 79250 }, { "epoch": 521.4473684210526, "grad_norm": 1.864823579788208, "learning_rate": 0.0001, "loss": 0.0118, "step": 79260 }, { "epoch": 521.5131578947369, "grad_norm": 1.5422966480255127, "learning_rate": 0.0001, "loss": 0.0115, "step": 79270 }, { "epoch": 521.578947368421, "grad_norm": 1.7012726068496704, "learning_rate": 0.0001, "loss": 0.0131, "step": 79280 }, { "epoch": 521.6447368421053, "grad_norm": 1.23032546043396, "learning_rate": 0.0001, "loss": 0.017, "step": 79290 }, { "epoch": 521.7105263157895, "grad_norm": 1.4074506759643555, "learning_rate": 0.0001, "loss": 0.0145, "step": 79300 }, { "epoch": 521.7763157894736, "grad_norm": 1.2455923557281494, "learning_rate": 0.0001, "loss": 0.0128, "step": 79310 }, { "epoch": 521.8421052631579, "grad_norm": 1.4207082986831665, "learning_rate": 0.0001, "loss": 0.0139, "step": 79320 }, { "epoch": 521.9078947368421, "grad_norm": 0.9342218041419983, "learning_rate": 0.0001, "loss": 0.0153, "step": 79330 }, { "epoch": 521.9736842105264, "grad_norm": 1.493393063545227, "learning_rate": 0.0001, "loss": 0.0148, "step": 79340 }, { "epoch": 522.0394736842105, "grad_norm": 1.4183131456375122, "learning_rate": 0.0001, "loss": 0.02, "step": 79350 }, { "epoch": 522.1052631578947, "grad_norm": 1.3750898838043213, "learning_rate": 0.0001, "loss": 0.0153, "step": 79360 }, { "epoch": 522.171052631579, "grad_norm": 1.8555625677108765, "learning_rate": 0.0001, "loss": 0.0159, "step": 79370 }, { "epoch": 522.2368421052631, "grad_norm": 1.5366929769515991, "learning_rate": 0.0001, "loss": 0.0144, "step": 79380 }, { "epoch": 522.3026315789474, "grad_norm": 1.3235769271850586, "learning_rate": 0.0001, "loss": 0.012, "step": 79390 }, { "epoch": 522.3684210526316, "grad_norm": 1.584281325340271, "learning_rate": 0.0001, "loss": 0.0143, "step": 79400 }, { "epoch": 522.4342105263158, "grad_norm": 1.2774513959884644, "learning_rate": 0.0001, "loss": 0.0128, "step": 79410 }, { "epoch": 522.5, "grad_norm": 1.3047441244125366, "learning_rate": 0.0001, "loss": 0.0146, "step": 79420 }, { "epoch": 522.5657894736842, "grad_norm": 1.223201036453247, "learning_rate": 0.0001, "loss": 0.0151, "step": 79430 }, { "epoch": 522.6315789473684, "grad_norm": 1.3120249509811401, "learning_rate": 0.0001, "loss": 0.0161, "step": 79440 }, { "epoch": 522.6973684210526, "grad_norm": 1.3300862312316895, "learning_rate": 0.0001, "loss": 0.0208, "step": 79450 }, { "epoch": 522.7631578947369, "grad_norm": 1.5978916883468628, "learning_rate": 0.0001, "loss": 0.0161, "step": 79460 }, { "epoch": 522.828947368421, "grad_norm": 1.2107418775558472, "learning_rate": 0.0001, "loss": 0.0132, "step": 79470 }, { "epoch": 522.8947368421053, "grad_norm": 1.2458696365356445, "learning_rate": 0.0001, "loss": 0.0128, "step": 79480 }, { "epoch": 522.9605263157895, "grad_norm": 1.289477825164795, "learning_rate": 0.0001, "loss": 0.0149, "step": 79490 }, { "epoch": 523.0263157894736, "grad_norm": 1.095612645149231, "learning_rate": 0.0001, "loss": 0.018, "step": 79500 }, { "epoch": 523.0921052631579, "grad_norm": 1.2509464025497437, "learning_rate": 0.0001, "loss": 0.0131, "step": 79510 }, { "epoch": 523.1578947368421, "grad_norm": 1.6297239065170288, "learning_rate": 0.0001, "loss": 0.014, "step": 79520 }, { "epoch": 523.2236842105264, "grad_norm": 1.9298033714294434, "learning_rate": 0.0001, "loss": 0.0157, "step": 79530 }, { "epoch": 523.2894736842105, "grad_norm": 1.755323052406311, "learning_rate": 0.0001, "loss": 0.0176, "step": 79540 }, { "epoch": 523.3552631578947, "grad_norm": 1.5747076272964478, "learning_rate": 0.0001, "loss": 0.0153, "step": 79550 }, { "epoch": 523.421052631579, "grad_norm": 1.8858953714370728, "learning_rate": 0.0001, "loss": 0.0125, "step": 79560 }, { "epoch": 523.4868421052631, "grad_norm": 1.1096670627593994, "learning_rate": 0.0001, "loss": 0.0159, "step": 79570 }, { "epoch": 523.5526315789474, "grad_norm": 1.7840094566345215, "learning_rate": 0.0001, "loss": 0.0148, "step": 79580 }, { "epoch": 523.6184210526316, "grad_norm": 1.6429518461227417, "learning_rate": 0.0001, "loss": 0.0147, "step": 79590 }, { "epoch": 523.6842105263158, "grad_norm": 1.5526165962219238, "learning_rate": 0.0001, "loss": 0.017, "step": 79600 }, { "epoch": 523.75, "grad_norm": 1.4685629606246948, "learning_rate": 0.0001, "loss": 0.0137, "step": 79610 }, { "epoch": 523.8157894736842, "grad_norm": 1.8418145179748535, "learning_rate": 0.0001, "loss": 0.016, "step": 79620 }, { "epoch": 523.8815789473684, "grad_norm": 1.7747350931167603, "learning_rate": 0.0001, "loss": 0.0165, "step": 79630 }, { "epoch": 523.9473684210526, "grad_norm": 1.2194863557815552, "learning_rate": 0.0001, "loss": 0.0142, "step": 79640 }, { "epoch": 524.0131578947369, "grad_norm": 1.517539143562317, "learning_rate": 0.0001, "loss": 0.0174, "step": 79650 }, { "epoch": 524.078947368421, "grad_norm": 1.3809399604797363, "learning_rate": 0.0001, "loss": 0.0209, "step": 79660 }, { "epoch": 524.1447368421053, "grad_norm": 1.4752272367477417, "learning_rate": 0.0001, "loss": 0.0144, "step": 79670 }, { "epoch": 524.2105263157895, "grad_norm": 1.7056692838668823, "learning_rate": 0.0001, "loss": 0.0135, "step": 79680 }, { "epoch": 524.2763157894736, "grad_norm": 2.044816493988037, "learning_rate": 0.0001, "loss": 0.0147, "step": 79690 }, { "epoch": 524.3421052631579, "grad_norm": 1.639399528503418, "learning_rate": 0.0001, "loss": 0.013, "step": 79700 }, { "epoch": 524.4078947368421, "grad_norm": 1.3413691520690918, "learning_rate": 0.0001, "loss": 0.012, "step": 79710 }, { "epoch": 524.4736842105264, "grad_norm": 1.339583158493042, "learning_rate": 0.0001, "loss": 0.0175, "step": 79720 }, { "epoch": 524.5394736842105, "grad_norm": 1.2523384094238281, "learning_rate": 0.0001, "loss": 0.0154, "step": 79730 }, { "epoch": 524.6052631578947, "grad_norm": 1.275567889213562, "learning_rate": 0.0001, "loss": 0.0146, "step": 79740 }, { "epoch": 524.671052631579, "grad_norm": 1.3582254648208618, "learning_rate": 0.0001, "loss": 0.0135, "step": 79750 }, { "epoch": 524.7368421052631, "grad_norm": 1.703102946281433, "learning_rate": 0.0001, "loss": 0.0171, "step": 79760 }, { "epoch": 524.8026315789474, "grad_norm": 1.5917879343032837, "learning_rate": 0.0001, "loss": 0.0157, "step": 79770 }, { "epoch": 524.8684210526316, "grad_norm": 1.3786401748657227, "learning_rate": 0.0001, "loss": 0.016, "step": 79780 }, { "epoch": 524.9342105263158, "grad_norm": 1.4341083765029907, "learning_rate": 0.0001, "loss": 0.0134, "step": 79790 }, { "epoch": 525.0, "grad_norm": 1.3615684509277344, "learning_rate": 0.0001, "loss": 0.015, "step": 79800 }, { "epoch": 525.0657894736842, "grad_norm": 1.6206470727920532, "learning_rate": 0.0001, "loss": 0.0163, "step": 79810 }, { "epoch": 525.1315789473684, "grad_norm": 1.2525004148483276, "learning_rate": 0.0001, "loss": 0.0162, "step": 79820 }, { "epoch": 525.1973684210526, "grad_norm": 1.323256015777588, "learning_rate": 0.0001, "loss": 0.0172, "step": 79830 }, { "epoch": 525.2631578947369, "grad_norm": 1.7311328649520874, "learning_rate": 0.0001, "loss": 0.0176, "step": 79840 }, { "epoch": 525.328947368421, "grad_norm": 1.38692045211792, "learning_rate": 0.0001, "loss": 0.0144, "step": 79850 }, { "epoch": 525.3947368421053, "grad_norm": 1.3585596084594727, "learning_rate": 0.0001, "loss": 0.014, "step": 79860 }, { "epoch": 525.4605263157895, "grad_norm": 1.6047114133834839, "learning_rate": 0.0001, "loss": 0.0138, "step": 79870 }, { "epoch": 525.5263157894736, "grad_norm": 1.0237034559249878, "learning_rate": 0.0001, "loss": 0.022, "step": 79880 }, { "epoch": 525.5921052631579, "grad_norm": 1.5210776329040527, "learning_rate": 0.0001, "loss": 0.0152, "step": 79890 }, { "epoch": 525.6578947368421, "grad_norm": 1.2930454015731812, "learning_rate": 0.0001, "loss": 0.0133, "step": 79900 }, { "epoch": 525.7236842105264, "grad_norm": 1.2324477434158325, "learning_rate": 0.0001, "loss": 0.0162, "step": 79910 }, { "epoch": 525.7894736842105, "grad_norm": 1.2471582889556885, "learning_rate": 0.0001, "loss": 0.0127, "step": 79920 }, { "epoch": 525.8552631578947, "grad_norm": 1.3026273250579834, "learning_rate": 0.0001, "loss": 0.0134, "step": 79930 }, { "epoch": 525.921052631579, "grad_norm": 1.316771388053894, "learning_rate": 0.0001, "loss": 0.0156, "step": 79940 }, { "epoch": 525.9868421052631, "grad_norm": 1.732629418373108, "learning_rate": 0.0001, "loss": 0.0132, "step": 79950 }, { "epoch": 526.0526315789474, "grad_norm": 1.3699272871017456, "learning_rate": 0.0001, "loss": 0.015, "step": 79960 }, { "epoch": 526.1184210526316, "grad_norm": 1.5622645616531372, "learning_rate": 0.0001, "loss": 0.0164, "step": 79970 }, { "epoch": 526.1842105263158, "grad_norm": 1.3124730587005615, "learning_rate": 0.0001, "loss": 0.0138, "step": 79980 }, { "epoch": 526.25, "grad_norm": 1.6033742427825928, "learning_rate": 0.0001, "loss": 0.0159, "step": 79990 }, { "epoch": 526.3157894736842, "grad_norm": 1.3908029794692993, "learning_rate": 0.0001, "loss": 0.0137, "step": 80000 }, { "epoch": 526.3815789473684, "grad_norm": 1.4633218050003052, "learning_rate": 0.0001, "loss": 0.0165, "step": 80010 }, { "epoch": 526.4473684210526, "grad_norm": 1.4716192483901978, "learning_rate": 0.0001, "loss": 0.0148, "step": 80020 }, { "epoch": 526.5131578947369, "grad_norm": 2.07474422454834, "learning_rate": 0.0001, "loss": 0.0147, "step": 80030 }, { "epoch": 526.578947368421, "grad_norm": 1.3329999446868896, "learning_rate": 0.0001, "loss": 0.0136, "step": 80040 }, { "epoch": 526.6447368421053, "grad_norm": 1.3131816387176514, "learning_rate": 0.0001, "loss": 0.019, "step": 80050 }, { "epoch": 526.7105263157895, "grad_norm": 1.6769335269927979, "learning_rate": 0.0001, "loss": 0.0125, "step": 80060 }, { "epoch": 526.7763157894736, "grad_norm": 1.259644865989685, "learning_rate": 0.0001, "loss": 0.0117, "step": 80070 }, { "epoch": 526.8421052631579, "grad_norm": 1.6030129194259644, "learning_rate": 0.0001, "loss": 0.0177, "step": 80080 }, { "epoch": 526.9078947368421, "grad_norm": 1.242616891860962, "learning_rate": 0.0001, "loss": 0.0152, "step": 80090 }, { "epoch": 526.9736842105264, "grad_norm": 1.4458589553833008, "learning_rate": 0.0001, "loss": 0.0149, "step": 80100 }, { "epoch": 527.0394736842105, "grad_norm": 1.2229293584823608, "learning_rate": 0.0001, "loss": 0.0142, "step": 80110 }, { "epoch": 527.1052631578947, "grad_norm": 1.6411150693893433, "learning_rate": 0.0001, "loss": 0.013, "step": 80120 }, { "epoch": 527.171052631579, "grad_norm": 1.5800772905349731, "learning_rate": 0.0001, "loss": 0.0169, "step": 80130 }, { "epoch": 527.2368421052631, "grad_norm": 1.5666711330413818, "learning_rate": 0.0001, "loss": 0.0134, "step": 80140 }, { "epoch": 527.3026315789474, "grad_norm": 1.6092156171798706, "learning_rate": 0.0001, "loss": 0.0167, "step": 80150 }, { "epoch": 527.3684210526316, "grad_norm": 1.0313538312911987, "learning_rate": 0.0001, "loss": 0.0148, "step": 80160 }, { "epoch": 527.4342105263158, "grad_norm": 1.7157047986984253, "learning_rate": 0.0001, "loss": 0.0124, "step": 80170 }, { "epoch": 527.5, "grad_norm": 1.4934759140014648, "learning_rate": 0.0001, "loss": 0.0152, "step": 80180 }, { "epoch": 527.5657894736842, "grad_norm": 1.4990994930267334, "learning_rate": 0.0001, "loss": 0.0182, "step": 80190 }, { "epoch": 527.6315789473684, "grad_norm": 1.5815012454986572, "learning_rate": 0.0001, "loss": 0.0138, "step": 80200 }, { "epoch": 527.6973684210526, "grad_norm": 1.4959518909454346, "learning_rate": 0.0001, "loss": 0.0133, "step": 80210 }, { "epoch": 527.7631578947369, "grad_norm": 1.4053148031234741, "learning_rate": 0.0001, "loss": 0.0151, "step": 80220 }, { "epoch": 527.828947368421, "grad_norm": 1.582045078277588, "learning_rate": 0.0001, "loss": 0.0139, "step": 80230 }, { "epoch": 527.8947368421053, "grad_norm": 1.287261724472046, "learning_rate": 0.0001, "loss": 0.0181, "step": 80240 }, { "epoch": 527.9605263157895, "grad_norm": 1.3031500577926636, "learning_rate": 0.0001, "loss": 0.0164, "step": 80250 }, { "epoch": 528.0263157894736, "grad_norm": 1.4735586643218994, "learning_rate": 0.0001, "loss": 0.0146, "step": 80260 }, { "epoch": 528.0921052631579, "grad_norm": 1.5825307369232178, "learning_rate": 0.0001, "loss": 0.0131, "step": 80270 }, { "epoch": 528.1578947368421, "grad_norm": 1.6319355964660645, "learning_rate": 0.0001, "loss": 0.0142, "step": 80280 }, { "epoch": 528.2236842105264, "grad_norm": 1.7350597381591797, "learning_rate": 0.0001, "loss": 0.013, "step": 80290 }, { "epoch": 528.2894736842105, "grad_norm": 1.5361098051071167, "learning_rate": 0.0001, "loss": 0.018, "step": 80300 }, { "epoch": 528.3552631578947, "grad_norm": 1.4900190830230713, "learning_rate": 0.0001, "loss": 0.0138, "step": 80310 }, { "epoch": 528.421052631579, "grad_norm": 1.8265743255615234, "learning_rate": 0.0001, "loss": 0.015, "step": 80320 }, { "epoch": 528.4868421052631, "grad_norm": 1.2781810760498047, "learning_rate": 0.0001, "loss": 0.0169, "step": 80330 }, { "epoch": 528.5526315789474, "grad_norm": 1.8287792205810547, "learning_rate": 0.0001, "loss": 0.019, "step": 80340 }, { "epoch": 528.6184210526316, "grad_norm": 1.562859296798706, "learning_rate": 0.0001, "loss": 0.016, "step": 80350 }, { "epoch": 528.6842105263158, "grad_norm": 1.4391525983810425, "learning_rate": 0.0001, "loss": 0.0142, "step": 80360 }, { "epoch": 528.75, "grad_norm": 1.5240473747253418, "learning_rate": 0.0001, "loss": 0.0113, "step": 80370 }, { "epoch": 528.8157894736842, "grad_norm": 1.312242865562439, "learning_rate": 0.0001, "loss": 0.0163, "step": 80380 }, { "epoch": 528.8815789473684, "grad_norm": 1.6118967533111572, "learning_rate": 0.0001, "loss": 0.0144, "step": 80390 }, { "epoch": 528.9473684210526, "grad_norm": 1.7901450395584106, "learning_rate": 0.0001, "loss": 0.0165, "step": 80400 }, { "epoch": 529.0131578947369, "grad_norm": 1.2341535091400146, "learning_rate": 0.0001, "loss": 0.0161, "step": 80410 }, { "epoch": 529.078947368421, "grad_norm": 1.369691014289856, "learning_rate": 0.0001, "loss": 0.0175, "step": 80420 }, { "epoch": 529.1447368421053, "grad_norm": 1.6043181419372559, "learning_rate": 0.0001, "loss": 0.0152, "step": 80430 }, { "epoch": 529.2105263157895, "grad_norm": 0.9708279371261597, "learning_rate": 0.0001, "loss": 0.0144, "step": 80440 }, { "epoch": 529.2763157894736, "grad_norm": 1.8322980403900146, "learning_rate": 0.0001, "loss": 0.0168, "step": 80450 }, { "epoch": 529.3421052631579, "grad_norm": 1.7646974325180054, "learning_rate": 0.0001, "loss": 0.0116, "step": 80460 }, { "epoch": 529.4078947368421, "grad_norm": 1.9026564359664917, "learning_rate": 0.0001, "loss": 0.016, "step": 80470 }, { "epoch": 529.4736842105264, "grad_norm": 1.3870747089385986, "learning_rate": 0.0001, "loss": 0.0129, "step": 80480 }, { "epoch": 529.5394736842105, "grad_norm": 1.4140759706497192, "learning_rate": 0.0001, "loss": 0.0126, "step": 80490 }, { "epoch": 529.6052631578947, "grad_norm": 1.1774367094039917, "learning_rate": 0.0001, "loss": 0.0136, "step": 80500 }, { "epoch": 529.671052631579, "grad_norm": 2.0363216400146484, "learning_rate": 0.0001, "loss": 0.016, "step": 80510 }, { "epoch": 529.7368421052631, "grad_norm": 1.3702092170715332, "learning_rate": 0.0001, "loss": 0.015, "step": 80520 }, { "epoch": 529.8026315789474, "grad_norm": 1.4623204469680786, "learning_rate": 0.0001, "loss": 0.0125, "step": 80530 }, { "epoch": 529.8684210526316, "grad_norm": 1.4077658653259277, "learning_rate": 0.0001, "loss": 0.0185, "step": 80540 }, { "epoch": 529.9342105263158, "grad_norm": 1.7905974388122559, "learning_rate": 0.0001, "loss": 0.0182, "step": 80550 }, { "epoch": 530.0, "grad_norm": 1.9791425466537476, "learning_rate": 0.0001, "loss": 0.0151, "step": 80560 }, { "epoch": 530.0657894736842, "grad_norm": 1.4928035736083984, "learning_rate": 0.0001, "loss": 0.0178, "step": 80570 }, { "epoch": 530.1315789473684, "grad_norm": 1.6543877124786377, "learning_rate": 0.0001, "loss": 0.0161, "step": 80580 }, { "epoch": 530.1973684210526, "grad_norm": 1.5735337734222412, "learning_rate": 0.0001, "loss": 0.0155, "step": 80590 }, { "epoch": 530.2631578947369, "grad_norm": 1.7627605199813843, "learning_rate": 0.0001, "loss": 0.0143, "step": 80600 }, { "epoch": 530.328947368421, "grad_norm": 1.649845004081726, "learning_rate": 0.0001, "loss": 0.0133, "step": 80610 }, { "epoch": 530.3947368421053, "grad_norm": 1.5767978429794312, "learning_rate": 0.0001, "loss": 0.0145, "step": 80620 }, { "epoch": 530.4605263157895, "grad_norm": 1.641520619392395, "learning_rate": 0.0001, "loss": 0.0151, "step": 80630 }, { "epoch": 530.5263157894736, "grad_norm": 1.1951018571853638, "learning_rate": 0.0001, "loss": 0.0169, "step": 80640 }, { "epoch": 530.5921052631579, "grad_norm": 1.3762896060943604, "learning_rate": 0.0001, "loss": 0.0132, "step": 80650 }, { "epoch": 530.6578947368421, "grad_norm": 1.3630194664001465, "learning_rate": 0.0001, "loss": 0.0144, "step": 80660 }, { "epoch": 530.7236842105264, "grad_norm": 1.4164966344833374, "learning_rate": 0.0001, "loss": 0.0142, "step": 80670 }, { "epoch": 530.7894736842105, "grad_norm": 1.4385826587677002, "learning_rate": 0.0001, "loss": 0.0166, "step": 80680 }, { "epoch": 530.8552631578947, "grad_norm": 1.406311273574829, "learning_rate": 0.0001, "loss": 0.0153, "step": 80690 }, { "epoch": 530.921052631579, "grad_norm": 1.7487131357192993, "learning_rate": 0.0001, "loss": 0.0131, "step": 80700 }, { "epoch": 530.9868421052631, "grad_norm": 1.8451213836669922, "learning_rate": 0.0001, "loss": 0.0156, "step": 80710 }, { "epoch": 531.0526315789474, "grad_norm": 1.5335261821746826, "learning_rate": 0.0001, "loss": 0.0131, "step": 80720 }, { "epoch": 531.1184210526316, "grad_norm": 1.6302638053894043, "learning_rate": 0.0001, "loss": 0.014, "step": 80730 }, { "epoch": 531.1842105263158, "grad_norm": 1.6095701456069946, "learning_rate": 0.0001, "loss": 0.0121, "step": 80740 }, { "epoch": 531.25, "grad_norm": 1.1401814222335815, "learning_rate": 0.0001, "loss": 0.0119, "step": 80750 }, { "epoch": 531.3157894736842, "grad_norm": 1.248806357383728, "learning_rate": 0.0001, "loss": 0.0161, "step": 80760 }, { "epoch": 531.3815789473684, "grad_norm": 1.6223336458206177, "learning_rate": 0.0001, "loss": 0.0186, "step": 80770 }, { "epoch": 531.4473684210526, "grad_norm": 1.2290114164352417, "learning_rate": 0.0001, "loss": 0.0167, "step": 80780 }, { "epoch": 531.5131578947369, "grad_norm": 1.2272759675979614, "learning_rate": 0.0001, "loss": 0.0152, "step": 80790 }, { "epoch": 531.578947368421, "grad_norm": 1.5089548826217651, "learning_rate": 0.0001, "loss": 0.0149, "step": 80800 }, { "epoch": 531.6447368421053, "grad_norm": 1.4120644330978394, "learning_rate": 0.0001, "loss": 0.0186, "step": 80810 }, { "epoch": 531.7105263157895, "grad_norm": 1.693547010421753, "learning_rate": 0.0001, "loss": 0.0166, "step": 80820 }, { "epoch": 531.7763157894736, "grad_norm": 1.7752634286880493, "learning_rate": 0.0001, "loss": 0.0146, "step": 80830 }, { "epoch": 531.8421052631579, "grad_norm": 1.2594187259674072, "learning_rate": 0.0001, "loss": 0.0151, "step": 80840 }, { "epoch": 531.9078947368421, "grad_norm": 1.4254436492919922, "learning_rate": 0.0001, "loss": 0.0154, "step": 80850 }, { "epoch": 531.9736842105264, "grad_norm": 1.5467069149017334, "learning_rate": 0.0001, "loss": 0.0161, "step": 80860 }, { "epoch": 532.0394736842105, "grad_norm": 1.3708326816558838, "learning_rate": 0.0001, "loss": 0.013, "step": 80870 }, { "epoch": 532.1052631578947, "grad_norm": 1.7534289360046387, "learning_rate": 0.0001, "loss": 0.0147, "step": 80880 }, { "epoch": 532.171052631579, "grad_norm": 1.28664231300354, "learning_rate": 0.0001, "loss": 0.0157, "step": 80890 }, { "epoch": 532.2368421052631, "grad_norm": 1.4169259071350098, "learning_rate": 0.0001, "loss": 0.012, "step": 80900 }, { "epoch": 532.3026315789474, "grad_norm": 1.6619502305984497, "learning_rate": 0.0001, "loss": 0.0135, "step": 80910 }, { "epoch": 532.3684210526316, "grad_norm": 1.1986693143844604, "learning_rate": 0.0001, "loss": 0.0164, "step": 80920 }, { "epoch": 532.4342105263158, "grad_norm": 1.1938501596450806, "learning_rate": 0.0001, "loss": 0.0172, "step": 80930 }, { "epoch": 532.5, "grad_norm": 1.7883375883102417, "learning_rate": 0.0001, "loss": 0.0162, "step": 80940 }, { "epoch": 532.5657894736842, "grad_norm": 1.2743563652038574, "learning_rate": 0.0001, "loss": 0.0149, "step": 80950 }, { "epoch": 532.6315789473684, "grad_norm": 1.6572188138961792, "learning_rate": 0.0001, "loss": 0.0132, "step": 80960 }, { "epoch": 532.6973684210526, "grad_norm": 1.2241045236587524, "learning_rate": 0.0001, "loss": 0.0173, "step": 80970 }, { "epoch": 532.7631578947369, "grad_norm": 1.327074408531189, "learning_rate": 0.0001, "loss": 0.0164, "step": 80980 }, { "epoch": 532.828947368421, "grad_norm": 1.2590224742889404, "learning_rate": 0.0001, "loss": 0.0166, "step": 80990 }, { "epoch": 532.8947368421053, "grad_norm": 1.5635645389556885, "learning_rate": 0.0001, "loss": 0.0122, "step": 81000 }, { "epoch": 532.9605263157895, "grad_norm": 1.3392990827560425, "learning_rate": 0.0001, "loss": 0.0132, "step": 81010 }, { "epoch": 533.0263157894736, "grad_norm": 1.7670267820358276, "learning_rate": 0.0001, "loss": 0.0179, "step": 81020 }, { "epoch": 533.0921052631579, "grad_norm": 1.2230597734451294, "learning_rate": 0.0001, "loss": 0.0147, "step": 81030 }, { "epoch": 533.1578947368421, "grad_norm": 1.6184121370315552, "learning_rate": 0.0001, "loss": 0.0138, "step": 81040 }, { "epoch": 533.2236842105264, "grad_norm": 1.5308752059936523, "learning_rate": 0.0001, "loss": 0.0124, "step": 81050 }, { "epoch": 533.2894736842105, "grad_norm": 1.875679850578308, "learning_rate": 0.0001, "loss": 0.0183, "step": 81060 }, { "epoch": 533.3552631578947, "grad_norm": 1.6020861864089966, "learning_rate": 0.0001, "loss": 0.0132, "step": 81070 }, { "epoch": 533.421052631579, "grad_norm": 1.419869065284729, "learning_rate": 0.0001, "loss": 0.0121, "step": 81080 }, { "epoch": 533.4868421052631, "grad_norm": 1.6877670288085938, "learning_rate": 0.0001, "loss": 0.0136, "step": 81090 }, { "epoch": 533.5526315789474, "grad_norm": 1.1632726192474365, "learning_rate": 0.0001, "loss": 0.0162, "step": 81100 }, { "epoch": 533.6184210526316, "grad_norm": 1.7739782333374023, "learning_rate": 0.0001, "loss": 0.0189, "step": 81110 }, { "epoch": 533.6842105263158, "grad_norm": 1.70905339717865, "learning_rate": 0.0001, "loss": 0.0171, "step": 81120 }, { "epoch": 533.75, "grad_norm": 1.5189541578292847, "learning_rate": 0.0001, "loss": 0.014, "step": 81130 }, { "epoch": 533.8157894736842, "grad_norm": 1.5684700012207031, "learning_rate": 0.0001, "loss": 0.0178, "step": 81140 }, { "epoch": 533.8815789473684, "grad_norm": 0.8826894164085388, "learning_rate": 0.0001, "loss": 0.0144, "step": 81150 }, { "epoch": 533.9473684210526, "grad_norm": 1.308947205543518, "learning_rate": 0.0001, "loss": 0.016, "step": 81160 }, { "epoch": 534.0131578947369, "grad_norm": 1.772660493850708, "learning_rate": 0.0001, "loss": 0.0136, "step": 81170 }, { "epoch": 534.078947368421, "grad_norm": 1.4692038297653198, "learning_rate": 0.0001, "loss": 0.0126, "step": 81180 }, { "epoch": 534.1447368421053, "grad_norm": 1.6801111698150635, "learning_rate": 0.0001, "loss": 0.0132, "step": 81190 }, { "epoch": 534.2105263157895, "grad_norm": 1.7434461116790771, "learning_rate": 0.0001, "loss": 0.0124, "step": 81200 }, { "epoch": 534.2763157894736, "grad_norm": 1.5177425146102905, "learning_rate": 0.0001, "loss": 0.0156, "step": 81210 }, { "epoch": 534.3421052631579, "grad_norm": 1.2447545528411865, "learning_rate": 0.0001, "loss": 0.0171, "step": 81220 }, { "epoch": 534.4078947368421, "grad_norm": 1.2827783823013306, "learning_rate": 0.0001, "loss": 0.0155, "step": 81230 }, { "epoch": 534.4736842105264, "grad_norm": 1.4236663579940796, "learning_rate": 0.0001, "loss": 0.015, "step": 81240 }, { "epoch": 534.5394736842105, "grad_norm": 1.6420739889144897, "learning_rate": 0.0001, "loss": 0.0122, "step": 81250 }, { "epoch": 534.6052631578947, "grad_norm": 1.1811920404434204, "learning_rate": 0.0001, "loss": 0.0123, "step": 81260 }, { "epoch": 534.671052631579, "grad_norm": 1.5700269937515259, "learning_rate": 0.0001, "loss": 0.0154, "step": 81270 }, { "epoch": 534.7368421052631, "grad_norm": 1.7134915590286255, "learning_rate": 0.0001, "loss": 0.0179, "step": 81280 }, { "epoch": 534.8026315789474, "grad_norm": 2.09460186958313, "learning_rate": 0.0001, "loss": 0.0166, "step": 81290 }, { "epoch": 534.8684210526316, "grad_norm": 1.865478515625, "learning_rate": 0.0001, "loss": 0.0161, "step": 81300 }, { "epoch": 534.9342105263158, "grad_norm": 1.5831091403961182, "learning_rate": 0.0001, "loss": 0.0148, "step": 81310 }, { "epoch": 535.0, "grad_norm": 1.6841051578521729, "learning_rate": 0.0001, "loss": 0.0192, "step": 81320 }, { "epoch": 535.0657894736842, "grad_norm": 1.4260293245315552, "learning_rate": 0.0001, "loss": 0.0161, "step": 81330 }, { "epoch": 535.1315789473684, "grad_norm": 1.9814919233322144, "learning_rate": 0.0001, "loss": 0.0124, "step": 81340 }, { "epoch": 535.1973684210526, "grad_norm": 1.4352827072143555, "learning_rate": 0.0001, "loss": 0.013, "step": 81350 }, { "epoch": 535.2631578947369, "grad_norm": 1.5279399156570435, "learning_rate": 0.0001, "loss": 0.0141, "step": 81360 }, { "epoch": 535.328947368421, "grad_norm": 1.568665862083435, "learning_rate": 0.0001, "loss": 0.015, "step": 81370 }, { "epoch": 535.3947368421053, "grad_norm": 1.5592379570007324, "learning_rate": 0.0001, "loss": 0.0188, "step": 81380 }, { "epoch": 535.4605263157895, "grad_norm": 1.7706671953201294, "learning_rate": 0.0001, "loss": 0.0182, "step": 81390 }, { "epoch": 535.5263157894736, "grad_norm": 1.4576255083084106, "learning_rate": 0.0001, "loss": 0.0148, "step": 81400 }, { "epoch": 535.5921052631579, "grad_norm": 1.789363980293274, "learning_rate": 0.0001, "loss": 0.0115, "step": 81410 }, { "epoch": 535.6578947368421, "grad_norm": 1.8225457668304443, "learning_rate": 0.0001, "loss": 0.0153, "step": 81420 }, { "epoch": 535.7236842105264, "grad_norm": 1.4309340715408325, "learning_rate": 0.0001, "loss": 0.0127, "step": 81430 }, { "epoch": 535.7894736842105, "grad_norm": 1.6319985389709473, "learning_rate": 0.0001, "loss": 0.0178, "step": 81440 }, { "epoch": 535.8552631578947, "grad_norm": 1.5413730144500732, "learning_rate": 0.0001, "loss": 0.0119, "step": 81450 }, { "epoch": 535.921052631579, "grad_norm": 1.8647651672363281, "learning_rate": 0.0001, "loss": 0.0123, "step": 81460 }, { "epoch": 535.9868421052631, "grad_norm": 1.5518134832382202, "learning_rate": 0.0001, "loss": 0.0161, "step": 81470 }, { "epoch": 536.0526315789474, "grad_norm": 1.5864611864089966, "learning_rate": 0.0001, "loss": 0.0139, "step": 81480 }, { "epoch": 536.1184210526316, "grad_norm": 1.4064335823059082, "learning_rate": 0.0001, "loss": 0.0123, "step": 81490 }, { "epoch": 536.1842105263158, "grad_norm": 1.2399344444274902, "learning_rate": 0.0001, "loss": 0.0134, "step": 81500 }, { "epoch": 536.25, "grad_norm": 1.4134180545806885, "learning_rate": 0.0001, "loss": 0.0161, "step": 81510 }, { "epoch": 536.3157894736842, "grad_norm": 1.3409417867660522, "learning_rate": 0.0001, "loss": 0.0143, "step": 81520 }, { "epoch": 536.3815789473684, "grad_norm": 1.5445396900177002, "learning_rate": 0.0001, "loss": 0.0153, "step": 81530 }, { "epoch": 536.4473684210526, "grad_norm": 0.9332454204559326, "learning_rate": 0.0001, "loss": 0.0169, "step": 81540 }, { "epoch": 536.5131578947369, "grad_norm": 1.3171385526657104, "learning_rate": 0.0001, "loss": 0.0163, "step": 81550 }, { "epoch": 536.578947368421, "grad_norm": 1.0414003133773804, "learning_rate": 0.0001, "loss": 0.0146, "step": 81560 }, { "epoch": 536.6447368421053, "grad_norm": 1.8713520765304565, "learning_rate": 0.0001, "loss": 0.0164, "step": 81570 }, { "epoch": 536.7105263157895, "grad_norm": 1.5036673545837402, "learning_rate": 0.0001, "loss": 0.0155, "step": 81580 }, { "epoch": 536.7763157894736, "grad_norm": 2.0524587631225586, "learning_rate": 0.0001, "loss": 0.0144, "step": 81590 }, { "epoch": 536.8421052631579, "grad_norm": 1.8785336017608643, "learning_rate": 0.0001, "loss": 0.0132, "step": 81600 }, { "epoch": 536.9078947368421, "grad_norm": 1.6223664283752441, "learning_rate": 0.0001, "loss": 0.0159, "step": 81610 }, { "epoch": 536.9736842105264, "grad_norm": 1.4918690919876099, "learning_rate": 0.0001, "loss": 0.0134, "step": 81620 }, { "epoch": 537.0394736842105, "grad_norm": 1.9063459634780884, "learning_rate": 0.0001, "loss": 0.016, "step": 81630 }, { "epoch": 537.1052631578947, "grad_norm": 2.0645580291748047, "learning_rate": 0.0001, "loss": 0.0146, "step": 81640 }, { "epoch": 537.171052631579, "grad_norm": 1.153830885887146, "learning_rate": 0.0001, "loss": 0.0113, "step": 81650 }, { "epoch": 537.2368421052631, "grad_norm": 1.5038799047470093, "learning_rate": 0.0001, "loss": 0.0115, "step": 81660 }, { "epoch": 537.3026315789474, "grad_norm": 1.4954423904418945, "learning_rate": 0.0001, "loss": 0.0151, "step": 81670 }, { "epoch": 537.3684210526316, "grad_norm": 1.8136004209518433, "learning_rate": 0.0001, "loss": 0.0143, "step": 81680 }, { "epoch": 537.4342105263158, "grad_norm": 1.018322229385376, "learning_rate": 0.0001, "loss": 0.0137, "step": 81690 }, { "epoch": 537.5, "grad_norm": 1.3032034635543823, "learning_rate": 0.0001, "loss": 0.0142, "step": 81700 }, { "epoch": 537.5657894736842, "grad_norm": 1.5259714126586914, "learning_rate": 0.0001, "loss": 0.0164, "step": 81710 }, { "epoch": 537.6315789473684, "grad_norm": 1.815312385559082, "learning_rate": 0.0001, "loss": 0.0127, "step": 81720 }, { "epoch": 537.6973684210526, "grad_norm": 1.8345699310302734, "learning_rate": 0.0001, "loss": 0.0178, "step": 81730 }, { "epoch": 537.7631578947369, "grad_norm": 2.0618021488189697, "learning_rate": 0.0001, "loss": 0.0132, "step": 81740 }, { "epoch": 537.828947368421, "grad_norm": 2.0571067333221436, "learning_rate": 0.0001, "loss": 0.0197, "step": 81750 }, { "epoch": 537.8947368421053, "grad_norm": 2.1441967487335205, "learning_rate": 0.0001, "loss": 0.0137, "step": 81760 }, { "epoch": 537.9605263157895, "grad_norm": 1.6712493896484375, "learning_rate": 0.0001, "loss": 0.0168, "step": 81770 }, { "epoch": 538.0263157894736, "grad_norm": 1.6378908157348633, "learning_rate": 0.0001, "loss": 0.0153, "step": 81780 }, { "epoch": 538.0921052631579, "grad_norm": 1.5633963346481323, "learning_rate": 0.0001, "loss": 0.0176, "step": 81790 }, { "epoch": 538.1578947368421, "grad_norm": 2.1372053623199463, "learning_rate": 0.0001, "loss": 0.0117, "step": 81800 }, { "epoch": 538.2236842105264, "grad_norm": 1.264477252960205, "learning_rate": 0.0001, "loss": 0.0126, "step": 81810 }, { "epoch": 538.2894736842105, "grad_norm": 1.479417324066162, "learning_rate": 0.0001, "loss": 0.0167, "step": 81820 }, { "epoch": 538.3552631578947, "grad_norm": 1.6010982990264893, "learning_rate": 0.0001, "loss": 0.0132, "step": 81830 }, { "epoch": 538.421052631579, "grad_norm": 1.9203084707260132, "learning_rate": 0.0001, "loss": 0.0145, "step": 81840 }, { "epoch": 538.4868421052631, "grad_norm": 1.785528540611267, "learning_rate": 0.0001, "loss": 0.0133, "step": 81850 }, { "epoch": 538.5526315789474, "grad_norm": 1.1813907623291016, "learning_rate": 0.0001, "loss": 0.0143, "step": 81860 }, { "epoch": 538.6184210526316, "grad_norm": 1.1023783683776855, "learning_rate": 0.0001, "loss": 0.0118, "step": 81870 }, { "epoch": 538.6842105263158, "grad_norm": 1.6014553308486938, "learning_rate": 0.0001, "loss": 0.0137, "step": 81880 }, { "epoch": 538.75, "grad_norm": 1.8960716724395752, "learning_rate": 0.0001, "loss": 0.0179, "step": 81890 }, { "epoch": 538.8157894736842, "grad_norm": 1.9209967851638794, "learning_rate": 0.0001, "loss": 0.0144, "step": 81900 }, { "epoch": 538.8815789473684, "grad_norm": 1.6359492540359497, "learning_rate": 0.0001, "loss": 0.0154, "step": 81910 }, { "epoch": 538.9473684210526, "grad_norm": 1.3974671363830566, "learning_rate": 0.0001, "loss": 0.0128, "step": 81920 }, { "epoch": 539.0131578947369, "grad_norm": 1.5484881401062012, "learning_rate": 0.0001, "loss": 0.02, "step": 81930 }, { "epoch": 539.078947368421, "grad_norm": 1.4891020059585571, "learning_rate": 0.0001, "loss": 0.0182, "step": 81940 }, { "epoch": 539.1447368421053, "grad_norm": 1.6829500198364258, "learning_rate": 0.0001, "loss": 0.0134, "step": 81950 }, { "epoch": 539.2105263157895, "grad_norm": 1.741212010383606, "learning_rate": 0.0001, "loss": 0.0138, "step": 81960 }, { "epoch": 539.2763157894736, "grad_norm": 1.378859043121338, "learning_rate": 0.0001, "loss": 0.0158, "step": 81970 }, { "epoch": 539.3421052631579, "grad_norm": 1.1557037830352783, "learning_rate": 0.0001, "loss": 0.0182, "step": 81980 }, { "epoch": 539.4078947368421, "grad_norm": 1.175278663635254, "learning_rate": 0.0001, "loss": 0.017, "step": 81990 }, { "epoch": 539.4736842105264, "grad_norm": 1.435336947441101, "learning_rate": 0.0001, "loss": 0.0145, "step": 82000 }, { "epoch": 539.5394736842105, "grad_norm": 1.6423745155334473, "learning_rate": 0.0001, "loss": 0.0142, "step": 82010 }, { "epoch": 539.6052631578947, "grad_norm": 1.490557074546814, "learning_rate": 0.0001, "loss": 0.0169, "step": 82020 }, { "epoch": 539.671052631579, "grad_norm": 1.5819512605667114, "learning_rate": 0.0001, "loss": 0.0139, "step": 82030 }, { "epoch": 539.7368421052631, "grad_norm": 1.608263373374939, "learning_rate": 0.0001, "loss": 0.0131, "step": 82040 }, { "epoch": 539.8026315789474, "grad_norm": 1.6127790212631226, "learning_rate": 0.0001, "loss": 0.0155, "step": 82050 }, { "epoch": 539.8684210526316, "grad_norm": 1.4573118686676025, "learning_rate": 0.0001, "loss": 0.0141, "step": 82060 }, { "epoch": 539.9342105263158, "grad_norm": 1.4426038265228271, "learning_rate": 0.0001, "loss": 0.0118, "step": 82070 }, { "epoch": 540.0, "grad_norm": 1.2891063690185547, "learning_rate": 0.0001, "loss": 0.0151, "step": 82080 }, { "epoch": 540.0657894736842, "grad_norm": 1.7050213813781738, "learning_rate": 0.0001, "loss": 0.0122, "step": 82090 }, { "epoch": 540.1315789473684, "grad_norm": 1.5654021501541138, "learning_rate": 0.0001, "loss": 0.0145, "step": 82100 }, { "epoch": 540.1973684210526, "grad_norm": 1.7297507524490356, "learning_rate": 0.0001, "loss": 0.0144, "step": 82110 }, { "epoch": 540.2631578947369, "grad_norm": 1.3498557806015015, "learning_rate": 0.0001, "loss": 0.0147, "step": 82120 }, { "epoch": 540.328947368421, "grad_norm": 1.4877068996429443, "learning_rate": 0.0001, "loss": 0.0144, "step": 82130 }, { "epoch": 540.3947368421053, "grad_norm": 1.4819025993347168, "learning_rate": 0.0001, "loss": 0.0189, "step": 82140 }, { "epoch": 540.4605263157895, "grad_norm": 1.5175414085388184, "learning_rate": 0.0001, "loss": 0.015, "step": 82150 }, { "epoch": 540.5263157894736, "grad_norm": 1.3569287061691284, "learning_rate": 0.0001, "loss": 0.0153, "step": 82160 }, { "epoch": 540.5921052631579, "grad_norm": 1.5668613910675049, "learning_rate": 0.0001, "loss": 0.015, "step": 82170 }, { "epoch": 540.6578947368421, "grad_norm": 1.0309587717056274, "learning_rate": 0.0001, "loss": 0.0126, "step": 82180 }, { "epoch": 540.7236842105264, "grad_norm": 1.817919135093689, "learning_rate": 0.0001, "loss": 0.0167, "step": 82190 }, { "epoch": 540.7894736842105, "grad_norm": 1.475248098373413, "learning_rate": 0.0001, "loss": 0.0117, "step": 82200 }, { "epoch": 540.8552631578947, "grad_norm": 1.249004602432251, "learning_rate": 0.0001, "loss": 0.0187, "step": 82210 }, { "epoch": 540.921052631579, "grad_norm": 1.388653039932251, "learning_rate": 0.0001, "loss": 0.0142, "step": 82220 }, { "epoch": 540.9868421052631, "grad_norm": 1.3401439189910889, "learning_rate": 0.0001, "loss": 0.0155, "step": 82230 }, { "epoch": 541.0526315789474, "grad_norm": 1.5937236547470093, "learning_rate": 0.0001, "loss": 0.0172, "step": 82240 }, { "epoch": 541.1184210526316, "grad_norm": 1.7072265148162842, "learning_rate": 0.0001, "loss": 0.0175, "step": 82250 }, { "epoch": 541.1842105263158, "grad_norm": 1.7640790939331055, "learning_rate": 0.0001, "loss": 0.0172, "step": 82260 }, { "epoch": 541.25, "grad_norm": 1.4502534866333008, "learning_rate": 0.0001, "loss": 0.0127, "step": 82270 }, { "epoch": 541.3157894736842, "grad_norm": 1.1114617586135864, "learning_rate": 0.0001, "loss": 0.013, "step": 82280 }, { "epoch": 541.3815789473684, "grad_norm": 1.7959061861038208, "learning_rate": 0.0001, "loss": 0.0183, "step": 82290 }, { "epoch": 541.4473684210526, "grad_norm": 1.3370335102081299, "learning_rate": 0.0001, "loss": 0.0156, "step": 82300 }, { "epoch": 541.5131578947369, "grad_norm": 1.539262056350708, "learning_rate": 0.0001, "loss": 0.0123, "step": 82310 }, { "epoch": 541.578947368421, "grad_norm": 1.3929426670074463, "learning_rate": 0.0001, "loss": 0.0143, "step": 82320 }, { "epoch": 541.6447368421053, "grad_norm": 1.3120044469833374, "learning_rate": 0.0001, "loss": 0.0134, "step": 82330 }, { "epoch": 541.7105263157895, "grad_norm": 1.234708547592163, "learning_rate": 0.0001, "loss": 0.0137, "step": 82340 }, { "epoch": 541.7763157894736, "grad_norm": 1.2250511646270752, "learning_rate": 0.0001, "loss": 0.0152, "step": 82350 }, { "epoch": 541.8421052631579, "grad_norm": 1.390665054321289, "learning_rate": 0.0001, "loss": 0.0179, "step": 82360 }, { "epoch": 541.9078947368421, "grad_norm": 1.3751157522201538, "learning_rate": 0.0001, "loss": 0.0154, "step": 82370 }, { "epoch": 541.9736842105264, "grad_norm": 1.5997108221054077, "learning_rate": 0.0001, "loss": 0.0151, "step": 82380 }, { "epoch": 542.0394736842105, "grad_norm": 1.6898236274719238, "learning_rate": 0.0001, "loss": 0.0156, "step": 82390 }, { "epoch": 542.1052631578947, "grad_norm": 1.4964953660964966, "learning_rate": 0.0001, "loss": 0.0176, "step": 82400 }, { "epoch": 542.171052631579, "grad_norm": 1.1351877450942993, "learning_rate": 0.0001, "loss": 0.0144, "step": 82410 }, { "epoch": 542.2368421052631, "grad_norm": 1.280053734779358, "learning_rate": 0.0001, "loss": 0.0143, "step": 82420 }, { "epoch": 542.3026315789474, "grad_norm": 1.1642926931381226, "learning_rate": 0.0001, "loss": 0.0134, "step": 82430 }, { "epoch": 542.3684210526316, "grad_norm": 1.2769442796707153, "learning_rate": 0.0001, "loss": 0.0176, "step": 82440 }, { "epoch": 542.4342105263158, "grad_norm": 1.4774997234344482, "learning_rate": 0.0001, "loss": 0.0149, "step": 82450 }, { "epoch": 542.5, "grad_norm": 1.3947595357894897, "learning_rate": 0.0001, "loss": 0.0192, "step": 82460 }, { "epoch": 542.5657894736842, "grad_norm": 1.6647441387176514, "learning_rate": 0.0001, "loss": 0.0153, "step": 82470 }, { "epoch": 542.6315789473684, "grad_norm": 1.8894613981246948, "learning_rate": 0.0001, "loss": 0.0134, "step": 82480 }, { "epoch": 542.6973684210526, "grad_norm": 1.8650307655334473, "learning_rate": 0.0001, "loss": 0.0131, "step": 82490 }, { "epoch": 542.7631578947369, "grad_norm": 1.385155200958252, "learning_rate": 0.0001, "loss": 0.0126, "step": 82500 }, { "epoch": 542.828947368421, "grad_norm": 1.1739743947982788, "learning_rate": 0.0001, "loss": 0.0136, "step": 82510 }, { "epoch": 542.8947368421053, "grad_norm": 1.2639235258102417, "learning_rate": 0.0001, "loss": 0.0158, "step": 82520 }, { "epoch": 542.9605263157895, "grad_norm": 1.5821739435195923, "learning_rate": 0.0001, "loss": 0.0174, "step": 82530 }, { "epoch": 543.0263157894736, "grad_norm": 1.7579395771026611, "learning_rate": 0.0001, "loss": 0.0159, "step": 82540 }, { "epoch": 543.0921052631579, "grad_norm": 1.2478959560394287, "learning_rate": 0.0001, "loss": 0.0163, "step": 82550 }, { "epoch": 543.1578947368421, "grad_norm": 1.9073776006698608, "learning_rate": 0.0001, "loss": 0.0198, "step": 82560 }, { "epoch": 543.2236842105264, "grad_norm": 1.1746896505355835, "learning_rate": 0.0001, "loss": 0.0147, "step": 82570 }, { "epoch": 543.2894736842105, "grad_norm": 1.2974933385849, "learning_rate": 0.0001, "loss": 0.0124, "step": 82580 }, { "epoch": 543.3552631578947, "grad_norm": 1.4370925426483154, "learning_rate": 0.0001, "loss": 0.0151, "step": 82590 }, { "epoch": 543.421052631579, "grad_norm": 1.459963321685791, "learning_rate": 0.0001, "loss": 0.012, "step": 82600 }, { "epoch": 543.4868421052631, "grad_norm": 1.111807942390442, "learning_rate": 0.0001, "loss": 0.0153, "step": 82610 }, { "epoch": 543.5526315789474, "grad_norm": 1.7755359411239624, "learning_rate": 0.0001, "loss": 0.019, "step": 82620 }, { "epoch": 543.6184210526316, "grad_norm": 1.8824591636657715, "learning_rate": 0.0001, "loss": 0.0176, "step": 82630 }, { "epoch": 543.6842105263158, "grad_norm": 1.2773665189743042, "learning_rate": 0.0001, "loss": 0.0153, "step": 82640 }, { "epoch": 543.75, "grad_norm": 1.3574389219284058, "learning_rate": 0.0001, "loss": 0.017, "step": 82650 }, { "epoch": 543.8157894736842, "grad_norm": 1.8751872777938843, "learning_rate": 0.0001, "loss": 0.0131, "step": 82660 }, { "epoch": 543.8815789473684, "grad_norm": 1.8009850978851318, "learning_rate": 0.0001, "loss": 0.0118, "step": 82670 }, { "epoch": 543.9473684210526, "grad_norm": 1.374057650566101, "learning_rate": 0.0001, "loss": 0.0137, "step": 82680 }, { "epoch": 544.0131578947369, "grad_norm": 1.6685047149658203, "learning_rate": 0.0001, "loss": 0.013, "step": 82690 }, { "epoch": 544.078947368421, "grad_norm": 1.3949476480484009, "learning_rate": 0.0001, "loss": 0.018, "step": 82700 }, { "epoch": 544.1447368421053, "grad_norm": 1.662072777748108, "learning_rate": 0.0001, "loss": 0.0111, "step": 82710 }, { "epoch": 544.2105263157895, "grad_norm": 1.4417328834533691, "learning_rate": 0.0001, "loss": 0.0153, "step": 82720 }, { "epoch": 544.2763157894736, "grad_norm": 1.432751178741455, "learning_rate": 0.0001, "loss": 0.0145, "step": 82730 }, { "epoch": 544.3421052631579, "grad_norm": 1.1834758520126343, "learning_rate": 0.0001, "loss": 0.0138, "step": 82740 }, { "epoch": 544.4078947368421, "grad_norm": 1.4023183584213257, "learning_rate": 0.0001, "loss": 0.0174, "step": 82750 }, { "epoch": 544.4736842105264, "grad_norm": 1.119325041770935, "learning_rate": 0.0001, "loss": 0.0143, "step": 82760 }, { "epoch": 544.5394736842105, "grad_norm": 1.5780833959579468, "learning_rate": 0.0001, "loss": 0.0122, "step": 82770 }, { "epoch": 544.6052631578947, "grad_norm": 1.8935977220535278, "learning_rate": 0.0001, "loss": 0.0138, "step": 82780 }, { "epoch": 544.671052631579, "grad_norm": 1.5657554864883423, "learning_rate": 0.0001, "loss": 0.0139, "step": 82790 }, { "epoch": 544.7368421052631, "grad_norm": 1.7608250379562378, "learning_rate": 0.0001, "loss": 0.0149, "step": 82800 }, { "epoch": 544.8026315789474, "grad_norm": 1.332313895225525, "learning_rate": 0.0001, "loss": 0.0152, "step": 82810 }, { "epoch": 544.8684210526316, "grad_norm": 1.542446494102478, "learning_rate": 0.0001, "loss": 0.0174, "step": 82820 }, { "epoch": 544.9342105263158, "grad_norm": 1.6571738719940186, "learning_rate": 0.0001, "loss": 0.0138, "step": 82830 }, { "epoch": 545.0, "grad_norm": 1.4167624711990356, "learning_rate": 0.0001, "loss": 0.0161, "step": 82840 }, { "epoch": 545.0657894736842, "grad_norm": 1.6229537725448608, "learning_rate": 0.0001, "loss": 0.0182, "step": 82850 }, { "epoch": 545.1315789473684, "grad_norm": 1.5522602796554565, "learning_rate": 0.0001, "loss": 0.013, "step": 82860 }, { "epoch": 545.1973684210526, "grad_norm": 1.6247566938400269, "learning_rate": 0.0001, "loss": 0.0132, "step": 82870 }, { "epoch": 545.2631578947369, "grad_norm": 1.1186586618423462, "learning_rate": 0.0001, "loss": 0.0149, "step": 82880 }, { "epoch": 545.328947368421, "grad_norm": 1.057180643081665, "learning_rate": 0.0001, "loss": 0.0174, "step": 82890 }, { "epoch": 545.3947368421053, "grad_norm": 1.3710753917694092, "learning_rate": 0.0001, "loss": 0.0161, "step": 82900 }, { "epoch": 545.4605263157895, "grad_norm": 1.6228405237197876, "learning_rate": 0.0001, "loss": 0.0167, "step": 82910 }, { "epoch": 545.5263157894736, "grad_norm": 1.5483043193817139, "learning_rate": 0.0001, "loss": 0.0126, "step": 82920 }, { "epoch": 545.5921052631579, "grad_norm": 1.6697028875350952, "learning_rate": 0.0001, "loss": 0.0135, "step": 82930 }, { "epoch": 545.6578947368421, "grad_norm": 1.2553569078445435, "learning_rate": 0.0001, "loss": 0.0174, "step": 82940 }, { "epoch": 545.7236842105264, "grad_norm": 1.4572420120239258, "learning_rate": 0.0001, "loss": 0.0153, "step": 82950 }, { "epoch": 545.7894736842105, "grad_norm": 1.5026021003723145, "learning_rate": 0.0001, "loss": 0.0146, "step": 82960 }, { "epoch": 545.8552631578947, "grad_norm": 1.8010183572769165, "learning_rate": 0.0001, "loss": 0.0132, "step": 82970 }, { "epoch": 545.921052631579, "grad_norm": 1.9805711507797241, "learning_rate": 0.0001, "loss": 0.0151, "step": 82980 }, { "epoch": 545.9868421052631, "grad_norm": 1.230023741722107, "learning_rate": 0.0001, "loss": 0.0153, "step": 82990 }, { "epoch": 546.0526315789474, "grad_norm": 1.636271357536316, "learning_rate": 0.0001, "loss": 0.0145, "step": 83000 }, { "epoch": 546.1184210526316, "grad_norm": 1.5474729537963867, "learning_rate": 0.0001, "loss": 0.0164, "step": 83010 }, { "epoch": 546.1842105263158, "grad_norm": 1.3044278621673584, "learning_rate": 0.0001, "loss": 0.0154, "step": 83020 }, { "epoch": 546.25, "grad_norm": 1.2585197687149048, "learning_rate": 0.0001, "loss": 0.0156, "step": 83030 }, { "epoch": 546.3157894736842, "grad_norm": 1.4269715547561646, "learning_rate": 0.0001, "loss": 0.012, "step": 83040 }, { "epoch": 546.3815789473684, "grad_norm": 1.2035720348358154, "learning_rate": 0.0001, "loss": 0.0164, "step": 83050 }, { "epoch": 546.4473684210526, "grad_norm": 1.2373571395874023, "learning_rate": 0.0001, "loss": 0.0128, "step": 83060 }, { "epoch": 546.5131578947369, "grad_norm": 1.2914094924926758, "learning_rate": 0.0001, "loss": 0.0136, "step": 83070 }, { "epoch": 546.578947368421, "grad_norm": 1.3556712865829468, "learning_rate": 0.0001, "loss": 0.014, "step": 83080 }, { "epoch": 546.6447368421053, "grad_norm": 1.197016716003418, "learning_rate": 0.0001, "loss": 0.015, "step": 83090 }, { "epoch": 546.7105263157895, "grad_norm": 1.3359124660491943, "learning_rate": 0.0001, "loss": 0.0195, "step": 83100 }, { "epoch": 546.7763157894736, "grad_norm": 1.5458754301071167, "learning_rate": 0.0001, "loss": 0.0131, "step": 83110 }, { "epoch": 546.8421052631579, "grad_norm": 1.1762892007827759, "learning_rate": 0.0001, "loss": 0.0149, "step": 83120 }, { "epoch": 546.9078947368421, "grad_norm": 1.3803610801696777, "learning_rate": 0.0001, "loss": 0.0158, "step": 83130 }, { "epoch": 546.9736842105264, "grad_norm": 1.9473928213119507, "learning_rate": 0.0001, "loss": 0.0149, "step": 83140 }, { "epoch": 547.0394736842105, "grad_norm": 1.5595457553863525, "learning_rate": 0.0001, "loss": 0.0163, "step": 83150 }, { "epoch": 547.1052631578947, "grad_norm": 1.7990833520889282, "learning_rate": 0.0001, "loss": 0.0122, "step": 83160 }, { "epoch": 547.171052631579, "grad_norm": 1.495032548904419, "learning_rate": 0.0001, "loss": 0.0147, "step": 83170 }, { "epoch": 547.2368421052631, "grad_norm": 1.213646650314331, "learning_rate": 0.0001, "loss": 0.0146, "step": 83180 }, { "epoch": 547.3026315789474, "grad_norm": 1.5901466608047485, "learning_rate": 0.0001, "loss": 0.0137, "step": 83190 }, { "epoch": 547.3684210526316, "grad_norm": 1.4984221458435059, "learning_rate": 0.0001, "loss": 0.0183, "step": 83200 }, { "epoch": 547.4342105263158, "grad_norm": 1.3827086687088013, "learning_rate": 0.0001, "loss": 0.0151, "step": 83210 }, { "epoch": 547.5, "grad_norm": 1.614188551902771, "learning_rate": 0.0001, "loss": 0.015, "step": 83220 }, { "epoch": 547.5657894736842, "grad_norm": 1.6898562908172607, "learning_rate": 0.0001, "loss": 0.0202, "step": 83230 }, { "epoch": 547.6315789473684, "grad_norm": 1.5861438512802124, "learning_rate": 0.0001, "loss": 0.0124, "step": 83240 }, { "epoch": 547.6973684210526, "grad_norm": 1.4326103925704956, "learning_rate": 0.0001, "loss": 0.0182, "step": 83250 }, { "epoch": 547.7631578947369, "grad_norm": 0.9979020357131958, "learning_rate": 0.0001, "loss": 0.0156, "step": 83260 }, { "epoch": 547.828947368421, "grad_norm": 1.2166898250579834, "learning_rate": 0.0001, "loss": 0.0142, "step": 83270 }, { "epoch": 547.8947368421053, "grad_norm": 1.3587838411331177, "learning_rate": 0.0001, "loss": 0.0117, "step": 83280 }, { "epoch": 547.9605263157895, "grad_norm": 1.6448793411254883, "learning_rate": 0.0001, "loss": 0.0135, "step": 83290 }, { "epoch": 548.0263157894736, "grad_norm": 1.5012280941009521, "learning_rate": 0.0001, "loss": 0.0175, "step": 83300 }, { "epoch": 548.0921052631579, "grad_norm": 1.4841604232788086, "learning_rate": 0.0001, "loss": 0.0137, "step": 83310 }, { "epoch": 548.1578947368421, "grad_norm": 1.3853431940078735, "learning_rate": 0.0001, "loss": 0.0154, "step": 83320 }, { "epoch": 548.2236842105264, "grad_norm": 1.3606687784194946, "learning_rate": 0.0001, "loss": 0.0114, "step": 83330 }, { "epoch": 548.2894736842105, "grad_norm": 1.8763455152511597, "learning_rate": 0.0001, "loss": 0.0166, "step": 83340 }, { "epoch": 548.3552631578947, "grad_norm": 1.826149821281433, "learning_rate": 0.0001, "loss": 0.0165, "step": 83350 }, { "epoch": 548.421052631579, "grad_norm": 1.073411464691162, "learning_rate": 0.0001, "loss": 0.0111, "step": 83360 }, { "epoch": 548.4868421052631, "grad_norm": 1.9140489101409912, "learning_rate": 0.0001, "loss": 0.0182, "step": 83370 }, { "epoch": 548.5526315789474, "grad_norm": 1.3885191679000854, "learning_rate": 0.0001, "loss": 0.0134, "step": 83380 }, { "epoch": 548.6184210526316, "grad_norm": 1.6723103523254395, "learning_rate": 0.0001, "loss": 0.0121, "step": 83390 }, { "epoch": 548.6842105263158, "grad_norm": 1.1862767934799194, "learning_rate": 0.0001, "loss": 0.0147, "step": 83400 }, { "epoch": 548.75, "grad_norm": 1.513465166091919, "learning_rate": 0.0001, "loss": 0.0156, "step": 83410 }, { "epoch": 548.8157894736842, "grad_norm": 1.5787501335144043, "learning_rate": 0.0001, "loss": 0.0135, "step": 83420 }, { "epoch": 548.8815789473684, "grad_norm": 1.2925306558609009, "learning_rate": 0.0001, "loss": 0.0151, "step": 83430 }, { "epoch": 548.9473684210526, "grad_norm": 1.6140016317367554, "learning_rate": 0.0001, "loss": 0.016, "step": 83440 }, { "epoch": 549.0131578947369, "grad_norm": 1.5698037147521973, "learning_rate": 0.0001, "loss": 0.0141, "step": 83450 }, { "epoch": 549.078947368421, "grad_norm": 2.0104212760925293, "learning_rate": 0.0001, "loss": 0.0127, "step": 83460 }, { "epoch": 549.1447368421053, "grad_norm": 1.6998322010040283, "learning_rate": 0.0001, "loss": 0.0136, "step": 83470 }, { "epoch": 549.2105263157895, "grad_norm": 1.495953917503357, "learning_rate": 0.0001, "loss": 0.013, "step": 83480 }, { "epoch": 549.2763157894736, "grad_norm": 1.7215218544006348, "learning_rate": 0.0001, "loss": 0.0158, "step": 83490 }, { "epoch": 549.3421052631579, "grad_norm": 1.4227533340454102, "learning_rate": 0.0001, "loss": 0.0174, "step": 83500 }, { "epoch": 549.4078947368421, "grad_norm": 1.6959538459777832, "learning_rate": 0.0001, "loss": 0.0178, "step": 83510 }, { "epoch": 549.4736842105264, "grad_norm": 1.5525250434875488, "learning_rate": 0.0001, "loss": 0.0133, "step": 83520 }, { "epoch": 549.5394736842105, "grad_norm": 1.9274924993515015, "learning_rate": 0.0001, "loss": 0.0131, "step": 83530 }, { "epoch": 549.6052631578947, "grad_norm": 1.8298594951629639, "learning_rate": 0.0001, "loss": 0.0134, "step": 83540 }, { "epoch": 549.671052631579, "grad_norm": 1.2811952829360962, "learning_rate": 0.0001, "loss": 0.0153, "step": 83550 }, { "epoch": 549.7368421052631, "grad_norm": 1.1526199579238892, "learning_rate": 0.0001, "loss": 0.0139, "step": 83560 }, { "epoch": 549.8026315789474, "grad_norm": 1.5475744009017944, "learning_rate": 0.0001, "loss": 0.0136, "step": 83570 }, { "epoch": 549.8684210526316, "grad_norm": 1.4253531694412231, "learning_rate": 0.0001, "loss": 0.0154, "step": 83580 }, { "epoch": 549.9342105263158, "grad_norm": 1.672846794128418, "learning_rate": 0.0001, "loss": 0.0169, "step": 83590 }, { "epoch": 550.0, "grad_norm": 1.5016850233078003, "learning_rate": 0.0001, "loss": 0.0163, "step": 83600 }, { "epoch": 550.0657894736842, "grad_norm": 1.7257745265960693, "learning_rate": 0.0001, "loss": 0.0136, "step": 83610 }, { "epoch": 550.1315789473684, "grad_norm": 1.3684920072555542, "learning_rate": 0.0001, "loss": 0.0151, "step": 83620 }, { "epoch": 550.1973684210526, "grad_norm": 1.5804156064987183, "learning_rate": 0.0001, "loss": 0.0143, "step": 83630 }, { "epoch": 550.2631578947369, "grad_norm": 1.898728370666504, "learning_rate": 0.0001, "loss": 0.0181, "step": 83640 }, { "epoch": 550.328947368421, "grad_norm": 1.4493920803070068, "learning_rate": 0.0001, "loss": 0.0149, "step": 83650 }, { "epoch": 550.3947368421053, "grad_norm": 1.4574203491210938, "learning_rate": 0.0001, "loss": 0.0115, "step": 83660 }, { "epoch": 550.4605263157895, "grad_norm": 1.3992433547973633, "learning_rate": 0.0001, "loss": 0.0174, "step": 83670 }, { "epoch": 550.5263157894736, "grad_norm": 1.3574750423431396, "learning_rate": 0.0001, "loss": 0.0156, "step": 83680 }, { "epoch": 550.5921052631579, "grad_norm": 1.7822781801223755, "learning_rate": 0.0001, "loss": 0.0154, "step": 83690 }, { "epoch": 550.6578947368421, "grad_norm": 1.7219433784484863, "learning_rate": 0.0001, "loss": 0.0147, "step": 83700 }, { "epoch": 550.7236842105264, "grad_norm": 1.3753505945205688, "learning_rate": 0.0001, "loss": 0.0173, "step": 83710 }, { "epoch": 550.7894736842105, "grad_norm": 1.4566619396209717, "learning_rate": 0.0001, "loss": 0.0136, "step": 83720 }, { "epoch": 550.8552631578947, "grad_norm": 1.1299071311950684, "learning_rate": 0.0001, "loss": 0.0132, "step": 83730 }, { "epoch": 550.921052631579, "grad_norm": 1.547942876815796, "learning_rate": 0.0001, "loss": 0.0134, "step": 83740 }, { "epoch": 550.9868421052631, "grad_norm": 1.906644582748413, "learning_rate": 0.0001, "loss": 0.0125, "step": 83750 }, { "epoch": 551.0526315789474, "grad_norm": 2.091578722000122, "learning_rate": 0.0001, "loss": 0.0184, "step": 83760 }, { "epoch": 551.1184210526316, "grad_norm": 1.8491286039352417, "learning_rate": 0.0001, "loss": 0.0147, "step": 83770 }, { "epoch": 551.1842105263158, "grad_norm": 1.6487220525741577, "learning_rate": 0.0001, "loss": 0.0129, "step": 83780 }, { "epoch": 551.25, "grad_norm": 1.6457829475402832, "learning_rate": 0.0001, "loss": 0.0156, "step": 83790 }, { "epoch": 551.3157894736842, "grad_norm": 1.4630769491195679, "learning_rate": 0.0001, "loss": 0.0147, "step": 83800 }, { "epoch": 551.3815789473684, "grad_norm": 1.53042471408844, "learning_rate": 0.0001, "loss": 0.0129, "step": 83810 }, { "epoch": 551.4473684210526, "grad_norm": 1.9436345100402832, "learning_rate": 0.0001, "loss": 0.0155, "step": 83820 }, { "epoch": 551.5131578947369, "grad_norm": 1.3784300088882446, "learning_rate": 0.0001, "loss": 0.0142, "step": 83830 }, { "epoch": 551.578947368421, "grad_norm": 1.4010249376296997, "learning_rate": 0.0001, "loss": 0.0181, "step": 83840 }, { "epoch": 551.6447368421053, "grad_norm": 1.504724144935608, "learning_rate": 0.0001, "loss": 0.0122, "step": 83850 }, { "epoch": 551.7105263157895, "grad_norm": 1.9052183628082275, "learning_rate": 0.0001, "loss": 0.0176, "step": 83860 }, { "epoch": 551.7763157894736, "grad_norm": 1.8086398839950562, "learning_rate": 0.0001, "loss": 0.0114, "step": 83870 }, { "epoch": 551.8421052631579, "grad_norm": 1.59208345413208, "learning_rate": 0.0001, "loss": 0.0163, "step": 83880 }, { "epoch": 551.9078947368421, "grad_norm": 1.5027062892913818, "learning_rate": 0.0001, "loss": 0.0145, "step": 83890 }, { "epoch": 551.9736842105264, "grad_norm": 1.6946215629577637, "learning_rate": 0.0001, "loss": 0.0157, "step": 83900 }, { "epoch": 552.0394736842105, "grad_norm": 1.3571676015853882, "learning_rate": 0.0001, "loss": 0.0122, "step": 83910 }, { "epoch": 552.1052631578947, "grad_norm": 1.4489296674728394, "learning_rate": 0.0001, "loss": 0.0157, "step": 83920 }, { "epoch": 552.171052631579, "grad_norm": 1.3860976696014404, "learning_rate": 0.0001, "loss": 0.0146, "step": 83930 }, { "epoch": 552.2368421052631, "grad_norm": 1.3254799842834473, "learning_rate": 0.0001, "loss": 0.0131, "step": 83940 }, { "epoch": 552.3026315789474, "grad_norm": 2.294823408126831, "learning_rate": 0.0001, "loss": 0.0129, "step": 83950 }, { "epoch": 552.3684210526316, "grad_norm": 2.1880805492401123, "learning_rate": 0.0001, "loss": 0.0138, "step": 83960 }, { "epoch": 552.4342105263158, "grad_norm": 1.6297900676727295, "learning_rate": 0.0001, "loss": 0.0205, "step": 83970 }, { "epoch": 552.5, "grad_norm": 1.2224512100219727, "learning_rate": 0.0001, "loss": 0.0118, "step": 83980 }, { "epoch": 552.5657894736842, "grad_norm": 1.6131137609481812, "learning_rate": 0.0001, "loss": 0.0135, "step": 83990 }, { "epoch": 552.6315789473684, "grad_norm": 1.3168327808380127, "learning_rate": 0.0001, "loss": 0.0143, "step": 84000 }, { "epoch": 552.6973684210526, "grad_norm": 1.0803016424179077, "learning_rate": 0.0001, "loss": 0.0147, "step": 84010 }, { "epoch": 552.7631578947369, "grad_norm": 1.6696184873580933, "learning_rate": 0.0001, "loss": 0.0183, "step": 84020 }, { "epoch": 552.828947368421, "grad_norm": 1.552722692489624, "learning_rate": 0.0001, "loss": 0.0176, "step": 84030 }, { "epoch": 552.8947368421053, "grad_norm": 1.682380199432373, "learning_rate": 0.0001, "loss": 0.0166, "step": 84040 }, { "epoch": 552.9605263157895, "grad_norm": 1.875828504562378, "learning_rate": 0.0001, "loss": 0.0137, "step": 84050 }, { "epoch": 553.0263157894736, "grad_norm": 1.2623361349105835, "learning_rate": 0.0001, "loss": 0.0133, "step": 84060 }, { "epoch": 553.0921052631579, "grad_norm": 1.6714956760406494, "learning_rate": 0.0001, "loss": 0.0178, "step": 84070 }, { "epoch": 553.1578947368421, "grad_norm": 1.094237208366394, "learning_rate": 0.0001, "loss": 0.0111, "step": 84080 }, { "epoch": 553.2236842105264, "grad_norm": 1.329588532447815, "learning_rate": 0.0001, "loss": 0.0165, "step": 84090 }, { "epoch": 553.2894736842105, "grad_norm": 1.6168252229690552, "learning_rate": 0.0001, "loss": 0.017, "step": 84100 }, { "epoch": 553.3552631578947, "grad_norm": 1.461397409439087, "learning_rate": 0.0001, "loss": 0.0148, "step": 84110 }, { "epoch": 553.421052631579, "grad_norm": 1.8595025539398193, "learning_rate": 0.0001, "loss": 0.0162, "step": 84120 }, { "epoch": 553.4868421052631, "grad_norm": 1.4316059350967407, "learning_rate": 0.0001, "loss": 0.0144, "step": 84130 }, { "epoch": 553.5526315789474, "grad_norm": 1.215129017829895, "learning_rate": 0.0001, "loss": 0.0117, "step": 84140 }, { "epoch": 553.6184210526316, "grad_norm": 1.5108624696731567, "learning_rate": 0.0001, "loss": 0.0121, "step": 84150 }, { "epoch": 553.6842105263158, "grad_norm": 1.377280592918396, "learning_rate": 0.0001, "loss": 0.0129, "step": 84160 }, { "epoch": 553.75, "grad_norm": 1.7312026023864746, "learning_rate": 0.0001, "loss": 0.0161, "step": 84170 }, { "epoch": 553.8157894736842, "grad_norm": 1.253363847732544, "learning_rate": 0.0001, "loss": 0.0161, "step": 84180 }, { "epoch": 553.8815789473684, "grad_norm": 1.3091174364089966, "learning_rate": 0.0001, "loss": 0.015, "step": 84190 }, { "epoch": 553.9473684210526, "grad_norm": 1.4040309190750122, "learning_rate": 0.0001, "loss": 0.0118, "step": 84200 }, { "epoch": 554.0131578947369, "grad_norm": 1.6696528196334839, "learning_rate": 0.0001, "loss": 0.016, "step": 84210 }, { "epoch": 554.078947368421, "grad_norm": 1.5168676376342773, "learning_rate": 0.0001, "loss": 0.011, "step": 84220 }, { "epoch": 554.1447368421053, "grad_norm": 1.297972559928894, "learning_rate": 0.0001, "loss": 0.0133, "step": 84230 }, { "epoch": 554.2105263157895, "grad_norm": 1.3779776096343994, "learning_rate": 0.0001, "loss": 0.015, "step": 84240 }, { "epoch": 554.2763157894736, "grad_norm": 1.5109691619873047, "learning_rate": 0.0001, "loss": 0.0123, "step": 84250 }, { "epoch": 554.3421052631579, "grad_norm": 1.9440971612930298, "learning_rate": 0.0001, "loss": 0.0139, "step": 84260 }, { "epoch": 554.4078947368421, "grad_norm": 1.569202184677124, "learning_rate": 0.0001, "loss": 0.0175, "step": 84270 }, { "epoch": 554.4736842105264, "grad_norm": 1.5523256063461304, "learning_rate": 0.0001, "loss": 0.0115, "step": 84280 }, { "epoch": 554.5394736842105, "grad_norm": 1.2167264223098755, "learning_rate": 0.0001, "loss": 0.0142, "step": 84290 }, { "epoch": 554.6052631578947, "grad_norm": 1.3256298303604126, "learning_rate": 0.0001, "loss": 0.0193, "step": 84300 }, { "epoch": 554.671052631579, "grad_norm": 1.4297287464141846, "learning_rate": 0.0001, "loss": 0.0156, "step": 84310 }, { "epoch": 554.7368421052631, "grad_norm": 1.0162357091903687, "learning_rate": 0.0001, "loss": 0.0174, "step": 84320 }, { "epoch": 554.8026315789474, "grad_norm": 1.3793514966964722, "learning_rate": 0.0001, "loss": 0.0147, "step": 84330 }, { "epoch": 554.8684210526316, "grad_norm": 1.8385928869247437, "learning_rate": 0.0001, "loss": 0.0155, "step": 84340 }, { "epoch": 554.9342105263158, "grad_norm": 1.8691816329956055, "learning_rate": 0.0001, "loss": 0.0185, "step": 84350 }, { "epoch": 555.0, "grad_norm": 1.8451818227767944, "learning_rate": 0.0001, "loss": 0.0175, "step": 84360 }, { "epoch": 555.0657894736842, "grad_norm": 1.8059436082839966, "learning_rate": 0.0001, "loss": 0.0163, "step": 84370 }, { "epoch": 555.1315789473684, "grad_norm": 1.76826012134552, "learning_rate": 0.0001, "loss": 0.0171, "step": 84380 }, { "epoch": 555.1973684210526, "grad_norm": 1.5755997896194458, "learning_rate": 0.0001, "loss": 0.0155, "step": 84390 }, { "epoch": 555.2631578947369, "grad_norm": 1.5092872381210327, "learning_rate": 0.0001, "loss": 0.0138, "step": 84400 }, { "epoch": 555.328947368421, "grad_norm": 1.98942232131958, "learning_rate": 0.0001, "loss": 0.0134, "step": 84410 }, { "epoch": 555.3947368421053, "grad_norm": 1.3360170125961304, "learning_rate": 0.0001, "loss": 0.0126, "step": 84420 }, { "epoch": 555.4605263157895, "grad_norm": 1.5345121622085571, "learning_rate": 0.0001, "loss": 0.0115, "step": 84430 }, { "epoch": 555.5263157894736, "grad_norm": 1.7215805053710938, "learning_rate": 0.0001, "loss": 0.0161, "step": 84440 }, { "epoch": 555.5921052631579, "grad_norm": 2.0860469341278076, "learning_rate": 0.0001, "loss": 0.0179, "step": 84450 }, { "epoch": 555.6578947368421, "grad_norm": 1.5729376077651978, "learning_rate": 0.0001, "loss": 0.0194, "step": 84460 }, { "epoch": 555.7236842105264, "grad_norm": 1.5540016889572144, "learning_rate": 0.0001, "loss": 0.0143, "step": 84470 }, { "epoch": 555.7894736842105, "grad_norm": 1.3470932245254517, "learning_rate": 0.0001, "loss": 0.013, "step": 84480 }, { "epoch": 555.8552631578947, "grad_norm": 1.671277642250061, "learning_rate": 0.0001, "loss": 0.0114, "step": 84490 }, { "epoch": 555.921052631579, "grad_norm": 1.4621022939682007, "learning_rate": 0.0001, "loss": 0.0165, "step": 84500 }, { "epoch": 555.9868421052631, "grad_norm": 1.4224588871002197, "learning_rate": 0.0001, "loss": 0.0137, "step": 84510 }, { "epoch": 556.0526315789474, "grad_norm": 1.4487292766571045, "learning_rate": 0.0001, "loss": 0.0133, "step": 84520 }, { "epoch": 556.1184210526316, "grad_norm": 1.4672666788101196, "learning_rate": 0.0001, "loss": 0.0186, "step": 84530 }, { "epoch": 556.1842105263158, "grad_norm": 1.8151893615722656, "learning_rate": 0.0001, "loss": 0.0132, "step": 84540 }, { "epoch": 556.25, "grad_norm": 1.5680279731750488, "learning_rate": 0.0001, "loss": 0.0176, "step": 84550 }, { "epoch": 556.3157894736842, "grad_norm": 1.5875056982040405, "learning_rate": 0.0001, "loss": 0.0169, "step": 84560 }, { "epoch": 556.3815789473684, "grad_norm": 1.60444974899292, "learning_rate": 0.0001, "loss": 0.0123, "step": 84570 }, { "epoch": 556.4473684210526, "grad_norm": 1.5450639724731445, "learning_rate": 0.0001, "loss": 0.0143, "step": 84580 }, { "epoch": 556.5131578947369, "grad_norm": 1.3005048036575317, "learning_rate": 0.0001, "loss": 0.0125, "step": 84590 }, { "epoch": 556.578947368421, "grad_norm": 1.7762401103973389, "learning_rate": 0.0001, "loss": 0.0175, "step": 84600 }, { "epoch": 556.6447368421053, "grad_norm": 1.4299089908599854, "learning_rate": 0.0001, "loss": 0.0142, "step": 84610 }, { "epoch": 556.7105263157895, "grad_norm": 1.482814073562622, "learning_rate": 0.0001, "loss": 0.0151, "step": 84620 }, { "epoch": 556.7763157894736, "grad_norm": 1.9516997337341309, "learning_rate": 0.0001, "loss": 0.0145, "step": 84630 }, { "epoch": 556.8421052631579, "grad_norm": 1.7517163753509521, "learning_rate": 0.0001, "loss": 0.0144, "step": 84640 }, { "epoch": 556.9078947368421, "grad_norm": 1.831649899482727, "learning_rate": 0.0001, "loss": 0.0137, "step": 84650 }, { "epoch": 556.9736842105264, "grad_norm": 2.011685848236084, "learning_rate": 0.0001, "loss": 0.0137, "step": 84660 }, { "epoch": 557.0394736842105, "grad_norm": 1.4498176574707031, "learning_rate": 0.0001, "loss": 0.0127, "step": 84670 }, { "epoch": 557.1052631578947, "grad_norm": 1.5810576677322388, "learning_rate": 0.0001, "loss": 0.013, "step": 84680 }, { "epoch": 557.171052631579, "grad_norm": 0.9445865154266357, "learning_rate": 0.0001, "loss": 0.0118, "step": 84690 }, { "epoch": 557.2368421052631, "grad_norm": 1.4775872230529785, "learning_rate": 0.0001, "loss": 0.0163, "step": 84700 }, { "epoch": 557.3026315789474, "grad_norm": 1.5766427516937256, "learning_rate": 0.0001, "loss": 0.0151, "step": 84710 }, { "epoch": 557.3684210526316, "grad_norm": 1.331926941871643, "learning_rate": 0.0001, "loss": 0.0178, "step": 84720 }, { "epoch": 557.4342105263158, "grad_norm": 1.2441962957382202, "learning_rate": 0.0001, "loss": 0.0198, "step": 84730 }, { "epoch": 557.5, "grad_norm": 1.418134093284607, "learning_rate": 0.0001, "loss": 0.0128, "step": 84740 }, { "epoch": 557.5657894736842, "grad_norm": 0.9768937230110168, "learning_rate": 0.0001, "loss": 0.0124, "step": 84750 }, { "epoch": 557.6315789473684, "grad_norm": 1.595792293548584, "learning_rate": 0.0001, "loss": 0.0134, "step": 84760 }, { "epoch": 557.6973684210526, "grad_norm": 1.755558967590332, "learning_rate": 0.0001, "loss": 0.0151, "step": 84770 }, { "epoch": 557.7631578947369, "grad_norm": 1.8355498313903809, "learning_rate": 0.0001, "loss": 0.0177, "step": 84780 }, { "epoch": 557.828947368421, "grad_norm": 1.72767972946167, "learning_rate": 0.0001, "loss": 0.0159, "step": 84790 }, { "epoch": 557.8947368421053, "grad_norm": 1.9212244749069214, "learning_rate": 0.0001, "loss": 0.0148, "step": 84800 }, { "epoch": 557.9605263157895, "grad_norm": 1.5097993612289429, "learning_rate": 0.0001, "loss": 0.0159, "step": 84810 }, { "epoch": 558.0263157894736, "grad_norm": 1.4798463582992554, "learning_rate": 0.0001, "loss": 0.0132, "step": 84820 }, { "epoch": 558.0921052631579, "grad_norm": 1.6054027080535889, "learning_rate": 0.0001, "loss": 0.0114, "step": 84830 }, { "epoch": 558.1578947368421, "grad_norm": 1.2995930910110474, "learning_rate": 0.0001, "loss": 0.0166, "step": 84840 }, { "epoch": 558.2236842105264, "grad_norm": 1.4788295030593872, "learning_rate": 0.0001, "loss": 0.0129, "step": 84850 }, { "epoch": 558.2894736842105, "grad_norm": 1.4727380275726318, "learning_rate": 0.0001, "loss": 0.0186, "step": 84860 }, { "epoch": 558.3552631578947, "grad_norm": 1.4078514575958252, "learning_rate": 0.0001, "loss": 0.0126, "step": 84870 }, { "epoch": 558.421052631579, "grad_norm": 1.9210137128829956, "learning_rate": 0.0001, "loss": 0.0212, "step": 84880 }, { "epoch": 558.4868421052631, "grad_norm": 1.3578596115112305, "learning_rate": 0.0001, "loss": 0.0135, "step": 84890 }, { "epoch": 558.5526315789474, "grad_norm": 1.3593952655792236, "learning_rate": 0.0001, "loss": 0.0158, "step": 84900 }, { "epoch": 558.6184210526316, "grad_norm": 1.4462486505508423, "learning_rate": 0.0001, "loss": 0.0135, "step": 84910 }, { "epoch": 558.6842105263158, "grad_norm": 1.46589994430542, "learning_rate": 0.0001, "loss": 0.019, "step": 84920 }, { "epoch": 558.75, "grad_norm": 1.4066838026046753, "learning_rate": 0.0001, "loss": 0.0144, "step": 84930 }, { "epoch": 558.8157894736842, "grad_norm": 1.6743745803833008, "learning_rate": 0.0001, "loss": 0.0145, "step": 84940 }, { "epoch": 558.8815789473684, "grad_norm": 1.0696133375167847, "learning_rate": 0.0001, "loss": 0.0144, "step": 84950 }, { "epoch": 558.9473684210526, "grad_norm": 1.3440583944320679, "learning_rate": 0.0001, "loss": 0.0138, "step": 84960 }, { "epoch": 559.0131578947369, "grad_norm": 1.2600966691970825, "learning_rate": 0.0001, "loss": 0.0158, "step": 84970 }, { "epoch": 559.078947368421, "grad_norm": 1.4696316719055176, "learning_rate": 0.0001, "loss": 0.015, "step": 84980 }, { "epoch": 559.1447368421053, "grad_norm": 0.9143999814987183, "learning_rate": 0.0001, "loss": 0.0145, "step": 84990 }, { "epoch": 559.2105263157895, "grad_norm": 0.7415622472763062, "learning_rate": 0.0001, "loss": 0.0149, "step": 85000 }, { "epoch": 559.2763157894736, "grad_norm": 1.5793741941452026, "learning_rate": 0.0001, "loss": 0.0162, "step": 85010 }, { "epoch": 559.3421052631579, "grad_norm": 1.3296786546707153, "learning_rate": 0.0001, "loss": 0.0174, "step": 85020 }, { "epoch": 559.4078947368421, "grad_norm": 1.5502090454101562, "learning_rate": 0.0001, "loss": 0.0171, "step": 85030 }, { "epoch": 559.4736842105264, "grad_norm": 1.6531933546066284, "learning_rate": 0.0001, "loss": 0.0144, "step": 85040 }, { "epoch": 559.5394736842105, "grad_norm": 1.4943125247955322, "learning_rate": 0.0001, "loss": 0.0153, "step": 85050 }, { "epoch": 559.6052631578947, "grad_norm": 1.095595359802246, "learning_rate": 0.0001, "loss": 0.013, "step": 85060 }, { "epoch": 559.671052631579, "grad_norm": 1.572675108909607, "learning_rate": 0.0001, "loss": 0.0115, "step": 85070 }, { "epoch": 559.7368421052631, "grad_norm": 1.3948439359664917, "learning_rate": 0.0001, "loss": 0.0119, "step": 85080 }, { "epoch": 559.8026315789474, "grad_norm": 1.5179723501205444, "learning_rate": 0.0001, "loss": 0.0162, "step": 85090 }, { "epoch": 559.8684210526316, "grad_norm": 1.9711090326309204, "learning_rate": 0.0001, "loss": 0.0168, "step": 85100 }, { "epoch": 559.9342105263158, "grad_norm": 1.4045884609222412, "learning_rate": 0.0001, "loss": 0.0137, "step": 85110 }, { "epoch": 560.0, "grad_norm": 1.3347855806350708, "learning_rate": 0.0001, "loss": 0.0154, "step": 85120 }, { "epoch": 560.0657894736842, "grad_norm": 1.270739197731018, "learning_rate": 0.0001, "loss": 0.0196, "step": 85130 }, { "epoch": 560.1315789473684, "grad_norm": 1.7627331018447876, "learning_rate": 0.0001, "loss": 0.0153, "step": 85140 }, { "epoch": 560.1973684210526, "grad_norm": 1.6346405744552612, "learning_rate": 0.0001, "loss": 0.0136, "step": 85150 }, { "epoch": 560.2631578947369, "grad_norm": 1.361761450767517, "learning_rate": 0.0001, "loss": 0.0136, "step": 85160 }, { "epoch": 560.328947368421, "grad_norm": 1.4640510082244873, "learning_rate": 0.0001, "loss": 0.0153, "step": 85170 }, { "epoch": 560.3947368421053, "grad_norm": 1.7056646347045898, "learning_rate": 0.0001, "loss": 0.0162, "step": 85180 }, { "epoch": 560.4605263157895, "grad_norm": 1.0188897848129272, "learning_rate": 0.0001, "loss": 0.016, "step": 85190 }, { "epoch": 560.5263157894736, "grad_norm": 1.4161872863769531, "learning_rate": 0.0001, "loss": 0.011, "step": 85200 }, { "epoch": 560.5921052631579, "grad_norm": 1.4543670415878296, "learning_rate": 0.0001, "loss": 0.0188, "step": 85210 }, { "epoch": 560.6578947368421, "grad_norm": 1.4289796352386475, "learning_rate": 0.0001, "loss": 0.0153, "step": 85220 }, { "epoch": 560.7236842105264, "grad_norm": 1.6936848163604736, "learning_rate": 0.0001, "loss": 0.0148, "step": 85230 }, { "epoch": 560.7894736842105, "grad_norm": 1.5952025651931763, "learning_rate": 0.0001, "loss": 0.014, "step": 85240 }, { "epoch": 560.8552631578947, "grad_norm": 2.0158698558807373, "learning_rate": 0.0001, "loss": 0.0129, "step": 85250 }, { "epoch": 560.921052631579, "grad_norm": 1.7156411409378052, "learning_rate": 0.0001, "loss": 0.0139, "step": 85260 }, { "epoch": 560.9868421052631, "grad_norm": 1.1190816164016724, "learning_rate": 0.0001, "loss": 0.0118, "step": 85270 }, { "epoch": 561.0526315789474, "grad_norm": 1.625895619392395, "learning_rate": 0.0001, "loss": 0.0123, "step": 85280 }, { "epoch": 561.1184210526316, "grad_norm": 1.277498483657837, "learning_rate": 0.0001, "loss": 0.0147, "step": 85290 }, { "epoch": 561.1842105263158, "grad_norm": 1.4464055299758911, "learning_rate": 0.0001, "loss": 0.0152, "step": 85300 }, { "epoch": 561.25, "grad_norm": 1.4115123748779297, "learning_rate": 0.0001, "loss": 0.0177, "step": 85310 }, { "epoch": 561.3157894736842, "grad_norm": 1.299361228942871, "learning_rate": 0.0001, "loss": 0.0154, "step": 85320 }, { "epoch": 561.3815789473684, "grad_norm": 1.9691554307937622, "learning_rate": 0.0001, "loss": 0.0197, "step": 85330 }, { "epoch": 561.4473684210526, "grad_norm": 1.5327922105789185, "learning_rate": 0.0001, "loss": 0.0162, "step": 85340 }, { "epoch": 561.5131578947369, "grad_norm": 1.7858856916427612, "learning_rate": 0.0001, "loss": 0.0113, "step": 85350 }, { "epoch": 561.578947368421, "grad_norm": 1.9425185918807983, "learning_rate": 0.0001, "loss": 0.0143, "step": 85360 }, { "epoch": 561.6447368421053, "grad_norm": 1.498075008392334, "learning_rate": 0.0001, "loss": 0.0127, "step": 85370 }, { "epoch": 561.7105263157895, "grad_norm": 1.2820146083831787, "learning_rate": 0.0001, "loss": 0.0148, "step": 85380 }, { "epoch": 561.7763157894736, "grad_norm": 1.1977096796035767, "learning_rate": 0.0001, "loss": 0.0123, "step": 85390 }, { "epoch": 561.8421052631579, "grad_norm": 1.1685240268707275, "learning_rate": 0.0001, "loss": 0.0166, "step": 85400 }, { "epoch": 561.9078947368421, "grad_norm": 1.8336867094039917, "learning_rate": 0.0001, "loss": 0.016, "step": 85410 }, { "epoch": 561.9736842105264, "grad_norm": 2.2812516689300537, "learning_rate": 0.0001, "loss": 0.0122, "step": 85420 }, { "epoch": 562.0394736842105, "grad_norm": 2.127413749694824, "learning_rate": 0.0001, "loss": 0.0146, "step": 85430 }, { "epoch": 562.1052631578947, "grad_norm": 1.8199725151062012, "learning_rate": 0.0001, "loss": 0.0126, "step": 85440 }, { "epoch": 562.171052631579, "grad_norm": 1.3830945491790771, "learning_rate": 0.0001, "loss": 0.0165, "step": 85450 }, { "epoch": 562.2368421052631, "grad_norm": 1.5521953105926514, "learning_rate": 0.0001, "loss": 0.0147, "step": 85460 }, { "epoch": 562.3026315789474, "grad_norm": 1.6338026523590088, "learning_rate": 0.0001, "loss": 0.0122, "step": 85470 }, { "epoch": 562.3684210526316, "grad_norm": 1.6342169046401978, "learning_rate": 0.0001, "loss": 0.0158, "step": 85480 }, { "epoch": 562.4342105263158, "grad_norm": 1.2276031970977783, "learning_rate": 0.0001, "loss": 0.0185, "step": 85490 }, { "epoch": 562.5, "grad_norm": 1.6296532154083252, "learning_rate": 0.0001, "loss": 0.0168, "step": 85500 }, { "epoch": 562.5657894736842, "grad_norm": 1.585095763206482, "learning_rate": 0.0001, "loss": 0.0153, "step": 85510 }, { "epoch": 562.6315789473684, "grad_norm": 1.6188313961029053, "learning_rate": 0.0001, "loss": 0.0127, "step": 85520 }, { "epoch": 562.6973684210526, "grad_norm": 1.6124937534332275, "learning_rate": 0.0001, "loss": 0.0137, "step": 85530 }, { "epoch": 562.7631578947369, "grad_norm": 1.5898551940917969, "learning_rate": 0.0001, "loss": 0.0153, "step": 85540 }, { "epoch": 562.828947368421, "grad_norm": 1.6842424869537354, "learning_rate": 0.0001, "loss": 0.0145, "step": 85550 }, { "epoch": 562.8947368421053, "grad_norm": 1.7317845821380615, "learning_rate": 0.0001, "loss": 0.0149, "step": 85560 }, { "epoch": 562.9605263157895, "grad_norm": 1.2847421169281006, "learning_rate": 0.0001, "loss": 0.0129, "step": 85570 }, { "epoch": 563.0263157894736, "grad_norm": 1.5976451635360718, "learning_rate": 0.0001, "loss": 0.0157, "step": 85580 }, { "epoch": 563.0921052631579, "grad_norm": 1.4178425073623657, "learning_rate": 0.0001, "loss": 0.0156, "step": 85590 }, { "epoch": 563.1578947368421, "grad_norm": 1.295615315437317, "learning_rate": 0.0001, "loss": 0.0136, "step": 85600 }, { "epoch": 563.2236842105264, "grad_norm": 1.5699670314788818, "learning_rate": 0.0001, "loss": 0.0153, "step": 85610 }, { "epoch": 563.2894736842105, "grad_norm": 1.473158836364746, "learning_rate": 0.0001, "loss": 0.0139, "step": 85620 }, { "epoch": 563.3552631578947, "grad_norm": 1.714137077331543, "learning_rate": 0.0001, "loss": 0.0154, "step": 85630 }, { "epoch": 563.421052631579, "grad_norm": 1.443620204925537, "learning_rate": 0.0001, "loss": 0.0121, "step": 85640 }, { "epoch": 563.4868421052631, "grad_norm": 1.1236541271209717, "learning_rate": 0.0001, "loss": 0.0186, "step": 85650 }, { "epoch": 563.5526315789474, "grad_norm": 1.3824838399887085, "learning_rate": 0.0001, "loss": 0.0179, "step": 85660 }, { "epoch": 563.6184210526316, "grad_norm": 1.3121225833892822, "learning_rate": 0.0001, "loss": 0.0135, "step": 85670 }, { "epoch": 563.6842105263158, "grad_norm": 1.5337930917739868, "learning_rate": 0.0001, "loss": 0.0123, "step": 85680 }, { "epoch": 563.75, "grad_norm": 1.4727555513381958, "learning_rate": 0.0001, "loss": 0.017, "step": 85690 }, { "epoch": 563.8157894736842, "grad_norm": 2.1116268634796143, "learning_rate": 0.0001, "loss": 0.0142, "step": 85700 }, { "epoch": 563.8815789473684, "grad_norm": 1.6304728984832764, "learning_rate": 0.0001, "loss": 0.0176, "step": 85710 }, { "epoch": 563.9473684210526, "grad_norm": 1.3637564182281494, "learning_rate": 0.0001, "loss": 0.0128, "step": 85720 }, { "epoch": 564.0131578947369, "grad_norm": 1.4969972372055054, "learning_rate": 0.0001, "loss": 0.0129, "step": 85730 }, { "epoch": 564.078947368421, "grad_norm": 1.1857138872146606, "learning_rate": 0.0001, "loss": 0.0127, "step": 85740 }, { "epoch": 564.1447368421053, "grad_norm": 1.385517954826355, "learning_rate": 0.0001, "loss": 0.014, "step": 85750 }, { "epoch": 564.2105263157895, "grad_norm": 1.5348467826843262, "learning_rate": 0.0001, "loss": 0.0143, "step": 85760 }, { "epoch": 564.2763157894736, "grad_norm": 1.522399663925171, "learning_rate": 0.0001, "loss": 0.0139, "step": 85770 }, { "epoch": 564.3421052631579, "grad_norm": 1.4502613544464111, "learning_rate": 0.0001, "loss": 0.0176, "step": 85780 }, { "epoch": 564.4078947368421, "grad_norm": 1.6325780153274536, "learning_rate": 0.0001, "loss": 0.0147, "step": 85790 }, { "epoch": 564.4736842105264, "grad_norm": 1.3188508749008179, "learning_rate": 0.0001, "loss": 0.0139, "step": 85800 }, { "epoch": 564.5394736842105, "grad_norm": 1.268407940864563, "learning_rate": 0.0001, "loss": 0.0164, "step": 85810 }, { "epoch": 564.6052631578947, "grad_norm": 1.3513436317443848, "learning_rate": 0.0001, "loss": 0.0143, "step": 85820 }, { "epoch": 564.671052631579, "grad_norm": 1.900767207145691, "learning_rate": 0.0001, "loss": 0.014, "step": 85830 }, { "epoch": 564.7368421052631, "grad_norm": 1.540355920791626, "learning_rate": 0.0001, "loss": 0.0178, "step": 85840 }, { "epoch": 564.8026315789474, "grad_norm": 1.5693838596343994, "learning_rate": 0.0001, "loss": 0.0123, "step": 85850 }, { "epoch": 564.8684210526316, "grad_norm": 1.7318428754806519, "learning_rate": 0.0001, "loss": 0.0186, "step": 85860 }, { "epoch": 564.9342105263158, "grad_norm": 1.842075228691101, "learning_rate": 0.0001, "loss": 0.0125, "step": 85870 }, { "epoch": 565.0, "grad_norm": 1.2516850233078003, "learning_rate": 0.0001, "loss": 0.0169, "step": 85880 }, { "epoch": 565.0657894736842, "grad_norm": 1.043332815170288, "learning_rate": 0.0001, "loss": 0.0138, "step": 85890 }, { "epoch": 565.1315789473684, "grad_norm": 1.1843609809875488, "learning_rate": 0.0001, "loss": 0.0149, "step": 85900 }, { "epoch": 565.1973684210526, "grad_norm": 1.3165593147277832, "learning_rate": 0.0001, "loss": 0.0148, "step": 85910 }, { "epoch": 565.2631578947369, "grad_norm": 1.2591749429702759, "learning_rate": 0.0001, "loss": 0.0147, "step": 85920 }, { "epoch": 565.328947368421, "grad_norm": 1.2328333854675293, "learning_rate": 0.0001, "loss": 0.0181, "step": 85930 }, { "epoch": 565.3947368421053, "grad_norm": 1.5068926811218262, "learning_rate": 0.0001, "loss": 0.0142, "step": 85940 }, { "epoch": 565.4605263157895, "grad_norm": 1.200635313987732, "learning_rate": 0.0001, "loss": 0.0165, "step": 85950 }, { "epoch": 565.5263157894736, "grad_norm": 1.252205491065979, "learning_rate": 0.0001, "loss": 0.0152, "step": 85960 }, { "epoch": 565.5921052631579, "grad_norm": 1.4719053506851196, "learning_rate": 0.0001, "loss": 0.014, "step": 85970 }, { "epoch": 565.6578947368421, "grad_norm": 1.5021042823791504, "learning_rate": 0.0001, "loss": 0.0128, "step": 85980 }, { "epoch": 565.7236842105264, "grad_norm": 1.357844591140747, "learning_rate": 0.0001, "loss": 0.0116, "step": 85990 }, { "epoch": 565.7894736842105, "grad_norm": 1.7366654872894287, "learning_rate": 0.0001, "loss": 0.0147, "step": 86000 }, { "epoch": 565.8552631578947, "grad_norm": 1.5699540376663208, "learning_rate": 0.0001, "loss": 0.0162, "step": 86010 }, { "epoch": 565.921052631579, "grad_norm": 1.4972573518753052, "learning_rate": 0.0001, "loss": 0.0172, "step": 86020 }, { "epoch": 565.9868421052631, "grad_norm": 1.3250941038131714, "learning_rate": 0.0001, "loss": 0.0163, "step": 86030 }, { "epoch": 566.0526315789474, "grad_norm": 1.2476089000701904, "learning_rate": 0.0001, "loss": 0.0127, "step": 86040 }, { "epoch": 566.1184210526316, "grad_norm": 1.1431740522384644, "learning_rate": 0.0001, "loss": 0.014, "step": 86050 }, { "epoch": 566.1842105263158, "grad_norm": 1.6035784482955933, "learning_rate": 0.0001, "loss": 0.0135, "step": 86060 }, { "epoch": 566.25, "grad_norm": 1.2695852518081665, "learning_rate": 0.0001, "loss": 0.0137, "step": 86070 }, { "epoch": 566.3157894736842, "grad_norm": 1.6045368909835815, "learning_rate": 0.0001, "loss": 0.013, "step": 86080 }, { "epoch": 566.3815789473684, "grad_norm": 1.4930212497711182, "learning_rate": 0.0001, "loss": 0.0167, "step": 86090 }, { "epoch": 566.4473684210526, "grad_norm": 1.582694172859192, "learning_rate": 0.0001, "loss": 0.0164, "step": 86100 }, { "epoch": 566.5131578947369, "grad_norm": 1.0641850233078003, "learning_rate": 0.0001, "loss": 0.0136, "step": 86110 }, { "epoch": 566.578947368421, "grad_norm": 1.4700920581817627, "learning_rate": 0.0001, "loss": 0.0191, "step": 86120 }, { "epoch": 566.6447368421053, "grad_norm": 1.2261745929718018, "learning_rate": 0.0001, "loss": 0.0155, "step": 86130 }, { "epoch": 566.7105263157895, "grad_norm": 1.3735620975494385, "learning_rate": 0.0001, "loss": 0.0129, "step": 86140 }, { "epoch": 566.7763157894736, "grad_norm": 1.3327710628509521, "learning_rate": 0.0001, "loss": 0.0117, "step": 86150 }, { "epoch": 566.8421052631579, "grad_norm": 1.2571219205856323, "learning_rate": 0.0001, "loss": 0.0188, "step": 86160 }, { "epoch": 566.9078947368421, "grad_norm": 1.249358057975769, "learning_rate": 0.0001, "loss": 0.0165, "step": 86170 }, { "epoch": 566.9736842105264, "grad_norm": 1.244036316871643, "learning_rate": 0.0001, "loss": 0.0146, "step": 86180 }, { "epoch": 567.0394736842105, "grad_norm": 1.7571674585342407, "learning_rate": 0.0001, "loss": 0.012, "step": 86190 }, { "epoch": 567.1052631578947, "grad_norm": 1.5118809938430786, "learning_rate": 0.0001, "loss": 0.0157, "step": 86200 }, { "epoch": 567.171052631579, "grad_norm": 1.837617039680481, "learning_rate": 0.0001, "loss": 0.019, "step": 86210 }, { "epoch": 567.2368421052631, "grad_norm": 1.0813583135604858, "learning_rate": 0.0001, "loss": 0.0151, "step": 86220 }, { "epoch": 567.3026315789474, "grad_norm": 2.2787675857543945, "learning_rate": 0.0001, "loss": 0.0129, "step": 86230 }, { "epoch": 567.3684210526316, "grad_norm": 1.7668850421905518, "learning_rate": 0.0001, "loss": 0.0113, "step": 86240 }, { "epoch": 567.4342105263158, "grad_norm": 1.4125908613204956, "learning_rate": 0.0001, "loss": 0.0151, "step": 86250 }, { "epoch": 567.5, "grad_norm": 1.1958041191101074, "learning_rate": 0.0001, "loss": 0.0114, "step": 86260 }, { "epoch": 567.5657894736842, "grad_norm": 1.4706217050552368, "learning_rate": 0.0001, "loss": 0.0151, "step": 86270 }, { "epoch": 567.6315789473684, "grad_norm": 1.7044920921325684, "learning_rate": 0.0001, "loss": 0.0181, "step": 86280 }, { "epoch": 567.6973684210526, "grad_norm": 1.3734842538833618, "learning_rate": 0.0001, "loss": 0.013, "step": 86290 }, { "epoch": 567.7631578947369, "grad_norm": 1.1944985389709473, "learning_rate": 0.0001, "loss": 0.0168, "step": 86300 }, { "epoch": 567.828947368421, "grad_norm": 1.7512550354003906, "learning_rate": 0.0001, "loss": 0.0156, "step": 86310 }, { "epoch": 567.8947368421053, "grad_norm": 1.3902010917663574, "learning_rate": 0.0001, "loss": 0.0147, "step": 86320 }, { "epoch": 567.9605263157895, "grad_norm": 1.3653416633605957, "learning_rate": 0.0001, "loss": 0.0161, "step": 86330 }, { "epoch": 568.0263157894736, "grad_norm": 1.6189249753952026, "learning_rate": 0.0001, "loss": 0.0141, "step": 86340 }, { "epoch": 568.0921052631579, "grad_norm": 1.8560415506362915, "learning_rate": 0.0001, "loss": 0.0146, "step": 86350 }, { "epoch": 568.1578947368421, "grad_norm": 1.9289429187774658, "learning_rate": 0.0001, "loss": 0.015, "step": 86360 }, { "epoch": 568.2236842105264, "grad_norm": 2.109386444091797, "learning_rate": 0.0001, "loss": 0.0146, "step": 86370 }, { "epoch": 568.2894736842105, "grad_norm": 1.7920029163360596, "learning_rate": 0.0001, "loss": 0.0122, "step": 86380 }, { "epoch": 568.3552631578947, "grad_norm": 1.5712206363677979, "learning_rate": 0.0001, "loss": 0.0132, "step": 86390 }, { "epoch": 568.421052631579, "grad_norm": 1.5181488990783691, "learning_rate": 0.0001, "loss": 0.0141, "step": 86400 }, { "epoch": 568.4868421052631, "grad_norm": 1.2398194074630737, "learning_rate": 0.0001, "loss": 0.0149, "step": 86410 }, { "epoch": 568.5526315789474, "grad_norm": 1.4778939485549927, "learning_rate": 0.0001, "loss": 0.0187, "step": 86420 }, { "epoch": 568.6184210526316, "grad_norm": 1.8581976890563965, "learning_rate": 0.0001, "loss": 0.0133, "step": 86430 }, { "epoch": 568.6842105263158, "grad_norm": 1.3468512296676636, "learning_rate": 0.0001, "loss": 0.0118, "step": 86440 }, { "epoch": 568.75, "grad_norm": 1.64116370677948, "learning_rate": 0.0001, "loss": 0.0122, "step": 86450 }, { "epoch": 568.8157894736842, "grad_norm": 1.6647287607192993, "learning_rate": 0.0001, "loss": 0.0183, "step": 86460 }, { "epoch": 568.8815789473684, "grad_norm": 1.223968744277954, "learning_rate": 0.0001, "loss": 0.0194, "step": 86470 }, { "epoch": 568.9473684210526, "grad_norm": 1.5685454607009888, "learning_rate": 0.0001, "loss": 0.0145, "step": 86480 }, { "epoch": 569.0131578947369, "grad_norm": 1.631350040435791, "learning_rate": 0.0001, "loss": 0.0124, "step": 86490 }, { "epoch": 569.078947368421, "grad_norm": 1.5004124641418457, "learning_rate": 0.0001, "loss": 0.0132, "step": 86500 }, { "epoch": 569.1447368421053, "grad_norm": 1.5797256231307983, "learning_rate": 0.0001, "loss": 0.0149, "step": 86510 }, { "epoch": 569.2105263157895, "grad_norm": 1.355843424797058, "learning_rate": 0.0001, "loss": 0.014, "step": 86520 }, { "epoch": 569.2763157894736, "grad_norm": 1.327889323234558, "learning_rate": 0.0001, "loss": 0.0149, "step": 86530 }, { "epoch": 569.3421052631579, "grad_norm": 1.2165501117706299, "learning_rate": 0.0001, "loss": 0.0149, "step": 86540 }, { "epoch": 569.4078947368421, "grad_norm": 1.1318728923797607, "learning_rate": 0.0001, "loss": 0.0167, "step": 86550 }, { "epoch": 569.4736842105264, "grad_norm": 1.8622710704803467, "learning_rate": 0.0001, "loss": 0.0194, "step": 86560 }, { "epoch": 569.5394736842105, "grad_norm": 1.509390950202942, "learning_rate": 0.0001, "loss": 0.0191, "step": 86570 }, { "epoch": 569.6052631578947, "grad_norm": 1.7645913362503052, "learning_rate": 0.0001, "loss": 0.0136, "step": 86580 }, { "epoch": 569.671052631579, "grad_norm": 1.5354851484298706, "learning_rate": 0.0001, "loss": 0.0135, "step": 86590 }, { "epoch": 569.7368421052631, "grad_norm": 1.6485265493392944, "learning_rate": 0.0001, "loss": 0.0113, "step": 86600 }, { "epoch": 569.8026315789474, "grad_norm": 1.5010101795196533, "learning_rate": 0.0001, "loss": 0.0159, "step": 86610 }, { "epoch": 569.8684210526316, "grad_norm": 1.1346133947372437, "learning_rate": 0.0001, "loss": 0.0134, "step": 86620 }, { "epoch": 569.9342105263158, "grad_norm": 1.4242507219314575, "learning_rate": 0.0001, "loss": 0.0147, "step": 86630 }, { "epoch": 570.0, "grad_norm": 1.2631951570510864, "learning_rate": 0.0001, "loss": 0.0143, "step": 86640 }, { "epoch": 570.0657894736842, "grad_norm": 1.5311230421066284, "learning_rate": 0.0001, "loss": 0.0115, "step": 86650 }, { "epoch": 570.1315789473684, "grad_norm": 1.4352283477783203, "learning_rate": 0.0001, "loss": 0.0153, "step": 86660 }, { "epoch": 570.1973684210526, "grad_norm": 1.4969233274459839, "learning_rate": 0.0001, "loss": 0.0132, "step": 86670 }, { "epoch": 570.2631578947369, "grad_norm": 1.5302940607070923, "learning_rate": 0.0001, "loss": 0.0124, "step": 86680 }, { "epoch": 570.328947368421, "grad_norm": 1.5749859809875488, "learning_rate": 0.0001, "loss": 0.0184, "step": 86690 }, { "epoch": 570.3947368421053, "grad_norm": 1.7194427251815796, "learning_rate": 0.0001, "loss": 0.017, "step": 86700 }, { "epoch": 570.4605263157895, "grad_norm": 1.1243551969528198, "learning_rate": 0.0001, "loss": 0.0144, "step": 86710 }, { "epoch": 570.5263157894736, "grad_norm": 1.1702775955200195, "learning_rate": 0.0001, "loss": 0.0175, "step": 86720 }, { "epoch": 570.5921052631579, "grad_norm": 1.9406770467758179, "learning_rate": 0.0001, "loss": 0.0124, "step": 86730 }, { "epoch": 570.6578947368421, "grad_norm": 1.3663856983184814, "learning_rate": 0.0001, "loss": 0.0161, "step": 86740 }, { "epoch": 570.7236842105264, "grad_norm": 0.9773868918418884, "learning_rate": 0.0001, "loss": 0.0167, "step": 86750 }, { "epoch": 570.7894736842105, "grad_norm": 1.0084121227264404, "learning_rate": 0.0001, "loss": 0.0129, "step": 86760 }, { "epoch": 570.8552631578947, "grad_norm": 1.3967101573944092, "learning_rate": 0.0001, "loss": 0.0165, "step": 86770 }, { "epoch": 570.921052631579, "grad_norm": 1.691788673400879, "learning_rate": 0.0001, "loss": 0.0121, "step": 86780 }, { "epoch": 570.9868421052631, "grad_norm": 1.2086161375045776, "learning_rate": 0.0001, "loss": 0.017, "step": 86790 }, { "epoch": 571.0526315789474, "grad_norm": 1.6112903356552124, "learning_rate": 0.0001, "loss": 0.017, "step": 86800 }, { "epoch": 571.1184210526316, "grad_norm": 1.6443878412246704, "learning_rate": 0.0001, "loss": 0.0135, "step": 86810 }, { "epoch": 571.1842105263158, "grad_norm": 1.6934781074523926, "learning_rate": 0.0001, "loss": 0.0136, "step": 86820 }, { "epoch": 571.25, "grad_norm": 1.5611627101898193, "learning_rate": 0.0001, "loss": 0.0201, "step": 86830 }, { "epoch": 571.3157894736842, "grad_norm": 1.5109394788742065, "learning_rate": 0.0001, "loss": 0.0166, "step": 86840 }, { "epoch": 571.3815789473684, "grad_norm": 1.6566964387893677, "learning_rate": 0.0001, "loss": 0.0128, "step": 86850 }, { "epoch": 571.4473684210526, "grad_norm": 1.4867939949035645, "learning_rate": 0.0001, "loss": 0.0123, "step": 86860 }, { "epoch": 571.5131578947369, "grad_norm": 1.4024735689163208, "learning_rate": 0.0001, "loss": 0.0167, "step": 86870 }, { "epoch": 571.578947368421, "grad_norm": 1.647398591041565, "learning_rate": 0.0001, "loss": 0.0144, "step": 86880 }, { "epoch": 571.6447368421053, "grad_norm": 1.2771854400634766, "learning_rate": 0.0001, "loss": 0.0141, "step": 86890 }, { "epoch": 571.7105263157895, "grad_norm": 1.6968144178390503, "learning_rate": 0.0001, "loss": 0.0143, "step": 86900 }, { "epoch": 571.7763157894736, "grad_norm": 1.1842554807662964, "learning_rate": 0.0001, "loss": 0.0158, "step": 86910 }, { "epoch": 571.8421052631579, "grad_norm": 1.4667508602142334, "learning_rate": 0.0001, "loss": 0.0148, "step": 86920 }, { "epoch": 571.9078947368421, "grad_norm": 1.1372261047363281, "learning_rate": 0.0001, "loss": 0.0114, "step": 86930 }, { "epoch": 571.9736842105264, "grad_norm": 1.5105564594268799, "learning_rate": 0.0001, "loss": 0.0107, "step": 86940 }, { "epoch": 572.0394736842105, "grad_norm": 1.6603349447250366, "learning_rate": 0.0001, "loss": 0.0193, "step": 86950 }, { "epoch": 572.1052631578947, "grad_norm": 1.6338516473770142, "learning_rate": 0.0001, "loss": 0.0121, "step": 86960 }, { "epoch": 572.171052631579, "grad_norm": 1.4620648622512817, "learning_rate": 0.0001, "loss": 0.0136, "step": 86970 }, { "epoch": 572.2368421052631, "grad_norm": 1.3898859024047852, "learning_rate": 0.0001, "loss": 0.0147, "step": 86980 }, { "epoch": 572.3026315789474, "grad_norm": 1.441612958908081, "learning_rate": 0.0001, "loss": 0.0174, "step": 86990 }, { "epoch": 572.3684210526316, "grad_norm": 1.873490571975708, "learning_rate": 0.0001, "loss": 0.0118, "step": 87000 }, { "epoch": 572.4342105263158, "grad_norm": 1.9745879173278809, "learning_rate": 0.0001, "loss": 0.0151, "step": 87010 }, { "epoch": 572.5, "grad_norm": 1.6518950462341309, "learning_rate": 0.0001, "loss": 0.0146, "step": 87020 }, { "epoch": 572.5657894736842, "grad_norm": 1.7415915727615356, "learning_rate": 0.0001, "loss": 0.0119, "step": 87030 }, { "epoch": 572.6315789473684, "grad_norm": 1.6201395988464355, "learning_rate": 0.0001, "loss": 0.0145, "step": 87040 }, { "epoch": 572.6973684210526, "grad_norm": 1.8694651126861572, "learning_rate": 0.0001, "loss": 0.0173, "step": 87050 }, { "epoch": 572.7631578947369, "grad_norm": 1.3491582870483398, "learning_rate": 0.0001, "loss": 0.0104, "step": 87060 }, { "epoch": 572.828947368421, "grad_norm": 1.4487825632095337, "learning_rate": 0.0001, "loss": 0.0166, "step": 87070 }, { "epoch": 572.8947368421053, "grad_norm": 1.561672568321228, "learning_rate": 0.0001, "loss": 0.0147, "step": 87080 }, { "epoch": 572.9605263157895, "grad_norm": 1.9067714214324951, "learning_rate": 0.0001, "loss": 0.0148, "step": 87090 }, { "epoch": 573.0263157894736, "grad_norm": 1.8776054382324219, "learning_rate": 0.0001, "loss": 0.0161, "step": 87100 }, { "epoch": 573.0921052631579, "grad_norm": 1.442747950553894, "learning_rate": 0.0001, "loss": 0.0141, "step": 87110 }, { "epoch": 573.1578947368421, "grad_norm": 1.4753555059432983, "learning_rate": 0.0001, "loss": 0.0111, "step": 87120 }, { "epoch": 573.2236842105264, "grad_norm": 1.4989293813705444, "learning_rate": 0.0001, "loss": 0.0133, "step": 87130 }, { "epoch": 573.2894736842105, "grad_norm": 1.2877835035324097, "learning_rate": 0.0001, "loss": 0.0157, "step": 87140 }, { "epoch": 573.3552631578947, "grad_norm": 1.415516972541809, "learning_rate": 0.0001, "loss": 0.0116, "step": 87150 }, { "epoch": 573.421052631579, "grad_norm": 1.1959604024887085, "learning_rate": 0.0001, "loss": 0.0121, "step": 87160 }, { "epoch": 573.4868421052631, "grad_norm": 1.1960251331329346, "learning_rate": 0.0001, "loss": 0.012, "step": 87170 }, { "epoch": 573.5526315789474, "grad_norm": 1.4185941219329834, "learning_rate": 0.0001, "loss": 0.0212, "step": 87180 }, { "epoch": 573.6184210526316, "grad_norm": 1.4025217294692993, "learning_rate": 0.0001, "loss": 0.0165, "step": 87190 }, { "epoch": 573.6842105263158, "grad_norm": 1.3483737707138062, "learning_rate": 0.0001, "loss": 0.0138, "step": 87200 }, { "epoch": 573.75, "grad_norm": 1.619146704673767, "learning_rate": 0.0001, "loss": 0.0163, "step": 87210 }, { "epoch": 573.8157894736842, "grad_norm": 1.834143042564392, "learning_rate": 0.0001, "loss": 0.0172, "step": 87220 }, { "epoch": 573.8815789473684, "grad_norm": 1.1943591833114624, "learning_rate": 0.0001, "loss": 0.0148, "step": 87230 }, { "epoch": 573.9473684210526, "grad_norm": 1.4302765130996704, "learning_rate": 0.0001, "loss": 0.016, "step": 87240 }, { "epoch": 574.0131578947369, "grad_norm": 1.6002215147018433, "learning_rate": 0.0001, "loss": 0.0153, "step": 87250 }, { "epoch": 574.078947368421, "grad_norm": 1.294883131980896, "learning_rate": 0.0001, "loss": 0.0157, "step": 87260 }, { "epoch": 574.1447368421053, "grad_norm": 1.297235369682312, "learning_rate": 0.0001, "loss": 0.0121, "step": 87270 }, { "epoch": 574.2105263157895, "grad_norm": 1.6582493782043457, "learning_rate": 0.0001, "loss": 0.0151, "step": 87280 }, { "epoch": 574.2763157894736, "grad_norm": 1.6350191831588745, "learning_rate": 0.0001, "loss": 0.0179, "step": 87290 }, { "epoch": 574.3421052631579, "grad_norm": 1.1994398832321167, "learning_rate": 0.0001, "loss": 0.0128, "step": 87300 }, { "epoch": 574.4078947368421, "grad_norm": 1.2149055004119873, "learning_rate": 0.0001, "loss": 0.0166, "step": 87310 }, { "epoch": 574.4736842105264, "grad_norm": 1.19181489944458, "learning_rate": 0.0001, "loss": 0.0168, "step": 87320 }, { "epoch": 574.5394736842105, "grad_norm": 1.6896535158157349, "learning_rate": 0.0001, "loss": 0.0145, "step": 87330 }, { "epoch": 574.6052631578947, "grad_norm": 1.7736496925354004, "learning_rate": 0.0001, "loss": 0.0172, "step": 87340 }, { "epoch": 574.671052631579, "grad_norm": 1.6299793720245361, "learning_rate": 0.0001, "loss": 0.0139, "step": 87350 }, { "epoch": 574.7368421052631, "grad_norm": 1.8294655084609985, "learning_rate": 0.0001, "loss": 0.0131, "step": 87360 }, { "epoch": 574.8026315789474, "grad_norm": 1.413948655128479, "learning_rate": 0.0001, "loss": 0.0124, "step": 87370 }, { "epoch": 574.8684210526316, "grad_norm": 1.4278432130813599, "learning_rate": 0.0001, "loss": 0.0163, "step": 87380 }, { "epoch": 574.9342105263158, "grad_norm": 1.4603970050811768, "learning_rate": 0.0001, "loss": 0.0173, "step": 87390 }, { "epoch": 575.0, "grad_norm": 1.1538769006729126, "learning_rate": 0.0001, "loss": 0.0113, "step": 87400 }, { "epoch": 575.0657894736842, "grad_norm": 1.9579126834869385, "learning_rate": 0.0001, "loss": 0.0106, "step": 87410 }, { "epoch": 575.1315789473684, "grad_norm": 1.5503981113433838, "learning_rate": 0.0001, "loss": 0.0152, "step": 87420 }, { "epoch": 575.1973684210526, "grad_norm": 1.962584376335144, "learning_rate": 0.0001, "loss": 0.0161, "step": 87430 }, { "epoch": 575.2631578947369, "grad_norm": 1.699212908744812, "learning_rate": 0.0001, "loss": 0.0147, "step": 87440 }, { "epoch": 575.328947368421, "grad_norm": 1.5999000072479248, "learning_rate": 0.0001, "loss": 0.0158, "step": 87450 }, { "epoch": 575.3947368421053, "grad_norm": 1.7322505712509155, "learning_rate": 0.0001, "loss": 0.0145, "step": 87460 }, { "epoch": 575.4605263157895, "grad_norm": 1.517648458480835, "learning_rate": 0.0001, "loss": 0.0138, "step": 87470 }, { "epoch": 575.5263157894736, "grad_norm": 1.686344027519226, "learning_rate": 0.0001, "loss": 0.0185, "step": 87480 }, { "epoch": 575.5921052631579, "grad_norm": 1.2106194496154785, "learning_rate": 0.0001, "loss": 0.0126, "step": 87490 }, { "epoch": 575.6578947368421, "grad_norm": 1.8237855434417725, "learning_rate": 0.0001, "loss": 0.0118, "step": 87500 }, { "epoch": 575.7236842105264, "grad_norm": 1.3441084623336792, "learning_rate": 0.0001, "loss": 0.0111, "step": 87510 }, { "epoch": 575.7894736842105, "grad_norm": 1.7665501832962036, "learning_rate": 0.0001, "loss": 0.0152, "step": 87520 }, { "epoch": 575.8552631578947, "grad_norm": 1.8284032344818115, "learning_rate": 0.0001, "loss": 0.0135, "step": 87530 }, { "epoch": 575.921052631579, "grad_norm": 1.9049657583236694, "learning_rate": 0.0001, "loss": 0.0172, "step": 87540 }, { "epoch": 575.9868421052631, "grad_norm": 1.8580753803253174, "learning_rate": 0.0001, "loss": 0.014, "step": 87550 }, { "epoch": 576.0526315789474, "grad_norm": 1.3230680227279663, "learning_rate": 0.0001, "loss": 0.0142, "step": 87560 }, { "epoch": 576.1184210526316, "grad_norm": 1.649842619895935, "learning_rate": 0.0001, "loss": 0.0147, "step": 87570 }, { "epoch": 576.1842105263158, "grad_norm": 1.766444206237793, "learning_rate": 0.0001, "loss": 0.0132, "step": 87580 }, { "epoch": 576.25, "grad_norm": 1.7784048318862915, "learning_rate": 0.0001, "loss": 0.0161, "step": 87590 }, { "epoch": 576.3157894736842, "grad_norm": 1.754856824874878, "learning_rate": 0.0001, "loss": 0.0172, "step": 87600 }, { "epoch": 576.3815789473684, "grad_norm": 1.5187896490097046, "learning_rate": 0.0001, "loss": 0.0148, "step": 87610 }, { "epoch": 576.4473684210526, "grad_norm": 1.6247272491455078, "learning_rate": 0.0001, "loss": 0.0133, "step": 87620 }, { "epoch": 576.5131578947369, "grad_norm": 1.2423484325408936, "learning_rate": 0.0001, "loss": 0.0134, "step": 87630 }, { "epoch": 576.578947368421, "grad_norm": 1.3603765964508057, "learning_rate": 0.0001, "loss": 0.0193, "step": 87640 }, { "epoch": 576.6447368421053, "grad_norm": 2.2211389541625977, "learning_rate": 0.0001, "loss": 0.0136, "step": 87650 }, { "epoch": 576.7105263157895, "grad_norm": 1.4014244079589844, "learning_rate": 0.0001, "loss": 0.0125, "step": 87660 }, { "epoch": 576.7763157894736, "grad_norm": 2.0304644107818604, "learning_rate": 0.0001, "loss": 0.0138, "step": 87670 }, { "epoch": 576.8421052631579, "grad_norm": 1.6968389749526978, "learning_rate": 0.0001, "loss": 0.0153, "step": 87680 }, { "epoch": 576.9078947368421, "grad_norm": 1.400782823562622, "learning_rate": 0.0001, "loss": 0.0131, "step": 87690 }, { "epoch": 576.9736842105264, "grad_norm": 1.566798210144043, "learning_rate": 0.0001, "loss": 0.0141, "step": 87700 }, { "epoch": 577.0394736842105, "grad_norm": 1.6360336542129517, "learning_rate": 0.0001, "loss": 0.0139, "step": 87710 }, { "epoch": 577.1052631578947, "grad_norm": 1.4671573638916016, "learning_rate": 0.0001, "loss": 0.0178, "step": 87720 }, { "epoch": 577.171052631579, "grad_norm": 1.1514326333999634, "learning_rate": 0.0001, "loss": 0.0144, "step": 87730 }, { "epoch": 577.2368421052631, "grad_norm": 1.4572126865386963, "learning_rate": 0.0001, "loss": 0.0139, "step": 87740 }, { "epoch": 577.3026315789474, "grad_norm": 1.7702685594558716, "learning_rate": 0.0001, "loss": 0.0143, "step": 87750 }, { "epoch": 577.3684210526316, "grad_norm": 1.5295169353485107, "learning_rate": 0.0001, "loss": 0.0121, "step": 87760 }, { "epoch": 577.4342105263158, "grad_norm": 1.7197767496109009, "learning_rate": 0.0001, "loss": 0.0175, "step": 87770 }, { "epoch": 577.5, "grad_norm": 0.9767845869064331, "learning_rate": 0.0001, "loss": 0.0118, "step": 87780 }, { "epoch": 577.5657894736842, "grad_norm": 1.920710563659668, "learning_rate": 0.0001, "loss": 0.011, "step": 87790 }, { "epoch": 577.6315789473684, "grad_norm": 1.6457922458648682, "learning_rate": 0.0001, "loss": 0.0133, "step": 87800 }, { "epoch": 577.6973684210526, "grad_norm": 1.29263436794281, "learning_rate": 0.0001, "loss": 0.0136, "step": 87810 }, { "epoch": 577.7631578947369, "grad_norm": 1.5949400663375854, "learning_rate": 0.0001, "loss": 0.0198, "step": 87820 }, { "epoch": 577.828947368421, "grad_norm": 1.826181411743164, "learning_rate": 0.0001, "loss": 0.0136, "step": 87830 }, { "epoch": 577.8947368421053, "grad_norm": 1.4681410789489746, "learning_rate": 0.0001, "loss": 0.0186, "step": 87840 }, { "epoch": 577.9605263157895, "grad_norm": 1.5737792253494263, "learning_rate": 0.0001, "loss": 0.0138, "step": 87850 }, { "epoch": 578.0263157894736, "grad_norm": 1.5414100885391235, "learning_rate": 0.0001, "loss": 0.014, "step": 87860 }, { "epoch": 578.0921052631579, "grad_norm": 1.1122286319732666, "learning_rate": 0.0001, "loss": 0.0116, "step": 87870 }, { "epoch": 578.1578947368421, "grad_norm": 1.1134926080703735, "learning_rate": 0.0001, "loss": 0.0132, "step": 87880 }, { "epoch": 578.2236842105264, "grad_norm": 1.4852521419525146, "learning_rate": 0.0001, "loss": 0.0207, "step": 87890 }, { "epoch": 578.2894736842105, "grad_norm": 1.8866889476776123, "learning_rate": 0.0001, "loss": 0.0147, "step": 87900 }, { "epoch": 578.3552631578947, "grad_norm": 1.4450727701187134, "learning_rate": 0.0001, "loss": 0.0156, "step": 87910 }, { "epoch": 578.421052631579, "grad_norm": 1.3344097137451172, "learning_rate": 0.0001, "loss": 0.0145, "step": 87920 }, { "epoch": 578.4868421052631, "grad_norm": 1.5885945558547974, "learning_rate": 0.0001, "loss": 0.0118, "step": 87930 }, { "epoch": 578.5526315789474, "grad_norm": 1.401248574256897, "learning_rate": 0.0001, "loss": 0.0156, "step": 87940 }, { "epoch": 578.6184210526316, "grad_norm": 1.5212807655334473, "learning_rate": 0.0001, "loss": 0.0162, "step": 87950 }, { "epoch": 578.6842105263158, "grad_norm": 1.0099167823791504, "learning_rate": 0.0001, "loss": 0.0117, "step": 87960 }, { "epoch": 578.75, "grad_norm": 1.469174861907959, "learning_rate": 0.0001, "loss": 0.0139, "step": 87970 }, { "epoch": 578.8157894736842, "grad_norm": 2.086904525756836, "learning_rate": 0.0001, "loss": 0.0151, "step": 87980 }, { "epoch": 578.8815789473684, "grad_norm": 1.3882114887237549, "learning_rate": 0.0001, "loss": 0.0159, "step": 87990 }, { "epoch": 578.9473684210526, "grad_norm": 1.519317388534546, "learning_rate": 0.0001, "loss": 0.0153, "step": 88000 }, { "epoch": 579.0131578947369, "grad_norm": 2.0692524909973145, "learning_rate": 0.0001, "loss": 0.0146, "step": 88010 }, { "epoch": 579.078947368421, "grad_norm": 1.718111515045166, "learning_rate": 0.0001, "loss": 0.0138, "step": 88020 }, { "epoch": 579.1447368421053, "grad_norm": 1.5769202709197998, "learning_rate": 0.0001, "loss": 0.0175, "step": 88030 }, { "epoch": 579.2105263157895, "grad_norm": 1.5846788883209229, "learning_rate": 0.0001, "loss": 0.0146, "step": 88040 }, { "epoch": 579.2763157894736, "grad_norm": 1.5134646892547607, "learning_rate": 0.0001, "loss": 0.0119, "step": 88050 }, { "epoch": 579.3421052631579, "grad_norm": 1.3397741317749023, "learning_rate": 0.0001, "loss": 0.0164, "step": 88060 }, { "epoch": 579.4078947368421, "grad_norm": 1.228058099746704, "learning_rate": 0.0001, "loss": 0.0158, "step": 88070 }, { "epoch": 579.4736842105264, "grad_norm": 1.0171515941619873, "learning_rate": 0.0001, "loss": 0.016, "step": 88080 }, { "epoch": 579.5394736842105, "grad_norm": 1.4678679704666138, "learning_rate": 0.0001, "loss": 0.0151, "step": 88090 }, { "epoch": 579.6052631578947, "grad_norm": 1.5713248252868652, "learning_rate": 0.0001, "loss": 0.0141, "step": 88100 }, { "epoch": 579.671052631579, "grad_norm": 1.5237946510314941, "learning_rate": 0.0001, "loss": 0.0142, "step": 88110 }, { "epoch": 579.7368421052631, "grad_norm": 1.4443926811218262, "learning_rate": 0.0001, "loss": 0.0144, "step": 88120 }, { "epoch": 579.8026315789474, "grad_norm": 1.5550788640975952, "learning_rate": 0.0001, "loss": 0.0173, "step": 88130 }, { "epoch": 579.8684210526316, "grad_norm": 1.6643893718719482, "learning_rate": 0.0001, "loss": 0.0124, "step": 88140 }, { "epoch": 579.9342105263158, "grad_norm": 1.6924406290054321, "learning_rate": 0.0001, "loss": 0.0115, "step": 88150 }, { "epoch": 580.0, "grad_norm": 1.5448200702667236, "learning_rate": 0.0001, "loss": 0.0132, "step": 88160 }, { "epoch": 580.0657894736842, "grad_norm": 1.1896681785583496, "learning_rate": 0.0001, "loss": 0.0147, "step": 88170 }, { "epoch": 580.1315789473684, "grad_norm": 1.4523502588272095, "learning_rate": 0.0001, "loss": 0.0135, "step": 88180 }, { "epoch": 580.1973684210526, "grad_norm": 1.731911540031433, "learning_rate": 0.0001, "loss": 0.0138, "step": 88190 }, { "epoch": 580.2631578947369, "grad_norm": 1.6073687076568604, "learning_rate": 0.0001, "loss": 0.0116, "step": 88200 }, { "epoch": 580.328947368421, "grad_norm": 1.9188308715820312, "learning_rate": 0.0001, "loss": 0.0137, "step": 88210 }, { "epoch": 580.3947368421053, "grad_norm": 1.726137399673462, "learning_rate": 0.0001, "loss": 0.0157, "step": 88220 }, { "epoch": 580.4605263157895, "grad_norm": 2.051661968231201, "learning_rate": 0.0001, "loss": 0.0171, "step": 88230 }, { "epoch": 580.5263157894736, "grad_norm": 1.9131361246109009, "learning_rate": 0.0001, "loss": 0.0136, "step": 88240 }, { "epoch": 580.5921052631579, "grad_norm": 1.3211764097213745, "learning_rate": 0.0001, "loss": 0.0129, "step": 88250 }, { "epoch": 580.6578947368421, "grad_norm": 1.8062870502471924, "learning_rate": 0.0001, "loss": 0.0149, "step": 88260 }, { "epoch": 580.7236842105264, "grad_norm": 1.9391127824783325, "learning_rate": 0.0001, "loss": 0.0204, "step": 88270 }, { "epoch": 580.7894736842105, "grad_norm": 1.7653053998947144, "learning_rate": 0.0001, "loss": 0.0152, "step": 88280 }, { "epoch": 580.8552631578947, "grad_norm": 1.939575433731079, "learning_rate": 0.0001, "loss": 0.0154, "step": 88290 }, { "epoch": 580.921052631579, "grad_norm": 2.1394565105438232, "learning_rate": 0.0001, "loss": 0.0141, "step": 88300 }, { "epoch": 580.9868421052631, "grad_norm": 1.5637688636779785, "learning_rate": 0.0001, "loss": 0.0124, "step": 88310 }, { "epoch": 581.0526315789474, "grad_norm": 1.6443886756896973, "learning_rate": 0.0001, "loss": 0.0167, "step": 88320 }, { "epoch": 581.1184210526316, "grad_norm": 1.7359801530838013, "learning_rate": 0.0001, "loss": 0.0137, "step": 88330 }, { "epoch": 581.1842105263158, "grad_norm": 2.0428404808044434, "learning_rate": 0.0001, "loss": 0.0149, "step": 88340 }, { "epoch": 581.25, "grad_norm": 1.2302874326705933, "learning_rate": 0.0001, "loss": 0.0133, "step": 88350 }, { "epoch": 581.3157894736842, "grad_norm": 1.0474082231521606, "learning_rate": 0.0001, "loss": 0.0142, "step": 88360 }, { "epoch": 581.3815789473684, "grad_norm": 1.6005029678344727, "learning_rate": 0.0001, "loss": 0.0121, "step": 88370 }, { "epoch": 581.4473684210526, "grad_norm": 1.6207308769226074, "learning_rate": 0.0001, "loss": 0.0117, "step": 88380 }, { "epoch": 581.5131578947369, "grad_norm": 1.8664768934249878, "learning_rate": 0.0001, "loss": 0.0158, "step": 88390 }, { "epoch": 581.578947368421, "grad_norm": 1.534019112586975, "learning_rate": 0.0001, "loss": 0.0131, "step": 88400 }, { "epoch": 581.6447368421053, "grad_norm": 1.7899595499038696, "learning_rate": 0.0001, "loss": 0.0165, "step": 88410 }, { "epoch": 581.7105263157895, "grad_norm": 1.5154780149459839, "learning_rate": 0.0001, "loss": 0.0166, "step": 88420 }, { "epoch": 581.7763157894736, "grad_norm": 1.4468926191329956, "learning_rate": 0.0001, "loss": 0.0128, "step": 88430 }, { "epoch": 581.8421052631579, "grad_norm": 1.4893330335617065, "learning_rate": 0.0001, "loss": 0.0158, "step": 88440 }, { "epoch": 581.9078947368421, "grad_norm": 1.5724375247955322, "learning_rate": 0.0001, "loss": 0.0128, "step": 88450 }, { "epoch": 581.9736842105264, "grad_norm": 1.9422985315322876, "learning_rate": 0.0001, "loss": 0.0168, "step": 88460 }, { "epoch": 582.0394736842105, "grad_norm": 1.4643545150756836, "learning_rate": 0.0001, "loss": 0.0159, "step": 88470 }, { "epoch": 582.1052631578947, "grad_norm": 1.587235689163208, "learning_rate": 0.0001, "loss": 0.0144, "step": 88480 }, { "epoch": 582.171052631579, "grad_norm": 1.4217454195022583, "learning_rate": 0.0001, "loss": 0.0135, "step": 88490 }, { "epoch": 582.2368421052631, "grad_norm": 1.874976634979248, "learning_rate": 0.0001, "loss": 0.0186, "step": 88500 }, { "epoch": 582.3026315789474, "grad_norm": 1.9949692487716675, "learning_rate": 0.0001, "loss": 0.0143, "step": 88510 }, { "epoch": 582.3684210526316, "grad_norm": 1.5725237131118774, "learning_rate": 0.0001, "loss": 0.0148, "step": 88520 }, { "epoch": 582.4342105263158, "grad_norm": 1.255191683769226, "learning_rate": 0.0001, "loss": 0.0134, "step": 88530 }, { "epoch": 582.5, "grad_norm": 1.7883614301681519, "learning_rate": 0.0001, "loss": 0.0161, "step": 88540 }, { "epoch": 582.5657894736842, "grad_norm": 1.3500796556472778, "learning_rate": 0.0001, "loss": 0.0129, "step": 88550 }, { "epoch": 582.6315789473684, "grad_norm": 1.5151628255844116, "learning_rate": 0.0001, "loss": 0.014, "step": 88560 }, { "epoch": 582.6973684210526, "grad_norm": 1.0433545112609863, "learning_rate": 0.0001, "loss": 0.011, "step": 88570 }, { "epoch": 582.7631578947369, "grad_norm": 1.608307957649231, "learning_rate": 0.0001, "loss": 0.0151, "step": 88580 }, { "epoch": 582.828947368421, "grad_norm": 1.4233063459396362, "learning_rate": 0.0001, "loss": 0.0175, "step": 88590 }, { "epoch": 582.8947368421053, "grad_norm": 1.718360185623169, "learning_rate": 0.0001, "loss": 0.0137, "step": 88600 }, { "epoch": 582.9605263157895, "grad_norm": 1.6085697412490845, "learning_rate": 0.0001, "loss": 0.0134, "step": 88610 }, { "epoch": 583.0263157894736, "grad_norm": 1.8754245042800903, "learning_rate": 0.0001, "loss": 0.0186, "step": 88620 }, { "epoch": 583.0921052631579, "grad_norm": 1.9076552391052246, "learning_rate": 0.0001, "loss": 0.0153, "step": 88630 }, { "epoch": 583.1578947368421, "grad_norm": 1.302823781967163, "learning_rate": 0.0001, "loss": 0.0141, "step": 88640 }, { "epoch": 583.2236842105264, "grad_norm": 1.1668832302093506, "learning_rate": 0.0001, "loss": 0.0166, "step": 88650 }, { "epoch": 583.2894736842105, "grad_norm": 1.6826287508010864, "learning_rate": 0.0001, "loss": 0.0157, "step": 88660 }, { "epoch": 583.3552631578947, "grad_norm": 1.4687474966049194, "learning_rate": 0.0001, "loss": 0.0142, "step": 88670 }, { "epoch": 583.421052631579, "grad_norm": 1.111264944076538, "learning_rate": 0.0001, "loss": 0.0162, "step": 88680 }, { "epoch": 583.4868421052631, "grad_norm": 1.7206848859786987, "learning_rate": 0.0001, "loss": 0.0135, "step": 88690 }, { "epoch": 583.5526315789474, "grad_norm": 1.0835837125778198, "learning_rate": 0.0001, "loss": 0.0132, "step": 88700 }, { "epoch": 583.6184210526316, "grad_norm": 1.269447684288025, "learning_rate": 0.0001, "loss": 0.017, "step": 88710 }, { "epoch": 583.6842105263158, "grad_norm": 1.4816648960113525, "learning_rate": 0.0001, "loss": 0.0158, "step": 88720 }, { "epoch": 583.75, "grad_norm": 1.8864340782165527, "learning_rate": 0.0001, "loss": 0.012, "step": 88730 }, { "epoch": 583.8157894736842, "grad_norm": 1.6203691959381104, "learning_rate": 0.0001, "loss": 0.015, "step": 88740 }, { "epoch": 583.8815789473684, "grad_norm": 1.5537625551223755, "learning_rate": 0.0001, "loss": 0.0166, "step": 88750 }, { "epoch": 583.9473684210526, "grad_norm": 1.5239927768707275, "learning_rate": 0.0001, "loss": 0.0148, "step": 88760 }, { "epoch": 584.0131578947369, "grad_norm": 1.5840685367584229, "learning_rate": 0.0001, "loss": 0.0107, "step": 88770 }, { "epoch": 584.078947368421, "grad_norm": 1.595685601234436, "learning_rate": 0.0001, "loss": 0.0153, "step": 88780 }, { "epoch": 584.1447368421053, "grad_norm": 1.3008538484573364, "learning_rate": 0.0001, "loss": 0.015, "step": 88790 }, { "epoch": 584.2105263157895, "grad_norm": 1.4697030782699585, "learning_rate": 0.0001, "loss": 0.013, "step": 88800 }, { "epoch": 584.2763157894736, "grad_norm": 1.305833101272583, "learning_rate": 0.0001, "loss": 0.0125, "step": 88810 }, { "epoch": 584.3421052631579, "grad_norm": 1.5529290437698364, "learning_rate": 0.0001, "loss": 0.0171, "step": 88820 }, { "epoch": 584.4078947368421, "grad_norm": 1.0900903940200806, "learning_rate": 0.0001, "loss": 0.0114, "step": 88830 }, { "epoch": 584.4736842105264, "grad_norm": 1.6660840511322021, "learning_rate": 0.0001, "loss": 0.022, "step": 88840 }, { "epoch": 584.5394736842105, "grad_norm": 1.6749274730682373, "learning_rate": 0.0001, "loss": 0.0152, "step": 88850 }, { "epoch": 584.6052631578947, "grad_norm": 1.8593238592147827, "learning_rate": 0.0001, "loss": 0.0157, "step": 88860 }, { "epoch": 584.671052631579, "grad_norm": 1.2061132192611694, "learning_rate": 0.0001, "loss": 0.0116, "step": 88870 }, { "epoch": 584.7368421052631, "grad_norm": 1.624653697013855, "learning_rate": 0.0001, "loss": 0.0132, "step": 88880 }, { "epoch": 584.8026315789474, "grad_norm": 1.5403834581375122, "learning_rate": 0.0001, "loss": 0.0127, "step": 88890 }, { "epoch": 584.8684210526316, "grad_norm": 1.5290995836257935, "learning_rate": 0.0001, "loss": 0.0153, "step": 88900 }, { "epoch": 584.9342105263158, "grad_norm": 1.183858036994934, "learning_rate": 0.0001, "loss": 0.0147, "step": 88910 }, { "epoch": 585.0, "grad_norm": 1.4930602312088013, "learning_rate": 0.0001, "loss": 0.016, "step": 88920 }, { "epoch": 585.0657894736842, "grad_norm": 1.54842209815979, "learning_rate": 0.0001, "loss": 0.0133, "step": 88930 }, { "epoch": 585.1315789473684, "grad_norm": 1.4436925649642944, "learning_rate": 0.0001, "loss": 0.0152, "step": 88940 }, { "epoch": 585.1973684210526, "grad_norm": 1.312326431274414, "learning_rate": 0.0001, "loss": 0.0131, "step": 88950 }, { "epoch": 585.2631578947369, "grad_norm": 1.3709172010421753, "learning_rate": 0.0001, "loss": 0.0165, "step": 88960 }, { "epoch": 585.328947368421, "grad_norm": 1.165312647819519, "learning_rate": 0.0001, "loss": 0.0154, "step": 88970 }, { "epoch": 585.3947368421053, "grad_norm": 1.3528225421905518, "learning_rate": 0.0001, "loss": 0.0139, "step": 88980 }, { "epoch": 585.4605263157895, "grad_norm": 1.4751193523406982, "learning_rate": 0.0001, "loss": 0.0169, "step": 88990 }, { "epoch": 585.5263157894736, "grad_norm": 1.2613584995269775, "learning_rate": 0.0001, "loss": 0.0177, "step": 89000 }, { "epoch": 585.5921052631579, "grad_norm": 1.4436922073364258, "learning_rate": 0.0001, "loss": 0.0161, "step": 89010 }, { "epoch": 585.6578947368421, "grad_norm": 1.652085781097412, "learning_rate": 0.0001, "loss": 0.0176, "step": 89020 }, { "epoch": 585.7236842105264, "grad_norm": 1.5059902667999268, "learning_rate": 0.0001, "loss": 0.0174, "step": 89030 }, { "epoch": 585.7894736842105, "grad_norm": 1.6219805479049683, "learning_rate": 0.0001, "loss": 0.0189, "step": 89040 }, { "epoch": 585.8552631578947, "grad_norm": 1.482740044593811, "learning_rate": 0.0001, "loss": 0.0139, "step": 89050 }, { "epoch": 585.921052631579, "grad_norm": 1.7438865900039673, "learning_rate": 0.0001, "loss": 0.0158, "step": 89060 }, { "epoch": 585.9868421052631, "grad_norm": 1.5948326587677002, "learning_rate": 0.0001, "loss": 0.0207, "step": 89070 }, { "epoch": 586.0526315789474, "grad_norm": 1.4451720714569092, "learning_rate": 0.0001, "loss": 0.015, "step": 89080 }, { "epoch": 586.1184210526316, "grad_norm": 1.2451319694519043, "learning_rate": 0.0001, "loss": 0.0154, "step": 89090 }, { "epoch": 586.1842105263158, "grad_norm": 1.205500602722168, "learning_rate": 0.0001, "loss": 0.0134, "step": 89100 }, { "epoch": 586.25, "grad_norm": 0.9669931530952454, "learning_rate": 0.0001, "loss": 0.016, "step": 89110 }, { "epoch": 586.3157894736842, "grad_norm": 1.4495222568511963, "learning_rate": 0.0001, "loss": 0.0182, "step": 89120 }, { "epoch": 586.3815789473684, "grad_norm": 1.6223924160003662, "learning_rate": 0.0001, "loss": 0.0144, "step": 89130 }, { "epoch": 586.4473684210526, "grad_norm": 1.2620947360992432, "learning_rate": 0.0001, "loss": 0.0134, "step": 89140 }, { "epoch": 586.5131578947369, "grad_norm": 1.1747592687606812, "learning_rate": 0.0001, "loss": 0.0126, "step": 89150 }, { "epoch": 586.578947368421, "grad_norm": 1.4415704011917114, "learning_rate": 0.0001, "loss": 0.0179, "step": 89160 }, { "epoch": 586.6447368421053, "grad_norm": 1.5945563316345215, "learning_rate": 0.0001, "loss": 0.0167, "step": 89170 }, { "epoch": 586.7105263157895, "grad_norm": 1.742804765701294, "learning_rate": 0.0001, "loss": 0.0182, "step": 89180 }, { "epoch": 586.7763157894736, "grad_norm": 1.626235842704773, "learning_rate": 0.0001, "loss": 0.0173, "step": 89190 }, { "epoch": 586.8421052631579, "grad_norm": 1.4374204874038696, "learning_rate": 0.0001, "loss": 0.0173, "step": 89200 }, { "epoch": 586.9078947368421, "grad_norm": 1.4780364036560059, "learning_rate": 0.0001, "loss": 0.0172, "step": 89210 }, { "epoch": 586.9736842105264, "grad_norm": 1.286087155342102, "learning_rate": 0.0001, "loss": 0.0145, "step": 89220 }, { "epoch": 587.0394736842105, "grad_norm": 1.5999199151992798, "learning_rate": 0.0001, "loss": 0.014, "step": 89230 }, { "epoch": 587.1052631578947, "grad_norm": 1.3496490716934204, "learning_rate": 0.0001, "loss": 0.0127, "step": 89240 }, { "epoch": 587.171052631579, "grad_norm": 1.8326003551483154, "learning_rate": 0.0001, "loss": 0.0146, "step": 89250 }, { "epoch": 587.2368421052631, "grad_norm": 1.2437591552734375, "learning_rate": 0.0001, "loss": 0.0177, "step": 89260 }, { "epoch": 587.3026315789474, "grad_norm": 1.3369476795196533, "learning_rate": 0.0001, "loss": 0.0149, "step": 89270 }, { "epoch": 587.3684210526316, "grad_norm": 1.0863621234893799, "learning_rate": 0.0001, "loss": 0.0176, "step": 89280 }, { "epoch": 587.4342105263158, "grad_norm": 1.3258700370788574, "learning_rate": 0.0001, "loss": 0.0132, "step": 89290 }, { "epoch": 587.5, "grad_norm": 1.9592903852462769, "learning_rate": 0.0001, "loss": 0.0128, "step": 89300 }, { "epoch": 587.5657894736842, "grad_norm": 1.4493521451950073, "learning_rate": 0.0001, "loss": 0.0155, "step": 89310 }, { "epoch": 587.6315789473684, "grad_norm": 1.7030129432678223, "learning_rate": 0.0001, "loss": 0.0129, "step": 89320 }, { "epoch": 587.6973684210526, "grad_norm": 1.6875720024108887, "learning_rate": 0.0001, "loss": 0.015, "step": 89330 }, { "epoch": 587.7631578947369, "grad_norm": 1.7353432178497314, "learning_rate": 0.0001, "loss": 0.0144, "step": 89340 }, { "epoch": 587.828947368421, "grad_norm": 1.7893266677856445, "learning_rate": 0.0001, "loss": 0.0133, "step": 89350 }, { "epoch": 587.8947368421053, "grad_norm": 1.9556901454925537, "learning_rate": 0.0001, "loss": 0.0199, "step": 89360 }, { "epoch": 587.9605263157895, "grad_norm": 1.5885682106018066, "learning_rate": 0.0001, "loss": 0.0163, "step": 89370 }, { "epoch": 588.0263157894736, "grad_norm": 1.0492125749588013, "learning_rate": 0.0001, "loss": 0.0113, "step": 89380 }, { "epoch": 588.0921052631579, "grad_norm": 0.930172324180603, "learning_rate": 0.0001, "loss": 0.0148, "step": 89390 }, { "epoch": 588.1578947368421, "grad_norm": 1.6416212320327759, "learning_rate": 0.0001, "loss": 0.0149, "step": 89400 }, { "epoch": 588.2236842105264, "grad_norm": 1.617223858833313, "learning_rate": 0.0001, "loss": 0.0139, "step": 89410 }, { "epoch": 588.2894736842105, "grad_norm": 1.4955581426620483, "learning_rate": 0.0001, "loss": 0.0141, "step": 89420 }, { "epoch": 588.3552631578947, "grad_norm": 1.4126851558685303, "learning_rate": 0.0001, "loss": 0.0161, "step": 89430 }, { "epoch": 588.421052631579, "grad_norm": 1.3572373390197754, "learning_rate": 0.0001, "loss": 0.013, "step": 89440 }, { "epoch": 588.4868421052631, "grad_norm": 1.5282076597213745, "learning_rate": 0.0001, "loss": 0.0113, "step": 89450 }, { "epoch": 588.5526315789474, "grad_norm": 1.2431023120880127, "learning_rate": 0.0001, "loss": 0.0127, "step": 89460 }, { "epoch": 588.6184210526316, "grad_norm": 1.3339005708694458, "learning_rate": 0.0001, "loss": 0.0159, "step": 89470 }, { "epoch": 588.6842105263158, "grad_norm": 1.546920657157898, "learning_rate": 0.0001, "loss": 0.0156, "step": 89480 }, { "epoch": 588.75, "grad_norm": 1.5009009838104248, "learning_rate": 0.0001, "loss": 0.0151, "step": 89490 }, { "epoch": 588.8157894736842, "grad_norm": 1.311005711555481, "learning_rate": 0.0001, "loss": 0.0136, "step": 89500 }, { "epoch": 588.8815789473684, "grad_norm": 1.282527208328247, "learning_rate": 0.0001, "loss": 0.0167, "step": 89510 }, { "epoch": 588.9473684210526, "grad_norm": 1.2324057817459106, "learning_rate": 0.0001, "loss": 0.0166, "step": 89520 }, { "epoch": 589.0131578947369, "grad_norm": 1.3375377655029297, "learning_rate": 0.0001, "loss": 0.0182, "step": 89530 }, { "epoch": 589.078947368421, "grad_norm": 1.276978850364685, "learning_rate": 0.0001, "loss": 0.0141, "step": 89540 }, { "epoch": 589.1447368421053, "grad_norm": 1.2085378170013428, "learning_rate": 0.0001, "loss": 0.015, "step": 89550 }, { "epoch": 589.2105263157895, "grad_norm": 1.4367884397506714, "learning_rate": 0.0001, "loss": 0.017, "step": 89560 }, { "epoch": 589.2763157894736, "grad_norm": 1.2636103630065918, "learning_rate": 0.0001, "loss": 0.0157, "step": 89570 }, { "epoch": 589.3421052631579, "grad_norm": 1.3654303550720215, "learning_rate": 0.0001, "loss": 0.015, "step": 89580 }, { "epoch": 589.4078947368421, "grad_norm": 1.0297788381576538, "learning_rate": 0.0001, "loss": 0.0167, "step": 89590 }, { "epoch": 589.4736842105264, "grad_norm": 1.4748284816741943, "learning_rate": 0.0001, "loss": 0.0147, "step": 89600 }, { "epoch": 589.5394736842105, "grad_norm": 1.2759491205215454, "learning_rate": 0.0001, "loss": 0.0133, "step": 89610 }, { "epoch": 589.6052631578947, "grad_norm": 1.2992377281188965, "learning_rate": 0.0001, "loss": 0.0134, "step": 89620 }, { "epoch": 589.671052631579, "grad_norm": 1.4852521419525146, "learning_rate": 0.0001, "loss": 0.0127, "step": 89630 }, { "epoch": 589.7368421052631, "grad_norm": 1.0737380981445312, "learning_rate": 0.0001, "loss": 0.0159, "step": 89640 }, { "epoch": 589.8026315789474, "grad_norm": 1.3087488412857056, "learning_rate": 0.0001, "loss": 0.0155, "step": 89650 }, { "epoch": 589.8684210526316, "grad_norm": 2.001751184463501, "learning_rate": 0.0001, "loss": 0.0137, "step": 89660 }, { "epoch": 589.9342105263158, "grad_norm": 1.344231128692627, "learning_rate": 0.0001, "loss": 0.0144, "step": 89670 }, { "epoch": 590.0, "grad_norm": 1.4394936561584473, "learning_rate": 0.0001, "loss": 0.0139, "step": 89680 }, { "epoch": 590.0657894736842, "grad_norm": 1.5622042417526245, "learning_rate": 0.0001, "loss": 0.018, "step": 89690 }, { "epoch": 590.1315789473684, "grad_norm": 1.3942835330963135, "learning_rate": 0.0001, "loss": 0.0157, "step": 89700 }, { "epoch": 590.1973684210526, "grad_norm": 1.5273241996765137, "learning_rate": 0.0001, "loss": 0.0134, "step": 89710 }, { "epoch": 590.2631578947369, "grad_norm": 1.6664490699768066, "learning_rate": 0.0001, "loss": 0.0114, "step": 89720 }, { "epoch": 590.328947368421, "grad_norm": 1.8227530717849731, "learning_rate": 0.0001, "loss": 0.0117, "step": 89730 }, { "epoch": 590.3947368421053, "grad_norm": 1.6371231079101562, "learning_rate": 0.0001, "loss": 0.0108, "step": 89740 }, { "epoch": 590.4605263157895, "grad_norm": 1.696510672569275, "learning_rate": 0.0001, "loss": 0.0131, "step": 89750 }, { "epoch": 590.5263157894736, "grad_norm": 1.3041877746582031, "learning_rate": 0.0001, "loss": 0.0148, "step": 89760 }, { "epoch": 590.5921052631579, "grad_norm": 1.5886505842208862, "learning_rate": 0.0001, "loss": 0.0134, "step": 89770 }, { "epoch": 590.6578947368421, "grad_norm": 1.6721113920211792, "learning_rate": 0.0001, "loss": 0.0129, "step": 89780 }, { "epoch": 590.7236842105264, "grad_norm": 1.5308001041412354, "learning_rate": 0.0001, "loss": 0.0175, "step": 89790 }, { "epoch": 590.7894736842105, "grad_norm": 1.2509340047836304, "learning_rate": 0.0001, "loss": 0.0151, "step": 89800 }, { "epoch": 590.8552631578947, "grad_norm": 1.7290018796920776, "learning_rate": 0.0001, "loss": 0.0148, "step": 89810 }, { "epoch": 590.921052631579, "grad_norm": 1.6302984952926636, "learning_rate": 0.0001, "loss": 0.0166, "step": 89820 }, { "epoch": 590.9868421052631, "grad_norm": 1.1045904159545898, "learning_rate": 0.0001, "loss": 0.0166, "step": 89830 }, { "epoch": 591.0526315789474, "grad_norm": 1.5274417400360107, "learning_rate": 0.0001, "loss": 0.0149, "step": 89840 }, { "epoch": 591.1184210526316, "grad_norm": 1.613624095916748, "learning_rate": 0.0001, "loss": 0.0111, "step": 89850 }, { "epoch": 591.1842105263158, "grad_norm": 1.0909075736999512, "learning_rate": 0.0001, "loss": 0.0141, "step": 89860 }, { "epoch": 591.25, "grad_norm": 1.673050880432129, "learning_rate": 0.0001, "loss": 0.0131, "step": 89870 }, { "epoch": 591.3157894736842, "grad_norm": 1.3909592628479004, "learning_rate": 0.0001, "loss": 0.0135, "step": 89880 }, { "epoch": 591.3815789473684, "grad_norm": 1.5663087368011475, "learning_rate": 0.0001, "loss": 0.014, "step": 89890 }, { "epoch": 591.4473684210526, "grad_norm": 1.2431169748306274, "learning_rate": 0.0001, "loss": 0.0135, "step": 89900 }, { "epoch": 591.5131578947369, "grad_norm": 1.3824272155761719, "learning_rate": 0.0001, "loss": 0.0157, "step": 89910 }, { "epoch": 591.578947368421, "grad_norm": 1.6026712656021118, "learning_rate": 0.0001, "loss": 0.0109, "step": 89920 }, { "epoch": 591.6447368421053, "grad_norm": 1.0617461204528809, "learning_rate": 0.0001, "loss": 0.0145, "step": 89930 }, { "epoch": 591.7105263157895, "grad_norm": 1.2127119302749634, "learning_rate": 0.0001, "loss": 0.0193, "step": 89940 }, { "epoch": 591.7763157894736, "grad_norm": 1.5704594850540161, "learning_rate": 0.0001, "loss": 0.0129, "step": 89950 }, { "epoch": 591.8421052631579, "grad_norm": 1.3580398559570312, "learning_rate": 0.0001, "loss": 0.0169, "step": 89960 }, { "epoch": 591.9078947368421, "grad_norm": 1.579621434211731, "learning_rate": 0.0001, "loss": 0.0157, "step": 89970 }, { "epoch": 591.9736842105264, "grad_norm": 1.4112714529037476, "learning_rate": 0.0001, "loss": 0.014, "step": 89980 }, { "epoch": 592.0394736842105, "grad_norm": 1.5258386135101318, "learning_rate": 0.0001, "loss": 0.0154, "step": 89990 }, { "epoch": 592.1052631578947, "grad_norm": 1.829720377922058, "learning_rate": 0.0001, "loss": 0.0128, "step": 90000 }, { "epoch": 592.171052631579, "grad_norm": 1.7421555519104004, "learning_rate": 0.0001, "loss": 0.0147, "step": 90010 }, { "epoch": 592.2368421052631, "grad_norm": 1.3377453088760376, "learning_rate": 0.0001, "loss": 0.0169, "step": 90020 }, { "epoch": 592.3026315789474, "grad_norm": 1.3579550981521606, "learning_rate": 0.0001, "loss": 0.0119, "step": 90030 }, { "epoch": 592.3684210526316, "grad_norm": 1.552735447883606, "learning_rate": 0.0001, "loss": 0.0144, "step": 90040 }, { "epoch": 592.4342105263158, "grad_norm": 1.2098300457000732, "learning_rate": 0.0001, "loss": 0.0137, "step": 90050 }, { "epoch": 592.5, "grad_norm": 1.6831995248794556, "learning_rate": 0.0001, "loss": 0.0184, "step": 90060 }, { "epoch": 592.5657894736842, "grad_norm": 1.2708821296691895, "learning_rate": 0.0001, "loss": 0.0135, "step": 90070 }, { "epoch": 592.6315789473684, "grad_norm": 1.1910942792892456, "learning_rate": 0.0001, "loss": 0.0145, "step": 90080 }, { "epoch": 592.6973684210526, "grad_norm": 1.5769959688186646, "learning_rate": 0.0001, "loss": 0.0114, "step": 90090 }, { "epoch": 592.7631578947369, "grad_norm": 1.7189909219741821, "learning_rate": 0.0001, "loss": 0.0163, "step": 90100 }, { "epoch": 592.828947368421, "grad_norm": 1.9246007204055786, "learning_rate": 0.0001, "loss": 0.0136, "step": 90110 }, { "epoch": 592.8947368421053, "grad_norm": 1.4955015182495117, "learning_rate": 0.0001, "loss": 0.0184, "step": 90120 }, { "epoch": 592.9605263157895, "grad_norm": 1.1953061819076538, "learning_rate": 0.0001, "loss": 0.0109, "step": 90130 }, { "epoch": 593.0263157894736, "grad_norm": 1.4319850206375122, "learning_rate": 0.0001, "loss": 0.0143, "step": 90140 }, { "epoch": 593.0921052631579, "grad_norm": 1.146694302558899, "learning_rate": 0.0001, "loss": 0.0158, "step": 90150 }, { "epoch": 593.1578947368421, "grad_norm": 1.3561257123947144, "learning_rate": 0.0001, "loss": 0.0155, "step": 90160 }, { "epoch": 593.2236842105264, "grad_norm": 1.7110464572906494, "learning_rate": 0.0001, "loss": 0.0136, "step": 90170 }, { "epoch": 593.2894736842105, "grad_norm": 2.030297040939331, "learning_rate": 0.0001, "loss": 0.017, "step": 90180 }, { "epoch": 593.3552631578947, "grad_norm": 1.8891319036483765, "learning_rate": 0.0001, "loss": 0.0149, "step": 90190 }, { "epoch": 593.421052631579, "grad_norm": 1.509638786315918, "learning_rate": 0.0001, "loss": 0.014, "step": 90200 }, { "epoch": 593.4868421052631, "grad_norm": 1.5983757972717285, "learning_rate": 0.0001, "loss": 0.0126, "step": 90210 }, { "epoch": 593.5526315789474, "grad_norm": 1.790971279144287, "learning_rate": 0.0001, "loss": 0.0137, "step": 90220 }, { "epoch": 593.6184210526316, "grad_norm": 1.921878457069397, "learning_rate": 0.0001, "loss": 0.0146, "step": 90230 }, { "epoch": 593.6842105263158, "grad_norm": 1.3126342296600342, "learning_rate": 0.0001, "loss": 0.0154, "step": 90240 }, { "epoch": 593.75, "grad_norm": 1.5555343627929688, "learning_rate": 0.0001, "loss": 0.0142, "step": 90250 }, { "epoch": 593.8157894736842, "grad_norm": 1.949202537536621, "learning_rate": 0.0001, "loss": 0.011, "step": 90260 }, { "epoch": 593.8815789473684, "grad_norm": 1.759472370147705, "learning_rate": 0.0001, "loss": 0.0158, "step": 90270 }, { "epoch": 593.9473684210526, "grad_norm": 1.5919221639633179, "learning_rate": 0.0001, "loss": 0.0141, "step": 90280 }, { "epoch": 594.0131578947369, "grad_norm": 1.1924927234649658, "learning_rate": 0.0001, "loss": 0.0149, "step": 90290 }, { "epoch": 594.078947368421, "grad_norm": 1.2231703996658325, "learning_rate": 0.0001, "loss": 0.0145, "step": 90300 }, { "epoch": 594.1447368421053, "grad_norm": 1.511355996131897, "learning_rate": 0.0001, "loss": 0.0125, "step": 90310 }, { "epoch": 594.2105263157895, "grad_norm": 1.4276049137115479, "learning_rate": 0.0001, "loss": 0.0158, "step": 90320 }, { "epoch": 594.2763157894736, "grad_norm": 1.4588286876678467, "learning_rate": 0.0001, "loss": 0.0149, "step": 90330 }, { "epoch": 594.3421052631579, "grad_norm": 1.563511610031128, "learning_rate": 0.0001, "loss": 0.0131, "step": 90340 }, { "epoch": 594.4078947368421, "grad_norm": 1.5251682996749878, "learning_rate": 0.0001, "loss": 0.0117, "step": 90350 }, { "epoch": 594.4736842105264, "grad_norm": 1.3208504915237427, "learning_rate": 0.0001, "loss": 0.0128, "step": 90360 }, { "epoch": 594.5394736842105, "grad_norm": 1.8441206216812134, "learning_rate": 0.0001, "loss": 0.0179, "step": 90370 }, { "epoch": 594.6052631578947, "grad_norm": 1.640066146850586, "learning_rate": 0.0001, "loss": 0.0127, "step": 90380 }, { "epoch": 594.671052631579, "grad_norm": 1.6370633840560913, "learning_rate": 0.0001, "loss": 0.0153, "step": 90390 }, { "epoch": 594.7368421052631, "grad_norm": 1.3078007698059082, "learning_rate": 0.0001, "loss": 0.016, "step": 90400 }, { "epoch": 594.8026315789474, "grad_norm": 1.7889823913574219, "learning_rate": 0.0001, "loss": 0.0131, "step": 90410 }, { "epoch": 594.8684210526316, "grad_norm": 1.246291160583496, "learning_rate": 0.0001, "loss": 0.0128, "step": 90420 }, { "epoch": 594.9342105263158, "grad_norm": 1.16410493850708, "learning_rate": 0.0001, "loss": 0.0159, "step": 90430 }, { "epoch": 595.0, "grad_norm": 1.1543374061584473, "learning_rate": 0.0001, "loss": 0.0131, "step": 90440 }, { "epoch": 595.0657894736842, "grad_norm": 1.7804603576660156, "learning_rate": 0.0001, "loss": 0.0136, "step": 90450 }, { "epoch": 595.1315789473684, "grad_norm": 1.2542158365249634, "learning_rate": 0.0001, "loss": 0.0149, "step": 90460 }, { "epoch": 595.1973684210526, "grad_norm": 1.9631177186965942, "learning_rate": 0.0001, "loss": 0.013, "step": 90470 }, { "epoch": 595.2631578947369, "grad_norm": 1.2770017385482788, "learning_rate": 0.0001, "loss": 0.0143, "step": 90480 }, { "epoch": 595.328947368421, "grad_norm": 1.233656883239746, "learning_rate": 0.0001, "loss": 0.0195, "step": 90490 }, { "epoch": 595.3947368421053, "grad_norm": 1.3281257152557373, "learning_rate": 0.0001, "loss": 0.0128, "step": 90500 }, { "epoch": 595.4605263157895, "grad_norm": 1.5449819564819336, "learning_rate": 0.0001, "loss": 0.0137, "step": 90510 }, { "epoch": 595.5263157894736, "grad_norm": 1.200919270515442, "learning_rate": 0.0001, "loss": 0.0173, "step": 90520 }, { "epoch": 595.5921052631579, "grad_norm": 1.567063808441162, "learning_rate": 0.0001, "loss": 0.0136, "step": 90530 }, { "epoch": 595.6578947368421, "grad_norm": 1.9288145303726196, "learning_rate": 0.0001, "loss": 0.0115, "step": 90540 }, { "epoch": 595.7236842105264, "grad_norm": 1.24027419090271, "learning_rate": 0.0001, "loss": 0.0131, "step": 90550 }, { "epoch": 595.7894736842105, "grad_norm": 1.8287056684494019, "learning_rate": 0.0001, "loss": 0.0142, "step": 90560 }, { "epoch": 595.8552631578947, "grad_norm": 1.7215269804000854, "learning_rate": 0.0001, "loss": 0.0143, "step": 90570 }, { "epoch": 595.921052631579, "grad_norm": 1.5772963762283325, "learning_rate": 0.0001, "loss": 0.0122, "step": 90580 }, { "epoch": 595.9868421052631, "grad_norm": 1.6600154638290405, "learning_rate": 0.0001, "loss": 0.0173, "step": 90590 }, { "epoch": 596.0526315789474, "grad_norm": 1.7186954021453857, "learning_rate": 0.0001, "loss": 0.0131, "step": 90600 }, { "epoch": 596.1184210526316, "grad_norm": 1.3595256805419922, "learning_rate": 0.0001, "loss": 0.0157, "step": 90610 }, { "epoch": 596.1842105263158, "grad_norm": 1.6355605125427246, "learning_rate": 0.0001, "loss": 0.0148, "step": 90620 }, { "epoch": 596.25, "grad_norm": 1.6628704071044922, "learning_rate": 0.0001, "loss": 0.0145, "step": 90630 }, { "epoch": 596.3157894736842, "grad_norm": 1.6175072193145752, "learning_rate": 0.0001, "loss": 0.0129, "step": 90640 }, { "epoch": 596.3815789473684, "grad_norm": 1.580611228942871, "learning_rate": 0.0001, "loss": 0.0141, "step": 90650 }, { "epoch": 596.4473684210526, "grad_norm": 1.9260023832321167, "learning_rate": 0.0001, "loss": 0.014, "step": 90660 }, { "epoch": 596.5131578947369, "grad_norm": 1.6479620933532715, "learning_rate": 0.0001, "loss": 0.0149, "step": 90670 }, { "epoch": 596.578947368421, "grad_norm": 1.736072301864624, "learning_rate": 0.0001, "loss": 0.0179, "step": 90680 }, { "epoch": 596.6447368421053, "grad_norm": 1.6878455877304077, "learning_rate": 0.0001, "loss": 0.0152, "step": 90690 }, { "epoch": 596.7105263157895, "grad_norm": 1.418851375579834, "learning_rate": 0.0001, "loss": 0.0122, "step": 90700 }, { "epoch": 596.7763157894736, "grad_norm": 1.543292760848999, "learning_rate": 0.0001, "loss": 0.0131, "step": 90710 }, { "epoch": 596.8421052631579, "grad_norm": 1.7386550903320312, "learning_rate": 0.0001, "loss": 0.0146, "step": 90720 }, { "epoch": 596.9078947368421, "grad_norm": 1.7804595232009888, "learning_rate": 0.0001, "loss": 0.0156, "step": 90730 }, { "epoch": 596.9736842105264, "grad_norm": 1.9940831661224365, "learning_rate": 0.0001, "loss": 0.0113, "step": 90740 }, { "epoch": 597.0394736842105, "grad_norm": 1.5518369674682617, "learning_rate": 0.0001, "loss": 0.0119, "step": 90750 }, { "epoch": 597.1052631578947, "grad_norm": 1.5528274774551392, "learning_rate": 0.0001, "loss": 0.0155, "step": 90760 }, { "epoch": 597.171052631579, "grad_norm": 1.3265670537948608, "learning_rate": 0.0001, "loss": 0.0143, "step": 90770 }, { "epoch": 597.2368421052631, "grad_norm": 1.8670344352722168, "learning_rate": 0.0001, "loss": 0.0144, "step": 90780 }, { "epoch": 597.3026315789474, "grad_norm": 1.2698296308517456, "learning_rate": 0.0001, "loss": 0.0146, "step": 90790 }, { "epoch": 597.3684210526316, "grad_norm": 1.514722228050232, "learning_rate": 0.0001, "loss": 0.0135, "step": 90800 }, { "epoch": 597.4342105263158, "grad_norm": 1.4581044912338257, "learning_rate": 0.0001, "loss": 0.0145, "step": 90810 }, { "epoch": 597.5, "grad_norm": 1.482538104057312, "learning_rate": 0.0001, "loss": 0.0129, "step": 90820 }, { "epoch": 597.5657894736842, "grad_norm": 0.9724946618080139, "learning_rate": 0.0001, "loss": 0.0113, "step": 90830 }, { "epoch": 597.6315789473684, "grad_norm": 1.5042518377304077, "learning_rate": 0.0001, "loss": 0.0147, "step": 90840 }, { "epoch": 597.6973684210526, "grad_norm": 1.6173828840255737, "learning_rate": 0.0001, "loss": 0.0157, "step": 90850 }, { "epoch": 597.7631578947369, "grad_norm": 1.5885653495788574, "learning_rate": 0.0001, "loss": 0.0138, "step": 90860 }, { "epoch": 597.828947368421, "grad_norm": 1.3953889608383179, "learning_rate": 0.0001, "loss": 0.014, "step": 90870 }, { "epoch": 597.8947368421053, "grad_norm": 0.9215596318244934, "learning_rate": 0.0001, "loss": 0.0172, "step": 90880 }, { "epoch": 597.9605263157895, "grad_norm": 1.4898171424865723, "learning_rate": 0.0001, "loss": 0.0147, "step": 90890 }, { "epoch": 598.0263157894736, "grad_norm": 1.4405778646469116, "learning_rate": 0.0001, "loss": 0.0135, "step": 90900 }, { "epoch": 598.0921052631579, "grad_norm": 1.7642369270324707, "learning_rate": 0.0001, "loss": 0.0144, "step": 90910 }, { "epoch": 598.1578947368421, "grad_norm": 1.7036501169204712, "learning_rate": 0.0001, "loss": 0.0198, "step": 90920 }, { "epoch": 598.2236842105264, "grad_norm": 1.7649840116500854, "learning_rate": 0.0001, "loss": 0.013, "step": 90930 }, { "epoch": 598.2894736842105, "grad_norm": 1.55158269405365, "learning_rate": 0.0001, "loss": 0.0161, "step": 90940 }, { "epoch": 598.3552631578947, "grad_norm": 1.4757537841796875, "learning_rate": 0.0001, "loss": 0.0123, "step": 90950 }, { "epoch": 598.421052631579, "grad_norm": 1.1924363374710083, "learning_rate": 0.0001, "loss": 0.0184, "step": 90960 }, { "epoch": 598.4868421052631, "grad_norm": 1.8854072093963623, "learning_rate": 0.0001, "loss": 0.0149, "step": 90970 }, { "epoch": 598.5526315789474, "grad_norm": 1.8055076599121094, "learning_rate": 0.0001, "loss": 0.0124, "step": 90980 }, { "epoch": 598.6184210526316, "grad_norm": 1.529906988143921, "learning_rate": 0.0001, "loss": 0.0131, "step": 90990 }, { "epoch": 598.6842105263158, "grad_norm": 1.5539703369140625, "learning_rate": 0.0001, "loss": 0.013, "step": 91000 }, { "epoch": 598.75, "grad_norm": 1.594032645225525, "learning_rate": 0.0001, "loss": 0.0162, "step": 91010 }, { "epoch": 598.8157894736842, "grad_norm": 1.3181934356689453, "learning_rate": 0.0001, "loss": 0.0145, "step": 91020 }, { "epoch": 598.8815789473684, "grad_norm": 1.2206188440322876, "learning_rate": 0.0001, "loss": 0.0129, "step": 91030 }, { "epoch": 598.9473684210526, "grad_norm": 1.3527475595474243, "learning_rate": 0.0001, "loss": 0.014, "step": 91040 }, { "epoch": 599.0131578947369, "grad_norm": 1.2335411310195923, "learning_rate": 0.0001, "loss": 0.0136, "step": 91050 }, { "epoch": 599.078947368421, "grad_norm": 1.2318850755691528, "learning_rate": 0.0001, "loss": 0.0139, "step": 91060 }, { "epoch": 599.1447368421053, "grad_norm": 1.7721383571624756, "learning_rate": 0.0001, "loss": 0.0156, "step": 91070 }, { "epoch": 599.2105263157895, "grad_norm": 1.7304784059524536, "learning_rate": 0.0001, "loss": 0.0122, "step": 91080 }, { "epoch": 599.2763157894736, "grad_norm": 1.3394969701766968, "learning_rate": 0.0001, "loss": 0.0136, "step": 91090 }, { "epoch": 599.3421052631579, "grad_norm": 1.1997987031936646, "learning_rate": 0.0001, "loss": 0.0121, "step": 91100 }, { "epoch": 599.4078947368421, "grad_norm": 1.3762366771697998, "learning_rate": 0.0001, "loss": 0.0174, "step": 91110 }, { "epoch": 599.4736842105264, "grad_norm": 1.4432183504104614, "learning_rate": 0.0001, "loss": 0.0181, "step": 91120 }, { "epoch": 599.5394736842105, "grad_norm": 1.2506145238876343, "learning_rate": 0.0001, "loss": 0.0161, "step": 91130 }, { "epoch": 599.6052631578947, "grad_norm": 1.46250319480896, "learning_rate": 0.0001, "loss": 0.0128, "step": 91140 }, { "epoch": 599.671052631579, "grad_norm": 1.6363298892974854, "learning_rate": 0.0001, "loss": 0.0183, "step": 91150 }, { "epoch": 599.7368421052631, "grad_norm": 1.3476448059082031, "learning_rate": 0.0001, "loss": 0.0138, "step": 91160 }, { "epoch": 599.8026315789474, "grad_norm": 1.1473647356033325, "learning_rate": 0.0001, "loss": 0.0131, "step": 91170 }, { "epoch": 599.8684210526316, "grad_norm": 1.3397090435028076, "learning_rate": 0.0001, "loss": 0.0152, "step": 91180 }, { "epoch": 599.9342105263158, "grad_norm": 1.2879292964935303, "learning_rate": 0.0001, "loss": 0.0141, "step": 91190 }, { "epoch": 600.0, "grad_norm": 1.0604857206344604, "learning_rate": 0.0001, "loss": 0.0141, "step": 91200 }, { "epoch": 600.0657894736842, "grad_norm": 1.7179081439971924, "learning_rate": 0.0001, "loss": 0.0161, "step": 91210 }, { "epoch": 600.1315789473684, "grad_norm": 1.5016474723815918, "learning_rate": 0.0001, "loss": 0.0117, "step": 91220 }, { "epoch": 600.1973684210526, "grad_norm": 1.6058257818222046, "learning_rate": 0.0001, "loss": 0.0122, "step": 91230 }, { "epoch": 600.2631578947369, "grad_norm": 1.9656308889389038, "learning_rate": 0.0001, "loss": 0.0186, "step": 91240 }, { "epoch": 600.328947368421, "grad_norm": 2.072763442993164, "learning_rate": 0.0001, "loss": 0.0147, "step": 91250 }, { "epoch": 600.3947368421053, "grad_norm": 1.1926944255828857, "learning_rate": 0.0001, "loss": 0.0128, "step": 91260 }, { "epoch": 600.4605263157895, "grad_norm": 1.2898404598236084, "learning_rate": 0.0001, "loss": 0.0122, "step": 91270 }, { "epoch": 600.5263157894736, "grad_norm": 1.45271635055542, "learning_rate": 0.0001, "loss": 0.0156, "step": 91280 }, { "epoch": 600.5921052631579, "grad_norm": 1.3171106576919556, "learning_rate": 0.0001, "loss": 0.0166, "step": 91290 }, { "epoch": 600.6578947368421, "grad_norm": 1.6433625221252441, "learning_rate": 0.0001, "loss": 0.0151, "step": 91300 }, { "epoch": 600.7236842105264, "grad_norm": 1.69329035282135, "learning_rate": 0.0001, "loss": 0.0154, "step": 91310 }, { "epoch": 600.7894736842105, "grad_norm": 1.6210912466049194, "learning_rate": 0.0001, "loss": 0.0137, "step": 91320 }, { "epoch": 600.8552631578947, "grad_norm": 1.5284314155578613, "learning_rate": 0.0001, "loss": 0.0133, "step": 91330 }, { "epoch": 600.921052631579, "grad_norm": 1.7429399490356445, "learning_rate": 0.0001, "loss": 0.0193, "step": 91340 }, { "epoch": 600.9868421052631, "grad_norm": 1.7374472618103027, "learning_rate": 0.0001, "loss": 0.0113, "step": 91350 }, { "epoch": 601.0526315789474, "grad_norm": 1.187409520149231, "learning_rate": 0.0001, "loss": 0.0129, "step": 91360 }, { "epoch": 601.1184210526316, "grad_norm": 1.1925441026687622, "learning_rate": 0.0001, "loss": 0.0136, "step": 91370 }, { "epoch": 601.1842105263158, "grad_norm": 1.3307373523712158, "learning_rate": 0.0001, "loss": 0.0135, "step": 91380 }, { "epoch": 601.25, "grad_norm": 1.5834788084030151, "learning_rate": 0.0001, "loss": 0.0188, "step": 91390 }, { "epoch": 601.3157894736842, "grad_norm": 1.1461962461471558, "learning_rate": 0.0001, "loss": 0.0178, "step": 91400 }, { "epoch": 601.3815789473684, "grad_norm": 1.4464187622070312, "learning_rate": 0.0001, "loss": 0.016, "step": 91410 }, { "epoch": 601.4473684210526, "grad_norm": 1.7083404064178467, "learning_rate": 0.0001, "loss": 0.0117, "step": 91420 }, { "epoch": 601.5131578947369, "grad_norm": 1.7712465524673462, "learning_rate": 0.0001, "loss": 0.013, "step": 91430 }, { "epoch": 601.578947368421, "grad_norm": 1.6790566444396973, "learning_rate": 0.0001, "loss": 0.0137, "step": 91440 }, { "epoch": 601.6447368421053, "grad_norm": 1.9528616666793823, "learning_rate": 0.0001, "loss": 0.0144, "step": 91450 }, { "epoch": 601.7105263157895, "grad_norm": 1.2076797485351562, "learning_rate": 0.0001, "loss": 0.0149, "step": 91460 }, { "epoch": 601.7763157894736, "grad_norm": 1.2318183183670044, "learning_rate": 0.0001, "loss": 0.0114, "step": 91470 }, { "epoch": 601.8421052631579, "grad_norm": 1.8110421895980835, "learning_rate": 0.0001, "loss": 0.0163, "step": 91480 }, { "epoch": 601.9078947368421, "grad_norm": 1.6690336465835571, "learning_rate": 0.0001, "loss": 0.0149, "step": 91490 }, { "epoch": 601.9736842105264, "grad_norm": 1.7798408269882202, "learning_rate": 0.0001, "loss": 0.0145, "step": 91500 }, { "epoch": 602.0394736842105, "grad_norm": 1.1658085584640503, "learning_rate": 0.0001, "loss": 0.0114, "step": 91510 }, { "epoch": 602.1052631578947, "grad_norm": 1.5625485181808472, "learning_rate": 0.0001, "loss": 0.0132, "step": 91520 }, { "epoch": 602.171052631579, "grad_norm": 1.3059790134429932, "learning_rate": 0.0001, "loss": 0.011, "step": 91530 }, { "epoch": 602.2368421052631, "grad_norm": 1.3359750509262085, "learning_rate": 0.0001, "loss": 0.0143, "step": 91540 }, { "epoch": 602.3026315789474, "grad_norm": 1.54613196849823, "learning_rate": 0.0001, "loss": 0.0152, "step": 91550 }, { "epoch": 602.3684210526316, "grad_norm": 0.9957064390182495, "learning_rate": 0.0001, "loss": 0.0158, "step": 91560 }, { "epoch": 602.4342105263158, "grad_norm": 1.3146653175354004, "learning_rate": 0.0001, "loss": 0.0145, "step": 91570 }, { "epoch": 602.5, "grad_norm": 1.3499927520751953, "learning_rate": 0.0001, "loss": 0.0168, "step": 91580 }, { "epoch": 602.5657894736842, "grad_norm": 1.5431345701217651, "learning_rate": 0.0001, "loss": 0.0134, "step": 91590 }, { "epoch": 602.6315789473684, "grad_norm": 1.526903510093689, "learning_rate": 0.0001, "loss": 0.0138, "step": 91600 }, { "epoch": 602.6973684210526, "grad_norm": 1.4770076274871826, "learning_rate": 0.0001, "loss": 0.0142, "step": 91610 }, { "epoch": 602.7631578947369, "grad_norm": 1.5546627044677734, "learning_rate": 0.0001, "loss": 0.0182, "step": 91620 }, { "epoch": 602.828947368421, "grad_norm": 1.7346965074539185, "learning_rate": 0.0001, "loss": 0.0151, "step": 91630 }, { "epoch": 602.8947368421053, "grad_norm": 1.5048152208328247, "learning_rate": 0.0001, "loss": 0.017, "step": 91640 }, { "epoch": 602.9605263157895, "grad_norm": 1.8266630172729492, "learning_rate": 0.0001, "loss": 0.0122, "step": 91650 }, { "epoch": 603.0263157894736, "grad_norm": 1.2584953308105469, "learning_rate": 0.0001, "loss": 0.016, "step": 91660 }, { "epoch": 603.0921052631579, "grad_norm": 1.5982239246368408, "learning_rate": 0.0001, "loss": 0.0129, "step": 91670 }, { "epoch": 603.1578947368421, "grad_norm": 1.1963727474212646, "learning_rate": 0.0001, "loss": 0.0133, "step": 91680 }, { "epoch": 603.2236842105264, "grad_norm": 1.7116053104400635, "learning_rate": 0.0001, "loss": 0.0105, "step": 91690 }, { "epoch": 603.2894736842105, "grad_norm": 1.7058660984039307, "learning_rate": 0.0001, "loss": 0.0181, "step": 91700 }, { "epoch": 603.3552631578947, "grad_norm": 1.4653412103652954, "learning_rate": 0.0001, "loss": 0.0131, "step": 91710 }, { "epoch": 603.421052631579, "grad_norm": 1.494301199913025, "learning_rate": 0.0001, "loss": 0.0144, "step": 91720 }, { "epoch": 603.4868421052631, "grad_norm": 1.7563410997390747, "learning_rate": 0.0001, "loss": 0.0144, "step": 91730 }, { "epoch": 603.5526315789474, "grad_norm": 1.2242238521575928, "learning_rate": 0.0001, "loss": 0.0169, "step": 91740 }, { "epoch": 603.6184210526316, "grad_norm": 1.518961787223816, "learning_rate": 0.0001, "loss": 0.0121, "step": 91750 }, { "epoch": 603.6842105263158, "grad_norm": 1.1383845806121826, "learning_rate": 0.0001, "loss": 0.0115, "step": 91760 }, { "epoch": 603.75, "grad_norm": 1.3394712209701538, "learning_rate": 0.0001, "loss": 0.0156, "step": 91770 }, { "epoch": 603.8157894736842, "grad_norm": 1.1606727838516235, "learning_rate": 0.0001, "loss": 0.0124, "step": 91780 }, { "epoch": 603.8815789473684, "grad_norm": 1.207497239112854, "learning_rate": 0.0001, "loss": 0.0141, "step": 91790 }, { "epoch": 603.9473684210526, "grad_norm": 1.5418425798416138, "learning_rate": 0.0001, "loss": 0.0188, "step": 91800 }, { "epoch": 604.0131578947369, "grad_norm": 1.2215567827224731, "learning_rate": 0.0001, "loss": 0.0164, "step": 91810 }, { "epoch": 604.078947368421, "grad_norm": 1.7994898557662964, "learning_rate": 0.0001, "loss": 0.0197, "step": 91820 }, { "epoch": 604.1447368421053, "grad_norm": 1.5934447050094604, "learning_rate": 0.0001, "loss": 0.0172, "step": 91830 }, { "epoch": 604.2105263157895, "grad_norm": 1.2797911167144775, "learning_rate": 0.0001, "loss": 0.012, "step": 91840 }, { "epoch": 604.2763157894736, "grad_norm": 1.3598380088806152, "learning_rate": 0.0001, "loss": 0.0143, "step": 91850 }, { "epoch": 604.3421052631579, "grad_norm": 1.3747899532318115, "learning_rate": 0.0001, "loss": 0.0131, "step": 91860 }, { "epoch": 604.4078947368421, "grad_norm": 1.5976351499557495, "learning_rate": 0.0001, "loss": 0.011, "step": 91870 }, { "epoch": 604.4736842105264, "grad_norm": 1.5211949348449707, "learning_rate": 0.0001, "loss": 0.0206, "step": 91880 }, { "epoch": 604.5394736842105, "grad_norm": 1.915334939956665, "learning_rate": 0.0001, "loss": 0.0122, "step": 91890 }, { "epoch": 604.6052631578947, "grad_norm": 1.7105629444122314, "learning_rate": 0.0001, "loss": 0.0119, "step": 91900 }, { "epoch": 604.671052631579, "grad_norm": 1.8888837099075317, "learning_rate": 0.0001, "loss": 0.0147, "step": 91910 }, { "epoch": 604.7368421052631, "grad_norm": 1.6319857835769653, "learning_rate": 0.0001, "loss": 0.0138, "step": 91920 }, { "epoch": 604.8026315789474, "grad_norm": 1.5820955038070679, "learning_rate": 0.0001, "loss": 0.0157, "step": 91930 }, { "epoch": 604.8684210526316, "grad_norm": 1.4353865385055542, "learning_rate": 0.0001, "loss": 0.0138, "step": 91940 }, { "epoch": 604.9342105263158, "grad_norm": 1.5089056491851807, "learning_rate": 0.0001, "loss": 0.0141, "step": 91950 }, { "epoch": 605.0, "grad_norm": 1.8820525407791138, "learning_rate": 0.0001, "loss": 0.0108, "step": 91960 }, { "epoch": 605.0657894736842, "grad_norm": 1.8181865215301514, "learning_rate": 0.0001, "loss": 0.0164, "step": 91970 }, { "epoch": 605.1315789473684, "grad_norm": 1.4751107692718506, "learning_rate": 0.0001, "loss": 0.015, "step": 91980 }, { "epoch": 605.1973684210526, "grad_norm": 1.718797206878662, "learning_rate": 0.0001, "loss": 0.011, "step": 91990 }, { "epoch": 605.2631578947369, "grad_norm": 1.3082659244537354, "learning_rate": 0.0001, "loss": 0.0129, "step": 92000 }, { "epoch": 605.328947368421, "grad_norm": 1.275185227394104, "learning_rate": 0.0001, "loss": 0.0144, "step": 92010 }, { "epoch": 605.3947368421053, "grad_norm": 1.5094918012619019, "learning_rate": 0.0001, "loss": 0.0125, "step": 92020 }, { "epoch": 605.4605263157895, "grad_norm": 1.8535429239273071, "learning_rate": 0.0001, "loss": 0.0142, "step": 92030 }, { "epoch": 605.5263157894736, "grad_norm": 1.5933488607406616, "learning_rate": 0.0001, "loss": 0.016, "step": 92040 }, { "epoch": 605.5921052631579, "grad_norm": 1.6106326580047607, "learning_rate": 0.0001, "loss": 0.0129, "step": 92050 }, { "epoch": 605.6578947368421, "grad_norm": 1.0791200399398804, "learning_rate": 0.0001, "loss": 0.012, "step": 92060 }, { "epoch": 605.7236842105264, "grad_norm": 1.2717955112457275, "learning_rate": 0.0001, "loss": 0.0148, "step": 92070 }, { "epoch": 605.7894736842105, "grad_norm": 1.744714379310608, "learning_rate": 0.0001, "loss": 0.0156, "step": 92080 }, { "epoch": 605.8552631578947, "grad_norm": 1.5197994709014893, "learning_rate": 0.0001, "loss": 0.0136, "step": 92090 }, { "epoch": 605.921052631579, "grad_norm": 1.7210065126419067, "learning_rate": 0.0001, "loss": 0.0134, "step": 92100 }, { "epoch": 605.9868421052631, "grad_norm": 1.325380802154541, "learning_rate": 0.0001, "loss": 0.0183, "step": 92110 }, { "epoch": 606.0526315789474, "grad_norm": 1.4498324394226074, "learning_rate": 0.0001, "loss": 0.0194, "step": 92120 }, { "epoch": 606.1184210526316, "grad_norm": 1.0178314447402954, "learning_rate": 0.0001, "loss": 0.0158, "step": 92130 }, { "epoch": 606.1842105263158, "grad_norm": 1.7929694652557373, "learning_rate": 0.0001, "loss": 0.014, "step": 92140 }, { "epoch": 606.25, "grad_norm": 1.571848750114441, "learning_rate": 0.0001, "loss": 0.0131, "step": 92150 }, { "epoch": 606.3157894736842, "grad_norm": 1.6240819692611694, "learning_rate": 0.0001, "loss": 0.0153, "step": 92160 }, { "epoch": 606.3815789473684, "grad_norm": 1.4534615278244019, "learning_rate": 0.0001, "loss": 0.0111, "step": 92170 }, { "epoch": 606.4473684210526, "grad_norm": 1.6374915838241577, "learning_rate": 0.0001, "loss": 0.012, "step": 92180 }, { "epoch": 606.5131578947369, "grad_norm": 1.746711015701294, "learning_rate": 0.0001, "loss": 0.0177, "step": 92190 }, { "epoch": 606.578947368421, "grad_norm": 2.0012011528015137, "learning_rate": 0.0001, "loss": 0.0142, "step": 92200 }, { "epoch": 606.6447368421053, "grad_norm": 1.670074462890625, "learning_rate": 0.0001, "loss": 0.0122, "step": 92210 }, { "epoch": 606.7105263157895, "grad_norm": 1.5083121061325073, "learning_rate": 0.0001, "loss": 0.0169, "step": 92220 }, { "epoch": 606.7763157894736, "grad_norm": 1.4179027080535889, "learning_rate": 0.0001, "loss": 0.0116, "step": 92230 }, { "epoch": 606.8421052631579, "grad_norm": 1.448513150215149, "learning_rate": 0.0001, "loss": 0.0119, "step": 92240 }, { "epoch": 606.9078947368421, "grad_norm": 1.844315767288208, "learning_rate": 0.0001, "loss": 0.0167, "step": 92250 }, { "epoch": 606.9736842105264, "grad_norm": 1.7176170349121094, "learning_rate": 0.0001, "loss": 0.0143, "step": 92260 }, { "epoch": 607.0394736842105, "grad_norm": 1.5724797248840332, "learning_rate": 0.0001, "loss": 0.0143, "step": 92270 }, { "epoch": 607.1052631578947, "grad_norm": 1.553462028503418, "learning_rate": 0.0001, "loss": 0.0183, "step": 92280 }, { "epoch": 607.171052631579, "grad_norm": 1.7357813119888306, "learning_rate": 0.0001, "loss": 0.0112, "step": 92290 }, { "epoch": 607.2368421052631, "grad_norm": 1.857598066329956, "learning_rate": 0.0001, "loss": 0.0129, "step": 92300 }, { "epoch": 607.3026315789474, "grad_norm": 1.3217034339904785, "learning_rate": 0.0001, "loss": 0.0142, "step": 92310 }, { "epoch": 607.3684210526316, "grad_norm": 1.6385631561279297, "learning_rate": 0.0001, "loss": 0.0197, "step": 92320 }, { "epoch": 607.4342105263158, "grad_norm": 1.2746033668518066, "learning_rate": 0.0001, "loss": 0.0128, "step": 92330 }, { "epoch": 607.5, "grad_norm": 1.616307258605957, "learning_rate": 0.0001, "loss": 0.014, "step": 92340 }, { "epoch": 607.5657894736842, "grad_norm": 1.5341449975967407, "learning_rate": 0.0001, "loss": 0.016, "step": 92350 }, { "epoch": 607.6315789473684, "grad_norm": 1.9955527782440186, "learning_rate": 0.0001, "loss": 0.0128, "step": 92360 }, { "epoch": 607.6973684210526, "grad_norm": 1.862309455871582, "learning_rate": 0.0001, "loss": 0.0117, "step": 92370 }, { "epoch": 607.7631578947369, "grad_norm": 1.257180094718933, "learning_rate": 0.0001, "loss": 0.014, "step": 92380 }, { "epoch": 607.828947368421, "grad_norm": 1.8274834156036377, "learning_rate": 0.0001, "loss": 0.0168, "step": 92390 }, { "epoch": 607.8947368421053, "grad_norm": 1.3034188747406006, "learning_rate": 0.0001, "loss": 0.0144, "step": 92400 }, { "epoch": 607.9605263157895, "grad_norm": 1.6174110174179077, "learning_rate": 0.0001, "loss": 0.0136, "step": 92410 }, { "epoch": 608.0263157894736, "grad_norm": 1.7511669397354126, "learning_rate": 0.0001, "loss": 0.0161, "step": 92420 }, { "epoch": 608.0921052631579, "grad_norm": 1.2524495124816895, "learning_rate": 0.0001, "loss": 0.0104, "step": 92430 }, { "epoch": 608.1578947368421, "grad_norm": 1.4738426208496094, "learning_rate": 0.0001, "loss": 0.0141, "step": 92440 }, { "epoch": 608.2236842105264, "grad_norm": 1.8645989894866943, "learning_rate": 0.0001, "loss": 0.0149, "step": 92450 }, { "epoch": 608.2894736842105, "grad_norm": 1.1484025716781616, "learning_rate": 0.0001, "loss": 0.0142, "step": 92460 }, { "epoch": 608.3552631578947, "grad_norm": 1.236446738243103, "learning_rate": 0.0001, "loss": 0.0153, "step": 92470 }, { "epoch": 608.421052631579, "grad_norm": 2.0769665241241455, "learning_rate": 0.0001, "loss": 0.0158, "step": 92480 }, { "epoch": 608.4868421052631, "grad_norm": 1.4528990983963013, "learning_rate": 0.0001, "loss": 0.0109, "step": 92490 }, { "epoch": 608.5526315789474, "grad_norm": 1.3042925596237183, "learning_rate": 0.0001, "loss": 0.0155, "step": 92500 }, { "epoch": 608.6184210526316, "grad_norm": 1.403342604637146, "learning_rate": 0.0001, "loss": 0.0148, "step": 92510 }, { "epoch": 608.6842105263158, "grad_norm": 1.2597814798355103, "learning_rate": 0.0001, "loss": 0.0171, "step": 92520 }, { "epoch": 608.75, "grad_norm": 1.200413703918457, "learning_rate": 0.0001, "loss": 0.0132, "step": 92530 }, { "epoch": 608.8157894736842, "grad_norm": 1.106375813484192, "learning_rate": 0.0001, "loss": 0.0134, "step": 92540 }, { "epoch": 608.8815789473684, "grad_norm": 1.234183430671692, "learning_rate": 0.0001, "loss": 0.0115, "step": 92550 }, { "epoch": 608.9473684210526, "grad_norm": 0.9568272233009338, "learning_rate": 0.0001, "loss": 0.0127, "step": 92560 }, { "epoch": 609.0131578947369, "grad_norm": 1.0368776321411133, "learning_rate": 0.0001, "loss": 0.0185, "step": 92570 }, { "epoch": 609.078947368421, "grad_norm": 1.595565676689148, "learning_rate": 0.0001, "loss": 0.0152, "step": 92580 }, { "epoch": 609.1447368421053, "grad_norm": 1.5293688774108887, "learning_rate": 0.0001, "loss": 0.0114, "step": 92590 }, { "epoch": 609.2105263157895, "grad_norm": 1.3293417692184448, "learning_rate": 0.0001, "loss": 0.0182, "step": 92600 }, { "epoch": 609.2763157894736, "grad_norm": 1.4366012811660767, "learning_rate": 0.0001, "loss": 0.0141, "step": 92610 }, { "epoch": 609.3421052631579, "grad_norm": 1.8051968812942505, "learning_rate": 0.0001, "loss": 0.014, "step": 92620 }, { "epoch": 609.4078947368421, "grad_norm": 1.6243634223937988, "learning_rate": 0.0001, "loss": 0.013, "step": 92630 }, { "epoch": 609.4736842105264, "grad_norm": 1.2539639472961426, "learning_rate": 0.0001, "loss": 0.014, "step": 92640 }, { "epoch": 609.5394736842105, "grad_norm": 1.1962765455245972, "learning_rate": 0.0001, "loss": 0.0107, "step": 92650 }, { "epoch": 609.6052631578947, "grad_norm": 1.250677466392517, "learning_rate": 0.0001, "loss": 0.0136, "step": 92660 }, { "epoch": 609.671052631579, "grad_norm": 1.2809258699417114, "learning_rate": 0.0001, "loss": 0.0167, "step": 92670 }, { "epoch": 609.7368421052631, "grad_norm": 1.5064893960952759, "learning_rate": 0.0001, "loss": 0.0155, "step": 92680 }, { "epoch": 609.8026315789474, "grad_norm": 1.3284412622451782, "learning_rate": 0.0001, "loss": 0.013, "step": 92690 }, { "epoch": 609.8684210526316, "grad_norm": 1.5630950927734375, "learning_rate": 0.0001, "loss": 0.0118, "step": 92700 }, { "epoch": 609.9342105263158, "grad_norm": 1.4848287105560303, "learning_rate": 0.0001, "loss": 0.0237, "step": 92710 }, { "epoch": 610.0, "grad_norm": 1.8330094814300537, "learning_rate": 0.0001, "loss": 0.0129, "step": 92720 }, { "epoch": 610.0657894736842, "grad_norm": 1.6181557178497314, "learning_rate": 0.0001, "loss": 0.0171, "step": 92730 }, { "epoch": 610.1315789473684, "grad_norm": 1.360074520111084, "learning_rate": 0.0001, "loss": 0.0137, "step": 92740 }, { "epoch": 610.1973684210526, "grad_norm": 1.8045299053192139, "learning_rate": 0.0001, "loss": 0.0151, "step": 92750 }, { "epoch": 610.2631578947369, "grad_norm": 1.7724617719650269, "learning_rate": 0.0001, "loss": 0.0142, "step": 92760 }, { "epoch": 610.328947368421, "grad_norm": 1.672054648399353, "learning_rate": 0.0001, "loss": 0.0136, "step": 92770 }, { "epoch": 610.3947368421053, "grad_norm": 1.489882469177246, "learning_rate": 0.0001, "loss": 0.0113, "step": 92780 }, { "epoch": 610.4605263157895, "grad_norm": 1.7637628316879272, "learning_rate": 0.0001, "loss": 0.0133, "step": 92790 }, { "epoch": 610.5263157894736, "grad_norm": 1.904448390007019, "learning_rate": 0.0001, "loss": 0.0124, "step": 92800 }, { "epoch": 610.5921052631579, "grad_norm": 1.7366063594818115, "learning_rate": 0.0001, "loss": 0.0152, "step": 92810 }, { "epoch": 610.6578947368421, "grad_norm": 1.4250562191009521, "learning_rate": 0.0001, "loss": 0.0175, "step": 92820 }, { "epoch": 610.7236842105264, "grad_norm": 1.390302062034607, "learning_rate": 0.0001, "loss": 0.0118, "step": 92830 }, { "epoch": 610.7894736842105, "grad_norm": 1.3312389850616455, "learning_rate": 0.0001, "loss": 0.0157, "step": 92840 }, { "epoch": 610.8552631578947, "grad_norm": 1.8163528442382812, "learning_rate": 0.0001, "loss": 0.0128, "step": 92850 }, { "epoch": 610.921052631579, "grad_norm": 1.3178684711456299, "learning_rate": 0.0001, "loss": 0.0153, "step": 92860 }, { "epoch": 610.9868421052631, "grad_norm": 1.060535192489624, "learning_rate": 0.0001, "loss": 0.0122, "step": 92870 }, { "epoch": 611.0526315789474, "grad_norm": 1.5938503742218018, "learning_rate": 0.0001, "loss": 0.0179, "step": 92880 }, { "epoch": 611.1184210526316, "grad_norm": 1.0232598781585693, "learning_rate": 0.0001, "loss": 0.0112, "step": 92890 }, { "epoch": 611.1842105263158, "grad_norm": 1.427054762840271, "learning_rate": 0.0001, "loss": 0.0121, "step": 92900 }, { "epoch": 611.25, "grad_norm": 1.355728268623352, "learning_rate": 0.0001, "loss": 0.0142, "step": 92910 }, { "epoch": 611.3157894736842, "grad_norm": 1.691823124885559, "learning_rate": 0.0001, "loss": 0.014, "step": 92920 }, { "epoch": 611.3815789473684, "grad_norm": 1.5131405591964722, "learning_rate": 0.0001, "loss": 0.016, "step": 92930 }, { "epoch": 611.4473684210526, "grad_norm": 1.6125506162643433, "learning_rate": 0.0001, "loss": 0.0117, "step": 92940 }, { "epoch": 611.5131578947369, "grad_norm": 1.425939679145813, "learning_rate": 0.0001, "loss": 0.0119, "step": 92950 }, { "epoch": 611.578947368421, "grad_norm": 1.5540167093276978, "learning_rate": 0.0001, "loss": 0.013, "step": 92960 }, { "epoch": 611.6447368421053, "grad_norm": 1.369292140007019, "learning_rate": 0.0001, "loss": 0.018, "step": 92970 }, { "epoch": 611.7105263157895, "grad_norm": 1.3717882633209229, "learning_rate": 0.0001, "loss": 0.0148, "step": 92980 }, { "epoch": 611.7763157894736, "grad_norm": 1.2681150436401367, "learning_rate": 0.0001, "loss": 0.0161, "step": 92990 }, { "epoch": 611.8421052631579, "grad_norm": 1.226815938949585, "learning_rate": 0.0001, "loss": 0.0173, "step": 93000 }, { "epoch": 611.9078947368421, "grad_norm": 1.4740545749664307, "learning_rate": 0.0001, "loss": 0.0158, "step": 93010 }, { "epoch": 611.9736842105264, "grad_norm": 1.6737030744552612, "learning_rate": 0.0001, "loss": 0.0144, "step": 93020 }, { "epoch": 612.0394736842105, "grad_norm": 1.0779457092285156, "learning_rate": 0.0001, "loss": 0.0126, "step": 93030 }, { "epoch": 612.1052631578947, "grad_norm": 1.5124826431274414, "learning_rate": 0.0001, "loss": 0.0162, "step": 93040 }, { "epoch": 612.171052631579, "grad_norm": 0.9780311584472656, "learning_rate": 0.0001, "loss": 0.0139, "step": 93050 }, { "epoch": 612.2368421052631, "grad_norm": 1.3161379098892212, "learning_rate": 0.0001, "loss": 0.0176, "step": 93060 }, { "epoch": 612.3026315789474, "grad_norm": 0.9013703465461731, "learning_rate": 0.0001, "loss": 0.0111, "step": 93070 }, { "epoch": 612.3684210526316, "grad_norm": 1.2098509073257446, "learning_rate": 0.0001, "loss": 0.0142, "step": 93080 }, { "epoch": 612.4342105263158, "grad_norm": 1.42634916305542, "learning_rate": 0.0001, "loss": 0.0144, "step": 93090 }, { "epoch": 612.5, "grad_norm": 1.4325687885284424, "learning_rate": 0.0001, "loss": 0.018, "step": 93100 }, { "epoch": 612.5657894736842, "grad_norm": 1.3643927574157715, "learning_rate": 0.0001, "loss": 0.0131, "step": 93110 }, { "epoch": 612.6315789473684, "grad_norm": 1.2860593795776367, "learning_rate": 0.0001, "loss": 0.016, "step": 93120 }, { "epoch": 612.6973684210526, "grad_norm": 1.470247507095337, "learning_rate": 0.0001, "loss": 0.0149, "step": 93130 }, { "epoch": 612.7631578947369, "grad_norm": 1.9715921878814697, "learning_rate": 0.0001, "loss": 0.0129, "step": 93140 }, { "epoch": 612.828947368421, "grad_norm": 1.8064006567001343, "learning_rate": 0.0001, "loss": 0.0142, "step": 93150 }, { "epoch": 612.8947368421053, "grad_norm": 1.4941825866699219, "learning_rate": 0.0001, "loss": 0.0156, "step": 93160 }, { "epoch": 612.9605263157895, "grad_norm": 1.866654634475708, "learning_rate": 0.0001, "loss": 0.0138, "step": 93170 }, { "epoch": 613.0263157894736, "grad_norm": 1.3863224983215332, "learning_rate": 0.0001, "loss": 0.0199, "step": 93180 }, { "epoch": 613.0921052631579, "grad_norm": 1.5714713335037231, "learning_rate": 0.0001, "loss": 0.0153, "step": 93190 }, { "epoch": 613.1578947368421, "grad_norm": 1.2009713649749756, "learning_rate": 0.0001, "loss": 0.0162, "step": 93200 }, { "epoch": 613.2236842105264, "grad_norm": 1.2947360277175903, "learning_rate": 0.0001, "loss": 0.0146, "step": 93210 }, { "epoch": 613.2894736842105, "grad_norm": 1.4466824531555176, "learning_rate": 0.0001, "loss": 0.0143, "step": 93220 }, { "epoch": 613.3552631578947, "grad_norm": 1.572486400604248, "learning_rate": 0.0001, "loss": 0.0111, "step": 93230 }, { "epoch": 613.421052631579, "grad_norm": 1.3918827772140503, "learning_rate": 0.0001, "loss": 0.0185, "step": 93240 }, { "epoch": 613.4868421052631, "grad_norm": 0.9660008549690247, "learning_rate": 0.0001, "loss": 0.0173, "step": 93250 }, { "epoch": 613.5526315789474, "grad_norm": 1.401936650276184, "learning_rate": 0.0001, "loss": 0.0176, "step": 93260 }, { "epoch": 613.6184210526316, "grad_norm": 1.0553017854690552, "learning_rate": 0.0001, "loss": 0.0153, "step": 93270 }, { "epoch": 613.6842105263158, "grad_norm": 1.5253205299377441, "learning_rate": 0.0001, "loss": 0.015, "step": 93280 }, { "epoch": 613.75, "grad_norm": 1.3238880634307861, "learning_rate": 0.0001, "loss": 0.0121, "step": 93290 }, { "epoch": 613.8157894736842, "grad_norm": 1.8250588178634644, "learning_rate": 0.0001, "loss": 0.0142, "step": 93300 }, { "epoch": 613.8815789473684, "grad_norm": 1.460098385810852, "learning_rate": 0.0001, "loss": 0.0154, "step": 93310 }, { "epoch": 613.9473684210526, "grad_norm": 0.9992056488990784, "learning_rate": 0.0001, "loss": 0.0105, "step": 93320 }, { "epoch": 614.0131578947369, "grad_norm": 1.7277848720550537, "learning_rate": 0.0001, "loss": 0.0176, "step": 93330 }, { "epoch": 614.078947368421, "grad_norm": 1.184828519821167, "learning_rate": 0.0001, "loss": 0.0169, "step": 93340 }, { "epoch": 614.1447368421053, "grad_norm": 1.2798082828521729, "learning_rate": 0.0001, "loss": 0.0164, "step": 93350 }, { "epoch": 614.2105263157895, "grad_norm": 1.5608394145965576, "learning_rate": 0.0001, "loss": 0.0147, "step": 93360 }, { "epoch": 614.2763157894736, "grad_norm": 1.3426207304000854, "learning_rate": 0.0001, "loss": 0.0143, "step": 93370 }, { "epoch": 614.3421052631579, "grad_norm": 1.2308522462844849, "learning_rate": 0.0001, "loss": 0.011, "step": 93380 }, { "epoch": 614.4078947368421, "grad_norm": 1.1283576488494873, "learning_rate": 0.0001, "loss": 0.0144, "step": 93390 }, { "epoch": 614.4736842105264, "grad_norm": 1.5287100076675415, "learning_rate": 0.0001, "loss": 0.0171, "step": 93400 }, { "epoch": 614.5394736842105, "grad_norm": 1.7038443088531494, "learning_rate": 0.0001, "loss": 0.0155, "step": 93410 }, { "epoch": 614.6052631578947, "grad_norm": 1.4039983749389648, "learning_rate": 0.0001, "loss": 0.0127, "step": 93420 }, { "epoch": 614.671052631579, "grad_norm": 1.5602318048477173, "learning_rate": 0.0001, "loss": 0.0151, "step": 93430 }, { "epoch": 614.7368421052631, "grad_norm": 1.357468605041504, "learning_rate": 0.0001, "loss": 0.0113, "step": 93440 }, { "epoch": 614.8026315789474, "grad_norm": 1.6328331232070923, "learning_rate": 0.0001, "loss": 0.0114, "step": 93450 }, { "epoch": 614.8684210526316, "grad_norm": 1.70880126953125, "learning_rate": 0.0001, "loss": 0.013, "step": 93460 }, { "epoch": 614.9342105263158, "grad_norm": 1.3253332376480103, "learning_rate": 0.0001, "loss": 0.0136, "step": 93470 }, { "epoch": 615.0, "grad_norm": 1.6503499746322632, "learning_rate": 0.0001, "loss": 0.0163, "step": 93480 }, { "epoch": 615.0657894736842, "grad_norm": 1.8975048065185547, "learning_rate": 0.0001, "loss": 0.0114, "step": 93490 }, { "epoch": 615.1315789473684, "grad_norm": 2.074446439743042, "learning_rate": 0.0001, "loss": 0.0135, "step": 93500 }, { "epoch": 615.1973684210526, "grad_norm": 1.4858649969100952, "learning_rate": 0.0001, "loss": 0.0141, "step": 93510 }, { "epoch": 615.2631578947369, "grad_norm": 1.9239931106567383, "learning_rate": 0.0001, "loss": 0.0121, "step": 93520 }, { "epoch": 615.328947368421, "grad_norm": 1.337907314300537, "learning_rate": 0.0001, "loss": 0.0122, "step": 93530 }, { "epoch": 615.3947368421053, "grad_norm": 2.0076191425323486, "learning_rate": 0.0001, "loss": 0.0122, "step": 93540 }, { "epoch": 615.4605263157895, "grad_norm": 1.4854017496109009, "learning_rate": 0.0001, "loss": 0.0151, "step": 93550 }, { "epoch": 615.5263157894736, "grad_norm": 1.053727626800537, "learning_rate": 0.0001, "loss": 0.0165, "step": 93560 }, { "epoch": 615.5921052631579, "grad_norm": 1.270326018333435, "learning_rate": 0.0001, "loss": 0.0167, "step": 93570 }, { "epoch": 615.6578947368421, "grad_norm": 1.9763606786727905, "learning_rate": 0.0001, "loss": 0.0133, "step": 93580 }, { "epoch": 615.7236842105264, "grad_norm": 1.6074168682098389, "learning_rate": 0.0001, "loss": 0.0129, "step": 93590 }, { "epoch": 615.7894736842105, "grad_norm": 0.9472919702529907, "learning_rate": 0.0001, "loss": 0.0158, "step": 93600 }, { "epoch": 615.8552631578947, "grad_norm": 1.539153814315796, "learning_rate": 0.0001, "loss": 0.0165, "step": 93610 }, { "epoch": 615.921052631579, "grad_norm": 1.349604606628418, "learning_rate": 0.0001, "loss": 0.018, "step": 93620 }, { "epoch": 615.9868421052631, "grad_norm": 1.344521164894104, "learning_rate": 0.0001, "loss": 0.0123, "step": 93630 }, { "epoch": 616.0526315789474, "grad_norm": 1.5395475625991821, "learning_rate": 0.0001, "loss": 0.0183, "step": 93640 }, { "epoch": 616.1184210526316, "grad_norm": 1.3333547115325928, "learning_rate": 0.0001, "loss": 0.0135, "step": 93650 }, { "epoch": 616.1842105263158, "grad_norm": 1.3653374910354614, "learning_rate": 0.0001, "loss": 0.0116, "step": 93660 }, { "epoch": 616.25, "grad_norm": 1.4988678693771362, "learning_rate": 0.0001, "loss": 0.0157, "step": 93670 }, { "epoch": 616.3157894736842, "grad_norm": 1.3001377582550049, "learning_rate": 0.0001, "loss": 0.0119, "step": 93680 }, { "epoch": 616.3815789473684, "grad_norm": 1.223145604133606, "learning_rate": 0.0001, "loss": 0.015, "step": 93690 }, { "epoch": 616.4473684210526, "grad_norm": 1.2378053665161133, "learning_rate": 0.0001, "loss": 0.011, "step": 93700 }, { "epoch": 616.5131578947369, "grad_norm": 1.6871130466461182, "learning_rate": 0.0001, "loss": 0.0132, "step": 93710 }, { "epoch": 616.578947368421, "grad_norm": 1.5591105222702026, "learning_rate": 0.0001, "loss": 0.0129, "step": 93720 }, { "epoch": 616.6447368421053, "grad_norm": 1.6673470735549927, "learning_rate": 0.0001, "loss": 0.0137, "step": 93730 }, { "epoch": 616.7105263157895, "grad_norm": 1.3629592657089233, "learning_rate": 0.0001, "loss": 0.0143, "step": 93740 }, { "epoch": 616.7763157894736, "grad_norm": 1.5778701305389404, "learning_rate": 0.0001, "loss": 0.0162, "step": 93750 }, { "epoch": 616.8421052631579, "grad_norm": 1.8891053199768066, "learning_rate": 0.0001, "loss": 0.0171, "step": 93760 }, { "epoch": 616.9078947368421, "grad_norm": 1.8977711200714111, "learning_rate": 0.0001, "loss": 0.0134, "step": 93770 }, { "epoch": 616.9736842105264, "grad_norm": 1.3571147918701172, "learning_rate": 0.0001, "loss": 0.017, "step": 93780 }, { "epoch": 617.0394736842105, "grad_norm": 1.1272388696670532, "learning_rate": 0.0001, "loss": 0.0173, "step": 93790 }, { "epoch": 617.1052631578947, "grad_norm": 1.7015471458435059, "learning_rate": 0.0001, "loss": 0.0122, "step": 93800 }, { "epoch": 617.171052631579, "grad_norm": 1.415403127670288, "learning_rate": 0.0001, "loss": 0.0173, "step": 93810 }, { "epoch": 617.2368421052631, "grad_norm": 1.7283236980438232, "learning_rate": 0.0001, "loss": 0.0159, "step": 93820 }, { "epoch": 617.3026315789474, "grad_norm": 1.5926841497421265, "learning_rate": 0.0001, "loss": 0.0189, "step": 93830 }, { "epoch": 617.3684210526316, "grad_norm": 1.5541276931762695, "learning_rate": 0.0001, "loss": 0.0145, "step": 93840 }, { "epoch": 617.4342105263158, "grad_norm": 1.6073728799819946, "learning_rate": 0.0001, "loss": 0.0168, "step": 93850 }, { "epoch": 617.5, "grad_norm": 2.0834381580352783, "learning_rate": 0.0001, "loss": 0.0133, "step": 93860 }, { "epoch": 617.5657894736842, "grad_norm": 2.0488572120666504, "learning_rate": 0.0001, "loss": 0.0152, "step": 93870 }, { "epoch": 617.6315789473684, "grad_norm": 1.805153727531433, "learning_rate": 0.0001, "loss": 0.0135, "step": 93880 }, { "epoch": 617.6973684210526, "grad_norm": 1.7411459684371948, "learning_rate": 0.0001, "loss": 0.0129, "step": 93890 }, { "epoch": 617.7631578947369, "grad_norm": 1.6730915307998657, "learning_rate": 0.0001, "loss": 0.0146, "step": 93900 }, { "epoch": 617.828947368421, "grad_norm": 1.328204870223999, "learning_rate": 0.0001, "loss": 0.0108, "step": 93910 }, { "epoch": 617.8947368421053, "grad_norm": 1.3965438604354858, "learning_rate": 0.0001, "loss": 0.012, "step": 93920 }, { "epoch": 617.9605263157895, "grad_norm": 1.4256353378295898, "learning_rate": 0.0001, "loss": 0.0151, "step": 93930 }, { "epoch": 618.0263157894736, "grad_norm": 1.9538389444351196, "learning_rate": 0.0001, "loss": 0.0127, "step": 93940 }, { "epoch": 618.0921052631579, "grad_norm": 1.8725394010543823, "learning_rate": 0.0001, "loss": 0.0122, "step": 93950 }, { "epoch": 618.1578947368421, "grad_norm": 1.6077258586883545, "learning_rate": 0.0001, "loss": 0.0166, "step": 93960 }, { "epoch": 618.2236842105264, "grad_norm": 1.7303931713104248, "learning_rate": 0.0001, "loss": 0.0128, "step": 93970 }, { "epoch": 618.2894736842105, "grad_norm": 1.4691967964172363, "learning_rate": 0.0001, "loss": 0.0118, "step": 93980 }, { "epoch": 618.3552631578947, "grad_norm": 1.7252826690673828, "learning_rate": 0.0001, "loss": 0.0133, "step": 93990 }, { "epoch": 618.421052631579, "grad_norm": 2.2815232276916504, "learning_rate": 0.0001, "loss": 0.0196, "step": 94000 }, { "epoch": 618.4868421052631, "grad_norm": 1.8729618787765503, "learning_rate": 0.0001, "loss": 0.0139, "step": 94010 }, { "epoch": 618.5526315789474, "grad_norm": 1.390114426612854, "learning_rate": 0.0001, "loss": 0.0133, "step": 94020 }, { "epoch": 618.6184210526316, "grad_norm": 1.65127432346344, "learning_rate": 0.0001, "loss": 0.0137, "step": 94030 }, { "epoch": 618.6842105263158, "grad_norm": 1.6182560920715332, "learning_rate": 0.0001, "loss": 0.0154, "step": 94040 }, { "epoch": 618.75, "grad_norm": 1.405646800994873, "learning_rate": 0.0001, "loss": 0.0166, "step": 94050 }, { "epoch": 618.8157894736842, "grad_norm": 1.6270359754562378, "learning_rate": 0.0001, "loss": 0.0157, "step": 94060 }, { "epoch": 618.8815789473684, "grad_norm": 1.3740259408950806, "learning_rate": 0.0001, "loss": 0.0123, "step": 94070 }, { "epoch": 618.9473684210526, "grad_norm": 1.7179104089736938, "learning_rate": 0.0001, "loss": 0.0118, "step": 94080 }, { "epoch": 619.0131578947369, "grad_norm": 1.3112187385559082, "learning_rate": 0.0001, "loss": 0.013, "step": 94090 }, { "epoch": 619.078947368421, "grad_norm": 1.330979824066162, "learning_rate": 0.0001, "loss": 0.0192, "step": 94100 }, { "epoch": 619.1447368421053, "grad_norm": 1.566560983657837, "learning_rate": 0.0001, "loss": 0.0125, "step": 94110 }, { "epoch": 619.2105263157895, "grad_norm": 1.3042333126068115, "learning_rate": 0.0001, "loss": 0.0142, "step": 94120 }, { "epoch": 619.2763157894736, "grad_norm": 1.6572989225387573, "learning_rate": 0.0001, "loss": 0.0111, "step": 94130 }, { "epoch": 619.3421052631579, "grad_norm": 1.7381526231765747, "learning_rate": 0.0001, "loss": 0.0144, "step": 94140 }, { "epoch": 619.4078947368421, "grad_norm": 1.5176512002944946, "learning_rate": 0.0001, "loss": 0.0113, "step": 94150 }, { "epoch": 619.4736842105264, "grad_norm": 1.1513441801071167, "learning_rate": 0.0001, "loss": 0.0184, "step": 94160 }, { "epoch": 619.5394736842105, "grad_norm": 1.5236968994140625, "learning_rate": 0.0001, "loss": 0.0149, "step": 94170 }, { "epoch": 619.6052631578947, "grad_norm": 1.0676898956298828, "learning_rate": 0.0001, "loss": 0.0151, "step": 94180 }, { "epoch": 619.671052631579, "grad_norm": 1.490614652633667, "learning_rate": 0.0001, "loss": 0.0132, "step": 94190 }, { "epoch": 619.7368421052631, "grad_norm": 0.9626327157020569, "learning_rate": 0.0001, "loss": 0.0133, "step": 94200 }, { "epoch": 619.8026315789474, "grad_norm": 1.1341259479522705, "learning_rate": 0.0001, "loss": 0.0149, "step": 94210 }, { "epoch": 619.8684210526316, "grad_norm": 1.0546938180923462, "learning_rate": 0.0001, "loss": 0.0168, "step": 94220 }, { "epoch": 619.9342105263158, "grad_norm": 1.4920153617858887, "learning_rate": 0.0001, "loss": 0.0139, "step": 94230 }, { "epoch": 620.0, "grad_norm": 1.2878488302230835, "learning_rate": 0.0001, "loss": 0.0122, "step": 94240 }, { "epoch": 620.0657894736842, "grad_norm": 1.3948622941970825, "learning_rate": 0.0001, "loss": 0.0134, "step": 94250 }, { "epoch": 620.1315789473684, "grad_norm": 1.3875935077667236, "learning_rate": 0.0001, "loss": 0.0114, "step": 94260 }, { "epoch": 620.1973684210526, "grad_norm": 1.807205080986023, "learning_rate": 0.0001, "loss": 0.0128, "step": 94270 }, { "epoch": 620.2631578947369, "grad_norm": 1.6251097917556763, "learning_rate": 0.0001, "loss": 0.0154, "step": 94280 }, { "epoch": 620.328947368421, "grad_norm": 1.2503892183303833, "learning_rate": 0.0001, "loss": 0.0156, "step": 94290 }, { "epoch": 620.3947368421053, "grad_norm": 1.5992799997329712, "learning_rate": 0.0001, "loss": 0.0138, "step": 94300 }, { "epoch": 620.4605263157895, "grad_norm": 1.6897950172424316, "learning_rate": 0.0001, "loss": 0.0173, "step": 94310 }, { "epoch": 620.5263157894736, "grad_norm": 1.5733613967895508, "learning_rate": 0.0001, "loss": 0.0139, "step": 94320 }, { "epoch": 620.5921052631579, "grad_norm": 1.6780332326889038, "learning_rate": 0.0001, "loss": 0.0153, "step": 94330 }, { "epoch": 620.6578947368421, "grad_norm": 1.397017478942871, "learning_rate": 0.0001, "loss": 0.0127, "step": 94340 }, { "epoch": 620.7236842105264, "grad_norm": 1.337769627571106, "learning_rate": 0.0001, "loss": 0.0148, "step": 94350 }, { "epoch": 620.7894736842105, "grad_norm": 1.6259630918502808, "learning_rate": 0.0001, "loss": 0.0188, "step": 94360 }, { "epoch": 620.8552631578947, "grad_norm": 1.2901817560195923, "learning_rate": 0.0001, "loss": 0.0127, "step": 94370 }, { "epoch": 620.921052631579, "grad_norm": 1.910873532295227, "learning_rate": 0.0001, "loss": 0.014, "step": 94380 }, { "epoch": 620.9868421052631, "grad_norm": 1.7831941843032837, "learning_rate": 0.0001, "loss": 0.0176, "step": 94390 }, { "epoch": 621.0526315789474, "grad_norm": 1.7219923734664917, "learning_rate": 0.0001, "loss": 0.0145, "step": 94400 }, { "epoch": 621.1184210526316, "grad_norm": 1.153316617012024, "learning_rate": 0.0001, "loss": 0.0123, "step": 94410 }, { "epoch": 621.1842105263158, "grad_norm": 1.430829644203186, "learning_rate": 0.0001, "loss": 0.0162, "step": 94420 }, { "epoch": 621.25, "grad_norm": 1.3649533987045288, "learning_rate": 0.0001, "loss": 0.0127, "step": 94430 }, { "epoch": 621.3157894736842, "grad_norm": 1.4217991828918457, "learning_rate": 0.0001, "loss": 0.0191, "step": 94440 }, { "epoch": 621.3815789473684, "grad_norm": 1.6258635520935059, "learning_rate": 0.0001, "loss": 0.0119, "step": 94450 }, { "epoch": 621.4473684210526, "grad_norm": 1.375792145729065, "learning_rate": 0.0001, "loss": 0.0153, "step": 94460 }, { "epoch": 621.5131578947369, "grad_norm": 1.6781328916549683, "learning_rate": 0.0001, "loss": 0.018, "step": 94470 }, { "epoch": 621.578947368421, "grad_norm": 1.3467776775360107, "learning_rate": 0.0001, "loss": 0.0127, "step": 94480 }, { "epoch": 621.6447368421053, "grad_norm": 1.1874933242797852, "learning_rate": 0.0001, "loss": 0.0109, "step": 94490 }, { "epoch": 621.7105263157895, "grad_norm": 1.4064929485321045, "learning_rate": 0.0001, "loss": 0.0144, "step": 94500 }, { "epoch": 621.7763157894736, "grad_norm": 1.788388967514038, "learning_rate": 0.0001, "loss": 0.0144, "step": 94510 }, { "epoch": 621.8421052631579, "grad_norm": 1.4797937870025635, "learning_rate": 0.0001, "loss": 0.0145, "step": 94520 }, { "epoch": 621.9078947368421, "grad_norm": 1.4855873584747314, "learning_rate": 0.0001, "loss": 0.0133, "step": 94530 }, { "epoch": 621.9736842105264, "grad_norm": 1.1724265813827515, "learning_rate": 0.0001, "loss": 0.0138, "step": 94540 }, { "epoch": 622.0394736842105, "grad_norm": 1.1621872186660767, "learning_rate": 0.0001, "loss": 0.0147, "step": 94550 }, { "epoch": 622.1052631578947, "grad_norm": 1.200097918510437, "learning_rate": 0.0001, "loss": 0.0139, "step": 94560 }, { "epoch": 622.171052631579, "grad_norm": 1.7727525234222412, "learning_rate": 0.0001, "loss": 0.0155, "step": 94570 }, { "epoch": 622.2368421052631, "grad_norm": 1.3437281847000122, "learning_rate": 0.0001, "loss": 0.0139, "step": 94580 }, { "epoch": 622.3026315789474, "grad_norm": 1.6331126689910889, "learning_rate": 0.0001, "loss": 0.0144, "step": 94590 }, { "epoch": 622.3684210526316, "grad_norm": 1.5001986026763916, "learning_rate": 0.0001, "loss": 0.0133, "step": 94600 }, { "epoch": 622.4342105263158, "grad_norm": 1.7738431692123413, "learning_rate": 0.0001, "loss": 0.0143, "step": 94610 }, { "epoch": 622.5, "grad_norm": 1.5146037340164185, "learning_rate": 0.0001, "loss": 0.0191, "step": 94620 }, { "epoch": 622.5657894736842, "grad_norm": 1.2280369997024536, "learning_rate": 0.0001, "loss": 0.0139, "step": 94630 }, { "epoch": 622.6315789473684, "grad_norm": 1.4471373558044434, "learning_rate": 0.0001, "loss": 0.0183, "step": 94640 }, { "epoch": 622.6973684210526, "grad_norm": 1.5757242441177368, "learning_rate": 0.0001, "loss": 0.013, "step": 94650 }, { "epoch": 622.7631578947369, "grad_norm": 1.6042855978012085, "learning_rate": 0.0001, "loss": 0.016, "step": 94660 }, { "epoch": 622.828947368421, "grad_norm": 1.1343742609024048, "learning_rate": 0.0001, "loss": 0.0118, "step": 94670 }, { "epoch": 622.8947368421053, "grad_norm": 1.1328858137130737, "learning_rate": 0.0001, "loss": 0.0137, "step": 94680 }, { "epoch": 622.9605263157895, "grad_norm": 1.463112473487854, "learning_rate": 0.0001, "loss": 0.0123, "step": 94690 }, { "epoch": 623.0263157894736, "grad_norm": 1.2816216945648193, "learning_rate": 0.0001, "loss": 0.0166, "step": 94700 }, { "epoch": 623.0921052631579, "grad_norm": 1.3276457786560059, "learning_rate": 0.0001, "loss": 0.0175, "step": 94710 }, { "epoch": 623.1578947368421, "grad_norm": 1.3107936382293701, "learning_rate": 0.0001, "loss": 0.0179, "step": 94720 }, { "epoch": 623.2236842105264, "grad_norm": 1.101150393486023, "learning_rate": 0.0001, "loss": 0.0128, "step": 94730 }, { "epoch": 623.2894736842105, "grad_norm": 1.6409263610839844, "learning_rate": 0.0001, "loss": 0.0139, "step": 94740 }, { "epoch": 623.3552631578947, "grad_norm": 1.458800196647644, "learning_rate": 0.0001, "loss": 0.0162, "step": 94750 }, { "epoch": 623.421052631579, "grad_norm": 1.6304935216903687, "learning_rate": 0.0001, "loss": 0.0147, "step": 94760 }, { "epoch": 623.4868421052631, "grad_norm": 1.2176066637039185, "learning_rate": 0.0001, "loss": 0.0112, "step": 94770 }, { "epoch": 623.5526315789474, "grad_norm": 1.4246866703033447, "learning_rate": 0.0001, "loss": 0.0123, "step": 94780 }, { "epoch": 623.6184210526316, "grad_norm": 1.6359922885894775, "learning_rate": 0.0001, "loss": 0.0147, "step": 94790 }, { "epoch": 623.6842105263158, "grad_norm": 1.1465574502944946, "learning_rate": 0.0001, "loss": 0.0133, "step": 94800 }, { "epoch": 623.75, "grad_norm": 1.6363935470581055, "learning_rate": 0.0001, "loss": 0.014, "step": 94810 }, { "epoch": 623.8157894736842, "grad_norm": 1.5467060804367065, "learning_rate": 0.0001, "loss": 0.0147, "step": 94820 }, { "epoch": 623.8815789473684, "grad_norm": 1.681422233581543, "learning_rate": 0.0001, "loss": 0.0112, "step": 94830 }, { "epoch": 623.9473684210526, "grad_norm": 1.2731908559799194, "learning_rate": 0.0001, "loss": 0.0156, "step": 94840 }, { "epoch": 624.0131578947369, "grad_norm": 1.9723868370056152, "learning_rate": 0.0001, "loss": 0.0146, "step": 94850 }, { "epoch": 624.078947368421, "grad_norm": 1.5348080396652222, "learning_rate": 0.0001, "loss": 0.0165, "step": 94860 }, { "epoch": 624.1447368421053, "grad_norm": 1.417400598526001, "learning_rate": 0.0001, "loss": 0.0122, "step": 94870 }, { "epoch": 624.2105263157895, "grad_norm": 1.354766607284546, "learning_rate": 0.0001, "loss": 0.0126, "step": 94880 }, { "epoch": 624.2763157894736, "grad_norm": 1.5933325290679932, "learning_rate": 0.0001, "loss": 0.0156, "step": 94890 }, { "epoch": 624.3421052631579, "grad_norm": 1.0052461624145508, "learning_rate": 0.0001, "loss": 0.0148, "step": 94900 }, { "epoch": 624.4078947368421, "grad_norm": 1.4386221170425415, "learning_rate": 0.0001, "loss": 0.0146, "step": 94910 }, { "epoch": 624.4736842105264, "grad_norm": 1.0365712642669678, "learning_rate": 0.0001, "loss": 0.0122, "step": 94920 }, { "epoch": 624.5394736842105, "grad_norm": 1.526994228363037, "learning_rate": 0.0001, "loss": 0.015, "step": 94930 }, { "epoch": 624.6052631578947, "grad_norm": 1.227858304977417, "learning_rate": 0.0001, "loss": 0.0146, "step": 94940 }, { "epoch": 624.671052631579, "grad_norm": 1.5367504358291626, "learning_rate": 0.0001, "loss": 0.0142, "step": 94950 }, { "epoch": 624.7368421052631, "grad_norm": 1.2391098737716675, "learning_rate": 0.0001, "loss": 0.0162, "step": 94960 }, { "epoch": 624.8026315789474, "grad_norm": 1.5392818450927734, "learning_rate": 0.0001, "loss": 0.0124, "step": 94970 }, { "epoch": 624.8684210526316, "grad_norm": 1.3145573139190674, "learning_rate": 0.0001, "loss": 0.0115, "step": 94980 }, { "epoch": 624.9342105263158, "grad_norm": 1.1207493543624878, "learning_rate": 0.0001, "loss": 0.018, "step": 94990 }, { "epoch": 625.0, "grad_norm": 1.8182200193405151, "learning_rate": 0.0001, "loss": 0.015, "step": 95000 }, { "epoch": 625.0657894736842, "grad_norm": 1.4508240222930908, "learning_rate": 0.0001, "loss": 0.0121, "step": 95010 }, { "epoch": 625.1315789473684, "grad_norm": 1.081078290939331, "learning_rate": 0.0001, "loss": 0.0143, "step": 95020 }, { "epoch": 625.1973684210526, "grad_norm": 1.4255188703536987, "learning_rate": 0.0001, "loss": 0.012, "step": 95030 }, { "epoch": 625.2631578947369, "grad_norm": 1.5655438899993896, "learning_rate": 0.0001, "loss": 0.0163, "step": 95040 }, { "epoch": 625.328947368421, "grad_norm": 1.122459888458252, "learning_rate": 0.0001, "loss": 0.0133, "step": 95050 }, { "epoch": 625.3947368421053, "grad_norm": 1.548951506614685, "learning_rate": 0.0001, "loss": 0.0156, "step": 95060 }, { "epoch": 625.4605263157895, "grad_norm": 1.0309174060821533, "learning_rate": 0.0001, "loss": 0.013, "step": 95070 }, { "epoch": 625.5263157894736, "grad_norm": 1.8548225164413452, "learning_rate": 0.0001, "loss": 0.015, "step": 95080 }, { "epoch": 625.5921052631579, "grad_norm": 1.6974000930786133, "learning_rate": 0.0001, "loss": 0.0151, "step": 95090 }, { "epoch": 625.6578947368421, "grad_norm": 1.0630043745040894, "learning_rate": 0.0001, "loss": 0.0154, "step": 95100 }, { "epoch": 625.7236842105264, "grad_norm": 1.5423157215118408, "learning_rate": 0.0001, "loss": 0.0176, "step": 95110 }, { "epoch": 625.7894736842105, "grad_norm": 1.0810271501541138, "learning_rate": 0.0001, "loss": 0.0136, "step": 95120 }, { "epoch": 625.8552631578947, "grad_norm": 1.375510811805725, "learning_rate": 0.0001, "loss": 0.0155, "step": 95130 }, { "epoch": 625.921052631579, "grad_norm": 1.6588075160980225, "learning_rate": 0.0001, "loss": 0.0106, "step": 95140 }, { "epoch": 625.9868421052631, "grad_norm": 1.3801558017730713, "learning_rate": 0.0001, "loss": 0.0143, "step": 95150 }, { "epoch": 626.0526315789474, "grad_norm": 1.6740808486938477, "learning_rate": 0.0001, "loss": 0.012, "step": 95160 }, { "epoch": 626.1184210526316, "grad_norm": 1.7536768913269043, "learning_rate": 0.0001, "loss": 0.0112, "step": 95170 }, { "epoch": 626.1842105263158, "grad_norm": 1.3237700462341309, "learning_rate": 0.0001, "loss": 0.0183, "step": 95180 }, { "epoch": 626.25, "grad_norm": 1.5839407444000244, "learning_rate": 0.0001, "loss": 0.017, "step": 95190 }, { "epoch": 626.3157894736842, "grad_norm": 1.382008671760559, "learning_rate": 0.0001, "loss": 0.0117, "step": 95200 }, { "epoch": 626.3815789473684, "grad_norm": 1.63702392578125, "learning_rate": 0.0001, "loss": 0.0161, "step": 95210 }, { "epoch": 626.4473684210526, "grad_norm": 1.7183876037597656, "learning_rate": 0.0001, "loss": 0.0121, "step": 95220 }, { "epoch": 626.5131578947369, "grad_norm": 1.5290453433990479, "learning_rate": 0.0001, "loss": 0.0133, "step": 95230 }, { "epoch": 626.578947368421, "grad_norm": 1.1307204961776733, "learning_rate": 0.0001, "loss": 0.0177, "step": 95240 }, { "epoch": 626.6447368421053, "grad_norm": 1.3581713438034058, "learning_rate": 0.0001, "loss": 0.0125, "step": 95250 }, { "epoch": 626.7105263157895, "grad_norm": 1.5146902799606323, "learning_rate": 0.0001, "loss": 0.0137, "step": 95260 }, { "epoch": 626.7763157894736, "grad_norm": 1.3881968259811401, "learning_rate": 0.0001, "loss": 0.0147, "step": 95270 }, { "epoch": 626.8421052631579, "grad_norm": 1.329717993736267, "learning_rate": 0.0001, "loss": 0.0175, "step": 95280 }, { "epoch": 626.9078947368421, "grad_norm": 1.4694972038269043, "learning_rate": 0.0001, "loss": 0.013, "step": 95290 }, { "epoch": 626.9736842105264, "grad_norm": 1.8643757104873657, "learning_rate": 0.0001, "loss": 0.014, "step": 95300 }, { "epoch": 627.0394736842105, "grad_norm": 1.4549899101257324, "learning_rate": 0.0001, "loss": 0.0131, "step": 95310 }, { "epoch": 627.1052631578947, "grad_norm": 1.6118087768554688, "learning_rate": 0.0001, "loss": 0.0153, "step": 95320 }, { "epoch": 627.171052631579, "grad_norm": 1.20799720287323, "learning_rate": 0.0001, "loss": 0.0167, "step": 95330 }, { "epoch": 627.2368421052631, "grad_norm": 1.312339425086975, "learning_rate": 0.0001, "loss": 0.0127, "step": 95340 }, { "epoch": 627.3026315789474, "grad_norm": 0.9613479375839233, "learning_rate": 0.0001, "loss": 0.0192, "step": 95350 }, { "epoch": 627.3684210526316, "grad_norm": 1.7028402090072632, "learning_rate": 0.0001, "loss": 0.012, "step": 95360 }, { "epoch": 627.4342105263158, "grad_norm": 1.7872111797332764, "learning_rate": 0.0001, "loss": 0.0116, "step": 95370 }, { "epoch": 627.5, "grad_norm": 1.4311447143554688, "learning_rate": 0.0001, "loss": 0.0141, "step": 95380 }, { "epoch": 627.5657894736842, "grad_norm": 1.4985744953155518, "learning_rate": 0.0001, "loss": 0.0131, "step": 95390 }, { "epoch": 627.6315789473684, "grad_norm": 1.7665016651153564, "learning_rate": 0.0001, "loss": 0.0127, "step": 95400 }, { "epoch": 627.6973684210526, "grad_norm": 1.682669997215271, "learning_rate": 0.0001, "loss": 0.0165, "step": 95410 }, { "epoch": 627.7631578947369, "grad_norm": 1.5021069049835205, "learning_rate": 0.0001, "loss": 0.0144, "step": 95420 }, { "epoch": 627.828947368421, "grad_norm": 1.2567671537399292, "learning_rate": 0.0001, "loss": 0.0136, "step": 95430 }, { "epoch": 627.8947368421053, "grad_norm": 1.510865330696106, "learning_rate": 0.0001, "loss": 0.0142, "step": 95440 }, { "epoch": 627.9605263157895, "grad_norm": 1.8488068580627441, "learning_rate": 0.0001, "loss": 0.0177, "step": 95450 }, { "epoch": 628.0263157894736, "grad_norm": 1.8647600412368774, "learning_rate": 0.0001, "loss": 0.0128, "step": 95460 }, { "epoch": 628.0921052631579, "grad_norm": 1.6601122617721558, "learning_rate": 0.0001, "loss": 0.014, "step": 95470 }, { "epoch": 628.1578947368421, "grad_norm": 1.2109230756759644, "learning_rate": 0.0001, "loss": 0.0146, "step": 95480 }, { "epoch": 628.2236842105264, "grad_norm": 1.6607916355133057, "learning_rate": 0.0001, "loss": 0.0133, "step": 95490 }, { "epoch": 628.2894736842105, "grad_norm": 1.9679172039031982, "learning_rate": 0.0001, "loss": 0.0169, "step": 95500 }, { "epoch": 628.3552631578947, "grad_norm": 1.74032723903656, "learning_rate": 0.0001, "loss": 0.0142, "step": 95510 }, { "epoch": 628.421052631579, "grad_norm": 1.4583183526992798, "learning_rate": 0.0001, "loss": 0.0132, "step": 95520 }, { "epoch": 628.4868421052631, "grad_norm": 1.477353572845459, "learning_rate": 0.0001, "loss": 0.0126, "step": 95530 }, { "epoch": 628.5526315789474, "grad_norm": 1.3836733102798462, "learning_rate": 0.0001, "loss": 0.0121, "step": 95540 }, { "epoch": 628.6184210526316, "grad_norm": 1.45274817943573, "learning_rate": 0.0001, "loss": 0.0149, "step": 95550 }, { "epoch": 628.6842105263158, "grad_norm": 1.1777970790863037, "learning_rate": 0.0001, "loss": 0.0132, "step": 95560 }, { "epoch": 628.75, "grad_norm": 1.109070897102356, "learning_rate": 0.0001, "loss": 0.0141, "step": 95570 }, { "epoch": 628.8157894736842, "grad_norm": 1.5020532608032227, "learning_rate": 0.0001, "loss": 0.0127, "step": 95580 }, { "epoch": 628.8815789473684, "grad_norm": 1.9702543020248413, "learning_rate": 0.0001, "loss": 0.0173, "step": 95590 }, { "epoch": 628.9473684210526, "grad_norm": 2.030344009399414, "learning_rate": 0.0001, "loss": 0.0144, "step": 95600 }, { "epoch": 629.0131578947369, "grad_norm": 1.5028027296066284, "learning_rate": 0.0001, "loss": 0.0138, "step": 95610 }, { "epoch": 629.078947368421, "grad_norm": 1.599937081336975, "learning_rate": 0.0001, "loss": 0.011, "step": 95620 }, { "epoch": 629.1447368421053, "grad_norm": 1.1205267906188965, "learning_rate": 0.0001, "loss": 0.0128, "step": 95630 }, { "epoch": 629.2105263157895, "grad_norm": 1.4751348495483398, "learning_rate": 0.0001, "loss": 0.0194, "step": 95640 }, { "epoch": 629.2763157894736, "grad_norm": 1.4107749462127686, "learning_rate": 0.0001, "loss": 0.0171, "step": 95650 }, { "epoch": 629.3421052631579, "grad_norm": 1.5016987323760986, "learning_rate": 0.0001, "loss": 0.014, "step": 95660 }, { "epoch": 629.4078947368421, "grad_norm": 1.517250657081604, "learning_rate": 0.0001, "loss": 0.0153, "step": 95670 }, { "epoch": 629.4736842105264, "grad_norm": 1.5425872802734375, "learning_rate": 0.0001, "loss": 0.0125, "step": 95680 }, { "epoch": 629.5394736842105, "grad_norm": 1.3836116790771484, "learning_rate": 0.0001, "loss": 0.0132, "step": 95690 }, { "epoch": 629.6052631578947, "grad_norm": 1.7259184122085571, "learning_rate": 0.0001, "loss": 0.0135, "step": 95700 }, { "epoch": 629.671052631579, "grad_norm": 1.9034082889556885, "learning_rate": 0.0001, "loss": 0.0169, "step": 95710 }, { "epoch": 629.7368421052631, "grad_norm": 1.894127368927002, "learning_rate": 0.0001, "loss": 0.0133, "step": 95720 }, { "epoch": 629.8026315789474, "grad_norm": 1.718407154083252, "learning_rate": 0.0001, "loss": 0.0134, "step": 95730 }, { "epoch": 629.8684210526316, "grad_norm": 1.3452147245407104, "learning_rate": 0.0001, "loss": 0.0151, "step": 95740 }, { "epoch": 629.9342105263158, "grad_norm": 1.5568420886993408, "learning_rate": 0.0001, "loss": 0.0139, "step": 95750 }, { "epoch": 630.0, "grad_norm": 1.4073114395141602, "learning_rate": 0.0001, "loss": 0.0114, "step": 95760 }, { "epoch": 630.0657894736842, "grad_norm": 1.8934260606765747, "learning_rate": 0.0001, "loss": 0.0148, "step": 95770 }, { "epoch": 630.1315789473684, "grad_norm": 1.2475708723068237, "learning_rate": 0.0001, "loss": 0.0115, "step": 95780 }, { "epoch": 630.1973684210526, "grad_norm": 2.010725498199463, "learning_rate": 0.0001, "loss": 0.0149, "step": 95790 }, { "epoch": 630.2631578947369, "grad_norm": 1.67949640750885, "learning_rate": 0.0001, "loss": 0.0122, "step": 95800 }, { "epoch": 630.328947368421, "grad_norm": 1.4753836393356323, "learning_rate": 0.0001, "loss": 0.0141, "step": 95810 }, { "epoch": 630.3947368421053, "grad_norm": 1.5310852527618408, "learning_rate": 0.0001, "loss": 0.0147, "step": 95820 }, { "epoch": 630.4605263157895, "grad_norm": 1.6996136903762817, "learning_rate": 0.0001, "loss": 0.0111, "step": 95830 }, { "epoch": 630.5263157894736, "grad_norm": 1.8904908895492554, "learning_rate": 0.0001, "loss": 0.0123, "step": 95840 }, { "epoch": 630.5921052631579, "grad_norm": 1.7409143447875977, "learning_rate": 0.0001, "loss": 0.0173, "step": 95850 }, { "epoch": 630.6578947368421, "grad_norm": 1.919129490852356, "learning_rate": 0.0001, "loss": 0.0157, "step": 95860 }, { "epoch": 630.7236842105264, "grad_norm": 1.6093025207519531, "learning_rate": 0.0001, "loss": 0.0142, "step": 95870 }, { "epoch": 630.7894736842105, "grad_norm": 1.9728070497512817, "learning_rate": 0.0001, "loss": 0.0138, "step": 95880 }, { "epoch": 630.8552631578947, "grad_norm": 2.2335686683654785, "learning_rate": 0.0001, "loss": 0.0109, "step": 95890 }, { "epoch": 630.921052631579, "grad_norm": 1.6482101678848267, "learning_rate": 0.0001, "loss": 0.0131, "step": 95900 }, { "epoch": 630.9868421052631, "grad_norm": 1.6733131408691406, "learning_rate": 0.0001, "loss": 0.021, "step": 95910 }, { "epoch": 631.0526315789474, "grad_norm": 1.9339771270751953, "learning_rate": 0.0001, "loss": 0.0172, "step": 95920 }, { "epoch": 631.1184210526316, "grad_norm": 2.0677223205566406, "learning_rate": 0.0001, "loss": 0.0126, "step": 95930 }, { "epoch": 631.1842105263158, "grad_norm": 1.52869713306427, "learning_rate": 0.0001, "loss": 0.0155, "step": 95940 }, { "epoch": 631.25, "grad_norm": 1.5419844388961792, "learning_rate": 0.0001, "loss": 0.017, "step": 95950 }, { "epoch": 631.3157894736842, "grad_norm": 1.8941580057144165, "learning_rate": 0.0001, "loss": 0.0116, "step": 95960 }, { "epoch": 631.3815789473684, "grad_norm": 1.5814929008483887, "learning_rate": 0.0001, "loss": 0.0147, "step": 95970 }, { "epoch": 631.4473684210526, "grad_norm": 1.6708356142044067, "learning_rate": 0.0001, "loss": 0.0178, "step": 95980 }, { "epoch": 631.5131578947369, "grad_norm": 1.5629093647003174, "learning_rate": 0.0001, "loss": 0.0133, "step": 95990 }, { "epoch": 631.578947368421, "grad_norm": 1.1294509172439575, "learning_rate": 0.0001, "loss": 0.0107, "step": 96000 }, { "epoch": 631.6447368421053, "grad_norm": 1.2737135887145996, "learning_rate": 0.0001, "loss": 0.0123, "step": 96010 }, { "epoch": 631.7105263157895, "grad_norm": 1.8970757722854614, "learning_rate": 0.0001, "loss": 0.0202, "step": 96020 }, { "epoch": 631.7763157894736, "grad_norm": 1.6761878728866577, "learning_rate": 0.0001, "loss": 0.011, "step": 96030 }, { "epoch": 631.8421052631579, "grad_norm": 2.006120443344116, "learning_rate": 0.0001, "loss": 0.0125, "step": 96040 }, { "epoch": 631.9078947368421, "grad_norm": 1.8815923929214478, "learning_rate": 0.0001, "loss": 0.0146, "step": 96050 }, { "epoch": 631.9736842105264, "grad_norm": 1.6786121129989624, "learning_rate": 0.0001, "loss": 0.014, "step": 96060 }, { "epoch": 632.0394736842105, "grad_norm": 1.3133262395858765, "learning_rate": 0.0001, "loss": 0.0128, "step": 96070 }, { "epoch": 632.1052631578947, "grad_norm": 1.7052568197250366, "learning_rate": 0.0001, "loss": 0.0128, "step": 96080 }, { "epoch": 632.171052631579, "grad_norm": 1.5580768585205078, "learning_rate": 0.0001, "loss": 0.0128, "step": 96090 }, { "epoch": 632.2368421052631, "grad_norm": 1.3733549118041992, "learning_rate": 0.0001, "loss": 0.0123, "step": 96100 }, { "epoch": 632.3026315789474, "grad_norm": 1.5170273780822754, "learning_rate": 0.0001, "loss": 0.0144, "step": 96110 }, { "epoch": 632.3684210526316, "grad_norm": 1.4133424758911133, "learning_rate": 0.0001, "loss": 0.0123, "step": 96120 }, { "epoch": 632.4342105263158, "grad_norm": 1.2954063415527344, "learning_rate": 0.0001, "loss": 0.019, "step": 96130 }, { "epoch": 632.5, "grad_norm": 1.5712491273880005, "learning_rate": 0.0001, "loss": 0.0118, "step": 96140 }, { "epoch": 632.5657894736842, "grad_norm": 1.4638234376907349, "learning_rate": 0.0001, "loss": 0.0159, "step": 96150 }, { "epoch": 632.6315789473684, "grad_norm": 1.3806297779083252, "learning_rate": 0.0001, "loss": 0.0167, "step": 96160 }, { "epoch": 632.6973684210526, "grad_norm": 1.5464938879013062, "learning_rate": 0.0001, "loss": 0.0162, "step": 96170 }, { "epoch": 632.7631578947369, "grad_norm": 1.3217220306396484, "learning_rate": 0.0001, "loss": 0.0141, "step": 96180 }, { "epoch": 632.828947368421, "grad_norm": 1.387773871421814, "learning_rate": 0.0001, "loss": 0.0131, "step": 96190 }, { "epoch": 632.8947368421053, "grad_norm": 1.1293550729751587, "learning_rate": 0.0001, "loss": 0.0135, "step": 96200 }, { "epoch": 632.9605263157895, "grad_norm": 1.3618817329406738, "learning_rate": 0.0001, "loss": 0.0155, "step": 96210 }, { "epoch": 633.0263157894736, "grad_norm": 1.7040969133377075, "learning_rate": 0.0001, "loss": 0.0137, "step": 96220 }, { "epoch": 633.0921052631579, "grad_norm": 1.637637972831726, "learning_rate": 0.0001, "loss": 0.0124, "step": 96230 }, { "epoch": 633.1578947368421, "grad_norm": 1.863473653793335, "learning_rate": 0.0001, "loss": 0.0157, "step": 96240 }, { "epoch": 633.2236842105264, "grad_norm": 1.243005633354187, "learning_rate": 0.0001, "loss": 0.0154, "step": 96250 }, { "epoch": 633.2894736842105, "grad_norm": 1.169284701347351, "learning_rate": 0.0001, "loss": 0.0142, "step": 96260 }, { "epoch": 633.3552631578947, "grad_norm": 1.2291523218154907, "learning_rate": 0.0001, "loss": 0.0125, "step": 96270 }, { "epoch": 633.421052631579, "grad_norm": 1.436650037765503, "learning_rate": 0.0001, "loss": 0.014, "step": 96280 }, { "epoch": 633.4868421052631, "grad_norm": 1.1946349143981934, "learning_rate": 0.0001, "loss": 0.0152, "step": 96290 }, { "epoch": 633.5526315789474, "grad_norm": 1.725032925605774, "learning_rate": 0.0001, "loss": 0.0117, "step": 96300 }, { "epoch": 633.6184210526316, "grad_norm": 1.2643718719482422, "learning_rate": 0.0001, "loss": 0.018, "step": 96310 }, { "epoch": 633.6842105263158, "grad_norm": 1.0595756769180298, "learning_rate": 0.0001, "loss": 0.0164, "step": 96320 }, { "epoch": 633.75, "grad_norm": 1.388962745666504, "learning_rate": 0.0001, "loss": 0.0134, "step": 96330 }, { "epoch": 633.8157894736842, "grad_norm": 1.3563628196716309, "learning_rate": 0.0001, "loss": 0.0147, "step": 96340 }, { "epoch": 633.8815789473684, "grad_norm": 1.589447259902954, "learning_rate": 0.0001, "loss": 0.013, "step": 96350 }, { "epoch": 633.9473684210526, "grad_norm": 1.520743489265442, "learning_rate": 0.0001, "loss": 0.0156, "step": 96360 }, { "epoch": 634.0131578947369, "grad_norm": 1.8560118675231934, "learning_rate": 0.0001, "loss": 0.0144, "step": 96370 }, { "epoch": 634.078947368421, "grad_norm": 1.4948909282684326, "learning_rate": 0.0001, "loss": 0.0195, "step": 96380 }, { "epoch": 634.1447368421053, "grad_norm": 1.2510247230529785, "learning_rate": 0.0001, "loss": 0.0116, "step": 96390 }, { "epoch": 634.2105263157895, "grad_norm": 1.535610318183899, "learning_rate": 0.0001, "loss": 0.0115, "step": 96400 }, { "epoch": 634.2763157894736, "grad_norm": 1.3868600130081177, "learning_rate": 0.0001, "loss": 0.0152, "step": 96410 }, { "epoch": 634.3421052631579, "grad_norm": 1.7002758979797363, "learning_rate": 0.0001, "loss": 0.0123, "step": 96420 }, { "epoch": 634.4078947368421, "grad_norm": 1.7519985437393188, "learning_rate": 0.0001, "loss": 0.0176, "step": 96430 }, { "epoch": 634.4736842105264, "grad_norm": 1.375597596168518, "learning_rate": 0.0001, "loss": 0.0142, "step": 96440 }, { "epoch": 634.5394736842105, "grad_norm": 1.7369439601898193, "learning_rate": 0.0001, "loss": 0.0128, "step": 96450 }, { "epoch": 634.6052631578947, "grad_norm": 1.405240774154663, "learning_rate": 0.0001, "loss": 0.0138, "step": 96460 }, { "epoch": 634.671052631579, "grad_norm": 1.4766896963119507, "learning_rate": 0.0001, "loss": 0.0162, "step": 96470 }, { "epoch": 634.7368421052631, "grad_norm": 1.2359305620193481, "learning_rate": 0.0001, "loss": 0.0112, "step": 96480 }, { "epoch": 634.8026315789474, "grad_norm": 1.3159340620040894, "learning_rate": 0.0001, "loss": 0.014, "step": 96490 }, { "epoch": 634.8684210526316, "grad_norm": 1.14046049118042, "learning_rate": 0.0001, "loss": 0.0132, "step": 96500 }, { "epoch": 634.9342105263158, "grad_norm": 1.4737844467163086, "learning_rate": 0.0001, "loss": 0.0184, "step": 96510 }, { "epoch": 635.0, "grad_norm": 1.4119244813919067, "learning_rate": 0.0001, "loss": 0.0158, "step": 96520 }, { "epoch": 635.0657894736842, "grad_norm": 1.2333717346191406, "learning_rate": 0.0001, "loss": 0.0137, "step": 96530 }, { "epoch": 635.1315789473684, "grad_norm": 1.5841785669326782, "learning_rate": 0.0001, "loss": 0.0135, "step": 96540 }, { "epoch": 635.1973684210526, "grad_norm": 1.170016884803772, "learning_rate": 0.0001, "loss": 0.0152, "step": 96550 }, { "epoch": 635.2631578947369, "grad_norm": 1.2473196983337402, "learning_rate": 0.0001, "loss": 0.0142, "step": 96560 }, { "epoch": 635.328947368421, "grad_norm": 0.891914427280426, "learning_rate": 0.0001, "loss": 0.0143, "step": 96570 }, { "epoch": 635.3947368421053, "grad_norm": 1.4546024799346924, "learning_rate": 0.0001, "loss": 0.0123, "step": 96580 }, { "epoch": 635.4605263157895, "grad_norm": 1.5147340297698975, "learning_rate": 0.0001, "loss": 0.0146, "step": 96590 }, { "epoch": 635.5263157894736, "grad_norm": 1.2652738094329834, "learning_rate": 0.0001, "loss": 0.0175, "step": 96600 }, { "epoch": 635.5921052631579, "grad_norm": 1.2907648086547852, "learning_rate": 0.0001, "loss": 0.0196, "step": 96610 }, { "epoch": 635.6578947368421, "grad_norm": 1.26150643825531, "learning_rate": 0.0001, "loss": 0.015, "step": 96620 }, { "epoch": 635.7236842105264, "grad_norm": 1.4778274297714233, "learning_rate": 0.0001, "loss": 0.0173, "step": 96630 }, { "epoch": 635.7894736842105, "grad_norm": 1.729503870010376, "learning_rate": 0.0001, "loss": 0.0128, "step": 96640 }, { "epoch": 635.8552631578947, "grad_norm": 1.5819528102874756, "learning_rate": 0.0001, "loss": 0.0128, "step": 96650 }, { "epoch": 635.921052631579, "grad_norm": 1.2438647747039795, "learning_rate": 0.0001, "loss": 0.0117, "step": 96660 }, { "epoch": 635.9868421052631, "grad_norm": 1.1876966953277588, "learning_rate": 0.0001, "loss": 0.0193, "step": 96670 }, { "epoch": 636.0526315789474, "grad_norm": 1.4870411157608032, "learning_rate": 0.0001, "loss": 0.0124, "step": 96680 }, { "epoch": 636.1184210526316, "grad_norm": 1.158522367477417, "learning_rate": 0.0001, "loss": 0.0148, "step": 96690 }, { "epoch": 636.1842105263158, "grad_norm": 1.4166046380996704, "learning_rate": 0.0001, "loss": 0.0144, "step": 96700 }, { "epoch": 636.25, "grad_norm": 1.2362728118896484, "learning_rate": 0.0001, "loss": 0.0165, "step": 96710 }, { "epoch": 636.3157894736842, "grad_norm": 1.3222719430923462, "learning_rate": 0.0001, "loss": 0.0157, "step": 96720 }, { "epoch": 636.3815789473684, "grad_norm": 1.6412543058395386, "learning_rate": 0.0001, "loss": 0.0151, "step": 96730 }, { "epoch": 636.4473684210526, "grad_norm": 1.7199935913085938, "learning_rate": 0.0001, "loss": 0.0118, "step": 96740 }, { "epoch": 636.5131578947369, "grad_norm": 1.226649522781372, "learning_rate": 0.0001, "loss": 0.0193, "step": 96750 }, { "epoch": 636.578947368421, "grad_norm": 1.6265565156936646, "learning_rate": 0.0001, "loss": 0.0179, "step": 96760 }, { "epoch": 636.6447368421053, "grad_norm": 1.0478254556655884, "learning_rate": 0.0001, "loss": 0.0111, "step": 96770 }, { "epoch": 636.7105263157895, "grad_norm": 1.3821771144866943, "learning_rate": 0.0001, "loss": 0.0127, "step": 96780 }, { "epoch": 636.7763157894736, "grad_norm": 1.3936867713928223, "learning_rate": 0.0001, "loss": 0.0142, "step": 96790 }, { "epoch": 636.8421052631579, "grad_norm": 1.9602359533309937, "learning_rate": 0.0001, "loss": 0.0146, "step": 96800 }, { "epoch": 636.9078947368421, "grad_norm": 1.6711690425872803, "learning_rate": 0.0001, "loss": 0.0133, "step": 96810 }, { "epoch": 636.9736842105264, "grad_norm": 1.5321507453918457, "learning_rate": 0.0001, "loss": 0.0144, "step": 96820 }, { "epoch": 637.0394736842105, "grad_norm": 1.2968147993087769, "learning_rate": 0.0001, "loss": 0.0171, "step": 96830 }, { "epoch": 637.1052631578947, "grad_norm": 1.414872169494629, "learning_rate": 0.0001, "loss": 0.0131, "step": 96840 }, { "epoch": 637.171052631579, "grad_norm": 1.0489630699157715, "learning_rate": 0.0001, "loss": 0.0113, "step": 96850 }, { "epoch": 637.2368421052631, "grad_norm": 1.2648329734802246, "learning_rate": 0.0001, "loss": 0.0138, "step": 96860 }, { "epoch": 637.3026315789474, "grad_norm": 1.8306876420974731, "learning_rate": 0.0001, "loss": 0.015, "step": 96870 }, { "epoch": 637.3684210526316, "grad_norm": 1.3504763841629028, "learning_rate": 0.0001, "loss": 0.0177, "step": 96880 }, { "epoch": 637.4342105263158, "grad_norm": 1.7527333498001099, "learning_rate": 0.0001, "loss": 0.014, "step": 96890 }, { "epoch": 637.5, "grad_norm": 1.5774661302566528, "learning_rate": 0.0001, "loss": 0.014, "step": 96900 }, { "epoch": 637.5657894736842, "grad_norm": 1.5372776985168457, "learning_rate": 0.0001, "loss": 0.0145, "step": 96910 }, { "epoch": 637.6315789473684, "grad_norm": 1.7736845016479492, "learning_rate": 0.0001, "loss": 0.0128, "step": 96920 }, { "epoch": 637.6973684210526, "grad_norm": 1.7474267482757568, "learning_rate": 0.0001, "loss": 0.0162, "step": 96930 }, { "epoch": 637.7631578947369, "grad_norm": 1.582843542098999, "learning_rate": 0.0001, "loss": 0.0142, "step": 96940 }, { "epoch": 637.828947368421, "grad_norm": 1.7385975122451782, "learning_rate": 0.0001, "loss": 0.0165, "step": 96950 }, { "epoch": 637.8947368421053, "grad_norm": 1.5427277088165283, "learning_rate": 0.0001, "loss": 0.0126, "step": 96960 }, { "epoch": 637.9605263157895, "grad_norm": 1.6851274967193604, "learning_rate": 0.0001, "loss": 0.0132, "step": 96970 }, { "epoch": 638.0263157894736, "grad_norm": 1.3525211811065674, "learning_rate": 0.0001, "loss": 0.0162, "step": 96980 }, { "epoch": 638.0921052631579, "grad_norm": 1.31646728515625, "learning_rate": 0.0001, "loss": 0.0124, "step": 96990 }, { "epoch": 638.1578947368421, "grad_norm": 1.8184192180633545, "learning_rate": 0.0001, "loss": 0.0147, "step": 97000 }, { "epoch": 638.2236842105264, "grad_norm": 1.1422711610794067, "learning_rate": 0.0001, "loss": 0.0124, "step": 97010 }, { "epoch": 638.2894736842105, "grad_norm": 1.6575555801391602, "learning_rate": 0.0001, "loss": 0.0178, "step": 97020 }, { "epoch": 638.3552631578947, "grad_norm": 1.710818886756897, "learning_rate": 0.0001, "loss": 0.0125, "step": 97030 }, { "epoch": 638.421052631579, "grad_norm": 1.5783318281173706, "learning_rate": 0.0001, "loss": 0.0134, "step": 97040 }, { "epoch": 638.4868421052631, "grad_norm": 1.4614591598510742, "learning_rate": 0.0001, "loss": 0.0127, "step": 97050 }, { "epoch": 638.5526315789474, "grad_norm": 1.6170120239257812, "learning_rate": 0.0001, "loss": 0.0169, "step": 97060 }, { "epoch": 638.6184210526316, "grad_norm": 1.6675089597702026, "learning_rate": 0.0001, "loss": 0.0122, "step": 97070 }, { "epoch": 638.6842105263158, "grad_norm": 1.594895839691162, "learning_rate": 0.0001, "loss": 0.0162, "step": 97080 }, { "epoch": 638.75, "grad_norm": 1.6648448705673218, "learning_rate": 0.0001, "loss": 0.015, "step": 97090 }, { "epoch": 638.8157894736842, "grad_norm": 1.6519887447357178, "learning_rate": 0.0001, "loss": 0.0143, "step": 97100 }, { "epoch": 638.8815789473684, "grad_norm": 1.7532795667648315, "learning_rate": 0.0001, "loss": 0.0134, "step": 97110 }, { "epoch": 638.9473684210526, "grad_norm": 1.6914961338043213, "learning_rate": 0.0001, "loss": 0.0098, "step": 97120 }, { "epoch": 639.0131578947369, "grad_norm": 1.0127583742141724, "learning_rate": 0.0001, "loss": 0.0143, "step": 97130 }, { "epoch": 639.078947368421, "grad_norm": 1.180738925933838, "learning_rate": 0.0001, "loss": 0.012, "step": 97140 }, { "epoch": 639.1447368421053, "grad_norm": 1.5282714366912842, "learning_rate": 0.0001, "loss": 0.0134, "step": 97150 }, { "epoch": 639.2105263157895, "grad_norm": 1.5325016975402832, "learning_rate": 0.0001, "loss": 0.0168, "step": 97160 }, { "epoch": 639.2763157894736, "grad_norm": 1.2519947290420532, "learning_rate": 0.0001, "loss": 0.0163, "step": 97170 }, { "epoch": 639.3421052631579, "grad_norm": 1.9383831024169922, "learning_rate": 0.0001, "loss": 0.0205, "step": 97180 }, { "epoch": 639.4078947368421, "grad_norm": 1.312631607055664, "learning_rate": 0.0001, "loss": 0.0127, "step": 97190 }, { "epoch": 639.4736842105264, "grad_norm": 1.3426138162612915, "learning_rate": 0.0001, "loss": 0.0144, "step": 97200 }, { "epoch": 639.5394736842105, "grad_norm": 1.5085958242416382, "learning_rate": 0.0001, "loss": 0.0111, "step": 97210 }, { "epoch": 639.6052631578947, "grad_norm": 1.7219374179840088, "learning_rate": 0.0001, "loss": 0.0148, "step": 97220 }, { "epoch": 639.671052631579, "grad_norm": 1.7564787864685059, "learning_rate": 0.0001, "loss": 0.0127, "step": 97230 }, { "epoch": 639.7368421052631, "grad_norm": 1.5815284252166748, "learning_rate": 0.0001, "loss": 0.0119, "step": 97240 }, { "epoch": 639.8026315789474, "grad_norm": 1.6150422096252441, "learning_rate": 0.0001, "loss": 0.0122, "step": 97250 }, { "epoch": 639.8684210526316, "grad_norm": 1.5414516925811768, "learning_rate": 0.0001, "loss": 0.0158, "step": 97260 }, { "epoch": 639.9342105263158, "grad_norm": 1.812207579612732, "learning_rate": 0.0001, "loss": 0.0143, "step": 97270 }, { "epoch": 640.0, "grad_norm": 1.9364979267120361, "learning_rate": 0.0001, "loss": 0.0104, "step": 97280 }, { "epoch": 640.0657894736842, "grad_norm": 1.3147571086883545, "learning_rate": 0.0001, "loss": 0.0154, "step": 97290 }, { "epoch": 640.1315789473684, "grad_norm": 1.6455888748168945, "learning_rate": 0.0001, "loss": 0.0127, "step": 97300 }, { "epoch": 640.1973684210526, "grad_norm": 1.5908089876174927, "learning_rate": 0.0001, "loss": 0.0114, "step": 97310 }, { "epoch": 640.2631578947369, "grad_norm": 1.6563081741333008, "learning_rate": 0.0001, "loss": 0.0165, "step": 97320 }, { "epoch": 640.328947368421, "grad_norm": 1.1534498929977417, "learning_rate": 0.0001, "loss": 0.0105, "step": 97330 }, { "epoch": 640.3947368421053, "grad_norm": 1.5748350620269775, "learning_rate": 0.0001, "loss": 0.0139, "step": 97340 }, { "epoch": 640.4605263157895, "grad_norm": 1.4938029050827026, "learning_rate": 0.0001, "loss": 0.0127, "step": 97350 }, { "epoch": 640.5263157894736, "grad_norm": 1.053973913192749, "learning_rate": 0.0001, "loss": 0.0141, "step": 97360 }, { "epoch": 640.5921052631579, "grad_norm": 1.1920907497406006, "learning_rate": 0.0001, "loss": 0.0154, "step": 97370 }, { "epoch": 640.6578947368421, "grad_norm": 1.3373732566833496, "learning_rate": 0.0001, "loss": 0.0175, "step": 97380 }, { "epoch": 640.7236842105264, "grad_norm": 1.3628710508346558, "learning_rate": 0.0001, "loss": 0.0183, "step": 97390 }, { "epoch": 640.7894736842105, "grad_norm": 1.6499195098876953, "learning_rate": 0.0001, "loss": 0.0172, "step": 97400 }, { "epoch": 640.8552631578947, "grad_norm": 1.8398059606552124, "learning_rate": 0.0001, "loss": 0.0112, "step": 97410 }, { "epoch": 640.921052631579, "grad_norm": 1.3841995000839233, "learning_rate": 0.0001, "loss": 0.0109, "step": 97420 }, { "epoch": 640.9868421052631, "grad_norm": 1.7290416955947876, "learning_rate": 0.0001, "loss": 0.0127, "step": 97430 }, { "epoch": 641.0526315789474, "grad_norm": 1.1730364561080933, "learning_rate": 0.0001, "loss": 0.017, "step": 97440 }, { "epoch": 641.1184210526316, "grad_norm": 1.631635308265686, "learning_rate": 0.0001, "loss": 0.0118, "step": 97450 }, { "epoch": 641.1842105263158, "grad_norm": 1.5255568027496338, "learning_rate": 0.0001, "loss": 0.0122, "step": 97460 }, { "epoch": 641.25, "grad_norm": 1.5991746187210083, "learning_rate": 0.0001, "loss": 0.0128, "step": 97470 }, { "epoch": 641.3157894736842, "grad_norm": 2.079037666320801, "learning_rate": 0.0001, "loss": 0.0119, "step": 97480 }, { "epoch": 641.3815789473684, "grad_norm": 2.158939838409424, "learning_rate": 0.0001, "loss": 0.0145, "step": 97490 }, { "epoch": 641.4473684210526, "grad_norm": 1.8053075075149536, "learning_rate": 0.0001, "loss": 0.0126, "step": 97500 }, { "epoch": 641.5131578947369, "grad_norm": 1.3606595993041992, "learning_rate": 0.0001, "loss": 0.0117, "step": 97510 }, { "epoch": 641.578947368421, "grad_norm": 1.487082839012146, "learning_rate": 0.0001, "loss": 0.0154, "step": 97520 }, { "epoch": 641.6447368421053, "grad_norm": 1.8967869281768799, "learning_rate": 0.0001, "loss": 0.016, "step": 97530 }, { "epoch": 641.7105263157895, "grad_norm": 3.0673394203186035, "learning_rate": 0.0001, "loss": 0.012, "step": 97540 }, { "epoch": 641.7763157894736, "grad_norm": 1.9299070835113525, "learning_rate": 0.0001, "loss": 0.0163, "step": 97550 }, { "epoch": 641.8421052631579, "grad_norm": 1.4036179780960083, "learning_rate": 0.0001, "loss": 0.0138, "step": 97560 }, { "epoch": 641.9078947368421, "grad_norm": 1.4683327674865723, "learning_rate": 0.0001, "loss": 0.0141, "step": 97570 }, { "epoch": 641.9736842105264, "grad_norm": 1.379162311553955, "learning_rate": 0.0001, "loss": 0.0154, "step": 97580 }, { "epoch": 642.0394736842105, "grad_norm": 1.5018597841262817, "learning_rate": 0.0001, "loss": 0.0135, "step": 97590 }, { "epoch": 642.1052631578947, "grad_norm": 1.4235798120498657, "learning_rate": 0.0001, "loss": 0.0131, "step": 97600 }, { "epoch": 642.171052631579, "grad_norm": 1.3677996397018433, "learning_rate": 0.0001, "loss": 0.0148, "step": 97610 }, { "epoch": 642.2368421052631, "grad_norm": 1.4880130290985107, "learning_rate": 0.0001, "loss": 0.0177, "step": 97620 }, { "epoch": 642.3026315789474, "grad_norm": 1.9231739044189453, "learning_rate": 0.0001, "loss": 0.0129, "step": 97630 }, { "epoch": 642.3684210526316, "grad_norm": 1.5820367336273193, "learning_rate": 0.0001, "loss": 0.0201, "step": 97640 }, { "epoch": 642.4342105263158, "grad_norm": 1.5721684694290161, "learning_rate": 0.0001, "loss": 0.0122, "step": 97650 }, { "epoch": 642.5, "grad_norm": 1.9338840246200562, "learning_rate": 0.0001, "loss": 0.0127, "step": 97660 }, { "epoch": 642.5657894736842, "grad_norm": 1.891754150390625, "learning_rate": 0.0001, "loss": 0.0134, "step": 97670 }, { "epoch": 642.6315789473684, "grad_norm": 1.6350501775741577, "learning_rate": 0.0001, "loss": 0.0154, "step": 97680 }, { "epoch": 642.6973684210526, "grad_norm": 1.8301894664764404, "learning_rate": 0.0001, "loss": 0.013, "step": 97690 }, { "epoch": 642.7631578947369, "grad_norm": 1.4198501110076904, "learning_rate": 0.0001, "loss": 0.0141, "step": 97700 }, { "epoch": 642.828947368421, "grad_norm": 1.3381602764129639, "learning_rate": 0.0001, "loss": 0.0108, "step": 97710 }, { "epoch": 642.8947368421053, "grad_norm": 1.5247653722763062, "learning_rate": 0.0001, "loss": 0.0139, "step": 97720 }, { "epoch": 642.9605263157895, "grad_norm": 1.806050181388855, "learning_rate": 0.0001, "loss": 0.0142, "step": 97730 }, { "epoch": 643.0263157894736, "grad_norm": 1.5309646129608154, "learning_rate": 0.0001, "loss": 0.0147, "step": 97740 }, { "epoch": 643.0921052631579, "grad_norm": 1.407538652420044, "learning_rate": 0.0001, "loss": 0.0109, "step": 97750 }, { "epoch": 643.1578947368421, "grad_norm": 1.3065599203109741, "learning_rate": 0.0001, "loss": 0.0144, "step": 97760 }, { "epoch": 643.2236842105264, "grad_norm": 1.2762681245803833, "learning_rate": 0.0001, "loss": 0.0123, "step": 97770 }, { "epoch": 643.2894736842105, "grad_norm": 2.2645392417907715, "learning_rate": 0.0001, "loss": 0.0129, "step": 97780 }, { "epoch": 643.3552631578947, "grad_norm": 1.5414012670516968, "learning_rate": 0.0001, "loss": 0.0113, "step": 97790 }, { "epoch": 643.421052631579, "grad_norm": 1.5669469833374023, "learning_rate": 0.0001, "loss": 0.0128, "step": 97800 }, { "epoch": 643.4868421052631, "grad_norm": 1.4336634874343872, "learning_rate": 0.0001, "loss": 0.0142, "step": 97810 }, { "epoch": 643.5526315789474, "grad_norm": 1.3889977931976318, "learning_rate": 0.0001, "loss": 0.0175, "step": 97820 }, { "epoch": 643.6184210526316, "grad_norm": 1.8637906312942505, "learning_rate": 0.0001, "loss": 0.0168, "step": 97830 }, { "epoch": 643.6842105263158, "grad_norm": 1.0799930095672607, "learning_rate": 0.0001, "loss": 0.0163, "step": 97840 }, { "epoch": 643.75, "grad_norm": 1.1487760543823242, "learning_rate": 0.0001, "loss": 0.0114, "step": 97850 }, { "epoch": 643.8157894736842, "grad_norm": 1.7201297283172607, "learning_rate": 0.0001, "loss": 0.0166, "step": 97860 }, { "epoch": 643.8815789473684, "grad_norm": 1.9286941289901733, "learning_rate": 0.0001, "loss": 0.0176, "step": 97870 }, { "epoch": 643.9473684210526, "grad_norm": 1.8106156587600708, "learning_rate": 0.0001, "loss": 0.014, "step": 97880 }, { "epoch": 644.0131578947369, "grad_norm": 1.3187024593353271, "learning_rate": 0.0001, "loss": 0.0142, "step": 97890 }, { "epoch": 644.078947368421, "grad_norm": 1.7303036451339722, "learning_rate": 0.0001, "loss": 0.0196, "step": 97900 }, { "epoch": 644.1447368421053, "grad_norm": 1.8786617517471313, "learning_rate": 0.0001, "loss": 0.0118, "step": 97910 }, { "epoch": 644.2105263157895, "grad_norm": 1.6653996706008911, "learning_rate": 0.0001, "loss": 0.0135, "step": 97920 }, { "epoch": 644.2763157894736, "grad_norm": 1.8623236417770386, "learning_rate": 0.0001, "loss": 0.0132, "step": 97930 }, { "epoch": 644.3421052631579, "grad_norm": 1.6048425436019897, "learning_rate": 0.0001, "loss": 0.0186, "step": 97940 }, { "epoch": 644.4078947368421, "grad_norm": 1.7216517925262451, "learning_rate": 0.0001, "loss": 0.0113, "step": 97950 }, { "epoch": 644.4736842105264, "grad_norm": 1.0172786712646484, "learning_rate": 0.0001, "loss": 0.0136, "step": 97960 }, { "epoch": 644.5394736842105, "grad_norm": 1.3484917879104614, "learning_rate": 0.0001, "loss": 0.0102, "step": 97970 }, { "epoch": 644.6052631578947, "grad_norm": 1.4222803115844727, "learning_rate": 0.0001, "loss": 0.0139, "step": 97980 }, { "epoch": 644.671052631579, "grad_norm": 1.4334046840667725, "learning_rate": 0.0001, "loss": 0.0156, "step": 97990 }, { "epoch": 644.7368421052631, "grad_norm": 1.5825092792510986, "learning_rate": 0.0001, "loss": 0.0102, "step": 98000 }, { "epoch": 644.8026315789474, "grad_norm": 1.6854428052902222, "learning_rate": 0.0001, "loss": 0.0148, "step": 98010 }, { "epoch": 644.8684210526316, "grad_norm": 1.4716030359268188, "learning_rate": 0.0001, "loss": 0.0178, "step": 98020 }, { "epoch": 644.9342105263158, "grad_norm": 2.007645845413208, "learning_rate": 0.0001, "loss": 0.0126, "step": 98030 }, { "epoch": 645.0, "grad_norm": 1.3001545667648315, "learning_rate": 0.0001, "loss": 0.0151, "step": 98040 }, { "epoch": 645.0657894736842, "grad_norm": 1.6635832786560059, "learning_rate": 0.0001, "loss": 0.0142, "step": 98050 }, { "epoch": 645.1315789473684, "grad_norm": 1.3984086513519287, "learning_rate": 0.0001, "loss": 0.0155, "step": 98060 }, { "epoch": 645.1973684210526, "grad_norm": 1.1921745538711548, "learning_rate": 0.0001, "loss": 0.0135, "step": 98070 }, { "epoch": 645.2631578947369, "grad_norm": 1.1333657503128052, "learning_rate": 0.0001, "loss": 0.0127, "step": 98080 }, { "epoch": 645.328947368421, "grad_norm": 1.1271134614944458, "learning_rate": 0.0001, "loss": 0.0126, "step": 98090 }, { "epoch": 645.3947368421053, "grad_norm": 1.686841368675232, "learning_rate": 0.0001, "loss": 0.0215, "step": 98100 }, { "epoch": 645.4605263157895, "grad_norm": 1.7706865072250366, "learning_rate": 0.0001, "loss": 0.0126, "step": 98110 }, { "epoch": 645.5263157894736, "grad_norm": 1.799390196800232, "learning_rate": 0.0001, "loss": 0.0182, "step": 98120 }, { "epoch": 645.5921052631579, "grad_norm": 1.705464243888855, "learning_rate": 0.0001, "loss": 0.0164, "step": 98130 }, { "epoch": 645.6578947368421, "grad_norm": 1.6376920938491821, "learning_rate": 0.0001, "loss": 0.0122, "step": 98140 }, { "epoch": 645.7236842105264, "grad_norm": 1.5638222694396973, "learning_rate": 0.0001, "loss": 0.0127, "step": 98150 }, { "epoch": 645.7894736842105, "grad_norm": 1.5775651931762695, "learning_rate": 0.0001, "loss": 0.0139, "step": 98160 }, { "epoch": 645.8552631578947, "grad_norm": 1.084963321685791, "learning_rate": 0.0001, "loss": 0.0111, "step": 98170 }, { "epoch": 645.921052631579, "grad_norm": 1.1831392049789429, "learning_rate": 0.0001, "loss": 0.0141, "step": 98180 }, { "epoch": 645.9868421052631, "grad_norm": 1.226335048675537, "learning_rate": 0.0001, "loss": 0.018, "step": 98190 }, { "epoch": 646.0526315789474, "grad_norm": 1.4894065856933594, "learning_rate": 0.0001, "loss": 0.014, "step": 98200 }, { "epoch": 646.1184210526316, "grad_norm": 1.3023427724838257, "learning_rate": 0.0001, "loss": 0.0129, "step": 98210 }, { "epoch": 646.1842105263158, "grad_norm": 1.4644712209701538, "learning_rate": 0.0001, "loss": 0.019, "step": 98220 }, { "epoch": 646.25, "grad_norm": 1.340895414352417, "learning_rate": 0.0001, "loss": 0.0111, "step": 98230 }, { "epoch": 646.3157894736842, "grad_norm": 1.28600013256073, "learning_rate": 0.0001, "loss": 0.0126, "step": 98240 }, { "epoch": 646.3815789473684, "grad_norm": 1.4328070878982544, "learning_rate": 0.0001, "loss": 0.0147, "step": 98250 }, { "epoch": 646.4473684210526, "grad_norm": 1.5184048414230347, "learning_rate": 0.0001, "loss": 0.0132, "step": 98260 }, { "epoch": 646.5131578947369, "grad_norm": 0.9938166737556458, "learning_rate": 0.0001, "loss": 0.0121, "step": 98270 }, { "epoch": 646.578947368421, "grad_norm": 1.2083964347839355, "learning_rate": 0.0001, "loss": 0.0142, "step": 98280 }, { "epoch": 646.6447368421053, "grad_norm": 1.711020827293396, "learning_rate": 0.0001, "loss": 0.0147, "step": 98290 }, { "epoch": 646.7105263157895, "grad_norm": 1.2086596488952637, "learning_rate": 0.0001, "loss": 0.0183, "step": 98300 }, { "epoch": 646.7763157894736, "grad_norm": 1.8373183012008667, "learning_rate": 0.0001, "loss": 0.0142, "step": 98310 }, { "epoch": 646.8421052631579, "grad_norm": 1.9289220571517944, "learning_rate": 0.0001, "loss": 0.0135, "step": 98320 }, { "epoch": 646.9078947368421, "grad_norm": 1.115249752998352, "learning_rate": 0.0001, "loss": 0.0152, "step": 98330 }, { "epoch": 646.9736842105264, "grad_norm": 1.254502534866333, "learning_rate": 0.0001, "loss": 0.0174, "step": 98340 }, { "epoch": 647.0394736842105, "grad_norm": 1.1338419914245605, "learning_rate": 0.0001, "loss": 0.0139, "step": 98350 }, { "epoch": 647.1052631578947, "grad_norm": 1.6829938888549805, "learning_rate": 0.0001, "loss": 0.0148, "step": 98360 }, { "epoch": 647.171052631579, "grad_norm": 1.2407395839691162, "learning_rate": 0.0001, "loss": 0.0161, "step": 98370 }, { "epoch": 647.2368421052631, "grad_norm": 1.2323788404464722, "learning_rate": 0.0001, "loss": 0.0164, "step": 98380 }, { "epoch": 647.3026315789474, "grad_norm": 0.9293558597564697, "learning_rate": 0.0001, "loss": 0.0107, "step": 98390 }, { "epoch": 647.3684210526316, "grad_norm": 1.3367559909820557, "learning_rate": 0.0001, "loss": 0.0165, "step": 98400 }, { "epoch": 647.4342105263158, "grad_norm": 1.4633709192276, "learning_rate": 0.0001, "loss": 0.0151, "step": 98410 }, { "epoch": 647.5, "grad_norm": 1.2761743068695068, "learning_rate": 0.0001, "loss": 0.0118, "step": 98420 }, { "epoch": 647.5657894736842, "grad_norm": 1.2658240795135498, "learning_rate": 0.0001, "loss": 0.014, "step": 98430 }, { "epoch": 647.6315789473684, "grad_norm": 1.329299807548523, "learning_rate": 0.0001, "loss": 0.0122, "step": 98440 }, { "epoch": 647.6973684210526, "grad_norm": 1.6623252630233765, "learning_rate": 0.0001, "loss": 0.017, "step": 98450 }, { "epoch": 647.7631578947369, "grad_norm": 1.7295993566513062, "learning_rate": 0.0001, "loss": 0.014, "step": 98460 }, { "epoch": 647.828947368421, "grad_norm": 1.4347811937332153, "learning_rate": 0.0001, "loss": 0.0123, "step": 98470 }, { "epoch": 647.8947368421053, "grad_norm": 1.5048797130584717, "learning_rate": 0.0001, "loss": 0.0106, "step": 98480 }, { "epoch": 647.9605263157895, "grad_norm": 1.7771178483963013, "learning_rate": 0.0001, "loss": 0.0145, "step": 98490 }, { "epoch": 648.0263157894736, "grad_norm": 1.4180512428283691, "learning_rate": 0.0001, "loss": 0.0204, "step": 98500 }, { "epoch": 648.0921052631579, "grad_norm": 1.4586045742034912, "learning_rate": 0.0001, "loss": 0.0171, "step": 98510 }, { "epoch": 648.1578947368421, "grad_norm": 1.6425726413726807, "learning_rate": 0.0001, "loss": 0.0118, "step": 98520 }, { "epoch": 648.2236842105264, "grad_norm": 1.455430030822754, "learning_rate": 0.0001, "loss": 0.0125, "step": 98530 }, { "epoch": 648.2894736842105, "grad_norm": 1.414422631263733, "learning_rate": 0.0001, "loss": 0.0126, "step": 98540 }, { "epoch": 648.3552631578947, "grad_norm": 1.3623816967010498, "learning_rate": 0.0001, "loss": 0.0143, "step": 98550 }, { "epoch": 648.421052631579, "grad_norm": 1.4979503154754639, "learning_rate": 0.0001, "loss": 0.014, "step": 98560 }, { "epoch": 648.4868421052631, "grad_norm": 1.4568527936935425, "learning_rate": 0.0001, "loss": 0.019, "step": 98570 }, { "epoch": 648.5526315789474, "grad_norm": 1.2658889293670654, "learning_rate": 0.0001, "loss": 0.0151, "step": 98580 }, { "epoch": 648.6184210526316, "grad_norm": 1.7887870073318481, "learning_rate": 0.0001, "loss": 0.0107, "step": 98590 }, { "epoch": 648.6842105263158, "grad_norm": 1.8330589532852173, "learning_rate": 0.0001, "loss": 0.0152, "step": 98600 }, { "epoch": 648.75, "grad_norm": 1.5234498977661133, "learning_rate": 0.0001, "loss": 0.0146, "step": 98610 }, { "epoch": 648.8157894736842, "grad_norm": 1.9076141119003296, "learning_rate": 0.0001, "loss": 0.0133, "step": 98620 }, { "epoch": 648.8815789473684, "grad_norm": 1.67471444606781, "learning_rate": 0.0001, "loss": 0.0163, "step": 98630 }, { "epoch": 648.9473684210526, "grad_norm": 1.7850143909454346, "learning_rate": 0.0001, "loss": 0.0116, "step": 98640 }, { "epoch": 649.0131578947369, "grad_norm": 1.4982386827468872, "learning_rate": 0.0001, "loss": 0.0156, "step": 98650 }, { "epoch": 649.078947368421, "grad_norm": 1.4330147504806519, "learning_rate": 0.0001, "loss": 0.0109, "step": 98660 }, { "epoch": 649.1447368421053, "grad_norm": 1.7099114656448364, "learning_rate": 0.0001, "loss": 0.017, "step": 98670 }, { "epoch": 649.2105263157895, "grad_norm": 1.5588148832321167, "learning_rate": 0.0001, "loss": 0.0113, "step": 98680 }, { "epoch": 649.2763157894736, "grad_norm": 1.6291265487670898, "learning_rate": 0.0001, "loss": 0.014, "step": 98690 }, { "epoch": 649.3421052631579, "grad_norm": 1.620471477508545, "learning_rate": 0.0001, "loss": 0.012, "step": 98700 }, { "epoch": 649.4078947368421, "grad_norm": 1.6654231548309326, "learning_rate": 0.0001, "loss": 0.0128, "step": 98710 }, { "epoch": 649.4736842105264, "grad_norm": 1.5054638385772705, "learning_rate": 0.0001, "loss": 0.0174, "step": 98720 }, { "epoch": 649.5394736842105, "grad_norm": 1.763431191444397, "learning_rate": 0.0001, "loss": 0.0141, "step": 98730 }, { "epoch": 649.6052631578947, "grad_norm": 1.8181591033935547, "learning_rate": 0.0001, "loss": 0.0201, "step": 98740 }, { "epoch": 649.671052631579, "grad_norm": 1.7035443782806396, "learning_rate": 0.0001, "loss": 0.014, "step": 98750 }, { "epoch": 649.7368421052631, "grad_norm": 1.368418574333191, "learning_rate": 0.0001, "loss": 0.0151, "step": 98760 }, { "epoch": 649.8026315789474, "grad_norm": 1.6879395246505737, "learning_rate": 0.0001, "loss": 0.014, "step": 98770 }, { "epoch": 649.8684210526316, "grad_norm": 1.4662857055664062, "learning_rate": 0.0001, "loss": 0.0139, "step": 98780 }, { "epoch": 649.9342105263158, "grad_norm": 1.803283452987671, "learning_rate": 0.0001, "loss": 0.013, "step": 98790 }, { "epoch": 650.0, "grad_norm": 1.3631783723831177, "learning_rate": 0.0001, "loss": 0.0121, "step": 98800 }, { "epoch": 650.0657894736842, "grad_norm": 0.9901971220970154, "learning_rate": 0.0001, "loss": 0.0169, "step": 98810 }, { "epoch": 650.1315789473684, "grad_norm": 0.8920199871063232, "learning_rate": 0.0001, "loss": 0.0139, "step": 98820 }, { "epoch": 650.1973684210526, "grad_norm": 1.0951554775238037, "learning_rate": 0.0001, "loss": 0.0157, "step": 98830 }, { "epoch": 650.2631578947369, "grad_norm": 1.5200912952423096, "learning_rate": 0.0001, "loss": 0.0127, "step": 98840 }, { "epoch": 650.328947368421, "grad_norm": 1.581021785736084, "learning_rate": 0.0001, "loss": 0.013, "step": 98850 }, { "epoch": 650.3947368421053, "grad_norm": 1.3074588775634766, "learning_rate": 0.0001, "loss": 0.0151, "step": 98860 }, { "epoch": 650.4605263157895, "grad_norm": 1.7947031259536743, "learning_rate": 0.0001, "loss": 0.0177, "step": 98870 }, { "epoch": 650.5263157894736, "grad_norm": 1.4725512266159058, "learning_rate": 0.0001, "loss": 0.0141, "step": 98880 }, { "epoch": 650.5921052631579, "grad_norm": 1.4197663068771362, "learning_rate": 0.0001, "loss": 0.012, "step": 98890 }, { "epoch": 650.6578947368421, "grad_norm": 1.0327662229537964, "learning_rate": 0.0001, "loss": 0.0176, "step": 98900 }, { "epoch": 650.7236842105264, "grad_norm": 1.446526288986206, "learning_rate": 0.0001, "loss": 0.013, "step": 98910 }, { "epoch": 650.7894736842105, "grad_norm": 1.0819199085235596, "learning_rate": 0.0001, "loss": 0.0115, "step": 98920 }, { "epoch": 650.8552631578947, "grad_norm": 1.376663327217102, "learning_rate": 0.0001, "loss": 0.0127, "step": 98930 }, { "epoch": 650.921052631579, "grad_norm": 1.4842361211776733, "learning_rate": 0.0001, "loss": 0.0122, "step": 98940 }, { "epoch": 650.9868421052631, "grad_norm": 1.2470808029174805, "learning_rate": 0.0001, "loss": 0.0151, "step": 98950 }, { "epoch": 651.0526315789474, "grad_norm": 1.5150947570800781, "learning_rate": 0.0001, "loss": 0.0125, "step": 98960 }, { "epoch": 651.1184210526316, "grad_norm": 1.4211759567260742, "learning_rate": 0.0001, "loss": 0.0115, "step": 98970 }, { "epoch": 651.1842105263158, "grad_norm": 1.322890281677246, "learning_rate": 0.0001, "loss": 0.0138, "step": 98980 }, { "epoch": 651.25, "grad_norm": 1.239020824432373, "learning_rate": 0.0001, "loss": 0.0145, "step": 98990 }, { "epoch": 651.3157894736842, "grad_norm": 1.2672350406646729, "learning_rate": 0.0001, "loss": 0.0162, "step": 99000 }, { "epoch": 651.3815789473684, "grad_norm": 1.1051305532455444, "learning_rate": 0.0001, "loss": 0.0147, "step": 99010 }, { "epoch": 651.4473684210526, "grad_norm": 1.1238877773284912, "learning_rate": 0.0001, "loss": 0.0117, "step": 99020 }, { "epoch": 651.5131578947369, "grad_norm": 1.1392898559570312, "learning_rate": 0.0001, "loss": 0.0151, "step": 99030 }, { "epoch": 651.578947368421, "grad_norm": 1.3655987977981567, "learning_rate": 0.0001, "loss": 0.0149, "step": 99040 }, { "epoch": 651.6447368421053, "grad_norm": 1.5106006860733032, "learning_rate": 0.0001, "loss": 0.0152, "step": 99050 }, { "epoch": 651.7105263157895, "grad_norm": 1.4813565015792847, "learning_rate": 0.0001, "loss": 0.0149, "step": 99060 }, { "epoch": 651.7763157894736, "grad_norm": 1.450028896331787, "learning_rate": 0.0001, "loss": 0.0135, "step": 99070 }, { "epoch": 651.8421052631579, "grad_norm": 1.597104787826538, "learning_rate": 0.0001, "loss": 0.0163, "step": 99080 }, { "epoch": 651.9078947368421, "grad_norm": 1.0357555150985718, "learning_rate": 0.0001, "loss": 0.0136, "step": 99090 }, { "epoch": 651.9736842105264, "grad_norm": 1.3253254890441895, "learning_rate": 0.0001, "loss": 0.0159, "step": 99100 }, { "epoch": 652.0394736842105, "grad_norm": 1.4230469465255737, "learning_rate": 0.0001, "loss": 0.0154, "step": 99110 }, { "epoch": 652.1052631578947, "grad_norm": 1.4875531196594238, "learning_rate": 0.0001, "loss": 0.0141, "step": 99120 }, { "epoch": 652.171052631579, "grad_norm": 1.6215685606002808, "learning_rate": 0.0001, "loss": 0.0124, "step": 99130 }, { "epoch": 652.2368421052631, "grad_norm": 1.0999730825424194, "learning_rate": 0.0001, "loss": 0.0147, "step": 99140 }, { "epoch": 652.3026315789474, "grad_norm": 1.3480134010314941, "learning_rate": 0.0001, "loss": 0.0137, "step": 99150 }, { "epoch": 652.3684210526316, "grad_norm": 1.0894657373428345, "learning_rate": 0.0001, "loss": 0.0162, "step": 99160 }, { "epoch": 652.4342105263158, "grad_norm": 1.363486886024475, "learning_rate": 0.0001, "loss": 0.0141, "step": 99170 }, { "epoch": 652.5, "grad_norm": 1.38773775100708, "learning_rate": 0.0001, "loss": 0.0149, "step": 99180 }, { "epoch": 652.5657894736842, "grad_norm": 1.9911456108093262, "learning_rate": 0.0001, "loss": 0.0128, "step": 99190 }, { "epoch": 652.6315789473684, "grad_norm": 1.5165098905563354, "learning_rate": 0.0001, "loss": 0.0137, "step": 99200 }, { "epoch": 652.6973684210526, "grad_norm": 1.665044903755188, "learning_rate": 0.0001, "loss": 0.0135, "step": 99210 }, { "epoch": 652.7631578947369, "grad_norm": 1.1982930898666382, "learning_rate": 0.0001, "loss": 0.0132, "step": 99220 }, { "epoch": 652.828947368421, "grad_norm": 1.1825675964355469, "learning_rate": 0.0001, "loss": 0.015, "step": 99230 }, { "epoch": 652.8947368421053, "grad_norm": 1.426061749458313, "learning_rate": 0.0001, "loss": 0.0157, "step": 99240 }, { "epoch": 652.9605263157895, "grad_norm": 1.2960582971572876, "learning_rate": 0.0001, "loss": 0.0146, "step": 99250 }, { "epoch": 653.0263157894736, "grad_norm": 0.9073168039321899, "learning_rate": 0.0001, "loss": 0.0116, "step": 99260 }, { "epoch": 653.0921052631579, "grad_norm": 0.8420706391334534, "learning_rate": 0.0001, "loss": 0.0129, "step": 99270 }, { "epoch": 653.1578947368421, "grad_norm": 1.3374481201171875, "learning_rate": 0.0001, "loss": 0.0122, "step": 99280 }, { "epoch": 653.2236842105264, "grad_norm": 1.5079525709152222, "learning_rate": 0.0001, "loss": 0.0147, "step": 99290 }, { "epoch": 653.2894736842105, "grad_norm": 1.5309754610061646, "learning_rate": 0.0001, "loss": 0.0135, "step": 99300 }, { "epoch": 653.3552631578947, "grad_norm": 1.5405062437057495, "learning_rate": 0.0001, "loss": 0.0118, "step": 99310 }, { "epoch": 653.421052631579, "grad_norm": 1.478661060333252, "learning_rate": 0.0001, "loss": 0.0137, "step": 99320 }, { "epoch": 653.4868421052631, "grad_norm": 1.630933165550232, "learning_rate": 0.0001, "loss": 0.0138, "step": 99330 }, { "epoch": 653.5526315789474, "grad_norm": 1.7978785037994385, "learning_rate": 0.0001, "loss": 0.0145, "step": 99340 }, { "epoch": 653.6184210526316, "grad_norm": 1.532563328742981, "learning_rate": 0.0001, "loss": 0.0145, "step": 99350 }, { "epoch": 653.6842105263158, "grad_norm": 1.7591814994812012, "learning_rate": 0.0001, "loss": 0.0198, "step": 99360 }, { "epoch": 653.75, "grad_norm": 1.6917716264724731, "learning_rate": 0.0001, "loss": 0.0133, "step": 99370 }, { "epoch": 653.8157894736842, "grad_norm": 1.5664466619491577, "learning_rate": 0.0001, "loss": 0.0187, "step": 99380 }, { "epoch": 653.8815789473684, "grad_norm": 1.5816140174865723, "learning_rate": 0.0001, "loss": 0.0173, "step": 99390 }, { "epoch": 653.9473684210526, "grad_norm": 2.1850013732910156, "learning_rate": 0.0001, "loss": 0.0141, "step": 99400 }, { "epoch": 654.0131578947369, "grad_norm": 1.4353842735290527, "learning_rate": 0.0001, "loss": 0.0143, "step": 99410 }, { "epoch": 654.078947368421, "grad_norm": 1.3499820232391357, "learning_rate": 0.0001, "loss": 0.0159, "step": 99420 }, { "epoch": 654.1447368421053, "grad_norm": 1.4665298461914062, "learning_rate": 0.0001, "loss": 0.0128, "step": 99430 }, { "epoch": 654.2105263157895, "grad_norm": 1.7134921550750732, "learning_rate": 0.0001, "loss": 0.0169, "step": 99440 }, { "epoch": 654.2763157894736, "grad_norm": 1.5984541177749634, "learning_rate": 0.0001, "loss": 0.0143, "step": 99450 }, { "epoch": 654.3421052631579, "grad_norm": 1.5557457208633423, "learning_rate": 0.0001, "loss": 0.0107, "step": 99460 }, { "epoch": 654.4078947368421, "grad_norm": 1.2628164291381836, "learning_rate": 0.0001, "loss": 0.0139, "step": 99470 }, { "epoch": 654.4736842105264, "grad_norm": 1.4418234825134277, "learning_rate": 0.0001, "loss": 0.0137, "step": 99480 }, { "epoch": 654.5394736842105, "grad_norm": 1.4705849885940552, "learning_rate": 0.0001, "loss": 0.0168, "step": 99490 }, { "epoch": 654.6052631578947, "grad_norm": 1.619503378868103, "learning_rate": 0.0001, "loss": 0.0151, "step": 99500 }, { "epoch": 654.671052631579, "grad_norm": 1.493683099746704, "learning_rate": 0.0001, "loss": 0.0138, "step": 99510 }, { "epoch": 654.7368421052631, "grad_norm": 1.251630425453186, "learning_rate": 0.0001, "loss": 0.0111, "step": 99520 }, { "epoch": 654.8026315789474, "grad_norm": 1.5336605310440063, "learning_rate": 0.0001, "loss": 0.0141, "step": 99530 }, { "epoch": 654.8684210526316, "grad_norm": 1.5149976015090942, "learning_rate": 0.0001, "loss": 0.0166, "step": 99540 }, { "epoch": 654.9342105263158, "grad_norm": 1.2904130220413208, "learning_rate": 0.0001, "loss": 0.0137, "step": 99550 }, { "epoch": 655.0, "grad_norm": 1.2314459085464478, "learning_rate": 0.0001, "loss": 0.0151, "step": 99560 }, { "epoch": 655.0657894736842, "grad_norm": 1.323135495185852, "learning_rate": 0.0001, "loss": 0.0113, "step": 99570 }, { "epoch": 655.1315789473684, "grad_norm": 1.6532052755355835, "learning_rate": 0.0001, "loss": 0.0126, "step": 99580 }, { "epoch": 655.1973684210526, "grad_norm": 1.7263647317886353, "learning_rate": 0.0001, "loss": 0.0136, "step": 99590 }, { "epoch": 655.2631578947369, "grad_norm": 1.4214309453964233, "learning_rate": 0.0001, "loss": 0.0153, "step": 99600 }, { "epoch": 655.328947368421, "grad_norm": 1.4629292488098145, "learning_rate": 0.0001, "loss": 0.0186, "step": 99610 }, { "epoch": 655.3947368421053, "grad_norm": 2.0028321743011475, "learning_rate": 0.0001, "loss": 0.0164, "step": 99620 }, { "epoch": 655.4605263157895, "grad_norm": 1.7603309154510498, "learning_rate": 0.0001, "loss": 0.0121, "step": 99630 }, { "epoch": 655.5263157894736, "grad_norm": 2.0371265411376953, "learning_rate": 0.0001, "loss": 0.0104, "step": 99640 }, { "epoch": 655.5921052631579, "grad_norm": 1.868977427482605, "learning_rate": 0.0001, "loss": 0.0167, "step": 99650 }, { "epoch": 655.6578947368421, "grad_norm": 1.2871769666671753, "learning_rate": 0.0001, "loss": 0.0126, "step": 99660 }, { "epoch": 655.7236842105264, "grad_norm": 1.5166733264923096, "learning_rate": 0.0001, "loss": 0.0124, "step": 99670 }, { "epoch": 655.7894736842105, "grad_norm": 2.0239808559417725, "learning_rate": 0.0001, "loss": 0.0148, "step": 99680 }, { "epoch": 655.8552631578947, "grad_norm": 1.8280515670776367, "learning_rate": 0.0001, "loss": 0.0127, "step": 99690 }, { "epoch": 655.921052631579, "grad_norm": 1.9800857305526733, "learning_rate": 0.0001, "loss": 0.0153, "step": 99700 }, { "epoch": 655.9868421052631, "grad_norm": 1.6099097728729248, "learning_rate": 0.0001, "loss": 0.0137, "step": 99710 }, { "epoch": 656.0526315789474, "grad_norm": 1.5974280834197998, "learning_rate": 0.0001, "loss": 0.0148, "step": 99720 }, { "epoch": 656.1184210526316, "grad_norm": 1.5241988897323608, "learning_rate": 0.0001, "loss": 0.0133, "step": 99730 }, { "epoch": 656.1842105263158, "grad_norm": 1.5846809148788452, "learning_rate": 0.0001, "loss": 0.013, "step": 99740 }, { "epoch": 656.25, "grad_norm": 1.6756348609924316, "learning_rate": 0.0001, "loss": 0.0138, "step": 99750 }, { "epoch": 656.3157894736842, "grad_norm": 1.4892830848693848, "learning_rate": 0.0001, "loss": 0.0122, "step": 99760 }, { "epoch": 656.3815789473684, "grad_norm": 1.2836188077926636, "learning_rate": 0.0001, "loss": 0.0127, "step": 99770 }, { "epoch": 656.4473684210526, "grad_norm": 1.3354846239089966, "learning_rate": 0.0001, "loss": 0.0149, "step": 99780 }, { "epoch": 656.5131578947369, "grad_norm": 1.8164206743240356, "learning_rate": 0.0001, "loss": 0.0152, "step": 99790 }, { "epoch": 656.578947368421, "grad_norm": 1.4122098684310913, "learning_rate": 0.0001, "loss": 0.0142, "step": 99800 }, { "epoch": 656.6447368421053, "grad_norm": 1.5861254930496216, "learning_rate": 0.0001, "loss": 0.0141, "step": 99810 }, { "epoch": 656.7105263157895, "grad_norm": 1.4165818691253662, "learning_rate": 0.0001, "loss": 0.0141, "step": 99820 }, { "epoch": 656.7763157894736, "grad_norm": 1.4023139476776123, "learning_rate": 0.0001, "loss": 0.0167, "step": 99830 }, { "epoch": 656.8421052631579, "grad_norm": 1.7014917135238647, "learning_rate": 0.0001, "loss": 0.0121, "step": 99840 }, { "epoch": 656.9078947368421, "grad_norm": 1.1533515453338623, "learning_rate": 0.0001, "loss": 0.016, "step": 99850 }, { "epoch": 656.9736842105264, "grad_norm": 0.9241900444030762, "learning_rate": 0.0001, "loss": 0.0135, "step": 99860 }, { "epoch": 657.0394736842105, "grad_norm": 1.3152748346328735, "learning_rate": 0.0001, "loss": 0.0109, "step": 99870 }, { "epoch": 657.1052631578947, "grad_norm": 1.1813493967056274, "learning_rate": 0.0001, "loss": 0.0134, "step": 99880 }, { "epoch": 657.171052631579, "grad_norm": 1.4383012056350708, "learning_rate": 0.0001, "loss": 0.0114, "step": 99890 }, { "epoch": 657.2368421052631, "grad_norm": 1.487945318222046, "learning_rate": 0.0001, "loss": 0.017, "step": 99900 }, { "epoch": 657.3026315789474, "grad_norm": 1.756786584854126, "learning_rate": 0.0001, "loss": 0.0104, "step": 99910 }, { "epoch": 657.3684210526316, "grad_norm": 1.5647450685501099, "learning_rate": 0.0001, "loss": 0.0112, "step": 99920 }, { "epoch": 657.4342105263158, "grad_norm": 1.5084104537963867, "learning_rate": 0.0001, "loss": 0.0158, "step": 99930 }, { "epoch": 657.5, "grad_norm": 1.5708245038986206, "learning_rate": 0.0001, "loss": 0.0192, "step": 99940 }, { "epoch": 657.5657894736842, "grad_norm": 1.4567352533340454, "learning_rate": 0.0001, "loss": 0.0164, "step": 99950 }, { "epoch": 657.6315789473684, "grad_norm": 1.115037202835083, "learning_rate": 0.0001, "loss": 0.0135, "step": 99960 }, { "epoch": 657.6973684210526, "grad_norm": 1.7519994974136353, "learning_rate": 0.0001, "loss": 0.0145, "step": 99970 }, { "epoch": 657.7631578947369, "grad_norm": 1.7191239595413208, "learning_rate": 0.0001, "loss": 0.0168, "step": 99980 }, { "epoch": 657.828947368421, "grad_norm": 1.1616376638412476, "learning_rate": 0.0001, "loss": 0.0134, "step": 99990 }, { "epoch": 657.8947368421053, "grad_norm": 1.6162021160125732, "learning_rate": 0.0001, "loss": 0.012, "step": 100000 }, { "epoch": 657.9605263157895, "grad_norm": 1.5904672145843506, "learning_rate": 0.0001, "loss": 0.0119, "step": 100010 }, { "epoch": 658.0263157894736, "grad_norm": 1.93049156665802, "learning_rate": 0.0001, "loss": 0.0159, "step": 100020 }, { "epoch": 658.0921052631579, "grad_norm": 1.5760831832885742, "learning_rate": 0.0001, "loss": 0.0117, "step": 100030 }, { "epoch": 658.1578947368421, "grad_norm": 1.399505615234375, "learning_rate": 0.0001, "loss": 0.0129, "step": 100040 }, { "epoch": 658.2236842105264, "grad_norm": 1.4150285720825195, "learning_rate": 0.0001, "loss": 0.0168, "step": 100050 }, { "epoch": 658.2894736842105, "grad_norm": 1.2088143825531006, "learning_rate": 0.0001, "loss": 0.0116, "step": 100060 }, { "epoch": 658.3552631578947, "grad_norm": 1.5761711597442627, "learning_rate": 0.0001, "loss": 0.0132, "step": 100070 }, { "epoch": 658.421052631579, "grad_norm": 1.2430332899093628, "learning_rate": 0.0001, "loss": 0.0112, "step": 100080 }, { "epoch": 658.4868421052631, "grad_norm": 1.3375636339187622, "learning_rate": 0.0001, "loss": 0.014, "step": 100090 }, { "epoch": 658.5526315789474, "grad_norm": 1.226406216621399, "learning_rate": 0.0001, "loss": 0.0186, "step": 100100 }, { "epoch": 658.6184210526316, "grad_norm": 1.4428435564041138, "learning_rate": 0.0001, "loss": 0.0161, "step": 100110 }, { "epoch": 658.6842105263158, "grad_norm": 1.3143362998962402, "learning_rate": 0.0001, "loss": 0.0157, "step": 100120 }, { "epoch": 658.75, "grad_norm": 1.6263707876205444, "learning_rate": 0.0001, "loss": 0.0146, "step": 100130 }, { "epoch": 658.8157894736842, "grad_norm": 2.020498752593994, "learning_rate": 0.0001, "loss": 0.0139, "step": 100140 }, { "epoch": 658.8815789473684, "grad_norm": 1.8654664754867554, "learning_rate": 0.0001, "loss": 0.0148, "step": 100150 }, { "epoch": 658.9473684210526, "grad_norm": 1.4486947059631348, "learning_rate": 0.0001, "loss": 0.0151, "step": 100160 }, { "epoch": 659.0131578947369, "grad_norm": 1.4107340574264526, "learning_rate": 0.0001, "loss": 0.0143, "step": 100170 }, { "epoch": 659.078947368421, "grad_norm": 1.5536203384399414, "learning_rate": 0.0001, "loss": 0.0125, "step": 100180 }, { "epoch": 659.1447368421053, "grad_norm": 1.6443750858306885, "learning_rate": 0.0001, "loss": 0.0144, "step": 100190 }, { "epoch": 659.2105263157895, "grad_norm": 1.8415921926498413, "learning_rate": 0.0001, "loss": 0.0144, "step": 100200 }, { "epoch": 659.2763157894736, "grad_norm": 1.4048024415969849, "learning_rate": 0.0001, "loss": 0.0113, "step": 100210 }, { "epoch": 659.3421052631579, "grad_norm": 1.5422619581222534, "learning_rate": 0.0001, "loss": 0.0136, "step": 100220 }, { "epoch": 659.4078947368421, "grad_norm": 1.4084168672561646, "learning_rate": 0.0001, "loss": 0.0134, "step": 100230 }, { "epoch": 659.4736842105264, "grad_norm": 1.0728864669799805, "learning_rate": 0.0001, "loss": 0.0149, "step": 100240 }, { "epoch": 659.5394736842105, "grad_norm": 1.243173599243164, "learning_rate": 0.0001, "loss": 0.0133, "step": 100250 }, { "epoch": 659.6052631578947, "grad_norm": 1.5850187540054321, "learning_rate": 0.0001, "loss": 0.0126, "step": 100260 }, { "epoch": 659.671052631579, "grad_norm": 1.3156620264053345, "learning_rate": 0.0001, "loss": 0.0161, "step": 100270 }, { "epoch": 659.7368421052631, "grad_norm": 1.1001430749893188, "learning_rate": 0.0001, "loss": 0.0146, "step": 100280 }, { "epoch": 659.8026315789474, "grad_norm": 1.7387642860412598, "learning_rate": 0.0001, "loss": 0.0143, "step": 100290 }, { "epoch": 659.8684210526316, "grad_norm": 1.7298007011413574, "learning_rate": 0.0001, "loss": 0.0122, "step": 100300 }, { "epoch": 659.9342105263158, "grad_norm": 1.6323493719100952, "learning_rate": 0.0001, "loss": 0.0159, "step": 100310 }, { "epoch": 660.0, "grad_norm": 1.4088869094848633, "learning_rate": 0.0001, "loss": 0.0157, "step": 100320 }, { "epoch": 660.0657894736842, "grad_norm": 1.3427406549453735, "learning_rate": 0.0001, "loss": 0.0111, "step": 100330 }, { "epoch": 660.1315789473684, "grad_norm": 1.3421337604522705, "learning_rate": 0.0001, "loss": 0.0116, "step": 100340 }, { "epoch": 660.1973684210526, "grad_norm": 1.5844635963439941, "learning_rate": 0.0001, "loss": 0.0154, "step": 100350 }, { "epoch": 660.2631578947369, "grad_norm": 1.3079715967178345, "learning_rate": 0.0001, "loss": 0.014, "step": 100360 }, { "epoch": 660.328947368421, "grad_norm": 1.0409802198410034, "learning_rate": 0.0001, "loss": 0.0137, "step": 100370 }, { "epoch": 660.3947368421053, "grad_norm": 1.503555178642273, "learning_rate": 0.0001, "loss": 0.0116, "step": 100380 }, { "epoch": 660.4605263157895, "grad_norm": 1.654420256614685, "learning_rate": 0.0001, "loss": 0.0157, "step": 100390 }, { "epoch": 660.5263157894736, "grad_norm": 1.3949792385101318, "learning_rate": 0.0001, "loss": 0.0148, "step": 100400 }, { "epoch": 660.5921052631579, "grad_norm": 1.700950026512146, "learning_rate": 0.0001, "loss": 0.0125, "step": 100410 }, { "epoch": 660.6578947368421, "grad_norm": 1.8208073377609253, "learning_rate": 0.0001, "loss": 0.0139, "step": 100420 }, { "epoch": 660.7236842105264, "grad_norm": 1.4685170650482178, "learning_rate": 0.0001, "loss": 0.015, "step": 100430 }, { "epoch": 660.7894736842105, "grad_norm": 1.4893543720245361, "learning_rate": 0.0001, "loss": 0.017, "step": 100440 }, { "epoch": 660.8552631578947, "grad_norm": 1.6849647760391235, "learning_rate": 0.0001, "loss": 0.0137, "step": 100450 }, { "epoch": 660.921052631579, "grad_norm": 1.6647247076034546, "learning_rate": 0.0001, "loss": 0.0164, "step": 100460 }, { "epoch": 660.9868421052631, "grad_norm": 1.7279733419418335, "learning_rate": 0.0001, "loss": 0.0168, "step": 100470 }, { "epoch": 661.0526315789474, "grad_norm": 2.1209347248077393, "learning_rate": 0.0001, "loss": 0.0101, "step": 100480 }, { "epoch": 661.1184210526316, "grad_norm": 1.7841423749923706, "learning_rate": 0.0001, "loss": 0.0222, "step": 100490 }, { "epoch": 661.1842105263158, "grad_norm": 1.6613913774490356, "learning_rate": 0.0001, "loss": 0.0121, "step": 100500 }, { "epoch": 661.25, "grad_norm": 1.185748815536499, "learning_rate": 0.0001, "loss": 0.0121, "step": 100510 }, { "epoch": 661.3157894736842, "grad_norm": 1.3320484161376953, "learning_rate": 0.0001, "loss": 0.0113, "step": 100520 }, { "epoch": 661.3815789473684, "grad_norm": 1.632772445678711, "learning_rate": 0.0001, "loss": 0.0147, "step": 100530 }, { "epoch": 661.4473684210526, "grad_norm": 1.3837417364120483, "learning_rate": 0.0001, "loss": 0.014, "step": 100540 }, { "epoch": 661.5131578947369, "grad_norm": 1.6592509746551514, "learning_rate": 0.0001, "loss": 0.0126, "step": 100550 }, { "epoch": 661.578947368421, "grad_norm": 1.5708378553390503, "learning_rate": 0.0001, "loss": 0.0133, "step": 100560 }, { "epoch": 661.6447368421053, "grad_norm": 1.41875159740448, "learning_rate": 0.0001, "loss": 0.0134, "step": 100570 }, { "epoch": 661.7105263157895, "grad_norm": 1.3496323823928833, "learning_rate": 0.0001, "loss": 0.0185, "step": 100580 }, { "epoch": 661.7763157894736, "grad_norm": 1.1874994039535522, "learning_rate": 0.0001, "loss": 0.0146, "step": 100590 }, { "epoch": 661.8421052631579, "grad_norm": 1.7131526470184326, "learning_rate": 0.0001, "loss": 0.0138, "step": 100600 }, { "epoch": 661.9078947368421, "grad_norm": 1.605792760848999, "learning_rate": 0.0001, "loss": 0.0137, "step": 100610 }, { "epoch": 661.9736842105264, "grad_norm": 1.4374533891677856, "learning_rate": 0.0001, "loss": 0.0124, "step": 100620 }, { "epoch": 662.0394736842105, "grad_norm": 1.7385550737380981, "learning_rate": 0.0001, "loss": 0.0119, "step": 100630 }, { "epoch": 662.1052631578947, "grad_norm": 1.15663480758667, "learning_rate": 0.0001, "loss": 0.0129, "step": 100640 }, { "epoch": 662.171052631579, "grad_norm": 1.4373624324798584, "learning_rate": 0.0001, "loss": 0.0124, "step": 100650 }, { "epoch": 662.2368421052631, "grad_norm": 1.638513445854187, "learning_rate": 0.0001, "loss": 0.0124, "step": 100660 }, { "epoch": 662.3026315789474, "grad_norm": 1.4481453895568848, "learning_rate": 0.0001, "loss": 0.0168, "step": 100670 }, { "epoch": 662.3684210526316, "grad_norm": 1.4384658336639404, "learning_rate": 0.0001, "loss": 0.0111, "step": 100680 }, { "epoch": 662.4342105263158, "grad_norm": 1.9326026439666748, "learning_rate": 0.0001, "loss": 0.0148, "step": 100690 }, { "epoch": 662.5, "grad_norm": 1.38618004322052, "learning_rate": 0.0001, "loss": 0.0154, "step": 100700 }, { "epoch": 662.5657894736842, "grad_norm": 1.947064757347107, "learning_rate": 0.0001, "loss": 0.0132, "step": 100710 }, { "epoch": 662.6315789473684, "grad_norm": 1.6921683549880981, "learning_rate": 0.0001, "loss": 0.0138, "step": 100720 }, { "epoch": 662.6973684210526, "grad_norm": 1.603821039199829, "learning_rate": 0.0001, "loss": 0.0119, "step": 100730 }, { "epoch": 662.7631578947369, "grad_norm": 1.4925041198730469, "learning_rate": 0.0001, "loss": 0.0115, "step": 100740 }, { "epoch": 662.828947368421, "grad_norm": 1.4188570976257324, "learning_rate": 0.0001, "loss": 0.0178, "step": 100750 }, { "epoch": 662.8947368421053, "grad_norm": 1.325272798538208, "learning_rate": 0.0001, "loss": 0.0172, "step": 100760 }, { "epoch": 662.9605263157895, "grad_norm": 1.2705585956573486, "learning_rate": 0.0001, "loss": 0.0164, "step": 100770 }, { "epoch": 663.0263157894736, "grad_norm": 1.7721436023712158, "learning_rate": 0.0001, "loss": 0.0145, "step": 100780 }, { "epoch": 663.0921052631579, "grad_norm": 1.5780460834503174, "learning_rate": 0.0001, "loss": 0.0179, "step": 100790 }, { "epoch": 663.1578947368421, "grad_norm": 2.0283362865448, "learning_rate": 0.0001, "loss": 0.0145, "step": 100800 }, { "epoch": 663.2236842105264, "grad_norm": 1.2524551153182983, "learning_rate": 0.0001, "loss": 0.0151, "step": 100810 }, { "epoch": 663.2894736842105, "grad_norm": 1.8606141805648804, "learning_rate": 0.0001, "loss": 0.0133, "step": 100820 }, { "epoch": 663.3552631578947, "grad_norm": 1.3847877979278564, "learning_rate": 0.0001, "loss": 0.0102, "step": 100830 }, { "epoch": 663.421052631579, "grad_norm": 1.2618350982666016, "learning_rate": 0.0001, "loss": 0.0137, "step": 100840 }, { "epoch": 663.4868421052631, "grad_norm": 1.9055010080337524, "learning_rate": 0.0001, "loss": 0.0167, "step": 100850 }, { "epoch": 663.5526315789474, "grad_norm": 1.389708399772644, "learning_rate": 0.0001, "loss": 0.0123, "step": 100860 }, { "epoch": 663.6184210526316, "grad_norm": 1.0234835147857666, "learning_rate": 0.0001, "loss": 0.0131, "step": 100870 }, { "epoch": 663.6842105263158, "grad_norm": 1.644355297088623, "learning_rate": 0.0001, "loss": 0.0149, "step": 100880 }, { "epoch": 663.75, "grad_norm": 1.317884922027588, "learning_rate": 0.0001, "loss": 0.0134, "step": 100890 }, { "epoch": 663.8157894736842, "grad_norm": 1.4444526433944702, "learning_rate": 0.0001, "loss": 0.0152, "step": 100900 }, { "epoch": 663.8815789473684, "grad_norm": 1.388906478881836, "learning_rate": 0.0001, "loss": 0.0104, "step": 100910 }, { "epoch": 663.9473684210526, "grad_norm": 1.7397305965423584, "learning_rate": 0.0001, "loss": 0.015, "step": 100920 }, { "epoch": 664.0131578947369, "grad_norm": 1.426111102104187, "learning_rate": 0.0001, "loss": 0.0126, "step": 100930 }, { "epoch": 664.078947368421, "grad_norm": 1.4932746887207031, "learning_rate": 0.0001, "loss": 0.012, "step": 100940 }, { "epoch": 664.1447368421053, "grad_norm": 1.1664596796035767, "learning_rate": 0.0001, "loss": 0.0171, "step": 100950 }, { "epoch": 664.2105263157895, "grad_norm": 1.32081937789917, "learning_rate": 0.0001, "loss": 0.0155, "step": 100960 }, { "epoch": 664.2763157894736, "grad_norm": 1.3790475130081177, "learning_rate": 0.0001, "loss": 0.0112, "step": 100970 }, { "epoch": 664.3421052631579, "grad_norm": 1.9312633275985718, "learning_rate": 0.0001, "loss": 0.0168, "step": 100980 }, { "epoch": 664.4078947368421, "grad_norm": 1.9421758651733398, "learning_rate": 0.0001, "loss": 0.0147, "step": 100990 }, { "epoch": 664.4736842105264, "grad_norm": 2.087830066680908, "learning_rate": 0.0001, "loss": 0.0161, "step": 101000 }, { "epoch": 664.5394736842105, "grad_norm": 2.0917787551879883, "learning_rate": 0.0001, "loss": 0.0184, "step": 101010 }, { "epoch": 664.6052631578947, "grad_norm": 1.6445151567459106, "learning_rate": 0.0001, "loss": 0.0116, "step": 101020 }, { "epoch": 664.671052631579, "grad_norm": 1.7625837326049805, "learning_rate": 0.0001, "loss": 0.0136, "step": 101030 }, { "epoch": 664.7368421052631, "grad_norm": 1.2015464305877686, "learning_rate": 0.0001, "loss": 0.013, "step": 101040 }, { "epoch": 664.8026315789474, "grad_norm": 2.0472166538238525, "learning_rate": 0.0001, "loss": 0.0142, "step": 101050 }, { "epoch": 664.8684210526316, "grad_norm": 1.8680822849273682, "learning_rate": 0.0001, "loss": 0.0114, "step": 101060 }, { "epoch": 664.9342105263158, "grad_norm": 1.4993771314620972, "learning_rate": 0.0001, "loss": 0.0136, "step": 101070 }, { "epoch": 665.0, "grad_norm": 1.122968316078186, "learning_rate": 0.0001, "loss": 0.0135, "step": 101080 }, { "epoch": 665.0657894736842, "grad_norm": 1.3625813722610474, "learning_rate": 0.0001, "loss": 0.0129, "step": 101090 }, { "epoch": 665.1315789473684, "grad_norm": 1.6600464582443237, "learning_rate": 0.0001, "loss": 0.0137, "step": 101100 }, { "epoch": 665.1973684210526, "grad_norm": 1.3496841192245483, "learning_rate": 0.0001, "loss": 0.013, "step": 101110 }, { "epoch": 665.2631578947369, "grad_norm": 1.395987868309021, "learning_rate": 0.0001, "loss": 0.0108, "step": 101120 }, { "epoch": 665.328947368421, "grad_norm": 1.4155951738357544, "learning_rate": 0.0001, "loss": 0.016, "step": 101130 }, { "epoch": 665.3947368421053, "grad_norm": 1.4962137937545776, "learning_rate": 0.0001, "loss": 0.0111, "step": 101140 }, { "epoch": 665.4605263157895, "grad_norm": 1.4199906587600708, "learning_rate": 0.0001, "loss": 0.0124, "step": 101150 }, { "epoch": 665.5263157894736, "grad_norm": 1.6613688468933105, "learning_rate": 0.0001, "loss": 0.0128, "step": 101160 }, { "epoch": 665.5921052631579, "grad_norm": 1.178261637687683, "learning_rate": 0.0001, "loss": 0.0131, "step": 101170 }, { "epoch": 665.6578947368421, "grad_norm": 1.5457396507263184, "learning_rate": 0.0001, "loss": 0.0204, "step": 101180 }, { "epoch": 665.7236842105264, "grad_norm": 1.1140555143356323, "learning_rate": 0.0001, "loss": 0.0164, "step": 101190 }, { "epoch": 665.7894736842105, "grad_norm": 1.6804251670837402, "learning_rate": 0.0001, "loss": 0.0181, "step": 101200 }, { "epoch": 665.8552631578947, "grad_norm": 1.8458647727966309, "learning_rate": 0.0001, "loss": 0.0144, "step": 101210 }, { "epoch": 665.921052631579, "grad_norm": 1.7133121490478516, "learning_rate": 0.0001, "loss": 0.0122, "step": 101220 }, { "epoch": 665.9868421052631, "grad_norm": 2.0223889350891113, "learning_rate": 0.0001, "loss": 0.0128, "step": 101230 }, { "epoch": 666.0526315789474, "grad_norm": 0.8817074298858643, "learning_rate": 0.0001, "loss": 0.0147, "step": 101240 }, { "epoch": 666.1184210526316, "grad_norm": 1.636371374130249, "learning_rate": 0.0001, "loss": 0.0144, "step": 101250 }, { "epoch": 666.1842105263158, "grad_norm": 1.7447898387908936, "learning_rate": 0.0001, "loss": 0.0167, "step": 101260 }, { "epoch": 666.25, "grad_norm": 1.9358270168304443, "learning_rate": 0.0001, "loss": 0.0142, "step": 101270 }, { "epoch": 666.3157894736842, "grad_norm": 1.2495453357696533, "learning_rate": 0.0001, "loss": 0.0129, "step": 101280 }, { "epoch": 666.3815789473684, "grad_norm": 1.0606147050857544, "learning_rate": 0.0001, "loss": 0.0136, "step": 101290 }, { "epoch": 666.4473684210526, "grad_norm": 1.2421720027923584, "learning_rate": 0.0001, "loss": 0.0137, "step": 101300 }, { "epoch": 666.5131578947369, "grad_norm": 1.300516128540039, "learning_rate": 0.0001, "loss": 0.0128, "step": 101310 }, { "epoch": 666.578947368421, "grad_norm": 1.8597713708877563, "learning_rate": 0.0001, "loss": 0.0159, "step": 101320 }, { "epoch": 666.6447368421053, "grad_norm": 1.7609648704528809, "learning_rate": 0.0001, "loss": 0.0128, "step": 101330 }, { "epoch": 666.7105263157895, "grad_norm": 1.6971265077590942, "learning_rate": 0.0001, "loss": 0.01, "step": 101340 }, { "epoch": 666.7763157894736, "grad_norm": 1.6641792058944702, "learning_rate": 0.0001, "loss": 0.015, "step": 101350 }, { "epoch": 666.8421052631579, "grad_norm": 1.516397476196289, "learning_rate": 0.0001, "loss": 0.0144, "step": 101360 }, { "epoch": 666.9078947368421, "grad_norm": 1.3999930620193481, "learning_rate": 0.0001, "loss": 0.012, "step": 101370 }, { "epoch": 666.9736842105264, "grad_norm": 1.7803245782852173, "learning_rate": 0.0001, "loss": 0.0165, "step": 101380 }, { "epoch": 667.0394736842105, "grad_norm": 1.2789785861968994, "learning_rate": 0.0001, "loss": 0.0133, "step": 101390 }, { "epoch": 667.1052631578947, "grad_norm": 1.1071512699127197, "learning_rate": 0.0001, "loss": 0.0109, "step": 101400 }, { "epoch": 667.171052631579, "grad_norm": 1.7411240339279175, "learning_rate": 0.0001, "loss": 0.0131, "step": 101410 }, { "epoch": 667.2368421052631, "grad_norm": 1.1308962106704712, "learning_rate": 0.0001, "loss": 0.0189, "step": 101420 }, { "epoch": 667.3026315789474, "grad_norm": 1.6705423593521118, "learning_rate": 0.0001, "loss": 0.0137, "step": 101430 }, { "epoch": 667.3684210526316, "grad_norm": 1.374957799911499, "learning_rate": 0.0001, "loss": 0.0147, "step": 101440 }, { "epoch": 667.4342105263158, "grad_norm": 1.7083691358566284, "learning_rate": 0.0001, "loss": 0.017, "step": 101450 }, { "epoch": 667.5, "grad_norm": 1.8185067176818848, "learning_rate": 0.0001, "loss": 0.0127, "step": 101460 }, { "epoch": 667.5657894736842, "grad_norm": 1.9962791204452515, "learning_rate": 0.0001, "loss": 0.0165, "step": 101470 }, { "epoch": 667.6315789473684, "grad_norm": 1.9203301668167114, "learning_rate": 0.0001, "loss": 0.0117, "step": 101480 }, { "epoch": 667.6973684210526, "grad_norm": 1.4889893531799316, "learning_rate": 0.0001, "loss": 0.0132, "step": 101490 }, { "epoch": 667.7631578947369, "grad_norm": 1.4623842239379883, "learning_rate": 0.0001, "loss": 0.0188, "step": 101500 }, { "epoch": 667.828947368421, "grad_norm": 1.2434139251708984, "learning_rate": 0.0001, "loss": 0.0106, "step": 101510 }, { "epoch": 667.8947368421053, "grad_norm": 1.406906008720398, "learning_rate": 0.0001, "loss": 0.0171, "step": 101520 }, { "epoch": 667.9605263157895, "grad_norm": 1.6383956670761108, "learning_rate": 0.0001, "loss": 0.01, "step": 101530 }, { "epoch": 668.0263157894736, "grad_norm": 1.6094093322753906, "learning_rate": 0.0001, "loss": 0.0125, "step": 101540 }, { "epoch": 668.0921052631579, "grad_norm": 1.8967750072479248, "learning_rate": 0.0001, "loss": 0.0127, "step": 101550 }, { "epoch": 668.1578947368421, "grad_norm": 1.136540174484253, "learning_rate": 0.0001, "loss": 0.0141, "step": 101560 }, { "epoch": 668.2236842105264, "grad_norm": 1.251471996307373, "learning_rate": 0.0001, "loss": 0.0136, "step": 101570 }, { "epoch": 668.2894736842105, "grad_norm": 1.8368035554885864, "learning_rate": 0.0001, "loss": 0.014, "step": 101580 }, { "epoch": 668.3552631578947, "grad_norm": 1.8011173009872437, "learning_rate": 0.0001, "loss": 0.0157, "step": 101590 }, { "epoch": 668.421052631579, "grad_norm": 1.522466778755188, "learning_rate": 0.0001, "loss": 0.0144, "step": 101600 }, { "epoch": 668.4868421052631, "grad_norm": 1.4038563966751099, "learning_rate": 0.0001, "loss": 0.0125, "step": 101610 }, { "epoch": 668.5526315789474, "grad_norm": 1.5245778560638428, "learning_rate": 0.0001, "loss": 0.0104, "step": 101620 }, { "epoch": 668.6184210526316, "grad_norm": 1.4883170127868652, "learning_rate": 0.0001, "loss": 0.0105, "step": 101630 }, { "epoch": 668.6842105263158, "grad_norm": 1.5736546516418457, "learning_rate": 0.0001, "loss": 0.0181, "step": 101640 }, { "epoch": 668.75, "grad_norm": 1.6000726222991943, "learning_rate": 0.0001, "loss": 0.0179, "step": 101650 }, { "epoch": 668.8157894736842, "grad_norm": 1.7990397214889526, "learning_rate": 0.0001, "loss": 0.012, "step": 101660 }, { "epoch": 668.8815789473684, "grad_norm": 1.7134641408920288, "learning_rate": 0.0001, "loss": 0.0125, "step": 101670 }, { "epoch": 668.9473684210526, "grad_norm": 1.7568379640579224, "learning_rate": 0.0001, "loss": 0.0147, "step": 101680 }, { "epoch": 669.0131578947369, "grad_norm": 1.5276861190795898, "learning_rate": 0.0001, "loss": 0.0162, "step": 101690 }, { "epoch": 669.078947368421, "grad_norm": 1.7764405012130737, "learning_rate": 0.0001, "loss": 0.0133, "step": 101700 }, { "epoch": 669.1447368421053, "grad_norm": 1.430853247642517, "learning_rate": 0.0001, "loss": 0.0165, "step": 101710 }, { "epoch": 669.2105263157895, "grad_norm": 1.2006642818450928, "learning_rate": 0.0001, "loss": 0.0167, "step": 101720 }, { "epoch": 669.2763157894736, "grad_norm": 1.327859878540039, "learning_rate": 0.0001, "loss": 0.0124, "step": 101730 }, { "epoch": 669.3421052631579, "grad_norm": 1.3441792726516724, "learning_rate": 0.0001, "loss": 0.0168, "step": 101740 }, { "epoch": 669.4078947368421, "grad_norm": 1.420095443725586, "learning_rate": 0.0001, "loss": 0.0131, "step": 101750 }, { "epoch": 669.4736842105264, "grad_norm": 1.10997474193573, "learning_rate": 0.0001, "loss": 0.0133, "step": 101760 }, { "epoch": 669.5394736842105, "grad_norm": 1.255806565284729, "learning_rate": 0.0001, "loss": 0.014, "step": 101770 }, { "epoch": 669.6052631578947, "grad_norm": 1.3806350231170654, "learning_rate": 0.0001, "loss": 0.0133, "step": 101780 }, { "epoch": 669.671052631579, "grad_norm": 1.3888806104660034, "learning_rate": 0.0001, "loss": 0.0119, "step": 101790 }, { "epoch": 669.7368421052631, "grad_norm": 1.2973408699035645, "learning_rate": 0.0001, "loss": 0.0121, "step": 101800 }, { "epoch": 669.8026315789474, "grad_norm": 1.523041009902954, "learning_rate": 0.0001, "loss": 0.0161, "step": 101810 }, { "epoch": 669.8684210526316, "grad_norm": 1.081453561782837, "learning_rate": 0.0001, "loss": 0.0199, "step": 101820 }, { "epoch": 669.9342105263158, "grad_norm": 1.517839789390564, "learning_rate": 0.0001, "loss": 0.0127, "step": 101830 }, { "epoch": 670.0, "grad_norm": 1.2469325065612793, "learning_rate": 0.0001, "loss": 0.0133, "step": 101840 }, { "epoch": 670.0657894736842, "grad_norm": 1.1616063117980957, "learning_rate": 0.0001, "loss": 0.0113, "step": 101850 }, { "epoch": 670.1315789473684, "grad_norm": 1.3628507852554321, "learning_rate": 0.0001, "loss": 0.0161, "step": 101860 }, { "epoch": 670.1973684210526, "grad_norm": 1.7089757919311523, "learning_rate": 0.0001, "loss": 0.0159, "step": 101870 }, { "epoch": 670.2631578947369, "grad_norm": 1.5724050998687744, "learning_rate": 0.0001, "loss": 0.0145, "step": 101880 }, { "epoch": 670.328947368421, "grad_norm": 1.3863742351531982, "learning_rate": 0.0001, "loss": 0.0138, "step": 101890 }, { "epoch": 670.3947368421053, "grad_norm": 1.2906241416931152, "learning_rate": 0.0001, "loss": 0.0148, "step": 101900 }, { "epoch": 670.4605263157895, "grad_norm": 1.2194890975952148, "learning_rate": 0.0001, "loss": 0.0183, "step": 101910 }, { "epoch": 670.5263157894736, "grad_norm": 1.7609150409698486, "learning_rate": 0.0001, "loss": 0.0147, "step": 101920 }, { "epoch": 670.5921052631579, "grad_norm": 1.5138870477676392, "learning_rate": 0.0001, "loss": 0.0116, "step": 101930 }, { "epoch": 670.6578947368421, "grad_norm": 1.5742788314819336, "learning_rate": 0.0001, "loss": 0.0128, "step": 101940 }, { "epoch": 670.7236842105264, "grad_norm": 1.4049538373947144, "learning_rate": 0.0001, "loss": 0.0151, "step": 101950 }, { "epoch": 670.7894736842105, "grad_norm": 1.4650083780288696, "learning_rate": 0.0001, "loss": 0.0162, "step": 101960 }, { "epoch": 670.8552631578947, "grad_norm": 1.6833230257034302, "learning_rate": 0.0001, "loss": 0.013, "step": 101970 }, { "epoch": 670.921052631579, "grad_norm": 1.157828450202942, "learning_rate": 0.0001, "loss": 0.0149, "step": 101980 }, { "epoch": 670.9868421052631, "grad_norm": 1.4232643842697144, "learning_rate": 0.0001, "loss": 0.0142, "step": 101990 }, { "epoch": 671.0526315789474, "grad_norm": 1.1831313371658325, "learning_rate": 0.0001, "loss": 0.0185, "step": 102000 }, { "epoch": 671.1184210526316, "grad_norm": 1.485419750213623, "learning_rate": 0.0001, "loss": 0.0149, "step": 102010 }, { "epoch": 671.1842105263158, "grad_norm": 1.9691188335418701, "learning_rate": 0.0001, "loss": 0.0111, "step": 102020 }, { "epoch": 671.25, "grad_norm": 1.3715763092041016, "learning_rate": 0.0001, "loss": 0.0119, "step": 102030 }, { "epoch": 671.3157894736842, "grad_norm": 1.9037741422653198, "learning_rate": 0.0001, "loss": 0.0141, "step": 102040 }, { "epoch": 671.3815789473684, "grad_norm": 1.1953930854797363, "learning_rate": 0.0001, "loss": 0.016, "step": 102050 }, { "epoch": 671.4473684210526, "grad_norm": 0.9786741137504578, "learning_rate": 0.0001, "loss": 0.0143, "step": 102060 }, { "epoch": 671.5131578947369, "grad_norm": 1.608737826347351, "learning_rate": 0.0001, "loss": 0.0152, "step": 102070 }, { "epoch": 671.578947368421, "grad_norm": 1.416425347328186, "learning_rate": 0.0001, "loss": 0.0141, "step": 102080 }, { "epoch": 671.6447368421053, "grad_norm": 1.2107266187667847, "learning_rate": 0.0001, "loss": 0.0132, "step": 102090 }, { "epoch": 671.7105263157895, "grad_norm": 1.5697492361068726, "learning_rate": 0.0001, "loss": 0.0151, "step": 102100 }, { "epoch": 671.7763157894736, "grad_norm": 1.2959961891174316, "learning_rate": 0.0001, "loss": 0.0155, "step": 102110 }, { "epoch": 671.8421052631579, "grad_norm": 1.6762232780456543, "learning_rate": 0.0001, "loss": 0.0164, "step": 102120 }, { "epoch": 671.9078947368421, "grad_norm": 1.516201138496399, "learning_rate": 0.0001, "loss": 0.0119, "step": 102130 }, { "epoch": 671.9736842105264, "grad_norm": 1.3739564418792725, "learning_rate": 0.0001, "loss": 0.0133, "step": 102140 }, { "epoch": 672.0394736842105, "grad_norm": 1.4915791749954224, "learning_rate": 0.0001, "loss": 0.0152, "step": 102150 }, { "epoch": 672.1052631578947, "grad_norm": 1.4737424850463867, "learning_rate": 0.0001, "loss": 0.0167, "step": 102160 }, { "epoch": 672.171052631579, "grad_norm": 1.5593748092651367, "learning_rate": 0.0001, "loss": 0.0215, "step": 102170 }, { "epoch": 672.2368421052631, "grad_norm": 1.3305062055587769, "learning_rate": 0.0001, "loss": 0.0147, "step": 102180 }, { "epoch": 672.3026315789474, "grad_norm": 1.1155153512954712, "learning_rate": 0.0001, "loss": 0.0125, "step": 102190 }, { "epoch": 672.3684210526316, "grad_norm": 1.7380589246749878, "learning_rate": 0.0001, "loss": 0.0139, "step": 102200 }, { "epoch": 672.4342105263158, "grad_norm": 1.4604358673095703, "learning_rate": 0.0001, "loss": 0.0181, "step": 102210 }, { "epoch": 672.5, "grad_norm": 1.4476739168167114, "learning_rate": 0.0001, "loss": 0.0112, "step": 102220 }, { "epoch": 672.5657894736842, "grad_norm": 1.784023642539978, "learning_rate": 0.0001, "loss": 0.0141, "step": 102230 }, { "epoch": 672.6315789473684, "grad_norm": 2.0293943881988525, "learning_rate": 0.0001, "loss": 0.0141, "step": 102240 }, { "epoch": 672.6973684210526, "grad_norm": 1.753048062324524, "learning_rate": 0.0001, "loss": 0.0107, "step": 102250 }, { "epoch": 672.7631578947369, "grad_norm": 1.2303646802902222, "learning_rate": 0.0001, "loss": 0.0128, "step": 102260 }, { "epoch": 672.828947368421, "grad_norm": 1.7648169994354248, "learning_rate": 0.0001, "loss": 0.0132, "step": 102270 }, { "epoch": 672.8947368421053, "grad_norm": 1.9938946962356567, "learning_rate": 0.0001, "loss": 0.0118, "step": 102280 }, { "epoch": 672.9605263157895, "grad_norm": 1.7102824449539185, "learning_rate": 0.0001, "loss": 0.0117, "step": 102290 }, { "epoch": 673.0263157894736, "grad_norm": 1.3364213705062866, "learning_rate": 0.0001, "loss": 0.0141, "step": 102300 }, { "epoch": 673.0921052631579, "grad_norm": 1.5935405492782593, "learning_rate": 0.0001, "loss": 0.0148, "step": 102310 }, { "epoch": 673.1578947368421, "grad_norm": 1.3893660306930542, "learning_rate": 0.0001, "loss": 0.0165, "step": 102320 }, { "epoch": 673.2236842105264, "grad_norm": 1.6668514013290405, "learning_rate": 0.0001, "loss": 0.0203, "step": 102330 }, { "epoch": 673.2894736842105, "grad_norm": 2.0832273960113525, "learning_rate": 0.0001, "loss": 0.0136, "step": 102340 }, { "epoch": 673.3552631578947, "grad_norm": 1.7975513935089111, "learning_rate": 0.0001, "loss": 0.0138, "step": 102350 }, { "epoch": 673.421052631579, "grad_norm": 1.5541362762451172, "learning_rate": 0.0001, "loss": 0.0147, "step": 102360 }, { "epoch": 673.4868421052631, "grad_norm": 2.100004196166992, "learning_rate": 0.0001, "loss": 0.0125, "step": 102370 }, { "epoch": 673.5526315789474, "grad_norm": 1.519339919090271, "learning_rate": 0.0001, "loss": 0.0175, "step": 102380 }, { "epoch": 673.6184210526316, "grad_norm": 1.5094468593597412, "learning_rate": 0.0001, "loss": 0.0151, "step": 102390 }, { "epoch": 673.6842105263158, "grad_norm": 1.4263924360275269, "learning_rate": 0.0001, "loss": 0.0154, "step": 102400 }, { "epoch": 673.75, "grad_norm": 1.1994482278823853, "learning_rate": 0.0001, "loss": 0.014, "step": 102410 }, { "epoch": 673.8157894736842, "grad_norm": 1.4312723875045776, "learning_rate": 0.0001, "loss": 0.0144, "step": 102420 }, { "epoch": 673.8815789473684, "grad_norm": 1.5045180320739746, "learning_rate": 0.0001, "loss": 0.0144, "step": 102430 }, { "epoch": 673.9473684210526, "grad_norm": 1.2746684551239014, "learning_rate": 0.0001, "loss": 0.0146, "step": 102440 }, { "epoch": 674.0131578947369, "grad_norm": 1.106709599494934, "learning_rate": 0.0001, "loss": 0.0142, "step": 102450 }, { "epoch": 674.078947368421, "grad_norm": 1.3863939046859741, "learning_rate": 0.0001, "loss": 0.0196, "step": 102460 }, { "epoch": 674.1447368421053, "grad_norm": 1.27506685256958, "learning_rate": 0.0001, "loss": 0.0201, "step": 102470 }, { "epoch": 674.2105263157895, "grad_norm": 1.2903037071228027, "learning_rate": 0.0001, "loss": 0.0144, "step": 102480 }, { "epoch": 674.2763157894736, "grad_norm": 1.392313003540039, "learning_rate": 0.0001, "loss": 0.017, "step": 102490 }, { "epoch": 674.3421052631579, "grad_norm": 1.6653270721435547, "learning_rate": 0.0001, "loss": 0.0149, "step": 102500 }, { "epoch": 674.4078947368421, "grad_norm": 1.6871258020401, "learning_rate": 0.0001, "loss": 0.0125, "step": 102510 }, { "epoch": 674.4736842105264, "grad_norm": 1.5323474407196045, "learning_rate": 0.0001, "loss": 0.0172, "step": 102520 }, { "epoch": 674.5394736842105, "grad_norm": 1.589938759803772, "learning_rate": 0.0001, "loss": 0.0142, "step": 102530 }, { "epoch": 674.6052631578947, "grad_norm": 0.9225675463676453, "learning_rate": 0.0001, "loss": 0.0131, "step": 102540 }, { "epoch": 674.671052631579, "grad_norm": 1.400992751121521, "learning_rate": 0.0001, "loss": 0.0133, "step": 102550 }, { "epoch": 674.7368421052631, "grad_norm": 1.765074372291565, "learning_rate": 0.0001, "loss": 0.0156, "step": 102560 }, { "epoch": 674.8026315789474, "grad_norm": 1.4190680980682373, "learning_rate": 0.0001, "loss": 0.0151, "step": 102570 }, { "epoch": 674.8684210526316, "grad_norm": 1.490584373474121, "learning_rate": 0.0001, "loss": 0.0128, "step": 102580 }, { "epoch": 674.9342105263158, "grad_norm": 1.5924052000045776, "learning_rate": 0.0001, "loss": 0.0169, "step": 102590 }, { "epoch": 675.0, "grad_norm": 1.2814838886260986, "learning_rate": 0.0001, "loss": 0.0137, "step": 102600 }, { "epoch": 675.0657894736842, "grad_norm": 1.3576432466506958, "learning_rate": 0.0001, "loss": 0.0146, "step": 102610 }, { "epoch": 675.1315789473684, "grad_norm": 1.0512124300003052, "learning_rate": 0.0001, "loss": 0.0124, "step": 102620 }, { "epoch": 675.1973684210526, "grad_norm": 1.735721468925476, "learning_rate": 0.0001, "loss": 0.0131, "step": 102630 }, { "epoch": 675.2631578947369, "grad_norm": 1.9722191095352173, "learning_rate": 0.0001, "loss": 0.0166, "step": 102640 }, { "epoch": 675.328947368421, "grad_norm": 1.4864280223846436, "learning_rate": 0.0001, "loss": 0.0131, "step": 102650 }, { "epoch": 675.3947368421053, "grad_norm": 1.6748085021972656, "learning_rate": 0.0001, "loss": 0.0171, "step": 102660 }, { "epoch": 675.4605263157895, "grad_norm": 1.3934227228164673, "learning_rate": 0.0001, "loss": 0.013, "step": 102670 }, { "epoch": 675.5263157894736, "grad_norm": 1.3752304315567017, "learning_rate": 0.0001, "loss": 0.0129, "step": 102680 }, { "epoch": 675.5921052631579, "grad_norm": 1.458877682685852, "learning_rate": 0.0001, "loss": 0.0146, "step": 102690 }, { "epoch": 675.6578947368421, "grad_norm": 1.273902416229248, "learning_rate": 0.0001, "loss": 0.0128, "step": 102700 }, { "epoch": 675.7236842105264, "grad_norm": 1.2862552404403687, "learning_rate": 0.0001, "loss": 0.017, "step": 102710 }, { "epoch": 675.7894736842105, "grad_norm": 1.4275425672531128, "learning_rate": 0.0001, "loss": 0.011, "step": 102720 }, { "epoch": 675.8552631578947, "grad_norm": 1.5958189964294434, "learning_rate": 0.0001, "loss": 0.0143, "step": 102730 }, { "epoch": 675.921052631579, "grad_norm": 1.7245609760284424, "learning_rate": 0.0001, "loss": 0.0162, "step": 102740 }, { "epoch": 675.9868421052631, "grad_norm": 1.6020160913467407, "learning_rate": 0.0001, "loss": 0.0145, "step": 102750 }, { "epoch": 676.0526315789474, "grad_norm": 2.177318572998047, "learning_rate": 0.0001, "loss": 0.0128, "step": 102760 }, { "epoch": 676.1184210526316, "grad_norm": 1.904883861541748, "learning_rate": 0.0001, "loss": 0.0139, "step": 102770 }, { "epoch": 676.1842105263158, "grad_norm": 1.8490303754806519, "learning_rate": 0.0001, "loss": 0.0121, "step": 102780 }, { "epoch": 676.25, "grad_norm": 1.6567201614379883, "learning_rate": 0.0001, "loss": 0.0143, "step": 102790 }, { "epoch": 676.3157894736842, "grad_norm": 1.1878249645233154, "learning_rate": 0.0001, "loss": 0.0153, "step": 102800 }, { "epoch": 676.3815789473684, "grad_norm": 1.0275098085403442, "learning_rate": 0.0001, "loss": 0.0124, "step": 102810 }, { "epoch": 676.4473684210526, "grad_norm": 1.6157249212265015, "learning_rate": 0.0001, "loss": 0.0144, "step": 102820 }, { "epoch": 676.5131578947369, "grad_norm": 1.709367275238037, "learning_rate": 0.0001, "loss": 0.0115, "step": 102830 }, { "epoch": 676.578947368421, "grad_norm": 1.5228643417358398, "learning_rate": 0.0001, "loss": 0.0189, "step": 102840 }, { "epoch": 676.6447368421053, "grad_norm": 2.062875270843506, "learning_rate": 0.0001, "loss": 0.017, "step": 102850 }, { "epoch": 676.7105263157895, "grad_norm": 1.6375800371170044, "learning_rate": 0.0001, "loss": 0.0163, "step": 102860 }, { "epoch": 676.7763157894736, "grad_norm": 1.6098980903625488, "learning_rate": 0.0001, "loss": 0.0136, "step": 102870 }, { "epoch": 676.8421052631579, "grad_norm": 1.758812427520752, "learning_rate": 0.0001, "loss": 0.0121, "step": 102880 }, { "epoch": 676.9078947368421, "grad_norm": 1.037561058998108, "learning_rate": 0.0001, "loss": 0.0125, "step": 102890 }, { "epoch": 676.9736842105264, "grad_norm": 1.6765788793563843, "learning_rate": 0.0001, "loss": 0.0119, "step": 102900 }, { "epoch": 677.0394736842105, "grad_norm": 1.4348900318145752, "learning_rate": 0.0001, "loss": 0.0134, "step": 102910 }, { "epoch": 677.1052631578947, "grad_norm": 1.573360562324524, "learning_rate": 0.0001, "loss": 0.0119, "step": 102920 }, { "epoch": 677.171052631579, "grad_norm": 1.0919407606124878, "learning_rate": 0.0001, "loss": 0.0125, "step": 102930 }, { "epoch": 677.2368421052631, "grad_norm": 1.4311983585357666, "learning_rate": 0.0001, "loss": 0.0141, "step": 102940 }, { "epoch": 677.3026315789474, "grad_norm": 1.6599516868591309, "learning_rate": 0.0001, "loss": 0.0104, "step": 102950 }, { "epoch": 677.3684210526316, "grad_norm": 1.3175612688064575, "learning_rate": 0.0001, "loss": 0.0146, "step": 102960 }, { "epoch": 677.4342105263158, "grad_norm": 1.7873568534851074, "learning_rate": 0.0001, "loss": 0.0149, "step": 102970 }, { "epoch": 677.5, "grad_norm": 1.3898648023605347, "learning_rate": 0.0001, "loss": 0.0143, "step": 102980 }, { "epoch": 677.5657894736842, "grad_norm": 1.5671584606170654, "learning_rate": 0.0001, "loss": 0.0131, "step": 102990 }, { "epoch": 677.6315789473684, "grad_norm": 1.579842448234558, "learning_rate": 0.0001, "loss": 0.0144, "step": 103000 }, { "epoch": 677.6973684210526, "grad_norm": 1.6178920269012451, "learning_rate": 0.0001, "loss": 0.0148, "step": 103010 }, { "epoch": 677.7631578947369, "grad_norm": 1.9630273580551147, "learning_rate": 0.0001, "loss": 0.0244, "step": 103020 }, { "epoch": 677.828947368421, "grad_norm": 1.530535340309143, "learning_rate": 0.0001, "loss": 0.0119, "step": 103030 }, { "epoch": 677.8947368421053, "grad_norm": 1.4067878723144531, "learning_rate": 0.0001, "loss": 0.01, "step": 103040 }, { "epoch": 677.9605263157895, "grad_norm": 1.4359945058822632, "learning_rate": 0.0001, "loss": 0.0112, "step": 103050 }, { "epoch": 678.0263157894736, "grad_norm": 1.4491734504699707, "learning_rate": 0.0001, "loss": 0.0124, "step": 103060 }, { "epoch": 678.0921052631579, "grad_norm": 1.4105638265609741, "learning_rate": 0.0001, "loss": 0.0116, "step": 103070 }, { "epoch": 678.1578947368421, "grad_norm": 1.5818101167678833, "learning_rate": 0.0001, "loss": 0.017, "step": 103080 }, { "epoch": 678.2236842105264, "grad_norm": 1.5700527429580688, "learning_rate": 0.0001, "loss": 0.0158, "step": 103090 }, { "epoch": 678.2894736842105, "grad_norm": 1.3229868412017822, "learning_rate": 0.0001, "loss": 0.0127, "step": 103100 }, { "epoch": 678.3552631578947, "grad_norm": 1.4288700819015503, "learning_rate": 0.0001, "loss": 0.0121, "step": 103110 }, { "epoch": 678.421052631579, "grad_norm": 1.5448458194732666, "learning_rate": 0.0001, "loss": 0.0157, "step": 103120 }, { "epoch": 678.4868421052631, "grad_norm": 1.436314344406128, "learning_rate": 0.0001, "loss": 0.0157, "step": 103130 }, { "epoch": 678.5526315789474, "grad_norm": 1.3176614046096802, "learning_rate": 0.0001, "loss": 0.0131, "step": 103140 }, { "epoch": 678.6184210526316, "grad_norm": 1.8233132362365723, "learning_rate": 0.0001, "loss": 0.0127, "step": 103150 }, { "epoch": 678.6842105263158, "grad_norm": 1.858685851097107, "learning_rate": 0.0001, "loss": 0.0133, "step": 103160 }, { "epoch": 678.75, "grad_norm": 1.6793055534362793, "learning_rate": 0.0001, "loss": 0.0138, "step": 103170 }, { "epoch": 678.8157894736842, "grad_norm": 1.5902068614959717, "learning_rate": 0.0001, "loss": 0.0117, "step": 103180 }, { "epoch": 678.8815789473684, "grad_norm": 1.8248714208602905, "learning_rate": 0.0001, "loss": 0.0149, "step": 103190 }, { "epoch": 678.9473684210526, "grad_norm": 1.9897359609603882, "learning_rate": 0.0001, "loss": 0.0123, "step": 103200 }, { "epoch": 679.0131578947369, "grad_norm": 1.2476563453674316, "learning_rate": 0.0001, "loss": 0.0126, "step": 103210 }, { "epoch": 679.078947368421, "grad_norm": 1.4264522790908813, "learning_rate": 0.0001, "loss": 0.0123, "step": 103220 }, { "epoch": 679.1447368421053, "grad_norm": 1.6358705759048462, "learning_rate": 0.0001, "loss": 0.0146, "step": 103230 }, { "epoch": 679.2105263157895, "grad_norm": 1.3272947072982788, "learning_rate": 0.0001, "loss": 0.0109, "step": 103240 }, { "epoch": 679.2763157894736, "grad_norm": 1.8950024843215942, "learning_rate": 0.0001, "loss": 0.0107, "step": 103250 }, { "epoch": 679.3421052631579, "grad_norm": 1.0197044610977173, "learning_rate": 0.0001, "loss": 0.0153, "step": 103260 }, { "epoch": 679.4078947368421, "grad_norm": 1.4160363674163818, "learning_rate": 0.0001, "loss": 0.0107, "step": 103270 }, { "epoch": 679.4736842105264, "grad_norm": 1.6936793327331543, "learning_rate": 0.0001, "loss": 0.0153, "step": 103280 }, { "epoch": 679.5394736842105, "grad_norm": 1.666623830795288, "learning_rate": 0.0001, "loss": 0.0161, "step": 103290 }, { "epoch": 679.6052631578947, "grad_norm": 1.455980896949768, "learning_rate": 0.0001, "loss": 0.0116, "step": 103300 }, { "epoch": 679.671052631579, "grad_norm": 1.52082359790802, "learning_rate": 0.0001, "loss": 0.0126, "step": 103310 }, { "epoch": 679.7368421052631, "grad_norm": 1.6730157136917114, "learning_rate": 0.0001, "loss": 0.0154, "step": 103320 }, { "epoch": 679.8026315789474, "grad_norm": 1.2211512327194214, "learning_rate": 0.0001, "loss": 0.0127, "step": 103330 }, { "epoch": 679.8684210526316, "grad_norm": 1.2982927560806274, "learning_rate": 0.0001, "loss": 0.0142, "step": 103340 }, { "epoch": 679.9342105263158, "grad_norm": 1.2145583629608154, "learning_rate": 0.0001, "loss": 0.0152, "step": 103350 }, { "epoch": 680.0, "grad_norm": 1.183544397354126, "learning_rate": 0.0001, "loss": 0.0165, "step": 103360 }, { "epoch": 680.0657894736842, "grad_norm": 1.656630039215088, "learning_rate": 0.0001, "loss": 0.0157, "step": 103370 }, { "epoch": 680.1315789473684, "grad_norm": 1.782112956047058, "learning_rate": 0.0001, "loss": 0.0145, "step": 103380 }, { "epoch": 680.1973684210526, "grad_norm": 1.5353258848190308, "learning_rate": 0.0001, "loss": 0.0128, "step": 103390 }, { "epoch": 680.2631578947369, "grad_norm": 1.5369675159454346, "learning_rate": 0.0001, "loss": 0.0131, "step": 103400 }, { "epoch": 680.328947368421, "grad_norm": 1.8047181367874146, "learning_rate": 0.0001, "loss": 0.0137, "step": 103410 }, { "epoch": 680.3947368421053, "grad_norm": 1.335321307182312, "learning_rate": 0.0001, "loss": 0.016, "step": 103420 }, { "epoch": 680.4605263157895, "grad_norm": 1.374880313873291, "learning_rate": 0.0001, "loss": 0.0148, "step": 103430 }, { "epoch": 680.5263157894736, "grad_norm": 0.9316824674606323, "learning_rate": 0.0001, "loss": 0.0108, "step": 103440 }, { "epoch": 680.5921052631579, "grad_norm": 1.6233991384506226, "learning_rate": 0.0001, "loss": 0.013, "step": 103450 }, { "epoch": 680.6578947368421, "grad_norm": 1.7668449878692627, "learning_rate": 0.0001, "loss": 0.0107, "step": 103460 }, { "epoch": 680.7236842105264, "grad_norm": 1.3172235488891602, "learning_rate": 0.0001, "loss": 0.0138, "step": 103470 }, { "epoch": 680.7894736842105, "grad_norm": 1.4263323545455933, "learning_rate": 0.0001, "loss": 0.0166, "step": 103480 }, { "epoch": 680.8552631578947, "grad_norm": 1.4602173566818237, "learning_rate": 0.0001, "loss": 0.0161, "step": 103490 }, { "epoch": 680.921052631579, "grad_norm": 1.561995029449463, "learning_rate": 0.0001, "loss": 0.0133, "step": 103500 }, { "epoch": 680.9868421052631, "grad_norm": 1.3833229541778564, "learning_rate": 0.0001, "loss": 0.0147, "step": 103510 }, { "epoch": 681.0526315789474, "grad_norm": 1.3059651851654053, "learning_rate": 0.0001, "loss": 0.012, "step": 103520 }, { "epoch": 681.1184210526316, "grad_norm": 1.6736021041870117, "learning_rate": 0.0001, "loss": 0.0125, "step": 103530 }, { "epoch": 681.1842105263158, "grad_norm": 1.2682831287384033, "learning_rate": 0.0001, "loss": 0.0118, "step": 103540 }, { "epoch": 681.25, "grad_norm": 1.514453649520874, "learning_rate": 0.0001, "loss": 0.0153, "step": 103550 }, { "epoch": 681.3157894736842, "grad_norm": 1.3349605798721313, "learning_rate": 0.0001, "loss": 0.0124, "step": 103560 }, { "epoch": 681.3815789473684, "grad_norm": 1.571294903755188, "learning_rate": 0.0001, "loss": 0.0127, "step": 103570 }, { "epoch": 681.4473684210526, "grad_norm": 1.5778584480285645, "learning_rate": 0.0001, "loss": 0.0147, "step": 103580 }, { "epoch": 681.5131578947369, "grad_norm": 1.8135418891906738, "learning_rate": 0.0001, "loss": 0.0161, "step": 103590 }, { "epoch": 681.578947368421, "grad_norm": 1.511857271194458, "learning_rate": 0.0001, "loss": 0.0152, "step": 103600 }, { "epoch": 681.6447368421053, "grad_norm": 1.9023253917694092, "learning_rate": 0.0001, "loss": 0.0127, "step": 103610 }, { "epoch": 681.7105263157895, "grad_norm": 1.494551658630371, "learning_rate": 0.0001, "loss": 0.0162, "step": 103620 }, { "epoch": 681.7763157894736, "grad_norm": 1.780362606048584, "learning_rate": 0.0001, "loss": 0.0148, "step": 103630 }, { "epoch": 681.8421052631579, "grad_norm": 1.630050778388977, "learning_rate": 0.0001, "loss": 0.0151, "step": 103640 }, { "epoch": 681.9078947368421, "grad_norm": 1.2438414096832275, "learning_rate": 0.0001, "loss": 0.0156, "step": 103650 }, { "epoch": 681.9736842105264, "grad_norm": 1.6547073125839233, "learning_rate": 0.0001, "loss": 0.0147, "step": 103660 }, { "epoch": 682.0394736842105, "grad_norm": 1.2867809534072876, "learning_rate": 0.0001, "loss": 0.0111, "step": 103670 }, { "epoch": 682.1052631578947, "grad_norm": 1.1837660074234009, "learning_rate": 0.0001, "loss": 0.0134, "step": 103680 }, { "epoch": 682.171052631579, "grad_norm": 1.645387053489685, "learning_rate": 0.0001, "loss": 0.0152, "step": 103690 }, { "epoch": 682.2368421052631, "grad_norm": 1.4568959474563599, "learning_rate": 0.0001, "loss": 0.0136, "step": 103700 }, { "epoch": 682.3026315789474, "grad_norm": 1.1122242212295532, "learning_rate": 0.0001, "loss": 0.0159, "step": 103710 }, { "epoch": 682.3684210526316, "grad_norm": 1.514346957206726, "learning_rate": 0.0001, "loss": 0.015, "step": 103720 }, { "epoch": 682.4342105263158, "grad_norm": 1.380468726158142, "learning_rate": 0.0001, "loss": 0.0109, "step": 103730 }, { "epoch": 682.5, "grad_norm": 1.5775866508483887, "learning_rate": 0.0001, "loss": 0.0135, "step": 103740 }, { "epoch": 682.5657894736842, "grad_norm": 1.3132270574569702, "learning_rate": 0.0001, "loss": 0.015, "step": 103750 }, { "epoch": 682.6315789473684, "grad_norm": 1.258286714553833, "learning_rate": 0.0001, "loss": 0.0126, "step": 103760 }, { "epoch": 682.6973684210526, "grad_norm": 1.4886491298675537, "learning_rate": 0.0001, "loss": 0.0145, "step": 103770 }, { "epoch": 682.7631578947369, "grad_norm": 1.5393478870391846, "learning_rate": 0.0001, "loss": 0.0129, "step": 103780 }, { "epoch": 682.828947368421, "grad_norm": 1.2863644361495972, "learning_rate": 0.0001, "loss": 0.0167, "step": 103790 }, { "epoch": 682.8947368421053, "grad_norm": 1.712401270866394, "learning_rate": 0.0001, "loss": 0.0143, "step": 103800 }, { "epoch": 682.9605263157895, "grad_norm": 1.5377167463302612, "learning_rate": 0.0001, "loss": 0.0131, "step": 103810 }, { "epoch": 683.0263157894736, "grad_norm": 1.6634767055511475, "learning_rate": 0.0001, "loss": 0.0129, "step": 103820 }, { "epoch": 683.0921052631579, "grad_norm": 1.2185044288635254, "learning_rate": 0.0001, "loss": 0.013, "step": 103830 }, { "epoch": 683.1578947368421, "grad_norm": 1.4431778192520142, "learning_rate": 0.0001, "loss": 0.0151, "step": 103840 }, { "epoch": 683.2236842105264, "grad_norm": 1.7402610778808594, "learning_rate": 0.0001, "loss": 0.0131, "step": 103850 }, { "epoch": 683.2894736842105, "grad_norm": 1.29876708984375, "learning_rate": 0.0001, "loss": 0.0152, "step": 103860 }, { "epoch": 683.3552631578947, "grad_norm": 1.1407170295715332, "learning_rate": 0.0001, "loss": 0.0152, "step": 103870 }, { "epoch": 683.421052631579, "grad_norm": 1.1023989915847778, "learning_rate": 0.0001, "loss": 0.0109, "step": 103880 }, { "epoch": 683.4868421052631, "grad_norm": 1.2889491319656372, "learning_rate": 0.0001, "loss": 0.0135, "step": 103890 }, { "epoch": 683.5526315789474, "grad_norm": 1.1254931688308716, "learning_rate": 0.0001, "loss": 0.0122, "step": 103900 }, { "epoch": 683.6184210526316, "grad_norm": 1.4954990148544312, "learning_rate": 0.0001, "loss": 0.0191, "step": 103910 }, { "epoch": 683.6842105263158, "grad_norm": 1.683517575263977, "learning_rate": 0.0001, "loss": 0.0146, "step": 103920 }, { "epoch": 683.75, "grad_norm": 1.6155298948287964, "learning_rate": 0.0001, "loss": 0.0102, "step": 103930 }, { "epoch": 683.8157894736842, "grad_norm": 1.9767946004867554, "learning_rate": 0.0001, "loss": 0.0161, "step": 103940 }, { "epoch": 683.8815789473684, "grad_norm": 2.314356565475464, "learning_rate": 0.0001, "loss": 0.0116, "step": 103950 }, { "epoch": 683.9473684210526, "grad_norm": 1.9150553941726685, "learning_rate": 0.0001, "loss": 0.0153, "step": 103960 }, { "epoch": 684.0131578947369, "grad_norm": 1.7755393981933594, "learning_rate": 0.0001, "loss": 0.0135, "step": 103970 }, { "epoch": 684.078947368421, "grad_norm": 1.635217308998108, "learning_rate": 0.0001, "loss": 0.0141, "step": 103980 }, { "epoch": 684.1447368421053, "grad_norm": 1.761998176574707, "learning_rate": 0.0001, "loss": 0.0167, "step": 103990 }, { "epoch": 684.2105263157895, "grad_norm": 1.4627642631530762, "learning_rate": 0.0001, "loss": 0.0174, "step": 104000 }, { "epoch": 684.2763157894736, "grad_norm": 1.0487887859344482, "learning_rate": 0.0001, "loss": 0.0128, "step": 104010 }, { "epoch": 684.3421052631579, "grad_norm": 1.6420763731002808, "learning_rate": 0.0001, "loss": 0.0128, "step": 104020 }, { "epoch": 684.4078947368421, "grad_norm": 1.561342477798462, "learning_rate": 0.0001, "loss": 0.0141, "step": 104030 }, { "epoch": 684.4736842105264, "grad_norm": 1.0517836809158325, "learning_rate": 0.0001, "loss": 0.0119, "step": 104040 }, { "epoch": 684.5394736842105, "grad_norm": 1.6754345893859863, "learning_rate": 0.0001, "loss": 0.0139, "step": 104050 }, { "epoch": 684.6052631578947, "grad_norm": 1.6410045623779297, "learning_rate": 0.0001, "loss": 0.0174, "step": 104060 }, { "epoch": 684.671052631579, "grad_norm": 1.538403034210205, "learning_rate": 0.0001, "loss": 0.0101, "step": 104070 }, { "epoch": 684.7368421052631, "grad_norm": 1.8080343008041382, "learning_rate": 0.0001, "loss": 0.0149, "step": 104080 }, { "epoch": 684.8026315789474, "grad_norm": 1.4107342958450317, "learning_rate": 0.0001, "loss": 0.0149, "step": 104090 }, { "epoch": 684.8684210526316, "grad_norm": 1.8649604320526123, "learning_rate": 0.0001, "loss": 0.0122, "step": 104100 }, { "epoch": 684.9342105263158, "grad_norm": 1.708782434463501, "learning_rate": 0.0001, "loss": 0.014, "step": 104110 }, { "epoch": 685.0, "grad_norm": 1.7015340328216553, "learning_rate": 0.0001, "loss": 0.0102, "step": 104120 }, { "epoch": 685.0657894736842, "grad_norm": 1.630435824394226, "learning_rate": 0.0001, "loss": 0.0155, "step": 104130 }, { "epoch": 685.1315789473684, "grad_norm": 1.4892579317092896, "learning_rate": 0.0001, "loss": 0.0139, "step": 104140 }, { "epoch": 685.1973684210526, "grad_norm": 1.5648330450057983, "learning_rate": 0.0001, "loss": 0.0124, "step": 104150 }, { "epoch": 685.2631578947369, "grad_norm": 1.5096991062164307, "learning_rate": 0.0001, "loss": 0.0158, "step": 104160 }, { "epoch": 685.328947368421, "grad_norm": 1.4772675037384033, "learning_rate": 0.0001, "loss": 0.013, "step": 104170 }, { "epoch": 685.3947368421053, "grad_norm": 1.9310461282730103, "learning_rate": 0.0001, "loss": 0.0122, "step": 104180 }, { "epoch": 685.4605263157895, "grad_norm": 1.7002077102661133, "learning_rate": 0.0001, "loss": 0.0127, "step": 104190 }, { "epoch": 685.5263157894736, "grad_norm": 1.422314167022705, "learning_rate": 0.0001, "loss": 0.0135, "step": 104200 }, { "epoch": 685.5921052631579, "grad_norm": 1.274430513381958, "learning_rate": 0.0001, "loss": 0.0099, "step": 104210 }, { "epoch": 685.6578947368421, "grad_norm": 1.636681079864502, "learning_rate": 0.0001, "loss": 0.0128, "step": 104220 }, { "epoch": 685.7236842105264, "grad_norm": 1.245060682296753, "learning_rate": 0.0001, "loss": 0.0156, "step": 104230 }, { "epoch": 685.7894736842105, "grad_norm": 1.3873101472854614, "learning_rate": 0.0001, "loss": 0.0118, "step": 104240 }, { "epoch": 685.8552631578947, "grad_norm": 1.5433461666107178, "learning_rate": 0.0001, "loss": 0.0145, "step": 104250 }, { "epoch": 685.921052631579, "grad_norm": 1.3378175497055054, "learning_rate": 0.0001, "loss": 0.0175, "step": 104260 }, { "epoch": 685.9868421052631, "grad_norm": 0.9543928503990173, "learning_rate": 0.0001, "loss": 0.0119, "step": 104270 }, { "epoch": 686.0526315789474, "grad_norm": 1.2870662212371826, "learning_rate": 0.0001, "loss": 0.0139, "step": 104280 }, { "epoch": 686.1184210526316, "grad_norm": 1.632830262184143, "learning_rate": 0.0001, "loss": 0.0137, "step": 104290 }, { "epoch": 686.1842105263158, "grad_norm": 0.9918846487998962, "learning_rate": 0.0001, "loss": 0.0109, "step": 104300 }, { "epoch": 686.25, "grad_norm": 1.7410178184509277, "learning_rate": 0.0001, "loss": 0.0119, "step": 104310 }, { "epoch": 686.3157894736842, "grad_norm": 1.4961419105529785, "learning_rate": 0.0001, "loss": 0.0117, "step": 104320 }, { "epoch": 686.3815789473684, "grad_norm": 1.9003850221633911, "learning_rate": 0.0001, "loss": 0.0155, "step": 104330 }, { "epoch": 686.4473684210526, "grad_norm": 1.565114140510559, "learning_rate": 0.0001, "loss": 0.0148, "step": 104340 }, { "epoch": 686.5131578947369, "grad_norm": 1.8248660564422607, "learning_rate": 0.0001, "loss": 0.0171, "step": 104350 }, { "epoch": 686.578947368421, "grad_norm": 2.0825815200805664, "learning_rate": 0.0001, "loss": 0.0143, "step": 104360 }, { "epoch": 686.6447368421053, "grad_norm": 1.4606989622116089, "learning_rate": 0.0001, "loss": 0.0116, "step": 104370 }, { "epoch": 686.7105263157895, "grad_norm": 1.4242645502090454, "learning_rate": 0.0001, "loss": 0.0166, "step": 104380 }, { "epoch": 686.7763157894736, "grad_norm": 1.459264874458313, "learning_rate": 0.0001, "loss": 0.0158, "step": 104390 }, { "epoch": 686.8421052631579, "grad_norm": 1.4932575225830078, "learning_rate": 0.0001, "loss": 0.0145, "step": 104400 }, { "epoch": 686.9078947368421, "grad_norm": 1.0444973707199097, "learning_rate": 0.0001, "loss": 0.0138, "step": 104410 }, { "epoch": 686.9736842105264, "grad_norm": 1.204711675643921, "learning_rate": 0.0001, "loss": 0.0137, "step": 104420 }, { "epoch": 687.0394736842105, "grad_norm": 1.227626919746399, "learning_rate": 0.0001, "loss": 0.0117, "step": 104430 }, { "epoch": 687.1052631578947, "grad_norm": 1.3246458768844604, "learning_rate": 0.0001, "loss": 0.0136, "step": 104440 }, { "epoch": 687.171052631579, "grad_norm": 1.7293860912322998, "learning_rate": 0.0001, "loss": 0.0147, "step": 104450 }, { "epoch": 687.2368421052631, "grad_norm": 1.6109809875488281, "learning_rate": 0.0001, "loss": 0.0128, "step": 104460 }, { "epoch": 687.3026315789474, "grad_norm": 1.7378265857696533, "learning_rate": 0.0001, "loss": 0.014, "step": 104470 }, { "epoch": 687.3684210526316, "grad_norm": 1.7891714572906494, "learning_rate": 0.0001, "loss": 0.0113, "step": 104480 }, { "epoch": 687.4342105263158, "grad_norm": 1.8044416904449463, "learning_rate": 0.0001, "loss": 0.0199, "step": 104490 }, { "epoch": 687.5, "grad_norm": 2.015094041824341, "learning_rate": 0.0001, "loss": 0.0138, "step": 104500 }, { "epoch": 687.5657894736842, "grad_norm": 1.303857445716858, "learning_rate": 0.0001, "loss": 0.0115, "step": 104510 }, { "epoch": 687.6315789473684, "grad_norm": 1.707370638847351, "learning_rate": 0.0001, "loss": 0.0099, "step": 104520 }, { "epoch": 687.6973684210526, "grad_norm": 1.4760279655456543, "learning_rate": 0.0001, "loss": 0.0164, "step": 104530 }, { "epoch": 687.7631578947369, "grad_norm": 1.8262722492218018, "learning_rate": 0.0001, "loss": 0.0158, "step": 104540 }, { "epoch": 687.828947368421, "grad_norm": 1.153454065322876, "learning_rate": 0.0001, "loss": 0.0138, "step": 104550 }, { "epoch": 687.8947368421053, "grad_norm": 1.3070913553237915, "learning_rate": 0.0001, "loss": 0.0144, "step": 104560 }, { "epoch": 687.9605263157895, "grad_norm": 1.3358815908432007, "learning_rate": 0.0001, "loss": 0.0125, "step": 104570 }, { "epoch": 688.0263157894736, "grad_norm": 1.48537015914917, "learning_rate": 0.0001, "loss": 0.0134, "step": 104580 }, { "epoch": 688.0921052631579, "grad_norm": 1.3038129806518555, "learning_rate": 0.0001, "loss": 0.0101, "step": 104590 }, { "epoch": 688.1578947368421, "grad_norm": 1.6573169231414795, "learning_rate": 0.0001, "loss": 0.0166, "step": 104600 }, { "epoch": 688.2236842105264, "grad_norm": 1.7629177570343018, "learning_rate": 0.0001, "loss": 0.0138, "step": 104610 }, { "epoch": 688.2894736842105, "grad_norm": 1.4855196475982666, "learning_rate": 0.0001, "loss": 0.0111, "step": 104620 }, { "epoch": 688.3552631578947, "grad_norm": 1.5531282424926758, "learning_rate": 0.0001, "loss": 0.0145, "step": 104630 }, { "epoch": 688.421052631579, "grad_norm": 0.8338920474052429, "learning_rate": 0.0001, "loss": 0.0207, "step": 104640 }, { "epoch": 688.4868421052631, "grad_norm": 1.1741689443588257, "learning_rate": 0.0001, "loss": 0.0125, "step": 104650 }, { "epoch": 688.5526315789474, "grad_norm": 1.0783884525299072, "learning_rate": 0.0001, "loss": 0.013, "step": 104660 }, { "epoch": 688.6184210526316, "grad_norm": 1.6957943439483643, "learning_rate": 0.0001, "loss": 0.0132, "step": 104670 }, { "epoch": 688.6842105263158, "grad_norm": 1.3917858600616455, "learning_rate": 0.0001, "loss": 0.0161, "step": 104680 }, { "epoch": 688.75, "grad_norm": 2.1980385780334473, "learning_rate": 0.0001, "loss": 0.0134, "step": 104690 }, { "epoch": 688.8157894736842, "grad_norm": 1.5790433883666992, "learning_rate": 0.0001, "loss": 0.0159, "step": 104700 }, { "epoch": 688.8815789473684, "grad_norm": 1.3497258424758911, "learning_rate": 0.0001, "loss": 0.0135, "step": 104710 }, { "epoch": 688.9473684210526, "grad_norm": 1.5016504526138306, "learning_rate": 0.0001, "loss": 0.0151, "step": 104720 }, { "epoch": 689.0131578947369, "grad_norm": 0.9557645916938782, "learning_rate": 0.0001, "loss": 0.0108, "step": 104730 }, { "epoch": 689.078947368421, "grad_norm": 1.9629571437835693, "learning_rate": 0.0001, "loss": 0.0141, "step": 104740 }, { "epoch": 689.1447368421053, "grad_norm": 1.559274673461914, "learning_rate": 0.0001, "loss": 0.0114, "step": 104750 }, { "epoch": 689.2105263157895, "grad_norm": 1.2656328678131104, "learning_rate": 0.0001, "loss": 0.0121, "step": 104760 }, { "epoch": 689.2763157894736, "grad_norm": 1.18551504611969, "learning_rate": 0.0001, "loss": 0.0162, "step": 104770 }, { "epoch": 689.3421052631579, "grad_norm": 1.5061768293380737, "learning_rate": 0.0001, "loss": 0.0182, "step": 104780 }, { "epoch": 689.4078947368421, "grad_norm": 1.1907559633255005, "learning_rate": 0.0001, "loss": 0.015, "step": 104790 }, { "epoch": 689.4736842105264, "grad_norm": 1.700225830078125, "learning_rate": 0.0001, "loss": 0.0133, "step": 104800 }, { "epoch": 689.5394736842105, "grad_norm": 1.5362460613250732, "learning_rate": 0.0001, "loss": 0.0135, "step": 104810 }, { "epoch": 689.6052631578947, "grad_norm": 1.8739700317382812, "learning_rate": 0.0001, "loss": 0.0188, "step": 104820 }, { "epoch": 689.671052631579, "grad_norm": 1.440048098564148, "learning_rate": 0.0001, "loss": 0.0134, "step": 104830 }, { "epoch": 689.7368421052631, "grad_norm": 1.256736159324646, "learning_rate": 0.0001, "loss": 0.0149, "step": 104840 }, { "epoch": 689.8026315789474, "grad_norm": 1.2653160095214844, "learning_rate": 0.0001, "loss": 0.0117, "step": 104850 }, { "epoch": 689.8684210526316, "grad_norm": 1.6488842964172363, "learning_rate": 0.0001, "loss": 0.0101, "step": 104860 }, { "epoch": 689.9342105263158, "grad_norm": 1.4516634941101074, "learning_rate": 0.0001, "loss": 0.0151, "step": 104870 }, { "epoch": 690.0, "grad_norm": 1.3153610229492188, "learning_rate": 0.0001, "loss": 0.0137, "step": 104880 }, { "epoch": 690.0657894736842, "grad_norm": 1.4556366205215454, "learning_rate": 0.0001, "loss": 0.011, "step": 104890 }, { "epoch": 690.1315789473684, "grad_norm": 1.3718081712722778, "learning_rate": 0.0001, "loss": 0.0148, "step": 104900 }, { "epoch": 690.1973684210526, "grad_norm": 1.4221093654632568, "learning_rate": 0.0001, "loss": 0.017, "step": 104910 }, { "epoch": 690.2631578947369, "grad_norm": 1.1901012659072876, "learning_rate": 0.0001, "loss": 0.0138, "step": 104920 }, { "epoch": 690.328947368421, "grad_norm": 1.0860316753387451, "learning_rate": 0.0001, "loss": 0.0103, "step": 104930 }, { "epoch": 690.3947368421053, "grad_norm": 1.289321780204773, "learning_rate": 0.0001, "loss": 0.0132, "step": 104940 }, { "epoch": 690.4605263157895, "grad_norm": 1.1440311670303345, "learning_rate": 0.0001, "loss": 0.0127, "step": 104950 }, { "epoch": 690.5263157894736, "grad_norm": 1.607438325881958, "learning_rate": 0.0001, "loss": 0.0135, "step": 104960 }, { "epoch": 690.5921052631579, "grad_norm": 1.4826351404190063, "learning_rate": 0.0001, "loss": 0.0143, "step": 104970 }, { "epoch": 690.6578947368421, "grad_norm": 1.1041948795318604, "learning_rate": 0.0001, "loss": 0.0153, "step": 104980 }, { "epoch": 690.7236842105264, "grad_norm": 1.2917048931121826, "learning_rate": 0.0001, "loss": 0.0167, "step": 104990 }, { "epoch": 690.7894736842105, "grad_norm": 1.6774380207061768, "learning_rate": 0.0001, "loss": 0.0126, "step": 105000 }, { "epoch": 690.8552631578947, "grad_norm": 1.9800984859466553, "learning_rate": 0.0001, "loss": 0.0157, "step": 105010 }, { "epoch": 690.921052631579, "grad_norm": 1.535022497177124, "learning_rate": 0.0001, "loss": 0.0127, "step": 105020 }, { "epoch": 690.9868421052631, "grad_norm": 1.6030515432357788, "learning_rate": 0.0001, "loss": 0.0178, "step": 105030 }, { "epoch": 691.0526315789474, "grad_norm": 1.4422091245651245, "learning_rate": 0.0001, "loss": 0.0131, "step": 105040 }, { "epoch": 691.1184210526316, "grad_norm": 1.7398788928985596, "learning_rate": 0.0001, "loss": 0.0145, "step": 105050 }, { "epoch": 691.1842105263158, "grad_norm": 1.596239447593689, "learning_rate": 0.0001, "loss": 0.0181, "step": 105060 }, { "epoch": 691.25, "grad_norm": 1.0196901559829712, "learning_rate": 0.0001, "loss": 0.0144, "step": 105070 }, { "epoch": 691.3157894736842, "grad_norm": 1.520843744277954, "learning_rate": 0.0001, "loss": 0.0131, "step": 105080 }, { "epoch": 691.3815789473684, "grad_norm": 1.6769917011260986, "learning_rate": 0.0001, "loss": 0.0161, "step": 105090 }, { "epoch": 691.4473684210526, "grad_norm": 1.4156259298324585, "learning_rate": 0.0001, "loss": 0.0125, "step": 105100 }, { "epoch": 691.5131578947369, "grad_norm": 1.4868278503417969, "learning_rate": 0.0001, "loss": 0.0123, "step": 105110 }, { "epoch": 691.578947368421, "grad_norm": 1.561286211013794, "learning_rate": 0.0001, "loss": 0.0155, "step": 105120 }, { "epoch": 691.6447368421053, "grad_norm": 1.9852877855300903, "learning_rate": 0.0001, "loss": 0.013, "step": 105130 }, { "epoch": 691.7105263157895, "grad_norm": 1.8260021209716797, "learning_rate": 0.0001, "loss": 0.0134, "step": 105140 }, { "epoch": 691.7763157894736, "grad_norm": 1.6518685817718506, "learning_rate": 0.0001, "loss": 0.0116, "step": 105150 }, { "epoch": 691.8421052631579, "grad_norm": 1.4210432767868042, "learning_rate": 0.0001, "loss": 0.0135, "step": 105160 }, { "epoch": 691.9078947368421, "grad_norm": 1.5000817775726318, "learning_rate": 0.0001, "loss": 0.012, "step": 105170 }, { "epoch": 691.9736842105264, "grad_norm": 1.4802765846252441, "learning_rate": 0.0001, "loss": 0.0184, "step": 105180 }, { "epoch": 692.0394736842105, "grad_norm": 1.3716832399368286, "learning_rate": 0.0001, "loss": 0.0115, "step": 105190 }, { "epoch": 692.1052631578947, "grad_norm": 1.489518642425537, "learning_rate": 0.0001, "loss": 0.0175, "step": 105200 }, { "epoch": 692.171052631579, "grad_norm": 1.2909327745437622, "learning_rate": 0.0001, "loss": 0.0142, "step": 105210 }, { "epoch": 692.2368421052631, "grad_norm": 1.513528823852539, "learning_rate": 0.0001, "loss": 0.0142, "step": 105220 }, { "epoch": 692.3026315789474, "grad_norm": 1.3307676315307617, "learning_rate": 0.0001, "loss": 0.0155, "step": 105230 }, { "epoch": 692.3684210526316, "grad_norm": 1.2393461465835571, "learning_rate": 0.0001, "loss": 0.0152, "step": 105240 }, { "epoch": 692.4342105263158, "grad_norm": 1.736340880393982, "learning_rate": 0.0001, "loss": 0.0117, "step": 105250 }, { "epoch": 692.5, "grad_norm": 1.8451350927352905, "learning_rate": 0.0001, "loss": 0.0143, "step": 105260 }, { "epoch": 692.5657894736842, "grad_norm": 1.642224907875061, "learning_rate": 0.0001, "loss": 0.0112, "step": 105270 }, { "epoch": 692.6315789473684, "grad_norm": 2.156294345855713, "learning_rate": 0.0001, "loss": 0.0133, "step": 105280 }, { "epoch": 692.6973684210526, "grad_norm": 1.7240437269210815, "learning_rate": 0.0001, "loss": 0.0166, "step": 105290 }, { "epoch": 692.7631578947369, "grad_norm": 1.5744749307632446, "learning_rate": 0.0001, "loss": 0.0163, "step": 105300 }, { "epoch": 692.828947368421, "grad_norm": 1.262880563735962, "learning_rate": 0.0001, "loss": 0.0111, "step": 105310 }, { "epoch": 692.8947368421053, "grad_norm": 1.5260062217712402, "learning_rate": 0.0001, "loss": 0.012, "step": 105320 }, { "epoch": 692.9605263157895, "grad_norm": 1.7391053438186646, "learning_rate": 0.0001, "loss": 0.0109, "step": 105330 }, { "epoch": 693.0263157894736, "grad_norm": 1.3582191467285156, "learning_rate": 0.0001, "loss": 0.014, "step": 105340 }, { "epoch": 693.0921052631579, "grad_norm": 1.7476472854614258, "learning_rate": 0.0001, "loss": 0.0179, "step": 105350 }, { "epoch": 693.1578947368421, "grad_norm": 1.373409390449524, "learning_rate": 0.0001, "loss": 0.0153, "step": 105360 }, { "epoch": 693.2236842105264, "grad_norm": 1.3609652519226074, "learning_rate": 0.0001, "loss": 0.0142, "step": 105370 }, { "epoch": 693.2894736842105, "grad_norm": 1.3780475854873657, "learning_rate": 0.0001, "loss": 0.0112, "step": 105380 }, { "epoch": 693.3552631578947, "grad_norm": 1.4695158004760742, "learning_rate": 0.0001, "loss": 0.0135, "step": 105390 }, { "epoch": 693.421052631579, "grad_norm": 1.0664074420928955, "learning_rate": 0.0001, "loss": 0.0121, "step": 105400 }, { "epoch": 693.4868421052631, "grad_norm": 1.6935545206069946, "learning_rate": 0.0001, "loss": 0.0134, "step": 105410 }, { "epoch": 693.5526315789474, "grad_norm": 1.5434010028839111, "learning_rate": 0.0001, "loss": 0.0174, "step": 105420 }, { "epoch": 693.6184210526316, "grad_norm": 1.6404600143432617, "learning_rate": 0.0001, "loss": 0.0155, "step": 105430 }, { "epoch": 693.6842105263158, "grad_norm": 1.4984164237976074, "learning_rate": 0.0001, "loss": 0.0118, "step": 105440 }, { "epoch": 693.75, "grad_norm": 1.5167447328567505, "learning_rate": 0.0001, "loss": 0.0129, "step": 105450 }, { "epoch": 693.8157894736842, "grad_norm": 1.0867475271224976, "learning_rate": 0.0001, "loss": 0.0118, "step": 105460 }, { "epoch": 693.8815789473684, "grad_norm": 1.4027730226516724, "learning_rate": 0.0001, "loss": 0.0118, "step": 105470 }, { "epoch": 693.9473684210526, "grad_norm": 1.5792341232299805, "learning_rate": 0.0001, "loss": 0.0135, "step": 105480 }, { "epoch": 694.0131578947369, "grad_norm": 1.6597784757614136, "learning_rate": 0.0001, "loss": 0.0165, "step": 105490 }, { "epoch": 694.078947368421, "grad_norm": 1.677793264389038, "learning_rate": 0.0001, "loss": 0.0134, "step": 105500 }, { "epoch": 694.1447368421053, "grad_norm": 1.7908496856689453, "learning_rate": 0.0001, "loss": 0.0136, "step": 105510 }, { "epoch": 694.2105263157895, "grad_norm": 1.9147403240203857, "learning_rate": 0.0001, "loss": 0.0142, "step": 105520 }, { "epoch": 694.2763157894736, "grad_norm": 2.1271610260009766, "learning_rate": 0.0001, "loss": 0.0118, "step": 105530 }, { "epoch": 694.3421052631579, "grad_norm": 2.009016275405884, "learning_rate": 0.0001, "loss": 0.0131, "step": 105540 }, { "epoch": 694.4078947368421, "grad_norm": 1.7878384590148926, "learning_rate": 0.0001, "loss": 0.0116, "step": 105550 }, { "epoch": 694.4736842105264, "grad_norm": 1.8793010711669922, "learning_rate": 0.0001, "loss": 0.0115, "step": 105560 }, { "epoch": 694.5394736842105, "grad_norm": 1.5323548316955566, "learning_rate": 0.0001, "loss": 0.0103, "step": 105570 }, { "epoch": 694.6052631578947, "grad_norm": 1.1389228105545044, "learning_rate": 0.0001, "loss": 0.0157, "step": 105580 }, { "epoch": 694.671052631579, "grad_norm": 1.1261117458343506, "learning_rate": 0.0001, "loss": 0.0144, "step": 105590 }, { "epoch": 694.7368421052631, "grad_norm": 1.6269359588623047, "learning_rate": 0.0001, "loss": 0.0138, "step": 105600 }, { "epoch": 694.8026315789474, "grad_norm": 1.9936103820800781, "learning_rate": 0.0001, "loss": 0.0141, "step": 105610 }, { "epoch": 694.8684210526316, "grad_norm": 1.6820183992385864, "learning_rate": 0.0001, "loss": 0.0149, "step": 105620 }, { "epoch": 694.9342105263158, "grad_norm": 1.8930211067199707, "learning_rate": 0.0001, "loss": 0.0147, "step": 105630 }, { "epoch": 695.0, "grad_norm": 1.4064968824386597, "learning_rate": 0.0001, "loss": 0.0153, "step": 105640 }, { "epoch": 695.0657894736842, "grad_norm": 1.2035597562789917, "learning_rate": 0.0001, "loss": 0.0133, "step": 105650 }, { "epoch": 695.1315789473684, "grad_norm": 1.5009838342666626, "learning_rate": 0.0001, "loss": 0.0101, "step": 105660 }, { "epoch": 695.1973684210526, "grad_norm": 1.6444599628448486, "learning_rate": 0.0001, "loss": 0.0164, "step": 105670 }, { "epoch": 695.2631578947369, "grad_norm": 1.6600236892700195, "learning_rate": 0.0001, "loss": 0.013, "step": 105680 }, { "epoch": 695.328947368421, "grad_norm": 1.5427777767181396, "learning_rate": 0.0001, "loss": 0.0116, "step": 105690 }, { "epoch": 695.3947368421053, "grad_norm": 1.7223405838012695, "learning_rate": 0.0001, "loss": 0.0173, "step": 105700 }, { "epoch": 695.4605263157895, "grad_norm": 1.8167531490325928, "learning_rate": 0.0001, "loss": 0.016, "step": 105710 }, { "epoch": 695.5263157894736, "grad_norm": 1.5147333145141602, "learning_rate": 0.0001, "loss": 0.0129, "step": 105720 }, { "epoch": 695.5921052631579, "grad_norm": 1.2261242866516113, "learning_rate": 0.0001, "loss": 0.0141, "step": 105730 }, { "epoch": 695.6578947368421, "grad_norm": 1.373762845993042, "learning_rate": 0.0001, "loss": 0.0138, "step": 105740 }, { "epoch": 695.7236842105264, "grad_norm": 1.2201690673828125, "learning_rate": 0.0001, "loss": 0.0169, "step": 105750 }, { "epoch": 695.7894736842105, "grad_norm": 1.767767310142517, "learning_rate": 0.0001, "loss": 0.0102, "step": 105760 }, { "epoch": 695.8552631578947, "grad_norm": 1.4776166677474976, "learning_rate": 0.0001, "loss": 0.0166, "step": 105770 }, { "epoch": 695.921052631579, "grad_norm": 1.4815547466278076, "learning_rate": 0.0001, "loss": 0.0119, "step": 105780 }, { "epoch": 695.9868421052631, "grad_norm": 1.4975911378860474, "learning_rate": 0.0001, "loss": 0.0107, "step": 105790 }, { "epoch": 696.0526315789474, "grad_norm": 1.1520529985427856, "learning_rate": 0.0001, "loss": 0.0139, "step": 105800 }, { "epoch": 696.1184210526316, "grad_norm": 1.9951999187469482, "learning_rate": 0.0001, "loss": 0.0127, "step": 105810 }, { "epoch": 696.1842105263158, "grad_norm": 1.4803284406661987, "learning_rate": 0.0001, "loss": 0.0123, "step": 105820 }, { "epoch": 696.25, "grad_norm": 1.4274041652679443, "learning_rate": 0.0001, "loss": 0.012, "step": 105830 }, { "epoch": 696.3157894736842, "grad_norm": 1.4053592681884766, "learning_rate": 0.0001, "loss": 0.0112, "step": 105840 }, { "epoch": 696.3815789473684, "grad_norm": 2.0110902786254883, "learning_rate": 0.0001, "loss": 0.0135, "step": 105850 }, { "epoch": 696.4473684210526, "grad_norm": 1.1645880937576294, "learning_rate": 0.0001, "loss": 0.0127, "step": 105860 }, { "epoch": 696.5131578947369, "grad_norm": 1.3378782272338867, "learning_rate": 0.0001, "loss": 0.0143, "step": 105870 }, { "epoch": 696.578947368421, "grad_norm": 1.4294095039367676, "learning_rate": 0.0001, "loss": 0.0116, "step": 105880 }, { "epoch": 696.6447368421053, "grad_norm": 1.6448734998703003, "learning_rate": 0.0001, "loss": 0.0161, "step": 105890 }, { "epoch": 696.7105263157895, "grad_norm": 1.451838493347168, "learning_rate": 0.0001, "loss": 0.0137, "step": 105900 }, { "epoch": 696.7763157894736, "grad_norm": 1.5376343727111816, "learning_rate": 0.0001, "loss": 0.0137, "step": 105910 }, { "epoch": 696.8421052631579, "grad_norm": 1.330016016960144, "learning_rate": 0.0001, "loss": 0.0157, "step": 105920 }, { "epoch": 696.9078947368421, "grad_norm": 1.2374457120895386, "learning_rate": 0.0001, "loss": 0.0175, "step": 105930 }, { "epoch": 696.9736842105264, "grad_norm": 1.7156187295913696, "learning_rate": 0.0001, "loss": 0.0159, "step": 105940 }, { "epoch": 697.0394736842105, "grad_norm": 1.1749367713928223, "learning_rate": 0.0001, "loss": 0.0151, "step": 105950 }, { "epoch": 697.1052631578947, "grad_norm": 1.3962154388427734, "learning_rate": 0.0001, "loss": 0.011, "step": 105960 }, { "epoch": 697.171052631579, "grad_norm": 1.2537871599197388, "learning_rate": 0.0001, "loss": 0.0184, "step": 105970 }, { "epoch": 697.2368421052631, "grad_norm": 1.38993501663208, "learning_rate": 0.0001, "loss": 0.0118, "step": 105980 }, { "epoch": 697.3026315789474, "grad_norm": 1.7826313972473145, "learning_rate": 0.0001, "loss": 0.0153, "step": 105990 }, { "epoch": 697.3684210526316, "grad_norm": 1.9075065851211548, "learning_rate": 0.0001, "loss": 0.0149, "step": 106000 }, { "epoch": 697.4342105263158, "grad_norm": 1.6238073110580444, "learning_rate": 0.0001, "loss": 0.0146, "step": 106010 }, { "epoch": 697.5, "grad_norm": 1.6921645402908325, "learning_rate": 0.0001, "loss": 0.0154, "step": 106020 }, { "epoch": 697.5657894736842, "grad_norm": 1.317937970161438, "learning_rate": 0.0001, "loss": 0.0154, "step": 106030 }, { "epoch": 697.6315789473684, "grad_norm": 1.7139893770217896, "learning_rate": 0.0001, "loss": 0.0112, "step": 106040 }, { "epoch": 697.6973684210526, "grad_norm": 1.657868504524231, "learning_rate": 0.0001, "loss": 0.0107, "step": 106050 }, { "epoch": 697.7631578947369, "grad_norm": 1.7644935846328735, "learning_rate": 0.0001, "loss": 0.0158, "step": 106060 }, { "epoch": 697.828947368421, "grad_norm": 1.6037921905517578, "learning_rate": 0.0001, "loss": 0.0154, "step": 106070 }, { "epoch": 697.8947368421053, "grad_norm": 1.345159888267517, "learning_rate": 0.0001, "loss": 0.0127, "step": 106080 }, { "epoch": 697.9605263157895, "grad_norm": 1.581958532333374, "learning_rate": 0.0001, "loss": 0.0142, "step": 106090 }, { "epoch": 698.0263157894736, "grad_norm": 1.1743396520614624, "learning_rate": 0.0001, "loss": 0.0125, "step": 106100 }, { "epoch": 698.0921052631579, "grad_norm": 1.28936767578125, "learning_rate": 0.0001, "loss": 0.0132, "step": 106110 }, { "epoch": 698.1578947368421, "grad_norm": 1.9903117418289185, "learning_rate": 0.0001, "loss": 0.0136, "step": 106120 }, { "epoch": 698.2236842105264, "grad_norm": 1.9814826250076294, "learning_rate": 0.0001, "loss": 0.0143, "step": 106130 }, { "epoch": 698.2894736842105, "grad_norm": 1.4553810358047485, "learning_rate": 0.0001, "loss": 0.0156, "step": 106140 }, { "epoch": 698.3552631578947, "grad_norm": 1.362070083618164, "learning_rate": 0.0001, "loss": 0.0167, "step": 106150 }, { "epoch": 698.421052631579, "grad_norm": 1.4876432418823242, "learning_rate": 0.0001, "loss": 0.014, "step": 106160 }, { "epoch": 698.4868421052631, "grad_norm": 1.2447222471237183, "learning_rate": 0.0001, "loss": 0.0153, "step": 106170 }, { "epoch": 698.5526315789474, "grad_norm": 1.4740928411483765, "learning_rate": 0.0001, "loss": 0.0112, "step": 106180 }, { "epoch": 698.6184210526316, "grad_norm": 1.242175579071045, "learning_rate": 0.0001, "loss": 0.0182, "step": 106190 }, { "epoch": 698.6842105263158, "grad_norm": 1.6142164468765259, "learning_rate": 0.0001, "loss": 0.0118, "step": 106200 }, { "epoch": 698.75, "grad_norm": 1.6853488683700562, "learning_rate": 0.0001, "loss": 0.0124, "step": 106210 }, { "epoch": 698.8157894736842, "grad_norm": 1.7837543487548828, "learning_rate": 0.0001, "loss": 0.0126, "step": 106220 }, { "epoch": 698.8815789473684, "grad_norm": 1.8396568298339844, "learning_rate": 0.0001, "loss": 0.0132, "step": 106230 }, { "epoch": 698.9473684210526, "grad_norm": 0.9499621987342834, "learning_rate": 0.0001, "loss": 0.0147, "step": 106240 }, { "epoch": 699.0131578947369, "grad_norm": 1.4310238361358643, "learning_rate": 0.0001, "loss": 0.0129, "step": 106250 }, { "epoch": 699.078947368421, "grad_norm": 1.6611106395721436, "learning_rate": 0.0001, "loss": 0.0155, "step": 106260 }, { "epoch": 699.1447368421053, "grad_norm": 1.7867568731307983, "learning_rate": 0.0001, "loss": 0.016, "step": 106270 }, { "epoch": 699.2105263157895, "grad_norm": 1.5653812885284424, "learning_rate": 0.0001, "loss": 0.0151, "step": 106280 }, { "epoch": 699.2763157894736, "grad_norm": 1.322019100189209, "learning_rate": 0.0001, "loss": 0.0135, "step": 106290 }, { "epoch": 699.3421052631579, "grad_norm": 1.467756748199463, "learning_rate": 0.0001, "loss": 0.0169, "step": 106300 }, { "epoch": 699.4078947368421, "grad_norm": 1.5660290718078613, "learning_rate": 0.0001, "loss": 0.0129, "step": 106310 }, { "epoch": 699.4736842105264, "grad_norm": 1.51334810256958, "learning_rate": 0.0001, "loss": 0.0124, "step": 106320 }, { "epoch": 699.5394736842105, "grad_norm": 1.131628155708313, "learning_rate": 0.0001, "loss": 0.0128, "step": 106330 }, { "epoch": 699.6052631578947, "grad_norm": 1.0294538736343384, "learning_rate": 0.0001, "loss": 0.013, "step": 106340 }, { "epoch": 699.671052631579, "grad_norm": 1.4367408752441406, "learning_rate": 0.0001, "loss": 0.0158, "step": 106350 }, { "epoch": 699.7368421052631, "grad_norm": 1.77603018283844, "learning_rate": 0.0001, "loss": 0.0124, "step": 106360 }, { "epoch": 699.8026315789474, "grad_norm": 0.9969038367271423, "learning_rate": 0.0001, "loss": 0.0179, "step": 106370 }, { "epoch": 699.8684210526316, "grad_norm": 1.3702187538146973, "learning_rate": 0.0001, "loss": 0.0155, "step": 106380 }, { "epoch": 699.9342105263158, "grad_norm": 1.4307650327682495, "learning_rate": 0.0001, "loss": 0.0113, "step": 106390 }, { "epoch": 700.0, "grad_norm": 1.635344386100769, "learning_rate": 0.0001, "loss": 0.0127, "step": 106400 }, { "epoch": 700.0657894736842, "grad_norm": 1.8712533712387085, "learning_rate": 0.0001, "loss": 0.0129, "step": 106410 }, { "epoch": 700.1315789473684, "grad_norm": 1.9371920824050903, "learning_rate": 0.0001, "loss": 0.0141, "step": 106420 }, { "epoch": 700.1973684210526, "grad_norm": 1.1480993032455444, "learning_rate": 0.0001, "loss": 0.0121, "step": 106430 }, { "epoch": 700.2631578947369, "grad_norm": 1.0761892795562744, "learning_rate": 0.0001, "loss": 0.013, "step": 106440 }, { "epoch": 700.328947368421, "grad_norm": 1.5964354276657104, "learning_rate": 0.0001, "loss": 0.0176, "step": 106450 }, { "epoch": 700.3947368421053, "grad_norm": 1.4708530902862549, "learning_rate": 0.0001, "loss": 0.0168, "step": 106460 }, { "epoch": 700.4605263157895, "grad_norm": 1.3934862613677979, "learning_rate": 0.0001, "loss": 0.0185, "step": 106470 }, { "epoch": 700.5263157894736, "grad_norm": 1.6588797569274902, "learning_rate": 0.0001, "loss": 0.0117, "step": 106480 }, { "epoch": 700.5921052631579, "grad_norm": 1.5327767133712769, "learning_rate": 0.0001, "loss": 0.0148, "step": 106490 }, { "epoch": 700.6578947368421, "grad_norm": 1.6738446950912476, "learning_rate": 0.0001, "loss": 0.013, "step": 106500 }, { "epoch": 700.7236842105264, "grad_norm": 1.679795503616333, "learning_rate": 0.0001, "loss": 0.0102, "step": 106510 }, { "epoch": 700.7894736842105, "grad_norm": 1.6828978061676025, "learning_rate": 0.0001, "loss": 0.0168, "step": 106520 }, { "epoch": 700.8552631578947, "grad_norm": 1.5495734214782715, "learning_rate": 0.0001, "loss": 0.0104, "step": 106530 }, { "epoch": 700.921052631579, "grad_norm": 1.4483420848846436, "learning_rate": 0.0001, "loss": 0.0126, "step": 106540 }, { "epoch": 700.9868421052631, "grad_norm": 1.3016589879989624, "learning_rate": 0.0001, "loss": 0.0161, "step": 106550 }, { "epoch": 701.0526315789474, "grad_norm": 1.5713452100753784, "learning_rate": 0.0001, "loss": 0.0111, "step": 106560 }, { "epoch": 701.1184210526316, "grad_norm": 1.726638913154602, "learning_rate": 0.0001, "loss": 0.0107, "step": 106570 }, { "epoch": 701.1842105263158, "grad_norm": 1.2283722162246704, "learning_rate": 0.0001, "loss": 0.0118, "step": 106580 }, { "epoch": 701.25, "grad_norm": 1.0022811889648438, "learning_rate": 0.0001, "loss": 0.0137, "step": 106590 }, { "epoch": 701.3157894736842, "grad_norm": 1.2638459205627441, "learning_rate": 0.0001, "loss": 0.0161, "step": 106600 }, { "epoch": 701.3815789473684, "grad_norm": 1.521647334098816, "learning_rate": 0.0001, "loss": 0.0164, "step": 106610 }, { "epoch": 701.4473684210526, "grad_norm": 1.442168116569519, "learning_rate": 0.0001, "loss": 0.0127, "step": 106620 }, { "epoch": 701.5131578947369, "grad_norm": 1.4835644960403442, "learning_rate": 0.0001, "loss": 0.0173, "step": 106630 }, { "epoch": 701.578947368421, "grad_norm": 1.258491039276123, "learning_rate": 0.0001, "loss": 0.0103, "step": 106640 }, { "epoch": 701.6447368421053, "grad_norm": 1.4470549821853638, "learning_rate": 0.0001, "loss": 0.015, "step": 106650 }, { "epoch": 701.7105263157895, "grad_norm": 1.4615473747253418, "learning_rate": 0.0001, "loss": 0.0151, "step": 106660 }, { "epoch": 701.7763157894736, "grad_norm": 1.439660668373108, "learning_rate": 0.0001, "loss": 0.0152, "step": 106670 }, { "epoch": 701.8421052631579, "grad_norm": 1.5950732231140137, "learning_rate": 0.0001, "loss": 0.0181, "step": 106680 }, { "epoch": 701.9078947368421, "grad_norm": 1.2800017595291138, "learning_rate": 0.0001, "loss": 0.0127, "step": 106690 }, { "epoch": 701.9736842105264, "grad_norm": 1.5577884912490845, "learning_rate": 0.0001, "loss": 0.0114, "step": 106700 }, { "epoch": 702.0394736842105, "grad_norm": 1.1724392175674438, "learning_rate": 0.0001, "loss": 0.0157, "step": 106710 }, { "epoch": 702.1052631578947, "grad_norm": 1.8160444498062134, "learning_rate": 0.0001, "loss": 0.0135, "step": 106720 }, { "epoch": 702.171052631579, "grad_norm": 1.3118189573287964, "learning_rate": 0.0001, "loss": 0.012, "step": 106730 }, { "epoch": 702.2368421052631, "grad_norm": 1.3918579816818237, "learning_rate": 0.0001, "loss": 0.0159, "step": 106740 }, { "epoch": 702.3026315789474, "grad_norm": 1.2483983039855957, "learning_rate": 0.0001, "loss": 0.0125, "step": 106750 }, { "epoch": 702.3684210526316, "grad_norm": 1.8395237922668457, "learning_rate": 0.0001, "loss": 0.0144, "step": 106760 }, { "epoch": 702.4342105263158, "grad_norm": 2.016268730163574, "learning_rate": 0.0001, "loss": 0.0165, "step": 106770 }, { "epoch": 702.5, "grad_norm": 1.6499770879745483, "learning_rate": 0.0001, "loss": 0.0126, "step": 106780 }, { "epoch": 702.5657894736842, "grad_norm": 1.6584314107894897, "learning_rate": 0.0001, "loss": 0.0133, "step": 106790 }, { "epoch": 702.6315789473684, "grad_norm": 1.4874799251556396, "learning_rate": 0.0001, "loss": 0.0144, "step": 106800 }, { "epoch": 702.6973684210526, "grad_norm": 1.3213993310928345, "learning_rate": 0.0001, "loss": 0.0125, "step": 106810 }, { "epoch": 702.7631578947369, "grad_norm": 1.6460480690002441, "learning_rate": 0.0001, "loss": 0.0099, "step": 106820 }, { "epoch": 702.828947368421, "grad_norm": 1.8228288888931274, "learning_rate": 0.0001, "loss": 0.0132, "step": 106830 }, { "epoch": 702.8947368421053, "grad_norm": 1.0710926055908203, "learning_rate": 0.0001, "loss": 0.0164, "step": 106840 }, { "epoch": 702.9605263157895, "grad_norm": 1.3520493507385254, "learning_rate": 0.0001, "loss": 0.0118, "step": 106850 }, { "epoch": 703.0263157894736, "grad_norm": 1.630853295326233, "learning_rate": 0.0001, "loss": 0.0158, "step": 106860 }, { "epoch": 703.0921052631579, "grad_norm": 1.4836199283599854, "learning_rate": 0.0001, "loss": 0.0105, "step": 106870 }, { "epoch": 703.1578947368421, "grad_norm": 1.2020355463027954, "learning_rate": 0.0001, "loss": 0.0133, "step": 106880 }, { "epoch": 703.2236842105264, "grad_norm": 1.3237829208374023, "learning_rate": 0.0001, "loss": 0.0121, "step": 106890 }, { "epoch": 703.2894736842105, "grad_norm": 1.1903727054595947, "learning_rate": 0.0001, "loss": 0.0134, "step": 106900 }, { "epoch": 703.3552631578947, "grad_norm": 1.214144229888916, "learning_rate": 0.0001, "loss": 0.0119, "step": 106910 }, { "epoch": 703.421052631579, "grad_norm": 1.4143731594085693, "learning_rate": 0.0001, "loss": 0.0111, "step": 106920 }, { "epoch": 703.4868421052631, "grad_norm": 1.4160983562469482, "learning_rate": 0.0001, "loss": 0.0147, "step": 106930 }, { "epoch": 703.5526315789474, "grad_norm": 1.4348835945129395, "learning_rate": 0.0001, "loss": 0.0162, "step": 106940 }, { "epoch": 703.6184210526316, "grad_norm": 1.168415904045105, "learning_rate": 0.0001, "loss": 0.0173, "step": 106950 }, { "epoch": 703.6842105263158, "grad_norm": 1.1984505653381348, "learning_rate": 0.0001, "loss": 0.0126, "step": 106960 }, { "epoch": 703.75, "grad_norm": 1.3488589525222778, "learning_rate": 0.0001, "loss": 0.0152, "step": 106970 }, { "epoch": 703.8157894736842, "grad_norm": 1.7160837650299072, "learning_rate": 0.0001, "loss": 0.0141, "step": 106980 }, { "epoch": 703.8815789473684, "grad_norm": 1.3541070222854614, "learning_rate": 0.0001, "loss": 0.0173, "step": 106990 }, { "epoch": 703.9473684210526, "grad_norm": 1.913190245628357, "learning_rate": 0.0001, "loss": 0.0136, "step": 107000 }, { "epoch": 704.0131578947369, "grad_norm": 1.462004542350769, "learning_rate": 0.0001, "loss": 0.0151, "step": 107010 }, { "epoch": 704.078947368421, "grad_norm": 1.433447003364563, "learning_rate": 0.0001, "loss": 0.0153, "step": 107020 }, { "epoch": 704.1447368421053, "grad_norm": 1.641514778137207, "learning_rate": 0.0001, "loss": 0.0123, "step": 107030 }, { "epoch": 704.2105263157895, "grad_norm": 1.3894199132919312, "learning_rate": 0.0001, "loss": 0.0146, "step": 107040 }, { "epoch": 704.2763157894736, "grad_norm": 1.5374521017074585, "learning_rate": 0.0001, "loss": 0.0138, "step": 107050 }, { "epoch": 704.3421052631579, "grad_norm": 1.3678863048553467, "learning_rate": 0.0001, "loss": 0.0119, "step": 107060 }, { "epoch": 704.4078947368421, "grad_norm": 1.504101276397705, "learning_rate": 0.0001, "loss": 0.0154, "step": 107070 }, { "epoch": 704.4736842105264, "grad_norm": 1.7663174867630005, "learning_rate": 0.0001, "loss": 0.0134, "step": 107080 }, { "epoch": 704.5394736842105, "grad_norm": 1.683093547821045, "learning_rate": 0.0001, "loss": 0.0141, "step": 107090 }, { "epoch": 704.6052631578947, "grad_norm": 1.2510346174240112, "learning_rate": 0.0001, "loss": 0.0121, "step": 107100 }, { "epoch": 704.671052631579, "grad_norm": 1.3508044481277466, "learning_rate": 0.0001, "loss": 0.0136, "step": 107110 }, { "epoch": 704.7368421052631, "grad_norm": 1.8839439153671265, "learning_rate": 0.0001, "loss": 0.0129, "step": 107120 }, { "epoch": 704.8026315789474, "grad_norm": 1.9177449941635132, "learning_rate": 0.0001, "loss": 0.0173, "step": 107130 }, { "epoch": 704.8684210526316, "grad_norm": 1.2867491245269775, "learning_rate": 0.0001, "loss": 0.0142, "step": 107140 }, { "epoch": 704.9342105263158, "grad_norm": 1.3000553846359253, "learning_rate": 0.0001, "loss": 0.0136, "step": 107150 }, { "epoch": 705.0, "grad_norm": 1.2834903001785278, "learning_rate": 0.0001, "loss": 0.0105, "step": 107160 }, { "epoch": 705.0657894736842, "grad_norm": 1.4151852130889893, "learning_rate": 0.0001, "loss": 0.0138, "step": 107170 }, { "epoch": 705.1315789473684, "grad_norm": 1.6729289293289185, "learning_rate": 0.0001, "loss": 0.0128, "step": 107180 }, { "epoch": 705.1973684210526, "grad_norm": 1.3384878635406494, "learning_rate": 0.0001, "loss": 0.0144, "step": 107190 }, { "epoch": 705.2631578947369, "grad_norm": 1.5233880281448364, "learning_rate": 0.0001, "loss": 0.0153, "step": 107200 }, { "epoch": 705.328947368421, "grad_norm": 1.789371371269226, "learning_rate": 0.0001, "loss": 0.0127, "step": 107210 }, { "epoch": 705.3947368421053, "grad_norm": 1.680765151977539, "learning_rate": 0.0001, "loss": 0.0152, "step": 107220 }, { "epoch": 705.4605263157895, "grad_norm": 1.532517910003662, "learning_rate": 0.0001, "loss": 0.0108, "step": 107230 }, { "epoch": 705.5263157894736, "grad_norm": 1.1895544528961182, "learning_rate": 0.0001, "loss": 0.0122, "step": 107240 }, { "epoch": 705.5921052631579, "grad_norm": 1.691396951675415, "learning_rate": 0.0001, "loss": 0.0169, "step": 107250 }, { "epoch": 705.6578947368421, "grad_norm": 1.9131888151168823, "learning_rate": 0.0001, "loss": 0.0131, "step": 107260 }, { "epoch": 705.7236842105264, "grad_norm": 1.5870317220687866, "learning_rate": 0.0001, "loss": 0.0153, "step": 107270 }, { "epoch": 705.7894736842105, "grad_norm": 1.3875929117202759, "learning_rate": 0.0001, "loss": 0.0121, "step": 107280 }, { "epoch": 705.8552631578947, "grad_norm": 1.2507081031799316, "learning_rate": 0.0001, "loss": 0.0124, "step": 107290 }, { "epoch": 705.921052631579, "grad_norm": 1.252377986907959, "learning_rate": 0.0001, "loss": 0.014, "step": 107300 }, { "epoch": 705.9868421052631, "grad_norm": 1.4632035493850708, "learning_rate": 0.0001, "loss": 0.0145, "step": 107310 }, { "epoch": 706.0526315789474, "grad_norm": 1.1157034635543823, "learning_rate": 0.0001, "loss": 0.0117, "step": 107320 }, { "epoch": 706.1184210526316, "grad_norm": 1.4080573320388794, "learning_rate": 0.0001, "loss": 0.0175, "step": 107330 }, { "epoch": 706.1842105263158, "grad_norm": 1.3445723056793213, "learning_rate": 0.0001, "loss": 0.0183, "step": 107340 }, { "epoch": 706.25, "grad_norm": 1.5909563302993774, "learning_rate": 0.0001, "loss": 0.0146, "step": 107350 }, { "epoch": 706.3157894736842, "grad_norm": 1.4608768224716187, "learning_rate": 0.0001, "loss": 0.0149, "step": 107360 }, { "epoch": 706.3815789473684, "grad_norm": 1.5622596740722656, "learning_rate": 0.0001, "loss": 0.0137, "step": 107370 }, { "epoch": 706.4473684210526, "grad_norm": 1.5381336212158203, "learning_rate": 0.0001, "loss": 0.0136, "step": 107380 }, { "epoch": 706.5131578947369, "grad_norm": 1.2054091691970825, "learning_rate": 0.0001, "loss": 0.0114, "step": 107390 }, { "epoch": 706.578947368421, "grad_norm": 1.5349513292312622, "learning_rate": 0.0001, "loss": 0.0134, "step": 107400 }, { "epoch": 706.6447368421053, "grad_norm": 1.045829176902771, "learning_rate": 0.0001, "loss": 0.0114, "step": 107410 }, { "epoch": 706.7105263157895, "grad_norm": 1.0022194385528564, "learning_rate": 0.0001, "loss": 0.013, "step": 107420 }, { "epoch": 706.7763157894736, "grad_norm": 1.6222920417785645, "learning_rate": 0.0001, "loss": 0.0171, "step": 107430 }, { "epoch": 706.8421052631579, "grad_norm": 1.8452228307724, "learning_rate": 0.0001, "loss": 0.0134, "step": 107440 }, { "epoch": 706.9078947368421, "grad_norm": 1.6921074390411377, "learning_rate": 0.0001, "loss": 0.0132, "step": 107450 }, { "epoch": 706.9736842105264, "grad_norm": 1.7199269533157349, "learning_rate": 0.0001, "loss": 0.0124, "step": 107460 }, { "epoch": 707.0394736842105, "grad_norm": 0.9990474581718445, "learning_rate": 0.0001, "loss": 0.0165, "step": 107470 }, { "epoch": 707.1052631578947, "grad_norm": 1.3139115571975708, "learning_rate": 0.0001, "loss": 0.0154, "step": 107480 }, { "epoch": 707.171052631579, "grad_norm": 1.6910667419433594, "learning_rate": 0.0001, "loss": 0.0161, "step": 107490 }, { "epoch": 707.2368421052631, "grad_norm": 1.729372262954712, "learning_rate": 0.0001, "loss": 0.0133, "step": 107500 }, { "epoch": 707.3026315789474, "grad_norm": 1.4241622686386108, "learning_rate": 0.0001, "loss": 0.0151, "step": 107510 }, { "epoch": 707.3684210526316, "grad_norm": 1.5174546241760254, "learning_rate": 0.0001, "loss": 0.0135, "step": 107520 }, { "epoch": 707.4342105263158, "grad_norm": 1.586256504058838, "learning_rate": 0.0001, "loss": 0.0122, "step": 107530 }, { "epoch": 707.5, "grad_norm": 1.3227407932281494, "learning_rate": 0.0001, "loss": 0.0128, "step": 107540 }, { "epoch": 707.5657894736842, "grad_norm": 1.5184746980667114, "learning_rate": 0.0001, "loss": 0.0108, "step": 107550 }, { "epoch": 707.6315789473684, "grad_norm": 1.4294675588607788, "learning_rate": 0.0001, "loss": 0.0172, "step": 107560 }, { "epoch": 707.6973684210526, "grad_norm": 1.2168750762939453, "learning_rate": 0.0001, "loss": 0.0134, "step": 107570 }, { "epoch": 707.7631578947369, "grad_norm": 1.689828872680664, "learning_rate": 0.0001, "loss": 0.0108, "step": 107580 }, { "epoch": 707.828947368421, "grad_norm": 1.6660468578338623, "learning_rate": 0.0001, "loss": 0.015, "step": 107590 }, { "epoch": 707.8947368421053, "grad_norm": 1.7139595746994019, "learning_rate": 0.0001, "loss": 0.0123, "step": 107600 }, { "epoch": 707.9605263157895, "grad_norm": 1.739303469657898, "learning_rate": 0.0001, "loss": 0.0165, "step": 107610 }, { "epoch": 708.0263157894736, "grad_norm": 1.7574126720428467, "learning_rate": 0.0001, "loss": 0.0168, "step": 107620 }, { "epoch": 708.0921052631579, "grad_norm": 1.353441596031189, "learning_rate": 0.0001, "loss": 0.0142, "step": 107630 }, { "epoch": 708.1578947368421, "grad_norm": 2.0203325748443604, "learning_rate": 0.0001, "loss": 0.0119, "step": 107640 }, { "epoch": 708.2236842105264, "grad_norm": 1.5450931787490845, "learning_rate": 0.0001, "loss": 0.0173, "step": 107650 }, { "epoch": 708.2894736842105, "grad_norm": 2.003761053085327, "learning_rate": 0.0001, "loss": 0.0155, "step": 107660 }, { "epoch": 708.3552631578947, "grad_norm": 1.4915639162063599, "learning_rate": 0.0001, "loss": 0.0132, "step": 107670 }, { "epoch": 708.421052631579, "grad_norm": 1.5422054529190063, "learning_rate": 0.0001, "loss": 0.0151, "step": 107680 }, { "epoch": 708.4868421052631, "grad_norm": 1.3012349605560303, "learning_rate": 0.0001, "loss": 0.0119, "step": 107690 }, { "epoch": 708.5526315789474, "grad_norm": 1.2878769636154175, "learning_rate": 0.0001, "loss": 0.0133, "step": 107700 }, { "epoch": 708.6184210526316, "grad_norm": 1.3678783178329468, "learning_rate": 0.0001, "loss": 0.0142, "step": 107710 }, { "epoch": 708.6842105263158, "grad_norm": 1.4676779508590698, "learning_rate": 0.0001, "loss": 0.0129, "step": 107720 }, { "epoch": 708.75, "grad_norm": 1.3477271795272827, "learning_rate": 0.0001, "loss": 0.012, "step": 107730 }, { "epoch": 708.8157894736842, "grad_norm": 2.0366814136505127, "learning_rate": 0.0001, "loss": 0.0128, "step": 107740 }, { "epoch": 708.8815789473684, "grad_norm": 1.4439398050308228, "learning_rate": 0.0001, "loss": 0.0143, "step": 107750 }, { "epoch": 708.9473684210526, "grad_norm": 1.394221544265747, "learning_rate": 0.0001, "loss": 0.0111, "step": 107760 }, { "epoch": 709.0131578947369, "grad_norm": 1.6872700452804565, "learning_rate": 0.0001, "loss": 0.0138, "step": 107770 }, { "epoch": 709.078947368421, "grad_norm": 1.3578124046325684, "learning_rate": 0.0001, "loss": 0.0125, "step": 107780 }, { "epoch": 709.1447368421053, "grad_norm": 1.7722166776657104, "learning_rate": 0.0001, "loss": 0.0169, "step": 107790 }, { "epoch": 709.2105263157895, "grad_norm": 1.7177654504776, "learning_rate": 0.0001, "loss": 0.0168, "step": 107800 }, { "epoch": 709.2763157894736, "grad_norm": 1.5109416246414185, "learning_rate": 0.0001, "loss": 0.01, "step": 107810 }, { "epoch": 709.3421052631579, "grad_norm": 1.5302708148956299, "learning_rate": 0.0001, "loss": 0.0128, "step": 107820 }, { "epoch": 709.4078947368421, "grad_norm": 1.517020583152771, "learning_rate": 0.0001, "loss": 0.0103, "step": 107830 }, { "epoch": 709.4736842105264, "grad_norm": 1.677924633026123, "learning_rate": 0.0001, "loss": 0.0146, "step": 107840 }, { "epoch": 709.5394736842105, "grad_norm": 1.7106214761734009, "learning_rate": 0.0001, "loss": 0.0119, "step": 107850 }, { "epoch": 709.6052631578947, "grad_norm": 1.0243451595306396, "learning_rate": 0.0001, "loss": 0.0132, "step": 107860 }, { "epoch": 709.671052631579, "grad_norm": 1.482055902481079, "learning_rate": 0.0001, "loss": 0.0188, "step": 107870 }, { "epoch": 709.7368421052631, "grad_norm": 1.4675753116607666, "learning_rate": 0.0001, "loss": 0.0118, "step": 107880 }, { "epoch": 709.8026315789474, "grad_norm": 1.627547025680542, "learning_rate": 0.0001, "loss": 0.0117, "step": 107890 }, { "epoch": 709.8684210526316, "grad_norm": 1.124728798866272, "learning_rate": 0.0001, "loss": 0.0152, "step": 107900 }, { "epoch": 709.9342105263158, "grad_norm": 1.4445551633834839, "learning_rate": 0.0001, "loss": 0.0137, "step": 107910 }, { "epoch": 710.0, "grad_norm": 1.5332189798355103, "learning_rate": 0.0001, "loss": 0.0135, "step": 107920 }, { "epoch": 710.0657894736842, "grad_norm": 1.1746220588684082, "learning_rate": 0.0001, "loss": 0.0123, "step": 107930 }, { "epoch": 710.1315789473684, "grad_norm": 1.3469737768173218, "learning_rate": 0.0001, "loss": 0.0148, "step": 107940 }, { "epoch": 710.1973684210526, "grad_norm": 1.4348816871643066, "learning_rate": 0.0001, "loss": 0.0134, "step": 107950 }, { "epoch": 710.2631578947369, "grad_norm": 1.7452789545059204, "learning_rate": 0.0001, "loss": 0.0136, "step": 107960 }, { "epoch": 710.328947368421, "grad_norm": 1.4314086437225342, "learning_rate": 0.0001, "loss": 0.0138, "step": 107970 }, { "epoch": 710.3947368421053, "grad_norm": 0.988063633441925, "learning_rate": 0.0001, "loss": 0.0146, "step": 107980 }, { "epoch": 710.4605263157895, "grad_norm": 1.7629566192626953, "learning_rate": 0.0001, "loss": 0.0132, "step": 107990 }, { "epoch": 710.5263157894736, "grad_norm": 1.7458670139312744, "learning_rate": 0.0001, "loss": 0.0137, "step": 108000 }, { "epoch": 710.5921052631579, "grad_norm": 1.4475384950637817, "learning_rate": 0.0001, "loss": 0.0131, "step": 108010 }, { "epoch": 710.6578947368421, "grad_norm": 1.3295153379440308, "learning_rate": 0.0001, "loss": 0.0114, "step": 108020 }, { "epoch": 710.7236842105264, "grad_norm": 1.308290719985962, "learning_rate": 0.0001, "loss": 0.0151, "step": 108030 }, { "epoch": 710.7894736842105, "grad_norm": 1.3882603645324707, "learning_rate": 0.0001, "loss": 0.0144, "step": 108040 }, { "epoch": 710.8552631578947, "grad_norm": 0.8890396952629089, "learning_rate": 0.0001, "loss": 0.0139, "step": 108050 }, { "epoch": 710.921052631579, "grad_norm": 1.055899739265442, "learning_rate": 0.0001, "loss": 0.0122, "step": 108060 }, { "epoch": 710.9868421052631, "grad_norm": 1.4169371128082275, "learning_rate": 0.0001, "loss": 0.0182, "step": 108070 }, { "epoch": 711.0526315789474, "grad_norm": 1.3111227750778198, "learning_rate": 0.0001, "loss": 0.0145, "step": 108080 }, { "epoch": 711.1184210526316, "grad_norm": 1.2008601427078247, "learning_rate": 0.0001, "loss": 0.0116, "step": 108090 }, { "epoch": 711.1842105263158, "grad_norm": 1.6706904172897339, "learning_rate": 0.0001, "loss": 0.0122, "step": 108100 }, { "epoch": 711.25, "grad_norm": 1.6699414253234863, "learning_rate": 0.0001, "loss": 0.0136, "step": 108110 }, { "epoch": 711.3157894736842, "grad_norm": 1.8248833417892456, "learning_rate": 0.0001, "loss": 0.0164, "step": 108120 }, { "epoch": 711.3815789473684, "grad_norm": 1.4437580108642578, "learning_rate": 0.0001, "loss": 0.0128, "step": 108130 }, { "epoch": 711.4473684210526, "grad_norm": 1.0547910928726196, "learning_rate": 0.0001, "loss": 0.0138, "step": 108140 }, { "epoch": 711.5131578947369, "grad_norm": 1.7004046440124512, "learning_rate": 0.0001, "loss": 0.0123, "step": 108150 }, { "epoch": 711.578947368421, "grad_norm": 1.2936302423477173, "learning_rate": 0.0001, "loss": 0.0129, "step": 108160 }, { "epoch": 711.6447368421053, "grad_norm": 1.2423175573349, "learning_rate": 0.0001, "loss": 0.0129, "step": 108170 }, { "epoch": 711.7105263157895, "grad_norm": 1.455463171005249, "learning_rate": 0.0001, "loss": 0.0143, "step": 108180 }, { "epoch": 711.7763157894736, "grad_norm": 1.6516271829605103, "learning_rate": 0.0001, "loss": 0.0124, "step": 108190 }, { "epoch": 711.8421052631579, "grad_norm": 1.5543328523635864, "learning_rate": 0.0001, "loss": 0.0199, "step": 108200 }, { "epoch": 711.9078947368421, "grad_norm": 1.3157018423080444, "learning_rate": 0.0001, "loss": 0.0132, "step": 108210 }, { "epoch": 711.9736842105264, "grad_norm": 2.07529616355896, "learning_rate": 0.0001, "loss": 0.0209, "step": 108220 }, { "epoch": 712.0394736842105, "grad_norm": 1.8425443172454834, "learning_rate": 0.0001, "loss": 0.0181, "step": 108230 }, { "epoch": 712.1052631578947, "grad_norm": 1.9911247491836548, "learning_rate": 0.0001, "loss": 0.0119, "step": 108240 }, { "epoch": 712.171052631579, "grad_norm": 2.230585813522339, "learning_rate": 0.0001, "loss": 0.0127, "step": 108250 }, { "epoch": 712.2368421052631, "grad_norm": 1.5227327346801758, "learning_rate": 0.0001, "loss": 0.0169, "step": 108260 }, { "epoch": 712.3026315789474, "grad_norm": 1.4661561250686646, "learning_rate": 0.0001, "loss": 0.0127, "step": 108270 }, { "epoch": 712.3684210526316, "grad_norm": 1.9967175722122192, "learning_rate": 0.0001, "loss": 0.0117, "step": 108280 }, { "epoch": 712.4342105263158, "grad_norm": 1.3144525289535522, "learning_rate": 0.0001, "loss": 0.0143, "step": 108290 }, { "epoch": 712.5, "grad_norm": 1.4602679014205933, "learning_rate": 0.0001, "loss": 0.0128, "step": 108300 }, { "epoch": 712.5657894736842, "grad_norm": 1.3974312543869019, "learning_rate": 0.0001, "loss": 0.013, "step": 108310 }, { "epoch": 712.6315789473684, "grad_norm": 1.6680376529693604, "learning_rate": 0.0001, "loss": 0.0143, "step": 108320 }, { "epoch": 712.6973684210526, "grad_norm": 1.6443816423416138, "learning_rate": 0.0001, "loss": 0.0115, "step": 108330 }, { "epoch": 712.7631578947369, "grad_norm": 1.9111895561218262, "learning_rate": 0.0001, "loss": 0.0126, "step": 108340 }, { "epoch": 712.828947368421, "grad_norm": 1.360927939414978, "learning_rate": 0.0001, "loss": 0.0125, "step": 108350 }, { "epoch": 712.8947368421053, "grad_norm": 1.3969614505767822, "learning_rate": 0.0001, "loss": 0.0137, "step": 108360 }, { "epoch": 712.9605263157895, "grad_norm": 1.834227204322815, "learning_rate": 0.0001, "loss": 0.0158, "step": 108370 }, { "epoch": 713.0263157894736, "grad_norm": 1.9383528232574463, "learning_rate": 0.0001, "loss": 0.0117, "step": 108380 }, { "epoch": 713.0921052631579, "grad_norm": 1.5078556537628174, "learning_rate": 0.0001, "loss": 0.0144, "step": 108390 }, { "epoch": 713.1578947368421, "grad_norm": 1.256677269935608, "learning_rate": 0.0001, "loss": 0.0128, "step": 108400 }, { "epoch": 713.2236842105264, "grad_norm": 1.6815972328186035, "learning_rate": 0.0001, "loss": 0.0116, "step": 108410 }, { "epoch": 713.2894736842105, "grad_norm": 1.8192025423049927, "learning_rate": 0.0001, "loss": 0.0123, "step": 108420 }, { "epoch": 713.3552631578947, "grad_norm": 1.4074296951293945, "learning_rate": 0.0001, "loss": 0.0117, "step": 108430 }, { "epoch": 713.421052631579, "grad_norm": 1.567453384399414, "learning_rate": 0.0001, "loss": 0.0121, "step": 108440 }, { "epoch": 713.4868421052631, "grad_norm": 1.9929256439208984, "learning_rate": 0.0001, "loss": 0.017, "step": 108450 }, { "epoch": 713.5526315789474, "grad_norm": 1.6735024452209473, "learning_rate": 0.0001, "loss": 0.0147, "step": 108460 }, { "epoch": 713.6184210526316, "grad_norm": 1.3476476669311523, "learning_rate": 0.0001, "loss": 0.0148, "step": 108470 }, { "epoch": 713.6842105263158, "grad_norm": 1.915734887123108, "learning_rate": 0.0001, "loss": 0.0146, "step": 108480 }, { "epoch": 713.75, "grad_norm": 1.885774850845337, "learning_rate": 0.0001, "loss": 0.0126, "step": 108490 }, { "epoch": 713.8157894736842, "grad_norm": 1.6562108993530273, "learning_rate": 0.0001, "loss": 0.0135, "step": 108500 }, { "epoch": 713.8815789473684, "grad_norm": 1.4962443113327026, "learning_rate": 0.0001, "loss": 0.0166, "step": 108510 }, { "epoch": 713.9473684210526, "grad_norm": 1.6011945009231567, "learning_rate": 0.0001, "loss": 0.0131, "step": 108520 }, { "epoch": 714.0131578947369, "grad_norm": 1.660852313041687, "learning_rate": 0.0001, "loss": 0.0116, "step": 108530 }, { "epoch": 714.078947368421, "grad_norm": 1.685308814048767, "learning_rate": 0.0001, "loss": 0.012, "step": 108540 }, { "epoch": 714.1447368421053, "grad_norm": 1.8562002182006836, "learning_rate": 0.0001, "loss": 0.0132, "step": 108550 }, { "epoch": 714.2105263157895, "grad_norm": 1.2185122966766357, "learning_rate": 0.0001, "loss": 0.0116, "step": 108560 }, { "epoch": 714.2763157894736, "grad_norm": 1.683568000793457, "learning_rate": 0.0001, "loss": 0.0145, "step": 108570 }, { "epoch": 714.3421052631579, "grad_norm": 1.4309282302856445, "learning_rate": 0.0001, "loss": 0.0099, "step": 108580 }, { "epoch": 714.4078947368421, "grad_norm": 1.4828503131866455, "learning_rate": 0.0001, "loss": 0.015, "step": 108590 }, { "epoch": 714.4736842105264, "grad_norm": 1.1520700454711914, "learning_rate": 0.0001, "loss": 0.0128, "step": 108600 }, { "epoch": 714.5394736842105, "grad_norm": 1.1619194746017456, "learning_rate": 0.0001, "loss": 0.0118, "step": 108610 }, { "epoch": 714.6052631578947, "grad_norm": 1.0778290033340454, "learning_rate": 0.0001, "loss": 0.0156, "step": 108620 }, { "epoch": 714.671052631579, "grad_norm": 1.2762165069580078, "learning_rate": 0.0001, "loss": 0.0122, "step": 108630 }, { "epoch": 714.7368421052631, "grad_norm": 1.0568164587020874, "learning_rate": 0.0001, "loss": 0.019, "step": 108640 }, { "epoch": 714.8026315789474, "grad_norm": 1.4410871267318726, "learning_rate": 0.0001, "loss": 0.0148, "step": 108650 }, { "epoch": 714.8684210526316, "grad_norm": 1.25480318069458, "learning_rate": 0.0001, "loss": 0.0121, "step": 108660 }, { "epoch": 714.9342105263158, "grad_norm": 1.3132293224334717, "learning_rate": 0.0001, "loss": 0.0182, "step": 108670 }, { "epoch": 715.0, "grad_norm": 2.014143466949463, "learning_rate": 0.0001, "loss": 0.0168, "step": 108680 }, { "epoch": 715.0657894736842, "grad_norm": 1.5422223806381226, "learning_rate": 0.0001, "loss": 0.0133, "step": 108690 }, { "epoch": 715.1315789473684, "grad_norm": 1.5425724983215332, "learning_rate": 0.0001, "loss": 0.0154, "step": 108700 }, { "epoch": 715.1973684210526, "grad_norm": 1.1919814348220825, "learning_rate": 0.0001, "loss": 0.0127, "step": 108710 }, { "epoch": 715.2631578947369, "grad_norm": 1.4741380214691162, "learning_rate": 0.0001, "loss": 0.0158, "step": 108720 }, { "epoch": 715.328947368421, "grad_norm": 1.7505828142166138, "learning_rate": 0.0001, "loss": 0.011, "step": 108730 }, { "epoch": 715.3947368421053, "grad_norm": 1.8008205890655518, "learning_rate": 0.0001, "loss": 0.0121, "step": 108740 }, { "epoch": 715.4605263157895, "grad_norm": 1.4205087423324585, "learning_rate": 0.0001, "loss": 0.0137, "step": 108750 }, { "epoch": 715.5263157894736, "grad_norm": 1.0979204177856445, "learning_rate": 0.0001, "loss": 0.0139, "step": 108760 }, { "epoch": 715.5921052631579, "grad_norm": 1.8030610084533691, "learning_rate": 0.0001, "loss": 0.0146, "step": 108770 }, { "epoch": 715.6578947368421, "grad_norm": 1.6223801374435425, "learning_rate": 0.0001, "loss": 0.0159, "step": 108780 }, { "epoch": 715.7236842105264, "grad_norm": 1.291346788406372, "learning_rate": 0.0001, "loss": 0.0141, "step": 108790 }, { "epoch": 715.7894736842105, "grad_norm": 1.165731430053711, "learning_rate": 0.0001, "loss": 0.0136, "step": 108800 }, { "epoch": 715.8552631578947, "grad_norm": 1.5866122245788574, "learning_rate": 0.0001, "loss": 0.0134, "step": 108810 }, { "epoch": 715.921052631579, "grad_norm": 1.4192413091659546, "learning_rate": 0.0001, "loss": 0.018, "step": 108820 }, { "epoch": 715.9868421052631, "grad_norm": 1.4203414916992188, "learning_rate": 0.0001, "loss": 0.0164, "step": 108830 }, { "epoch": 716.0526315789474, "grad_norm": 1.6219761371612549, "learning_rate": 0.0001, "loss": 0.0136, "step": 108840 }, { "epoch": 716.1184210526316, "grad_norm": 1.432753324508667, "learning_rate": 0.0001, "loss": 0.0145, "step": 108850 }, { "epoch": 716.1842105263158, "grad_norm": 1.5987825393676758, "learning_rate": 0.0001, "loss": 0.0135, "step": 108860 }, { "epoch": 716.25, "grad_norm": 1.476148247718811, "learning_rate": 0.0001, "loss": 0.015, "step": 108870 }, { "epoch": 716.3157894736842, "grad_norm": 1.8287761211395264, "learning_rate": 0.0001, "loss": 0.0152, "step": 108880 }, { "epoch": 716.3815789473684, "grad_norm": 1.6089622974395752, "learning_rate": 0.0001, "loss": 0.0113, "step": 108890 }, { "epoch": 716.4473684210526, "grad_norm": 1.283048152923584, "learning_rate": 0.0001, "loss": 0.0174, "step": 108900 }, { "epoch": 716.5131578947369, "grad_norm": 1.1184228658676147, "learning_rate": 0.0001, "loss": 0.0145, "step": 108910 }, { "epoch": 716.578947368421, "grad_norm": 1.4481934309005737, "learning_rate": 0.0001, "loss": 0.0148, "step": 108920 }, { "epoch": 716.6447368421053, "grad_norm": 1.1468799114227295, "learning_rate": 0.0001, "loss": 0.0135, "step": 108930 }, { "epoch": 716.7105263157895, "grad_norm": 1.5600495338439941, "learning_rate": 0.0001, "loss": 0.0166, "step": 108940 }, { "epoch": 716.7763157894736, "grad_norm": 1.8062715530395508, "learning_rate": 0.0001, "loss": 0.0127, "step": 108950 }, { "epoch": 716.8421052631579, "grad_norm": 1.5462164878845215, "learning_rate": 0.0001, "loss": 0.0144, "step": 108960 }, { "epoch": 716.9078947368421, "grad_norm": 1.6022251844406128, "learning_rate": 0.0001, "loss": 0.0143, "step": 108970 }, { "epoch": 716.9736842105264, "grad_norm": 1.3368595838546753, "learning_rate": 0.0001, "loss": 0.017, "step": 108980 }, { "epoch": 717.0394736842105, "grad_norm": 1.415818214416504, "learning_rate": 0.0001, "loss": 0.0176, "step": 108990 }, { "epoch": 717.1052631578947, "grad_norm": 1.8454238176345825, "learning_rate": 0.0001, "loss": 0.0116, "step": 109000 }, { "epoch": 717.171052631579, "grad_norm": 1.8050730228424072, "learning_rate": 0.0001, "loss": 0.0164, "step": 109010 }, { "epoch": 717.2368421052631, "grad_norm": 1.4111343622207642, "learning_rate": 0.0001, "loss": 0.0134, "step": 109020 }, { "epoch": 717.3026315789474, "grad_norm": 1.593401551246643, "learning_rate": 0.0001, "loss": 0.0129, "step": 109030 }, { "epoch": 717.3684210526316, "grad_norm": 1.4422354698181152, "learning_rate": 0.0001, "loss": 0.0147, "step": 109040 }, { "epoch": 717.4342105263158, "grad_norm": 1.465388536453247, "learning_rate": 0.0001, "loss": 0.0155, "step": 109050 }, { "epoch": 717.5, "grad_norm": 1.635627031326294, "learning_rate": 0.0001, "loss": 0.0162, "step": 109060 }, { "epoch": 717.5657894736842, "grad_norm": 1.127429485321045, "learning_rate": 0.0001, "loss": 0.0147, "step": 109070 }, { "epoch": 717.6315789473684, "grad_norm": 1.6850956678390503, "learning_rate": 0.0001, "loss": 0.0139, "step": 109080 }, { "epoch": 717.6973684210526, "grad_norm": 1.4580435752868652, "learning_rate": 0.0001, "loss": 0.0135, "step": 109090 }, { "epoch": 717.7631578947369, "grad_norm": 1.8279330730438232, "learning_rate": 0.0001, "loss": 0.0165, "step": 109100 }, { "epoch": 717.828947368421, "grad_norm": 1.2766921520233154, "learning_rate": 0.0001, "loss": 0.0149, "step": 109110 }, { "epoch": 717.8947368421053, "grad_norm": 1.8278192281723022, "learning_rate": 0.0001, "loss": 0.0177, "step": 109120 }, { "epoch": 717.9605263157895, "grad_norm": 1.4895460605621338, "learning_rate": 0.0001, "loss": 0.0119, "step": 109130 }, { "epoch": 718.0263157894736, "grad_norm": 1.5433272123336792, "learning_rate": 0.0001, "loss": 0.0132, "step": 109140 }, { "epoch": 718.0921052631579, "grad_norm": 1.6386252641677856, "learning_rate": 0.0001, "loss": 0.0112, "step": 109150 }, { "epoch": 718.1578947368421, "grad_norm": 1.6236411333084106, "learning_rate": 0.0001, "loss": 0.0178, "step": 109160 }, { "epoch": 718.2236842105264, "grad_norm": 1.3206182718276978, "learning_rate": 0.0001, "loss": 0.0144, "step": 109170 }, { "epoch": 718.2894736842105, "grad_norm": 1.1901973485946655, "learning_rate": 0.0001, "loss": 0.0117, "step": 109180 }, { "epoch": 718.3552631578947, "grad_norm": 1.803585410118103, "learning_rate": 0.0001, "loss": 0.0119, "step": 109190 }, { "epoch": 718.421052631579, "grad_norm": 1.882531762123108, "learning_rate": 0.0001, "loss": 0.0125, "step": 109200 }, { "epoch": 718.4868421052631, "grad_norm": 1.6395246982574463, "learning_rate": 0.0001, "loss": 0.0151, "step": 109210 }, { "epoch": 718.5526315789474, "grad_norm": 1.9978456497192383, "learning_rate": 0.0001, "loss": 0.014, "step": 109220 }, { "epoch": 718.6184210526316, "grad_norm": 1.5531978607177734, "learning_rate": 0.0001, "loss": 0.015, "step": 109230 }, { "epoch": 718.6842105263158, "grad_norm": 1.522524356842041, "learning_rate": 0.0001, "loss": 0.0137, "step": 109240 }, { "epoch": 718.75, "grad_norm": 1.490006446838379, "learning_rate": 0.0001, "loss": 0.0181, "step": 109250 }, { "epoch": 718.8157894736842, "grad_norm": 1.6388007402420044, "learning_rate": 0.0001, "loss": 0.0179, "step": 109260 }, { "epoch": 718.8815789473684, "grad_norm": 1.461249828338623, "learning_rate": 0.0001, "loss": 0.0148, "step": 109270 }, { "epoch": 718.9473684210526, "grad_norm": 1.2597867250442505, "learning_rate": 0.0001, "loss": 0.0129, "step": 109280 }, { "epoch": 719.0131578947369, "grad_norm": 1.6670854091644287, "learning_rate": 0.0001, "loss": 0.0127, "step": 109290 }, { "epoch": 719.078947368421, "grad_norm": 1.2833703756332397, "learning_rate": 0.0001, "loss": 0.0134, "step": 109300 }, { "epoch": 719.1447368421053, "grad_norm": 1.499019980430603, "learning_rate": 0.0001, "loss": 0.0124, "step": 109310 }, { "epoch": 719.2105263157895, "grad_norm": 1.110885739326477, "learning_rate": 0.0001, "loss": 0.0131, "step": 109320 }, { "epoch": 719.2763157894736, "grad_norm": 1.2118040323257446, "learning_rate": 0.0001, "loss": 0.0135, "step": 109330 }, { "epoch": 719.3421052631579, "grad_norm": 1.4070537090301514, "learning_rate": 0.0001, "loss": 0.0144, "step": 109340 }, { "epoch": 719.4078947368421, "grad_norm": 1.7039008140563965, "learning_rate": 0.0001, "loss": 0.0142, "step": 109350 }, { "epoch": 719.4736842105264, "grad_norm": 1.534133791923523, "learning_rate": 0.0001, "loss": 0.0121, "step": 109360 }, { "epoch": 719.5394736842105, "grad_norm": 1.130072832107544, "learning_rate": 0.0001, "loss": 0.0116, "step": 109370 }, { "epoch": 719.6052631578947, "grad_norm": 1.7971739768981934, "learning_rate": 0.0001, "loss": 0.0144, "step": 109380 }, { "epoch": 719.671052631579, "grad_norm": 1.9501219987869263, "learning_rate": 0.0001, "loss": 0.0131, "step": 109390 }, { "epoch": 719.7368421052631, "grad_norm": 1.8662251234054565, "learning_rate": 0.0001, "loss": 0.0169, "step": 109400 }, { "epoch": 719.8026315789474, "grad_norm": 1.8113676309585571, "learning_rate": 0.0001, "loss": 0.0123, "step": 109410 }, { "epoch": 719.8684210526316, "grad_norm": 1.4876306056976318, "learning_rate": 0.0001, "loss": 0.0177, "step": 109420 }, { "epoch": 719.9342105263158, "grad_norm": 2.062091827392578, "learning_rate": 0.0001, "loss": 0.0149, "step": 109430 }, { "epoch": 720.0, "grad_norm": 1.8471590280532837, "learning_rate": 0.0001, "loss": 0.0155, "step": 109440 }, { "epoch": 720.0657894736842, "grad_norm": 1.7901567220687866, "learning_rate": 0.0001, "loss": 0.0155, "step": 109450 }, { "epoch": 720.1315789473684, "grad_norm": 1.4948748350143433, "learning_rate": 0.0001, "loss": 0.0159, "step": 109460 }, { "epoch": 720.1973684210526, "grad_norm": 1.3319443464279175, "learning_rate": 0.0001, "loss": 0.0116, "step": 109470 }, { "epoch": 720.2631578947369, "grad_norm": 1.57491135597229, "learning_rate": 0.0001, "loss": 0.011, "step": 109480 }, { "epoch": 720.328947368421, "grad_norm": 1.395958423614502, "learning_rate": 0.0001, "loss": 0.0166, "step": 109490 }, { "epoch": 720.3947368421053, "grad_norm": 1.4648628234863281, "learning_rate": 0.0001, "loss": 0.0132, "step": 109500 }, { "epoch": 720.4605263157895, "grad_norm": 1.3796303272247314, "learning_rate": 0.0001, "loss": 0.0134, "step": 109510 }, { "epoch": 720.5263157894736, "grad_norm": 1.6106595993041992, "learning_rate": 0.0001, "loss": 0.0144, "step": 109520 }, { "epoch": 720.5921052631579, "grad_norm": 2.032975912094116, "learning_rate": 0.0001, "loss": 0.016, "step": 109530 }, { "epoch": 720.6578947368421, "grad_norm": 1.30403470993042, "learning_rate": 0.0001, "loss": 0.0134, "step": 109540 }, { "epoch": 720.7236842105264, "grad_norm": 1.8596233129501343, "learning_rate": 0.0001, "loss": 0.0114, "step": 109550 }, { "epoch": 720.7894736842105, "grad_norm": 1.623445749282837, "learning_rate": 0.0001, "loss": 0.0137, "step": 109560 }, { "epoch": 720.8552631578947, "grad_norm": 1.7958581447601318, "learning_rate": 0.0001, "loss": 0.0126, "step": 109570 }, { "epoch": 720.921052631579, "grad_norm": 1.4523776769638062, "learning_rate": 0.0001, "loss": 0.011, "step": 109580 }, { "epoch": 720.9868421052631, "grad_norm": 1.4133187532424927, "learning_rate": 0.0001, "loss": 0.0173, "step": 109590 }, { "epoch": 721.0526315789474, "grad_norm": 1.5261205434799194, "learning_rate": 0.0001, "loss": 0.0163, "step": 109600 }, { "epoch": 721.1184210526316, "grad_norm": 0.9073880910873413, "learning_rate": 0.0001, "loss": 0.0119, "step": 109610 }, { "epoch": 721.1842105263158, "grad_norm": 1.5219533443450928, "learning_rate": 0.0001, "loss": 0.0114, "step": 109620 }, { "epoch": 721.25, "grad_norm": 1.5388362407684326, "learning_rate": 0.0001, "loss": 0.0132, "step": 109630 }, { "epoch": 721.3157894736842, "grad_norm": 1.33000910282135, "learning_rate": 0.0001, "loss": 0.0137, "step": 109640 }, { "epoch": 721.3815789473684, "grad_norm": 1.6848204135894775, "learning_rate": 0.0001, "loss": 0.0141, "step": 109650 }, { "epoch": 721.4473684210526, "grad_norm": 1.1474586725234985, "learning_rate": 0.0001, "loss": 0.0151, "step": 109660 }, { "epoch": 721.5131578947369, "grad_norm": 1.6495625972747803, "learning_rate": 0.0001, "loss": 0.0131, "step": 109670 }, { "epoch": 721.578947368421, "grad_norm": 1.5188108682632446, "learning_rate": 0.0001, "loss": 0.0138, "step": 109680 }, { "epoch": 721.6447368421053, "grad_norm": 1.9392451047897339, "learning_rate": 0.0001, "loss": 0.0147, "step": 109690 }, { "epoch": 721.7105263157895, "grad_norm": 1.4630848169326782, "learning_rate": 0.0001, "loss": 0.0159, "step": 109700 }, { "epoch": 721.7763157894736, "grad_norm": 1.552245020866394, "learning_rate": 0.0001, "loss": 0.0129, "step": 109710 }, { "epoch": 721.8421052631579, "grad_norm": 1.4126038551330566, "learning_rate": 0.0001, "loss": 0.0138, "step": 109720 }, { "epoch": 721.9078947368421, "grad_norm": 1.631232738494873, "learning_rate": 0.0001, "loss": 0.0137, "step": 109730 }, { "epoch": 721.9736842105264, "grad_norm": 1.3475772142410278, "learning_rate": 0.0001, "loss": 0.0136, "step": 109740 }, { "epoch": 722.0394736842105, "grad_norm": 1.0492446422576904, "learning_rate": 0.0001, "loss": 0.0128, "step": 109750 }, { "epoch": 722.1052631578947, "grad_norm": 1.1506385803222656, "learning_rate": 0.0001, "loss": 0.0158, "step": 109760 }, { "epoch": 722.171052631579, "grad_norm": 1.807943344116211, "learning_rate": 0.0001, "loss": 0.0126, "step": 109770 }, { "epoch": 722.2368421052631, "grad_norm": 1.7445530891418457, "learning_rate": 0.0001, "loss": 0.0163, "step": 109780 }, { "epoch": 722.3026315789474, "grad_norm": 1.3332595825195312, "learning_rate": 0.0001, "loss": 0.012, "step": 109790 }, { "epoch": 722.3684210526316, "grad_norm": 1.431434988975525, "learning_rate": 0.0001, "loss": 0.0137, "step": 109800 }, { "epoch": 722.4342105263158, "grad_norm": 1.5977734327316284, "learning_rate": 0.0001, "loss": 0.0129, "step": 109810 }, { "epoch": 722.5, "grad_norm": 1.3816311359405518, "learning_rate": 0.0001, "loss": 0.0128, "step": 109820 }, { "epoch": 722.5657894736842, "grad_norm": 1.5664116144180298, "learning_rate": 0.0001, "loss": 0.0123, "step": 109830 }, { "epoch": 722.6315789473684, "grad_norm": 1.250872015953064, "learning_rate": 0.0001, "loss": 0.0119, "step": 109840 }, { "epoch": 722.6973684210526, "grad_norm": 1.5894737243652344, "learning_rate": 0.0001, "loss": 0.0145, "step": 109850 }, { "epoch": 722.7631578947369, "grad_norm": 1.4161667823791504, "learning_rate": 0.0001, "loss": 0.0146, "step": 109860 }, { "epoch": 722.828947368421, "grad_norm": 1.246541142463684, "learning_rate": 0.0001, "loss": 0.0134, "step": 109870 }, { "epoch": 722.8947368421053, "grad_norm": 1.5826404094696045, "learning_rate": 0.0001, "loss": 0.0121, "step": 109880 }, { "epoch": 722.9605263157895, "grad_norm": 1.1958290338516235, "learning_rate": 0.0001, "loss": 0.0149, "step": 109890 }, { "epoch": 723.0263157894736, "grad_norm": 1.864985704421997, "learning_rate": 0.0001, "loss": 0.0181, "step": 109900 }, { "epoch": 723.0921052631579, "grad_norm": 1.5072077512741089, "learning_rate": 0.0001, "loss": 0.0097, "step": 109910 }, { "epoch": 723.1578947368421, "grad_norm": 1.5497217178344727, "learning_rate": 0.0001, "loss": 0.0116, "step": 109920 }, { "epoch": 723.2236842105264, "grad_norm": 1.9139411449432373, "learning_rate": 0.0001, "loss": 0.0132, "step": 109930 }, { "epoch": 723.2894736842105, "grad_norm": 1.5524400472640991, "learning_rate": 0.0001, "loss": 0.0186, "step": 109940 }, { "epoch": 723.3552631578947, "grad_norm": 2.0271284580230713, "learning_rate": 0.0001, "loss": 0.0122, "step": 109950 }, { "epoch": 723.421052631579, "grad_norm": 1.84784996509552, "learning_rate": 0.0001, "loss": 0.0126, "step": 109960 }, { "epoch": 723.4868421052631, "grad_norm": 1.6121864318847656, "learning_rate": 0.0001, "loss": 0.0128, "step": 109970 }, { "epoch": 723.5526315789474, "grad_norm": 2.0428266525268555, "learning_rate": 0.0001, "loss": 0.02, "step": 109980 }, { "epoch": 723.6184210526316, "grad_norm": 1.3942238092422485, "learning_rate": 0.0001, "loss": 0.011, "step": 109990 }, { "epoch": 723.6842105263158, "grad_norm": 1.4969067573547363, "learning_rate": 0.0001, "loss": 0.0117, "step": 110000 }, { "epoch": 723.75, "grad_norm": 1.7113659381866455, "learning_rate": 0.0001, "loss": 0.0141, "step": 110010 }, { "epoch": 723.8157894736842, "grad_norm": 1.795746922492981, "learning_rate": 0.0001, "loss": 0.0117, "step": 110020 }, { "epoch": 723.8815789473684, "grad_norm": 1.416536808013916, "learning_rate": 0.0001, "loss": 0.011, "step": 110030 }, { "epoch": 723.9473684210526, "grad_norm": 1.0347931385040283, "learning_rate": 0.0001, "loss": 0.0139, "step": 110040 }, { "epoch": 724.0131578947369, "grad_norm": 1.633273720741272, "learning_rate": 0.0001, "loss": 0.0131, "step": 110050 }, { "epoch": 724.078947368421, "grad_norm": 1.595167875289917, "learning_rate": 0.0001, "loss": 0.0107, "step": 110060 }, { "epoch": 724.1447368421053, "grad_norm": 1.1813035011291504, "learning_rate": 0.0001, "loss": 0.0145, "step": 110070 }, { "epoch": 724.2105263157895, "grad_norm": 1.4119372367858887, "learning_rate": 0.0001, "loss": 0.0136, "step": 110080 }, { "epoch": 724.2763157894736, "grad_norm": 1.3069978952407837, "learning_rate": 0.0001, "loss": 0.0175, "step": 110090 }, { "epoch": 724.3421052631579, "grad_norm": 1.8578588962554932, "learning_rate": 0.0001, "loss": 0.0145, "step": 110100 }, { "epoch": 724.4078947368421, "grad_norm": 1.48318612575531, "learning_rate": 0.0001, "loss": 0.0103, "step": 110110 }, { "epoch": 724.4736842105264, "grad_norm": 1.1746701002120972, "learning_rate": 0.0001, "loss": 0.0122, "step": 110120 }, { "epoch": 724.5394736842105, "grad_norm": 1.4297231435775757, "learning_rate": 0.0001, "loss": 0.0123, "step": 110130 }, { "epoch": 724.6052631578947, "grad_norm": 1.832372784614563, "learning_rate": 0.0001, "loss": 0.0169, "step": 110140 }, { "epoch": 724.671052631579, "grad_norm": 1.450789213180542, "learning_rate": 0.0001, "loss": 0.0169, "step": 110150 }, { "epoch": 724.7368421052631, "grad_norm": 1.231414794921875, "learning_rate": 0.0001, "loss": 0.0126, "step": 110160 }, { "epoch": 724.8026315789474, "grad_norm": 1.528393030166626, "learning_rate": 0.0001, "loss": 0.0117, "step": 110170 }, { "epoch": 724.8684210526316, "grad_norm": 1.9260362386703491, "learning_rate": 0.0001, "loss": 0.012, "step": 110180 }, { "epoch": 724.9342105263158, "grad_norm": 1.6268811225891113, "learning_rate": 0.0001, "loss": 0.0107, "step": 110190 }, { "epoch": 725.0, "grad_norm": 1.3794639110565186, "learning_rate": 0.0001, "loss": 0.0177, "step": 110200 }, { "epoch": 725.0657894736842, "grad_norm": 1.5467252731323242, "learning_rate": 0.0001, "loss": 0.0126, "step": 110210 }, { "epoch": 725.1315789473684, "grad_norm": 1.5248676538467407, "learning_rate": 0.0001, "loss": 0.0157, "step": 110220 }, { "epoch": 725.1973684210526, "grad_norm": 2.1822237968444824, "learning_rate": 0.0001, "loss": 0.0134, "step": 110230 }, { "epoch": 725.2631578947369, "grad_norm": 2.10052490234375, "learning_rate": 0.0001, "loss": 0.013, "step": 110240 }, { "epoch": 725.328947368421, "grad_norm": 1.655921459197998, "learning_rate": 0.0001, "loss": 0.0132, "step": 110250 }, { "epoch": 725.3947368421053, "grad_norm": 1.4991308450698853, "learning_rate": 0.0001, "loss": 0.0097, "step": 110260 }, { "epoch": 725.4605263157895, "grad_norm": 1.5819746255874634, "learning_rate": 0.0001, "loss": 0.0151, "step": 110270 }, { "epoch": 725.5263157894736, "grad_norm": 1.05583655834198, "learning_rate": 0.0001, "loss": 0.0113, "step": 110280 }, { "epoch": 725.5921052631579, "grad_norm": 1.1603057384490967, "learning_rate": 0.0001, "loss": 0.0149, "step": 110290 }, { "epoch": 725.6578947368421, "grad_norm": 1.5309010744094849, "learning_rate": 0.0001, "loss": 0.0156, "step": 110300 }, { "epoch": 725.7236842105264, "grad_norm": 1.393668293952942, "learning_rate": 0.0001, "loss": 0.0197, "step": 110310 }, { "epoch": 725.7894736842105, "grad_norm": 1.6978864669799805, "learning_rate": 0.0001, "loss": 0.0128, "step": 110320 }, { "epoch": 725.8552631578947, "grad_norm": 1.3656593561172485, "learning_rate": 0.0001, "loss": 0.0121, "step": 110330 }, { "epoch": 725.921052631579, "grad_norm": 1.6134661436080933, "learning_rate": 0.0001, "loss": 0.0117, "step": 110340 }, { "epoch": 725.9868421052631, "grad_norm": 1.6812870502471924, "learning_rate": 0.0001, "loss": 0.0113, "step": 110350 }, { "epoch": 726.0526315789474, "grad_norm": 1.26609206199646, "learning_rate": 0.0001, "loss": 0.0142, "step": 110360 }, { "epoch": 726.1184210526316, "grad_norm": 1.5601465702056885, "learning_rate": 0.0001, "loss": 0.0122, "step": 110370 }, { "epoch": 726.1842105263158, "grad_norm": 1.6695332527160645, "learning_rate": 0.0001, "loss": 0.0119, "step": 110380 }, { "epoch": 726.25, "grad_norm": 1.577055811882019, "learning_rate": 0.0001, "loss": 0.012, "step": 110390 }, { "epoch": 726.3157894736842, "grad_norm": 1.6685863733291626, "learning_rate": 0.0001, "loss": 0.015, "step": 110400 }, { "epoch": 726.3815789473684, "grad_norm": 1.4638597965240479, "learning_rate": 0.0001, "loss": 0.0148, "step": 110410 }, { "epoch": 726.4473684210526, "grad_norm": 2.117112398147583, "learning_rate": 0.0001, "loss": 0.0097, "step": 110420 }, { "epoch": 726.5131578947369, "grad_norm": 1.5204790830612183, "learning_rate": 0.0001, "loss": 0.0128, "step": 110430 }, { "epoch": 726.578947368421, "grad_norm": 1.2949196100234985, "learning_rate": 0.0001, "loss": 0.0135, "step": 110440 }, { "epoch": 726.6447368421053, "grad_norm": 1.5161006450653076, "learning_rate": 0.0001, "loss": 0.0167, "step": 110450 }, { "epoch": 726.7105263157895, "grad_norm": 1.7724345922470093, "learning_rate": 0.0001, "loss": 0.0179, "step": 110460 }, { "epoch": 726.7763157894736, "grad_norm": 1.815535545349121, "learning_rate": 0.0001, "loss": 0.0156, "step": 110470 }, { "epoch": 726.8421052631579, "grad_norm": 1.6082911491394043, "learning_rate": 0.0001, "loss": 0.0134, "step": 110480 }, { "epoch": 726.9078947368421, "grad_norm": 1.8135734796524048, "learning_rate": 0.0001, "loss": 0.0122, "step": 110490 }, { "epoch": 726.9736842105264, "grad_norm": 1.1466113328933716, "learning_rate": 0.0001, "loss": 0.0131, "step": 110500 }, { "epoch": 727.0394736842105, "grad_norm": 1.85725736618042, "learning_rate": 0.0001, "loss": 0.0098, "step": 110510 }, { "epoch": 727.1052631578947, "grad_norm": 1.1039602756500244, "learning_rate": 0.0001, "loss": 0.0129, "step": 110520 }, { "epoch": 727.171052631579, "grad_norm": 1.2882057428359985, "learning_rate": 0.0001, "loss": 0.016, "step": 110530 }, { "epoch": 727.2368421052631, "grad_norm": 1.59529447555542, "learning_rate": 0.0001, "loss": 0.011, "step": 110540 }, { "epoch": 727.3026315789474, "grad_norm": 1.9237847328186035, "learning_rate": 0.0001, "loss": 0.0123, "step": 110550 }, { "epoch": 727.3684210526316, "grad_norm": 1.6841621398925781, "learning_rate": 0.0001, "loss": 0.0138, "step": 110560 }, { "epoch": 727.4342105263158, "grad_norm": 1.2068192958831787, "learning_rate": 0.0001, "loss": 0.0122, "step": 110570 }, { "epoch": 727.5, "grad_norm": 1.4827370643615723, "learning_rate": 0.0001, "loss": 0.0111, "step": 110580 }, { "epoch": 727.5657894736842, "grad_norm": 0.9381465911865234, "learning_rate": 0.0001, "loss": 0.0132, "step": 110590 }, { "epoch": 727.6315789473684, "grad_norm": 2.0444087982177734, "learning_rate": 0.0001, "loss": 0.0149, "step": 110600 }, { "epoch": 727.6973684210526, "grad_norm": 1.609196662902832, "learning_rate": 0.0001, "loss": 0.0137, "step": 110610 }, { "epoch": 727.7631578947369, "grad_norm": 1.712641716003418, "learning_rate": 0.0001, "loss": 0.0137, "step": 110620 }, { "epoch": 727.828947368421, "grad_norm": 1.418215274810791, "learning_rate": 0.0001, "loss": 0.0177, "step": 110630 }, { "epoch": 727.8947368421053, "grad_norm": 1.4188768863677979, "learning_rate": 0.0001, "loss": 0.0133, "step": 110640 }, { "epoch": 727.9605263157895, "grad_norm": 1.7957977056503296, "learning_rate": 0.0001, "loss": 0.013, "step": 110650 }, { "epoch": 728.0263157894736, "grad_norm": 1.1189132928848267, "learning_rate": 0.0001, "loss": 0.0193, "step": 110660 }, { "epoch": 728.0921052631579, "grad_norm": 1.3404395580291748, "learning_rate": 0.0001, "loss": 0.0126, "step": 110670 }, { "epoch": 728.1578947368421, "grad_norm": 1.8713711500167847, "learning_rate": 0.0001, "loss": 0.0139, "step": 110680 }, { "epoch": 728.2236842105264, "grad_norm": 1.4579238891601562, "learning_rate": 0.0001, "loss": 0.0154, "step": 110690 }, { "epoch": 728.2894736842105, "grad_norm": 1.1792954206466675, "learning_rate": 0.0001, "loss": 0.0149, "step": 110700 }, { "epoch": 728.3552631578947, "grad_norm": 1.4670668840408325, "learning_rate": 0.0001, "loss": 0.0136, "step": 110710 }, { "epoch": 728.421052631579, "grad_norm": 1.6201541423797607, "learning_rate": 0.0001, "loss": 0.0133, "step": 110720 }, { "epoch": 728.4868421052631, "grad_norm": 1.306165099143982, "learning_rate": 0.0001, "loss": 0.014, "step": 110730 }, { "epoch": 728.5526315789474, "grad_norm": 1.2705349922180176, "learning_rate": 0.0001, "loss": 0.0161, "step": 110740 }, { "epoch": 728.6184210526316, "grad_norm": 1.3566521406173706, "learning_rate": 0.0001, "loss": 0.0119, "step": 110750 }, { "epoch": 728.6842105263158, "grad_norm": 1.5838695764541626, "learning_rate": 0.0001, "loss": 0.0125, "step": 110760 }, { "epoch": 728.75, "grad_norm": 1.523471713066101, "learning_rate": 0.0001, "loss": 0.0131, "step": 110770 }, { "epoch": 728.8157894736842, "grad_norm": 1.476409912109375, "learning_rate": 0.0001, "loss": 0.0117, "step": 110780 }, { "epoch": 728.8815789473684, "grad_norm": 1.187221884727478, "learning_rate": 0.0001, "loss": 0.0133, "step": 110790 }, { "epoch": 728.9473684210526, "grad_norm": 1.574027180671692, "learning_rate": 0.0001, "loss": 0.0126, "step": 110800 }, { "epoch": 729.0131578947369, "grad_norm": 1.312565565109253, "learning_rate": 0.0001, "loss": 0.0129, "step": 110810 }, { "epoch": 729.078947368421, "grad_norm": 1.7058583498001099, "learning_rate": 0.0001, "loss": 0.0151, "step": 110820 }, { "epoch": 729.1447368421053, "grad_norm": 1.4764180183410645, "learning_rate": 0.0001, "loss": 0.014, "step": 110830 }, { "epoch": 729.2105263157895, "grad_norm": 1.5162911415100098, "learning_rate": 0.0001, "loss": 0.0165, "step": 110840 }, { "epoch": 729.2763157894736, "grad_norm": 1.227679967880249, "learning_rate": 0.0001, "loss": 0.012, "step": 110850 }, { "epoch": 729.3421052631579, "grad_norm": 1.8556959629058838, "learning_rate": 0.0001, "loss": 0.0127, "step": 110860 }, { "epoch": 729.4078947368421, "grad_norm": 1.5118321180343628, "learning_rate": 0.0001, "loss": 0.013, "step": 110870 }, { "epoch": 729.4736842105264, "grad_norm": 1.4224965572357178, "learning_rate": 0.0001, "loss": 0.0179, "step": 110880 }, { "epoch": 729.5394736842105, "grad_norm": 1.7844213247299194, "learning_rate": 0.0001, "loss": 0.0136, "step": 110890 }, { "epoch": 729.6052631578947, "grad_norm": 1.364409327507019, "learning_rate": 0.0001, "loss": 0.0134, "step": 110900 }, { "epoch": 729.671052631579, "grad_norm": 1.308863878250122, "learning_rate": 0.0001, "loss": 0.0117, "step": 110910 }, { "epoch": 729.7368421052631, "grad_norm": 1.4847370386123657, "learning_rate": 0.0001, "loss": 0.0126, "step": 110920 }, { "epoch": 729.8026315789474, "grad_norm": 1.5072227716445923, "learning_rate": 0.0001, "loss": 0.0138, "step": 110930 }, { "epoch": 729.8684210526316, "grad_norm": 1.3912216424942017, "learning_rate": 0.0001, "loss": 0.0157, "step": 110940 }, { "epoch": 729.9342105263158, "grad_norm": 1.27097487449646, "learning_rate": 0.0001, "loss": 0.0161, "step": 110950 }, { "epoch": 730.0, "grad_norm": 1.5978344678878784, "learning_rate": 0.0001, "loss": 0.0106, "step": 110960 }, { "epoch": 730.0657894736842, "grad_norm": 1.7069264650344849, "learning_rate": 0.0001, "loss": 0.0123, "step": 110970 }, { "epoch": 730.1315789473684, "grad_norm": 1.4309308528900146, "learning_rate": 0.0001, "loss": 0.0153, "step": 110980 }, { "epoch": 730.1973684210526, "grad_norm": 2.057236671447754, "learning_rate": 0.0001, "loss": 0.0159, "step": 110990 }, { "epoch": 730.2631578947369, "grad_norm": 1.723588466644287, "learning_rate": 0.0001, "loss": 0.0123, "step": 111000 }, { "epoch": 730.328947368421, "grad_norm": 1.7058550119400024, "learning_rate": 0.0001, "loss": 0.0124, "step": 111010 }, { "epoch": 730.3947368421053, "grad_norm": 1.492188811302185, "learning_rate": 0.0001, "loss": 0.0122, "step": 111020 }, { "epoch": 730.4605263157895, "grad_norm": 1.4676131010055542, "learning_rate": 0.0001, "loss": 0.0182, "step": 111030 }, { "epoch": 730.5263157894736, "grad_norm": 1.1369279623031616, "learning_rate": 0.0001, "loss": 0.0113, "step": 111040 }, { "epoch": 730.5921052631579, "grad_norm": 1.4863098859786987, "learning_rate": 0.0001, "loss": 0.0132, "step": 111050 }, { "epoch": 730.6578947368421, "grad_norm": 1.7057082653045654, "learning_rate": 0.0001, "loss": 0.0145, "step": 111060 }, { "epoch": 730.7236842105264, "grad_norm": 1.1302835941314697, "learning_rate": 0.0001, "loss": 0.017, "step": 111070 }, { "epoch": 730.7894736842105, "grad_norm": 1.4876000881195068, "learning_rate": 0.0001, "loss": 0.0169, "step": 111080 }, { "epoch": 730.8552631578947, "grad_norm": 1.5240318775177002, "learning_rate": 0.0001, "loss": 0.0108, "step": 111090 }, { "epoch": 730.921052631579, "grad_norm": 1.351487636566162, "learning_rate": 0.0001, "loss": 0.0109, "step": 111100 }, { "epoch": 730.9868421052631, "grad_norm": 1.339991807937622, "learning_rate": 0.0001, "loss": 0.0122, "step": 111110 }, { "epoch": 731.0526315789474, "grad_norm": 1.4808413982391357, "learning_rate": 0.0001, "loss": 0.0147, "step": 111120 }, { "epoch": 731.1184210526316, "grad_norm": 1.9286912679672241, "learning_rate": 0.0001, "loss": 0.0147, "step": 111130 }, { "epoch": 731.1842105263158, "grad_norm": 1.243382453918457, "learning_rate": 0.0001, "loss": 0.0136, "step": 111140 }, { "epoch": 731.25, "grad_norm": 1.3635287284851074, "learning_rate": 0.0001, "loss": 0.016, "step": 111150 }, { "epoch": 731.3157894736842, "grad_norm": 1.666502594947815, "learning_rate": 0.0001, "loss": 0.0123, "step": 111160 }, { "epoch": 731.3815789473684, "grad_norm": 1.7934036254882812, "learning_rate": 0.0001, "loss": 0.0148, "step": 111170 }, { "epoch": 731.4473684210526, "grad_norm": 1.7001577615737915, "learning_rate": 0.0001, "loss": 0.0106, "step": 111180 }, { "epoch": 731.5131578947369, "grad_norm": 0.9406490325927734, "learning_rate": 0.0001, "loss": 0.0158, "step": 111190 }, { "epoch": 731.578947368421, "grad_norm": 1.4344310760498047, "learning_rate": 0.0001, "loss": 0.0119, "step": 111200 }, { "epoch": 731.6447368421053, "grad_norm": 1.940828561782837, "learning_rate": 0.0001, "loss": 0.0132, "step": 111210 }, { "epoch": 731.7105263157895, "grad_norm": 1.5913985967636108, "learning_rate": 0.0001, "loss": 0.0165, "step": 111220 }, { "epoch": 731.7763157894736, "grad_norm": 1.1032859086990356, "learning_rate": 0.0001, "loss": 0.0103, "step": 111230 }, { "epoch": 731.8421052631579, "grad_norm": 1.6841540336608887, "learning_rate": 0.0001, "loss": 0.0143, "step": 111240 }, { "epoch": 731.9078947368421, "grad_norm": 1.685033917427063, "learning_rate": 0.0001, "loss": 0.0148, "step": 111250 }, { "epoch": 731.9736842105264, "grad_norm": 1.2265129089355469, "learning_rate": 0.0001, "loss": 0.0114, "step": 111260 }, { "epoch": 732.0394736842105, "grad_norm": 1.7644885778427124, "learning_rate": 0.0001, "loss": 0.0131, "step": 111270 }, { "epoch": 732.1052631578947, "grad_norm": 1.0953365564346313, "learning_rate": 0.0001, "loss": 0.014, "step": 111280 }, { "epoch": 732.171052631579, "grad_norm": 1.4003386497497559, "learning_rate": 0.0001, "loss": 0.0109, "step": 111290 }, { "epoch": 732.2368421052631, "grad_norm": 1.2354754209518433, "learning_rate": 0.0001, "loss": 0.0214, "step": 111300 }, { "epoch": 732.3026315789474, "grad_norm": 1.5865331888198853, "learning_rate": 0.0001, "loss": 0.0112, "step": 111310 }, { "epoch": 732.3684210526316, "grad_norm": 1.1642177104949951, "learning_rate": 0.0001, "loss": 0.0138, "step": 111320 }, { "epoch": 732.4342105263158, "grad_norm": 1.3584882020950317, "learning_rate": 0.0001, "loss": 0.0105, "step": 111330 }, { "epoch": 732.5, "grad_norm": 1.192331075668335, "learning_rate": 0.0001, "loss": 0.0164, "step": 111340 }, { "epoch": 732.5657894736842, "grad_norm": 1.3936668634414673, "learning_rate": 0.0001, "loss": 0.0124, "step": 111350 }, { "epoch": 732.6315789473684, "grad_norm": 1.4478278160095215, "learning_rate": 0.0001, "loss": 0.0135, "step": 111360 }, { "epoch": 732.6973684210526, "grad_norm": 1.2770686149597168, "learning_rate": 0.0001, "loss": 0.0186, "step": 111370 }, { "epoch": 732.7631578947369, "grad_norm": 1.2804921865463257, "learning_rate": 0.0001, "loss": 0.0187, "step": 111380 }, { "epoch": 732.828947368421, "grad_norm": 1.2719964981079102, "learning_rate": 0.0001, "loss": 0.0119, "step": 111390 }, { "epoch": 732.8947368421053, "grad_norm": 1.447725772857666, "learning_rate": 0.0001, "loss": 0.0148, "step": 111400 }, { "epoch": 732.9605263157895, "grad_norm": 1.767167091369629, "learning_rate": 0.0001, "loss": 0.0129, "step": 111410 }, { "epoch": 733.0263157894736, "grad_norm": 1.7356208562850952, "learning_rate": 0.0001, "loss": 0.0122, "step": 111420 }, { "epoch": 733.0921052631579, "grad_norm": 1.5890642404556274, "learning_rate": 0.0001, "loss": 0.0126, "step": 111430 }, { "epoch": 733.1578947368421, "grad_norm": 1.3208529949188232, "learning_rate": 0.0001, "loss": 0.0148, "step": 111440 }, { "epoch": 733.2236842105264, "grad_norm": 1.478582501411438, "learning_rate": 0.0001, "loss": 0.0118, "step": 111450 }, { "epoch": 733.2894736842105, "grad_norm": 1.8075201511383057, "learning_rate": 0.0001, "loss": 0.0149, "step": 111460 }, { "epoch": 733.3552631578947, "grad_norm": 1.1986054182052612, "learning_rate": 0.0001, "loss": 0.0117, "step": 111470 }, { "epoch": 733.421052631579, "grad_norm": 1.044630765914917, "learning_rate": 0.0001, "loss": 0.0127, "step": 111480 }, { "epoch": 733.4868421052631, "grad_norm": 1.6449185609817505, "learning_rate": 0.0001, "loss": 0.0197, "step": 111490 }, { "epoch": 733.5526315789474, "grad_norm": 1.3631700277328491, "learning_rate": 0.0001, "loss": 0.0157, "step": 111500 }, { "epoch": 733.6184210526316, "grad_norm": 1.6495026350021362, "learning_rate": 0.0001, "loss": 0.015, "step": 111510 }, { "epoch": 733.6842105263158, "grad_norm": 1.3066123723983765, "learning_rate": 0.0001, "loss": 0.0143, "step": 111520 }, { "epoch": 733.75, "grad_norm": 1.3054139614105225, "learning_rate": 0.0001, "loss": 0.011, "step": 111530 }, { "epoch": 733.8157894736842, "grad_norm": 0.9702208042144775, "learning_rate": 0.0001, "loss": 0.0116, "step": 111540 }, { "epoch": 733.8815789473684, "grad_norm": 1.2522939443588257, "learning_rate": 0.0001, "loss": 0.0198, "step": 111550 }, { "epoch": 733.9473684210526, "grad_norm": 1.018194556236267, "learning_rate": 0.0001, "loss": 0.0129, "step": 111560 }, { "epoch": 734.0131578947369, "grad_norm": 1.932863473892212, "learning_rate": 0.0001, "loss": 0.0163, "step": 111570 }, { "epoch": 734.078947368421, "grad_norm": 1.395709753036499, "learning_rate": 0.0001, "loss": 0.0135, "step": 111580 }, { "epoch": 734.1447368421053, "grad_norm": 1.436540126800537, "learning_rate": 0.0001, "loss": 0.0143, "step": 111590 }, { "epoch": 734.2105263157895, "grad_norm": 1.1564021110534668, "learning_rate": 0.0001, "loss": 0.0225, "step": 111600 }, { "epoch": 734.2763157894736, "grad_norm": 1.2076818943023682, "learning_rate": 0.0001, "loss": 0.0167, "step": 111610 }, { "epoch": 734.3421052631579, "grad_norm": 1.4946352243423462, "learning_rate": 0.0001, "loss": 0.0161, "step": 111620 }, { "epoch": 734.4078947368421, "grad_norm": 1.4406554698944092, "learning_rate": 0.0001, "loss": 0.0156, "step": 111630 }, { "epoch": 734.4736842105264, "grad_norm": 1.0716273784637451, "learning_rate": 0.0001, "loss": 0.0162, "step": 111640 }, { "epoch": 734.5394736842105, "grad_norm": 1.4954053163528442, "learning_rate": 0.0001, "loss": 0.0187, "step": 111650 }, { "epoch": 734.6052631578947, "grad_norm": 1.6790885925292969, "learning_rate": 0.0001, "loss": 0.0163, "step": 111660 }, { "epoch": 734.671052631579, "grad_norm": 1.5800522565841675, "learning_rate": 0.0001, "loss": 0.016, "step": 111670 }, { "epoch": 734.7368421052631, "grad_norm": 1.3991299867630005, "learning_rate": 0.0001, "loss": 0.0167, "step": 111680 }, { "epoch": 734.8026315789474, "grad_norm": 1.5937224626541138, "learning_rate": 0.0001, "loss": 0.0193, "step": 111690 }, { "epoch": 734.8684210526316, "grad_norm": 1.6647166013717651, "learning_rate": 0.0001, "loss": 0.0115, "step": 111700 }, { "epoch": 734.9342105263158, "grad_norm": 1.4454677104949951, "learning_rate": 0.0001, "loss": 0.013, "step": 111710 }, { "epoch": 735.0, "grad_norm": 1.619437336921692, "learning_rate": 0.0001, "loss": 0.0128, "step": 111720 }, { "epoch": 735.0657894736842, "grad_norm": 1.8721431493759155, "learning_rate": 0.0001, "loss": 0.0171, "step": 111730 }, { "epoch": 735.1315789473684, "grad_norm": 1.7285747528076172, "learning_rate": 0.0001, "loss": 0.0133, "step": 111740 }, { "epoch": 735.1973684210526, "grad_norm": 1.385873794555664, "learning_rate": 0.0001, "loss": 0.0165, "step": 111750 }, { "epoch": 735.2631578947369, "grad_norm": 1.4740252494812012, "learning_rate": 0.0001, "loss": 0.0109, "step": 111760 }, { "epoch": 735.328947368421, "grad_norm": 1.309643030166626, "learning_rate": 0.0001, "loss": 0.0132, "step": 111770 }, { "epoch": 735.3947368421053, "grad_norm": 1.3876996040344238, "learning_rate": 0.0001, "loss": 0.0125, "step": 111780 }, { "epoch": 735.4605263157895, "grad_norm": 1.3858468532562256, "learning_rate": 0.0001, "loss": 0.015, "step": 111790 }, { "epoch": 735.5263157894736, "grad_norm": 1.4818954467773438, "learning_rate": 0.0001, "loss": 0.0202, "step": 111800 }, { "epoch": 735.5921052631579, "grad_norm": 1.4928839206695557, "learning_rate": 0.0001, "loss": 0.0131, "step": 111810 }, { "epoch": 735.6578947368421, "grad_norm": 1.800119161605835, "learning_rate": 0.0001, "loss": 0.0123, "step": 111820 }, { "epoch": 735.7236842105264, "grad_norm": 1.161574125289917, "learning_rate": 0.0001, "loss": 0.013, "step": 111830 }, { "epoch": 735.7894736842105, "grad_norm": 1.3818392753601074, "learning_rate": 0.0001, "loss": 0.0175, "step": 111840 }, { "epoch": 735.8552631578947, "grad_norm": 1.7571219205856323, "learning_rate": 0.0001, "loss": 0.0157, "step": 111850 }, { "epoch": 735.921052631579, "grad_norm": 1.334517478942871, "learning_rate": 0.0001, "loss": 0.0159, "step": 111860 }, { "epoch": 735.9868421052631, "grad_norm": 1.4164639711380005, "learning_rate": 0.0001, "loss": 0.0165, "step": 111870 }, { "epoch": 736.0526315789474, "grad_norm": 1.8380171060562134, "learning_rate": 0.0001, "loss": 0.0121, "step": 111880 }, { "epoch": 736.1184210526316, "grad_norm": 1.5804692506790161, "learning_rate": 0.0001, "loss": 0.0152, "step": 111890 }, { "epoch": 736.1842105263158, "grad_norm": 1.4332809448242188, "learning_rate": 0.0001, "loss": 0.016, "step": 111900 }, { "epoch": 736.25, "grad_norm": 1.163800835609436, "learning_rate": 0.0001, "loss": 0.013, "step": 111910 }, { "epoch": 736.3157894736842, "grad_norm": 1.3816908597946167, "learning_rate": 0.0001, "loss": 0.0147, "step": 111920 }, { "epoch": 736.3815789473684, "grad_norm": 1.4133328199386597, "learning_rate": 0.0001, "loss": 0.0124, "step": 111930 }, { "epoch": 736.4473684210526, "grad_norm": 1.718580961227417, "learning_rate": 0.0001, "loss": 0.0132, "step": 111940 }, { "epoch": 736.5131578947369, "grad_norm": 1.3076061010360718, "learning_rate": 0.0001, "loss": 0.0152, "step": 111950 }, { "epoch": 736.578947368421, "grad_norm": 1.4406518936157227, "learning_rate": 0.0001, "loss": 0.0209, "step": 111960 }, { "epoch": 736.6447368421053, "grad_norm": 1.389461636543274, "learning_rate": 0.0001, "loss": 0.0128, "step": 111970 }, { "epoch": 736.7105263157895, "grad_norm": 1.4145487546920776, "learning_rate": 0.0001, "loss": 0.0152, "step": 111980 }, { "epoch": 736.7763157894736, "grad_norm": 1.7151868343353271, "learning_rate": 0.0001, "loss": 0.0188, "step": 111990 }, { "epoch": 736.8421052631579, "grad_norm": 1.6074055433273315, "learning_rate": 0.0001, "loss": 0.0147, "step": 112000 }, { "epoch": 736.9078947368421, "grad_norm": 1.2027027606964111, "learning_rate": 0.0001, "loss": 0.0127, "step": 112010 }, { "epoch": 736.9736842105264, "grad_norm": 1.4089974164962769, "learning_rate": 0.0001, "loss": 0.0109, "step": 112020 }, { "epoch": 737.0394736842105, "grad_norm": 1.7935802936553955, "learning_rate": 0.0001, "loss": 0.0119, "step": 112030 }, { "epoch": 737.1052631578947, "grad_norm": 1.3398993015289307, "learning_rate": 0.0001, "loss": 0.0147, "step": 112040 }, { "epoch": 737.171052631579, "grad_norm": 1.6838375329971313, "learning_rate": 0.0001, "loss": 0.0111, "step": 112050 }, { "epoch": 737.2368421052631, "grad_norm": 1.3421428203582764, "learning_rate": 0.0001, "loss": 0.0141, "step": 112060 }, { "epoch": 737.3026315789474, "grad_norm": 1.9311078786849976, "learning_rate": 0.0001, "loss": 0.0112, "step": 112070 }, { "epoch": 737.3684210526316, "grad_norm": 1.4566880464553833, "learning_rate": 0.0001, "loss": 0.011, "step": 112080 }, { "epoch": 737.4342105263158, "grad_norm": 1.6845390796661377, "learning_rate": 0.0001, "loss": 0.0134, "step": 112090 }, { "epoch": 737.5, "grad_norm": 1.826919436454773, "learning_rate": 0.0001, "loss": 0.0191, "step": 112100 }, { "epoch": 737.5657894736842, "grad_norm": 1.756319522857666, "learning_rate": 0.0001, "loss": 0.0148, "step": 112110 }, { "epoch": 737.6315789473684, "grad_norm": 1.1782147884368896, "learning_rate": 0.0001, "loss": 0.0145, "step": 112120 }, { "epoch": 737.6973684210526, "grad_norm": 1.5473148822784424, "learning_rate": 0.0001, "loss": 0.0116, "step": 112130 }, { "epoch": 737.7631578947369, "grad_norm": 1.583122968673706, "learning_rate": 0.0001, "loss": 0.0123, "step": 112140 }, { "epoch": 737.828947368421, "grad_norm": 1.1325808763504028, "learning_rate": 0.0001, "loss": 0.0185, "step": 112150 }, { "epoch": 737.8947368421053, "grad_norm": 1.6543529033660889, "learning_rate": 0.0001, "loss": 0.0155, "step": 112160 }, { "epoch": 737.9605263157895, "grad_norm": 1.3007491827011108, "learning_rate": 0.0001, "loss": 0.0167, "step": 112170 }, { "epoch": 738.0263157894736, "grad_norm": 1.2362582683563232, "learning_rate": 0.0001, "loss": 0.0129, "step": 112180 }, { "epoch": 738.0921052631579, "grad_norm": 1.3483859300613403, "learning_rate": 0.0001, "loss": 0.0175, "step": 112190 }, { "epoch": 738.1578947368421, "grad_norm": 1.1783651113510132, "learning_rate": 0.0001, "loss": 0.0132, "step": 112200 }, { "epoch": 738.2236842105264, "grad_norm": 1.1603150367736816, "learning_rate": 0.0001, "loss": 0.0121, "step": 112210 }, { "epoch": 738.2894736842105, "grad_norm": 2.0316128730773926, "learning_rate": 0.0001, "loss": 0.015, "step": 112220 }, { "epoch": 738.3552631578947, "grad_norm": 1.22400963306427, "learning_rate": 0.0001, "loss": 0.0127, "step": 112230 }, { "epoch": 738.421052631579, "grad_norm": 1.3873482942581177, "learning_rate": 0.0001, "loss": 0.0146, "step": 112240 }, { "epoch": 738.4868421052631, "grad_norm": 1.4121209383010864, "learning_rate": 0.0001, "loss": 0.016, "step": 112250 }, { "epoch": 738.5526315789474, "grad_norm": 1.4252667427062988, "learning_rate": 0.0001, "loss": 0.0149, "step": 112260 }, { "epoch": 738.6184210526316, "grad_norm": 1.3827911615371704, "learning_rate": 0.0001, "loss": 0.0111, "step": 112270 }, { "epoch": 738.6842105263158, "grad_norm": 1.5297269821166992, "learning_rate": 0.0001, "loss": 0.0171, "step": 112280 }, { "epoch": 738.75, "grad_norm": 1.6353862285614014, "learning_rate": 0.0001, "loss": 0.0131, "step": 112290 }, { "epoch": 738.8157894736842, "grad_norm": 1.2561315298080444, "learning_rate": 0.0001, "loss": 0.0129, "step": 112300 }, { "epoch": 738.8815789473684, "grad_norm": 1.7547595500946045, "learning_rate": 0.0001, "loss": 0.0137, "step": 112310 }, { "epoch": 738.9473684210526, "grad_norm": 1.687427282333374, "learning_rate": 0.0001, "loss": 0.0139, "step": 112320 }, { "epoch": 739.0131578947369, "grad_norm": 1.8910940885543823, "learning_rate": 0.0001, "loss": 0.0122, "step": 112330 }, { "epoch": 739.078947368421, "grad_norm": 1.5423610210418701, "learning_rate": 0.0001, "loss": 0.0126, "step": 112340 }, { "epoch": 739.1447368421053, "grad_norm": 1.3827353715896606, "learning_rate": 0.0001, "loss": 0.0141, "step": 112350 }, { "epoch": 739.2105263157895, "grad_norm": 1.234027624130249, "learning_rate": 0.0001, "loss": 0.0101, "step": 112360 }, { "epoch": 739.2763157894736, "grad_norm": 1.133184552192688, "learning_rate": 0.0001, "loss": 0.0124, "step": 112370 }, { "epoch": 739.3421052631579, "grad_norm": 1.7168923616409302, "learning_rate": 0.0001, "loss": 0.012, "step": 112380 }, { "epoch": 739.4078947368421, "grad_norm": 1.5466965436935425, "learning_rate": 0.0001, "loss": 0.0157, "step": 112390 }, { "epoch": 739.4736842105264, "grad_norm": 1.698167085647583, "learning_rate": 0.0001, "loss": 0.012, "step": 112400 }, { "epoch": 739.5394736842105, "grad_norm": 1.3825421333312988, "learning_rate": 0.0001, "loss": 0.0153, "step": 112410 }, { "epoch": 739.6052631578947, "grad_norm": 1.086178183555603, "learning_rate": 0.0001, "loss": 0.0179, "step": 112420 }, { "epoch": 739.671052631579, "grad_norm": 0.9927518963813782, "learning_rate": 0.0001, "loss": 0.0131, "step": 112430 }, { "epoch": 739.7368421052631, "grad_norm": 1.6081218719482422, "learning_rate": 0.0001, "loss": 0.0114, "step": 112440 }, { "epoch": 739.8026315789474, "grad_norm": 1.4428139925003052, "learning_rate": 0.0001, "loss": 0.0182, "step": 112450 }, { "epoch": 739.8684210526316, "grad_norm": 1.4823052883148193, "learning_rate": 0.0001, "loss": 0.0114, "step": 112460 }, { "epoch": 739.9342105263158, "grad_norm": 1.5383739471435547, "learning_rate": 0.0001, "loss": 0.0152, "step": 112470 }, { "epoch": 740.0, "grad_norm": 1.5585881471633911, "learning_rate": 0.0001, "loss": 0.0132, "step": 112480 }, { "epoch": 740.0657894736842, "grad_norm": 1.3250443935394287, "learning_rate": 0.0001, "loss": 0.0176, "step": 112490 }, { "epoch": 740.1315789473684, "grad_norm": 1.350680947303772, "learning_rate": 0.0001, "loss": 0.0121, "step": 112500 }, { "epoch": 740.1973684210526, "grad_norm": 1.574820876121521, "learning_rate": 0.0001, "loss": 0.0161, "step": 112510 }, { "epoch": 740.2631578947369, "grad_norm": 1.2297521829605103, "learning_rate": 0.0001, "loss": 0.0157, "step": 112520 }, { "epoch": 740.328947368421, "grad_norm": 1.1037161350250244, "learning_rate": 0.0001, "loss": 0.0114, "step": 112530 }, { "epoch": 740.3947368421053, "grad_norm": 1.2618056535720825, "learning_rate": 0.0001, "loss": 0.0119, "step": 112540 }, { "epoch": 740.4605263157895, "grad_norm": 1.671007752418518, "learning_rate": 0.0001, "loss": 0.0125, "step": 112550 }, { "epoch": 740.5263157894736, "grad_norm": 1.2937700748443604, "learning_rate": 0.0001, "loss": 0.0146, "step": 112560 }, { "epoch": 740.5921052631579, "grad_norm": 1.2047069072723389, "learning_rate": 0.0001, "loss": 0.0113, "step": 112570 }, { "epoch": 740.6578947368421, "grad_norm": 1.7861170768737793, "learning_rate": 0.0001, "loss": 0.0097, "step": 112580 }, { "epoch": 740.7236842105264, "grad_norm": 1.7887380123138428, "learning_rate": 0.0001, "loss": 0.0177, "step": 112590 }, { "epoch": 740.7894736842105, "grad_norm": 1.8459711074829102, "learning_rate": 0.0001, "loss": 0.0162, "step": 112600 }, { "epoch": 740.8552631578947, "grad_norm": 1.3153733015060425, "learning_rate": 0.0001, "loss": 0.011, "step": 112610 }, { "epoch": 740.921052631579, "grad_norm": 1.4129475355148315, "learning_rate": 0.0001, "loss": 0.0112, "step": 112620 }, { "epoch": 740.9868421052631, "grad_norm": 1.6136035919189453, "learning_rate": 0.0001, "loss": 0.0161, "step": 112630 }, { "epoch": 741.0526315789474, "grad_norm": 1.1575947999954224, "learning_rate": 0.0001, "loss": 0.0131, "step": 112640 }, { "epoch": 741.1184210526316, "grad_norm": 1.5171908140182495, "learning_rate": 0.0001, "loss": 0.0138, "step": 112650 }, { "epoch": 741.1842105263158, "grad_norm": 1.4093295335769653, "learning_rate": 0.0001, "loss": 0.0126, "step": 112660 }, { "epoch": 741.25, "grad_norm": 1.1716071367263794, "learning_rate": 0.0001, "loss": 0.0126, "step": 112670 }, { "epoch": 741.3157894736842, "grad_norm": 1.816389799118042, "learning_rate": 0.0001, "loss": 0.0144, "step": 112680 }, { "epoch": 741.3815789473684, "grad_norm": 2.047142267227173, "learning_rate": 0.0001, "loss": 0.0138, "step": 112690 }, { "epoch": 741.4473684210526, "grad_norm": 1.7646374702453613, "learning_rate": 0.0001, "loss": 0.0128, "step": 112700 }, { "epoch": 741.5131578947369, "grad_norm": 1.3491714000701904, "learning_rate": 0.0001, "loss": 0.0132, "step": 112710 }, { "epoch": 741.578947368421, "grad_norm": 1.0833086967468262, "learning_rate": 0.0001, "loss": 0.0126, "step": 112720 }, { "epoch": 741.6447368421053, "grad_norm": 1.5552772283554077, "learning_rate": 0.0001, "loss": 0.0135, "step": 112730 }, { "epoch": 741.7105263157895, "grad_norm": 1.5370965003967285, "learning_rate": 0.0001, "loss": 0.0115, "step": 112740 }, { "epoch": 741.7763157894736, "grad_norm": 1.0554178953170776, "learning_rate": 0.0001, "loss": 0.0114, "step": 112750 }, { "epoch": 741.8421052631579, "grad_norm": 1.5856842994689941, "learning_rate": 0.0001, "loss": 0.0149, "step": 112760 }, { "epoch": 741.9078947368421, "grad_norm": 1.175365686416626, "learning_rate": 0.0001, "loss": 0.0136, "step": 112770 }, { "epoch": 741.9736842105264, "grad_norm": 1.3247448205947876, "learning_rate": 0.0001, "loss": 0.0152, "step": 112780 }, { "epoch": 742.0394736842105, "grad_norm": 1.5133806467056274, "learning_rate": 0.0001, "loss": 0.0152, "step": 112790 }, { "epoch": 742.1052631578947, "grad_norm": 1.5772846937179565, "learning_rate": 0.0001, "loss": 0.0168, "step": 112800 }, { "epoch": 742.171052631579, "grad_norm": 1.451531171798706, "learning_rate": 0.0001, "loss": 0.0142, "step": 112810 }, { "epoch": 742.2368421052631, "grad_norm": 1.672123670578003, "learning_rate": 0.0001, "loss": 0.0129, "step": 112820 }, { "epoch": 742.3026315789474, "grad_norm": 1.488985538482666, "learning_rate": 0.0001, "loss": 0.0129, "step": 112830 }, { "epoch": 742.3684210526316, "grad_norm": 1.3147897720336914, "learning_rate": 0.0001, "loss": 0.0131, "step": 112840 }, { "epoch": 742.4342105263158, "grad_norm": 1.6119786500930786, "learning_rate": 0.0001, "loss": 0.0117, "step": 112850 }, { "epoch": 742.5, "grad_norm": 1.2920020818710327, "learning_rate": 0.0001, "loss": 0.0138, "step": 112860 }, { "epoch": 742.5657894736842, "grad_norm": 1.0316461324691772, "learning_rate": 0.0001, "loss": 0.0126, "step": 112870 }, { "epoch": 742.6315789473684, "grad_norm": 1.4667404890060425, "learning_rate": 0.0001, "loss": 0.0144, "step": 112880 }, { "epoch": 742.6973684210526, "grad_norm": 1.1663228273391724, "learning_rate": 0.0001, "loss": 0.0123, "step": 112890 }, { "epoch": 742.7631578947369, "grad_norm": 1.5202440023422241, "learning_rate": 0.0001, "loss": 0.0129, "step": 112900 }, { "epoch": 742.828947368421, "grad_norm": 1.3777332305908203, "learning_rate": 0.0001, "loss": 0.0141, "step": 112910 }, { "epoch": 742.8947368421053, "grad_norm": 1.3559551239013672, "learning_rate": 0.0001, "loss": 0.0173, "step": 112920 }, { "epoch": 742.9605263157895, "grad_norm": 1.1707656383514404, "learning_rate": 0.0001, "loss": 0.0109, "step": 112930 }, { "epoch": 743.0263157894736, "grad_norm": 1.5561103820800781, "learning_rate": 0.0001, "loss": 0.0142, "step": 112940 }, { "epoch": 743.0921052631579, "grad_norm": 1.32445228099823, "learning_rate": 0.0001, "loss": 0.0116, "step": 112950 }, { "epoch": 743.1578947368421, "grad_norm": 1.7318894863128662, "learning_rate": 0.0001, "loss": 0.0124, "step": 112960 }, { "epoch": 743.2236842105264, "grad_norm": 1.8862963914871216, "learning_rate": 0.0001, "loss": 0.0141, "step": 112970 }, { "epoch": 743.2894736842105, "grad_norm": 1.3318811655044556, "learning_rate": 0.0001, "loss": 0.0158, "step": 112980 }, { "epoch": 743.3552631578947, "grad_norm": 1.6415354013442993, "learning_rate": 0.0001, "loss": 0.0144, "step": 112990 }, { "epoch": 743.421052631579, "grad_norm": 1.2658071517944336, "learning_rate": 0.0001, "loss": 0.0142, "step": 113000 }, { "epoch": 743.4868421052631, "grad_norm": 1.4986748695373535, "learning_rate": 0.0001, "loss": 0.0138, "step": 113010 }, { "epoch": 743.5526315789474, "grad_norm": 1.7659149169921875, "learning_rate": 0.0001, "loss": 0.0103, "step": 113020 }, { "epoch": 743.6184210526316, "grad_norm": 1.3502507209777832, "learning_rate": 0.0001, "loss": 0.015, "step": 113030 }, { "epoch": 743.6842105263158, "grad_norm": 1.5487334728240967, "learning_rate": 0.0001, "loss": 0.0152, "step": 113040 }, { "epoch": 743.75, "grad_norm": 1.3311392068862915, "learning_rate": 0.0001, "loss": 0.0157, "step": 113050 }, { "epoch": 743.8157894736842, "grad_norm": 1.6740986108779907, "learning_rate": 0.0001, "loss": 0.0105, "step": 113060 }, { "epoch": 743.8815789473684, "grad_norm": 1.325987458229065, "learning_rate": 0.0001, "loss": 0.0149, "step": 113070 }, { "epoch": 743.9473684210526, "grad_norm": 1.3363648653030396, "learning_rate": 0.0001, "loss": 0.0128, "step": 113080 }, { "epoch": 744.0131578947369, "grad_norm": 1.5074912309646606, "learning_rate": 0.0001, "loss": 0.0142, "step": 113090 }, { "epoch": 744.078947368421, "grad_norm": 1.6740174293518066, "learning_rate": 0.0001, "loss": 0.0175, "step": 113100 }, { "epoch": 744.1447368421053, "grad_norm": 1.4060057401657104, "learning_rate": 0.0001, "loss": 0.0164, "step": 113110 }, { "epoch": 744.2105263157895, "grad_norm": 1.33376944065094, "learning_rate": 0.0001, "loss": 0.0099, "step": 113120 }, { "epoch": 744.2763157894736, "grad_norm": 1.2087101936340332, "learning_rate": 0.0001, "loss": 0.0105, "step": 113130 }, { "epoch": 744.3421052631579, "grad_norm": 1.1886279582977295, "learning_rate": 0.0001, "loss": 0.0133, "step": 113140 }, { "epoch": 744.4078947368421, "grad_norm": 1.6160763502120972, "learning_rate": 0.0001, "loss": 0.0149, "step": 113150 }, { "epoch": 744.4736842105264, "grad_norm": 1.765484094619751, "learning_rate": 0.0001, "loss": 0.0161, "step": 113160 }, { "epoch": 744.5394736842105, "grad_norm": 1.450626254081726, "learning_rate": 0.0001, "loss": 0.0122, "step": 113170 }, { "epoch": 744.6052631578947, "grad_norm": 1.3271751403808594, "learning_rate": 0.0001, "loss": 0.0176, "step": 113180 }, { "epoch": 744.671052631579, "grad_norm": 1.4536136388778687, "learning_rate": 0.0001, "loss": 0.0101, "step": 113190 }, { "epoch": 744.7368421052631, "grad_norm": 1.4650520086288452, "learning_rate": 0.0001, "loss": 0.0096, "step": 113200 }, { "epoch": 744.8026315789474, "grad_norm": 1.4234517812728882, "learning_rate": 0.0001, "loss": 0.0116, "step": 113210 }, { "epoch": 744.8684210526316, "grad_norm": 1.0570045709609985, "learning_rate": 0.0001, "loss": 0.0128, "step": 113220 }, { "epoch": 744.9342105263158, "grad_norm": 1.3070417642593384, "learning_rate": 0.0001, "loss": 0.0162, "step": 113230 }, { "epoch": 745.0, "grad_norm": 1.2645646333694458, "learning_rate": 0.0001, "loss": 0.0135, "step": 113240 }, { "epoch": 745.0657894736842, "grad_norm": 1.0460706949234009, "learning_rate": 0.0001, "loss": 0.0139, "step": 113250 }, { "epoch": 745.1315789473684, "grad_norm": 1.5469493865966797, "learning_rate": 0.0001, "loss": 0.0124, "step": 113260 }, { "epoch": 745.1973684210526, "grad_norm": 1.3580296039581299, "learning_rate": 0.0001, "loss": 0.0127, "step": 113270 }, { "epoch": 745.2631578947369, "grad_norm": 1.3931552171707153, "learning_rate": 0.0001, "loss": 0.0094, "step": 113280 }, { "epoch": 745.328947368421, "grad_norm": 1.5096354484558105, "learning_rate": 0.0001, "loss": 0.0126, "step": 113290 }, { "epoch": 745.3947368421053, "grad_norm": 1.3789722919464111, "learning_rate": 0.0001, "loss": 0.0143, "step": 113300 }, { "epoch": 745.4605263157895, "grad_norm": 1.64922034740448, "learning_rate": 0.0001, "loss": 0.0151, "step": 113310 }, { "epoch": 745.5263157894736, "grad_norm": 1.3493163585662842, "learning_rate": 0.0001, "loss": 0.0119, "step": 113320 }, { "epoch": 745.5921052631579, "grad_norm": 1.1304000616073608, "learning_rate": 0.0001, "loss": 0.0155, "step": 113330 }, { "epoch": 745.6578947368421, "grad_norm": 1.4139574766159058, "learning_rate": 0.0001, "loss": 0.0171, "step": 113340 }, { "epoch": 745.7236842105264, "grad_norm": 1.4361026287078857, "learning_rate": 0.0001, "loss": 0.0122, "step": 113350 }, { "epoch": 745.7894736842105, "grad_norm": 1.3370909690856934, "learning_rate": 0.0001, "loss": 0.0101, "step": 113360 }, { "epoch": 745.8552631578947, "grad_norm": 1.4861233234405518, "learning_rate": 0.0001, "loss": 0.0174, "step": 113370 }, { "epoch": 745.921052631579, "grad_norm": 1.6286777257919312, "learning_rate": 0.0001, "loss": 0.0132, "step": 113380 }, { "epoch": 745.9868421052631, "grad_norm": 1.4996697902679443, "learning_rate": 0.0001, "loss": 0.0133, "step": 113390 }, { "epoch": 746.0526315789474, "grad_norm": 1.6532313823699951, "learning_rate": 0.0001, "loss": 0.0177, "step": 113400 }, { "epoch": 746.1184210526316, "grad_norm": 1.5506471395492554, "learning_rate": 0.0001, "loss": 0.0161, "step": 113410 }, { "epoch": 746.1842105263158, "grad_norm": 1.3744488954544067, "learning_rate": 0.0001, "loss": 0.0124, "step": 113420 }, { "epoch": 746.25, "grad_norm": 1.3627344369888306, "learning_rate": 0.0001, "loss": 0.0106, "step": 113430 }, { "epoch": 746.3157894736842, "grad_norm": 1.6082676649093628, "learning_rate": 0.0001, "loss": 0.015, "step": 113440 }, { "epoch": 746.3815789473684, "grad_norm": 1.3080918788909912, "learning_rate": 0.0001, "loss": 0.0143, "step": 113450 }, { "epoch": 746.4473684210526, "grad_norm": 1.3072319030761719, "learning_rate": 0.0001, "loss": 0.0124, "step": 113460 }, { "epoch": 746.5131578947369, "grad_norm": 1.4308100938796997, "learning_rate": 0.0001, "loss": 0.0098, "step": 113470 }, { "epoch": 746.578947368421, "grad_norm": 1.2132319211959839, "learning_rate": 0.0001, "loss": 0.0141, "step": 113480 }, { "epoch": 746.6447368421053, "grad_norm": 1.162263035774231, "learning_rate": 0.0001, "loss": 0.0154, "step": 113490 }, { "epoch": 746.7105263157895, "grad_norm": 0.9503525495529175, "learning_rate": 0.0001, "loss": 0.0114, "step": 113500 }, { "epoch": 746.7763157894736, "grad_norm": 1.4328850507736206, "learning_rate": 0.0001, "loss": 0.0149, "step": 113510 }, { "epoch": 746.8421052631579, "grad_norm": 1.6299690008163452, "learning_rate": 0.0001, "loss": 0.012, "step": 113520 }, { "epoch": 746.9078947368421, "grad_norm": 1.3729792833328247, "learning_rate": 0.0001, "loss": 0.0124, "step": 113530 }, { "epoch": 746.9736842105264, "grad_norm": 1.5687942504882812, "learning_rate": 0.0001, "loss": 0.0142, "step": 113540 }, { "epoch": 747.0394736842105, "grad_norm": 1.4604525566101074, "learning_rate": 0.0001, "loss": 0.0106, "step": 113550 }, { "epoch": 747.1052631578947, "grad_norm": 1.3741806745529175, "learning_rate": 0.0001, "loss": 0.015, "step": 113560 }, { "epoch": 747.171052631579, "grad_norm": 1.5391172170639038, "learning_rate": 0.0001, "loss": 0.0159, "step": 113570 }, { "epoch": 747.2368421052631, "grad_norm": 1.380469560623169, "learning_rate": 0.0001, "loss": 0.0122, "step": 113580 }, { "epoch": 747.3026315789474, "grad_norm": 1.7086485624313354, "learning_rate": 0.0001, "loss": 0.0125, "step": 113590 }, { "epoch": 747.3684210526316, "grad_norm": 1.5943771600723267, "learning_rate": 0.0001, "loss": 0.0155, "step": 113600 }, { "epoch": 747.4342105263158, "grad_norm": 1.8159948587417603, "learning_rate": 0.0001, "loss": 0.0153, "step": 113610 }, { "epoch": 747.5, "grad_norm": 1.7875027656555176, "learning_rate": 0.0001, "loss": 0.0155, "step": 113620 }, { "epoch": 747.5657894736842, "grad_norm": 1.518998146057129, "learning_rate": 0.0001, "loss": 0.0103, "step": 113630 }, { "epoch": 747.6315789473684, "grad_norm": 1.6136544942855835, "learning_rate": 0.0001, "loss": 0.0131, "step": 113640 }, { "epoch": 747.6973684210526, "grad_norm": 1.4869933128356934, "learning_rate": 0.0001, "loss": 0.0128, "step": 113650 }, { "epoch": 747.7631578947369, "grad_norm": 1.1673643589019775, "learning_rate": 0.0001, "loss": 0.01, "step": 113660 }, { "epoch": 747.828947368421, "grad_norm": 1.3481429815292358, "learning_rate": 0.0001, "loss": 0.0139, "step": 113670 }, { "epoch": 747.8947368421053, "grad_norm": 1.3695966005325317, "learning_rate": 0.0001, "loss": 0.0152, "step": 113680 }, { "epoch": 747.9605263157895, "grad_norm": 1.430290937423706, "learning_rate": 0.0001, "loss": 0.0139, "step": 113690 }, { "epoch": 748.0263157894736, "grad_norm": 1.4076098203659058, "learning_rate": 0.0001, "loss": 0.0111, "step": 113700 }, { "epoch": 748.0921052631579, "grad_norm": 1.0792824029922485, "learning_rate": 0.0001, "loss": 0.0155, "step": 113710 }, { "epoch": 748.1578947368421, "grad_norm": 1.320253610610962, "learning_rate": 0.0001, "loss": 0.0133, "step": 113720 }, { "epoch": 748.2236842105264, "grad_norm": 1.364013910293579, "learning_rate": 0.0001, "loss": 0.0163, "step": 113730 }, { "epoch": 748.2894736842105, "grad_norm": 1.576348900794983, "learning_rate": 0.0001, "loss": 0.0127, "step": 113740 }, { "epoch": 748.3552631578947, "grad_norm": 1.4003069400787354, "learning_rate": 0.0001, "loss": 0.0148, "step": 113750 }, { "epoch": 748.421052631579, "grad_norm": 1.2422330379486084, "learning_rate": 0.0001, "loss": 0.0136, "step": 113760 }, { "epoch": 748.4868421052631, "grad_norm": 1.552129864692688, "learning_rate": 0.0001, "loss": 0.0135, "step": 113770 }, { "epoch": 748.5526315789474, "grad_norm": 1.123143196105957, "learning_rate": 0.0001, "loss": 0.0136, "step": 113780 }, { "epoch": 748.6184210526316, "grad_norm": 1.266039252281189, "learning_rate": 0.0001, "loss": 0.0135, "step": 113790 }, { "epoch": 748.6842105263158, "grad_norm": 1.5265350341796875, "learning_rate": 0.0001, "loss": 0.0108, "step": 113800 }, { "epoch": 748.75, "grad_norm": 0.9969764351844788, "learning_rate": 0.0001, "loss": 0.0113, "step": 113810 }, { "epoch": 748.8157894736842, "grad_norm": 1.649034023284912, "learning_rate": 0.0001, "loss": 0.012, "step": 113820 }, { "epoch": 748.8815789473684, "grad_norm": 1.322158694267273, "learning_rate": 0.0001, "loss": 0.014, "step": 113830 }, { "epoch": 748.9473684210526, "grad_norm": 1.6477235555648804, "learning_rate": 0.0001, "loss": 0.0146, "step": 113840 }, { "epoch": 749.0131578947369, "grad_norm": 1.341156244277954, "learning_rate": 0.0001, "loss": 0.014, "step": 113850 }, { "epoch": 749.078947368421, "grad_norm": 1.1172664165496826, "learning_rate": 0.0001, "loss": 0.0113, "step": 113860 }, { "epoch": 749.1447368421053, "grad_norm": 1.1391713619232178, "learning_rate": 0.0001, "loss": 0.0113, "step": 113870 }, { "epoch": 749.2105263157895, "grad_norm": 1.7186745405197144, "learning_rate": 0.0001, "loss": 0.0155, "step": 113880 }, { "epoch": 749.2763157894736, "grad_norm": 1.3860461711883545, "learning_rate": 0.0001, "loss": 0.0168, "step": 113890 }, { "epoch": 749.3421052631579, "grad_norm": 1.475264549255371, "learning_rate": 0.0001, "loss": 0.0132, "step": 113900 }, { "epoch": 749.4078947368421, "grad_norm": 1.647080421447754, "learning_rate": 0.0001, "loss": 0.0131, "step": 113910 }, { "epoch": 749.4736842105264, "grad_norm": 1.5408313274383545, "learning_rate": 0.0001, "loss": 0.0131, "step": 113920 }, { "epoch": 749.5394736842105, "grad_norm": 1.9120978116989136, "learning_rate": 0.0001, "loss": 0.015, "step": 113930 }, { "epoch": 749.6052631578947, "grad_norm": 1.6507596969604492, "learning_rate": 0.0001, "loss": 0.0111, "step": 113940 }, { "epoch": 749.671052631579, "grad_norm": 1.1859245300292969, "learning_rate": 0.0001, "loss": 0.0138, "step": 113950 }, { "epoch": 749.7368421052631, "grad_norm": 1.3305549621582031, "learning_rate": 0.0001, "loss": 0.0119, "step": 113960 }, { "epoch": 749.8026315789474, "grad_norm": 1.5188921689987183, "learning_rate": 0.0001, "loss": 0.0135, "step": 113970 }, { "epoch": 749.8684210526316, "grad_norm": 1.7089998722076416, "learning_rate": 0.0001, "loss": 0.0142, "step": 113980 }, { "epoch": 749.9342105263158, "grad_norm": 1.7234606742858887, "learning_rate": 0.0001, "loss": 0.0164, "step": 113990 }, { "epoch": 750.0, "grad_norm": 1.010663628578186, "learning_rate": 0.0001, "loss": 0.0128, "step": 114000 }, { "epoch": 750.0657894736842, "grad_norm": 1.2166944742202759, "learning_rate": 0.0001, "loss": 0.0129, "step": 114010 }, { "epoch": 750.1315789473684, "grad_norm": 1.6956284046173096, "learning_rate": 0.0001, "loss": 0.0122, "step": 114020 }, { "epoch": 750.1973684210526, "grad_norm": 1.8649920225143433, "learning_rate": 0.0001, "loss": 0.0134, "step": 114030 }, { "epoch": 750.2631578947369, "grad_norm": 1.4820094108581543, "learning_rate": 0.0001, "loss": 0.0185, "step": 114040 }, { "epoch": 750.328947368421, "grad_norm": 1.115463137626648, "learning_rate": 0.0001, "loss": 0.0099, "step": 114050 }, { "epoch": 750.3947368421053, "grad_norm": 1.3863682746887207, "learning_rate": 0.0001, "loss": 0.0118, "step": 114060 }, { "epoch": 750.4605263157895, "grad_norm": 1.639848232269287, "learning_rate": 0.0001, "loss": 0.0136, "step": 114070 }, { "epoch": 750.5263157894736, "grad_norm": 2.0528788566589355, "learning_rate": 0.0001, "loss": 0.0114, "step": 114080 }, { "epoch": 750.5921052631579, "grad_norm": 1.6985269784927368, "learning_rate": 0.0001, "loss": 0.014, "step": 114090 }, { "epoch": 750.6578947368421, "grad_norm": 1.7308225631713867, "learning_rate": 0.0001, "loss": 0.0145, "step": 114100 }, { "epoch": 750.7236842105264, "grad_norm": 1.4885845184326172, "learning_rate": 0.0001, "loss": 0.0154, "step": 114110 }, { "epoch": 750.7894736842105, "grad_norm": 1.8651593923568726, "learning_rate": 0.0001, "loss": 0.0142, "step": 114120 }, { "epoch": 750.8552631578947, "grad_norm": 1.7709568738937378, "learning_rate": 0.0001, "loss": 0.0121, "step": 114130 }, { "epoch": 750.921052631579, "grad_norm": 1.846476674079895, "learning_rate": 0.0001, "loss": 0.0152, "step": 114140 }, { "epoch": 750.9868421052631, "grad_norm": 1.8099815845489502, "learning_rate": 0.0001, "loss": 0.0133, "step": 114150 }, { "epoch": 751.0526315789474, "grad_norm": 1.5858656167984009, "learning_rate": 0.0001, "loss": 0.0113, "step": 114160 }, { "epoch": 751.1184210526316, "grad_norm": 1.6504878997802734, "learning_rate": 0.0001, "loss": 0.0125, "step": 114170 }, { "epoch": 751.1842105263158, "grad_norm": 1.5491669178009033, "learning_rate": 0.0001, "loss": 0.0123, "step": 114180 }, { "epoch": 751.25, "grad_norm": 1.5115385055541992, "learning_rate": 0.0001, "loss": 0.0134, "step": 114190 }, { "epoch": 751.3157894736842, "grad_norm": 1.1438734531402588, "learning_rate": 0.0001, "loss": 0.015, "step": 114200 }, { "epoch": 751.3815789473684, "grad_norm": 1.5034997463226318, "learning_rate": 0.0001, "loss": 0.0127, "step": 114210 }, { "epoch": 751.4473684210526, "grad_norm": 1.4840600490570068, "learning_rate": 0.0001, "loss": 0.0135, "step": 114220 }, { "epoch": 751.5131578947369, "grad_norm": 1.386528491973877, "learning_rate": 0.0001, "loss": 0.0185, "step": 114230 }, { "epoch": 751.578947368421, "grad_norm": 1.5805611610412598, "learning_rate": 0.0001, "loss": 0.0106, "step": 114240 }, { "epoch": 751.6447368421053, "grad_norm": 1.4600621461868286, "learning_rate": 0.0001, "loss": 0.0133, "step": 114250 }, { "epoch": 751.7105263157895, "grad_norm": 1.356446623802185, "learning_rate": 0.0001, "loss": 0.0119, "step": 114260 }, { "epoch": 751.7763157894736, "grad_norm": 1.3743363618850708, "learning_rate": 0.0001, "loss": 0.013, "step": 114270 }, { "epoch": 751.8421052631579, "grad_norm": 1.4553054571151733, "learning_rate": 0.0001, "loss": 0.0111, "step": 114280 }, { "epoch": 751.9078947368421, "grad_norm": 1.3855273723602295, "learning_rate": 0.0001, "loss": 0.0141, "step": 114290 }, { "epoch": 751.9736842105264, "grad_norm": 1.661004662513733, "learning_rate": 0.0001, "loss": 0.0177, "step": 114300 }, { "epoch": 752.0394736842105, "grad_norm": 1.273730993270874, "learning_rate": 0.0001, "loss": 0.0161, "step": 114310 }, { "epoch": 752.1052631578947, "grad_norm": 1.4759362936019897, "learning_rate": 0.0001, "loss": 0.0112, "step": 114320 }, { "epoch": 752.171052631579, "grad_norm": 1.371248483657837, "learning_rate": 0.0001, "loss": 0.0122, "step": 114330 }, { "epoch": 752.2368421052631, "grad_norm": 1.618058681488037, "learning_rate": 0.0001, "loss": 0.0107, "step": 114340 }, { "epoch": 752.3026315789474, "grad_norm": 1.014451026916504, "learning_rate": 0.0001, "loss": 0.0181, "step": 114350 }, { "epoch": 752.3684210526316, "grad_norm": 1.125649333000183, "learning_rate": 0.0001, "loss": 0.0136, "step": 114360 }, { "epoch": 752.4342105263158, "grad_norm": 1.1107865571975708, "learning_rate": 0.0001, "loss": 0.0121, "step": 114370 }, { "epoch": 752.5, "grad_norm": 1.2558397054672241, "learning_rate": 0.0001, "loss": 0.0133, "step": 114380 }, { "epoch": 752.5657894736842, "grad_norm": 0.9977937936782837, "learning_rate": 0.0001, "loss": 0.0139, "step": 114390 }, { "epoch": 752.6315789473684, "grad_norm": 1.5112433433532715, "learning_rate": 0.0001, "loss": 0.013, "step": 114400 }, { "epoch": 752.6973684210526, "grad_norm": 1.4693559408187866, "learning_rate": 0.0001, "loss": 0.0149, "step": 114410 }, { "epoch": 752.7631578947369, "grad_norm": 1.245413899421692, "learning_rate": 0.0001, "loss": 0.0162, "step": 114420 }, { "epoch": 752.828947368421, "grad_norm": 1.6259517669677734, "learning_rate": 0.0001, "loss": 0.0158, "step": 114430 }, { "epoch": 752.8947368421053, "grad_norm": 1.884757161140442, "learning_rate": 0.0001, "loss": 0.0132, "step": 114440 }, { "epoch": 752.9605263157895, "grad_norm": 1.5124081373214722, "learning_rate": 0.0001, "loss": 0.0132, "step": 114450 }, { "epoch": 753.0263157894736, "grad_norm": 1.8033497333526611, "learning_rate": 0.0001, "loss": 0.0139, "step": 114460 }, { "epoch": 753.0921052631579, "grad_norm": 1.3736246824264526, "learning_rate": 0.0001, "loss": 0.0172, "step": 114470 }, { "epoch": 753.1578947368421, "grad_norm": 1.169181227684021, "learning_rate": 0.0001, "loss": 0.0103, "step": 114480 }, { "epoch": 753.2236842105264, "grad_norm": 1.6742287874221802, "learning_rate": 0.0001, "loss": 0.0153, "step": 114490 }, { "epoch": 753.2894736842105, "grad_norm": 1.4429620504379272, "learning_rate": 0.0001, "loss": 0.0111, "step": 114500 }, { "epoch": 753.3552631578947, "grad_norm": 1.805755615234375, "learning_rate": 0.0001, "loss": 0.0123, "step": 114510 }, { "epoch": 753.421052631579, "grad_norm": 1.5175374746322632, "learning_rate": 0.0001, "loss": 0.0146, "step": 114520 }, { "epoch": 753.4868421052631, "grad_norm": 1.1502470970153809, "learning_rate": 0.0001, "loss": 0.0165, "step": 114530 }, { "epoch": 753.5526315789474, "grad_norm": 1.0825039148330688, "learning_rate": 0.0001, "loss": 0.0121, "step": 114540 }, { "epoch": 753.6184210526316, "grad_norm": 1.128941297531128, "learning_rate": 0.0001, "loss": 0.0112, "step": 114550 }, { "epoch": 753.6842105263158, "grad_norm": 1.2664425373077393, "learning_rate": 0.0001, "loss": 0.0152, "step": 114560 }, { "epoch": 753.75, "grad_norm": 1.2863141298294067, "learning_rate": 0.0001, "loss": 0.0132, "step": 114570 }, { "epoch": 753.8157894736842, "grad_norm": 1.6158932447433472, "learning_rate": 0.0001, "loss": 0.015, "step": 114580 }, { "epoch": 753.8815789473684, "grad_norm": 0.8394668698310852, "learning_rate": 0.0001, "loss": 0.0104, "step": 114590 }, { "epoch": 753.9473684210526, "grad_norm": 1.357316493988037, "learning_rate": 0.0001, "loss": 0.0153, "step": 114600 }, { "epoch": 754.0131578947369, "grad_norm": 1.1491005420684814, "learning_rate": 0.0001, "loss": 0.0154, "step": 114610 }, { "epoch": 754.078947368421, "grad_norm": 0.8625369668006897, "learning_rate": 0.0001, "loss": 0.0124, "step": 114620 }, { "epoch": 754.1447368421053, "grad_norm": 1.7683525085449219, "learning_rate": 0.0001, "loss": 0.0195, "step": 114630 }, { "epoch": 754.2105263157895, "grad_norm": 1.4905842542648315, "learning_rate": 0.0001, "loss": 0.0112, "step": 114640 }, { "epoch": 754.2763157894736, "grad_norm": 1.4832899570465088, "learning_rate": 0.0001, "loss": 0.0098, "step": 114650 }, { "epoch": 754.3421052631579, "grad_norm": 1.1218479871749878, "learning_rate": 0.0001, "loss": 0.0124, "step": 114660 }, { "epoch": 754.4078947368421, "grad_norm": 1.5673329830169678, "learning_rate": 0.0001, "loss": 0.0114, "step": 114670 }, { "epoch": 754.4736842105264, "grad_norm": 1.1006799936294556, "learning_rate": 0.0001, "loss": 0.0134, "step": 114680 }, { "epoch": 754.5394736842105, "grad_norm": 1.4251658916473389, "learning_rate": 0.0001, "loss": 0.0192, "step": 114690 }, { "epoch": 754.6052631578947, "grad_norm": 1.7217390537261963, "learning_rate": 0.0001, "loss": 0.017, "step": 114700 }, { "epoch": 754.671052631579, "grad_norm": 1.4161754846572876, "learning_rate": 0.0001, "loss": 0.0117, "step": 114710 }, { "epoch": 754.7368421052631, "grad_norm": 0.9962689876556396, "learning_rate": 0.0001, "loss": 0.0136, "step": 114720 }, { "epoch": 754.8026315789474, "grad_norm": 1.20786452293396, "learning_rate": 0.0001, "loss": 0.0127, "step": 114730 }, { "epoch": 754.8684210526316, "grad_norm": 1.9926972389221191, "learning_rate": 0.0001, "loss": 0.0155, "step": 114740 }, { "epoch": 754.9342105263158, "grad_norm": 1.2991108894348145, "learning_rate": 0.0001, "loss": 0.0119, "step": 114750 }, { "epoch": 755.0, "grad_norm": 1.155897855758667, "learning_rate": 0.0001, "loss": 0.0121, "step": 114760 }, { "epoch": 755.0657894736842, "grad_norm": 1.396903395652771, "learning_rate": 0.0001, "loss": 0.0117, "step": 114770 }, { "epoch": 755.1315789473684, "grad_norm": 1.324428677558899, "learning_rate": 0.0001, "loss": 0.0131, "step": 114780 }, { "epoch": 755.1973684210526, "grad_norm": 1.430230736732483, "learning_rate": 0.0001, "loss": 0.0137, "step": 114790 }, { "epoch": 755.2631578947369, "grad_norm": 1.3362177610397339, "learning_rate": 0.0001, "loss": 0.0104, "step": 114800 }, { "epoch": 755.328947368421, "grad_norm": 1.652655839920044, "learning_rate": 0.0001, "loss": 0.016, "step": 114810 }, { "epoch": 755.3947368421053, "grad_norm": 1.7260407209396362, "learning_rate": 0.0001, "loss": 0.0136, "step": 114820 }, { "epoch": 755.4605263157895, "grad_norm": 1.1691193580627441, "learning_rate": 0.0001, "loss": 0.017, "step": 114830 }, { "epoch": 755.5263157894736, "grad_norm": 1.3465192317962646, "learning_rate": 0.0001, "loss": 0.0117, "step": 114840 }, { "epoch": 755.5921052631579, "grad_norm": 1.4757519960403442, "learning_rate": 0.0001, "loss": 0.0141, "step": 114850 }, { "epoch": 755.6578947368421, "grad_norm": 1.254733681678772, "learning_rate": 0.0001, "loss": 0.0163, "step": 114860 }, { "epoch": 755.7236842105264, "grad_norm": 1.4800572395324707, "learning_rate": 0.0001, "loss": 0.0133, "step": 114870 }, { "epoch": 755.7894736842105, "grad_norm": 1.8786811828613281, "learning_rate": 0.0001, "loss": 0.0106, "step": 114880 }, { "epoch": 755.8552631578947, "grad_norm": 1.5072282552719116, "learning_rate": 0.0001, "loss": 0.0147, "step": 114890 }, { "epoch": 755.921052631579, "grad_norm": 1.6870568990707397, "learning_rate": 0.0001, "loss": 0.0135, "step": 114900 }, { "epoch": 755.9868421052631, "grad_norm": 1.7055318355560303, "learning_rate": 0.0001, "loss": 0.0142, "step": 114910 }, { "epoch": 756.0526315789474, "grad_norm": 1.4759787321090698, "learning_rate": 0.0001, "loss": 0.0159, "step": 114920 }, { "epoch": 756.1184210526316, "grad_norm": 1.2329474687576294, "learning_rate": 0.0001, "loss": 0.0158, "step": 114930 }, { "epoch": 756.1842105263158, "grad_norm": 1.549002766609192, "learning_rate": 0.0001, "loss": 0.0141, "step": 114940 }, { "epoch": 756.25, "grad_norm": 1.5119925737380981, "learning_rate": 0.0001, "loss": 0.0127, "step": 114950 }, { "epoch": 756.3157894736842, "grad_norm": 1.2567940950393677, "learning_rate": 0.0001, "loss": 0.0137, "step": 114960 }, { "epoch": 756.3815789473684, "grad_norm": 1.2971874475479126, "learning_rate": 0.0001, "loss": 0.0107, "step": 114970 }, { "epoch": 756.4473684210526, "grad_norm": 1.164299488067627, "learning_rate": 0.0001, "loss": 0.0114, "step": 114980 }, { "epoch": 756.5131578947369, "grad_norm": 1.7664185762405396, "learning_rate": 0.0001, "loss": 0.0167, "step": 114990 }, { "epoch": 756.578947368421, "grad_norm": 1.7384214401245117, "learning_rate": 0.0001, "loss": 0.0113, "step": 115000 }, { "epoch": 756.6447368421053, "grad_norm": 1.3337302207946777, "learning_rate": 0.0001, "loss": 0.0132, "step": 115010 }, { "epoch": 756.7105263157895, "grad_norm": 1.4472185373306274, "learning_rate": 0.0001, "loss": 0.0124, "step": 115020 }, { "epoch": 756.7763157894736, "grad_norm": 1.2604540586471558, "learning_rate": 0.0001, "loss": 0.0142, "step": 115030 }, { "epoch": 756.8421052631579, "grad_norm": 1.211103081703186, "learning_rate": 0.0001, "loss": 0.012, "step": 115040 }, { "epoch": 756.9078947368421, "grad_norm": 1.5277022123336792, "learning_rate": 0.0001, "loss": 0.015, "step": 115050 }, { "epoch": 756.9736842105264, "grad_norm": 1.6016775369644165, "learning_rate": 0.0001, "loss": 0.0138, "step": 115060 }, { "epoch": 757.0394736842105, "grad_norm": 1.4296729564666748, "learning_rate": 0.0001, "loss": 0.0117, "step": 115070 }, { "epoch": 757.1052631578947, "grad_norm": 1.3691836595535278, "learning_rate": 0.0001, "loss": 0.0123, "step": 115080 }, { "epoch": 757.171052631579, "grad_norm": 1.6587977409362793, "learning_rate": 0.0001, "loss": 0.0105, "step": 115090 }, { "epoch": 757.2368421052631, "grad_norm": 1.289591908454895, "learning_rate": 0.0001, "loss": 0.0146, "step": 115100 }, { "epoch": 757.3026315789474, "grad_norm": 1.5047051906585693, "learning_rate": 0.0001, "loss": 0.0135, "step": 115110 }, { "epoch": 757.3684210526316, "grad_norm": 2.07127046585083, "learning_rate": 0.0001, "loss": 0.014, "step": 115120 }, { "epoch": 757.4342105263158, "grad_norm": 1.139726161956787, "learning_rate": 0.0001, "loss": 0.0165, "step": 115130 }, { "epoch": 757.5, "grad_norm": 1.29218590259552, "learning_rate": 0.0001, "loss": 0.0134, "step": 115140 }, { "epoch": 757.5657894736842, "grad_norm": 1.677734375, "learning_rate": 0.0001, "loss": 0.0133, "step": 115150 }, { "epoch": 757.6315789473684, "grad_norm": 1.2400012016296387, "learning_rate": 0.0001, "loss": 0.0129, "step": 115160 }, { "epoch": 757.6973684210526, "grad_norm": 1.4256808757781982, "learning_rate": 0.0001, "loss": 0.0142, "step": 115170 }, { "epoch": 757.7631578947369, "grad_norm": 0.8407680988311768, "learning_rate": 0.0001, "loss": 0.0124, "step": 115180 }, { "epoch": 757.828947368421, "grad_norm": 1.248169183731079, "learning_rate": 0.0001, "loss": 0.0155, "step": 115190 }, { "epoch": 757.8947368421053, "grad_norm": 1.6199369430541992, "learning_rate": 0.0001, "loss": 0.0117, "step": 115200 }, { "epoch": 757.9605263157895, "grad_norm": 1.1477497816085815, "learning_rate": 0.0001, "loss": 0.0163, "step": 115210 }, { "epoch": 758.0263157894736, "grad_norm": 1.3836077451705933, "learning_rate": 0.0001, "loss": 0.0122, "step": 115220 }, { "epoch": 758.0921052631579, "grad_norm": 1.3531805276870728, "learning_rate": 0.0001, "loss": 0.0125, "step": 115230 }, { "epoch": 758.1578947368421, "grad_norm": 1.5251773595809937, "learning_rate": 0.0001, "loss": 0.0135, "step": 115240 }, { "epoch": 758.2236842105264, "grad_norm": 1.3760857582092285, "learning_rate": 0.0001, "loss": 0.0156, "step": 115250 }, { "epoch": 758.2894736842105, "grad_norm": 1.4372551441192627, "learning_rate": 0.0001, "loss": 0.0168, "step": 115260 }, { "epoch": 758.3552631578947, "grad_norm": 1.2601507902145386, "learning_rate": 0.0001, "loss": 0.0138, "step": 115270 }, { "epoch": 758.421052631579, "grad_norm": 1.2951611280441284, "learning_rate": 0.0001, "loss": 0.0124, "step": 115280 }, { "epoch": 758.4868421052631, "grad_norm": 1.513798713684082, "learning_rate": 0.0001, "loss": 0.0142, "step": 115290 }, { "epoch": 758.5526315789474, "grad_norm": 1.38850998878479, "learning_rate": 0.0001, "loss": 0.016, "step": 115300 }, { "epoch": 758.6184210526316, "grad_norm": 1.4899370670318604, "learning_rate": 0.0001, "loss": 0.016, "step": 115310 }, { "epoch": 758.6842105263158, "grad_norm": 1.1612995862960815, "learning_rate": 0.0001, "loss": 0.0114, "step": 115320 }, { "epoch": 758.75, "grad_norm": 1.2975088357925415, "learning_rate": 0.0001, "loss": 0.0102, "step": 115330 }, { "epoch": 758.8157894736842, "grad_norm": 1.0939162969589233, "learning_rate": 0.0001, "loss": 0.0127, "step": 115340 }, { "epoch": 758.8815789473684, "grad_norm": 1.6875972747802734, "learning_rate": 0.0001, "loss": 0.0155, "step": 115350 }, { "epoch": 758.9473684210526, "grad_norm": 1.1690644025802612, "learning_rate": 0.0001, "loss": 0.0103, "step": 115360 }, { "epoch": 759.0131578947369, "grad_norm": 1.4387016296386719, "learning_rate": 0.0001, "loss": 0.0145, "step": 115370 }, { "epoch": 759.078947368421, "grad_norm": 1.5792449712753296, "learning_rate": 0.0001, "loss": 0.0101, "step": 115380 }, { "epoch": 759.1447368421053, "grad_norm": 1.7125853300094604, "learning_rate": 0.0001, "loss": 0.0114, "step": 115390 }, { "epoch": 759.2105263157895, "grad_norm": 1.746486783027649, "learning_rate": 0.0001, "loss": 0.0157, "step": 115400 }, { "epoch": 759.2763157894736, "grad_norm": 1.8166251182556152, "learning_rate": 0.0001, "loss": 0.0142, "step": 115410 }, { "epoch": 759.3421052631579, "grad_norm": 1.7389289140701294, "learning_rate": 0.0001, "loss": 0.0114, "step": 115420 }, { "epoch": 759.4078947368421, "grad_norm": 1.4865212440490723, "learning_rate": 0.0001, "loss": 0.0118, "step": 115430 }, { "epoch": 759.4736842105264, "grad_norm": 1.3436567783355713, "learning_rate": 0.0001, "loss": 0.0148, "step": 115440 }, { "epoch": 759.5394736842105, "grad_norm": 1.9899193048477173, "learning_rate": 0.0001, "loss": 0.0124, "step": 115450 }, { "epoch": 759.6052631578947, "grad_norm": 2.006760358810425, "learning_rate": 0.0001, "loss": 0.0165, "step": 115460 }, { "epoch": 759.671052631579, "grad_norm": 1.7967689037322998, "learning_rate": 0.0001, "loss": 0.0145, "step": 115470 }, { "epoch": 759.7368421052631, "grad_norm": 1.385080337524414, "learning_rate": 0.0001, "loss": 0.0111, "step": 115480 }, { "epoch": 759.8026315789474, "grad_norm": 1.7148078680038452, "learning_rate": 0.0001, "loss": 0.017, "step": 115490 }, { "epoch": 759.8684210526316, "grad_norm": 1.345892071723938, "learning_rate": 0.0001, "loss": 0.0146, "step": 115500 }, { "epoch": 759.9342105263158, "grad_norm": 1.8605700731277466, "learning_rate": 0.0001, "loss": 0.0107, "step": 115510 }, { "epoch": 760.0, "grad_norm": 1.1709274053573608, "learning_rate": 0.0001, "loss": 0.0134, "step": 115520 }, { "epoch": 760.0657894736842, "grad_norm": 1.8322834968566895, "learning_rate": 0.0001, "loss": 0.0131, "step": 115530 }, { "epoch": 760.1315789473684, "grad_norm": 1.4383184909820557, "learning_rate": 0.0001, "loss": 0.013, "step": 115540 }, { "epoch": 760.1973684210526, "grad_norm": 1.8099300861358643, "learning_rate": 0.0001, "loss": 0.018, "step": 115550 }, { "epoch": 760.2631578947369, "grad_norm": 2.1003401279449463, "learning_rate": 0.0001, "loss": 0.0143, "step": 115560 }, { "epoch": 760.328947368421, "grad_norm": 1.9161313772201538, "learning_rate": 0.0001, "loss": 0.0113, "step": 115570 }, { "epoch": 760.3947368421053, "grad_norm": 1.757474422454834, "learning_rate": 0.0001, "loss": 0.0107, "step": 115580 }, { "epoch": 760.4605263157895, "grad_norm": 1.7021598815917969, "learning_rate": 0.0001, "loss": 0.0126, "step": 115590 }, { "epoch": 760.5263157894736, "grad_norm": 1.2544362545013428, "learning_rate": 0.0001, "loss": 0.0104, "step": 115600 }, { "epoch": 760.5921052631579, "grad_norm": 1.7577154636383057, "learning_rate": 0.0001, "loss": 0.0114, "step": 115610 }, { "epoch": 760.6578947368421, "grad_norm": 2.070103168487549, "learning_rate": 0.0001, "loss": 0.0111, "step": 115620 }, { "epoch": 760.7236842105264, "grad_norm": 1.8935545682907104, "learning_rate": 0.0001, "loss": 0.0122, "step": 115630 }, { "epoch": 760.7894736842105, "grad_norm": 1.1977072954177856, "learning_rate": 0.0001, "loss": 0.0122, "step": 115640 }, { "epoch": 760.8552631578947, "grad_norm": 1.585807204246521, "learning_rate": 0.0001, "loss": 0.0142, "step": 115650 }, { "epoch": 760.921052631579, "grad_norm": 1.6161839962005615, "learning_rate": 0.0001, "loss": 0.0175, "step": 115660 }, { "epoch": 760.9868421052631, "grad_norm": 1.1609266996383667, "learning_rate": 0.0001, "loss": 0.0134, "step": 115670 }, { "epoch": 761.0526315789474, "grad_norm": 0.9137687087059021, "learning_rate": 0.0001, "loss": 0.0142, "step": 115680 }, { "epoch": 761.1184210526316, "grad_norm": 1.338621735572815, "learning_rate": 0.0001, "loss": 0.0135, "step": 115690 }, { "epoch": 761.1842105263158, "grad_norm": 1.5163878202438354, "learning_rate": 0.0001, "loss": 0.0175, "step": 115700 }, { "epoch": 761.25, "grad_norm": 1.8290154933929443, "learning_rate": 0.0001, "loss": 0.0139, "step": 115710 }, { "epoch": 761.3157894736842, "grad_norm": 1.3525278568267822, "learning_rate": 0.0001, "loss": 0.0134, "step": 115720 }, { "epoch": 761.3815789473684, "grad_norm": 1.670906662940979, "learning_rate": 0.0001, "loss": 0.0134, "step": 115730 }, { "epoch": 761.4473684210526, "grad_norm": 1.3485288619995117, "learning_rate": 0.0001, "loss": 0.0108, "step": 115740 }, { "epoch": 761.5131578947369, "grad_norm": 1.2950325012207031, "learning_rate": 0.0001, "loss": 0.013, "step": 115750 }, { "epoch": 761.578947368421, "grad_norm": 1.536702036857605, "learning_rate": 0.0001, "loss": 0.0118, "step": 115760 }, { "epoch": 761.6447368421053, "grad_norm": 1.2606921195983887, "learning_rate": 0.0001, "loss": 0.0127, "step": 115770 }, { "epoch": 761.7105263157895, "grad_norm": 1.7217456102371216, "learning_rate": 0.0001, "loss": 0.0125, "step": 115780 }, { "epoch": 761.7763157894736, "grad_norm": 1.335617184638977, "learning_rate": 0.0001, "loss": 0.0143, "step": 115790 }, { "epoch": 761.8421052631579, "grad_norm": 1.479178547859192, "learning_rate": 0.0001, "loss": 0.0111, "step": 115800 }, { "epoch": 761.9078947368421, "grad_norm": 1.2410250902175903, "learning_rate": 0.0001, "loss": 0.0099, "step": 115810 }, { "epoch": 761.9736842105264, "grad_norm": 1.6412770748138428, "learning_rate": 0.0001, "loss": 0.0175, "step": 115820 }, { "epoch": 762.0394736842105, "grad_norm": 1.6634429693222046, "learning_rate": 0.0001, "loss": 0.0127, "step": 115830 }, { "epoch": 762.1052631578947, "grad_norm": 1.3156737089157104, "learning_rate": 0.0001, "loss": 0.0136, "step": 115840 }, { "epoch": 762.171052631579, "grad_norm": 1.6519899368286133, "learning_rate": 0.0001, "loss": 0.0115, "step": 115850 }, { "epoch": 762.2368421052631, "grad_norm": 1.1107553243637085, "learning_rate": 0.0001, "loss": 0.0119, "step": 115860 }, { "epoch": 762.3026315789474, "grad_norm": 1.2911436557769775, "learning_rate": 0.0001, "loss": 0.0126, "step": 115870 }, { "epoch": 762.3684210526316, "grad_norm": 1.4803091287612915, "learning_rate": 0.0001, "loss": 0.0139, "step": 115880 }, { "epoch": 762.4342105263158, "grad_norm": 1.786361575126648, "learning_rate": 0.0001, "loss": 0.0151, "step": 115890 }, { "epoch": 762.5, "grad_norm": 1.7753149271011353, "learning_rate": 0.0001, "loss": 0.0163, "step": 115900 }, { "epoch": 762.5657894736842, "grad_norm": 1.8140769004821777, "learning_rate": 0.0001, "loss": 0.0106, "step": 115910 }, { "epoch": 762.6315789473684, "grad_norm": 1.524635672569275, "learning_rate": 0.0001, "loss": 0.0133, "step": 115920 }, { "epoch": 762.6973684210526, "grad_norm": 1.681121587753296, "learning_rate": 0.0001, "loss": 0.0153, "step": 115930 }, { "epoch": 762.7631578947369, "grad_norm": 1.005338191986084, "learning_rate": 0.0001, "loss": 0.0129, "step": 115940 }, { "epoch": 762.828947368421, "grad_norm": 1.7497549057006836, "learning_rate": 0.0001, "loss": 0.0136, "step": 115950 }, { "epoch": 762.8947368421053, "grad_norm": 1.4876197576522827, "learning_rate": 0.0001, "loss": 0.013, "step": 115960 }, { "epoch": 762.9605263157895, "grad_norm": 1.509586215019226, "learning_rate": 0.0001, "loss": 0.0166, "step": 115970 }, { "epoch": 763.0263157894736, "grad_norm": 1.7336297035217285, "learning_rate": 0.0001, "loss": 0.0125, "step": 115980 }, { "epoch": 763.0921052631579, "grad_norm": 1.344208836555481, "learning_rate": 0.0001, "loss": 0.0142, "step": 115990 }, { "epoch": 763.1578947368421, "grad_norm": 1.887645959854126, "learning_rate": 0.0001, "loss": 0.0118, "step": 116000 }, { "epoch": 763.2236842105264, "grad_norm": 1.4496312141418457, "learning_rate": 0.0001, "loss": 0.0152, "step": 116010 }, { "epoch": 763.2894736842105, "grad_norm": 1.6610205173492432, "learning_rate": 0.0001, "loss": 0.0142, "step": 116020 }, { "epoch": 763.3552631578947, "grad_norm": 1.309536099433899, "learning_rate": 0.0001, "loss": 0.0153, "step": 116030 }, { "epoch": 763.421052631579, "grad_norm": 1.3353288173675537, "learning_rate": 0.0001, "loss": 0.013, "step": 116040 }, { "epoch": 763.4868421052631, "grad_norm": 1.569358229637146, "learning_rate": 0.0001, "loss": 0.0117, "step": 116050 }, { "epoch": 763.5526315789474, "grad_norm": 1.1409926414489746, "learning_rate": 0.0001, "loss": 0.0162, "step": 116060 }, { "epoch": 763.6184210526316, "grad_norm": 1.2419562339782715, "learning_rate": 0.0001, "loss": 0.0109, "step": 116070 }, { "epoch": 763.6842105263158, "grad_norm": 1.5334362983703613, "learning_rate": 0.0001, "loss": 0.0133, "step": 116080 }, { "epoch": 763.75, "grad_norm": 1.5338636636734009, "learning_rate": 0.0001, "loss": 0.0158, "step": 116090 }, { "epoch": 763.8157894736842, "grad_norm": 1.4394251108169556, "learning_rate": 0.0001, "loss": 0.0136, "step": 116100 }, { "epoch": 763.8815789473684, "grad_norm": 1.4077354669570923, "learning_rate": 0.0001, "loss": 0.0133, "step": 116110 }, { "epoch": 763.9473684210526, "grad_norm": 1.7473666667938232, "learning_rate": 0.0001, "loss": 0.0119, "step": 116120 }, { "epoch": 764.0131578947369, "grad_norm": 1.2797209024429321, "learning_rate": 0.0001, "loss": 0.0157, "step": 116130 }, { "epoch": 764.078947368421, "grad_norm": 1.2473173141479492, "learning_rate": 0.0001, "loss": 0.0131, "step": 116140 }, { "epoch": 764.1447368421053, "grad_norm": 1.1505117416381836, "learning_rate": 0.0001, "loss": 0.011, "step": 116150 }, { "epoch": 764.2105263157895, "grad_norm": 1.3675761222839355, "learning_rate": 0.0001, "loss": 0.013, "step": 116160 }, { "epoch": 764.2763157894736, "grad_norm": 1.4423938989639282, "learning_rate": 0.0001, "loss": 0.011, "step": 116170 }, { "epoch": 764.3421052631579, "grad_norm": 1.267307162284851, "learning_rate": 0.0001, "loss": 0.0166, "step": 116180 }, { "epoch": 764.4078947368421, "grad_norm": 1.8121198415756226, "learning_rate": 0.0001, "loss": 0.0142, "step": 116190 }, { "epoch": 764.4736842105264, "grad_norm": 1.5926166772842407, "learning_rate": 0.0001, "loss": 0.0146, "step": 116200 }, { "epoch": 764.5394736842105, "grad_norm": 1.57331120967865, "learning_rate": 0.0001, "loss": 0.0188, "step": 116210 }, { "epoch": 764.6052631578947, "grad_norm": 1.300418734550476, "learning_rate": 0.0001, "loss": 0.0157, "step": 116220 }, { "epoch": 764.671052631579, "grad_norm": 1.5102616548538208, "learning_rate": 0.0001, "loss": 0.0121, "step": 116230 }, { "epoch": 764.7368421052631, "grad_norm": 1.753910779953003, "learning_rate": 0.0001, "loss": 0.0154, "step": 116240 }, { "epoch": 764.8026315789474, "grad_norm": 1.449820876121521, "learning_rate": 0.0001, "loss": 0.014, "step": 116250 }, { "epoch": 764.8684210526316, "grad_norm": 1.5706682205200195, "learning_rate": 0.0001, "loss": 0.0116, "step": 116260 }, { "epoch": 764.9342105263158, "grad_norm": 1.4758108854293823, "learning_rate": 0.0001, "loss": 0.0126, "step": 116270 }, { "epoch": 765.0, "grad_norm": 1.470518708229065, "learning_rate": 0.0001, "loss": 0.0105, "step": 116280 }, { "epoch": 765.0657894736842, "grad_norm": 0.9701679944992065, "learning_rate": 0.0001, "loss": 0.0197, "step": 116290 }, { "epoch": 765.1315789473684, "grad_norm": 1.8675698041915894, "learning_rate": 0.0001, "loss": 0.0124, "step": 116300 }, { "epoch": 765.1973684210526, "grad_norm": 1.1924515962600708, "learning_rate": 0.0001, "loss": 0.0126, "step": 116310 }, { "epoch": 765.2631578947369, "grad_norm": 1.3718541860580444, "learning_rate": 0.0001, "loss": 0.02, "step": 116320 }, { "epoch": 765.328947368421, "grad_norm": 1.6736418008804321, "learning_rate": 0.0001, "loss": 0.0102, "step": 116330 }, { "epoch": 765.3947368421053, "grad_norm": 1.367605209350586, "learning_rate": 0.0001, "loss": 0.0122, "step": 116340 }, { "epoch": 765.4605263157895, "grad_norm": 1.7448804378509521, "learning_rate": 0.0001, "loss": 0.0146, "step": 116350 }, { "epoch": 765.5263157894736, "grad_norm": 1.391544222831726, "learning_rate": 0.0001, "loss": 0.0116, "step": 116360 }, { "epoch": 765.5921052631579, "grad_norm": 1.1744329929351807, "learning_rate": 0.0001, "loss": 0.0127, "step": 116370 }, { "epoch": 765.6578947368421, "grad_norm": 1.0976762771606445, "learning_rate": 0.0001, "loss": 0.0141, "step": 116380 }, { "epoch": 765.7236842105264, "grad_norm": 1.5358786582946777, "learning_rate": 0.0001, "loss": 0.0192, "step": 116390 }, { "epoch": 765.7894736842105, "grad_norm": 1.9238945245742798, "learning_rate": 0.0001, "loss": 0.0115, "step": 116400 }, { "epoch": 765.8552631578947, "grad_norm": 1.9612014293670654, "learning_rate": 0.0001, "loss": 0.0105, "step": 116410 }, { "epoch": 765.921052631579, "grad_norm": 1.7411386966705322, "learning_rate": 0.0001, "loss": 0.0134, "step": 116420 }, { "epoch": 765.9868421052631, "grad_norm": 1.2119123935699463, "learning_rate": 0.0001, "loss": 0.0122, "step": 116430 }, { "epoch": 766.0526315789474, "grad_norm": 1.4036630392074585, "learning_rate": 0.0001, "loss": 0.0124, "step": 116440 }, { "epoch": 766.1184210526316, "grad_norm": 1.415610671043396, "learning_rate": 0.0001, "loss": 0.016, "step": 116450 }, { "epoch": 766.1842105263158, "grad_norm": 1.4415749311447144, "learning_rate": 0.0001, "loss": 0.0135, "step": 116460 }, { "epoch": 766.25, "grad_norm": 1.2778396606445312, "learning_rate": 0.0001, "loss": 0.0154, "step": 116470 }, { "epoch": 766.3157894736842, "grad_norm": 1.3151251077651978, "learning_rate": 0.0001, "loss": 0.0117, "step": 116480 }, { "epoch": 766.3815789473684, "grad_norm": 1.1031017303466797, "learning_rate": 0.0001, "loss": 0.0148, "step": 116490 }, { "epoch": 766.4473684210526, "grad_norm": 1.5192748308181763, "learning_rate": 0.0001, "loss": 0.0143, "step": 116500 }, { "epoch": 766.5131578947369, "grad_norm": 1.173582911491394, "learning_rate": 0.0001, "loss": 0.0172, "step": 116510 }, { "epoch": 766.578947368421, "grad_norm": 1.2967286109924316, "learning_rate": 0.0001, "loss": 0.0132, "step": 116520 }, { "epoch": 766.6447368421053, "grad_norm": 1.197953224182129, "learning_rate": 0.0001, "loss": 0.0113, "step": 116530 }, { "epoch": 766.7105263157895, "grad_norm": 1.1580605506896973, "learning_rate": 0.0001, "loss": 0.0101, "step": 116540 }, { "epoch": 766.7763157894736, "grad_norm": 1.703427791595459, "learning_rate": 0.0001, "loss": 0.0136, "step": 116550 }, { "epoch": 766.8421052631579, "grad_norm": 1.4414206743240356, "learning_rate": 0.0001, "loss": 0.0138, "step": 116560 }, { "epoch": 766.9078947368421, "grad_norm": 1.5694704055786133, "learning_rate": 0.0001, "loss": 0.0125, "step": 116570 }, { "epoch": 766.9736842105264, "grad_norm": 1.5687357187271118, "learning_rate": 0.0001, "loss": 0.0166, "step": 116580 }, { "epoch": 767.0394736842105, "grad_norm": 1.5816720724105835, "learning_rate": 0.0001, "loss": 0.0164, "step": 116590 }, { "epoch": 767.1052631578947, "grad_norm": 1.7364790439605713, "learning_rate": 0.0001, "loss": 0.0165, "step": 116600 }, { "epoch": 767.171052631579, "grad_norm": 1.3746320009231567, "learning_rate": 0.0001, "loss": 0.0119, "step": 116610 }, { "epoch": 767.2368421052631, "grad_norm": 1.3334177732467651, "learning_rate": 0.0001, "loss": 0.0148, "step": 116620 }, { "epoch": 767.3026315789474, "grad_norm": 1.439239263534546, "learning_rate": 0.0001, "loss": 0.0111, "step": 116630 }, { "epoch": 767.3684210526316, "grad_norm": 1.6203571557998657, "learning_rate": 0.0001, "loss": 0.0122, "step": 116640 }, { "epoch": 767.4342105263158, "grad_norm": 1.5267689228057861, "learning_rate": 0.0001, "loss": 0.0125, "step": 116650 }, { "epoch": 767.5, "grad_norm": 1.6829216480255127, "learning_rate": 0.0001, "loss": 0.0162, "step": 116660 }, { "epoch": 767.5657894736842, "grad_norm": 1.2992626428604126, "learning_rate": 0.0001, "loss": 0.0128, "step": 116670 }, { "epoch": 767.6315789473684, "grad_norm": 1.3694472312927246, "learning_rate": 0.0001, "loss": 0.0114, "step": 116680 }, { "epoch": 767.6973684210526, "grad_norm": 1.624711036682129, "learning_rate": 0.0001, "loss": 0.0162, "step": 116690 }, { "epoch": 767.7631578947369, "grad_norm": 1.430038332939148, "learning_rate": 0.0001, "loss": 0.0122, "step": 116700 }, { "epoch": 767.828947368421, "grad_norm": 1.2785778045654297, "learning_rate": 0.0001, "loss": 0.015, "step": 116710 }, { "epoch": 767.8947368421053, "grad_norm": 1.4057101011276245, "learning_rate": 0.0001, "loss": 0.0105, "step": 116720 }, { "epoch": 767.9605263157895, "grad_norm": 1.3557556867599487, "learning_rate": 0.0001, "loss": 0.0116, "step": 116730 }, { "epoch": 768.0263157894736, "grad_norm": 1.5976324081420898, "learning_rate": 0.0001, "loss": 0.0137, "step": 116740 }, { "epoch": 768.0921052631579, "grad_norm": 1.2013660669326782, "learning_rate": 0.0001, "loss": 0.0152, "step": 116750 }, { "epoch": 768.1578947368421, "grad_norm": 1.5581332445144653, "learning_rate": 0.0001, "loss": 0.0104, "step": 116760 }, { "epoch": 768.2236842105264, "grad_norm": 1.9331238269805908, "learning_rate": 0.0001, "loss": 0.0097, "step": 116770 }, { "epoch": 768.2894736842105, "grad_norm": 1.5434595346450806, "learning_rate": 0.0001, "loss": 0.0149, "step": 116780 }, { "epoch": 768.3552631578947, "grad_norm": 1.9125837087631226, "learning_rate": 0.0001, "loss": 0.0136, "step": 116790 }, { "epoch": 768.421052631579, "grad_norm": 1.7247434854507446, "learning_rate": 0.0001, "loss": 0.0101, "step": 116800 }, { "epoch": 768.4868421052631, "grad_norm": 1.0045006275177002, "learning_rate": 0.0001, "loss": 0.0133, "step": 116810 }, { "epoch": 768.5526315789474, "grad_norm": 1.6424813270568848, "learning_rate": 0.0001, "loss": 0.0121, "step": 116820 }, { "epoch": 768.6184210526316, "grad_norm": 1.5636277198791504, "learning_rate": 0.0001, "loss": 0.0161, "step": 116830 }, { "epoch": 768.6842105263158, "grad_norm": 1.3763707876205444, "learning_rate": 0.0001, "loss": 0.013, "step": 116840 }, { "epoch": 768.75, "grad_norm": 1.8954646587371826, "learning_rate": 0.0001, "loss": 0.0116, "step": 116850 }, { "epoch": 768.8157894736842, "grad_norm": 1.373198390007019, "learning_rate": 0.0001, "loss": 0.0151, "step": 116860 }, { "epoch": 768.8815789473684, "grad_norm": 1.3614428043365479, "learning_rate": 0.0001, "loss": 0.0149, "step": 116870 }, { "epoch": 768.9473684210526, "grad_norm": 1.3185113668441772, "learning_rate": 0.0001, "loss": 0.0122, "step": 116880 }, { "epoch": 769.0131578947369, "grad_norm": 1.5464001893997192, "learning_rate": 0.0001, "loss": 0.0188, "step": 116890 }, { "epoch": 769.078947368421, "grad_norm": 1.581989049911499, "learning_rate": 0.0001, "loss": 0.0128, "step": 116900 }, { "epoch": 769.1447368421053, "grad_norm": 1.4418747425079346, "learning_rate": 0.0001, "loss": 0.0133, "step": 116910 }, { "epoch": 769.2105263157895, "grad_norm": 1.1459062099456787, "learning_rate": 0.0001, "loss": 0.012, "step": 116920 }, { "epoch": 769.2763157894736, "grad_norm": 1.6370776891708374, "learning_rate": 0.0001, "loss": 0.0175, "step": 116930 }, { "epoch": 769.3421052631579, "grad_norm": 1.294480800628662, "learning_rate": 0.0001, "loss": 0.0128, "step": 116940 }, { "epoch": 769.4078947368421, "grad_norm": 3.1437325477600098, "learning_rate": 0.0001, "loss": 0.0143, "step": 116950 }, { "epoch": 769.4736842105264, "grad_norm": 2.6259191036224365, "learning_rate": 0.0001, "loss": 0.0134, "step": 116960 }, { "epoch": 769.5394736842105, "grad_norm": 1.3931934833526611, "learning_rate": 0.0001, "loss": 0.0132, "step": 116970 }, { "epoch": 769.6052631578947, "grad_norm": 1.6558914184570312, "learning_rate": 0.0001, "loss": 0.0123, "step": 116980 }, { "epoch": 769.671052631579, "grad_norm": 1.6279538869857788, "learning_rate": 0.0001, "loss": 0.0161, "step": 116990 }, { "epoch": 769.7368421052631, "grad_norm": 1.6914241313934326, "learning_rate": 0.0001, "loss": 0.0114, "step": 117000 }, { "epoch": 769.8026315789474, "grad_norm": 1.5737208127975464, "learning_rate": 0.0001, "loss": 0.0148, "step": 117010 }, { "epoch": 769.8684210526316, "grad_norm": 2.0009522438049316, "learning_rate": 0.0001, "loss": 0.0147, "step": 117020 }, { "epoch": 769.9342105263158, "grad_norm": 1.560577392578125, "learning_rate": 0.0001, "loss": 0.0133, "step": 117030 }, { "epoch": 770.0, "grad_norm": 1.4144545793533325, "learning_rate": 0.0001, "loss": 0.0134, "step": 117040 }, { "epoch": 770.0657894736842, "grad_norm": 1.351479172706604, "learning_rate": 0.0001, "loss": 0.0116, "step": 117050 }, { "epoch": 770.1315789473684, "grad_norm": 1.8637580871582031, "learning_rate": 0.0001, "loss": 0.0115, "step": 117060 }, { "epoch": 770.1973684210526, "grad_norm": 1.8042832612991333, "learning_rate": 0.0001, "loss": 0.0148, "step": 117070 }, { "epoch": 770.2631578947369, "grad_norm": 1.364928960800171, "learning_rate": 0.0001, "loss": 0.0119, "step": 117080 }, { "epoch": 770.328947368421, "grad_norm": 1.5357133150100708, "learning_rate": 0.0001, "loss": 0.0132, "step": 117090 }, { "epoch": 770.3947368421053, "grad_norm": 1.539341926574707, "learning_rate": 0.0001, "loss": 0.0127, "step": 117100 }, { "epoch": 770.4605263157895, "grad_norm": 1.4033831357955933, "learning_rate": 0.0001, "loss": 0.0118, "step": 117110 }, { "epoch": 770.5263157894736, "grad_norm": 1.5676010847091675, "learning_rate": 0.0001, "loss": 0.0171, "step": 117120 }, { "epoch": 770.5921052631579, "grad_norm": 1.6652369499206543, "learning_rate": 0.0001, "loss": 0.0146, "step": 117130 }, { "epoch": 770.6578947368421, "grad_norm": 1.6845463514328003, "learning_rate": 0.0001, "loss": 0.0152, "step": 117140 }, { "epoch": 770.7236842105264, "grad_norm": 1.6530238389968872, "learning_rate": 0.0001, "loss": 0.0138, "step": 117150 }, { "epoch": 770.7894736842105, "grad_norm": 1.4085458517074585, "learning_rate": 0.0001, "loss": 0.0138, "step": 117160 }, { "epoch": 770.8552631578947, "grad_norm": 1.4217449426651, "learning_rate": 0.0001, "loss": 0.012, "step": 117170 }, { "epoch": 770.921052631579, "grad_norm": 1.957821011543274, "learning_rate": 0.0001, "loss": 0.0107, "step": 117180 }, { "epoch": 770.9868421052631, "grad_norm": 1.4035742282867432, "learning_rate": 0.0001, "loss": 0.0137, "step": 117190 }, { "epoch": 771.0526315789474, "grad_norm": 1.4982537031173706, "learning_rate": 0.0001, "loss": 0.012, "step": 117200 }, { "epoch": 771.1184210526316, "grad_norm": 1.107571005821228, "learning_rate": 0.0001, "loss": 0.0129, "step": 117210 }, { "epoch": 771.1842105263158, "grad_norm": 1.3563767671585083, "learning_rate": 0.0001, "loss": 0.0148, "step": 117220 }, { "epoch": 771.25, "grad_norm": 1.4091339111328125, "learning_rate": 0.0001, "loss": 0.0158, "step": 117230 }, { "epoch": 771.3157894736842, "grad_norm": 1.0314874649047852, "learning_rate": 0.0001, "loss": 0.0167, "step": 117240 }, { "epoch": 771.3815789473684, "grad_norm": 1.5470324754714966, "learning_rate": 0.0001, "loss": 0.013, "step": 117250 }, { "epoch": 771.4473684210526, "grad_norm": 1.3611994981765747, "learning_rate": 0.0001, "loss": 0.014, "step": 117260 }, { "epoch": 771.5131578947369, "grad_norm": 1.5513129234313965, "learning_rate": 0.0001, "loss": 0.0116, "step": 117270 }, { "epoch": 771.578947368421, "grad_norm": 1.580100178718567, "learning_rate": 0.0001, "loss": 0.0122, "step": 117280 }, { "epoch": 771.6447368421053, "grad_norm": 1.6422499418258667, "learning_rate": 0.0001, "loss": 0.0116, "step": 117290 }, { "epoch": 771.7105263157895, "grad_norm": 1.772924780845642, "learning_rate": 0.0001, "loss": 0.0131, "step": 117300 }, { "epoch": 771.7763157894736, "grad_norm": 1.7212406396865845, "learning_rate": 0.0001, "loss": 0.0133, "step": 117310 }, { "epoch": 771.8421052631579, "grad_norm": 1.3791855573654175, "learning_rate": 0.0001, "loss": 0.015, "step": 117320 }, { "epoch": 771.9078947368421, "grad_norm": 1.424912929534912, "learning_rate": 0.0001, "loss": 0.0102, "step": 117330 }, { "epoch": 771.9736842105264, "grad_norm": 1.8623024225234985, "learning_rate": 0.0001, "loss": 0.0171, "step": 117340 }, { "epoch": 772.0394736842105, "grad_norm": 1.5285959243774414, "learning_rate": 0.0001, "loss": 0.019, "step": 117350 }, { "epoch": 772.1052631578947, "grad_norm": 1.1817857027053833, "learning_rate": 0.0001, "loss": 0.0138, "step": 117360 }, { "epoch": 772.171052631579, "grad_norm": 1.3364428281784058, "learning_rate": 0.0001, "loss": 0.0118, "step": 117370 }, { "epoch": 772.2368421052631, "grad_norm": 1.7025913000106812, "learning_rate": 0.0001, "loss": 0.0114, "step": 117380 }, { "epoch": 772.3026315789474, "grad_norm": 1.6299958229064941, "learning_rate": 0.0001, "loss": 0.0135, "step": 117390 }, { "epoch": 772.3684210526316, "grad_norm": 1.5059096813201904, "learning_rate": 0.0001, "loss": 0.0153, "step": 117400 }, { "epoch": 772.4342105263158, "grad_norm": 1.2705498933792114, "learning_rate": 0.0001, "loss": 0.0106, "step": 117410 }, { "epoch": 772.5, "grad_norm": 1.4868000745773315, "learning_rate": 0.0001, "loss": 0.0148, "step": 117420 }, { "epoch": 772.5657894736842, "grad_norm": 1.3679865598678589, "learning_rate": 0.0001, "loss": 0.0139, "step": 117430 }, { "epoch": 772.6315789473684, "grad_norm": 1.7993791103363037, "learning_rate": 0.0001, "loss": 0.0115, "step": 117440 }, { "epoch": 772.6973684210526, "grad_norm": 2.01479434967041, "learning_rate": 0.0001, "loss": 0.0153, "step": 117450 }, { "epoch": 772.7631578947369, "grad_norm": 1.5392093658447266, "learning_rate": 0.0001, "loss": 0.0116, "step": 117460 }, { "epoch": 772.828947368421, "grad_norm": 1.3245824575424194, "learning_rate": 0.0001, "loss": 0.0118, "step": 117470 }, { "epoch": 772.8947368421053, "grad_norm": 1.4409557580947876, "learning_rate": 0.0001, "loss": 0.0124, "step": 117480 }, { "epoch": 772.9605263157895, "grad_norm": 1.6655147075653076, "learning_rate": 0.0001, "loss": 0.0117, "step": 117490 }, { "epoch": 773.0263157894736, "grad_norm": 1.4174166917800903, "learning_rate": 0.0001, "loss": 0.0142, "step": 117500 }, { "epoch": 773.0921052631579, "grad_norm": 1.4598573446273804, "learning_rate": 0.0001, "loss": 0.0155, "step": 117510 }, { "epoch": 773.1578947368421, "grad_norm": 1.4018471240997314, "learning_rate": 0.0001, "loss": 0.0141, "step": 117520 }, { "epoch": 773.2236842105264, "grad_norm": 1.673929214477539, "learning_rate": 0.0001, "loss": 0.0114, "step": 117530 }, { "epoch": 773.2894736842105, "grad_norm": 1.0261871814727783, "learning_rate": 0.0001, "loss": 0.0148, "step": 117540 }, { "epoch": 773.3552631578947, "grad_norm": 1.2416192293167114, "learning_rate": 0.0001, "loss": 0.0133, "step": 117550 }, { "epoch": 773.421052631579, "grad_norm": 1.328464150428772, "learning_rate": 0.0001, "loss": 0.013, "step": 117560 }, { "epoch": 773.4868421052631, "grad_norm": 1.3915812969207764, "learning_rate": 0.0001, "loss": 0.0157, "step": 117570 }, { "epoch": 773.5526315789474, "grad_norm": 1.5982102155685425, "learning_rate": 0.0001, "loss": 0.0103, "step": 117580 }, { "epoch": 773.6184210526316, "grad_norm": 1.5034774541854858, "learning_rate": 0.0001, "loss": 0.0128, "step": 117590 }, { "epoch": 773.6842105263158, "grad_norm": 1.2717703580856323, "learning_rate": 0.0001, "loss": 0.0147, "step": 117600 }, { "epoch": 773.75, "grad_norm": 1.184706687927246, "learning_rate": 0.0001, "loss": 0.0155, "step": 117610 }, { "epoch": 773.8157894736842, "grad_norm": 1.7915529012680054, "learning_rate": 0.0001, "loss": 0.0135, "step": 117620 }, { "epoch": 773.8815789473684, "grad_norm": 1.19523286819458, "learning_rate": 0.0001, "loss": 0.015, "step": 117630 }, { "epoch": 773.9473684210526, "grad_norm": 0.7927185893058777, "learning_rate": 0.0001, "loss": 0.0114, "step": 117640 }, { "epoch": 774.0131578947369, "grad_norm": 1.3365586996078491, "learning_rate": 0.0001, "loss": 0.0141, "step": 117650 }, { "epoch": 774.078947368421, "grad_norm": 1.6289145946502686, "learning_rate": 0.0001, "loss": 0.0108, "step": 117660 }, { "epoch": 774.1447368421053, "grad_norm": 1.405253291130066, "learning_rate": 0.0001, "loss": 0.0128, "step": 117670 }, { "epoch": 774.2105263157895, "grad_norm": 1.2864230871200562, "learning_rate": 0.0001, "loss": 0.0114, "step": 117680 }, { "epoch": 774.2763157894736, "grad_norm": 1.5748963356018066, "learning_rate": 0.0001, "loss": 0.0141, "step": 117690 }, { "epoch": 774.3421052631579, "grad_norm": 1.330047607421875, "learning_rate": 0.0001, "loss": 0.0157, "step": 117700 }, { "epoch": 774.4078947368421, "grad_norm": 1.1259363889694214, "learning_rate": 0.0001, "loss": 0.0097, "step": 117710 }, { "epoch": 774.4736842105264, "grad_norm": 1.4816442728042603, "learning_rate": 0.0001, "loss": 0.0166, "step": 117720 }, { "epoch": 774.5394736842105, "grad_norm": 1.3534523248672485, "learning_rate": 0.0001, "loss": 0.015, "step": 117730 }, { "epoch": 774.6052631578947, "grad_norm": 1.0433349609375, "learning_rate": 0.0001, "loss": 0.0179, "step": 117740 }, { "epoch": 774.671052631579, "grad_norm": 1.1447511911392212, "learning_rate": 0.0001, "loss": 0.0129, "step": 117750 }, { "epoch": 774.7368421052631, "grad_norm": 1.3225462436676025, "learning_rate": 0.0001, "loss": 0.0123, "step": 117760 }, { "epoch": 774.8026315789474, "grad_norm": 1.278283715248108, "learning_rate": 0.0001, "loss": 0.0144, "step": 117770 }, { "epoch": 774.8684210526316, "grad_norm": 1.0901740789413452, "learning_rate": 0.0001, "loss": 0.0142, "step": 117780 }, { "epoch": 774.9342105263158, "grad_norm": 1.3838145732879639, "learning_rate": 0.0001, "loss": 0.0155, "step": 117790 }, { "epoch": 775.0, "grad_norm": 1.2294758558273315, "learning_rate": 0.0001, "loss": 0.0117, "step": 117800 }, { "epoch": 775.0657894736842, "grad_norm": 1.5656145811080933, "learning_rate": 0.0001, "loss": 0.0163, "step": 117810 }, { "epoch": 775.1315789473684, "grad_norm": 1.4100937843322754, "learning_rate": 0.0001, "loss": 0.0124, "step": 117820 }, { "epoch": 775.1973684210526, "grad_norm": 1.3451452255249023, "learning_rate": 0.0001, "loss": 0.0161, "step": 117830 }, { "epoch": 775.2631578947369, "grad_norm": 1.077333688735962, "learning_rate": 0.0001, "loss": 0.011, "step": 117840 }, { "epoch": 775.328947368421, "grad_norm": 1.1771409511566162, "learning_rate": 0.0001, "loss": 0.0166, "step": 117850 }, { "epoch": 775.3947368421053, "grad_norm": 1.301405429840088, "learning_rate": 0.0001, "loss": 0.0159, "step": 117860 }, { "epoch": 775.4605263157895, "grad_norm": 1.0172913074493408, "learning_rate": 0.0001, "loss": 0.0119, "step": 117870 }, { "epoch": 775.5263157894736, "grad_norm": 1.4541507959365845, "learning_rate": 0.0001, "loss": 0.0123, "step": 117880 }, { "epoch": 775.5921052631579, "grad_norm": 1.8554121255874634, "learning_rate": 0.0001, "loss": 0.0124, "step": 117890 }, { "epoch": 775.6578947368421, "grad_norm": 1.7462159395217896, "learning_rate": 0.0001, "loss": 0.0153, "step": 117900 }, { "epoch": 775.7236842105264, "grad_norm": 1.429036259651184, "learning_rate": 0.0001, "loss": 0.0136, "step": 117910 }, { "epoch": 775.7894736842105, "grad_norm": 1.5631977319717407, "learning_rate": 0.0001, "loss": 0.0126, "step": 117920 }, { "epoch": 775.8552631578947, "grad_norm": 0.9749694466590881, "learning_rate": 0.0001, "loss": 0.0123, "step": 117930 }, { "epoch": 775.921052631579, "grad_norm": 1.4251617193222046, "learning_rate": 0.0001, "loss": 0.0151, "step": 117940 }, { "epoch": 775.9868421052631, "grad_norm": 1.7428339719772339, "learning_rate": 0.0001, "loss": 0.0136, "step": 117950 }, { "epoch": 776.0526315789474, "grad_norm": 1.4207303524017334, "learning_rate": 0.0001, "loss": 0.0141, "step": 117960 }, { "epoch": 776.1184210526316, "grad_norm": 1.720077395439148, "learning_rate": 0.0001, "loss": 0.0153, "step": 117970 }, { "epoch": 776.1842105263158, "grad_norm": 1.3200501203536987, "learning_rate": 0.0001, "loss": 0.0095, "step": 117980 }, { "epoch": 776.25, "grad_norm": 1.5397605895996094, "learning_rate": 0.0001, "loss": 0.0193, "step": 117990 }, { "epoch": 776.3157894736842, "grad_norm": 1.3259267807006836, "learning_rate": 0.0001, "loss": 0.0128, "step": 118000 }, { "epoch": 776.3815789473684, "grad_norm": 1.4754812717437744, "learning_rate": 0.0001, "loss": 0.013, "step": 118010 }, { "epoch": 776.4473684210526, "grad_norm": 1.5558054447174072, "learning_rate": 0.0001, "loss": 0.0118, "step": 118020 }, { "epoch": 776.5131578947369, "grad_norm": 1.422050952911377, "learning_rate": 0.0001, "loss": 0.0145, "step": 118030 }, { "epoch": 776.578947368421, "grad_norm": 1.2483266592025757, "learning_rate": 0.0001, "loss": 0.01, "step": 118040 }, { "epoch": 776.6447368421053, "grad_norm": 1.490025520324707, "learning_rate": 0.0001, "loss": 0.0141, "step": 118050 }, { "epoch": 776.7105263157895, "grad_norm": 1.6113227605819702, "learning_rate": 0.0001, "loss": 0.0202, "step": 118060 }, { "epoch": 776.7763157894736, "grad_norm": 1.1881320476531982, "learning_rate": 0.0001, "loss": 0.011, "step": 118070 }, { "epoch": 776.8421052631579, "grad_norm": 1.616684079170227, "learning_rate": 0.0001, "loss": 0.0111, "step": 118080 }, { "epoch": 776.9078947368421, "grad_norm": 1.3749032020568848, "learning_rate": 0.0001, "loss": 0.0119, "step": 118090 }, { "epoch": 776.9736842105264, "grad_norm": 1.72026789188385, "learning_rate": 0.0001, "loss": 0.0131, "step": 118100 }, { "epoch": 777.0394736842105, "grad_norm": 1.3404945135116577, "learning_rate": 0.0001, "loss": 0.0113, "step": 118110 }, { "epoch": 777.1052631578947, "grad_norm": 1.7389558553695679, "learning_rate": 0.0001, "loss": 0.0126, "step": 118120 }, { "epoch": 777.171052631579, "grad_norm": 1.747164249420166, "learning_rate": 0.0001, "loss": 0.0144, "step": 118130 }, { "epoch": 777.2368421052631, "grad_norm": 1.5412545204162598, "learning_rate": 0.0001, "loss": 0.013, "step": 118140 }, { "epoch": 777.3026315789474, "grad_norm": 2.0465409755706787, "learning_rate": 0.0001, "loss": 0.0122, "step": 118150 }, { "epoch": 777.3684210526316, "grad_norm": 1.6078896522521973, "learning_rate": 0.0001, "loss": 0.0131, "step": 118160 }, { "epoch": 777.4342105263158, "grad_norm": 1.6885119676589966, "learning_rate": 0.0001, "loss": 0.0115, "step": 118170 }, { "epoch": 777.5, "grad_norm": 1.7788934707641602, "learning_rate": 0.0001, "loss": 0.0155, "step": 118180 }, { "epoch": 777.5657894736842, "grad_norm": 1.5586590766906738, "learning_rate": 0.0001, "loss": 0.012, "step": 118190 }, { "epoch": 777.6315789473684, "grad_norm": 1.2782087326049805, "learning_rate": 0.0001, "loss": 0.0115, "step": 118200 }, { "epoch": 777.6973684210526, "grad_norm": 1.1563071012496948, "learning_rate": 0.0001, "loss": 0.011, "step": 118210 }, { "epoch": 777.7631578947369, "grad_norm": 1.568233609199524, "learning_rate": 0.0001, "loss": 0.0142, "step": 118220 }, { "epoch": 777.828947368421, "grad_norm": 1.4106571674346924, "learning_rate": 0.0001, "loss": 0.0111, "step": 118230 }, { "epoch": 777.8947368421053, "grad_norm": 1.0203608274459839, "learning_rate": 0.0001, "loss": 0.0138, "step": 118240 }, { "epoch": 777.9605263157895, "grad_norm": 1.2845571041107178, "learning_rate": 0.0001, "loss": 0.021, "step": 118250 }, { "epoch": 778.0263157894736, "grad_norm": 1.327886939048767, "learning_rate": 0.0001, "loss": 0.0159, "step": 118260 }, { "epoch": 778.0921052631579, "grad_norm": 1.6396427154541016, "learning_rate": 0.0001, "loss": 0.0123, "step": 118270 }, { "epoch": 778.1578947368421, "grad_norm": 1.3539034128189087, "learning_rate": 0.0001, "loss": 0.0177, "step": 118280 }, { "epoch": 778.2236842105264, "grad_norm": 1.7115830183029175, "learning_rate": 0.0001, "loss": 0.0132, "step": 118290 }, { "epoch": 778.2894736842105, "grad_norm": 1.4628418684005737, "learning_rate": 0.0001, "loss": 0.0129, "step": 118300 }, { "epoch": 778.3552631578947, "grad_norm": 1.1198406219482422, "learning_rate": 0.0001, "loss": 0.0149, "step": 118310 }, { "epoch": 778.421052631579, "grad_norm": 1.2841591835021973, "learning_rate": 0.0001, "loss": 0.0146, "step": 118320 }, { "epoch": 778.4868421052631, "grad_norm": 1.2043139934539795, "learning_rate": 0.0001, "loss": 0.0125, "step": 118330 }, { "epoch": 778.5526315789474, "grad_norm": 1.9482245445251465, "learning_rate": 0.0001, "loss": 0.0134, "step": 118340 }, { "epoch": 778.6184210526316, "grad_norm": 1.5167983770370483, "learning_rate": 0.0001, "loss": 0.0142, "step": 118350 }, { "epoch": 778.6842105263158, "grad_norm": 1.27537202835083, "learning_rate": 0.0001, "loss": 0.0113, "step": 118360 }, { "epoch": 778.75, "grad_norm": 1.6622034311294556, "learning_rate": 0.0001, "loss": 0.0126, "step": 118370 }, { "epoch": 778.8157894736842, "grad_norm": 1.8456928730010986, "learning_rate": 0.0001, "loss": 0.0143, "step": 118380 }, { "epoch": 778.8815789473684, "grad_norm": 1.19053053855896, "learning_rate": 0.0001, "loss": 0.0113, "step": 118390 }, { "epoch": 778.9473684210526, "grad_norm": 1.7742902040481567, "learning_rate": 0.0001, "loss": 0.0121, "step": 118400 }, { "epoch": 779.0131578947369, "grad_norm": 1.085508942604065, "learning_rate": 0.0001, "loss": 0.0167, "step": 118410 }, { "epoch": 779.078947368421, "grad_norm": 1.2448549270629883, "learning_rate": 0.0001, "loss": 0.0193, "step": 118420 }, { "epoch": 779.1447368421053, "grad_norm": 1.3139543533325195, "learning_rate": 0.0001, "loss": 0.0132, "step": 118430 }, { "epoch": 779.2105263157895, "grad_norm": 1.0778071880340576, "learning_rate": 0.0001, "loss": 0.0177, "step": 118440 }, { "epoch": 779.2763157894736, "grad_norm": 1.323358416557312, "learning_rate": 0.0001, "loss": 0.0102, "step": 118450 }, { "epoch": 779.3421052631579, "grad_norm": 1.4367626905441284, "learning_rate": 0.0001, "loss": 0.0161, "step": 118460 }, { "epoch": 779.4078947368421, "grad_norm": 1.577764630317688, "learning_rate": 0.0001, "loss": 0.0108, "step": 118470 }, { "epoch": 779.4736842105264, "grad_norm": 1.4515050649642944, "learning_rate": 0.0001, "loss": 0.0154, "step": 118480 }, { "epoch": 779.5394736842105, "grad_norm": 1.1976096630096436, "learning_rate": 0.0001, "loss": 0.0158, "step": 118490 }, { "epoch": 779.6052631578947, "grad_norm": 1.6688446998596191, "learning_rate": 0.0001, "loss": 0.0117, "step": 118500 }, { "epoch": 779.671052631579, "grad_norm": 1.2284595966339111, "learning_rate": 0.0001, "loss": 0.0152, "step": 118510 }, { "epoch": 779.7368421052631, "grad_norm": 1.2016891241073608, "learning_rate": 0.0001, "loss": 0.0164, "step": 118520 }, { "epoch": 779.8026315789474, "grad_norm": 1.293155550956726, "learning_rate": 0.0001, "loss": 0.0119, "step": 118530 }, { "epoch": 779.8684210526316, "grad_norm": 1.4205467700958252, "learning_rate": 0.0001, "loss": 0.0128, "step": 118540 }, { "epoch": 779.9342105263158, "grad_norm": 1.158672571182251, "learning_rate": 0.0001, "loss": 0.0121, "step": 118550 }, { "epoch": 780.0, "grad_norm": 1.145185112953186, "learning_rate": 0.0001, "loss": 0.0115, "step": 118560 }, { "epoch": 780.0657894736842, "grad_norm": 1.5229657888412476, "learning_rate": 0.0001, "loss": 0.017, "step": 118570 }, { "epoch": 780.1315789473684, "grad_norm": 1.529106616973877, "learning_rate": 0.0001, "loss": 0.0143, "step": 118580 }, { "epoch": 780.1973684210526, "grad_norm": 1.285982370376587, "learning_rate": 0.0001, "loss": 0.0168, "step": 118590 }, { "epoch": 780.2631578947369, "grad_norm": 1.635189175605774, "learning_rate": 0.0001, "loss": 0.0145, "step": 118600 }, { "epoch": 780.328947368421, "grad_norm": 1.5930699110031128, "learning_rate": 0.0001, "loss": 0.0177, "step": 118610 }, { "epoch": 780.3947368421053, "grad_norm": 1.732635259628296, "learning_rate": 0.0001, "loss": 0.0131, "step": 118620 }, { "epoch": 780.4605263157895, "grad_norm": 1.6783983707427979, "learning_rate": 0.0001, "loss": 0.0138, "step": 118630 }, { "epoch": 780.5263157894736, "grad_norm": 1.1292444467544556, "learning_rate": 0.0001, "loss": 0.0114, "step": 118640 }, { "epoch": 780.5921052631579, "grad_norm": 1.2886077165603638, "learning_rate": 0.0001, "loss": 0.0107, "step": 118650 }, { "epoch": 780.6578947368421, "grad_norm": 1.3052195310592651, "learning_rate": 0.0001, "loss": 0.016, "step": 118660 }, { "epoch": 780.7236842105264, "grad_norm": 1.524764060974121, "learning_rate": 0.0001, "loss": 0.0119, "step": 118670 }, { "epoch": 780.7894736842105, "grad_norm": 1.2332009077072144, "learning_rate": 0.0001, "loss": 0.0109, "step": 118680 }, { "epoch": 780.8552631578947, "grad_norm": 1.2227756977081299, "learning_rate": 0.0001, "loss": 0.0156, "step": 118690 }, { "epoch": 780.921052631579, "grad_norm": 1.4896539449691772, "learning_rate": 0.0001, "loss": 0.0109, "step": 118700 }, { "epoch": 780.9868421052631, "grad_norm": 1.4093775749206543, "learning_rate": 0.0001, "loss": 0.0139, "step": 118710 }, { "epoch": 781.0526315789474, "grad_norm": 1.4564220905303955, "learning_rate": 0.0001, "loss": 0.0135, "step": 118720 }, { "epoch": 781.1184210526316, "grad_norm": 1.3330382108688354, "learning_rate": 0.0001, "loss": 0.0158, "step": 118730 }, { "epoch": 781.1842105263158, "grad_norm": 1.626880168914795, "learning_rate": 0.0001, "loss": 0.0131, "step": 118740 }, { "epoch": 781.25, "grad_norm": 1.6475964784622192, "learning_rate": 0.0001, "loss": 0.015, "step": 118750 }, { "epoch": 781.3157894736842, "grad_norm": 1.593416690826416, "learning_rate": 0.0001, "loss": 0.0174, "step": 118760 }, { "epoch": 781.3815789473684, "grad_norm": 1.5062958002090454, "learning_rate": 0.0001, "loss": 0.013, "step": 118770 }, { "epoch": 781.4473684210526, "grad_norm": 1.2871431112289429, "learning_rate": 0.0001, "loss": 0.013, "step": 118780 }, { "epoch": 781.5131578947369, "grad_norm": 1.4250879287719727, "learning_rate": 0.0001, "loss": 0.0127, "step": 118790 }, { "epoch": 781.578947368421, "grad_norm": 1.1950808763504028, "learning_rate": 0.0001, "loss": 0.0111, "step": 118800 }, { "epoch": 781.6447368421053, "grad_norm": 1.3619606494903564, "learning_rate": 0.0001, "loss": 0.0131, "step": 118810 }, { "epoch": 781.7105263157895, "grad_norm": 1.6130160093307495, "learning_rate": 0.0001, "loss": 0.0154, "step": 118820 }, { "epoch": 781.7763157894736, "grad_norm": 1.8435858488082886, "learning_rate": 0.0001, "loss": 0.0126, "step": 118830 }, { "epoch": 781.8421052631579, "grad_norm": 1.694336175918579, "learning_rate": 0.0001, "loss": 0.0098, "step": 118840 }, { "epoch": 781.9078947368421, "grad_norm": 1.6060748100280762, "learning_rate": 0.0001, "loss": 0.0104, "step": 118850 }, { "epoch": 781.9736842105264, "grad_norm": 1.1320799589157104, "learning_rate": 0.0001, "loss": 0.0167, "step": 118860 }, { "epoch": 782.0394736842105, "grad_norm": 2.0004217624664307, "learning_rate": 0.0001, "loss": 0.0096, "step": 118870 }, { "epoch": 782.1052631578947, "grad_norm": 1.759088397026062, "learning_rate": 0.0001, "loss": 0.0127, "step": 118880 }, { "epoch": 782.171052631579, "grad_norm": 1.531087875366211, "learning_rate": 0.0001, "loss": 0.013, "step": 118890 }, { "epoch": 782.2368421052631, "grad_norm": 1.3060065507888794, "learning_rate": 0.0001, "loss": 0.0196, "step": 118900 }, { "epoch": 782.3026315789474, "grad_norm": 1.4917072057724, "learning_rate": 0.0001, "loss": 0.0149, "step": 118910 }, { "epoch": 782.3684210526316, "grad_norm": 1.5028550624847412, "learning_rate": 0.0001, "loss": 0.0111, "step": 118920 }, { "epoch": 782.4342105263158, "grad_norm": 1.1421515941619873, "learning_rate": 0.0001, "loss": 0.0103, "step": 118930 }, { "epoch": 782.5, "grad_norm": 0.9992579817771912, "learning_rate": 0.0001, "loss": 0.0134, "step": 118940 }, { "epoch": 782.5657894736842, "grad_norm": 1.5823440551757812, "learning_rate": 0.0001, "loss": 0.0117, "step": 118950 }, { "epoch": 782.6315789473684, "grad_norm": 1.5146819353103638, "learning_rate": 0.0001, "loss": 0.0154, "step": 118960 }, { "epoch": 782.6973684210526, "grad_norm": 1.7453725337982178, "learning_rate": 0.0001, "loss": 0.0119, "step": 118970 }, { "epoch": 782.7631578947369, "grad_norm": 1.7666147947311401, "learning_rate": 0.0001, "loss": 0.0147, "step": 118980 }, { "epoch": 782.828947368421, "grad_norm": 1.9276759624481201, "learning_rate": 0.0001, "loss": 0.0142, "step": 118990 }, { "epoch": 782.8947368421053, "grad_norm": 1.519883632659912, "learning_rate": 0.0001, "loss": 0.0098, "step": 119000 }, { "epoch": 782.9605263157895, "grad_norm": 1.4789340496063232, "learning_rate": 0.0001, "loss": 0.0114, "step": 119010 }, { "epoch": 783.0263157894736, "grad_norm": 1.2858966588974, "learning_rate": 0.0001, "loss": 0.0146, "step": 119020 }, { "epoch": 783.0921052631579, "grad_norm": 1.8425369262695312, "learning_rate": 0.0001, "loss": 0.0126, "step": 119030 }, { "epoch": 783.1578947368421, "grad_norm": 1.309570550918579, "learning_rate": 0.0001, "loss": 0.0131, "step": 119040 }, { "epoch": 783.2236842105264, "grad_norm": 1.4567279815673828, "learning_rate": 0.0001, "loss": 0.014, "step": 119050 }, { "epoch": 783.2894736842105, "grad_norm": 1.4731162786483765, "learning_rate": 0.0001, "loss": 0.0165, "step": 119060 }, { "epoch": 783.3552631578947, "grad_norm": 1.2521648406982422, "learning_rate": 0.0001, "loss": 0.0122, "step": 119070 }, { "epoch": 783.421052631579, "grad_norm": 1.062436819076538, "learning_rate": 0.0001, "loss": 0.0134, "step": 119080 }, { "epoch": 783.4868421052631, "grad_norm": 1.4122339487075806, "learning_rate": 0.0001, "loss": 0.0103, "step": 119090 }, { "epoch": 783.5526315789474, "grad_norm": 1.3283238410949707, "learning_rate": 0.0001, "loss": 0.0146, "step": 119100 }, { "epoch": 783.6184210526316, "grad_norm": 1.263909101486206, "learning_rate": 0.0001, "loss": 0.0178, "step": 119110 }, { "epoch": 783.6842105263158, "grad_norm": 1.517830729484558, "learning_rate": 0.0001, "loss": 0.0153, "step": 119120 }, { "epoch": 783.75, "grad_norm": 1.6508365869522095, "learning_rate": 0.0001, "loss": 0.0152, "step": 119130 }, { "epoch": 783.8157894736842, "grad_norm": 1.530362606048584, "learning_rate": 0.0001, "loss": 0.0117, "step": 119140 }, { "epoch": 783.8815789473684, "grad_norm": 1.22704017162323, "learning_rate": 0.0001, "loss": 0.0134, "step": 119150 }, { "epoch": 783.9473684210526, "grad_norm": 1.547345757484436, "learning_rate": 0.0001, "loss": 0.0101, "step": 119160 }, { "epoch": 784.0131578947369, "grad_norm": 1.4145691394805908, "learning_rate": 0.0001, "loss": 0.0138, "step": 119170 }, { "epoch": 784.078947368421, "grad_norm": 1.3770159482955933, "learning_rate": 0.0001, "loss": 0.0131, "step": 119180 }, { "epoch": 784.1447368421053, "grad_norm": 1.0161010026931763, "learning_rate": 0.0001, "loss": 0.0143, "step": 119190 }, { "epoch": 784.2105263157895, "grad_norm": 1.610177755355835, "learning_rate": 0.0001, "loss": 0.0141, "step": 119200 }, { "epoch": 784.2763157894736, "grad_norm": 1.3716585636138916, "learning_rate": 0.0001, "loss": 0.0116, "step": 119210 }, { "epoch": 784.3421052631579, "grad_norm": 1.1020225286483765, "learning_rate": 0.0001, "loss": 0.0108, "step": 119220 }, { "epoch": 784.4078947368421, "grad_norm": 1.289456844329834, "learning_rate": 0.0001, "loss": 0.0098, "step": 119230 }, { "epoch": 784.4736842105264, "grad_norm": 1.5520310401916504, "learning_rate": 0.0001, "loss": 0.0146, "step": 119240 }, { "epoch": 784.5394736842105, "grad_norm": 1.8526047468185425, "learning_rate": 0.0001, "loss": 0.0125, "step": 119250 }, { "epoch": 784.6052631578947, "grad_norm": 1.1816738843917847, "learning_rate": 0.0001, "loss": 0.0117, "step": 119260 }, { "epoch": 784.671052631579, "grad_norm": 1.307490348815918, "learning_rate": 0.0001, "loss": 0.0142, "step": 119270 }, { "epoch": 784.7368421052631, "grad_norm": 1.4652717113494873, "learning_rate": 0.0001, "loss": 0.013, "step": 119280 }, { "epoch": 784.8026315789474, "grad_norm": 1.3756448030471802, "learning_rate": 0.0001, "loss": 0.0149, "step": 119290 }, { "epoch": 784.8684210526316, "grad_norm": 1.3743414878845215, "learning_rate": 0.0001, "loss": 0.0167, "step": 119300 }, { "epoch": 784.9342105263158, "grad_norm": 1.697067379951477, "learning_rate": 0.0001, "loss": 0.0113, "step": 119310 }, { "epoch": 785.0, "grad_norm": 1.705937385559082, "learning_rate": 0.0001, "loss": 0.0162, "step": 119320 }, { "epoch": 785.0657894736842, "grad_norm": 1.6261695623397827, "learning_rate": 0.0001, "loss": 0.0118, "step": 119330 }, { "epoch": 785.1315789473684, "grad_norm": 1.648093581199646, "learning_rate": 0.0001, "loss": 0.0225, "step": 119340 }, { "epoch": 785.1973684210526, "grad_norm": 1.2595224380493164, "learning_rate": 0.0001, "loss": 0.0134, "step": 119350 }, { "epoch": 785.2631578947369, "grad_norm": 2.004023313522339, "learning_rate": 0.0001, "loss": 0.0113, "step": 119360 }, { "epoch": 785.328947368421, "grad_norm": 1.6383088827133179, "learning_rate": 0.0001, "loss": 0.0096, "step": 119370 }, { "epoch": 785.3947368421053, "grad_norm": 1.697852373123169, "learning_rate": 0.0001, "loss": 0.0155, "step": 119380 }, { "epoch": 785.4605263157895, "grad_norm": 1.9981783628463745, "learning_rate": 0.0001, "loss": 0.0118, "step": 119390 }, { "epoch": 785.5263157894736, "grad_norm": 1.8764359951019287, "learning_rate": 0.0001, "loss": 0.0129, "step": 119400 }, { "epoch": 785.5921052631579, "grad_norm": 1.461872935295105, "learning_rate": 0.0001, "loss": 0.015, "step": 119410 }, { "epoch": 785.6578947368421, "grad_norm": 1.4281846284866333, "learning_rate": 0.0001, "loss": 0.0132, "step": 119420 }, { "epoch": 785.7236842105264, "grad_norm": 1.6769027709960938, "learning_rate": 0.0001, "loss": 0.013, "step": 119430 }, { "epoch": 785.7894736842105, "grad_norm": 1.1075910329818726, "learning_rate": 0.0001, "loss": 0.0134, "step": 119440 }, { "epoch": 785.8552631578947, "grad_norm": 1.6135190725326538, "learning_rate": 0.0001, "loss": 0.0132, "step": 119450 }, { "epoch": 785.921052631579, "grad_norm": 1.3844678401947021, "learning_rate": 0.0001, "loss": 0.0111, "step": 119460 }, { "epoch": 785.9868421052631, "grad_norm": 1.6457711458206177, "learning_rate": 0.0001, "loss": 0.0126, "step": 119470 }, { "epoch": 786.0526315789474, "grad_norm": 1.2988646030426025, "learning_rate": 0.0001, "loss": 0.0153, "step": 119480 }, { "epoch": 786.1184210526316, "grad_norm": 1.3555996417999268, "learning_rate": 0.0001, "loss": 0.0117, "step": 119490 }, { "epoch": 786.1842105263158, "grad_norm": 1.3935763835906982, "learning_rate": 0.0001, "loss": 0.0152, "step": 119500 }, { "epoch": 786.25, "grad_norm": 1.48127281665802, "learning_rate": 0.0001, "loss": 0.0124, "step": 119510 }, { "epoch": 786.3157894736842, "grad_norm": 1.2742480039596558, "learning_rate": 0.0001, "loss": 0.0124, "step": 119520 }, { "epoch": 786.3815789473684, "grad_norm": 1.5143762826919556, "learning_rate": 0.0001, "loss": 0.0123, "step": 119530 }, { "epoch": 786.4473684210526, "grad_norm": 1.6493535041809082, "learning_rate": 0.0001, "loss": 0.012, "step": 119540 }, { "epoch": 786.5131578947369, "grad_norm": 1.3512027263641357, "learning_rate": 0.0001, "loss": 0.0114, "step": 119550 }, { "epoch": 786.578947368421, "grad_norm": 1.3596298694610596, "learning_rate": 0.0001, "loss": 0.0115, "step": 119560 }, { "epoch": 786.6447368421053, "grad_norm": 1.7179313898086548, "learning_rate": 0.0001, "loss": 0.0125, "step": 119570 }, { "epoch": 786.7105263157895, "grad_norm": 1.654852032661438, "learning_rate": 0.0001, "loss": 0.017, "step": 119580 }, { "epoch": 786.7763157894736, "grad_norm": 1.6241085529327393, "learning_rate": 0.0001, "loss": 0.0125, "step": 119590 }, { "epoch": 786.8421052631579, "grad_norm": 1.6239190101623535, "learning_rate": 0.0001, "loss": 0.0118, "step": 119600 }, { "epoch": 786.9078947368421, "grad_norm": 1.981985330581665, "learning_rate": 0.0001, "loss": 0.0147, "step": 119610 }, { "epoch": 786.9736842105264, "grad_norm": 1.8902039527893066, "learning_rate": 0.0001, "loss": 0.0163, "step": 119620 }, { "epoch": 787.0394736842105, "grad_norm": 1.786963939666748, "learning_rate": 0.0001, "loss": 0.0111, "step": 119630 }, { "epoch": 787.1052631578947, "grad_norm": 1.6775367259979248, "learning_rate": 0.0001, "loss": 0.0134, "step": 119640 }, { "epoch": 787.171052631579, "grad_norm": 1.6926342248916626, "learning_rate": 0.0001, "loss": 0.0176, "step": 119650 }, { "epoch": 787.2368421052631, "grad_norm": 1.2143518924713135, "learning_rate": 0.0001, "loss": 0.0134, "step": 119660 }, { "epoch": 787.3026315789474, "grad_norm": 1.7071720361709595, "learning_rate": 0.0001, "loss": 0.0131, "step": 119670 }, { "epoch": 787.3684210526316, "grad_norm": 1.4128307104110718, "learning_rate": 0.0001, "loss": 0.0106, "step": 119680 }, { "epoch": 787.4342105263158, "grad_norm": 1.1836111545562744, "learning_rate": 0.0001, "loss": 0.0132, "step": 119690 }, { "epoch": 787.5, "grad_norm": 1.6219416856765747, "learning_rate": 0.0001, "loss": 0.0176, "step": 119700 }, { "epoch": 787.5657894736842, "grad_norm": 1.3669687509536743, "learning_rate": 0.0001, "loss": 0.0109, "step": 119710 }, { "epoch": 787.6315789473684, "grad_norm": 1.1004085540771484, "learning_rate": 0.0001, "loss": 0.0156, "step": 119720 }, { "epoch": 787.6973684210526, "grad_norm": 1.279032588005066, "learning_rate": 0.0001, "loss": 0.0115, "step": 119730 }, { "epoch": 787.7631578947369, "grad_norm": 1.6499073505401611, "learning_rate": 0.0001, "loss": 0.0122, "step": 119740 }, { "epoch": 787.828947368421, "grad_norm": 1.547370195388794, "learning_rate": 0.0001, "loss": 0.013, "step": 119750 }, { "epoch": 787.8947368421053, "grad_norm": 1.8017464876174927, "learning_rate": 0.0001, "loss": 0.012, "step": 119760 }, { "epoch": 787.9605263157895, "grad_norm": 2.1411995887756348, "learning_rate": 0.0001, "loss": 0.0151, "step": 119770 }, { "epoch": 788.0263157894736, "grad_norm": 1.5028386116027832, "learning_rate": 0.0001, "loss": 0.0123, "step": 119780 }, { "epoch": 788.0921052631579, "grad_norm": 1.5332632064819336, "learning_rate": 0.0001, "loss": 0.0115, "step": 119790 }, { "epoch": 788.1578947368421, "grad_norm": 1.5647287368774414, "learning_rate": 0.0001, "loss": 0.0159, "step": 119800 }, { "epoch": 788.2236842105264, "grad_norm": 1.200681209564209, "learning_rate": 0.0001, "loss": 0.0137, "step": 119810 }, { "epoch": 788.2894736842105, "grad_norm": 1.0117236375808716, "learning_rate": 0.0001, "loss": 0.0114, "step": 119820 }, { "epoch": 788.3552631578947, "grad_norm": 1.6327182054519653, "learning_rate": 0.0001, "loss": 0.0184, "step": 119830 }, { "epoch": 788.421052631579, "grad_norm": 1.4656283855438232, "learning_rate": 0.0001, "loss": 0.011, "step": 119840 }, { "epoch": 788.4868421052631, "grad_norm": 1.1998372077941895, "learning_rate": 0.0001, "loss": 0.0141, "step": 119850 }, { "epoch": 788.5526315789474, "grad_norm": 1.6485151052474976, "learning_rate": 0.0001, "loss": 0.0143, "step": 119860 }, { "epoch": 788.6184210526316, "grad_norm": 1.6193571090698242, "learning_rate": 0.0001, "loss": 0.0146, "step": 119870 }, { "epoch": 788.6842105263158, "grad_norm": 1.114668607711792, "learning_rate": 0.0001, "loss": 0.0115, "step": 119880 }, { "epoch": 788.75, "grad_norm": 0.9424605369567871, "learning_rate": 0.0001, "loss": 0.0158, "step": 119890 }, { "epoch": 788.8157894736842, "grad_norm": 1.4015053510665894, "learning_rate": 0.0001, "loss": 0.0118, "step": 119900 }, { "epoch": 788.8815789473684, "grad_norm": 1.5387846231460571, "learning_rate": 0.0001, "loss": 0.0162, "step": 119910 }, { "epoch": 788.9473684210526, "grad_norm": 1.4812443256378174, "learning_rate": 0.0001, "loss": 0.0114, "step": 119920 }, { "epoch": 789.0131578947369, "grad_norm": 1.6601998805999756, "learning_rate": 0.0001, "loss": 0.0119, "step": 119930 }, { "epoch": 789.078947368421, "grad_norm": 1.453046202659607, "learning_rate": 0.0001, "loss": 0.0123, "step": 119940 }, { "epoch": 789.1447368421053, "grad_norm": 1.259183645248413, "learning_rate": 0.0001, "loss": 0.0097, "step": 119950 }, { "epoch": 789.2105263157895, "grad_norm": 1.7759662866592407, "learning_rate": 0.0001, "loss": 0.0138, "step": 119960 }, { "epoch": 789.2763157894736, "grad_norm": 1.3745535612106323, "learning_rate": 0.0001, "loss": 0.0131, "step": 119970 }, { "epoch": 789.3421052631579, "grad_norm": 1.374551773071289, "learning_rate": 0.0001, "loss": 0.0155, "step": 119980 }, { "epoch": 789.4078947368421, "grad_norm": 1.4028632640838623, "learning_rate": 0.0001, "loss": 0.0109, "step": 119990 }, { "epoch": 789.4736842105264, "grad_norm": 1.1496855020523071, "learning_rate": 0.0001, "loss": 0.012, "step": 120000 }, { "epoch": 789.5394736842105, "grad_norm": 1.7433587312698364, "learning_rate": 0.0001, "loss": 0.0154, "step": 120010 }, { "epoch": 789.6052631578947, "grad_norm": 1.4837114810943604, "learning_rate": 0.0001, "loss": 0.015, "step": 120020 }, { "epoch": 789.671052631579, "grad_norm": 1.515573501586914, "learning_rate": 0.0001, "loss": 0.0154, "step": 120030 }, { "epoch": 789.7368421052631, "grad_norm": 1.7087770700454712, "learning_rate": 0.0001, "loss": 0.0127, "step": 120040 }, { "epoch": 789.8026315789474, "grad_norm": 1.5537638664245605, "learning_rate": 0.0001, "loss": 0.0166, "step": 120050 }, { "epoch": 789.8684210526316, "grad_norm": 1.2317562103271484, "learning_rate": 0.0001, "loss": 0.0127, "step": 120060 }, { "epoch": 789.9342105263158, "grad_norm": 1.5469383001327515, "learning_rate": 0.0001, "loss": 0.0138, "step": 120070 }, { "epoch": 790.0, "grad_norm": 1.2652373313903809, "learning_rate": 0.0001, "loss": 0.0119, "step": 120080 }, { "epoch": 790.0657894736842, "grad_norm": 1.752303957939148, "learning_rate": 0.0001, "loss": 0.0124, "step": 120090 }, { "epoch": 790.1315789473684, "grad_norm": 1.4267956018447876, "learning_rate": 0.0001, "loss": 0.0099, "step": 120100 }, { "epoch": 790.1973684210526, "grad_norm": 1.371881127357483, "learning_rate": 0.0001, "loss": 0.019, "step": 120110 }, { "epoch": 790.2631578947369, "grad_norm": 1.4792046546936035, "learning_rate": 0.0001, "loss": 0.0131, "step": 120120 }, { "epoch": 790.328947368421, "grad_norm": 1.3436096906661987, "learning_rate": 0.0001, "loss": 0.0116, "step": 120130 }, { "epoch": 790.3947368421053, "grad_norm": 1.5119013786315918, "learning_rate": 0.0001, "loss": 0.0204, "step": 120140 }, { "epoch": 790.4605263157895, "grad_norm": 1.6656509637832642, "learning_rate": 0.0001, "loss": 0.0107, "step": 120150 }, { "epoch": 790.5263157894736, "grad_norm": 1.2856197357177734, "learning_rate": 0.0001, "loss": 0.015, "step": 120160 }, { "epoch": 790.5921052631579, "grad_norm": 2.001122236251831, "learning_rate": 0.0001, "loss": 0.0133, "step": 120170 }, { "epoch": 790.6578947368421, "grad_norm": 1.5316319465637207, "learning_rate": 0.0001, "loss": 0.0141, "step": 120180 }, { "epoch": 790.7236842105264, "grad_norm": 1.9806833267211914, "learning_rate": 0.0001, "loss": 0.0112, "step": 120190 }, { "epoch": 790.7894736842105, "grad_norm": 1.748010516166687, "learning_rate": 0.0001, "loss": 0.0128, "step": 120200 }, { "epoch": 790.8552631578947, "grad_norm": 1.6264196634292603, "learning_rate": 0.0001, "loss": 0.0127, "step": 120210 }, { "epoch": 790.921052631579, "grad_norm": 1.9526827335357666, "learning_rate": 0.0001, "loss": 0.0146, "step": 120220 }, { "epoch": 790.9868421052631, "grad_norm": 1.633154273033142, "learning_rate": 0.0001, "loss": 0.0101, "step": 120230 }, { "epoch": 791.0526315789474, "grad_norm": 1.5356650352478027, "learning_rate": 0.0001, "loss": 0.0128, "step": 120240 }, { "epoch": 791.1184210526316, "grad_norm": 1.5198614597320557, "learning_rate": 0.0001, "loss": 0.0134, "step": 120250 }, { "epoch": 791.1842105263158, "grad_norm": 1.3592991828918457, "learning_rate": 0.0001, "loss": 0.0135, "step": 120260 }, { "epoch": 791.25, "grad_norm": 1.9146106243133545, "learning_rate": 0.0001, "loss": 0.0129, "step": 120270 }, { "epoch": 791.3157894736842, "grad_norm": 1.489648461341858, "learning_rate": 0.0001, "loss": 0.0116, "step": 120280 }, { "epoch": 791.3815789473684, "grad_norm": 1.1393131017684937, "learning_rate": 0.0001, "loss": 0.0134, "step": 120290 }, { "epoch": 791.4473684210526, "grad_norm": 1.438093662261963, "learning_rate": 0.0001, "loss": 0.0155, "step": 120300 }, { "epoch": 791.5131578947369, "grad_norm": 1.023202657699585, "learning_rate": 0.0001, "loss": 0.0105, "step": 120310 }, { "epoch": 791.578947368421, "grad_norm": 1.5475231409072876, "learning_rate": 0.0001, "loss": 0.0164, "step": 120320 }, { "epoch": 791.6447368421053, "grad_norm": 1.595823049545288, "learning_rate": 0.0001, "loss": 0.0151, "step": 120330 }, { "epoch": 791.7105263157895, "grad_norm": 0.8880116939544678, "learning_rate": 0.0001, "loss": 0.0107, "step": 120340 }, { "epoch": 791.7763157894736, "grad_norm": 1.5297499895095825, "learning_rate": 0.0001, "loss": 0.0149, "step": 120350 }, { "epoch": 791.8421052631579, "grad_norm": 1.5383802652359009, "learning_rate": 0.0001, "loss": 0.0123, "step": 120360 }, { "epoch": 791.9078947368421, "grad_norm": 1.7462745904922485, "learning_rate": 0.0001, "loss": 0.0124, "step": 120370 }, { "epoch": 791.9736842105264, "grad_norm": 1.470432162284851, "learning_rate": 0.0001, "loss": 0.0127, "step": 120380 }, { "epoch": 792.0394736842105, "grad_norm": 1.6100658178329468, "learning_rate": 0.0001, "loss": 0.0151, "step": 120390 }, { "epoch": 792.1052631578947, "grad_norm": 1.2104003429412842, "learning_rate": 0.0001, "loss": 0.011, "step": 120400 }, { "epoch": 792.171052631579, "grad_norm": 1.8407337665557861, "learning_rate": 0.0001, "loss": 0.018, "step": 120410 }, { "epoch": 792.2368421052631, "grad_norm": 1.357035517692566, "learning_rate": 0.0001, "loss": 0.0132, "step": 120420 }, { "epoch": 792.3026315789474, "grad_norm": 1.5518691539764404, "learning_rate": 0.0001, "loss": 0.0123, "step": 120430 }, { "epoch": 792.3684210526316, "grad_norm": 1.1426655054092407, "learning_rate": 0.0001, "loss": 0.0131, "step": 120440 }, { "epoch": 792.4342105263158, "grad_norm": 1.7841227054595947, "learning_rate": 0.0001, "loss": 0.0138, "step": 120450 }, { "epoch": 792.5, "grad_norm": 1.5471450090408325, "learning_rate": 0.0001, "loss": 0.0151, "step": 120460 }, { "epoch": 792.5657894736842, "grad_norm": 1.779282569885254, "learning_rate": 0.0001, "loss": 0.0115, "step": 120470 }, { "epoch": 792.6315789473684, "grad_norm": 1.7641539573669434, "learning_rate": 0.0001, "loss": 0.0161, "step": 120480 }, { "epoch": 792.6973684210526, "grad_norm": 1.4690091609954834, "learning_rate": 0.0001, "loss": 0.0127, "step": 120490 }, { "epoch": 792.7631578947369, "grad_norm": 1.793459177017212, "learning_rate": 0.0001, "loss": 0.0148, "step": 120500 }, { "epoch": 792.828947368421, "grad_norm": 1.3721497058868408, "learning_rate": 0.0001, "loss": 0.0129, "step": 120510 }, { "epoch": 792.8947368421053, "grad_norm": 1.364057183265686, "learning_rate": 0.0001, "loss": 0.012, "step": 120520 }, { "epoch": 792.9605263157895, "grad_norm": 1.4968223571777344, "learning_rate": 0.0001, "loss": 0.0121, "step": 120530 }, { "epoch": 793.0263157894736, "grad_norm": 1.9936431646347046, "learning_rate": 0.0001, "loss": 0.0111, "step": 120540 }, { "epoch": 793.0921052631579, "grad_norm": 1.3991377353668213, "learning_rate": 0.0001, "loss": 0.0119, "step": 120550 }, { "epoch": 793.1578947368421, "grad_norm": 1.3882298469543457, "learning_rate": 0.0001, "loss": 0.0124, "step": 120560 }, { "epoch": 793.2236842105264, "grad_norm": 1.435396671295166, "learning_rate": 0.0001, "loss": 0.015, "step": 120570 }, { "epoch": 793.2894736842105, "grad_norm": 1.105873703956604, "learning_rate": 0.0001, "loss": 0.0141, "step": 120580 }, { "epoch": 793.3552631578947, "grad_norm": 1.8405166864395142, "learning_rate": 0.0001, "loss": 0.0117, "step": 120590 }, { "epoch": 793.421052631579, "grad_norm": 1.4387298822402954, "learning_rate": 0.0001, "loss": 0.0153, "step": 120600 }, { "epoch": 793.4868421052631, "grad_norm": 1.1281381845474243, "learning_rate": 0.0001, "loss": 0.0133, "step": 120610 }, { "epoch": 793.5526315789474, "grad_norm": 1.7074689865112305, "learning_rate": 0.0001, "loss": 0.0113, "step": 120620 }, { "epoch": 793.6184210526316, "grad_norm": 1.2615891695022583, "learning_rate": 0.0001, "loss": 0.0133, "step": 120630 }, { "epoch": 793.6842105263158, "grad_norm": 1.6340607404708862, "learning_rate": 0.0001, "loss": 0.0127, "step": 120640 }, { "epoch": 793.75, "grad_norm": 0.9560874104499817, "learning_rate": 0.0001, "loss": 0.0148, "step": 120650 }, { "epoch": 793.8157894736842, "grad_norm": 1.3108716011047363, "learning_rate": 0.0001, "loss": 0.0147, "step": 120660 }, { "epoch": 793.8815789473684, "grad_norm": 1.0437304973602295, "learning_rate": 0.0001, "loss": 0.0153, "step": 120670 }, { "epoch": 793.9473684210526, "grad_norm": 1.399863839149475, "learning_rate": 0.0001, "loss": 0.0113, "step": 120680 }, { "epoch": 794.0131578947369, "grad_norm": 1.200249433517456, "learning_rate": 0.0001, "loss": 0.0132, "step": 120690 }, { "epoch": 794.078947368421, "grad_norm": 1.480424404144287, "learning_rate": 0.0001, "loss": 0.012, "step": 120700 }, { "epoch": 794.1447368421053, "grad_norm": 0.9545551538467407, "learning_rate": 0.0001, "loss": 0.0139, "step": 120710 }, { "epoch": 794.2105263157895, "grad_norm": 1.5192837715148926, "learning_rate": 0.0001, "loss": 0.0129, "step": 120720 }, { "epoch": 794.2763157894736, "grad_norm": 1.559036374092102, "learning_rate": 0.0001, "loss": 0.0148, "step": 120730 }, { "epoch": 794.3421052631579, "grad_norm": 1.5743498802185059, "learning_rate": 0.0001, "loss": 0.0137, "step": 120740 }, { "epoch": 794.4078947368421, "grad_norm": 1.2912036180496216, "learning_rate": 0.0001, "loss": 0.018, "step": 120750 }, { "epoch": 794.4736842105264, "grad_norm": 1.064568042755127, "learning_rate": 0.0001, "loss": 0.0137, "step": 120760 }, { "epoch": 794.5394736842105, "grad_norm": 1.5485846996307373, "learning_rate": 0.0001, "loss": 0.0113, "step": 120770 }, { "epoch": 794.6052631578947, "grad_norm": 1.4414029121398926, "learning_rate": 0.0001, "loss": 0.0149, "step": 120780 }, { "epoch": 794.671052631579, "grad_norm": 1.36929452419281, "learning_rate": 0.0001, "loss": 0.0106, "step": 120790 }, { "epoch": 794.7368421052631, "grad_norm": 1.7009457349777222, "learning_rate": 0.0001, "loss": 0.0131, "step": 120800 }, { "epoch": 794.8026315789474, "grad_norm": 1.6823605298995972, "learning_rate": 0.0001, "loss": 0.0125, "step": 120810 }, { "epoch": 794.8684210526316, "grad_norm": 1.4054988622665405, "learning_rate": 0.0001, "loss": 0.0109, "step": 120820 }, { "epoch": 794.9342105263158, "grad_norm": 1.967791199684143, "learning_rate": 0.0001, "loss": 0.0138, "step": 120830 }, { "epoch": 795.0, "grad_norm": 1.6926008462905884, "learning_rate": 0.0001, "loss": 0.0149, "step": 120840 }, { "epoch": 795.0657894736842, "grad_norm": 1.719939112663269, "learning_rate": 0.0001, "loss": 0.0182, "step": 120850 }, { "epoch": 795.1315789473684, "grad_norm": 1.5414245128631592, "learning_rate": 0.0001, "loss": 0.014, "step": 120860 }, { "epoch": 795.1973684210526, "grad_norm": 1.031916618347168, "learning_rate": 0.0001, "loss": 0.0144, "step": 120870 }, { "epoch": 795.2631578947369, "grad_norm": 1.2394858598709106, "learning_rate": 0.0001, "loss": 0.0124, "step": 120880 }, { "epoch": 795.328947368421, "grad_norm": 1.1827839612960815, "learning_rate": 0.0001, "loss": 0.0132, "step": 120890 }, { "epoch": 795.3947368421053, "grad_norm": 1.5099167823791504, "learning_rate": 0.0001, "loss": 0.0139, "step": 120900 }, { "epoch": 795.4605263157895, "grad_norm": 1.3146122694015503, "learning_rate": 0.0001, "loss": 0.0118, "step": 120910 }, { "epoch": 795.5263157894736, "grad_norm": 1.1231532096862793, "learning_rate": 0.0001, "loss": 0.0172, "step": 120920 }, { "epoch": 795.5921052631579, "grad_norm": 1.4009188413619995, "learning_rate": 0.0001, "loss": 0.0114, "step": 120930 }, { "epoch": 795.6578947368421, "grad_norm": 1.2737963199615479, "learning_rate": 0.0001, "loss": 0.0141, "step": 120940 }, { "epoch": 795.7236842105264, "grad_norm": 1.5458850860595703, "learning_rate": 0.0001, "loss": 0.0129, "step": 120950 }, { "epoch": 795.7894736842105, "grad_norm": 1.4190510511398315, "learning_rate": 0.0001, "loss": 0.0111, "step": 120960 }, { "epoch": 795.8552631578947, "grad_norm": 1.568726658821106, "learning_rate": 0.0001, "loss": 0.0135, "step": 120970 }, { "epoch": 795.921052631579, "grad_norm": 1.0124289989471436, "learning_rate": 0.0001, "loss": 0.0125, "step": 120980 }, { "epoch": 795.9868421052631, "grad_norm": 1.7743628025054932, "learning_rate": 0.0001, "loss": 0.0133, "step": 120990 }, { "epoch": 796.0526315789474, "grad_norm": 1.5233736038208008, "learning_rate": 0.0001, "loss": 0.0115, "step": 121000 }, { "epoch": 796.1184210526316, "grad_norm": 1.3553656339645386, "learning_rate": 0.0001, "loss": 0.0102, "step": 121010 }, { "epoch": 796.1842105263158, "grad_norm": 1.7125532627105713, "learning_rate": 0.0001, "loss": 0.0128, "step": 121020 }, { "epoch": 796.25, "grad_norm": 1.1128419637680054, "learning_rate": 0.0001, "loss": 0.0151, "step": 121030 }, { "epoch": 796.3157894736842, "grad_norm": 1.2843718528747559, "learning_rate": 0.0001, "loss": 0.0135, "step": 121040 }, { "epoch": 796.3815789473684, "grad_norm": 1.7578951120376587, "learning_rate": 0.0001, "loss": 0.014, "step": 121050 }, { "epoch": 796.4473684210526, "grad_norm": 1.2737822532653809, "learning_rate": 0.0001, "loss": 0.0104, "step": 121060 }, { "epoch": 796.5131578947369, "grad_norm": 1.2539091110229492, "learning_rate": 0.0001, "loss": 0.0148, "step": 121070 }, { "epoch": 796.578947368421, "grad_norm": 1.3653749227523804, "learning_rate": 0.0001, "loss": 0.0126, "step": 121080 }, { "epoch": 796.6447368421053, "grad_norm": 0.8725621700286865, "learning_rate": 0.0001, "loss": 0.013, "step": 121090 }, { "epoch": 796.7105263157895, "grad_norm": 1.3148469924926758, "learning_rate": 0.0001, "loss": 0.0167, "step": 121100 }, { "epoch": 796.7763157894736, "grad_norm": 1.2626681327819824, "learning_rate": 0.0001, "loss": 0.0135, "step": 121110 }, { "epoch": 796.8421052631579, "grad_norm": 1.4251600503921509, "learning_rate": 0.0001, "loss": 0.0124, "step": 121120 }, { "epoch": 796.9078947368421, "grad_norm": 1.1070324182510376, "learning_rate": 0.0001, "loss": 0.0211, "step": 121130 }, { "epoch": 796.9736842105264, "grad_norm": 1.771329402923584, "learning_rate": 0.0001, "loss": 0.0132, "step": 121140 }, { "epoch": 797.0394736842105, "grad_norm": 1.7052538394927979, "learning_rate": 0.0001, "loss": 0.0146, "step": 121150 }, { "epoch": 797.1052631578947, "grad_norm": 1.486114263534546, "learning_rate": 0.0001, "loss": 0.0145, "step": 121160 }, { "epoch": 797.171052631579, "grad_norm": 1.1144218444824219, "learning_rate": 0.0001, "loss": 0.0123, "step": 121170 }, { "epoch": 797.2368421052631, "grad_norm": 1.5651661157608032, "learning_rate": 0.0001, "loss": 0.01, "step": 121180 }, { "epoch": 797.3026315789474, "grad_norm": 1.1705654859542847, "learning_rate": 0.0001, "loss": 0.0167, "step": 121190 }, { "epoch": 797.3684210526316, "grad_norm": 1.5320671796798706, "learning_rate": 0.0001, "loss": 0.0128, "step": 121200 }, { "epoch": 797.4342105263158, "grad_norm": 1.5222041606903076, "learning_rate": 0.0001, "loss": 0.0156, "step": 121210 }, { "epoch": 797.5, "grad_norm": 1.6755105257034302, "learning_rate": 0.0001, "loss": 0.015, "step": 121220 }, { "epoch": 797.5657894736842, "grad_norm": 1.2580909729003906, "learning_rate": 0.0001, "loss": 0.0155, "step": 121230 }, { "epoch": 797.6315789473684, "grad_norm": 1.5422416925430298, "learning_rate": 0.0001, "loss": 0.0114, "step": 121240 }, { "epoch": 797.6973684210526, "grad_norm": 1.4727518558502197, "learning_rate": 0.0001, "loss": 0.0118, "step": 121250 }, { "epoch": 797.7631578947369, "grad_norm": 1.453862190246582, "learning_rate": 0.0001, "loss": 0.0134, "step": 121260 }, { "epoch": 797.828947368421, "grad_norm": 1.5557910203933716, "learning_rate": 0.0001, "loss": 0.015, "step": 121270 }, { "epoch": 797.8947368421053, "grad_norm": 1.8552402257919312, "learning_rate": 0.0001, "loss": 0.0111, "step": 121280 }, { "epoch": 797.9605263157895, "grad_norm": 1.4373736381530762, "learning_rate": 0.0001, "loss": 0.0138, "step": 121290 }, { "epoch": 798.0263157894736, "grad_norm": 1.315005898475647, "learning_rate": 0.0001, "loss": 0.0125, "step": 121300 }, { "epoch": 798.0921052631579, "grad_norm": 1.0608344078063965, "learning_rate": 0.0001, "loss": 0.0141, "step": 121310 }, { "epoch": 798.1578947368421, "grad_norm": 1.2820039987564087, "learning_rate": 0.0001, "loss": 0.0174, "step": 121320 }, { "epoch": 798.2236842105264, "grad_norm": 1.0265748500823975, "learning_rate": 0.0001, "loss": 0.0105, "step": 121330 }, { "epoch": 798.2894736842105, "grad_norm": 1.2269448041915894, "learning_rate": 0.0001, "loss": 0.0159, "step": 121340 }, { "epoch": 798.3552631578947, "grad_norm": 1.2282845973968506, "learning_rate": 0.0001, "loss": 0.0109, "step": 121350 }, { "epoch": 798.421052631579, "grad_norm": 1.3221262693405151, "learning_rate": 0.0001, "loss": 0.0131, "step": 121360 }, { "epoch": 798.4868421052631, "grad_norm": 1.091688871383667, "learning_rate": 0.0001, "loss": 0.0126, "step": 121370 }, { "epoch": 798.5526315789474, "grad_norm": 1.3870993852615356, "learning_rate": 0.0001, "loss": 0.0129, "step": 121380 }, { "epoch": 798.6184210526316, "grad_norm": 1.7586756944656372, "learning_rate": 0.0001, "loss": 0.0125, "step": 121390 }, { "epoch": 798.6842105263158, "grad_norm": 1.6714036464691162, "learning_rate": 0.0001, "loss": 0.0127, "step": 121400 }, { "epoch": 798.75, "grad_norm": 1.0865761041641235, "learning_rate": 0.0001, "loss": 0.0127, "step": 121410 }, { "epoch": 798.8157894736842, "grad_norm": 1.3130487203598022, "learning_rate": 0.0001, "loss": 0.0142, "step": 121420 }, { "epoch": 798.8815789473684, "grad_norm": 1.4883743524551392, "learning_rate": 0.0001, "loss": 0.0134, "step": 121430 }, { "epoch": 798.9473684210526, "grad_norm": 1.606648564338684, "learning_rate": 0.0001, "loss": 0.0129, "step": 121440 }, { "epoch": 799.0131578947369, "grad_norm": 1.3983197212219238, "learning_rate": 0.0001, "loss": 0.0177, "step": 121450 }, { "epoch": 799.078947368421, "grad_norm": 1.0761325359344482, "learning_rate": 0.0001, "loss": 0.013, "step": 121460 }, { "epoch": 799.1447368421053, "grad_norm": 1.5937050580978394, "learning_rate": 0.0001, "loss": 0.014, "step": 121470 }, { "epoch": 799.2105263157895, "grad_norm": 1.7164866924285889, "learning_rate": 0.0001, "loss": 0.0101, "step": 121480 }, { "epoch": 799.2763157894736, "grad_norm": 1.4725840091705322, "learning_rate": 0.0001, "loss": 0.015, "step": 121490 }, { "epoch": 799.3421052631579, "grad_norm": 1.8791757822036743, "learning_rate": 0.0001, "loss": 0.0127, "step": 121500 }, { "epoch": 799.4078947368421, "grad_norm": 1.5808395147323608, "learning_rate": 0.0001, "loss": 0.0132, "step": 121510 }, { "epoch": 799.4736842105264, "grad_norm": 2.1864142417907715, "learning_rate": 0.0001, "loss": 0.0102, "step": 121520 }, { "epoch": 799.5394736842105, "grad_norm": 1.4919686317443848, "learning_rate": 0.0001, "loss": 0.0192, "step": 121530 }, { "epoch": 799.6052631578947, "grad_norm": 1.6154993772506714, "learning_rate": 0.0001, "loss": 0.0102, "step": 121540 }, { "epoch": 799.671052631579, "grad_norm": 1.249658465385437, "learning_rate": 0.0001, "loss": 0.0109, "step": 121550 }, { "epoch": 799.7368421052631, "grad_norm": 1.5895211696624756, "learning_rate": 0.0001, "loss": 0.013, "step": 121560 }, { "epoch": 799.8026315789474, "grad_norm": 1.4595733880996704, "learning_rate": 0.0001, "loss": 0.0128, "step": 121570 }, { "epoch": 799.8684210526316, "grad_norm": 1.3591117858886719, "learning_rate": 0.0001, "loss": 0.0173, "step": 121580 }, { "epoch": 799.9342105263158, "grad_norm": 1.650431752204895, "learning_rate": 0.0001, "loss": 0.0146, "step": 121590 }, { "epoch": 800.0, "grad_norm": 1.501732587814331, "learning_rate": 0.0001, "loss": 0.0165, "step": 121600 }, { "epoch": 800.0657894736842, "grad_norm": 1.5252403020858765, "learning_rate": 0.0001, "loss": 0.0131, "step": 121610 }, { "epoch": 800.1315789473684, "grad_norm": 1.6328822374343872, "learning_rate": 0.0001, "loss": 0.0117, "step": 121620 }, { "epoch": 800.1973684210526, "grad_norm": 1.7181576490402222, "learning_rate": 0.0001, "loss": 0.0173, "step": 121630 }, { "epoch": 800.2631578947369, "grad_norm": 1.692725658416748, "learning_rate": 0.0001, "loss": 0.0133, "step": 121640 }, { "epoch": 800.328947368421, "grad_norm": 1.449394941329956, "learning_rate": 0.0001, "loss": 0.0176, "step": 121650 }, { "epoch": 800.3947368421053, "grad_norm": 1.5459579229354858, "learning_rate": 0.0001, "loss": 0.0105, "step": 121660 }, { "epoch": 800.4605263157895, "grad_norm": 1.406633973121643, "learning_rate": 0.0001, "loss": 0.012, "step": 121670 }, { "epoch": 800.5263157894736, "grad_norm": 1.1953552961349487, "learning_rate": 0.0001, "loss": 0.0151, "step": 121680 }, { "epoch": 800.5921052631579, "grad_norm": 1.3098421096801758, "learning_rate": 0.0001, "loss": 0.0148, "step": 121690 }, { "epoch": 800.6578947368421, "grad_norm": 2.045508623123169, "learning_rate": 0.0001, "loss": 0.0143, "step": 121700 }, { "epoch": 800.7236842105264, "grad_norm": 1.3021981716156006, "learning_rate": 0.0001, "loss": 0.0118, "step": 121710 }, { "epoch": 800.7894736842105, "grad_norm": 1.4306005239486694, "learning_rate": 0.0001, "loss": 0.0162, "step": 121720 }, { "epoch": 800.8552631578947, "grad_norm": 1.855543613433838, "learning_rate": 0.0001, "loss": 0.0134, "step": 121730 }, { "epoch": 800.921052631579, "grad_norm": 1.3082494735717773, "learning_rate": 0.0001, "loss": 0.0118, "step": 121740 }, { "epoch": 800.9868421052631, "grad_norm": 1.604660987854004, "learning_rate": 0.0001, "loss": 0.0096, "step": 121750 }, { "epoch": 801.0526315789474, "grad_norm": 1.5003361701965332, "learning_rate": 0.0001, "loss": 0.0126, "step": 121760 }, { "epoch": 801.1184210526316, "grad_norm": 1.4036004543304443, "learning_rate": 0.0001, "loss": 0.0147, "step": 121770 }, { "epoch": 801.1842105263158, "grad_norm": 0.9903563261032104, "learning_rate": 0.0001, "loss": 0.0122, "step": 121780 }, { "epoch": 801.25, "grad_norm": 1.8446905612945557, "learning_rate": 0.0001, "loss": 0.0156, "step": 121790 }, { "epoch": 801.3157894736842, "grad_norm": 1.1447498798370361, "learning_rate": 0.0001, "loss": 0.0129, "step": 121800 }, { "epoch": 801.3815789473684, "grad_norm": 1.6894930601119995, "learning_rate": 0.0001, "loss": 0.0124, "step": 121810 }, { "epoch": 801.4473684210526, "grad_norm": 1.6666462421417236, "learning_rate": 0.0001, "loss": 0.0127, "step": 121820 }, { "epoch": 801.5131578947369, "grad_norm": 1.480569839477539, "learning_rate": 0.0001, "loss": 0.0131, "step": 121830 }, { "epoch": 801.578947368421, "grad_norm": 1.3134639263153076, "learning_rate": 0.0001, "loss": 0.0155, "step": 121840 }, { "epoch": 801.6447368421053, "grad_norm": 1.3394602537155151, "learning_rate": 0.0001, "loss": 0.0104, "step": 121850 }, { "epoch": 801.7105263157895, "grad_norm": 1.5421578884124756, "learning_rate": 0.0001, "loss": 0.0134, "step": 121860 }, { "epoch": 801.7763157894736, "grad_norm": 1.2189980745315552, "learning_rate": 0.0001, "loss": 0.0119, "step": 121870 }, { "epoch": 801.8421052631579, "grad_norm": 1.7834962606430054, "learning_rate": 0.0001, "loss": 0.0115, "step": 121880 }, { "epoch": 801.9078947368421, "grad_norm": 1.0238170623779297, "learning_rate": 0.0001, "loss": 0.0186, "step": 121890 }, { "epoch": 801.9736842105264, "grad_norm": 1.1560791730880737, "learning_rate": 0.0001, "loss": 0.0113, "step": 121900 }, { "epoch": 802.0394736842105, "grad_norm": 1.4481114149093628, "learning_rate": 0.0001, "loss": 0.0142, "step": 121910 }, { "epoch": 802.1052631578947, "grad_norm": 1.5181657075881958, "learning_rate": 0.0001, "loss": 0.0134, "step": 121920 }, { "epoch": 802.171052631579, "grad_norm": 1.1353936195373535, "learning_rate": 0.0001, "loss": 0.0128, "step": 121930 }, { "epoch": 802.2368421052631, "grad_norm": 1.4950119256973267, "learning_rate": 0.0001, "loss": 0.0132, "step": 121940 }, { "epoch": 802.3026315789474, "grad_norm": 1.6678030490875244, "learning_rate": 0.0001, "loss": 0.0135, "step": 121950 }, { "epoch": 802.3684210526316, "grad_norm": 1.5296763181686401, "learning_rate": 0.0001, "loss": 0.0127, "step": 121960 }, { "epoch": 802.4342105263158, "grad_norm": 1.470542311668396, "learning_rate": 0.0001, "loss": 0.0155, "step": 121970 }, { "epoch": 802.5, "grad_norm": 1.0300859212875366, "learning_rate": 0.0001, "loss": 0.0131, "step": 121980 }, { "epoch": 802.5657894736842, "grad_norm": 1.5861872434616089, "learning_rate": 0.0001, "loss": 0.0124, "step": 121990 }, { "epoch": 802.6315789473684, "grad_norm": 1.3099943399429321, "learning_rate": 0.0001, "loss": 0.0106, "step": 122000 }, { "epoch": 802.6973684210526, "grad_norm": 2.077451705932617, "learning_rate": 0.0001, "loss": 0.0159, "step": 122010 }, { "epoch": 802.7631578947369, "grad_norm": 1.678424596786499, "learning_rate": 0.0001, "loss": 0.0136, "step": 122020 }, { "epoch": 802.828947368421, "grad_norm": 1.8585132360458374, "learning_rate": 0.0001, "loss": 0.0119, "step": 122030 }, { "epoch": 802.8947368421053, "grad_norm": 1.605594515800476, "learning_rate": 0.0001, "loss": 0.0144, "step": 122040 }, { "epoch": 802.9605263157895, "grad_norm": 1.750853180885315, "learning_rate": 0.0001, "loss": 0.0146, "step": 122050 }, { "epoch": 803.0263157894736, "grad_norm": 1.248559832572937, "learning_rate": 0.0001, "loss": 0.0128, "step": 122060 }, { "epoch": 803.0921052631579, "grad_norm": 1.496037483215332, "learning_rate": 0.0001, "loss": 0.015, "step": 122070 }, { "epoch": 803.1578947368421, "grad_norm": 1.9985989332199097, "learning_rate": 0.0001, "loss": 0.0141, "step": 122080 }, { "epoch": 803.2236842105264, "grad_norm": 1.534085988998413, "learning_rate": 0.0001, "loss": 0.0112, "step": 122090 }, { "epoch": 803.2894736842105, "grad_norm": 1.3011153936386108, "learning_rate": 0.0001, "loss": 0.0169, "step": 122100 }, { "epoch": 803.3552631578947, "grad_norm": 1.2771066427230835, "learning_rate": 0.0001, "loss": 0.0139, "step": 122110 }, { "epoch": 803.421052631579, "grad_norm": 1.4699357748031616, "learning_rate": 0.0001, "loss": 0.0136, "step": 122120 }, { "epoch": 803.4868421052631, "grad_norm": 1.892216444015503, "learning_rate": 0.0001, "loss": 0.0117, "step": 122130 }, { "epoch": 803.5526315789474, "grad_norm": 1.5006910562515259, "learning_rate": 0.0001, "loss": 0.0134, "step": 122140 }, { "epoch": 803.6184210526316, "grad_norm": 1.876309871673584, "learning_rate": 0.0001, "loss": 0.0104, "step": 122150 }, { "epoch": 803.6842105263158, "grad_norm": 1.8818696737289429, "learning_rate": 0.0001, "loss": 0.0155, "step": 122160 }, { "epoch": 803.75, "grad_norm": 1.523165225982666, "learning_rate": 0.0001, "loss": 0.0131, "step": 122170 }, { "epoch": 803.8157894736842, "grad_norm": 1.5798101425170898, "learning_rate": 0.0001, "loss": 0.0171, "step": 122180 }, { "epoch": 803.8815789473684, "grad_norm": 1.7621138095855713, "learning_rate": 0.0001, "loss": 0.0114, "step": 122190 }, { "epoch": 803.9473684210526, "grad_norm": 1.6521201133728027, "learning_rate": 0.0001, "loss": 0.0123, "step": 122200 }, { "epoch": 804.0131578947369, "grad_norm": 1.422910213470459, "learning_rate": 0.0001, "loss": 0.0121, "step": 122210 }, { "epoch": 804.078947368421, "grad_norm": 1.5314068794250488, "learning_rate": 0.0001, "loss": 0.0157, "step": 122220 }, { "epoch": 804.1447368421053, "grad_norm": 1.5867077112197876, "learning_rate": 0.0001, "loss": 0.0106, "step": 122230 }, { "epoch": 804.2105263157895, "grad_norm": 1.837193489074707, "learning_rate": 0.0001, "loss": 0.0146, "step": 122240 }, { "epoch": 804.2763157894736, "grad_norm": 1.805098533630371, "learning_rate": 0.0001, "loss": 0.0157, "step": 122250 }, { "epoch": 804.3421052631579, "grad_norm": 1.5021076202392578, "learning_rate": 0.0001, "loss": 0.012, "step": 122260 }, { "epoch": 804.4078947368421, "grad_norm": 1.5989969968795776, "learning_rate": 0.0001, "loss": 0.0097, "step": 122270 }, { "epoch": 804.4736842105264, "grad_norm": 1.3750278949737549, "learning_rate": 0.0001, "loss": 0.0121, "step": 122280 }, { "epoch": 804.5394736842105, "grad_norm": 1.8596009016036987, "learning_rate": 0.0001, "loss": 0.0131, "step": 122290 }, { "epoch": 804.6052631578947, "grad_norm": 1.7272305488586426, "learning_rate": 0.0001, "loss": 0.0169, "step": 122300 }, { "epoch": 804.671052631579, "grad_norm": 1.741553783416748, "learning_rate": 0.0001, "loss": 0.0133, "step": 122310 }, { "epoch": 804.7368421052631, "grad_norm": 1.6373721361160278, "learning_rate": 0.0001, "loss": 0.0115, "step": 122320 }, { "epoch": 804.8026315789474, "grad_norm": 1.6189582347869873, "learning_rate": 0.0001, "loss": 0.0146, "step": 122330 }, { "epoch": 804.8684210526316, "grad_norm": 1.7007280588150024, "learning_rate": 0.0001, "loss": 0.0151, "step": 122340 }, { "epoch": 804.9342105263158, "grad_norm": 1.579875111579895, "learning_rate": 0.0001, "loss": 0.0118, "step": 122350 }, { "epoch": 805.0, "grad_norm": 1.273846983909607, "learning_rate": 0.0001, "loss": 0.0112, "step": 122360 }, { "epoch": 805.0657894736842, "grad_norm": 1.055403232574463, "learning_rate": 0.0001, "loss": 0.0136, "step": 122370 }, { "epoch": 805.1315789473684, "grad_norm": 1.5009870529174805, "learning_rate": 0.0001, "loss": 0.0135, "step": 122380 }, { "epoch": 805.1973684210526, "grad_norm": 1.6028974056243896, "learning_rate": 0.0001, "loss": 0.0131, "step": 122390 }, { "epoch": 805.2631578947369, "grad_norm": 1.4109632968902588, "learning_rate": 0.0001, "loss": 0.0139, "step": 122400 }, { "epoch": 805.328947368421, "grad_norm": 1.551780343055725, "learning_rate": 0.0001, "loss": 0.0146, "step": 122410 }, { "epoch": 805.3947368421053, "grad_norm": 1.6349411010742188, "learning_rate": 0.0001, "loss": 0.0101, "step": 122420 }, { "epoch": 805.4605263157895, "grad_norm": 1.448045253753662, "learning_rate": 0.0001, "loss": 0.0123, "step": 122430 }, { "epoch": 805.5263157894736, "grad_norm": 1.4954352378845215, "learning_rate": 0.0001, "loss": 0.0127, "step": 122440 }, { "epoch": 805.5921052631579, "grad_norm": 1.4194873571395874, "learning_rate": 0.0001, "loss": 0.0121, "step": 122450 }, { "epoch": 805.6578947368421, "grad_norm": 1.2949965000152588, "learning_rate": 0.0001, "loss": 0.0113, "step": 122460 }, { "epoch": 805.7236842105264, "grad_norm": 1.1385294198989868, "learning_rate": 0.0001, "loss": 0.014, "step": 122470 }, { "epoch": 805.7894736842105, "grad_norm": 1.3316209316253662, "learning_rate": 0.0001, "loss": 0.018, "step": 122480 }, { "epoch": 805.8552631578947, "grad_norm": 1.4538122415542603, "learning_rate": 0.0001, "loss": 0.0124, "step": 122490 }, { "epoch": 805.921052631579, "grad_norm": 1.2928369045257568, "learning_rate": 0.0001, "loss": 0.012, "step": 122500 }, { "epoch": 805.9868421052631, "grad_norm": 1.4555655717849731, "learning_rate": 0.0001, "loss": 0.0127, "step": 122510 }, { "epoch": 806.0526315789474, "grad_norm": 1.9406167268753052, "learning_rate": 0.0001, "loss": 0.0132, "step": 122520 }, { "epoch": 806.1184210526316, "grad_norm": 1.778332233428955, "learning_rate": 0.0001, "loss": 0.0106, "step": 122530 }, { "epoch": 806.1842105263158, "grad_norm": 1.7814651727676392, "learning_rate": 0.0001, "loss": 0.0151, "step": 122540 }, { "epoch": 806.25, "grad_norm": 1.934149146080017, "learning_rate": 0.0001, "loss": 0.0128, "step": 122550 }, { "epoch": 806.3157894736842, "grad_norm": 1.608755111694336, "learning_rate": 0.0001, "loss": 0.0187, "step": 122560 }, { "epoch": 806.3815789473684, "grad_norm": 1.5064572095870972, "learning_rate": 0.0001, "loss": 0.0164, "step": 122570 }, { "epoch": 806.4473684210526, "grad_norm": 1.3946229219436646, "learning_rate": 0.0001, "loss": 0.014, "step": 122580 }, { "epoch": 806.5131578947369, "grad_norm": 1.5488263368606567, "learning_rate": 0.0001, "loss": 0.0113, "step": 122590 }, { "epoch": 806.578947368421, "grad_norm": 1.3898130655288696, "learning_rate": 0.0001, "loss": 0.0123, "step": 122600 }, { "epoch": 806.6447368421053, "grad_norm": 1.6163887977600098, "learning_rate": 0.0001, "loss": 0.0118, "step": 122610 }, { "epoch": 806.7105263157895, "grad_norm": 1.2758994102478027, "learning_rate": 0.0001, "loss": 0.0139, "step": 122620 }, { "epoch": 806.7763157894736, "grad_norm": 1.5900753736495972, "learning_rate": 0.0001, "loss": 0.0134, "step": 122630 }, { "epoch": 806.8421052631579, "grad_norm": 1.6253362894058228, "learning_rate": 0.0001, "loss": 0.0121, "step": 122640 }, { "epoch": 806.9078947368421, "grad_norm": 1.4668854475021362, "learning_rate": 0.0001, "loss": 0.017, "step": 122650 }, { "epoch": 806.9736842105264, "grad_norm": 1.5827553272247314, "learning_rate": 0.0001, "loss": 0.0101, "step": 122660 }, { "epoch": 807.0394736842105, "grad_norm": 1.161897897720337, "learning_rate": 0.0001, "loss": 0.0119, "step": 122670 }, { "epoch": 807.1052631578947, "grad_norm": 1.3014202117919922, "learning_rate": 0.0001, "loss": 0.02, "step": 122680 }, { "epoch": 807.171052631579, "grad_norm": 1.5273128747940063, "learning_rate": 0.0001, "loss": 0.0124, "step": 122690 }, { "epoch": 807.2368421052631, "grad_norm": 1.3579182624816895, "learning_rate": 0.0001, "loss": 0.011, "step": 122700 }, { "epoch": 807.3026315789474, "grad_norm": 1.6524441242218018, "learning_rate": 0.0001, "loss": 0.0119, "step": 122710 }, { "epoch": 807.3684210526316, "grad_norm": 1.3726755380630493, "learning_rate": 0.0001, "loss": 0.0153, "step": 122720 }, { "epoch": 807.4342105263158, "grad_norm": 1.4497802257537842, "learning_rate": 0.0001, "loss": 0.0136, "step": 122730 }, { "epoch": 807.5, "grad_norm": 1.839576005935669, "learning_rate": 0.0001, "loss": 0.0143, "step": 122740 }, { "epoch": 807.5657894736842, "grad_norm": 1.6290465593338013, "learning_rate": 0.0001, "loss": 0.0105, "step": 122750 }, { "epoch": 807.6315789473684, "grad_norm": 1.4897361993789673, "learning_rate": 0.0001, "loss": 0.0112, "step": 122760 }, { "epoch": 807.6973684210526, "grad_norm": 1.1850417852401733, "learning_rate": 0.0001, "loss": 0.0129, "step": 122770 }, { "epoch": 807.7631578947369, "grad_norm": 1.677620768547058, "learning_rate": 0.0001, "loss": 0.016, "step": 122780 }, { "epoch": 807.828947368421, "grad_norm": 1.1877559423446655, "learning_rate": 0.0001, "loss": 0.0124, "step": 122790 }, { "epoch": 807.8947368421053, "grad_norm": 1.1389470100402832, "learning_rate": 0.0001, "loss": 0.015, "step": 122800 }, { "epoch": 807.9605263157895, "grad_norm": 1.298449158668518, "learning_rate": 0.0001, "loss": 0.0151, "step": 122810 }, { "epoch": 808.0263157894736, "grad_norm": 1.706453800201416, "learning_rate": 0.0001, "loss": 0.0158, "step": 122820 }, { "epoch": 808.0921052631579, "grad_norm": 1.5044939517974854, "learning_rate": 0.0001, "loss": 0.0146, "step": 122830 }, { "epoch": 808.1578947368421, "grad_norm": 1.544755458831787, "learning_rate": 0.0001, "loss": 0.0153, "step": 122840 }, { "epoch": 808.2236842105264, "grad_norm": 1.6098463535308838, "learning_rate": 0.0001, "loss": 0.01, "step": 122850 }, { "epoch": 808.2894736842105, "grad_norm": 1.3840903043746948, "learning_rate": 0.0001, "loss": 0.0149, "step": 122860 }, { "epoch": 808.3552631578947, "grad_norm": 1.5019302368164062, "learning_rate": 0.0001, "loss": 0.0103, "step": 122870 }, { "epoch": 808.421052631579, "grad_norm": 1.4467545747756958, "learning_rate": 0.0001, "loss": 0.0143, "step": 122880 }, { "epoch": 808.4868421052631, "grad_norm": 1.502623438835144, "learning_rate": 0.0001, "loss": 0.0115, "step": 122890 }, { "epoch": 808.5526315789474, "grad_norm": 1.3028827905654907, "learning_rate": 0.0001, "loss": 0.0158, "step": 122900 }, { "epoch": 808.6184210526316, "grad_norm": 1.2573002576828003, "learning_rate": 0.0001, "loss": 0.0148, "step": 122910 }, { "epoch": 808.6842105263158, "grad_norm": 1.4649755954742432, "learning_rate": 0.0001, "loss": 0.0138, "step": 122920 }, { "epoch": 808.75, "grad_norm": 1.5156586170196533, "learning_rate": 0.0001, "loss": 0.0107, "step": 122930 }, { "epoch": 808.8157894736842, "grad_norm": 1.6610045433044434, "learning_rate": 0.0001, "loss": 0.0115, "step": 122940 }, { "epoch": 808.8815789473684, "grad_norm": 1.707667350769043, "learning_rate": 0.0001, "loss": 0.0135, "step": 122950 }, { "epoch": 808.9473684210526, "grad_norm": 1.331605076789856, "learning_rate": 0.0001, "loss": 0.0135, "step": 122960 }, { "epoch": 809.0131578947369, "grad_norm": 0.9448249936103821, "learning_rate": 0.0001, "loss": 0.0146, "step": 122970 }, { "epoch": 809.078947368421, "grad_norm": 1.217427372932434, "learning_rate": 0.0001, "loss": 0.013, "step": 122980 }, { "epoch": 809.1447368421053, "grad_norm": 1.3259165287017822, "learning_rate": 0.0001, "loss": 0.0149, "step": 122990 }, { "epoch": 809.2105263157895, "grad_norm": 1.3902010917663574, "learning_rate": 0.0001, "loss": 0.0125, "step": 123000 }, { "epoch": 809.2763157894736, "grad_norm": 1.4511394500732422, "learning_rate": 0.0001, "loss": 0.0142, "step": 123010 }, { "epoch": 809.3421052631579, "grad_norm": 1.437680721282959, "learning_rate": 0.0001, "loss": 0.0128, "step": 123020 }, { "epoch": 809.4078947368421, "grad_norm": 1.482677698135376, "learning_rate": 0.0001, "loss": 0.0146, "step": 123030 }, { "epoch": 809.4736842105264, "grad_norm": 1.4556918144226074, "learning_rate": 0.0001, "loss": 0.0124, "step": 123040 }, { "epoch": 809.5394736842105, "grad_norm": 1.1347599029541016, "learning_rate": 0.0001, "loss": 0.0145, "step": 123050 }, { "epoch": 809.6052631578947, "grad_norm": 1.5475976467132568, "learning_rate": 0.0001, "loss": 0.021, "step": 123060 }, { "epoch": 809.671052631579, "grad_norm": 1.4841437339782715, "learning_rate": 0.0001, "loss": 0.0102, "step": 123070 }, { "epoch": 809.7368421052631, "grad_norm": 1.3547031879425049, "learning_rate": 0.0001, "loss": 0.0112, "step": 123080 }, { "epoch": 809.8026315789474, "grad_norm": 1.5971310138702393, "learning_rate": 0.0001, "loss": 0.0109, "step": 123090 }, { "epoch": 809.8684210526316, "grad_norm": 1.092132806777954, "learning_rate": 0.0001, "loss": 0.0132, "step": 123100 }, { "epoch": 809.9342105263158, "grad_norm": 1.5251516103744507, "learning_rate": 0.0001, "loss": 0.0133, "step": 123110 }, { "epoch": 810.0, "grad_norm": 1.6991788148880005, "learning_rate": 0.0001, "loss": 0.0128, "step": 123120 }, { "epoch": 810.0657894736842, "grad_norm": 1.2018015384674072, "learning_rate": 0.0001, "loss": 0.0129, "step": 123130 }, { "epoch": 810.1315789473684, "grad_norm": 1.495795488357544, "learning_rate": 0.0001, "loss": 0.0118, "step": 123140 }, { "epoch": 810.1973684210526, "grad_norm": 1.453340768814087, "learning_rate": 0.0001, "loss": 0.0181, "step": 123150 }, { "epoch": 810.2631578947369, "grad_norm": 1.1267286539077759, "learning_rate": 0.0001, "loss": 0.0122, "step": 123160 }, { "epoch": 810.328947368421, "grad_norm": 1.2990244626998901, "learning_rate": 0.0001, "loss": 0.0156, "step": 123170 }, { "epoch": 810.3947368421053, "grad_norm": 1.1421456336975098, "learning_rate": 0.0001, "loss": 0.0135, "step": 123180 }, { "epoch": 810.4605263157895, "grad_norm": 1.6005806922912598, "learning_rate": 0.0001, "loss": 0.0135, "step": 123190 }, { "epoch": 810.5263157894736, "grad_norm": 1.1412304639816284, "learning_rate": 0.0001, "loss": 0.0163, "step": 123200 }, { "epoch": 810.5921052631579, "grad_norm": 1.2157909870147705, "learning_rate": 0.0001, "loss": 0.0108, "step": 123210 }, { "epoch": 810.6578947368421, "grad_norm": 1.3971120119094849, "learning_rate": 0.0001, "loss": 0.011, "step": 123220 }, { "epoch": 810.7236842105264, "grad_norm": 1.620534896850586, "learning_rate": 0.0001, "loss": 0.0127, "step": 123230 }, { "epoch": 810.7894736842105, "grad_norm": 1.4219770431518555, "learning_rate": 0.0001, "loss": 0.0154, "step": 123240 }, { "epoch": 810.8552631578947, "grad_norm": 1.315970540046692, "learning_rate": 0.0001, "loss": 0.0138, "step": 123250 }, { "epoch": 810.921052631579, "grad_norm": 1.001400351524353, "learning_rate": 0.0001, "loss": 0.0173, "step": 123260 }, { "epoch": 810.9868421052631, "grad_norm": 1.5889734029769897, "learning_rate": 0.0001, "loss": 0.0103, "step": 123270 }, { "epoch": 811.0526315789474, "grad_norm": 1.4716804027557373, "learning_rate": 0.0001, "loss": 0.0097, "step": 123280 }, { "epoch": 811.1184210526316, "grad_norm": 1.3012664318084717, "learning_rate": 0.0001, "loss": 0.0151, "step": 123290 }, { "epoch": 811.1842105263158, "grad_norm": 1.3399295806884766, "learning_rate": 0.0001, "loss": 0.0147, "step": 123300 }, { "epoch": 811.25, "grad_norm": 1.7201900482177734, "learning_rate": 0.0001, "loss": 0.0131, "step": 123310 }, { "epoch": 811.3157894736842, "grad_norm": 1.6437351703643799, "learning_rate": 0.0001, "loss": 0.0136, "step": 123320 }, { "epoch": 811.3815789473684, "grad_norm": 1.4422487020492554, "learning_rate": 0.0001, "loss": 0.0106, "step": 123330 }, { "epoch": 811.4473684210526, "grad_norm": 1.6639245748519897, "learning_rate": 0.0001, "loss": 0.0127, "step": 123340 }, { "epoch": 811.5131578947369, "grad_norm": 1.668827772140503, "learning_rate": 0.0001, "loss": 0.017, "step": 123350 }, { "epoch": 811.578947368421, "grad_norm": 1.1895521879196167, "learning_rate": 0.0001, "loss": 0.0184, "step": 123360 }, { "epoch": 811.6447368421053, "grad_norm": 1.4905343055725098, "learning_rate": 0.0001, "loss": 0.0135, "step": 123370 }, { "epoch": 811.7105263157895, "grad_norm": 1.9324344396591187, "learning_rate": 0.0001, "loss": 0.0141, "step": 123380 }, { "epoch": 811.7763157894736, "grad_norm": 1.333522081375122, "learning_rate": 0.0001, "loss": 0.0101, "step": 123390 }, { "epoch": 811.8421052631579, "grad_norm": 1.2918840646743774, "learning_rate": 0.0001, "loss": 0.0138, "step": 123400 }, { "epoch": 811.9078947368421, "grad_norm": 1.7766027450561523, "learning_rate": 0.0001, "loss": 0.0143, "step": 123410 }, { "epoch": 811.9736842105264, "grad_norm": 1.883455753326416, "learning_rate": 0.0001, "loss": 0.0115, "step": 123420 }, { "epoch": 812.0394736842105, "grad_norm": 1.1598883867263794, "learning_rate": 0.0001, "loss": 0.013, "step": 123430 }, { "epoch": 812.1052631578947, "grad_norm": 0.9884587526321411, "learning_rate": 0.0001, "loss": 0.011, "step": 123440 }, { "epoch": 812.171052631579, "grad_norm": 1.1884453296661377, "learning_rate": 0.0001, "loss": 0.0116, "step": 123450 }, { "epoch": 812.2368421052631, "grad_norm": 0.9840675592422485, "learning_rate": 0.0001, "loss": 0.0117, "step": 123460 }, { "epoch": 812.3026315789474, "grad_norm": 1.5120786428451538, "learning_rate": 0.0001, "loss": 0.011, "step": 123470 }, { "epoch": 812.3684210526316, "grad_norm": 1.7665530443191528, "learning_rate": 0.0001, "loss": 0.01, "step": 123480 }, { "epoch": 812.4342105263158, "grad_norm": 1.421465277671814, "learning_rate": 0.0001, "loss": 0.012, "step": 123490 }, { "epoch": 812.5, "grad_norm": 1.4380699396133423, "learning_rate": 0.0001, "loss": 0.0154, "step": 123500 }, { "epoch": 812.5657894736842, "grad_norm": 1.18253755569458, "learning_rate": 0.0001, "loss": 0.0131, "step": 123510 }, { "epoch": 812.6315789473684, "grad_norm": 1.369807481765747, "learning_rate": 0.0001, "loss": 0.0134, "step": 123520 }, { "epoch": 812.6973684210526, "grad_norm": 1.397146463394165, "learning_rate": 0.0001, "loss": 0.0183, "step": 123530 }, { "epoch": 812.7631578947369, "grad_norm": 1.4766913652420044, "learning_rate": 0.0001, "loss": 0.0152, "step": 123540 }, { "epoch": 812.828947368421, "grad_norm": 1.2890446186065674, "learning_rate": 0.0001, "loss": 0.0153, "step": 123550 }, { "epoch": 812.8947368421053, "grad_norm": 1.6909613609313965, "learning_rate": 0.0001, "loss": 0.0129, "step": 123560 }, { "epoch": 812.9605263157895, "grad_norm": 1.5543546676635742, "learning_rate": 0.0001, "loss": 0.016, "step": 123570 }, { "epoch": 813.0263157894736, "grad_norm": 1.6772395372390747, "learning_rate": 0.0001, "loss": 0.0142, "step": 123580 }, { "epoch": 813.0921052631579, "grad_norm": 1.3875895738601685, "learning_rate": 0.0001, "loss": 0.0205, "step": 123590 }, { "epoch": 813.1578947368421, "grad_norm": 1.0887559652328491, "learning_rate": 0.0001, "loss": 0.012, "step": 123600 }, { "epoch": 813.2236842105264, "grad_norm": 1.3804377317428589, "learning_rate": 0.0001, "loss": 0.012, "step": 123610 }, { "epoch": 813.2894736842105, "grad_norm": 1.5113122463226318, "learning_rate": 0.0001, "loss": 0.0123, "step": 123620 }, { "epoch": 813.3552631578947, "grad_norm": 1.8751221895217896, "learning_rate": 0.0001, "loss": 0.0134, "step": 123630 }, { "epoch": 813.421052631579, "grad_norm": 1.1164411306381226, "learning_rate": 0.0001, "loss": 0.0117, "step": 123640 }, { "epoch": 813.4868421052631, "grad_norm": 1.3100202083587646, "learning_rate": 0.0001, "loss": 0.0128, "step": 123650 }, { "epoch": 813.5526315789474, "grad_norm": 1.5206339359283447, "learning_rate": 0.0001, "loss": 0.0127, "step": 123660 }, { "epoch": 813.6184210526316, "grad_norm": 1.335155963897705, "learning_rate": 0.0001, "loss": 0.0116, "step": 123670 }, { "epoch": 813.6842105263158, "grad_norm": 1.4162037372589111, "learning_rate": 0.0001, "loss": 0.0121, "step": 123680 }, { "epoch": 813.75, "grad_norm": 1.4964121580123901, "learning_rate": 0.0001, "loss": 0.0101, "step": 123690 }, { "epoch": 813.8157894736842, "grad_norm": 1.5574005842208862, "learning_rate": 0.0001, "loss": 0.018, "step": 123700 }, { "epoch": 813.8815789473684, "grad_norm": 1.7463111877441406, "learning_rate": 0.0001, "loss": 0.0124, "step": 123710 }, { "epoch": 813.9473684210526, "grad_norm": 1.3428428173065186, "learning_rate": 0.0001, "loss": 0.0119, "step": 123720 }, { "epoch": 814.0131578947369, "grad_norm": 1.179625153541565, "learning_rate": 0.0001, "loss": 0.0188, "step": 123730 }, { "epoch": 814.078947368421, "grad_norm": 1.3428826332092285, "learning_rate": 0.0001, "loss": 0.0102, "step": 123740 }, { "epoch": 814.1447368421053, "grad_norm": 1.316699504852295, "learning_rate": 0.0001, "loss": 0.0128, "step": 123750 }, { "epoch": 814.2105263157895, "grad_norm": 1.6361804008483887, "learning_rate": 0.0001, "loss": 0.014, "step": 123760 }, { "epoch": 814.2763157894736, "grad_norm": 1.729664921760559, "learning_rate": 0.0001, "loss": 0.0124, "step": 123770 }, { "epoch": 814.3421052631579, "grad_norm": 1.260054111480713, "learning_rate": 0.0001, "loss": 0.0121, "step": 123780 }, { "epoch": 814.4078947368421, "grad_norm": 1.888249397277832, "learning_rate": 0.0001, "loss": 0.0128, "step": 123790 }, { "epoch": 814.4736842105264, "grad_norm": 1.5622467994689941, "learning_rate": 0.0001, "loss": 0.0129, "step": 123800 }, { "epoch": 814.5394736842105, "grad_norm": 1.3768634796142578, "learning_rate": 0.0001, "loss": 0.016, "step": 123810 }, { "epoch": 814.6052631578947, "grad_norm": 1.3266175985336304, "learning_rate": 0.0001, "loss": 0.0109, "step": 123820 }, { "epoch": 814.671052631579, "grad_norm": 1.2891241312026978, "learning_rate": 0.0001, "loss": 0.0144, "step": 123830 }, { "epoch": 814.7368421052631, "grad_norm": 1.5345336198806763, "learning_rate": 0.0001, "loss": 0.0105, "step": 123840 }, { "epoch": 814.8026315789474, "grad_norm": 1.0594104528427124, "learning_rate": 0.0001, "loss": 0.0144, "step": 123850 }, { "epoch": 814.8684210526316, "grad_norm": 1.235940933227539, "learning_rate": 0.0001, "loss": 0.0165, "step": 123860 }, { "epoch": 814.9342105263158, "grad_norm": 1.5864571332931519, "learning_rate": 0.0001, "loss": 0.0132, "step": 123870 }, { "epoch": 815.0, "grad_norm": 1.6743767261505127, "learning_rate": 0.0001, "loss": 0.013, "step": 123880 }, { "epoch": 815.0657894736842, "grad_norm": 1.3686590194702148, "learning_rate": 0.0001, "loss": 0.0146, "step": 123890 }, { "epoch": 815.1315789473684, "grad_norm": 1.472387433052063, "learning_rate": 0.0001, "loss": 0.018, "step": 123900 }, { "epoch": 815.1973684210526, "grad_norm": 1.6713223457336426, "learning_rate": 0.0001, "loss": 0.0103, "step": 123910 }, { "epoch": 815.2631578947369, "grad_norm": 1.2791894674301147, "learning_rate": 0.0001, "loss": 0.0103, "step": 123920 }, { "epoch": 815.328947368421, "grad_norm": 1.106225609779358, "learning_rate": 0.0001, "loss": 0.0097, "step": 123930 }, { "epoch": 815.3947368421053, "grad_norm": 1.4285918474197388, "learning_rate": 0.0001, "loss": 0.0154, "step": 123940 }, { "epoch": 815.4605263157895, "grad_norm": 1.278631329536438, "learning_rate": 0.0001, "loss": 0.0177, "step": 123950 }, { "epoch": 815.5263157894736, "grad_norm": 1.6157718896865845, "learning_rate": 0.0001, "loss": 0.0149, "step": 123960 }, { "epoch": 815.5921052631579, "grad_norm": 0.9605376124382019, "learning_rate": 0.0001, "loss": 0.0103, "step": 123970 }, { "epoch": 815.6578947368421, "grad_norm": 1.52862548828125, "learning_rate": 0.0001, "loss": 0.01, "step": 123980 }, { "epoch": 815.7236842105264, "grad_norm": 1.424532413482666, "learning_rate": 0.0001, "loss": 0.0118, "step": 123990 }, { "epoch": 815.7894736842105, "grad_norm": 1.7877817153930664, "learning_rate": 0.0001, "loss": 0.0148, "step": 124000 }, { "epoch": 815.8552631578947, "grad_norm": 1.3533092737197876, "learning_rate": 0.0001, "loss": 0.0111, "step": 124010 }, { "epoch": 815.921052631579, "grad_norm": 1.262206792831421, "learning_rate": 0.0001, "loss": 0.0123, "step": 124020 }, { "epoch": 815.9868421052631, "grad_norm": 1.3557440042495728, "learning_rate": 0.0001, "loss": 0.0149, "step": 124030 }, { "epoch": 816.0526315789474, "grad_norm": 1.2340757846832275, "learning_rate": 0.0001, "loss": 0.0135, "step": 124040 }, { "epoch": 816.1184210526316, "grad_norm": 1.5123295783996582, "learning_rate": 0.0001, "loss": 0.013, "step": 124050 }, { "epoch": 816.1842105263158, "grad_norm": 2.4984140396118164, "learning_rate": 0.0001, "loss": 0.0173, "step": 124060 }, { "epoch": 816.25, "grad_norm": 1.4784804582595825, "learning_rate": 0.0001, "loss": 0.0128, "step": 124070 }, { "epoch": 816.3157894736842, "grad_norm": 2.0611233711242676, "learning_rate": 0.0001, "loss": 0.0119, "step": 124080 }, { "epoch": 816.3815789473684, "grad_norm": 1.7000569105148315, "learning_rate": 0.0001, "loss": 0.0136, "step": 124090 }, { "epoch": 816.4473684210526, "grad_norm": 1.3164621591567993, "learning_rate": 0.0001, "loss": 0.0155, "step": 124100 }, { "epoch": 816.5131578947369, "grad_norm": 1.564527153968811, "learning_rate": 0.0001, "loss": 0.0116, "step": 124110 }, { "epoch": 816.578947368421, "grad_norm": 1.367215633392334, "learning_rate": 0.0001, "loss": 0.0157, "step": 124120 }, { "epoch": 816.6447368421053, "grad_norm": 1.6285529136657715, "learning_rate": 0.0001, "loss": 0.0116, "step": 124130 }, { "epoch": 816.7105263157895, "grad_norm": 1.4278161525726318, "learning_rate": 0.0001, "loss": 0.0161, "step": 124140 }, { "epoch": 816.7763157894736, "grad_norm": 1.2603267431259155, "learning_rate": 0.0001, "loss": 0.0109, "step": 124150 }, { "epoch": 816.8421052631579, "grad_norm": 1.4556243419647217, "learning_rate": 0.0001, "loss": 0.0158, "step": 124160 }, { "epoch": 816.9078947368421, "grad_norm": 1.4518085718154907, "learning_rate": 0.0001, "loss": 0.0112, "step": 124170 }, { "epoch": 816.9736842105264, "grad_norm": 1.4208263158798218, "learning_rate": 0.0001, "loss": 0.0133, "step": 124180 }, { "epoch": 817.0394736842105, "grad_norm": 1.4962078332901, "learning_rate": 0.0001, "loss": 0.0118, "step": 124190 }, { "epoch": 817.1052631578947, "grad_norm": 1.3275837898254395, "learning_rate": 0.0001, "loss": 0.0101, "step": 124200 }, { "epoch": 817.171052631579, "grad_norm": 1.704427719116211, "learning_rate": 0.0001, "loss": 0.0132, "step": 124210 }, { "epoch": 817.2368421052631, "grad_norm": 1.1857249736785889, "learning_rate": 0.0001, "loss": 0.0114, "step": 124220 }, { "epoch": 817.3026315789474, "grad_norm": 1.4755723476409912, "learning_rate": 0.0001, "loss": 0.01, "step": 124230 }, { "epoch": 817.3684210526316, "grad_norm": 1.332998514175415, "learning_rate": 0.0001, "loss": 0.0143, "step": 124240 }, { "epoch": 817.4342105263158, "grad_norm": 1.0526789426803589, "learning_rate": 0.0001, "loss": 0.011, "step": 124250 }, { "epoch": 817.5, "grad_norm": 1.3118743896484375, "learning_rate": 0.0001, "loss": 0.0127, "step": 124260 }, { "epoch": 817.5657894736842, "grad_norm": 1.592718243598938, "learning_rate": 0.0001, "loss": 0.0126, "step": 124270 }, { "epoch": 817.6315789473684, "grad_norm": 1.2183703184127808, "learning_rate": 0.0001, "loss": 0.0125, "step": 124280 }, { "epoch": 817.6973684210526, "grad_norm": 1.0068892240524292, "learning_rate": 0.0001, "loss": 0.0139, "step": 124290 }, { "epoch": 817.7631578947369, "grad_norm": 1.3951318264007568, "learning_rate": 0.0001, "loss": 0.0153, "step": 124300 }, { "epoch": 817.828947368421, "grad_norm": 1.5101627111434937, "learning_rate": 0.0001, "loss": 0.0117, "step": 124310 }, { "epoch": 817.8947368421053, "grad_norm": 1.348921298980713, "learning_rate": 0.0001, "loss": 0.0175, "step": 124320 }, { "epoch": 817.9605263157895, "grad_norm": 1.3027921915054321, "learning_rate": 0.0001, "loss": 0.0147, "step": 124330 }, { "epoch": 818.0263157894736, "grad_norm": 1.372183918952942, "learning_rate": 0.0001, "loss": 0.0179, "step": 124340 }, { "epoch": 818.0921052631579, "grad_norm": 1.099475383758545, "learning_rate": 0.0001, "loss": 0.0111, "step": 124350 }, { "epoch": 818.1578947368421, "grad_norm": 1.2691444158554077, "learning_rate": 0.0001, "loss": 0.0115, "step": 124360 }, { "epoch": 818.2236842105264, "grad_norm": 1.2957741022109985, "learning_rate": 0.0001, "loss": 0.0174, "step": 124370 }, { "epoch": 818.2894736842105, "grad_norm": 1.4952759742736816, "learning_rate": 0.0001, "loss": 0.0147, "step": 124380 }, { "epoch": 818.3552631578947, "grad_norm": 1.266737699508667, "learning_rate": 0.0001, "loss": 0.0118, "step": 124390 }, { "epoch": 818.421052631579, "grad_norm": 1.4093947410583496, "learning_rate": 0.0001, "loss": 0.0097, "step": 124400 }, { "epoch": 818.4868421052631, "grad_norm": 1.651901364326477, "learning_rate": 0.0001, "loss": 0.0157, "step": 124410 }, { "epoch": 818.5526315789474, "grad_norm": 1.509814739227295, "learning_rate": 0.0001, "loss": 0.0169, "step": 124420 }, { "epoch": 818.6184210526316, "grad_norm": 1.0951045751571655, "learning_rate": 0.0001, "loss": 0.0103, "step": 124430 }, { "epoch": 818.6842105263158, "grad_norm": 1.122619867324829, "learning_rate": 0.0001, "loss": 0.015, "step": 124440 }, { "epoch": 818.75, "grad_norm": 1.5176372528076172, "learning_rate": 0.0001, "loss": 0.0158, "step": 124450 }, { "epoch": 818.8157894736842, "grad_norm": 1.2236008644104004, "learning_rate": 0.0001, "loss": 0.0113, "step": 124460 }, { "epoch": 818.8815789473684, "grad_norm": 1.527068853378296, "learning_rate": 0.0001, "loss": 0.0128, "step": 124470 }, { "epoch": 818.9473684210526, "grad_norm": 1.2921003103256226, "learning_rate": 0.0001, "loss": 0.0151, "step": 124480 }, { "epoch": 819.0131578947369, "grad_norm": 1.278098702430725, "learning_rate": 0.0001, "loss": 0.0133, "step": 124490 }, { "epoch": 819.078947368421, "grad_norm": 1.6436222791671753, "learning_rate": 0.0001, "loss": 0.0153, "step": 124500 }, { "epoch": 819.1447368421053, "grad_norm": 1.6532222032546997, "learning_rate": 0.0001, "loss": 0.0144, "step": 124510 }, { "epoch": 819.2105263157895, "grad_norm": 1.7807610034942627, "learning_rate": 0.0001, "loss": 0.0112, "step": 124520 }, { "epoch": 819.2763157894736, "grad_norm": 1.4658104181289673, "learning_rate": 0.0001, "loss": 0.0141, "step": 124530 }, { "epoch": 819.3421052631579, "grad_norm": 1.693515419960022, "learning_rate": 0.0001, "loss": 0.0139, "step": 124540 }, { "epoch": 819.4078947368421, "grad_norm": 1.93189537525177, "learning_rate": 0.0001, "loss": 0.0114, "step": 124550 }, { "epoch": 819.4736842105264, "grad_norm": 1.265369176864624, "learning_rate": 0.0001, "loss": 0.0114, "step": 124560 }, { "epoch": 819.5394736842105, "grad_norm": 1.06802499294281, "learning_rate": 0.0001, "loss": 0.014, "step": 124570 }, { "epoch": 819.6052631578947, "grad_norm": 1.177432656288147, "learning_rate": 0.0001, "loss": 0.0098, "step": 124580 }, { "epoch": 819.671052631579, "grad_norm": 1.5847574472427368, "learning_rate": 0.0001, "loss": 0.0159, "step": 124590 }, { "epoch": 819.7368421052631, "grad_norm": 1.4850506782531738, "learning_rate": 0.0001, "loss": 0.0129, "step": 124600 }, { "epoch": 819.8026315789474, "grad_norm": 1.2084038257598877, "learning_rate": 0.0001, "loss": 0.0118, "step": 124610 }, { "epoch": 819.8684210526316, "grad_norm": 1.4740686416625977, "learning_rate": 0.0001, "loss": 0.0131, "step": 124620 }, { "epoch": 819.9342105263158, "grad_norm": 1.42989182472229, "learning_rate": 0.0001, "loss": 0.017, "step": 124630 }, { "epoch": 820.0, "grad_norm": 1.6298736333847046, "learning_rate": 0.0001, "loss": 0.0118, "step": 124640 }, { "epoch": 820.0657894736842, "grad_norm": 1.7024978399276733, "learning_rate": 0.0001, "loss": 0.0141, "step": 124650 }, { "epoch": 820.1315789473684, "grad_norm": 1.5138508081436157, "learning_rate": 0.0001, "loss": 0.0108, "step": 124660 }, { "epoch": 820.1973684210526, "grad_norm": 1.6068187952041626, "learning_rate": 0.0001, "loss": 0.0125, "step": 124670 }, { "epoch": 820.2631578947369, "grad_norm": 1.4278866052627563, "learning_rate": 0.0001, "loss": 0.0132, "step": 124680 }, { "epoch": 820.328947368421, "grad_norm": 1.7599352598190308, "learning_rate": 0.0001, "loss": 0.0123, "step": 124690 }, { "epoch": 820.3947368421053, "grad_norm": 1.5844075679779053, "learning_rate": 0.0001, "loss": 0.0128, "step": 124700 }, { "epoch": 820.4605263157895, "grad_norm": 1.8276933431625366, "learning_rate": 0.0001, "loss": 0.0147, "step": 124710 }, { "epoch": 820.5263157894736, "grad_norm": 1.2808748483657837, "learning_rate": 0.0001, "loss": 0.0108, "step": 124720 }, { "epoch": 820.5921052631579, "grad_norm": 1.846043586730957, "learning_rate": 0.0001, "loss": 0.0099, "step": 124730 }, { "epoch": 820.6578947368421, "grad_norm": 1.7982409000396729, "learning_rate": 0.0001, "loss": 0.0119, "step": 124740 }, { "epoch": 820.7236842105264, "grad_norm": 1.1384165287017822, "learning_rate": 0.0001, "loss": 0.0161, "step": 124750 }, { "epoch": 820.7894736842105, "grad_norm": 1.0312650203704834, "learning_rate": 0.0001, "loss": 0.014, "step": 124760 }, { "epoch": 820.8552631578947, "grad_norm": 1.3372516632080078, "learning_rate": 0.0001, "loss": 0.0136, "step": 124770 }, { "epoch": 820.921052631579, "grad_norm": 1.3835489749908447, "learning_rate": 0.0001, "loss": 0.0145, "step": 124780 }, { "epoch": 820.9868421052631, "grad_norm": 1.1873949766159058, "learning_rate": 0.0001, "loss": 0.0135, "step": 124790 }, { "epoch": 821.0526315789474, "grad_norm": 1.2423219680786133, "learning_rate": 0.0001, "loss": 0.0132, "step": 124800 }, { "epoch": 821.1184210526316, "grad_norm": 1.6452012062072754, "learning_rate": 0.0001, "loss": 0.0183, "step": 124810 }, { "epoch": 821.1842105263158, "grad_norm": 2.356687307357788, "learning_rate": 0.0001, "loss": 0.0176, "step": 124820 }, { "epoch": 821.25, "grad_norm": 3.831728219985962, "learning_rate": 0.0001, "loss": 0.0152, "step": 124830 }, { "epoch": 821.3157894736842, "grad_norm": 2.0094165802001953, "learning_rate": 0.0001, "loss": 0.013, "step": 124840 }, { "epoch": 821.3815789473684, "grad_norm": 1.9658054113388062, "learning_rate": 0.0001, "loss": 0.0098, "step": 124850 }, { "epoch": 821.4473684210526, "grad_norm": 1.7766392230987549, "learning_rate": 0.0001, "loss": 0.0138, "step": 124860 }, { "epoch": 821.5131578947369, "grad_norm": 1.928013563156128, "learning_rate": 0.0001, "loss": 0.0106, "step": 124870 }, { "epoch": 821.578947368421, "grad_norm": 1.7001979351043701, "learning_rate": 0.0001, "loss": 0.0097, "step": 124880 }, { "epoch": 821.6447368421053, "grad_norm": 1.6963469982147217, "learning_rate": 0.0001, "loss": 0.0141, "step": 124890 }, { "epoch": 821.7105263157895, "grad_norm": 1.0758428573608398, "learning_rate": 0.0001, "loss": 0.0092, "step": 124900 }, { "epoch": 821.7763157894736, "grad_norm": 1.2885286808013916, "learning_rate": 0.0001, "loss": 0.0092, "step": 124910 }, { "epoch": 821.8421052631579, "grad_norm": 2.0798144340515137, "learning_rate": 0.0001, "loss": 0.0113, "step": 124920 }, { "epoch": 821.9078947368421, "grad_norm": 1.9685728549957275, "learning_rate": 0.0001, "loss": 0.0131, "step": 124930 }, { "epoch": 821.9736842105264, "grad_norm": 1.5766602754592896, "learning_rate": 0.0001, "loss": 0.0149, "step": 124940 }, { "epoch": 822.0394736842105, "grad_norm": 1.439321756362915, "learning_rate": 0.0001, "loss": 0.0189, "step": 124950 }, { "epoch": 822.1052631578947, "grad_norm": 1.6965820789337158, "learning_rate": 0.0001, "loss": 0.0161, "step": 124960 }, { "epoch": 822.171052631579, "grad_norm": 1.4412232637405396, "learning_rate": 0.0001, "loss": 0.0112, "step": 124970 }, { "epoch": 822.2368421052631, "grad_norm": 1.1754169464111328, "learning_rate": 0.0001, "loss": 0.0116, "step": 124980 }, { "epoch": 822.3026315789474, "grad_norm": 1.5828094482421875, "learning_rate": 0.0001, "loss": 0.0147, "step": 124990 }, { "epoch": 822.3684210526316, "grad_norm": 1.82345449924469, "learning_rate": 0.0001, "loss": 0.0143, "step": 125000 }, { "epoch": 822.4342105263158, "grad_norm": 1.4697341918945312, "learning_rate": 0.0001, "loss": 0.0166, "step": 125010 }, { "epoch": 822.5, "grad_norm": 1.40639328956604, "learning_rate": 0.0001, "loss": 0.0108, "step": 125020 }, { "epoch": 822.5657894736842, "grad_norm": 1.2358158826828003, "learning_rate": 0.0001, "loss": 0.0169, "step": 125030 }, { "epoch": 822.6315789473684, "grad_norm": 1.3646806478500366, "learning_rate": 0.0001, "loss": 0.0099, "step": 125040 }, { "epoch": 822.6973684210526, "grad_norm": 1.4826267957687378, "learning_rate": 0.0001, "loss": 0.0139, "step": 125050 }, { "epoch": 822.7631578947369, "grad_norm": 1.3981484174728394, "learning_rate": 0.0001, "loss": 0.0126, "step": 125060 }, { "epoch": 822.828947368421, "grad_norm": 1.1145609617233276, "learning_rate": 0.0001, "loss": 0.0093, "step": 125070 }, { "epoch": 822.8947368421053, "grad_norm": 1.283082127571106, "learning_rate": 0.0001, "loss": 0.0136, "step": 125080 }, { "epoch": 822.9605263157895, "grad_norm": 1.4199542999267578, "learning_rate": 0.0001, "loss": 0.0117, "step": 125090 }, { "epoch": 823.0263157894736, "grad_norm": 1.2435425519943237, "learning_rate": 0.0001, "loss": 0.0148, "step": 125100 }, { "epoch": 823.0921052631579, "grad_norm": 1.0997811555862427, "learning_rate": 0.0001, "loss": 0.0131, "step": 125110 }, { "epoch": 823.1578947368421, "grad_norm": 1.4171661138534546, "learning_rate": 0.0001, "loss": 0.0124, "step": 125120 }, { "epoch": 823.2236842105264, "grad_norm": 1.0490080118179321, "learning_rate": 0.0001, "loss": 0.0141, "step": 125130 }, { "epoch": 823.2894736842105, "grad_norm": 1.1390573978424072, "learning_rate": 0.0001, "loss": 0.0145, "step": 125140 }, { "epoch": 823.3552631578947, "grad_norm": 1.2962349653244019, "learning_rate": 0.0001, "loss": 0.0144, "step": 125150 }, { "epoch": 823.421052631579, "grad_norm": 1.1341630220413208, "learning_rate": 0.0001, "loss": 0.015, "step": 125160 }, { "epoch": 823.4868421052631, "grad_norm": 1.282665729522705, "learning_rate": 0.0001, "loss": 0.0113, "step": 125170 }, { "epoch": 823.5526315789474, "grad_norm": 1.3513184785842896, "learning_rate": 0.0001, "loss": 0.0112, "step": 125180 }, { "epoch": 823.6184210526316, "grad_norm": 1.458191990852356, "learning_rate": 0.0001, "loss": 0.0119, "step": 125190 }, { "epoch": 823.6842105263158, "grad_norm": 1.4826985597610474, "learning_rate": 0.0001, "loss": 0.0116, "step": 125200 }, { "epoch": 823.75, "grad_norm": 1.4005696773529053, "learning_rate": 0.0001, "loss": 0.0173, "step": 125210 }, { "epoch": 823.8157894736842, "grad_norm": 1.7395379543304443, "learning_rate": 0.0001, "loss": 0.0106, "step": 125220 }, { "epoch": 823.8815789473684, "grad_norm": 1.4209774732589722, "learning_rate": 0.0001, "loss": 0.012, "step": 125230 }, { "epoch": 823.9473684210526, "grad_norm": 1.6667505502700806, "learning_rate": 0.0001, "loss": 0.0173, "step": 125240 }, { "epoch": 824.0131578947369, "grad_norm": 1.6887600421905518, "learning_rate": 0.0001, "loss": 0.012, "step": 125250 }, { "epoch": 824.078947368421, "grad_norm": 2.0479888916015625, "learning_rate": 0.0001, "loss": 0.0116, "step": 125260 }, { "epoch": 824.1447368421053, "grad_norm": 1.5899707078933716, "learning_rate": 0.0001, "loss": 0.0156, "step": 125270 }, { "epoch": 824.2105263157895, "grad_norm": 1.4917328357696533, "learning_rate": 0.0001, "loss": 0.0103, "step": 125280 }, { "epoch": 824.2763157894736, "grad_norm": 1.766676664352417, "learning_rate": 0.0001, "loss": 0.011, "step": 125290 }, { "epoch": 824.3421052631579, "grad_norm": 1.3923630714416504, "learning_rate": 0.0001, "loss": 0.0113, "step": 125300 }, { "epoch": 824.4078947368421, "grad_norm": 1.595533013343811, "learning_rate": 0.0001, "loss": 0.0116, "step": 125310 }, { "epoch": 824.4736842105264, "grad_norm": 1.318145990371704, "learning_rate": 0.0001, "loss": 0.0093, "step": 125320 }, { "epoch": 824.5394736842105, "grad_norm": 1.5360060930252075, "learning_rate": 0.0001, "loss": 0.0094, "step": 125330 }, { "epoch": 824.6052631578947, "grad_norm": 1.6974341869354248, "learning_rate": 0.0001, "loss": 0.0159, "step": 125340 }, { "epoch": 824.671052631579, "grad_norm": 1.7659883499145508, "learning_rate": 0.0001, "loss": 0.0129, "step": 125350 }, { "epoch": 824.7368421052631, "grad_norm": 1.9052869081497192, "learning_rate": 0.0001, "loss": 0.0124, "step": 125360 }, { "epoch": 824.8026315789474, "grad_norm": 1.5027861595153809, "learning_rate": 0.0001, "loss": 0.0183, "step": 125370 }, { "epoch": 824.8684210526316, "grad_norm": 1.5263733863830566, "learning_rate": 0.0001, "loss": 0.0186, "step": 125380 }, { "epoch": 824.9342105263158, "grad_norm": 1.6347366571426392, "learning_rate": 0.0001, "loss": 0.0139, "step": 125390 }, { "epoch": 825.0, "grad_norm": 1.3116576671600342, "learning_rate": 0.0001, "loss": 0.0135, "step": 125400 }, { "epoch": 825.0657894736842, "grad_norm": 1.5081477165222168, "learning_rate": 0.0001, "loss": 0.0111, "step": 125410 }, { "epoch": 825.1315789473684, "grad_norm": 1.396284818649292, "learning_rate": 0.0001, "loss": 0.0115, "step": 125420 }, { "epoch": 825.1973684210526, "grad_norm": 1.5156468152999878, "learning_rate": 0.0001, "loss": 0.01, "step": 125430 }, { "epoch": 825.2631578947369, "grad_norm": 1.0420080423355103, "learning_rate": 0.0001, "loss": 0.0095, "step": 125440 }, { "epoch": 825.328947368421, "grad_norm": 1.2337325811386108, "learning_rate": 0.0001, "loss": 0.015, "step": 125450 }, { "epoch": 825.3947368421053, "grad_norm": 1.2384024858474731, "learning_rate": 0.0001, "loss": 0.0126, "step": 125460 }, { "epoch": 825.4605263157895, "grad_norm": 0.9712475538253784, "learning_rate": 0.0001, "loss": 0.0132, "step": 125470 }, { "epoch": 825.5263157894736, "grad_norm": 1.2859656810760498, "learning_rate": 0.0001, "loss": 0.0136, "step": 125480 }, { "epoch": 825.5921052631579, "grad_norm": 1.1094233989715576, "learning_rate": 0.0001, "loss": 0.0115, "step": 125490 }, { "epoch": 825.6578947368421, "grad_norm": 1.7473732233047485, "learning_rate": 0.0001, "loss": 0.0137, "step": 125500 }, { "epoch": 825.7236842105264, "grad_norm": 1.908394694328308, "learning_rate": 0.0001, "loss": 0.0166, "step": 125510 }, { "epoch": 825.7894736842105, "grad_norm": 1.894298791885376, "learning_rate": 0.0001, "loss": 0.019, "step": 125520 }, { "epoch": 825.8552631578947, "grad_norm": 1.9967710971832275, "learning_rate": 0.0001, "loss": 0.0181, "step": 125530 }, { "epoch": 825.921052631579, "grad_norm": 2.0115060806274414, "learning_rate": 0.0001, "loss": 0.0107, "step": 125540 }, { "epoch": 825.9868421052631, "grad_norm": 1.3069628477096558, "learning_rate": 0.0001, "loss": 0.0136, "step": 125550 }, { "epoch": 826.0526315789474, "grad_norm": 1.8123244047164917, "learning_rate": 0.0001, "loss": 0.0124, "step": 125560 }, { "epoch": 826.1184210526316, "grad_norm": 1.1399030685424805, "learning_rate": 0.0001, "loss": 0.0117, "step": 125570 }, { "epoch": 826.1842105263158, "grad_norm": 1.0771255493164062, "learning_rate": 0.0001, "loss": 0.0101, "step": 125580 }, { "epoch": 826.25, "grad_norm": 1.5801254510879517, "learning_rate": 0.0001, "loss": 0.0153, "step": 125590 }, { "epoch": 826.3157894736842, "grad_norm": 1.3267065286636353, "learning_rate": 0.0001, "loss": 0.0121, "step": 125600 }, { "epoch": 826.3815789473684, "grad_norm": 1.5660735368728638, "learning_rate": 0.0001, "loss": 0.0116, "step": 125610 }, { "epoch": 826.4473684210526, "grad_norm": 1.1969376802444458, "learning_rate": 0.0001, "loss": 0.0119, "step": 125620 }, { "epoch": 826.5131578947369, "grad_norm": 0.815005362033844, "learning_rate": 0.0001, "loss": 0.0114, "step": 125630 }, { "epoch": 826.578947368421, "grad_norm": 1.118727684020996, "learning_rate": 0.0001, "loss": 0.02, "step": 125640 }, { "epoch": 826.6447368421053, "grad_norm": 1.358499526977539, "learning_rate": 0.0001, "loss": 0.0152, "step": 125650 }, { "epoch": 826.7105263157895, "grad_norm": 0.888767421245575, "learning_rate": 0.0001, "loss": 0.0156, "step": 125660 }, { "epoch": 826.7763157894736, "grad_norm": 1.3105884790420532, "learning_rate": 0.0001, "loss": 0.0101, "step": 125670 }, { "epoch": 826.8421052631579, "grad_norm": 1.3394136428833008, "learning_rate": 0.0001, "loss": 0.012, "step": 125680 }, { "epoch": 826.9078947368421, "grad_norm": 1.6786818504333496, "learning_rate": 0.0001, "loss": 0.0139, "step": 125690 }, { "epoch": 826.9736842105264, "grad_norm": 1.2720118761062622, "learning_rate": 0.0001, "loss": 0.0185, "step": 125700 }, { "epoch": 827.0394736842105, "grad_norm": 1.6442147493362427, "learning_rate": 0.0001, "loss": 0.0152, "step": 125710 }, { "epoch": 827.1052631578947, "grad_norm": 1.6395248174667358, "learning_rate": 0.0001, "loss": 0.0145, "step": 125720 }, { "epoch": 827.171052631579, "grad_norm": 1.7536286115646362, "learning_rate": 0.0001, "loss": 0.0111, "step": 125730 }, { "epoch": 827.2368421052631, "grad_norm": 1.3924267292022705, "learning_rate": 0.0001, "loss": 0.0125, "step": 125740 }, { "epoch": 827.3026315789474, "grad_norm": 1.3796662092208862, "learning_rate": 0.0001, "loss": 0.0126, "step": 125750 }, { "epoch": 827.3684210526316, "grad_norm": 1.4199450016021729, "learning_rate": 0.0001, "loss": 0.0127, "step": 125760 }, { "epoch": 827.4342105263158, "grad_norm": 1.1984844207763672, "learning_rate": 0.0001, "loss": 0.0178, "step": 125770 }, { "epoch": 827.5, "grad_norm": 1.3348807096481323, "learning_rate": 0.0001, "loss": 0.0123, "step": 125780 }, { "epoch": 827.5657894736842, "grad_norm": 1.474331021308899, "learning_rate": 0.0001, "loss": 0.0141, "step": 125790 }, { "epoch": 827.6315789473684, "grad_norm": 1.5202580690383911, "learning_rate": 0.0001, "loss": 0.0137, "step": 125800 }, { "epoch": 827.6973684210526, "grad_norm": 1.1713001728057861, "learning_rate": 0.0001, "loss": 0.0107, "step": 125810 }, { "epoch": 827.7631578947369, "grad_norm": 1.2617430686950684, "learning_rate": 0.0001, "loss": 0.0114, "step": 125820 }, { "epoch": 827.828947368421, "grad_norm": 1.6850361824035645, "learning_rate": 0.0001, "loss": 0.0132, "step": 125830 }, { "epoch": 827.8947368421053, "grad_norm": 1.6058036088943481, "learning_rate": 0.0001, "loss": 0.0137, "step": 125840 }, { "epoch": 827.9605263157895, "grad_norm": 1.5680596828460693, "learning_rate": 0.0001, "loss": 0.0134, "step": 125850 }, { "epoch": 828.0263157894736, "grad_norm": 1.4290052652359009, "learning_rate": 0.0001, "loss": 0.0169, "step": 125860 }, { "epoch": 828.0921052631579, "grad_norm": 1.1194982528686523, "learning_rate": 0.0001, "loss": 0.0126, "step": 125870 }, { "epoch": 828.1578947368421, "grad_norm": 1.395209550857544, "learning_rate": 0.0001, "loss": 0.0116, "step": 125880 }, { "epoch": 828.2236842105264, "grad_norm": 1.5597938299179077, "learning_rate": 0.0001, "loss": 0.0118, "step": 125890 }, { "epoch": 828.2894736842105, "grad_norm": 2.0889925956726074, "learning_rate": 0.0001, "loss": 0.0131, "step": 125900 }, { "epoch": 828.3552631578947, "grad_norm": 2.090623378753662, "learning_rate": 0.0001, "loss": 0.0173, "step": 125910 }, { "epoch": 828.421052631579, "grad_norm": 1.5351836681365967, "learning_rate": 0.0001, "loss": 0.013, "step": 125920 }, { "epoch": 828.4868421052631, "grad_norm": 1.7680171728134155, "learning_rate": 0.0001, "loss": 0.0153, "step": 125930 }, { "epoch": 828.5526315789474, "grad_norm": 1.4763985872268677, "learning_rate": 0.0001, "loss": 0.0166, "step": 125940 }, { "epoch": 828.6184210526316, "grad_norm": 1.6278773546218872, "learning_rate": 0.0001, "loss": 0.013, "step": 125950 }, { "epoch": 828.6842105263158, "grad_norm": 1.2896838188171387, "learning_rate": 0.0001, "loss": 0.011, "step": 125960 }, { "epoch": 828.75, "grad_norm": 1.1741631031036377, "learning_rate": 0.0001, "loss": 0.0125, "step": 125970 }, { "epoch": 828.8157894736842, "grad_norm": 1.6745328903198242, "learning_rate": 0.0001, "loss": 0.0128, "step": 125980 }, { "epoch": 828.8815789473684, "grad_norm": 1.7920691967010498, "learning_rate": 0.0001, "loss": 0.0109, "step": 125990 }, { "epoch": 828.9473684210526, "grad_norm": 1.3238894939422607, "learning_rate": 0.0001, "loss": 0.0141, "step": 126000 }, { "epoch": 829.0131578947369, "grad_norm": 1.5623029470443726, "learning_rate": 0.0001, "loss": 0.0102, "step": 126010 }, { "epoch": 829.078947368421, "grad_norm": 1.7327520847320557, "learning_rate": 0.0001, "loss": 0.0109, "step": 126020 }, { "epoch": 829.1447368421053, "grad_norm": 1.45041024684906, "learning_rate": 0.0001, "loss": 0.0146, "step": 126030 }, { "epoch": 829.2105263157895, "grad_norm": 1.6249688863754272, "learning_rate": 0.0001, "loss": 0.0109, "step": 126040 }, { "epoch": 829.2763157894736, "grad_norm": 1.8373849391937256, "learning_rate": 0.0001, "loss": 0.0152, "step": 126050 }, { "epoch": 829.3421052631579, "grad_norm": 1.751258373260498, "learning_rate": 0.0001, "loss": 0.0135, "step": 126060 }, { "epoch": 829.4078947368421, "grad_norm": 1.219768762588501, "learning_rate": 0.0001, "loss": 0.0127, "step": 126070 }, { "epoch": 829.4736842105264, "grad_norm": 1.5844093561172485, "learning_rate": 0.0001, "loss": 0.0144, "step": 126080 }, { "epoch": 829.5394736842105, "grad_norm": 1.4459025859832764, "learning_rate": 0.0001, "loss": 0.0116, "step": 126090 }, { "epoch": 829.6052631578947, "grad_norm": 1.0801929235458374, "learning_rate": 0.0001, "loss": 0.0164, "step": 126100 }, { "epoch": 829.671052631579, "grad_norm": 1.4937217235565186, "learning_rate": 0.0001, "loss": 0.0146, "step": 126110 }, { "epoch": 829.7368421052631, "grad_norm": 1.8871515989303589, "learning_rate": 0.0001, "loss": 0.0167, "step": 126120 }, { "epoch": 829.8026315789474, "grad_norm": 1.4403449296951294, "learning_rate": 0.0001, "loss": 0.0118, "step": 126130 }, { "epoch": 829.8684210526316, "grad_norm": 3.0272181034088135, "learning_rate": 0.0001, "loss": 0.0121, "step": 126140 }, { "epoch": 829.9342105263158, "grad_norm": 2.275099515914917, "learning_rate": 0.0001, "loss": 0.0139, "step": 126150 }, { "epoch": 830.0, "grad_norm": 2.276808261871338, "learning_rate": 0.0001, "loss": 0.0123, "step": 126160 }, { "epoch": 830.0657894736842, "grad_norm": 2.0530848503112793, "learning_rate": 0.0001, "loss": 0.0128, "step": 126170 }, { "epoch": 830.1315789473684, "grad_norm": 1.781798243522644, "learning_rate": 0.0001, "loss": 0.0109, "step": 126180 }, { "epoch": 830.1973684210526, "grad_norm": 1.4931871891021729, "learning_rate": 0.0001, "loss": 0.0109, "step": 126190 }, { "epoch": 830.2631578947369, "grad_norm": 1.512377142906189, "learning_rate": 0.0001, "loss": 0.0168, "step": 126200 }, { "epoch": 830.328947368421, "grad_norm": 1.528536319732666, "learning_rate": 0.0001, "loss": 0.0131, "step": 126210 }, { "epoch": 830.3947368421053, "grad_norm": 1.2506803274154663, "learning_rate": 0.0001, "loss": 0.0128, "step": 126220 }, { "epoch": 830.4605263157895, "grad_norm": 1.2386480569839478, "learning_rate": 0.0001, "loss": 0.0153, "step": 126230 }, { "epoch": 830.5263157894736, "grad_norm": 1.3886269330978394, "learning_rate": 0.0001, "loss": 0.0093, "step": 126240 }, { "epoch": 830.5921052631579, "grad_norm": 1.616391897201538, "learning_rate": 0.0001, "loss": 0.0125, "step": 126250 }, { "epoch": 830.6578947368421, "grad_norm": 1.3686342239379883, "learning_rate": 0.0001, "loss": 0.0134, "step": 126260 }, { "epoch": 830.7236842105264, "grad_norm": 1.3584970235824585, "learning_rate": 0.0001, "loss": 0.0128, "step": 126270 }, { "epoch": 830.7894736842105, "grad_norm": 1.7143045663833618, "learning_rate": 0.0001, "loss": 0.0105, "step": 126280 }, { "epoch": 830.8552631578947, "grad_norm": 1.1856242418289185, "learning_rate": 0.0001, "loss": 0.0109, "step": 126290 }, { "epoch": 830.921052631579, "grad_norm": 2.0612692832946777, "learning_rate": 0.0001, "loss": 0.0147, "step": 126300 }, { "epoch": 830.9868421052631, "grad_norm": 1.5004067420959473, "learning_rate": 0.0001, "loss": 0.016, "step": 126310 }, { "epoch": 831.0526315789474, "grad_norm": 1.4382739067077637, "learning_rate": 0.0001, "loss": 0.0101, "step": 126320 }, { "epoch": 831.1184210526316, "grad_norm": 1.2342561483383179, "learning_rate": 0.0001, "loss": 0.0161, "step": 126330 }, { "epoch": 831.1842105263158, "grad_norm": 1.652779221534729, "learning_rate": 0.0001, "loss": 0.0134, "step": 126340 }, { "epoch": 831.25, "grad_norm": 1.3656727075576782, "learning_rate": 0.0001, "loss": 0.0143, "step": 126350 }, { "epoch": 831.3157894736842, "grad_norm": 1.5392564535140991, "learning_rate": 0.0001, "loss": 0.0143, "step": 126360 }, { "epoch": 831.3815789473684, "grad_norm": 1.0801451206207275, "learning_rate": 0.0001, "loss": 0.0118, "step": 126370 }, { "epoch": 831.4473684210526, "grad_norm": 1.7525951862335205, "learning_rate": 0.0001, "loss": 0.0178, "step": 126380 }, { "epoch": 831.5131578947369, "grad_norm": 1.7928251028060913, "learning_rate": 0.0001, "loss": 0.011, "step": 126390 }, { "epoch": 831.578947368421, "grad_norm": 1.6187102794647217, "learning_rate": 0.0001, "loss": 0.0137, "step": 126400 }, { "epoch": 831.6447368421053, "grad_norm": 1.2460933923721313, "learning_rate": 0.0001, "loss": 0.0094, "step": 126410 }, { "epoch": 831.7105263157895, "grad_norm": 1.4204732179641724, "learning_rate": 0.0001, "loss": 0.0119, "step": 126420 }, { "epoch": 831.7763157894736, "grad_norm": 1.835306167602539, "learning_rate": 0.0001, "loss": 0.0147, "step": 126430 }, { "epoch": 831.8421052631579, "grad_norm": 1.8119637966156006, "learning_rate": 0.0001, "loss": 0.0121, "step": 126440 }, { "epoch": 831.9078947368421, "grad_norm": 1.2528225183486938, "learning_rate": 0.0001, "loss": 0.0166, "step": 126450 }, { "epoch": 831.9736842105264, "grad_norm": 1.0428273677825928, "learning_rate": 0.0001, "loss": 0.0106, "step": 126460 }, { "epoch": 832.0394736842105, "grad_norm": 1.3816765546798706, "learning_rate": 0.0001, "loss": 0.0124, "step": 126470 }, { "epoch": 832.1052631578947, "grad_norm": 1.464387059211731, "learning_rate": 0.0001, "loss": 0.0101, "step": 126480 }, { "epoch": 832.171052631579, "grad_norm": 1.6036148071289062, "learning_rate": 0.0001, "loss": 0.0132, "step": 126490 }, { "epoch": 832.2368421052631, "grad_norm": 1.354020595550537, "learning_rate": 0.0001, "loss": 0.0133, "step": 126500 }, { "epoch": 832.3026315789474, "grad_norm": 1.2147419452667236, "learning_rate": 0.0001, "loss": 0.0131, "step": 126510 }, { "epoch": 832.3684210526316, "grad_norm": 1.6220154762268066, "learning_rate": 0.0001, "loss": 0.0138, "step": 126520 }, { "epoch": 832.4342105263158, "grad_norm": 1.6478756666183472, "learning_rate": 0.0001, "loss": 0.0142, "step": 126530 }, { "epoch": 832.5, "grad_norm": 1.6804062128067017, "learning_rate": 0.0001, "loss": 0.0138, "step": 126540 }, { "epoch": 832.5657894736842, "grad_norm": 1.4899712800979614, "learning_rate": 0.0001, "loss": 0.0118, "step": 126550 }, { "epoch": 832.6315789473684, "grad_norm": 1.0298510789871216, "learning_rate": 0.0001, "loss": 0.0154, "step": 126560 }, { "epoch": 832.6973684210526, "grad_norm": 1.5623555183410645, "learning_rate": 0.0001, "loss": 0.0125, "step": 126570 }, { "epoch": 832.7631578947369, "grad_norm": 1.3311704397201538, "learning_rate": 0.0001, "loss": 0.012, "step": 126580 }, { "epoch": 832.828947368421, "grad_norm": 1.5769256353378296, "learning_rate": 0.0001, "loss": 0.0143, "step": 126590 }, { "epoch": 832.8947368421053, "grad_norm": 1.5076794624328613, "learning_rate": 0.0001, "loss": 0.0173, "step": 126600 }, { "epoch": 832.9605263157895, "grad_norm": 1.4366650581359863, "learning_rate": 0.0001, "loss": 0.0148, "step": 126610 }, { "epoch": 833.0263157894736, "grad_norm": 1.437345266342163, "learning_rate": 0.0001, "loss": 0.0119, "step": 126620 }, { "epoch": 833.0921052631579, "grad_norm": 1.24420964717865, "learning_rate": 0.0001, "loss": 0.0152, "step": 126630 }, { "epoch": 833.1578947368421, "grad_norm": 1.7027053833007812, "learning_rate": 0.0001, "loss": 0.0149, "step": 126640 }, { "epoch": 833.2236842105264, "grad_norm": 1.4689500331878662, "learning_rate": 0.0001, "loss": 0.0147, "step": 126650 }, { "epoch": 833.2894736842105, "grad_norm": 1.3752307891845703, "learning_rate": 0.0001, "loss": 0.0125, "step": 126660 }, { "epoch": 833.3552631578947, "grad_norm": 1.385047197341919, "learning_rate": 0.0001, "loss": 0.0124, "step": 126670 }, { "epoch": 833.421052631579, "grad_norm": 1.641148567199707, "learning_rate": 0.0001, "loss": 0.0139, "step": 126680 }, { "epoch": 833.4868421052631, "grad_norm": 1.5043363571166992, "learning_rate": 0.0001, "loss": 0.015, "step": 126690 }, { "epoch": 833.5526315789474, "grad_norm": 1.9036625623703003, "learning_rate": 0.0001, "loss": 0.0135, "step": 126700 }, { "epoch": 833.6184210526316, "grad_norm": 1.448189616203308, "learning_rate": 0.0001, "loss": 0.0143, "step": 126710 }, { "epoch": 833.6842105263158, "grad_norm": 1.820835828781128, "learning_rate": 0.0001, "loss": 0.0105, "step": 126720 }, { "epoch": 833.75, "grad_norm": 1.475182056427002, "learning_rate": 0.0001, "loss": 0.0109, "step": 126730 }, { "epoch": 833.8157894736842, "grad_norm": 1.4973336458206177, "learning_rate": 0.0001, "loss": 0.0118, "step": 126740 }, { "epoch": 833.8815789473684, "grad_norm": 1.459065556526184, "learning_rate": 0.0001, "loss": 0.0125, "step": 126750 }, { "epoch": 833.9473684210526, "grad_norm": 1.4513412714004517, "learning_rate": 0.0001, "loss": 0.0147, "step": 126760 }, { "epoch": 834.0131578947369, "grad_norm": 1.6961389780044556, "learning_rate": 0.0001, "loss": 0.014, "step": 126770 }, { "epoch": 834.078947368421, "grad_norm": 1.2507266998291016, "learning_rate": 0.0001, "loss": 0.012, "step": 126780 }, { "epoch": 834.1447368421053, "grad_norm": 1.2278797626495361, "learning_rate": 0.0001, "loss": 0.0127, "step": 126790 }, { "epoch": 834.2105263157895, "grad_norm": 1.3898810148239136, "learning_rate": 0.0001, "loss": 0.0166, "step": 126800 }, { "epoch": 834.2763157894736, "grad_norm": 0.8312785625457764, "learning_rate": 0.0001, "loss": 0.013, "step": 126810 }, { "epoch": 834.3421052631579, "grad_norm": 1.6397908926010132, "learning_rate": 0.0001, "loss": 0.0152, "step": 126820 }, { "epoch": 834.4078947368421, "grad_norm": 1.8927100896835327, "learning_rate": 0.0001, "loss": 0.0122, "step": 126830 }, { "epoch": 834.4736842105264, "grad_norm": 1.7850512266159058, "learning_rate": 0.0001, "loss": 0.0142, "step": 126840 }, { "epoch": 834.5394736842105, "grad_norm": 1.5607211589813232, "learning_rate": 0.0001, "loss": 0.0196, "step": 126850 }, { "epoch": 834.6052631578947, "grad_norm": 1.4468756914138794, "learning_rate": 0.0001, "loss": 0.0136, "step": 126860 }, { "epoch": 834.671052631579, "grad_norm": 1.8158153295516968, "learning_rate": 0.0001, "loss": 0.0144, "step": 126870 }, { "epoch": 834.7368421052631, "grad_norm": 1.1800618171691895, "learning_rate": 0.0001, "loss": 0.0108, "step": 126880 }, { "epoch": 834.8026315789474, "grad_norm": 1.1630138158798218, "learning_rate": 0.0001, "loss": 0.012, "step": 126890 }, { "epoch": 834.8684210526316, "grad_norm": 1.2241064310073853, "learning_rate": 0.0001, "loss": 0.0097, "step": 126900 }, { "epoch": 834.9342105263158, "grad_norm": 1.4936763048171997, "learning_rate": 0.0001, "loss": 0.0133, "step": 126910 }, { "epoch": 835.0, "grad_norm": 1.4905261993408203, "learning_rate": 0.0001, "loss": 0.0103, "step": 126920 }, { "epoch": 835.0657894736842, "grad_norm": 1.1871709823608398, "learning_rate": 0.0001, "loss": 0.0141, "step": 126930 }, { "epoch": 835.1315789473684, "grad_norm": 1.3171658515930176, "learning_rate": 0.0001, "loss": 0.0125, "step": 126940 }, { "epoch": 835.1973684210526, "grad_norm": 1.2715383768081665, "learning_rate": 0.0001, "loss": 0.0161, "step": 126950 }, { "epoch": 835.2631578947369, "grad_norm": 1.6898120641708374, "learning_rate": 0.0001, "loss": 0.0139, "step": 126960 }, { "epoch": 835.328947368421, "grad_norm": 1.8526750802993774, "learning_rate": 0.0001, "loss": 0.0124, "step": 126970 }, { "epoch": 835.3947368421053, "grad_norm": 1.6694395542144775, "learning_rate": 0.0001, "loss": 0.0133, "step": 126980 }, { "epoch": 835.4605263157895, "grad_norm": 1.5085737705230713, "learning_rate": 0.0001, "loss": 0.0154, "step": 126990 }, { "epoch": 835.5263157894736, "grad_norm": 1.250558853149414, "learning_rate": 0.0001, "loss": 0.0141, "step": 127000 }, { "epoch": 835.5921052631579, "grad_norm": 1.4129488468170166, "learning_rate": 0.0001, "loss": 0.0126, "step": 127010 }, { "epoch": 835.6578947368421, "grad_norm": 1.373984694480896, "learning_rate": 0.0001, "loss": 0.0142, "step": 127020 }, { "epoch": 835.7236842105264, "grad_norm": 1.3968169689178467, "learning_rate": 0.0001, "loss": 0.0115, "step": 127030 }, { "epoch": 835.7894736842105, "grad_norm": 1.568843960762024, "learning_rate": 0.0001, "loss": 0.0104, "step": 127040 }, { "epoch": 835.8552631578947, "grad_norm": 1.538673996925354, "learning_rate": 0.0001, "loss": 0.0096, "step": 127050 }, { "epoch": 835.921052631579, "grad_norm": 1.7785552740097046, "learning_rate": 0.0001, "loss": 0.0095, "step": 127060 }, { "epoch": 835.9868421052631, "grad_norm": 1.5218695402145386, "learning_rate": 0.0001, "loss": 0.0186, "step": 127070 }, { "epoch": 836.0526315789474, "grad_norm": 1.7253493070602417, "learning_rate": 0.0001, "loss": 0.0146, "step": 127080 }, { "epoch": 836.1184210526316, "grad_norm": 1.5554791688919067, "learning_rate": 0.0001, "loss": 0.0134, "step": 127090 }, { "epoch": 836.1842105263158, "grad_norm": 1.4806194305419922, "learning_rate": 0.0001, "loss": 0.0182, "step": 127100 }, { "epoch": 836.25, "grad_norm": 1.2336506843566895, "learning_rate": 0.0001, "loss": 0.0134, "step": 127110 }, { "epoch": 836.3157894736842, "grad_norm": 1.400492548942566, "learning_rate": 0.0001, "loss": 0.0109, "step": 127120 }, { "epoch": 836.3815789473684, "grad_norm": 1.1616874933242798, "learning_rate": 0.0001, "loss": 0.0152, "step": 127130 }, { "epoch": 836.4473684210526, "grad_norm": 1.3593508005142212, "learning_rate": 0.0001, "loss": 0.0098, "step": 127140 }, { "epoch": 836.5131578947369, "grad_norm": 1.9427191019058228, "learning_rate": 0.0001, "loss": 0.0167, "step": 127150 }, { "epoch": 836.578947368421, "grad_norm": 1.7246780395507812, "learning_rate": 0.0001, "loss": 0.0125, "step": 127160 }, { "epoch": 836.6447368421053, "grad_norm": 1.6979783773422241, "learning_rate": 0.0001, "loss": 0.0113, "step": 127170 }, { "epoch": 836.7105263157895, "grad_norm": 1.4263758659362793, "learning_rate": 0.0001, "loss": 0.0132, "step": 127180 }, { "epoch": 836.7763157894736, "grad_norm": 1.525147557258606, "learning_rate": 0.0001, "loss": 0.0154, "step": 127190 }, { "epoch": 836.8421052631579, "grad_norm": 1.4632771015167236, "learning_rate": 0.0001, "loss": 0.0123, "step": 127200 }, { "epoch": 836.9078947368421, "grad_norm": 1.4130555391311646, "learning_rate": 0.0001, "loss": 0.0126, "step": 127210 }, { "epoch": 836.9736842105264, "grad_norm": 1.5035663843154907, "learning_rate": 0.0001, "loss": 0.0101, "step": 127220 }, { "epoch": 837.0394736842105, "grad_norm": 1.5129663944244385, "learning_rate": 0.0001, "loss": 0.0113, "step": 127230 }, { "epoch": 837.1052631578947, "grad_norm": 1.5813815593719482, "learning_rate": 0.0001, "loss": 0.0104, "step": 127240 }, { "epoch": 837.171052631579, "grad_norm": 1.4464528560638428, "learning_rate": 0.0001, "loss": 0.0136, "step": 127250 }, { "epoch": 837.2368421052631, "grad_norm": 1.6119415760040283, "learning_rate": 0.0001, "loss": 0.0094, "step": 127260 }, { "epoch": 837.3026315789474, "grad_norm": 1.7808480262756348, "learning_rate": 0.0001, "loss": 0.0109, "step": 127270 }, { "epoch": 837.3684210526316, "grad_norm": 1.4925923347473145, "learning_rate": 0.0001, "loss": 0.0149, "step": 127280 }, { "epoch": 837.4342105263158, "grad_norm": 1.4992612600326538, "learning_rate": 0.0001, "loss": 0.0179, "step": 127290 }, { "epoch": 837.5, "grad_norm": 1.4185081720352173, "learning_rate": 0.0001, "loss": 0.0113, "step": 127300 }, { "epoch": 837.5657894736842, "grad_norm": 1.047912836074829, "learning_rate": 0.0001, "loss": 0.0127, "step": 127310 }, { "epoch": 837.6315789473684, "grad_norm": 1.6108087301254272, "learning_rate": 0.0001, "loss": 0.0144, "step": 127320 }, { "epoch": 837.6973684210526, "grad_norm": 1.3575068712234497, "learning_rate": 0.0001, "loss": 0.0123, "step": 127330 }, { "epoch": 837.7631578947369, "grad_norm": 1.4762578010559082, "learning_rate": 0.0001, "loss": 0.0159, "step": 127340 }, { "epoch": 837.828947368421, "grad_norm": 1.61713707447052, "learning_rate": 0.0001, "loss": 0.0143, "step": 127350 }, { "epoch": 837.8947368421053, "grad_norm": 1.2410413026809692, "learning_rate": 0.0001, "loss": 0.0114, "step": 127360 }, { "epoch": 837.9605263157895, "grad_norm": 0.7816634774208069, "learning_rate": 0.0001, "loss": 0.0132, "step": 127370 }, { "epoch": 838.0263157894736, "grad_norm": 1.249527931213379, "learning_rate": 0.0001, "loss": 0.0131, "step": 127380 }, { "epoch": 838.0921052631579, "grad_norm": 1.2249088287353516, "learning_rate": 0.0001, "loss": 0.0134, "step": 127390 }, { "epoch": 838.1578947368421, "grad_norm": 1.504388689994812, "learning_rate": 0.0001, "loss": 0.0119, "step": 127400 }, { "epoch": 838.2236842105264, "grad_norm": 1.5774426460266113, "learning_rate": 0.0001, "loss": 0.0104, "step": 127410 }, { "epoch": 838.2894736842105, "grad_norm": 1.472316026687622, "learning_rate": 0.0001, "loss": 0.0159, "step": 127420 }, { "epoch": 838.3552631578947, "grad_norm": 1.7453241348266602, "learning_rate": 0.0001, "loss": 0.0126, "step": 127430 }, { "epoch": 838.421052631579, "grad_norm": 1.1877670288085938, "learning_rate": 0.0001, "loss": 0.0143, "step": 127440 }, { "epoch": 838.4868421052631, "grad_norm": 1.2130277156829834, "learning_rate": 0.0001, "loss": 0.0143, "step": 127450 }, { "epoch": 838.5526315789474, "grad_norm": 0.9828214645385742, "learning_rate": 0.0001, "loss": 0.0128, "step": 127460 }, { "epoch": 838.6184210526316, "grad_norm": 1.4484864473342896, "learning_rate": 0.0001, "loss": 0.0155, "step": 127470 }, { "epoch": 838.6842105263158, "grad_norm": 1.6588616371154785, "learning_rate": 0.0001, "loss": 0.0114, "step": 127480 }, { "epoch": 838.75, "grad_norm": 1.52891206741333, "learning_rate": 0.0001, "loss": 0.0123, "step": 127490 }, { "epoch": 838.8157894736842, "grad_norm": 1.5845212936401367, "learning_rate": 0.0001, "loss": 0.0121, "step": 127500 }, { "epoch": 838.8815789473684, "grad_norm": 1.8038933277130127, "learning_rate": 0.0001, "loss": 0.0124, "step": 127510 }, { "epoch": 838.9473684210526, "grad_norm": 1.4140574932098389, "learning_rate": 0.0001, "loss": 0.0118, "step": 127520 }, { "epoch": 839.0131578947369, "grad_norm": 1.7665209770202637, "learning_rate": 0.0001, "loss": 0.0162, "step": 127530 }, { "epoch": 839.078947368421, "grad_norm": 1.3560363054275513, "learning_rate": 0.0001, "loss": 0.0145, "step": 127540 }, { "epoch": 839.1447368421053, "grad_norm": 1.7950717210769653, "learning_rate": 0.0001, "loss": 0.0142, "step": 127550 }, { "epoch": 839.2105263157895, "grad_norm": 1.8855546712875366, "learning_rate": 0.0001, "loss": 0.0108, "step": 127560 }, { "epoch": 839.2763157894736, "grad_norm": 1.5760159492492676, "learning_rate": 0.0001, "loss": 0.0094, "step": 127570 }, { "epoch": 839.3421052631579, "grad_norm": 0.828997015953064, "learning_rate": 0.0001, "loss": 0.0138, "step": 127580 }, { "epoch": 839.4078947368421, "grad_norm": 1.5577040910720825, "learning_rate": 0.0001, "loss": 0.0141, "step": 127590 }, { "epoch": 839.4736842105264, "grad_norm": 0.9444478154182434, "learning_rate": 0.0001, "loss": 0.0117, "step": 127600 }, { "epoch": 839.5394736842105, "grad_norm": 1.784310221672058, "learning_rate": 0.0001, "loss": 0.0147, "step": 127610 }, { "epoch": 839.6052631578947, "grad_norm": 1.2272366285324097, "learning_rate": 0.0001, "loss": 0.0143, "step": 127620 }, { "epoch": 839.671052631579, "grad_norm": 1.7943968772888184, "learning_rate": 0.0001, "loss": 0.0126, "step": 127630 }, { "epoch": 839.7368421052631, "grad_norm": 1.029978632926941, "learning_rate": 0.0001, "loss": 0.0165, "step": 127640 }, { "epoch": 839.8026315789474, "grad_norm": 1.5970288515090942, "learning_rate": 0.0001, "loss": 0.0121, "step": 127650 }, { "epoch": 839.8684210526316, "grad_norm": 1.6737143993377686, "learning_rate": 0.0001, "loss": 0.0125, "step": 127660 }, { "epoch": 839.9342105263158, "grad_norm": 1.6231416463851929, "learning_rate": 0.0001, "loss": 0.0149, "step": 127670 }, { "epoch": 840.0, "grad_norm": 1.3785442113876343, "learning_rate": 0.0001, "loss": 0.0132, "step": 127680 }, { "epoch": 840.0657894736842, "grad_norm": 1.7770893573760986, "learning_rate": 0.0001, "loss": 0.0148, "step": 127690 }, { "epoch": 840.1315789473684, "grad_norm": 1.3281002044677734, "learning_rate": 0.0001, "loss": 0.0156, "step": 127700 }, { "epoch": 840.1973684210526, "grad_norm": 1.26578950881958, "learning_rate": 0.0001, "loss": 0.0096, "step": 127710 }, { "epoch": 840.2631578947369, "grad_norm": 1.6036332845687866, "learning_rate": 0.0001, "loss": 0.013, "step": 127720 }, { "epoch": 840.328947368421, "grad_norm": 1.1342918872833252, "learning_rate": 0.0001, "loss": 0.0143, "step": 127730 }, { "epoch": 840.3947368421053, "grad_norm": 1.6543833017349243, "learning_rate": 0.0001, "loss": 0.011, "step": 127740 }, { "epoch": 840.4605263157895, "grad_norm": 1.5818736553192139, "learning_rate": 0.0001, "loss": 0.0118, "step": 127750 }, { "epoch": 840.5263157894736, "grad_norm": 1.2317699193954468, "learning_rate": 0.0001, "loss": 0.0097, "step": 127760 }, { "epoch": 840.5921052631579, "grad_norm": 1.3232228755950928, "learning_rate": 0.0001, "loss": 0.0115, "step": 127770 }, { "epoch": 840.6578947368421, "grad_norm": 1.7998217344284058, "learning_rate": 0.0001, "loss": 0.0142, "step": 127780 }, { "epoch": 840.7236842105264, "grad_norm": 1.7889560461044312, "learning_rate": 0.0001, "loss": 0.0121, "step": 127790 }, { "epoch": 840.7894736842105, "grad_norm": 1.21452796459198, "learning_rate": 0.0001, "loss": 0.0135, "step": 127800 }, { "epoch": 840.8552631578947, "grad_norm": 1.485236406326294, "learning_rate": 0.0001, "loss": 0.0144, "step": 127810 }, { "epoch": 840.921052631579, "grad_norm": 1.495262861251831, "learning_rate": 0.0001, "loss": 0.0139, "step": 127820 }, { "epoch": 840.9868421052631, "grad_norm": 1.4306681156158447, "learning_rate": 0.0001, "loss": 0.0158, "step": 127830 }, { "epoch": 841.0526315789474, "grad_norm": 1.0317907333374023, "learning_rate": 0.0001, "loss": 0.0113, "step": 127840 }, { "epoch": 841.1184210526316, "grad_norm": 1.4458950757980347, "learning_rate": 0.0001, "loss": 0.0135, "step": 127850 }, { "epoch": 841.1842105263158, "grad_norm": 1.3417842388153076, "learning_rate": 0.0001, "loss": 0.0122, "step": 127860 }, { "epoch": 841.25, "grad_norm": 1.6539883613586426, "learning_rate": 0.0001, "loss": 0.0134, "step": 127870 }, { "epoch": 841.3157894736842, "grad_norm": 1.8009825944900513, "learning_rate": 0.0001, "loss": 0.0127, "step": 127880 }, { "epoch": 841.3815789473684, "grad_norm": 1.9835216999053955, "learning_rate": 0.0001, "loss": 0.0147, "step": 127890 }, { "epoch": 841.4473684210526, "grad_norm": 1.48895263671875, "learning_rate": 0.0001, "loss": 0.015, "step": 127900 }, { "epoch": 841.5131578947369, "grad_norm": 1.5603314638137817, "learning_rate": 0.0001, "loss": 0.0127, "step": 127910 }, { "epoch": 841.578947368421, "grad_norm": 1.417686939239502, "learning_rate": 0.0001, "loss": 0.014, "step": 127920 }, { "epoch": 841.6447368421053, "grad_norm": 1.4720269441604614, "learning_rate": 0.0001, "loss": 0.0141, "step": 127930 }, { "epoch": 841.7105263157895, "grad_norm": 1.206405520439148, "learning_rate": 0.0001, "loss": 0.0099, "step": 127940 }, { "epoch": 841.7763157894736, "grad_norm": 1.1929903030395508, "learning_rate": 0.0001, "loss": 0.0138, "step": 127950 }, { "epoch": 841.8421052631579, "grad_norm": 1.870266318321228, "learning_rate": 0.0001, "loss": 0.0122, "step": 127960 }, { "epoch": 841.9078947368421, "grad_norm": 0.9522457122802734, "learning_rate": 0.0001, "loss": 0.0139, "step": 127970 }, { "epoch": 841.9736842105264, "grad_norm": 1.69962739944458, "learning_rate": 0.0001, "loss": 0.0124, "step": 127980 }, { "epoch": 842.0394736842105, "grad_norm": 1.8159236907958984, "learning_rate": 0.0001, "loss": 0.0146, "step": 127990 }, { "epoch": 842.1052631578947, "grad_norm": 1.798677921295166, "learning_rate": 0.0001, "loss": 0.0134, "step": 128000 }, { "epoch": 842.171052631579, "grad_norm": 1.3907593488693237, "learning_rate": 0.0001, "loss": 0.0127, "step": 128010 }, { "epoch": 842.2368421052631, "grad_norm": 1.3470362424850464, "learning_rate": 0.0001, "loss": 0.015, "step": 128020 }, { "epoch": 842.3026315789474, "grad_norm": 1.4217718839645386, "learning_rate": 0.0001, "loss": 0.0114, "step": 128030 }, { "epoch": 842.3684210526316, "grad_norm": 1.546380877494812, "learning_rate": 0.0001, "loss": 0.0144, "step": 128040 }, { "epoch": 842.4342105263158, "grad_norm": 1.3306132555007935, "learning_rate": 0.0001, "loss": 0.0142, "step": 128050 }, { "epoch": 842.5, "grad_norm": 1.60628080368042, "learning_rate": 0.0001, "loss": 0.0129, "step": 128060 }, { "epoch": 842.5657894736842, "grad_norm": 1.7810248136520386, "learning_rate": 0.0001, "loss": 0.0112, "step": 128070 }, { "epoch": 842.6315789473684, "grad_norm": 1.3879215717315674, "learning_rate": 0.0001, "loss": 0.0134, "step": 128080 }, { "epoch": 842.6973684210526, "grad_norm": 1.5524390935897827, "learning_rate": 0.0001, "loss": 0.012, "step": 128090 }, { "epoch": 842.7631578947369, "grad_norm": 1.6238269805908203, "learning_rate": 0.0001, "loss": 0.0143, "step": 128100 }, { "epoch": 842.828947368421, "grad_norm": 1.418960452079773, "learning_rate": 0.0001, "loss": 0.012, "step": 128110 }, { "epoch": 842.8947368421053, "grad_norm": 1.155429720878601, "learning_rate": 0.0001, "loss": 0.0159, "step": 128120 }, { "epoch": 842.9605263157895, "grad_norm": 1.5110533237457275, "learning_rate": 0.0001, "loss": 0.0102, "step": 128130 }, { "epoch": 843.0263157894736, "grad_norm": 1.85019850730896, "learning_rate": 0.0001, "loss": 0.0142, "step": 128140 }, { "epoch": 843.0921052631579, "grad_norm": 1.8361048698425293, "learning_rate": 0.0001, "loss": 0.0132, "step": 128150 }, { "epoch": 843.1578947368421, "grad_norm": 1.5165399312973022, "learning_rate": 0.0001, "loss": 0.0154, "step": 128160 }, { "epoch": 843.2236842105264, "grad_norm": 1.672363042831421, "learning_rate": 0.0001, "loss": 0.0137, "step": 128170 }, { "epoch": 843.2894736842105, "grad_norm": 1.5210250616073608, "learning_rate": 0.0001, "loss": 0.0133, "step": 128180 }, { "epoch": 843.3552631578947, "grad_norm": 1.6246618032455444, "learning_rate": 0.0001, "loss": 0.0121, "step": 128190 }, { "epoch": 843.421052631579, "grad_norm": 1.3039168119430542, "learning_rate": 0.0001, "loss": 0.0116, "step": 128200 }, { "epoch": 843.4868421052631, "grad_norm": 1.7639374732971191, "learning_rate": 0.0001, "loss": 0.0105, "step": 128210 }, { "epoch": 843.5526315789474, "grad_norm": 1.2475273609161377, "learning_rate": 0.0001, "loss": 0.0156, "step": 128220 }, { "epoch": 843.6184210526316, "grad_norm": 1.5498744249343872, "learning_rate": 0.0001, "loss": 0.0124, "step": 128230 }, { "epoch": 843.6842105263158, "grad_norm": 1.0933154821395874, "learning_rate": 0.0001, "loss": 0.011, "step": 128240 }, { "epoch": 843.75, "grad_norm": 1.8404227495193481, "learning_rate": 0.0001, "loss": 0.0154, "step": 128250 }, { "epoch": 843.8157894736842, "grad_norm": 1.7793269157409668, "learning_rate": 0.0001, "loss": 0.0142, "step": 128260 }, { "epoch": 843.8815789473684, "grad_norm": 1.712494969367981, "learning_rate": 0.0001, "loss": 0.012, "step": 128270 }, { "epoch": 843.9473684210526, "grad_norm": 1.4744635820388794, "learning_rate": 0.0001, "loss": 0.0127, "step": 128280 }, { "epoch": 844.0131578947369, "grad_norm": 1.4797072410583496, "learning_rate": 0.0001, "loss": 0.0165, "step": 128290 }, { "epoch": 844.078947368421, "grad_norm": 1.0847984552383423, "learning_rate": 0.0001, "loss": 0.0141, "step": 128300 }, { "epoch": 844.1447368421053, "grad_norm": 1.4059889316558838, "learning_rate": 0.0001, "loss": 0.0114, "step": 128310 }, { "epoch": 844.2105263157895, "grad_norm": 1.568772315979004, "learning_rate": 0.0001, "loss": 0.0115, "step": 128320 }, { "epoch": 844.2763157894736, "grad_norm": 1.880004644393921, "learning_rate": 0.0001, "loss": 0.0167, "step": 128330 }, { "epoch": 844.3421052631579, "grad_norm": 1.4563026428222656, "learning_rate": 0.0001, "loss": 0.0143, "step": 128340 }, { "epoch": 844.4078947368421, "grad_norm": 1.4504069089889526, "learning_rate": 0.0001, "loss": 0.0147, "step": 128350 }, { "epoch": 844.4736842105264, "grad_norm": 1.7570078372955322, "learning_rate": 0.0001, "loss": 0.0119, "step": 128360 }, { "epoch": 844.5394736842105, "grad_norm": 1.7093024253845215, "learning_rate": 0.0001, "loss": 0.0127, "step": 128370 }, { "epoch": 844.6052631578947, "grad_norm": 1.3966631889343262, "learning_rate": 0.0001, "loss": 0.0155, "step": 128380 }, { "epoch": 844.671052631579, "grad_norm": 1.432206630706787, "learning_rate": 0.0001, "loss": 0.0113, "step": 128390 }, { "epoch": 844.7368421052631, "grad_norm": 1.2168960571289062, "learning_rate": 0.0001, "loss": 0.0148, "step": 128400 }, { "epoch": 844.8026315789474, "grad_norm": 1.293298602104187, "learning_rate": 0.0001, "loss": 0.0138, "step": 128410 }, { "epoch": 844.8684210526316, "grad_norm": 1.3463494777679443, "learning_rate": 0.0001, "loss": 0.0108, "step": 128420 }, { "epoch": 844.9342105263158, "grad_norm": 1.1201120615005493, "learning_rate": 0.0001, "loss": 0.012, "step": 128430 }, { "epoch": 845.0, "grad_norm": 1.6945960521697998, "learning_rate": 0.0001, "loss": 0.0111, "step": 128440 }, { "epoch": 845.0657894736842, "grad_norm": 1.574524998664856, "learning_rate": 0.0001, "loss": 0.0113, "step": 128450 }, { "epoch": 845.1315789473684, "grad_norm": 1.5936343669891357, "learning_rate": 0.0001, "loss": 0.0147, "step": 128460 }, { "epoch": 845.1973684210526, "grad_norm": 1.3832329511642456, "learning_rate": 0.0001, "loss": 0.0141, "step": 128470 }, { "epoch": 845.2631578947369, "grad_norm": 1.586025595664978, "learning_rate": 0.0001, "loss": 0.011, "step": 128480 }, { "epoch": 845.328947368421, "grad_norm": 1.2334505319595337, "learning_rate": 0.0001, "loss": 0.0107, "step": 128490 }, { "epoch": 845.3947368421053, "grad_norm": 1.4667989015579224, "learning_rate": 0.0001, "loss": 0.0142, "step": 128500 }, { "epoch": 845.4605263157895, "grad_norm": 1.279849648475647, "learning_rate": 0.0001, "loss": 0.0165, "step": 128510 }, { "epoch": 845.5263157894736, "grad_norm": 1.4054807424545288, "learning_rate": 0.0001, "loss": 0.0099, "step": 128520 }, { "epoch": 845.5921052631579, "grad_norm": 1.8421655893325806, "learning_rate": 0.0001, "loss": 0.016, "step": 128530 }, { "epoch": 845.6578947368421, "grad_norm": 0.9922417998313904, "learning_rate": 0.0001, "loss": 0.0103, "step": 128540 }, { "epoch": 845.7236842105264, "grad_norm": 1.5455456972122192, "learning_rate": 0.0001, "loss": 0.0109, "step": 128550 }, { "epoch": 845.7894736842105, "grad_norm": 1.6864502429962158, "learning_rate": 0.0001, "loss": 0.0113, "step": 128560 }, { "epoch": 845.8552631578947, "grad_norm": 1.3593113422393799, "learning_rate": 0.0001, "loss": 0.0172, "step": 128570 }, { "epoch": 845.921052631579, "grad_norm": 1.8927048444747925, "learning_rate": 0.0001, "loss": 0.0144, "step": 128580 }, { "epoch": 845.9868421052631, "grad_norm": 1.5031249523162842, "learning_rate": 0.0001, "loss": 0.0171, "step": 128590 }, { "epoch": 846.0526315789474, "grad_norm": 0.9868516325950623, "learning_rate": 0.0001, "loss": 0.0163, "step": 128600 }, { "epoch": 846.1184210526316, "grad_norm": 1.3906810283660889, "learning_rate": 0.0001, "loss": 0.0148, "step": 128610 }, { "epoch": 846.1842105263158, "grad_norm": 1.1537144184112549, "learning_rate": 0.0001, "loss": 0.0109, "step": 128620 }, { "epoch": 846.25, "grad_norm": 1.6551408767700195, "learning_rate": 0.0001, "loss": 0.0114, "step": 128630 }, { "epoch": 846.3157894736842, "grad_norm": 1.3328627347946167, "learning_rate": 0.0001, "loss": 0.0149, "step": 128640 }, { "epoch": 846.3815789473684, "grad_norm": 2.0664422512054443, "learning_rate": 0.0001, "loss": 0.0129, "step": 128650 }, { "epoch": 846.4473684210526, "grad_norm": 1.5797702074050903, "learning_rate": 0.0001, "loss": 0.012, "step": 128660 }, { "epoch": 846.5131578947369, "grad_norm": 1.5014290809631348, "learning_rate": 0.0001, "loss": 0.0148, "step": 128670 }, { "epoch": 846.578947368421, "grad_norm": 1.6501587629318237, "learning_rate": 0.0001, "loss": 0.0177, "step": 128680 }, { "epoch": 846.6447368421053, "grad_norm": 1.4767999649047852, "learning_rate": 0.0001, "loss": 0.0102, "step": 128690 }, { "epoch": 846.7105263157895, "grad_norm": 1.3815040588378906, "learning_rate": 0.0001, "loss": 0.0098, "step": 128700 }, { "epoch": 846.7763157894736, "grad_norm": 1.3650782108306885, "learning_rate": 0.0001, "loss": 0.0129, "step": 128710 }, { "epoch": 846.8421052631579, "grad_norm": 1.112280011177063, "learning_rate": 0.0001, "loss": 0.014, "step": 128720 }, { "epoch": 846.9078947368421, "grad_norm": 1.1373845338821411, "learning_rate": 0.0001, "loss": 0.0149, "step": 128730 }, { "epoch": 846.9736842105264, "grad_norm": 1.133133888244629, "learning_rate": 0.0001, "loss": 0.0133, "step": 128740 }, { "epoch": 847.0394736842105, "grad_norm": 1.4170300960540771, "learning_rate": 0.0001, "loss": 0.0119, "step": 128750 }, { "epoch": 847.1052631578947, "grad_norm": 1.4815239906311035, "learning_rate": 0.0001, "loss": 0.0152, "step": 128760 }, { "epoch": 847.171052631579, "grad_norm": 1.898679494857788, "learning_rate": 0.0001, "loss": 0.0158, "step": 128770 }, { "epoch": 847.2368421052631, "grad_norm": 1.3651937246322632, "learning_rate": 0.0001, "loss": 0.0099, "step": 128780 }, { "epoch": 847.3026315789474, "grad_norm": 1.2861056327819824, "learning_rate": 0.0001, "loss": 0.014, "step": 128790 }, { "epoch": 847.3684210526316, "grad_norm": 1.5860954523086548, "learning_rate": 0.0001, "loss": 0.0115, "step": 128800 }, { "epoch": 847.4342105263158, "grad_norm": 1.5797587633132935, "learning_rate": 0.0001, "loss": 0.0131, "step": 128810 }, { "epoch": 847.5, "grad_norm": 1.0161339044570923, "learning_rate": 0.0001, "loss": 0.0152, "step": 128820 }, { "epoch": 847.5657894736842, "grad_norm": 1.0450996160507202, "learning_rate": 0.0001, "loss": 0.0138, "step": 128830 }, { "epoch": 847.6315789473684, "grad_norm": 2.173454523086548, "learning_rate": 0.0001, "loss": 0.014, "step": 128840 }, { "epoch": 847.6973684210526, "grad_norm": 1.5346571207046509, "learning_rate": 0.0001, "loss": 0.0119, "step": 128850 }, { "epoch": 847.7631578947369, "grad_norm": 1.4090259075164795, "learning_rate": 0.0001, "loss": 0.0133, "step": 128860 }, { "epoch": 847.828947368421, "grad_norm": 1.3212833404541016, "learning_rate": 0.0001, "loss": 0.0168, "step": 128870 }, { "epoch": 847.8947368421053, "grad_norm": 1.3726396560668945, "learning_rate": 0.0001, "loss": 0.0107, "step": 128880 }, { "epoch": 847.9605263157895, "grad_norm": 1.776389241218567, "learning_rate": 0.0001, "loss": 0.0116, "step": 128890 }, { "epoch": 848.0263157894736, "grad_norm": 1.323431372642517, "learning_rate": 0.0001, "loss": 0.0122, "step": 128900 }, { "epoch": 848.0921052631579, "grad_norm": 1.3965283632278442, "learning_rate": 0.0001, "loss": 0.0143, "step": 128910 }, { "epoch": 848.1578947368421, "grad_norm": 1.7727373838424683, "learning_rate": 0.0001, "loss": 0.0113, "step": 128920 }, { "epoch": 848.2236842105264, "grad_norm": 1.466296911239624, "learning_rate": 0.0001, "loss": 0.0128, "step": 128930 }, { "epoch": 848.2894736842105, "grad_norm": 1.5678622722625732, "learning_rate": 0.0001, "loss": 0.0106, "step": 128940 }, { "epoch": 848.3552631578947, "grad_norm": 1.1464684009552002, "learning_rate": 0.0001, "loss": 0.0118, "step": 128950 }, { "epoch": 848.421052631579, "grad_norm": 1.2943122386932373, "learning_rate": 0.0001, "loss": 0.0164, "step": 128960 }, { "epoch": 848.4868421052631, "grad_norm": 1.3068835735321045, "learning_rate": 0.0001, "loss": 0.0164, "step": 128970 }, { "epoch": 848.5526315789474, "grad_norm": 0.9412446022033691, "learning_rate": 0.0001, "loss": 0.0119, "step": 128980 }, { "epoch": 848.6184210526316, "grad_norm": 1.3763787746429443, "learning_rate": 0.0001, "loss": 0.0119, "step": 128990 }, { "epoch": 848.6842105263158, "grad_norm": 1.4490243196487427, "learning_rate": 0.0001, "loss": 0.0133, "step": 129000 }, { "epoch": 848.75, "grad_norm": 1.553820013999939, "learning_rate": 0.0001, "loss": 0.0108, "step": 129010 }, { "epoch": 848.8157894736842, "grad_norm": 1.5623290538787842, "learning_rate": 0.0001, "loss": 0.0149, "step": 129020 }, { "epoch": 848.8815789473684, "grad_norm": 1.6200385093688965, "learning_rate": 0.0001, "loss": 0.0136, "step": 129030 }, { "epoch": 848.9473684210526, "grad_norm": 2.6114861965179443, "learning_rate": 0.0001, "loss": 0.0179, "step": 129040 }, { "epoch": 849.0131578947369, "grad_norm": 2.2896111011505127, "learning_rate": 0.0001, "loss": 0.0222, "step": 129050 }, { "epoch": 849.078947368421, "grad_norm": 1.9408037662506104, "learning_rate": 0.0001, "loss": 0.0139, "step": 129060 }, { "epoch": 849.1447368421053, "grad_norm": 1.635026216506958, "learning_rate": 0.0001, "loss": 0.015, "step": 129070 }, { "epoch": 849.2105263157895, "grad_norm": 1.7268263101577759, "learning_rate": 0.0001, "loss": 0.0115, "step": 129080 }, { "epoch": 849.2763157894736, "grad_norm": 1.3875731229782104, "learning_rate": 0.0001, "loss": 0.0126, "step": 129090 }, { "epoch": 849.3421052631579, "grad_norm": 1.5395654439926147, "learning_rate": 0.0001, "loss": 0.0121, "step": 129100 }, { "epoch": 849.4078947368421, "grad_norm": 1.9239784479141235, "learning_rate": 0.0001, "loss": 0.0105, "step": 129110 }, { "epoch": 849.4736842105264, "grad_norm": 1.6761680841445923, "learning_rate": 0.0001, "loss": 0.0105, "step": 129120 }, { "epoch": 849.5394736842105, "grad_norm": 1.6527289152145386, "learning_rate": 0.0001, "loss": 0.0163, "step": 129130 }, { "epoch": 849.6052631578947, "grad_norm": 1.4163426160812378, "learning_rate": 0.0001, "loss": 0.0143, "step": 129140 }, { "epoch": 849.671052631579, "grad_norm": 1.641106367111206, "learning_rate": 0.0001, "loss": 0.0116, "step": 129150 }, { "epoch": 849.7368421052631, "grad_norm": 1.3616670370101929, "learning_rate": 0.0001, "loss": 0.0147, "step": 129160 }, { "epoch": 849.8026315789474, "grad_norm": 1.4527229070663452, "learning_rate": 0.0001, "loss": 0.011, "step": 129170 }, { "epoch": 849.8684210526316, "grad_norm": 1.6242402791976929, "learning_rate": 0.0001, "loss": 0.0142, "step": 129180 }, { "epoch": 849.9342105263158, "grad_norm": 1.5288060903549194, "learning_rate": 0.0001, "loss": 0.0124, "step": 129190 }, { "epoch": 850.0, "grad_norm": 1.6721277236938477, "learning_rate": 0.0001, "loss": 0.0134, "step": 129200 }, { "epoch": 850.0657894736842, "grad_norm": 1.4394820928573608, "learning_rate": 0.0001, "loss": 0.0134, "step": 129210 }, { "epoch": 850.1315789473684, "grad_norm": 1.6595405340194702, "learning_rate": 0.0001, "loss": 0.014, "step": 129220 }, { "epoch": 850.1973684210526, "grad_norm": 1.2086169719696045, "learning_rate": 0.0001, "loss": 0.0125, "step": 129230 }, { "epoch": 850.2631578947369, "grad_norm": 1.1591092348098755, "learning_rate": 0.0001, "loss": 0.0132, "step": 129240 }, { "epoch": 850.328947368421, "grad_norm": 1.5550200939178467, "learning_rate": 0.0001, "loss": 0.0159, "step": 129250 }, { "epoch": 850.3947368421053, "grad_norm": 1.5437617301940918, "learning_rate": 0.0001, "loss": 0.0129, "step": 129260 }, { "epoch": 850.4605263157895, "grad_norm": 1.6220146417617798, "learning_rate": 0.0001, "loss": 0.01, "step": 129270 }, { "epoch": 850.5263157894736, "grad_norm": 1.8756314516067505, "learning_rate": 0.0001, "loss": 0.0149, "step": 129280 }, { "epoch": 850.5921052631579, "grad_norm": 1.6719056367874146, "learning_rate": 0.0001, "loss": 0.013, "step": 129290 }, { "epoch": 850.6578947368421, "grad_norm": 1.3474540710449219, "learning_rate": 0.0001, "loss": 0.0145, "step": 129300 }, { "epoch": 850.7236842105264, "grad_norm": 1.8067626953125, "learning_rate": 0.0001, "loss": 0.0123, "step": 129310 }, { "epoch": 850.7894736842105, "grad_norm": 1.3616658449172974, "learning_rate": 0.0001, "loss": 0.0121, "step": 129320 }, { "epoch": 850.8552631578947, "grad_norm": 1.8094897270202637, "learning_rate": 0.0001, "loss": 0.0132, "step": 129330 }, { "epoch": 850.921052631579, "grad_norm": 1.0884069204330444, "learning_rate": 0.0001, "loss": 0.0142, "step": 129340 }, { "epoch": 850.9868421052631, "grad_norm": 1.4968944787979126, "learning_rate": 0.0001, "loss": 0.0116, "step": 129350 }, { "epoch": 851.0526315789474, "grad_norm": 1.634526014328003, "learning_rate": 0.0001, "loss": 0.0139, "step": 129360 }, { "epoch": 851.1184210526316, "grad_norm": 1.3790223598480225, "learning_rate": 0.0001, "loss": 0.0139, "step": 129370 }, { "epoch": 851.1842105263158, "grad_norm": 1.3721470832824707, "learning_rate": 0.0001, "loss": 0.0153, "step": 129380 }, { "epoch": 851.25, "grad_norm": 0.9673137068748474, "learning_rate": 0.0001, "loss": 0.0103, "step": 129390 }, { "epoch": 851.3157894736842, "grad_norm": 1.4997926950454712, "learning_rate": 0.0001, "loss": 0.0171, "step": 129400 }, { "epoch": 851.3815789473684, "grad_norm": 1.3893994092941284, "learning_rate": 0.0001, "loss": 0.0116, "step": 129410 }, { "epoch": 851.4473684210526, "grad_norm": 1.2114200592041016, "learning_rate": 0.0001, "loss": 0.0123, "step": 129420 }, { "epoch": 851.5131578947369, "grad_norm": 1.6372069120407104, "learning_rate": 0.0001, "loss": 0.0108, "step": 129430 }, { "epoch": 851.578947368421, "grad_norm": 1.3591995239257812, "learning_rate": 0.0001, "loss": 0.0111, "step": 129440 }, { "epoch": 851.6447368421053, "grad_norm": 1.0732110738754272, "learning_rate": 0.0001, "loss": 0.0137, "step": 129450 }, { "epoch": 851.7105263157895, "grad_norm": 1.58357572555542, "learning_rate": 0.0001, "loss": 0.0131, "step": 129460 }, { "epoch": 851.7763157894736, "grad_norm": 0.8905077576637268, "learning_rate": 0.0001, "loss": 0.0133, "step": 129470 }, { "epoch": 851.8421052631579, "grad_norm": 1.677322268486023, "learning_rate": 0.0001, "loss": 0.0106, "step": 129480 }, { "epoch": 851.9078947368421, "grad_norm": 1.3726669549942017, "learning_rate": 0.0001, "loss": 0.017, "step": 129490 }, { "epoch": 851.9736842105264, "grad_norm": 1.7905985116958618, "learning_rate": 0.0001, "loss": 0.0135, "step": 129500 }, { "epoch": 852.0394736842105, "grad_norm": 1.7176005840301514, "learning_rate": 0.0001, "loss": 0.0125, "step": 129510 }, { "epoch": 852.1052631578947, "grad_norm": 1.4477554559707642, "learning_rate": 0.0001, "loss": 0.0111, "step": 129520 }, { "epoch": 852.171052631579, "grad_norm": 1.6084524393081665, "learning_rate": 0.0001, "loss": 0.0155, "step": 129530 }, { "epoch": 852.2368421052631, "grad_norm": 1.5124257802963257, "learning_rate": 0.0001, "loss": 0.0128, "step": 129540 }, { "epoch": 852.3026315789474, "grad_norm": 1.3306307792663574, "learning_rate": 0.0001, "loss": 0.0098, "step": 129550 }, { "epoch": 852.3684210526316, "grad_norm": 1.4141188859939575, "learning_rate": 0.0001, "loss": 0.0183, "step": 129560 }, { "epoch": 852.4342105263158, "grad_norm": 1.2165946960449219, "learning_rate": 0.0001, "loss": 0.0189, "step": 129570 }, { "epoch": 852.5, "grad_norm": 1.8171554803848267, "learning_rate": 0.0001, "loss": 0.0136, "step": 129580 }, { "epoch": 852.5657894736842, "grad_norm": 1.6328552961349487, "learning_rate": 0.0001, "loss": 0.0109, "step": 129590 }, { "epoch": 852.6315789473684, "grad_norm": 1.7113800048828125, "learning_rate": 0.0001, "loss": 0.0128, "step": 129600 }, { "epoch": 852.6973684210526, "grad_norm": 1.7208938598632812, "learning_rate": 0.0001, "loss": 0.0109, "step": 129610 }, { "epoch": 852.7631578947369, "grad_norm": 1.7529088258743286, "learning_rate": 0.0001, "loss": 0.0112, "step": 129620 }, { "epoch": 852.828947368421, "grad_norm": 1.3160340785980225, "learning_rate": 0.0001, "loss": 0.012, "step": 129630 }, { "epoch": 852.8947368421053, "grad_norm": 1.491804599761963, "learning_rate": 0.0001, "loss": 0.0126, "step": 129640 }, { "epoch": 852.9605263157895, "grad_norm": 1.0496944189071655, "learning_rate": 0.0001, "loss": 0.0119, "step": 129650 }, { "epoch": 853.0263157894736, "grad_norm": 1.2077083587646484, "learning_rate": 0.0001, "loss": 0.0127, "step": 129660 }, { "epoch": 853.0921052631579, "grad_norm": 1.2871067523956299, "learning_rate": 0.0001, "loss": 0.0136, "step": 129670 }, { "epoch": 853.1578947368421, "grad_norm": 1.1103248596191406, "learning_rate": 0.0001, "loss": 0.0129, "step": 129680 }, { "epoch": 853.2236842105264, "grad_norm": 1.5791008472442627, "learning_rate": 0.0001, "loss": 0.0115, "step": 129690 }, { "epoch": 853.2894736842105, "grad_norm": 1.55245840549469, "learning_rate": 0.0001, "loss": 0.014, "step": 129700 }, { "epoch": 853.3552631578947, "grad_norm": 1.2843526601791382, "learning_rate": 0.0001, "loss": 0.0096, "step": 129710 }, { "epoch": 853.421052631579, "grad_norm": 1.1579642295837402, "learning_rate": 0.0001, "loss": 0.0105, "step": 129720 }, { "epoch": 853.4868421052631, "grad_norm": 1.2649035453796387, "learning_rate": 0.0001, "loss": 0.015, "step": 129730 }, { "epoch": 853.5526315789474, "grad_norm": 1.2496283054351807, "learning_rate": 0.0001, "loss": 0.0142, "step": 129740 }, { "epoch": 853.6184210526316, "grad_norm": 1.5370819568634033, "learning_rate": 0.0001, "loss": 0.0128, "step": 129750 }, { "epoch": 853.6842105263158, "grad_norm": 1.8471126556396484, "learning_rate": 0.0001, "loss": 0.0166, "step": 129760 }, { "epoch": 853.75, "grad_norm": 1.1212924718856812, "learning_rate": 0.0001, "loss": 0.0158, "step": 129770 }, { "epoch": 853.8157894736842, "grad_norm": 1.7006926536560059, "learning_rate": 0.0001, "loss": 0.0135, "step": 129780 }, { "epoch": 853.8815789473684, "grad_norm": 1.6799043416976929, "learning_rate": 0.0001, "loss": 0.0159, "step": 129790 }, { "epoch": 853.9473684210526, "grad_norm": 1.1937743425369263, "learning_rate": 0.0001, "loss": 0.0125, "step": 129800 }, { "epoch": 854.0131578947369, "grad_norm": 1.3522237539291382, "learning_rate": 0.0001, "loss": 0.0116, "step": 129810 }, { "epoch": 854.078947368421, "grad_norm": 1.6762014627456665, "learning_rate": 0.0001, "loss": 0.0161, "step": 129820 }, { "epoch": 854.1447368421053, "grad_norm": 1.3619178533554077, "learning_rate": 0.0001, "loss": 0.0138, "step": 129830 }, { "epoch": 854.2105263157895, "grad_norm": 1.2571498155593872, "learning_rate": 0.0001, "loss": 0.0167, "step": 129840 }, { "epoch": 854.2763157894736, "grad_norm": 1.0626895427703857, "learning_rate": 0.0001, "loss": 0.0121, "step": 129850 }, { "epoch": 854.3421052631579, "grad_norm": 1.5948164463043213, "learning_rate": 0.0001, "loss": 0.0116, "step": 129860 }, { "epoch": 854.4078947368421, "grad_norm": 1.7085438966751099, "learning_rate": 0.0001, "loss": 0.0122, "step": 129870 }, { "epoch": 854.4736842105264, "grad_norm": 1.399874210357666, "learning_rate": 0.0001, "loss": 0.0107, "step": 129880 }, { "epoch": 854.5394736842105, "grad_norm": 1.5862646102905273, "learning_rate": 0.0001, "loss": 0.0137, "step": 129890 }, { "epoch": 854.6052631578947, "grad_norm": 1.8104294538497925, "learning_rate": 0.0001, "loss": 0.0126, "step": 129900 }, { "epoch": 854.671052631579, "grad_norm": 1.3822013139724731, "learning_rate": 0.0001, "loss": 0.0127, "step": 129910 }, { "epoch": 854.7368421052631, "grad_norm": 1.7764885425567627, "learning_rate": 0.0001, "loss": 0.0161, "step": 129920 }, { "epoch": 854.8026315789474, "grad_norm": 1.3469127416610718, "learning_rate": 0.0001, "loss": 0.0107, "step": 129930 }, { "epoch": 854.8684210526316, "grad_norm": 1.6380970478057861, "learning_rate": 0.0001, "loss": 0.013, "step": 129940 }, { "epoch": 854.9342105263158, "grad_norm": 1.8100323677062988, "learning_rate": 0.0001, "loss": 0.012, "step": 129950 }, { "epoch": 855.0, "grad_norm": 1.6590907573699951, "learning_rate": 0.0001, "loss": 0.0107, "step": 129960 }, { "epoch": 855.0657894736842, "grad_norm": 1.3243236541748047, "learning_rate": 0.0001, "loss": 0.0115, "step": 129970 }, { "epoch": 855.1315789473684, "grad_norm": 1.2228033542633057, "learning_rate": 0.0001, "loss": 0.0136, "step": 129980 }, { "epoch": 855.1973684210526, "grad_norm": 1.0824109315872192, "learning_rate": 0.0001, "loss": 0.0113, "step": 129990 }, { "epoch": 855.2631578947369, "grad_norm": 1.342780351638794, "learning_rate": 0.0001, "loss": 0.0098, "step": 130000 }, { "epoch": 855.328947368421, "grad_norm": 1.5094133615493774, "learning_rate": 0.0001, "loss": 0.0152, "step": 130010 }, { "epoch": 855.3947368421053, "grad_norm": 1.6514734029769897, "learning_rate": 0.0001, "loss": 0.013, "step": 130020 }, { "epoch": 855.4605263157895, "grad_norm": 1.2201961278915405, "learning_rate": 0.0001, "loss": 0.0118, "step": 130030 }, { "epoch": 855.5263157894736, "grad_norm": 1.395311713218689, "learning_rate": 0.0001, "loss": 0.0145, "step": 130040 }, { "epoch": 855.5921052631579, "grad_norm": 1.3920435905456543, "learning_rate": 0.0001, "loss": 0.0145, "step": 130050 }, { "epoch": 855.6578947368421, "grad_norm": 1.4616286754608154, "learning_rate": 0.0001, "loss": 0.0119, "step": 130060 }, { "epoch": 855.7236842105264, "grad_norm": 1.841299057006836, "learning_rate": 0.0001, "loss": 0.0103, "step": 130070 }, { "epoch": 855.7894736842105, "grad_norm": 1.5075010061264038, "learning_rate": 0.0001, "loss": 0.0171, "step": 130080 }, { "epoch": 855.8552631578947, "grad_norm": 1.7803013324737549, "learning_rate": 0.0001, "loss": 0.0164, "step": 130090 }, { "epoch": 855.921052631579, "grad_norm": 1.9109123945236206, "learning_rate": 0.0001, "loss": 0.0134, "step": 130100 }, { "epoch": 855.9868421052631, "grad_norm": 1.2300550937652588, "learning_rate": 0.0001, "loss": 0.0135, "step": 130110 }, { "epoch": 856.0526315789474, "grad_norm": 1.398758053779602, "learning_rate": 0.0001, "loss": 0.0097, "step": 130120 }, { "epoch": 856.1184210526316, "grad_norm": 1.7074134349822998, "learning_rate": 0.0001, "loss": 0.0115, "step": 130130 }, { "epoch": 856.1842105263158, "grad_norm": 1.4552608728408813, "learning_rate": 0.0001, "loss": 0.0119, "step": 130140 }, { "epoch": 856.25, "grad_norm": 1.565284252166748, "learning_rate": 0.0001, "loss": 0.0096, "step": 130150 }, { "epoch": 856.3157894736842, "grad_norm": 1.3789268732070923, "learning_rate": 0.0001, "loss": 0.017, "step": 130160 }, { "epoch": 856.3815789473684, "grad_norm": 1.1609442234039307, "learning_rate": 0.0001, "loss": 0.0135, "step": 130170 }, { "epoch": 856.4473684210526, "grad_norm": 1.494024395942688, "learning_rate": 0.0001, "loss": 0.016, "step": 130180 }, { "epoch": 856.5131578947369, "grad_norm": 1.2059389352798462, "learning_rate": 0.0001, "loss": 0.0164, "step": 130190 }, { "epoch": 856.578947368421, "grad_norm": 1.5486243963241577, "learning_rate": 0.0001, "loss": 0.0146, "step": 130200 }, { "epoch": 856.6447368421053, "grad_norm": 1.2235527038574219, "learning_rate": 0.0001, "loss": 0.0101, "step": 130210 }, { "epoch": 856.7105263157895, "grad_norm": 1.2480541467666626, "learning_rate": 0.0001, "loss": 0.0174, "step": 130220 }, { "epoch": 856.7763157894736, "grad_norm": 1.5375417470932007, "learning_rate": 0.0001, "loss": 0.0109, "step": 130230 }, { "epoch": 856.8421052631579, "grad_norm": 1.1191130876541138, "learning_rate": 0.0001, "loss": 0.0119, "step": 130240 }, { "epoch": 856.9078947368421, "grad_norm": 1.062938928604126, "learning_rate": 0.0001, "loss": 0.0121, "step": 130250 }, { "epoch": 856.9736842105264, "grad_norm": 1.1632410287857056, "learning_rate": 0.0001, "loss": 0.0125, "step": 130260 }, { "epoch": 857.0394736842105, "grad_norm": 1.403309941291809, "learning_rate": 0.0001, "loss": 0.0144, "step": 130270 }, { "epoch": 857.1052631578947, "grad_norm": 1.6076477766036987, "learning_rate": 0.0001, "loss": 0.013, "step": 130280 }, { "epoch": 857.171052631579, "grad_norm": 1.357860803604126, "learning_rate": 0.0001, "loss": 0.0161, "step": 130290 }, { "epoch": 857.2368421052631, "grad_norm": 1.32163405418396, "learning_rate": 0.0001, "loss": 0.0119, "step": 130300 }, { "epoch": 857.3026315789474, "grad_norm": 1.258955717086792, "learning_rate": 0.0001, "loss": 0.011, "step": 130310 }, { "epoch": 857.3684210526316, "grad_norm": 1.433882474899292, "learning_rate": 0.0001, "loss": 0.0096, "step": 130320 }, { "epoch": 857.4342105263158, "grad_norm": 1.5774390697479248, "learning_rate": 0.0001, "loss": 0.0128, "step": 130330 }, { "epoch": 857.5, "grad_norm": 1.6434556245803833, "learning_rate": 0.0001, "loss": 0.0121, "step": 130340 }, { "epoch": 857.5657894736842, "grad_norm": 1.6701419353485107, "learning_rate": 0.0001, "loss": 0.0143, "step": 130350 }, { "epoch": 857.6315789473684, "grad_norm": 1.7813105583190918, "learning_rate": 0.0001, "loss": 0.0135, "step": 130360 }, { "epoch": 857.6973684210526, "grad_norm": 1.7335299253463745, "learning_rate": 0.0001, "loss": 0.0128, "step": 130370 }, { "epoch": 857.7631578947369, "grad_norm": 1.261035442352295, "learning_rate": 0.0001, "loss": 0.0169, "step": 130380 }, { "epoch": 857.828947368421, "grad_norm": 1.5416076183319092, "learning_rate": 0.0001, "loss": 0.0106, "step": 130390 }, { "epoch": 857.8947368421053, "grad_norm": 1.2641339302062988, "learning_rate": 0.0001, "loss": 0.0114, "step": 130400 }, { "epoch": 857.9605263157895, "grad_norm": 1.3058863878250122, "learning_rate": 0.0001, "loss": 0.0139, "step": 130410 }, { "epoch": 858.0263157894736, "grad_norm": 0.9381412863731384, "learning_rate": 0.0001, "loss": 0.017, "step": 130420 }, { "epoch": 858.0921052631579, "grad_norm": 1.345221996307373, "learning_rate": 0.0001, "loss": 0.0115, "step": 130430 }, { "epoch": 858.1578947368421, "grad_norm": 0.9904646873474121, "learning_rate": 0.0001, "loss": 0.0115, "step": 130440 }, { "epoch": 858.2236842105264, "grad_norm": 1.2934423685073853, "learning_rate": 0.0001, "loss": 0.0127, "step": 130450 }, { "epoch": 858.2894736842105, "grad_norm": 1.7663406133651733, "learning_rate": 0.0001, "loss": 0.0141, "step": 130460 }, { "epoch": 858.3552631578947, "grad_norm": 1.8176710605621338, "learning_rate": 0.0001, "loss": 0.0131, "step": 130470 }, { "epoch": 858.421052631579, "grad_norm": 1.7278478145599365, "learning_rate": 0.0001, "loss": 0.0131, "step": 130480 }, { "epoch": 858.4868421052631, "grad_norm": 1.3529034852981567, "learning_rate": 0.0001, "loss": 0.0111, "step": 130490 }, { "epoch": 858.5526315789474, "grad_norm": 1.4997241497039795, "learning_rate": 0.0001, "loss": 0.0155, "step": 130500 }, { "epoch": 858.6184210526316, "grad_norm": 1.337589144706726, "learning_rate": 0.0001, "loss": 0.0141, "step": 130510 }, { "epoch": 858.6842105263158, "grad_norm": 1.8426896333694458, "learning_rate": 0.0001, "loss": 0.0109, "step": 130520 }, { "epoch": 858.75, "grad_norm": 1.7206863164901733, "learning_rate": 0.0001, "loss": 0.0137, "step": 130530 }, { "epoch": 858.8157894736842, "grad_norm": 1.586830496788025, "learning_rate": 0.0001, "loss": 0.0126, "step": 130540 }, { "epoch": 858.8815789473684, "grad_norm": 1.8624261617660522, "learning_rate": 0.0001, "loss": 0.016, "step": 130550 }, { "epoch": 858.9473684210526, "grad_norm": 2.0432708263397217, "learning_rate": 0.0001, "loss": 0.0093, "step": 130560 }, { "epoch": 859.0131578947369, "grad_norm": 1.454586148262024, "learning_rate": 0.0001, "loss": 0.0154, "step": 130570 }, { "epoch": 859.078947368421, "grad_norm": 1.2513378858566284, "learning_rate": 0.0001, "loss": 0.017, "step": 130580 }, { "epoch": 859.1447368421053, "grad_norm": 1.7244796752929688, "learning_rate": 0.0001, "loss": 0.0166, "step": 130590 }, { "epoch": 859.2105263157895, "grad_norm": 1.7670952081680298, "learning_rate": 0.0001, "loss": 0.0107, "step": 130600 }, { "epoch": 859.2763157894736, "grad_norm": 1.982169508934021, "learning_rate": 0.0001, "loss": 0.0107, "step": 130610 }, { "epoch": 859.3421052631579, "grad_norm": 1.740512728691101, "learning_rate": 0.0001, "loss": 0.013, "step": 130620 }, { "epoch": 859.4078947368421, "grad_norm": 1.1280266046524048, "learning_rate": 0.0001, "loss": 0.0175, "step": 130630 }, { "epoch": 859.4736842105264, "grad_norm": 1.735882043838501, "learning_rate": 0.0001, "loss": 0.0106, "step": 130640 }, { "epoch": 859.5394736842105, "grad_norm": 1.6083797216415405, "learning_rate": 0.0001, "loss": 0.0115, "step": 130650 }, { "epoch": 859.6052631578947, "grad_norm": 1.0411689281463623, "learning_rate": 0.0001, "loss": 0.0094, "step": 130660 }, { "epoch": 859.671052631579, "grad_norm": 1.127121090888977, "learning_rate": 0.0001, "loss": 0.0132, "step": 130670 }, { "epoch": 859.7368421052631, "grad_norm": 1.3162654638290405, "learning_rate": 0.0001, "loss": 0.0137, "step": 130680 }, { "epoch": 859.8026315789474, "grad_norm": 1.8398581743240356, "learning_rate": 0.0001, "loss": 0.011, "step": 130690 }, { "epoch": 859.8684210526316, "grad_norm": 2.0682551860809326, "learning_rate": 0.0001, "loss": 0.0169, "step": 130700 }, { "epoch": 859.9342105263158, "grad_norm": 1.5368238687515259, "learning_rate": 0.0001, "loss": 0.0112, "step": 130710 }, { "epoch": 860.0, "grad_norm": 1.904638409614563, "learning_rate": 0.0001, "loss": 0.0093, "step": 130720 }, { "epoch": 860.0657894736842, "grad_norm": 1.394534945487976, "learning_rate": 0.0001, "loss": 0.0112, "step": 130730 }, { "epoch": 860.1315789473684, "grad_norm": 1.5671091079711914, "learning_rate": 0.0001, "loss": 0.0143, "step": 130740 }, { "epoch": 860.1973684210526, "grad_norm": 1.228511095046997, "learning_rate": 0.0001, "loss": 0.0206, "step": 130750 }, { "epoch": 860.2631578947369, "grad_norm": 1.14475679397583, "learning_rate": 0.0001, "loss": 0.0109, "step": 130760 }, { "epoch": 860.328947368421, "grad_norm": 1.692384123802185, "learning_rate": 0.0001, "loss": 0.0103, "step": 130770 }, { "epoch": 860.3947368421053, "grad_norm": 0.9860870242118835, "learning_rate": 0.0001, "loss": 0.0165, "step": 130780 }, { "epoch": 860.4605263157895, "grad_norm": 1.6056920289993286, "learning_rate": 0.0001, "loss": 0.0146, "step": 130790 }, { "epoch": 860.5263157894736, "grad_norm": 2.136502981185913, "learning_rate": 0.0001, "loss": 0.013, "step": 130800 }, { "epoch": 860.5921052631579, "grad_norm": 1.7132339477539062, "learning_rate": 0.0001, "loss": 0.009, "step": 130810 }, { "epoch": 860.6578947368421, "grad_norm": 1.4545289278030396, "learning_rate": 0.0001, "loss": 0.0129, "step": 130820 }, { "epoch": 860.7236842105264, "grad_norm": 1.5809324979782104, "learning_rate": 0.0001, "loss": 0.0104, "step": 130830 }, { "epoch": 860.7894736842105, "grad_norm": 1.3711752891540527, "learning_rate": 0.0001, "loss": 0.0122, "step": 130840 }, { "epoch": 860.8552631578947, "grad_norm": 1.4891338348388672, "learning_rate": 0.0001, "loss": 0.0132, "step": 130850 }, { "epoch": 860.921052631579, "grad_norm": 1.4959006309509277, "learning_rate": 0.0001, "loss": 0.0126, "step": 130860 }, { "epoch": 860.9868421052631, "grad_norm": 1.6987990140914917, "learning_rate": 0.0001, "loss": 0.0146, "step": 130870 }, { "epoch": 861.0526315789474, "grad_norm": 1.3916124105453491, "learning_rate": 0.0001, "loss": 0.0117, "step": 130880 }, { "epoch": 861.1184210526316, "grad_norm": 1.0967001914978027, "learning_rate": 0.0001, "loss": 0.0165, "step": 130890 }, { "epoch": 861.1842105263158, "grad_norm": 1.8120574951171875, "learning_rate": 0.0001, "loss": 0.0127, "step": 130900 }, { "epoch": 861.25, "grad_norm": 1.3881852626800537, "learning_rate": 0.0001, "loss": 0.0118, "step": 130910 }, { "epoch": 861.3157894736842, "grad_norm": 1.7606143951416016, "learning_rate": 0.0001, "loss": 0.0127, "step": 130920 }, { "epoch": 861.3815789473684, "grad_norm": 1.4092494249343872, "learning_rate": 0.0001, "loss": 0.0133, "step": 130930 }, { "epoch": 861.4473684210526, "grad_norm": 1.7227661609649658, "learning_rate": 0.0001, "loss": 0.0123, "step": 130940 }, { "epoch": 861.5131578947369, "grad_norm": 1.4141943454742432, "learning_rate": 0.0001, "loss": 0.012, "step": 130950 }, { "epoch": 861.578947368421, "grad_norm": 1.1168339252471924, "learning_rate": 0.0001, "loss": 0.0145, "step": 130960 }, { "epoch": 861.6447368421053, "grad_norm": 1.6237725019454956, "learning_rate": 0.0001, "loss": 0.0124, "step": 130970 }, { "epoch": 861.7105263157895, "grad_norm": 1.4182192087173462, "learning_rate": 0.0001, "loss": 0.0145, "step": 130980 }, { "epoch": 861.7763157894736, "grad_norm": 1.7481285333633423, "learning_rate": 0.0001, "loss": 0.0111, "step": 130990 }, { "epoch": 861.8421052631579, "grad_norm": 1.5828042030334473, "learning_rate": 0.0001, "loss": 0.0168, "step": 131000 }, { "epoch": 861.9078947368421, "grad_norm": 1.402031660079956, "learning_rate": 0.0001, "loss": 0.0123, "step": 131010 }, { "epoch": 861.9736842105264, "grad_norm": 1.0893410444259644, "learning_rate": 0.0001, "loss": 0.0162, "step": 131020 }, { "epoch": 862.0394736842105, "grad_norm": 1.4051169157028198, "learning_rate": 0.0001, "loss": 0.0112, "step": 131030 }, { "epoch": 862.1052631578947, "grad_norm": 1.2136852741241455, "learning_rate": 0.0001, "loss": 0.0138, "step": 131040 }, { "epoch": 862.171052631579, "grad_norm": 1.2625224590301514, "learning_rate": 0.0001, "loss": 0.0129, "step": 131050 }, { "epoch": 862.2368421052631, "grad_norm": 1.383988380432129, "learning_rate": 0.0001, "loss": 0.0148, "step": 131060 }, { "epoch": 862.3026315789474, "grad_norm": 1.3171195983886719, "learning_rate": 0.0001, "loss": 0.0141, "step": 131070 }, { "epoch": 862.3684210526316, "grad_norm": 1.6207973957061768, "learning_rate": 0.0001, "loss": 0.0149, "step": 131080 }, { "epoch": 862.4342105263158, "grad_norm": 1.305612325668335, "learning_rate": 0.0001, "loss": 0.0161, "step": 131090 }, { "epoch": 862.5, "grad_norm": 1.2564427852630615, "learning_rate": 0.0001, "loss": 0.011, "step": 131100 }, { "epoch": 862.5657894736842, "grad_norm": 1.4304280281066895, "learning_rate": 0.0001, "loss": 0.0132, "step": 131110 }, { "epoch": 862.6315789473684, "grad_norm": 1.2962216138839722, "learning_rate": 0.0001, "loss": 0.0145, "step": 131120 }, { "epoch": 862.6973684210526, "grad_norm": 1.5545456409454346, "learning_rate": 0.0001, "loss": 0.0114, "step": 131130 }, { "epoch": 862.7631578947369, "grad_norm": 1.240250825881958, "learning_rate": 0.0001, "loss": 0.0132, "step": 131140 }, { "epoch": 862.828947368421, "grad_norm": 1.9742498397827148, "learning_rate": 0.0001, "loss": 0.0135, "step": 131150 }, { "epoch": 862.8947368421053, "grad_norm": 1.2747457027435303, "learning_rate": 0.0001, "loss": 0.0112, "step": 131160 }, { "epoch": 862.9605263157895, "grad_norm": 1.7723627090454102, "learning_rate": 0.0001, "loss": 0.0123, "step": 131170 }, { "epoch": 863.0263157894736, "grad_norm": 1.0112236738204956, "learning_rate": 0.0001, "loss": 0.017, "step": 131180 }, { "epoch": 863.0921052631579, "grad_norm": 1.311487078666687, "learning_rate": 0.0001, "loss": 0.0126, "step": 131190 }, { "epoch": 863.1578947368421, "grad_norm": 1.426649570465088, "learning_rate": 0.0001, "loss": 0.0139, "step": 131200 }, { "epoch": 863.2236842105264, "grad_norm": 1.5752052068710327, "learning_rate": 0.0001, "loss": 0.0132, "step": 131210 }, { "epoch": 863.2894736842105, "grad_norm": 1.3083157539367676, "learning_rate": 0.0001, "loss": 0.0147, "step": 131220 }, { "epoch": 863.3552631578947, "grad_norm": 1.709531545639038, "learning_rate": 0.0001, "loss": 0.0117, "step": 131230 }, { "epoch": 863.421052631579, "grad_norm": 1.3517330884933472, "learning_rate": 0.0001, "loss": 0.0113, "step": 131240 }, { "epoch": 863.4868421052631, "grad_norm": 1.253702163696289, "learning_rate": 0.0001, "loss": 0.0162, "step": 131250 }, { "epoch": 863.5526315789474, "grad_norm": 1.4549962282180786, "learning_rate": 0.0001, "loss": 0.0186, "step": 131260 }, { "epoch": 863.6184210526316, "grad_norm": 1.3761014938354492, "learning_rate": 0.0001, "loss": 0.0116, "step": 131270 }, { "epoch": 863.6842105263158, "grad_norm": 1.588075876235962, "learning_rate": 0.0001, "loss": 0.0128, "step": 131280 }, { "epoch": 863.75, "grad_norm": 1.822096347808838, "learning_rate": 0.0001, "loss": 0.0163, "step": 131290 }, { "epoch": 863.8157894736842, "grad_norm": 1.624538779258728, "learning_rate": 0.0001, "loss": 0.0177, "step": 131300 }, { "epoch": 863.8815789473684, "grad_norm": 1.7323607206344604, "learning_rate": 0.0001, "loss": 0.0103, "step": 131310 }, { "epoch": 863.9473684210526, "grad_norm": 1.6020026206970215, "learning_rate": 0.0001, "loss": 0.0095, "step": 131320 }, { "epoch": 864.0131578947369, "grad_norm": 1.1383460760116577, "learning_rate": 0.0001, "loss": 0.0132, "step": 131330 }, { "epoch": 864.078947368421, "grad_norm": 2.5448296070098877, "learning_rate": 0.0001, "loss": 0.0168, "step": 131340 }, { "epoch": 864.1447368421053, "grad_norm": 2.185842275619507, "learning_rate": 0.0001, "loss": 0.0159, "step": 131350 }, { "epoch": 864.2105263157895, "grad_norm": 1.6187869310379028, "learning_rate": 0.0001, "loss": 0.0124, "step": 131360 }, { "epoch": 864.2763157894736, "grad_norm": 1.3580322265625, "learning_rate": 0.0001, "loss": 0.0101, "step": 131370 }, { "epoch": 864.3421052631579, "grad_norm": 1.558475136756897, "learning_rate": 0.0001, "loss": 0.0129, "step": 131380 }, { "epoch": 864.4078947368421, "grad_norm": 1.419215440750122, "learning_rate": 0.0001, "loss": 0.0162, "step": 131390 }, { "epoch": 864.4736842105264, "grad_norm": 1.4154391288757324, "learning_rate": 0.0001, "loss": 0.0151, "step": 131400 }, { "epoch": 864.5394736842105, "grad_norm": 1.2424397468566895, "learning_rate": 0.0001, "loss": 0.0158, "step": 131410 }, { "epoch": 864.6052631578947, "grad_norm": 1.784454584121704, "learning_rate": 0.0001, "loss": 0.0162, "step": 131420 }, { "epoch": 864.671052631579, "grad_norm": 1.564079761505127, "learning_rate": 0.0001, "loss": 0.012, "step": 131430 }, { "epoch": 864.7368421052631, "grad_norm": 1.576927900314331, "learning_rate": 0.0001, "loss": 0.0099, "step": 131440 }, { "epoch": 864.8026315789474, "grad_norm": 1.5663518905639648, "learning_rate": 0.0001, "loss": 0.0128, "step": 131450 }, { "epoch": 864.8684210526316, "grad_norm": 1.9191986322402954, "learning_rate": 0.0001, "loss": 0.0138, "step": 131460 }, { "epoch": 864.9342105263158, "grad_norm": 1.3801186084747314, "learning_rate": 0.0001, "loss": 0.0128, "step": 131470 }, { "epoch": 865.0, "grad_norm": 1.534616470336914, "learning_rate": 0.0001, "loss": 0.0121, "step": 131480 }, { "epoch": 865.0657894736842, "grad_norm": 0.962637186050415, "learning_rate": 0.0001, "loss": 0.0174, "step": 131490 }, { "epoch": 865.1315789473684, "grad_norm": 1.2698174715042114, "learning_rate": 0.0001, "loss": 0.0175, "step": 131500 }, { "epoch": 865.1973684210526, "grad_norm": 1.6453262567520142, "learning_rate": 0.0001, "loss": 0.0128, "step": 131510 }, { "epoch": 865.2631578947369, "grad_norm": 2.2283096313476562, "learning_rate": 0.0001, "loss": 0.0123, "step": 131520 }, { "epoch": 865.328947368421, "grad_norm": 1.894195318222046, "learning_rate": 0.0001, "loss": 0.0149, "step": 131530 }, { "epoch": 865.3947368421053, "grad_norm": 1.4666932821273804, "learning_rate": 0.0001, "loss": 0.0108, "step": 131540 }, { "epoch": 865.4605263157895, "grad_norm": 1.3911739587783813, "learning_rate": 0.0001, "loss": 0.0097, "step": 131550 }, { "epoch": 865.5263157894736, "grad_norm": 1.3404499292373657, "learning_rate": 0.0001, "loss": 0.011, "step": 131560 }, { "epoch": 865.5921052631579, "grad_norm": 1.0989530086517334, "learning_rate": 0.0001, "loss": 0.0115, "step": 131570 }, { "epoch": 865.6578947368421, "grad_norm": 1.5854202508926392, "learning_rate": 0.0001, "loss": 0.013, "step": 131580 }, { "epoch": 865.7236842105264, "grad_norm": 1.41796875, "learning_rate": 0.0001, "loss": 0.0144, "step": 131590 }, { "epoch": 865.7894736842105, "grad_norm": 1.4169223308563232, "learning_rate": 0.0001, "loss": 0.0186, "step": 131600 }, { "epoch": 865.8552631578947, "grad_norm": 1.515154480934143, "learning_rate": 0.0001, "loss": 0.0108, "step": 131610 }, { "epoch": 865.921052631579, "grad_norm": 1.5546997785568237, "learning_rate": 0.0001, "loss": 0.0138, "step": 131620 }, { "epoch": 865.9868421052631, "grad_norm": 1.6676524877548218, "learning_rate": 0.0001, "loss": 0.0127, "step": 131630 }, { "epoch": 866.0526315789474, "grad_norm": 2.0053460597991943, "learning_rate": 0.0001, "loss": 0.0143, "step": 131640 }, { "epoch": 866.1184210526316, "grad_norm": 1.6806696653366089, "learning_rate": 0.0001, "loss": 0.0119, "step": 131650 }, { "epoch": 866.1842105263158, "grad_norm": 1.584430456161499, "learning_rate": 0.0001, "loss": 0.0142, "step": 131660 }, { "epoch": 866.25, "grad_norm": 1.5913102626800537, "learning_rate": 0.0001, "loss": 0.015, "step": 131670 }, { "epoch": 866.3157894736842, "grad_norm": 0.7999846935272217, "learning_rate": 0.0001, "loss": 0.0117, "step": 131680 }, { "epoch": 866.3815789473684, "grad_norm": 0.8662315011024475, "learning_rate": 0.0001, "loss": 0.0126, "step": 131690 }, { "epoch": 866.4473684210526, "grad_norm": 1.5858615636825562, "learning_rate": 0.0001, "loss": 0.0106, "step": 131700 }, { "epoch": 866.5131578947369, "grad_norm": 1.4615442752838135, "learning_rate": 0.0001, "loss": 0.0123, "step": 131710 }, { "epoch": 866.578947368421, "grad_norm": 1.316768765449524, "learning_rate": 0.0001, "loss": 0.0157, "step": 131720 }, { "epoch": 866.6447368421053, "grad_norm": 1.5457834005355835, "learning_rate": 0.0001, "loss": 0.0143, "step": 131730 }, { "epoch": 866.7105263157895, "grad_norm": 1.5527210235595703, "learning_rate": 0.0001, "loss": 0.0122, "step": 131740 }, { "epoch": 866.7763157894736, "grad_norm": 1.3923189640045166, "learning_rate": 0.0001, "loss": 0.0124, "step": 131750 }, { "epoch": 866.8421052631579, "grad_norm": 1.865212082862854, "learning_rate": 0.0001, "loss": 0.0126, "step": 131760 }, { "epoch": 866.9078947368421, "grad_norm": 1.4684571027755737, "learning_rate": 0.0001, "loss": 0.0129, "step": 131770 }, { "epoch": 866.9736842105264, "grad_norm": 1.6491864919662476, "learning_rate": 0.0001, "loss": 0.0133, "step": 131780 }, { "epoch": 867.0394736842105, "grad_norm": 1.2256399393081665, "learning_rate": 0.0001, "loss": 0.0152, "step": 131790 }, { "epoch": 867.1052631578947, "grad_norm": 1.3438485860824585, "learning_rate": 0.0001, "loss": 0.0139, "step": 131800 }, { "epoch": 867.171052631579, "grad_norm": 1.2769560813903809, "learning_rate": 0.0001, "loss": 0.0102, "step": 131810 }, { "epoch": 867.2368421052631, "grad_norm": 1.6536349058151245, "learning_rate": 0.0001, "loss": 0.0096, "step": 131820 }, { "epoch": 867.3026315789474, "grad_norm": 1.5862033367156982, "learning_rate": 0.0001, "loss": 0.0113, "step": 131830 }, { "epoch": 867.3684210526316, "grad_norm": 1.2901251316070557, "learning_rate": 0.0001, "loss": 0.0194, "step": 131840 }, { "epoch": 867.4342105263158, "grad_norm": 1.8146846294403076, "learning_rate": 0.0001, "loss": 0.0131, "step": 131850 }, { "epoch": 867.5, "grad_norm": 1.386218547821045, "learning_rate": 0.0001, "loss": 0.0118, "step": 131860 }, { "epoch": 867.5657894736842, "grad_norm": 1.3549590110778809, "learning_rate": 0.0001, "loss": 0.015, "step": 131870 }, { "epoch": 867.6315789473684, "grad_norm": 1.7878926992416382, "learning_rate": 0.0001, "loss": 0.012, "step": 131880 }, { "epoch": 867.6973684210526, "grad_norm": 1.3846572637557983, "learning_rate": 0.0001, "loss": 0.0139, "step": 131890 }, { "epoch": 867.7631578947369, "grad_norm": 1.1874326467514038, "learning_rate": 0.0001, "loss": 0.0139, "step": 131900 }, { "epoch": 867.828947368421, "grad_norm": 1.5255733728408813, "learning_rate": 0.0001, "loss": 0.0115, "step": 131910 }, { "epoch": 867.8947368421053, "grad_norm": 1.7286053895950317, "learning_rate": 0.0001, "loss": 0.0135, "step": 131920 }, { "epoch": 867.9605263157895, "grad_norm": 1.5257943868637085, "learning_rate": 0.0001, "loss": 0.0121, "step": 131930 }, { "epoch": 868.0263157894736, "grad_norm": 1.4636949300765991, "learning_rate": 0.0001, "loss": 0.012, "step": 131940 }, { "epoch": 868.0921052631579, "grad_norm": 1.3998587131500244, "learning_rate": 0.0001, "loss": 0.0137, "step": 131950 }, { "epoch": 868.1578947368421, "grad_norm": 1.0980331897735596, "learning_rate": 0.0001, "loss": 0.0139, "step": 131960 }, { "epoch": 868.2236842105264, "grad_norm": 1.8663843870162964, "learning_rate": 0.0001, "loss": 0.0152, "step": 131970 }, { "epoch": 868.2894736842105, "grad_norm": 1.0835838317871094, "learning_rate": 0.0001, "loss": 0.0138, "step": 131980 }, { "epoch": 868.3552631578947, "grad_norm": 1.0250530242919922, "learning_rate": 0.0001, "loss": 0.0157, "step": 131990 }, { "epoch": 868.421052631579, "grad_norm": 1.775864601135254, "learning_rate": 0.0001, "loss": 0.0137, "step": 132000 }, { "epoch": 868.4868421052631, "grad_norm": 1.7754117250442505, "learning_rate": 0.0001, "loss": 0.0121, "step": 132010 }, { "epoch": 868.5526315789474, "grad_norm": 1.645778775215149, "learning_rate": 0.0001, "loss": 0.0096, "step": 132020 }, { "epoch": 868.6184210526316, "grad_norm": 1.649285078048706, "learning_rate": 0.0001, "loss": 0.0136, "step": 132030 }, { "epoch": 868.6842105263158, "grad_norm": 1.2169857025146484, "learning_rate": 0.0001, "loss": 0.0115, "step": 132040 }, { "epoch": 868.75, "grad_norm": 1.5805656909942627, "learning_rate": 0.0001, "loss": 0.0154, "step": 132050 }, { "epoch": 868.8157894736842, "grad_norm": 1.7621301412582397, "learning_rate": 0.0001, "loss": 0.0143, "step": 132060 }, { "epoch": 868.8815789473684, "grad_norm": 1.3409234285354614, "learning_rate": 0.0001, "loss": 0.0133, "step": 132070 }, { "epoch": 868.9473684210526, "grad_norm": 1.1308876276016235, "learning_rate": 0.0001, "loss": 0.014, "step": 132080 }, { "epoch": 869.0131578947369, "grad_norm": 1.7929314374923706, "learning_rate": 0.0001, "loss": 0.0109, "step": 132090 }, { "epoch": 869.078947368421, "grad_norm": 1.560714602470398, "learning_rate": 0.0001, "loss": 0.0153, "step": 132100 }, { "epoch": 869.1447368421053, "grad_norm": 1.5291681289672852, "learning_rate": 0.0001, "loss": 0.0128, "step": 132110 }, { "epoch": 869.2105263157895, "grad_norm": 1.3231083154678345, "learning_rate": 0.0001, "loss": 0.0148, "step": 132120 }, { "epoch": 869.2763157894736, "grad_norm": 1.3465009927749634, "learning_rate": 0.0001, "loss": 0.012, "step": 132130 }, { "epoch": 869.3421052631579, "grad_norm": 1.2104580402374268, "learning_rate": 0.0001, "loss": 0.0144, "step": 132140 }, { "epoch": 869.4078947368421, "grad_norm": 1.402390718460083, "learning_rate": 0.0001, "loss": 0.0164, "step": 132150 }, { "epoch": 869.4736842105264, "grad_norm": 1.3461742401123047, "learning_rate": 0.0001, "loss": 0.0153, "step": 132160 }, { "epoch": 869.5394736842105, "grad_norm": 1.4518489837646484, "learning_rate": 0.0001, "loss": 0.0137, "step": 132170 }, { "epoch": 869.6052631578947, "grad_norm": 1.4718416929244995, "learning_rate": 0.0001, "loss": 0.0119, "step": 132180 }, { "epoch": 869.671052631579, "grad_norm": 1.8096469640731812, "learning_rate": 0.0001, "loss": 0.0102, "step": 132190 }, { "epoch": 869.7368421052631, "grad_norm": 1.3966784477233887, "learning_rate": 0.0001, "loss": 0.0115, "step": 132200 }, { "epoch": 869.8026315789474, "grad_norm": 1.4250773191452026, "learning_rate": 0.0001, "loss": 0.0132, "step": 132210 }, { "epoch": 869.8684210526316, "grad_norm": 1.331749439239502, "learning_rate": 0.0001, "loss": 0.0093, "step": 132220 }, { "epoch": 869.9342105263158, "grad_norm": 1.5956215858459473, "learning_rate": 0.0001, "loss": 0.015, "step": 132230 }, { "epoch": 870.0, "grad_norm": 1.7273218631744385, "learning_rate": 0.0001, "loss": 0.0132, "step": 132240 }, { "epoch": 870.0657894736842, "grad_norm": 1.4539084434509277, "learning_rate": 0.0001, "loss": 0.013, "step": 132250 }, { "epoch": 870.1315789473684, "grad_norm": 1.6849606037139893, "learning_rate": 0.0001, "loss": 0.0118, "step": 132260 }, { "epoch": 870.1973684210526, "grad_norm": 1.4444307088851929, "learning_rate": 0.0001, "loss": 0.01, "step": 132270 }, { "epoch": 870.2631578947369, "grad_norm": 1.6524721384048462, "learning_rate": 0.0001, "loss": 0.0128, "step": 132280 }, { "epoch": 870.328947368421, "grad_norm": 1.5124589204788208, "learning_rate": 0.0001, "loss": 0.0168, "step": 132290 }, { "epoch": 870.3947368421053, "grad_norm": 1.3690260648727417, "learning_rate": 0.0001, "loss": 0.0135, "step": 132300 }, { "epoch": 870.4605263157895, "grad_norm": 0.9702151417732239, "learning_rate": 0.0001, "loss": 0.0135, "step": 132310 }, { "epoch": 870.5263157894736, "grad_norm": 1.4393999576568604, "learning_rate": 0.0001, "loss": 0.016, "step": 132320 }, { "epoch": 870.5921052631579, "grad_norm": 1.5601997375488281, "learning_rate": 0.0001, "loss": 0.0114, "step": 132330 }, { "epoch": 870.6578947368421, "grad_norm": 1.1914149522781372, "learning_rate": 0.0001, "loss": 0.01, "step": 132340 }, { "epoch": 870.7236842105264, "grad_norm": 1.5718705654144287, "learning_rate": 0.0001, "loss": 0.0121, "step": 132350 }, { "epoch": 870.7894736842105, "grad_norm": 1.5605227947235107, "learning_rate": 0.0001, "loss": 0.0147, "step": 132360 }, { "epoch": 870.8552631578947, "grad_norm": 1.4567852020263672, "learning_rate": 0.0001, "loss": 0.0151, "step": 132370 }, { "epoch": 870.921052631579, "grad_norm": 1.2690999507904053, "learning_rate": 0.0001, "loss": 0.0144, "step": 132380 }, { "epoch": 870.9868421052631, "grad_norm": 1.457142949104309, "learning_rate": 0.0001, "loss": 0.0123, "step": 132390 }, { "epoch": 871.0526315789474, "grad_norm": 1.1464070081710815, "learning_rate": 0.0001, "loss": 0.0184, "step": 132400 }, { "epoch": 871.1184210526316, "grad_norm": 1.532596230506897, "learning_rate": 0.0001, "loss": 0.0114, "step": 132410 }, { "epoch": 871.1842105263158, "grad_norm": 1.4060381650924683, "learning_rate": 0.0001, "loss": 0.0147, "step": 132420 }, { "epoch": 871.25, "grad_norm": 1.8644179105758667, "learning_rate": 0.0001, "loss": 0.0142, "step": 132430 }, { "epoch": 871.3157894736842, "grad_norm": 1.224281668663025, "learning_rate": 0.0001, "loss": 0.0152, "step": 132440 }, { "epoch": 871.3815789473684, "grad_norm": 1.466402292251587, "learning_rate": 0.0001, "loss": 0.0114, "step": 132450 }, { "epoch": 871.4473684210526, "grad_norm": 0.8896701335906982, "learning_rate": 0.0001, "loss": 0.0102, "step": 132460 }, { "epoch": 871.5131578947369, "grad_norm": 1.6592135429382324, "learning_rate": 0.0001, "loss": 0.0107, "step": 132470 }, { "epoch": 871.578947368421, "grad_norm": 1.5942898988723755, "learning_rate": 0.0001, "loss": 0.0127, "step": 132480 }, { "epoch": 871.6447368421053, "grad_norm": 1.975266695022583, "learning_rate": 0.0001, "loss": 0.0109, "step": 132490 }, { "epoch": 871.7105263157895, "grad_norm": 1.605092167854309, "learning_rate": 0.0001, "loss": 0.0128, "step": 132500 }, { "epoch": 871.7763157894736, "grad_norm": 1.3624765872955322, "learning_rate": 0.0001, "loss": 0.0112, "step": 132510 }, { "epoch": 871.8421052631579, "grad_norm": 1.1038751602172852, "learning_rate": 0.0001, "loss": 0.0124, "step": 132520 }, { "epoch": 871.9078947368421, "grad_norm": 1.1718944311141968, "learning_rate": 0.0001, "loss": 0.0116, "step": 132530 }, { "epoch": 871.9736842105264, "grad_norm": 2.026973247528076, "learning_rate": 0.0001, "loss": 0.0123, "step": 132540 }, { "epoch": 872.0394736842105, "grad_norm": 1.4373855590820312, "learning_rate": 0.0001, "loss": 0.0163, "step": 132550 }, { "epoch": 872.1052631578947, "grad_norm": 1.2956033945083618, "learning_rate": 0.0001, "loss": 0.0106, "step": 132560 }, { "epoch": 872.171052631579, "grad_norm": 1.1541513204574585, "learning_rate": 0.0001, "loss": 0.0106, "step": 132570 }, { "epoch": 872.2368421052631, "grad_norm": 1.293738842010498, "learning_rate": 0.0001, "loss": 0.0152, "step": 132580 }, { "epoch": 872.3026315789474, "grad_norm": 1.0959680080413818, "learning_rate": 0.0001, "loss": 0.0127, "step": 132590 }, { "epoch": 872.3684210526316, "grad_norm": 1.354246973991394, "learning_rate": 0.0001, "loss": 0.0146, "step": 132600 }, { "epoch": 872.4342105263158, "grad_norm": 1.1209168434143066, "learning_rate": 0.0001, "loss": 0.0147, "step": 132610 }, { "epoch": 872.5, "grad_norm": 1.2867681980133057, "learning_rate": 0.0001, "loss": 0.0112, "step": 132620 }, { "epoch": 872.5657894736842, "grad_norm": 1.0831197500228882, "learning_rate": 0.0001, "loss": 0.0152, "step": 132630 }, { "epoch": 872.6315789473684, "grad_norm": 1.4439040422439575, "learning_rate": 0.0001, "loss": 0.015, "step": 132640 }, { "epoch": 872.6973684210526, "grad_norm": 1.3715436458587646, "learning_rate": 0.0001, "loss": 0.013, "step": 132650 }, { "epoch": 872.7631578947369, "grad_norm": 1.6101760864257812, "learning_rate": 0.0001, "loss": 0.0136, "step": 132660 }, { "epoch": 872.828947368421, "grad_norm": 1.0434638261795044, "learning_rate": 0.0001, "loss": 0.0169, "step": 132670 }, { "epoch": 872.8947368421053, "grad_norm": 1.5627535581588745, "learning_rate": 0.0001, "loss": 0.0136, "step": 132680 }, { "epoch": 872.9605263157895, "grad_norm": 1.9371094703674316, "learning_rate": 0.0001, "loss": 0.0146, "step": 132690 }, { "epoch": 873.0263157894736, "grad_norm": 2.132028341293335, "learning_rate": 0.0001, "loss": 0.0145, "step": 132700 }, { "epoch": 873.0921052631579, "grad_norm": 1.4204176664352417, "learning_rate": 0.0001, "loss": 0.0127, "step": 132710 }, { "epoch": 873.1578947368421, "grad_norm": 1.3979802131652832, "learning_rate": 0.0001, "loss": 0.0143, "step": 132720 }, { "epoch": 873.2236842105264, "grad_norm": 1.7981276512145996, "learning_rate": 0.0001, "loss": 0.0117, "step": 132730 }, { "epoch": 873.2894736842105, "grad_norm": 1.637779951095581, "learning_rate": 0.0001, "loss": 0.0127, "step": 132740 }, { "epoch": 873.3552631578947, "grad_norm": 1.7854169607162476, "learning_rate": 0.0001, "loss": 0.0134, "step": 132750 }, { "epoch": 873.421052631579, "grad_norm": 1.6982396841049194, "learning_rate": 0.0001, "loss": 0.01, "step": 132760 }, { "epoch": 873.4868421052631, "grad_norm": 1.5897376537322998, "learning_rate": 0.0001, "loss": 0.0119, "step": 132770 }, { "epoch": 873.5526315789474, "grad_norm": 1.579246997833252, "learning_rate": 0.0001, "loss": 0.0134, "step": 132780 }, { "epoch": 873.6184210526316, "grad_norm": 1.2541728019714355, "learning_rate": 0.0001, "loss": 0.0158, "step": 132790 }, { "epoch": 873.6842105263158, "grad_norm": 1.529104471206665, "learning_rate": 0.0001, "loss": 0.0164, "step": 132800 }, { "epoch": 873.75, "grad_norm": 1.3453880548477173, "learning_rate": 0.0001, "loss": 0.019, "step": 132810 }, { "epoch": 873.8157894736842, "grad_norm": 1.373212218284607, "learning_rate": 0.0001, "loss": 0.0108, "step": 132820 }, { "epoch": 873.8815789473684, "grad_norm": 1.2641404867172241, "learning_rate": 0.0001, "loss": 0.0164, "step": 132830 }, { "epoch": 873.9473684210526, "grad_norm": 1.7752430438995361, "learning_rate": 0.0001, "loss": 0.012, "step": 132840 }, { "epoch": 874.0131578947369, "grad_norm": 1.1105983257293701, "learning_rate": 0.0001, "loss": 0.0142, "step": 132850 }, { "epoch": 874.078947368421, "grad_norm": 1.375568151473999, "learning_rate": 0.0001, "loss": 0.013, "step": 132860 }, { "epoch": 874.1447368421053, "grad_norm": 1.1864863634109497, "learning_rate": 0.0001, "loss": 0.0133, "step": 132870 }, { "epoch": 874.2105263157895, "grad_norm": 1.5254359245300293, "learning_rate": 0.0001, "loss": 0.0145, "step": 132880 }, { "epoch": 874.2763157894736, "grad_norm": 1.2698184251785278, "learning_rate": 0.0001, "loss": 0.0111, "step": 132890 }, { "epoch": 874.3421052631579, "grad_norm": 1.3572020530700684, "learning_rate": 0.0001, "loss": 0.0121, "step": 132900 }, { "epoch": 874.4078947368421, "grad_norm": 1.557278037071228, "learning_rate": 0.0001, "loss": 0.0113, "step": 132910 }, { "epoch": 874.4736842105264, "grad_norm": 1.5538268089294434, "learning_rate": 0.0001, "loss": 0.0149, "step": 132920 }, { "epoch": 874.5394736842105, "grad_norm": 1.6828950643539429, "learning_rate": 0.0001, "loss": 0.0142, "step": 132930 }, { "epoch": 874.6052631578947, "grad_norm": 1.4623194932937622, "learning_rate": 0.0001, "loss": 0.0129, "step": 132940 }, { "epoch": 874.671052631579, "grad_norm": 1.6497740745544434, "learning_rate": 0.0001, "loss": 0.0149, "step": 132950 }, { "epoch": 874.7368421052631, "grad_norm": 2.1143529415130615, "learning_rate": 0.0001, "loss": 0.0111, "step": 132960 }, { "epoch": 874.8026315789474, "grad_norm": 1.8356521129608154, "learning_rate": 0.0001, "loss": 0.0133, "step": 132970 }, { "epoch": 874.8684210526316, "grad_norm": 1.754227876663208, "learning_rate": 0.0001, "loss": 0.0203, "step": 132980 }, { "epoch": 874.9342105263158, "grad_norm": 1.814524531364441, "learning_rate": 0.0001, "loss": 0.0144, "step": 132990 }, { "epoch": 875.0, "grad_norm": 1.5264650583267212, "learning_rate": 0.0001, "loss": 0.0135, "step": 133000 }, { "epoch": 875.0657894736842, "grad_norm": 1.6359394788742065, "learning_rate": 0.0001, "loss": 0.0134, "step": 133010 }, { "epoch": 875.1315789473684, "grad_norm": 1.555956482887268, "learning_rate": 0.0001, "loss": 0.0144, "step": 133020 }, { "epoch": 875.1973684210526, "grad_norm": 1.347685694694519, "learning_rate": 0.0001, "loss": 0.0137, "step": 133030 }, { "epoch": 875.2631578947369, "grad_norm": 1.4251371622085571, "learning_rate": 0.0001, "loss": 0.0179, "step": 133040 }, { "epoch": 875.328947368421, "grad_norm": 1.7702276706695557, "learning_rate": 0.0001, "loss": 0.0133, "step": 133050 }, { "epoch": 875.3947368421053, "grad_norm": 1.2093056440353394, "learning_rate": 0.0001, "loss": 0.0122, "step": 133060 }, { "epoch": 875.4605263157895, "grad_norm": 1.5013049840927124, "learning_rate": 0.0001, "loss": 0.0115, "step": 133070 }, { "epoch": 875.5263157894736, "grad_norm": 1.4146400690078735, "learning_rate": 0.0001, "loss": 0.0104, "step": 133080 }, { "epoch": 875.5921052631579, "grad_norm": 1.062119483947754, "learning_rate": 0.0001, "loss": 0.0139, "step": 133090 }, { "epoch": 875.6578947368421, "grad_norm": 1.2517344951629639, "learning_rate": 0.0001, "loss": 0.0132, "step": 133100 }, { "epoch": 875.7236842105264, "grad_norm": 1.9198007583618164, "learning_rate": 0.0001, "loss": 0.0103, "step": 133110 }, { "epoch": 875.7894736842105, "grad_norm": 1.3403024673461914, "learning_rate": 0.0001, "loss": 0.0114, "step": 133120 }, { "epoch": 875.8552631578947, "grad_norm": 1.680169939994812, "learning_rate": 0.0001, "loss": 0.0158, "step": 133130 }, { "epoch": 875.921052631579, "grad_norm": 1.9514590501785278, "learning_rate": 0.0001, "loss": 0.0141, "step": 133140 }, { "epoch": 875.9868421052631, "grad_norm": 1.7077627182006836, "learning_rate": 0.0001, "loss": 0.0132, "step": 133150 }, { "epoch": 876.0526315789474, "grad_norm": 1.4373246431350708, "learning_rate": 0.0001, "loss": 0.0111, "step": 133160 }, { "epoch": 876.1184210526316, "grad_norm": 1.3907021284103394, "learning_rate": 0.0001, "loss": 0.0108, "step": 133170 }, { "epoch": 876.1842105263158, "grad_norm": 1.6078382730484009, "learning_rate": 0.0001, "loss": 0.0119, "step": 133180 }, { "epoch": 876.25, "grad_norm": 1.4638652801513672, "learning_rate": 0.0001, "loss": 0.012, "step": 133190 }, { "epoch": 876.3157894736842, "grad_norm": 1.6968026161193848, "learning_rate": 0.0001, "loss": 0.0123, "step": 133200 }, { "epoch": 876.3815789473684, "grad_norm": 1.1721230745315552, "learning_rate": 0.0001, "loss": 0.014, "step": 133210 }, { "epoch": 876.4473684210526, "grad_norm": 1.366281270980835, "learning_rate": 0.0001, "loss": 0.0123, "step": 133220 }, { "epoch": 876.5131578947369, "grad_norm": 1.2948486804962158, "learning_rate": 0.0001, "loss": 0.0098, "step": 133230 }, { "epoch": 876.578947368421, "grad_norm": 1.3524820804595947, "learning_rate": 0.0001, "loss": 0.0168, "step": 133240 }, { "epoch": 876.6447368421053, "grad_norm": 1.398380160331726, "learning_rate": 0.0001, "loss": 0.0125, "step": 133250 }, { "epoch": 876.7105263157895, "grad_norm": 1.3904156684875488, "learning_rate": 0.0001, "loss": 0.0146, "step": 133260 }, { "epoch": 876.7763157894736, "grad_norm": 1.2073122262954712, "learning_rate": 0.0001, "loss": 0.0132, "step": 133270 }, { "epoch": 876.8421052631579, "grad_norm": 1.0754969120025635, "learning_rate": 0.0001, "loss": 0.0204, "step": 133280 }, { "epoch": 876.9078947368421, "grad_norm": 1.6575307846069336, "learning_rate": 0.0001, "loss": 0.0134, "step": 133290 }, { "epoch": 876.9736842105264, "grad_norm": 1.3787449598312378, "learning_rate": 0.0001, "loss": 0.011, "step": 133300 }, { "epoch": 877.0394736842105, "grad_norm": 1.101938009262085, "learning_rate": 0.0001, "loss": 0.0144, "step": 133310 }, { "epoch": 877.1052631578947, "grad_norm": 1.2111554145812988, "learning_rate": 0.0001, "loss": 0.0098, "step": 133320 }, { "epoch": 877.171052631579, "grad_norm": 1.8384315967559814, "learning_rate": 0.0001, "loss": 0.0137, "step": 133330 }, { "epoch": 877.2368421052631, "grad_norm": 1.718657374382019, "learning_rate": 0.0001, "loss": 0.0109, "step": 133340 }, { "epoch": 877.3026315789474, "grad_norm": 1.2785639762878418, "learning_rate": 0.0001, "loss": 0.0152, "step": 133350 }, { "epoch": 877.3684210526316, "grad_norm": 2.1471927165985107, "learning_rate": 0.0001, "loss": 0.0119, "step": 133360 }, { "epoch": 877.4342105263158, "grad_norm": 1.6288270950317383, "learning_rate": 0.0001, "loss": 0.0135, "step": 133370 }, { "epoch": 877.5, "grad_norm": 1.2791345119476318, "learning_rate": 0.0001, "loss": 0.0167, "step": 133380 }, { "epoch": 877.5657894736842, "grad_norm": 1.3227494955062866, "learning_rate": 0.0001, "loss": 0.0165, "step": 133390 }, { "epoch": 877.6315789473684, "grad_norm": 1.2559112310409546, "learning_rate": 0.0001, "loss": 0.013, "step": 133400 }, { "epoch": 877.6973684210526, "grad_norm": 1.0064692497253418, "learning_rate": 0.0001, "loss": 0.0109, "step": 133410 }, { "epoch": 877.7631578947369, "grad_norm": 1.1942099332809448, "learning_rate": 0.0001, "loss": 0.0123, "step": 133420 }, { "epoch": 877.828947368421, "grad_norm": 1.5174590349197388, "learning_rate": 0.0001, "loss": 0.0106, "step": 133430 }, { "epoch": 877.8947368421053, "grad_norm": 1.2680639028549194, "learning_rate": 0.0001, "loss": 0.0133, "step": 133440 }, { "epoch": 877.9605263157895, "grad_norm": 0.933921217918396, "learning_rate": 0.0001, "loss": 0.0109, "step": 133450 }, { "epoch": 878.0263157894736, "grad_norm": 1.1845866441726685, "learning_rate": 0.0001, "loss": 0.0143, "step": 133460 }, { "epoch": 878.0921052631579, "grad_norm": 1.5907673835754395, "learning_rate": 0.0001, "loss": 0.012, "step": 133470 }, { "epoch": 878.1578947368421, "grad_norm": 1.7858078479766846, "learning_rate": 0.0001, "loss": 0.0128, "step": 133480 }, { "epoch": 878.2236842105264, "grad_norm": 1.8793679475784302, "learning_rate": 0.0001, "loss": 0.0145, "step": 133490 }, { "epoch": 878.2894736842105, "grad_norm": 1.478485107421875, "learning_rate": 0.0001, "loss": 0.0163, "step": 133500 }, { "epoch": 878.3552631578947, "grad_norm": 1.3539817333221436, "learning_rate": 0.0001, "loss": 0.0124, "step": 133510 }, { "epoch": 878.421052631579, "grad_norm": 1.4182603359222412, "learning_rate": 0.0001, "loss": 0.0128, "step": 133520 }, { "epoch": 878.4868421052631, "grad_norm": 1.3202033042907715, "learning_rate": 0.0001, "loss": 0.0103, "step": 133530 }, { "epoch": 878.5526315789474, "grad_norm": 1.6410846710205078, "learning_rate": 0.0001, "loss": 0.0112, "step": 133540 }, { "epoch": 878.6184210526316, "grad_norm": 1.4634772539138794, "learning_rate": 0.0001, "loss": 0.0102, "step": 133550 }, { "epoch": 878.6842105263158, "grad_norm": 1.6426477432250977, "learning_rate": 0.0001, "loss": 0.0118, "step": 133560 }, { "epoch": 878.75, "grad_norm": 1.4133220911026, "learning_rate": 0.0001, "loss": 0.0117, "step": 133570 }, { "epoch": 878.8157894736842, "grad_norm": 1.233482003211975, "learning_rate": 0.0001, "loss": 0.0136, "step": 133580 }, { "epoch": 878.8815789473684, "grad_norm": 1.3914769887924194, "learning_rate": 0.0001, "loss": 0.0169, "step": 133590 }, { "epoch": 878.9473684210526, "grad_norm": 1.6413475275039673, "learning_rate": 0.0001, "loss": 0.0108, "step": 133600 }, { "epoch": 879.0131578947369, "grad_norm": 1.142541527748108, "learning_rate": 0.0001, "loss": 0.0158, "step": 133610 }, { "epoch": 879.078947368421, "grad_norm": 1.4841234683990479, "learning_rate": 0.0001, "loss": 0.0124, "step": 133620 }, { "epoch": 879.1447368421053, "grad_norm": 1.8780924081802368, "learning_rate": 0.0001, "loss": 0.0097, "step": 133630 }, { "epoch": 879.2105263157895, "grad_norm": 1.6761853694915771, "learning_rate": 0.0001, "loss": 0.0148, "step": 133640 }, { "epoch": 879.2763157894736, "grad_norm": 1.3709949254989624, "learning_rate": 0.0001, "loss": 0.0141, "step": 133650 }, { "epoch": 879.3421052631579, "grad_norm": 0.9364446997642517, "learning_rate": 0.0001, "loss": 0.0091, "step": 133660 }, { "epoch": 879.4078947368421, "grad_norm": 1.1319077014923096, "learning_rate": 0.0001, "loss": 0.0113, "step": 133670 }, { "epoch": 879.4736842105264, "grad_norm": 1.4621609449386597, "learning_rate": 0.0001, "loss": 0.0124, "step": 133680 }, { "epoch": 879.5394736842105, "grad_norm": 1.6926411390304565, "learning_rate": 0.0001, "loss": 0.0095, "step": 133690 }, { "epoch": 879.6052631578947, "grad_norm": 1.393122911453247, "learning_rate": 0.0001, "loss": 0.0116, "step": 133700 }, { "epoch": 879.671052631579, "grad_norm": 1.4700591564178467, "learning_rate": 0.0001, "loss": 0.015, "step": 133710 }, { "epoch": 879.7368421052631, "grad_norm": 0.9376477003097534, "learning_rate": 0.0001, "loss": 0.0132, "step": 133720 }, { "epoch": 879.8026315789474, "grad_norm": 1.1401532888412476, "learning_rate": 0.0001, "loss": 0.0152, "step": 133730 }, { "epoch": 879.8684210526316, "grad_norm": 1.3668506145477295, "learning_rate": 0.0001, "loss": 0.0113, "step": 133740 }, { "epoch": 879.9342105263158, "grad_norm": 1.1485307216644287, "learning_rate": 0.0001, "loss": 0.0152, "step": 133750 }, { "epoch": 880.0, "grad_norm": 1.5972704887390137, "learning_rate": 0.0001, "loss": 0.0138, "step": 133760 }, { "epoch": 880.0657894736842, "grad_norm": 1.2701917886734009, "learning_rate": 0.0001, "loss": 0.0127, "step": 133770 }, { "epoch": 880.1315789473684, "grad_norm": 1.6719474792480469, "learning_rate": 0.0001, "loss": 0.0153, "step": 133780 }, { "epoch": 880.1973684210526, "grad_norm": 1.568770170211792, "learning_rate": 0.0001, "loss": 0.0129, "step": 133790 }, { "epoch": 880.2631578947369, "grad_norm": 1.4647384881973267, "learning_rate": 0.0001, "loss": 0.0132, "step": 133800 }, { "epoch": 880.328947368421, "grad_norm": 1.514314889907837, "learning_rate": 0.0001, "loss": 0.0113, "step": 133810 }, { "epoch": 880.3947368421053, "grad_norm": 1.3911210298538208, "learning_rate": 0.0001, "loss": 0.0112, "step": 133820 }, { "epoch": 880.4605263157895, "grad_norm": 1.3872034549713135, "learning_rate": 0.0001, "loss": 0.0138, "step": 133830 }, { "epoch": 880.5263157894736, "grad_norm": 1.7528955936431885, "learning_rate": 0.0001, "loss": 0.0157, "step": 133840 }, { "epoch": 880.5921052631579, "grad_norm": 1.5447969436645508, "learning_rate": 0.0001, "loss": 0.0144, "step": 133850 }, { "epoch": 880.6578947368421, "grad_norm": 1.7845840454101562, "learning_rate": 0.0001, "loss": 0.0146, "step": 133860 }, { "epoch": 880.7236842105264, "grad_norm": 1.9346412420272827, "learning_rate": 0.0001, "loss": 0.0113, "step": 133870 }, { "epoch": 880.7894736842105, "grad_norm": 1.7173151969909668, "learning_rate": 0.0001, "loss": 0.0121, "step": 133880 }, { "epoch": 880.8552631578947, "grad_norm": 1.0122050046920776, "learning_rate": 0.0001, "loss": 0.0112, "step": 133890 }, { "epoch": 880.921052631579, "grad_norm": 1.2895199060440063, "learning_rate": 0.0001, "loss": 0.0107, "step": 133900 }, { "epoch": 880.9868421052631, "grad_norm": 0.9743114113807678, "learning_rate": 0.0001, "loss": 0.0119, "step": 133910 }, { "epoch": 881.0526315789474, "grad_norm": 1.6892739534378052, "learning_rate": 0.0001, "loss": 0.0101, "step": 133920 }, { "epoch": 881.1184210526316, "grad_norm": 1.220253348350525, "learning_rate": 0.0001, "loss": 0.0143, "step": 133930 }, { "epoch": 881.1842105263158, "grad_norm": 1.1200047731399536, "learning_rate": 0.0001, "loss": 0.0148, "step": 133940 }, { "epoch": 881.25, "grad_norm": 1.5744578838348389, "learning_rate": 0.0001, "loss": 0.0159, "step": 133950 }, { "epoch": 881.3157894736842, "grad_norm": 1.7291123867034912, "learning_rate": 0.0001, "loss": 0.0096, "step": 133960 }, { "epoch": 881.3815789473684, "grad_norm": 1.2279319763183594, "learning_rate": 0.0001, "loss": 0.0121, "step": 133970 }, { "epoch": 881.4473684210526, "grad_norm": 1.5421900749206543, "learning_rate": 0.0001, "loss": 0.0147, "step": 133980 }, { "epoch": 881.5131578947369, "grad_norm": 0.9903284907341003, "learning_rate": 0.0001, "loss": 0.011, "step": 133990 }, { "epoch": 881.578947368421, "grad_norm": 1.4152885675430298, "learning_rate": 0.0001, "loss": 0.0125, "step": 134000 }, { "epoch": 881.6447368421053, "grad_norm": 1.0766606330871582, "learning_rate": 0.0001, "loss": 0.0134, "step": 134010 }, { "epoch": 881.7105263157895, "grad_norm": 1.3188596963882446, "learning_rate": 0.0001, "loss": 0.0091, "step": 134020 }, { "epoch": 881.7763157894736, "grad_norm": 1.5521384477615356, "learning_rate": 0.0001, "loss": 0.0128, "step": 134030 }, { "epoch": 881.8421052631579, "grad_norm": 1.379313588142395, "learning_rate": 0.0001, "loss": 0.0144, "step": 134040 }, { "epoch": 881.9078947368421, "grad_norm": 1.1597415208816528, "learning_rate": 0.0001, "loss": 0.0149, "step": 134050 }, { "epoch": 881.9736842105264, "grad_norm": 1.2537131309509277, "learning_rate": 0.0001, "loss": 0.0141, "step": 134060 }, { "epoch": 882.0394736842105, "grad_norm": 1.3565210103988647, "learning_rate": 0.0001, "loss": 0.0134, "step": 134070 }, { "epoch": 882.1052631578947, "grad_norm": 1.5105983018875122, "learning_rate": 0.0001, "loss": 0.0153, "step": 134080 }, { "epoch": 882.171052631579, "grad_norm": 1.2973389625549316, "learning_rate": 0.0001, "loss": 0.0119, "step": 134090 }, { "epoch": 882.2368421052631, "grad_norm": 1.0367259979248047, "learning_rate": 0.0001, "loss": 0.0132, "step": 134100 }, { "epoch": 882.3026315789474, "grad_norm": 1.6998002529144287, "learning_rate": 0.0001, "loss": 0.012, "step": 134110 }, { "epoch": 882.3684210526316, "grad_norm": 1.0276767015457153, "learning_rate": 0.0001, "loss": 0.011, "step": 134120 }, { "epoch": 882.4342105263158, "grad_norm": 1.3938218355178833, "learning_rate": 0.0001, "loss": 0.0161, "step": 134130 }, { "epoch": 882.5, "grad_norm": 1.9859529733657837, "learning_rate": 0.0001, "loss": 0.0115, "step": 134140 }, { "epoch": 882.5657894736842, "grad_norm": 1.5768158435821533, "learning_rate": 0.0001, "loss": 0.0146, "step": 134150 }, { "epoch": 882.6315789473684, "grad_norm": 1.2450268268585205, "learning_rate": 0.0001, "loss": 0.0112, "step": 134160 }, { "epoch": 882.6973684210526, "grad_norm": 1.526232123374939, "learning_rate": 0.0001, "loss": 0.0117, "step": 134170 }, { "epoch": 882.7631578947369, "grad_norm": 1.2618037462234497, "learning_rate": 0.0001, "loss": 0.0121, "step": 134180 }, { "epoch": 882.828947368421, "grad_norm": 1.497081995010376, "learning_rate": 0.0001, "loss": 0.0126, "step": 134190 }, { "epoch": 882.8947368421053, "grad_norm": 1.8093589544296265, "learning_rate": 0.0001, "loss": 0.0165, "step": 134200 }, { "epoch": 882.9605263157895, "grad_norm": 1.0960357189178467, "learning_rate": 0.0001, "loss": 0.0108, "step": 134210 }, { "epoch": 883.0263157894736, "grad_norm": 1.885258436203003, "learning_rate": 0.0001, "loss": 0.0122, "step": 134220 }, { "epoch": 883.0921052631579, "grad_norm": 1.4587990045547485, "learning_rate": 0.0001, "loss": 0.0137, "step": 134230 }, { "epoch": 883.1578947368421, "grad_norm": 1.3738679885864258, "learning_rate": 0.0001, "loss": 0.0139, "step": 134240 }, { "epoch": 883.2236842105264, "grad_norm": 1.2695456743240356, "learning_rate": 0.0001, "loss": 0.0122, "step": 134250 }, { "epoch": 883.2894736842105, "grad_norm": 1.69675612449646, "learning_rate": 0.0001, "loss": 0.0117, "step": 134260 }, { "epoch": 883.3552631578947, "grad_norm": 1.7994858026504517, "learning_rate": 0.0001, "loss": 0.0127, "step": 134270 }, { "epoch": 883.421052631579, "grad_norm": 1.5761483907699585, "learning_rate": 0.0001, "loss": 0.0137, "step": 134280 }, { "epoch": 883.4868421052631, "grad_norm": 1.808353066444397, "learning_rate": 0.0001, "loss": 0.013, "step": 134290 }, { "epoch": 883.5526315789474, "grad_norm": 1.5391381978988647, "learning_rate": 0.0001, "loss": 0.0115, "step": 134300 }, { "epoch": 883.6184210526316, "grad_norm": 1.4536364078521729, "learning_rate": 0.0001, "loss": 0.013, "step": 134310 }, { "epoch": 883.6842105263158, "grad_norm": 1.3310377597808838, "learning_rate": 0.0001, "loss": 0.0106, "step": 134320 }, { "epoch": 883.75, "grad_norm": 1.755284309387207, "learning_rate": 0.0001, "loss": 0.0156, "step": 134330 }, { "epoch": 883.8157894736842, "grad_norm": 1.6743245124816895, "learning_rate": 0.0001, "loss": 0.0191, "step": 134340 }, { "epoch": 883.8815789473684, "grad_norm": 1.616434931755066, "learning_rate": 0.0001, "loss": 0.01, "step": 134350 }, { "epoch": 883.9473684210526, "grad_norm": 1.8011564016342163, "learning_rate": 0.0001, "loss": 0.0117, "step": 134360 }, { "epoch": 884.0131578947369, "grad_norm": 1.3567428588867188, "learning_rate": 0.0001, "loss": 0.0095, "step": 134370 }, { "epoch": 884.078947368421, "grad_norm": 1.5371876955032349, "learning_rate": 0.0001, "loss": 0.0166, "step": 134380 }, { "epoch": 884.1447368421053, "grad_norm": 1.2629222869873047, "learning_rate": 0.0001, "loss": 0.0149, "step": 134390 }, { "epoch": 884.2105263157895, "grad_norm": 1.425181269645691, "learning_rate": 0.0001, "loss": 0.0129, "step": 134400 }, { "epoch": 884.2763157894736, "grad_norm": 1.550247311592102, "learning_rate": 0.0001, "loss": 0.0131, "step": 134410 }, { "epoch": 884.3421052631579, "grad_norm": 1.3979812860488892, "learning_rate": 0.0001, "loss": 0.0122, "step": 134420 }, { "epoch": 884.4078947368421, "grad_norm": 1.3298181295394897, "learning_rate": 0.0001, "loss": 0.0107, "step": 134430 }, { "epoch": 884.4736842105264, "grad_norm": 1.4510338306427002, "learning_rate": 0.0001, "loss": 0.0126, "step": 134440 }, { "epoch": 884.5394736842105, "grad_norm": 1.5977343320846558, "learning_rate": 0.0001, "loss": 0.0136, "step": 134450 }, { "epoch": 884.6052631578947, "grad_norm": 1.0611422061920166, "learning_rate": 0.0001, "loss": 0.0133, "step": 134460 }, { "epoch": 884.671052631579, "grad_norm": 1.3751147985458374, "learning_rate": 0.0001, "loss": 0.0133, "step": 134470 }, { "epoch": 884.7368421052631, "grad_norm": 1.4800809621810913, "learning_rate": 0.0001, "loss": 0.0094, "step": 134480 }, { "epoch": 884.8026315789474, "grad_norm": 1.454749584197998, "learning_rate": 0.0001, "loss": 0.0096, "step": 134490 }, { "epoch": 884.8684210526316, "grad_norm": 1.535650372505188, "learning_rate": 0.0001, "loss": 0.0109, "step": 134500 }, { "epoch": 884.9342105263158, "grad_norm": 1.748141884803772, "learning_rate": 0.0001, "loss": 0.016, "step": 134510 }, { "epoch": 885.0, "grad_norm": 1.3982489109039307, "learning_rate": 0.0001, "loss": 0.0129, "step": 134520 }, { "epoch": 885.0657894736842, "grad_norm": 1.7072374820709229, "learning_rate": 0.0001, "loss": 0.0124, "step": 134530 }, { "epoch": 885.1315789473684, "grad_norm": 1.213473916053772, "learning_rate": 0.0001, "loss": 0.0138, "step": 134540 }, { "epoch": 885.1973684210526, "grad_norm": 1.6071996688842773, "learning_rate": 0.0001, "loss": 0.0125, "step": 134550 }, { "epoch": 885.2631578947369, "grad_norm": 1.4437625408172607, "learning_rate": 0.0001, "loss": 0.013, "step": 134560 }, { "epoch": 885.328947368421, "grad_norm": 1.165787696838379, "learning_rate": 0.0001, "loss": 0.0092, "step": 134570 }, { "epoch": 885.3947368421053, "grad_norm": 1.4363893270492554, "learning_rate": 0.0001, "loss": 0.0128, "step": 134580 }, { "epoch": 885.4605263157895, "grad_norm": 1.3992339372634888, "learning_rate": 0.0001, "loss": 0.0156, "step": 134590 }, { "epoch": 885.5263157894736, "grad_norm": 1.5231928825378418, "learning_rate": 0.0001, "loss": 0.0108, "step": 134600 }, { "epoch": 885.5921052631579, "grad_norm": 1.483432412147522, "learning_rate": 0.0001, "loss": 0.013, "step": 134610 }, { "epoch": 885.6578947368421, "grad_norm": 1.8500158786773682, "learning_rate": 0.0001, "loss": 0.012, "step": 134620 }, { "epoch": 885.7236842105264, "grad_norm": 1.4468718767166138, "learning_rate": 0.0001, "loss": 0.0137, "step": 134630 }, { "epoch": 885.7894736842105, "grad_norm": 1.338241696357727, "learning_rate": 0.0001, "loss": 0.0157, "step": 134640 }, { "epoch": 885.8552631578947, "grad_norm": 1.2766478061676025, "learning_rate": 0.0001, "loss": 0.0126, "step": 134650 }, { "epoch": 885.921052631579, "grad_norm": 1.4374608993530273, "learning_rate": 0.0001, "loss": 0.0173, "step": 134660 }, { "epoch": 885.9868421052631, "grad_norm": 1.3652408123016357, "learning_rate": 0.0001, "loss": 0.0117, "step": 134670 }, { "epoch": 886.0526315789474, "grad_norm": 1.7450436353683472, "learning_rate": 0.0001, "loss": 0.0133, "step": 134680 }, { "epoch": 886.1184210526316, "grad_norm": 1.3904579877853394, "learning_rate": 0.0001, "loss": 0.0127, "step": 134690 }, { "epoch": 886.1842105263158, "grad_norm": 1.8692371845245361, "learning_rate": 0.0001, "loss": 0.016, "step": 134700 }, { "epoch": 886.25, "grad_norm": 1.600956678390503, "learning_rate": 0.0001, "loss": 0.0112, "step": 134710 }, { "epoch": 886.3157894736842, "grad_norm": 1.470109224319458, "learning_rate": 0.0001, "loss": 0.0141, "step": 134720 }, { "epoch": 886.3815789473684, "grad_norm": 1.7297418117523193, "learning_rate": 0.0001, "loss": 0.0111, "step": 134730 }, { "epoch": 886.4473684210526, "grad_norm": 1.3836511373519897, "learning_rate": 0.0001, "loss": 0.0153, "step": 134740 }, { "epoch": 886.5131578947369, "grad_norm": 1.7306674718856812, "learning_rate": 0.0001, "loss": 0.0116, "step": 134750 }, { "epoch": 886.578947368421, "grad_norm": 2.120027542114258, "learning_rate": 0.0001, "loss": 0.0151, "step": 134760 }, { "epoch": 886.6447368421053, "grad_norm": 2.110367774963379, "learning_rate": 0.0001, "loss": 0.0116, "step": 134770 }, { "epoch": 886.7105263157895, "grad_norm": 1.674525499343872, "learning_rate": 0.0001, "loss": 0.0136, "step": 134780 }, { "epoch": 886.7763157894736, "grad_norm": 1.5923429727554321, "learning_rate": 0.0001, "loss": 0.0126, "step": 134790 }, { "epoch": 886.8421052631579, "grad_norm": 2.0376503467559814, "learning_rate": 0.0001, "loss": 0.0124, "step": 134800 }, { "epoch": 886.9078947368421, "grad_norm": 2.1592350006103516, "learning_rate": 0.0001, "loss": 0.0123, "step": 134810 }, { "epoch": 886.9736842105264, "grad_norm": 1.5315005779266357, "learning_rate": 0.0001, "loss": 0.0092, "step": 134820 }, { "epoch": 887.0394736842105, "grad_norm": 1.830115795135498, "learning_rate": 0.0001, "loss": 0.0103, "step": 134830 }, { "epoch": 887.1052631578947, "grad_norm": 1.2257558107376099, "learning_rate": 0.0001, "loss": 0.0129, "step": 134840 }, { "epoch": 887.171052631579, "grad_norm": 1.2288870811462402, "learning_rate": 0.0001, "loss": 0.012, "step": 134850 }, { "epoch": 887.2368421052631, "grad_norm": 1.6305177211761475, "learning_rate": 0.0001, "loss": 0.0132, "step": 134860 }, { "epoch": 887.3026315789474, "grad_norm": 1.5604944229125977, "learning_rate": 0.0001, "loss": 0.0125, "step": 134870 }, { "epoch": 887.3684210526316, "grad_norm": 1.3217166662216187, "learning_rate": 0.0001, "loss": 0.0151, "step": 134880 }, { "epoch": 887.4342105263158, "grad_norm": 1.723432183265686, "learning_rate": 0.0001, "loss": 0.0106, "step": 134890 }, { "epoch": 887.5, "grad_norm": 1.5084279775619507, "learning_rate": 0.0001, "loss": 0.0095, "step": 134900 }, { "epoch": 887.5657894736842, "grad_norm": 1.863463044166565, "learning_rate": 0.0001, "loss": 0.0125, "step": 134910 }, { "epoch": 887.6315789473684, "grad_norm": 1.575991153717041, "learning_rate": 0.0001, "loss": 0.0169, "step": 134920 }, { "epoch": 887.6973684210526, "grad_norm": 1.2476664781570435, "learning_rate": 0.0001, "loss": 0.0092, "step": 134930 }, { "epoch": 887.7631578947369, "grad_norm": 1.3615800142288208, "learning_rate": 0.0001, "loss": 0.0116, "step": 134940 }, { "epoch": 887.828947368421, "grad_norm": 1.4422527551651, "learning_rate": 0.0001, "loss": 0.0116, "step": 134950 }, { "epoch": 887.8947368421053, "grad_norm": 1.637459635734558, "learning_rate": 0.0001, "loss": 0.0145, "step": 134960 }, { "epoch": 887.9605263157895, "grad_norm": 1.4582074880599976, "learning_rate": 0.0001, "loss": 0.0174, "step": 134970 }, { "epoch": 888.0263157894736, "grad_norm": 1.3538672924041748, "learning_rate": 0.0001, "loss": 0.0127, "step": 134980 }, { "epoch": 888.0921052631579, "grad_norm": 1.3288559913635254, "learning_rate": 0.0001, "loss": 0.0142, "step": 134990 }, { "epoch": 888.1578947368421, "grad_norm": 1.1873894929885864, "learning_rate": 0.0001, "loss": 0.0128, "step": 135000 }, { "epoch": 888.2236842105264, "grad_norm": 1.4086946249008179, "learning_rate": 0.0001, "loss": 0.0163, "step": 135010 }, { "epoch": 888.2894736842105, "grad_norm": 1.4349915981292725, "learning_rate": 0.0001, "loss": 0.0113, "step": 135020 }, { "epoch": 888.3552631578947, "grad_norm": 1.2611371278762817, "learning_rate": 0.0001, "loss": 0.0117, "step": 135030 }, { "epoch": 888.421052631579, "grad_norm": 1.2916208505630493, "learning_rate": 0.0001, "loss": 0.0122, "step": 135040 }, { "epoch": 888.4868421052631, "grad_norm": 1.0906051397323608, "learning_rate": 0.0001, "loss": 0.012, "step": 135050 }, { "epoch": 888.5526315789474, "grad_norm": 1.3602946996688843, "learning_rate": 0.0001, "loss": 0.0111, "step": 135060 }, { "epoch": 888.6184210526316, "grad_norm": 1.6491271257400513, "learning_rate": 0.0001, "loss": 0.0095, "step": 135070 }, { "epoch": 888.6842105263158, "grad_norm": 1.4467674493789673, "learning_rate": 0.0001, "loss": 0.0131, "step": 135080 }, { "epoch": 888.75, "grad_norm": 0.8551270365715027, "learning_rate": 0.0001, "loss": 0.0137, "step": 135090 }, { "epoch": 888.8157894736842, "grad_norm": 1.2980036735534668, "learning_rate": 0.0001, "loss": 0.0167, "step": 135100 }, { "epoch": 888.8815789473684, "grad_norm": 1.5750722885131836, "learning_rate": 0.0001, "loss": 0.017, "step": 135110 }, { "epoch": 888.9473684210526, "grad_norm": 1.1047343015670776, "learning_rate": 0.0001, "loss": 0.011, "step": 135120 }, { "epoch": 889.0131578947369, "grad_norm": 1.4044899940490723, "learning_rate": 0.0001, "loss": 0.0123, "step": 135130 }, { "epoch": 889.078947368421, "grad_norm": 1.552643895149231, "learning_rate": 0.0001, "loss": 0.0112, "step": 135140 }, { "epoch": 889.1447368421053, "grad_norm": 1.4746958017349243, "learning_rate": 0.0001, "loss": 0.013, "step": 135150 }, { "epoch": 889.2105263157895, "grad_norm": 1.700591802597046, "learning_rate": 0.0001, "loss": 0.0119, "step": 135160 }, { "epoch": 889.2763157894736, "grad_norm": 1.482553482055664, "learning_rate": 0.0001, "loss": 0.012, "step": 135170 }, { "epoch": 889.3421052631579, "grad_norm": 1.633016586303711, "learning_rate": 0.0001, "loss": 0.0163, "step": 135180 }, { "epoch": 889.4078947368421, "grad_norm": 1.181949496269226, "learning_rate": 0.0001, "loss": 0.0105, "step": 135190 }, { "epoch": 889.4736842105264, "grad_norm": 1.602190613746643, "learning_rate": 0.0001, "loss": 0.0141, "step": 135200 }, { "epoch": 889.5394736842105, "grad_norm": 1.3340134620666504, "learning_rate": 0.0001, "loss": 0.0121, "step": 135210 }, { "epoch": 889.6052631578947, "grad_norm": 1.533037543296814, "learning_rate": 0.0001, "loss": 0.0115, "step": 135220 }, { "epoch": 889.671052631579, "grad_norm": 1.4396798610687256, "learning_rate": 0.0001, "loss": 0.0152, "step": 135230 }, { "epoch": 889.7368421052631, "grad_norm": 1.2773888111114502, "learning_rate": 0.0001, "loss": 0.0129, "step": 135240 }, { "epoch": 889.8026315789474, "grad_norm": 1.3919786214828491, "learning_rate": 0.0001, "loss": 0.0156, "step": 135250 }, { "epoch": 889.8684210526316, "grad_norm": 1.3261969089508057, "learning_rate": 0.0001, "loss": 0.013, "step": 135260 }, { "epoch": 889.9342105263158, "grad_norm": 1.5091975927352905, "learning_rate": 0.0001, "loss": 0.0162, "step": 135270 }, { "epoch": 890.0, "grad_norm": 0.9509366750717163, "learning_rate": 0.0001, "loss": 0.0114, "step": 135280 }, { "epoch": 890.0657894736842, "grad_norm": 1.9044065475463867, "learning_rate": 0.0001, "loss": 0.0101, "step": 135290 }, { "epoch": 890.1315789473684, "grad_norm": 1.6680845022201538, "learning_rate": 0.0001, "loss": 0.0094, "step": 135300 }, { "epoch": 890.1973684210526, "grad_norm": 1.5653338432312012, "learning_rate": 0.0001, "loss": 0.0104, "step": 135310 }, { "epoch": 890.2631578947369, "grad_norm": 1.3510191440582275, "learning_rate": 0.0001, "loss": 0.0152, "step": 135320 }, { "epoch": 890.328947368421, "grad_norm": 1.4754549264907837, "learning_rate": 0.0001, "loss": 0.0122, "step": 135330 }, { "epoch": 890.3947368421053, "grad_norm": 1.477015495300293, "learning_rate": 0.0001, "loss": 0.0127, "step": 135340 }, { "epoch": 890.4605263157895, "grad_norm": 1.7251476049423218, "learning_rate": 0.0001, "loss": 0.0102, "step": 135350 }, { "epoch": 890.5263157894736, "grad_norm": 1.4723482131958008, "learning_rate": 0.0001, "loss": 0.0127, "step": 135360 }, { "epoch": 890.5921052631579, "grad_norm": 1.2542877197265625, "learning_rate": 0.0001, "loss": 0.0181, "step": 135370 }, { "epoch": 890.6578947368421, "grad_norm": 1.1429797410964966, "learning_rate": 0.0001, "loss": 0.0119, "step": 135380 }, { "epoch": 890.7236842105264, "grad_norm": 1.2165924310684204, "learning_rate": 0.0001, "loss": 0.0153, "step": 135390 }, { "epoch": 890.7894736842105, "grad_norm": 1.27065110206604, "learning_rate": 0.0001, "loss": 0.0146, "step": 135400 }, { "epoch": 890.8552631578947, "grad_norm": 1.45904541015625, "learning_rate": 0.0001, "loss": 0.0115, "step": 135410 }, { "epoch": 890.921052631579, "grad_norm": 1.5938198566436768, "learning_rate": 0.0001, "loss": 0.0201, "step": 135420 }, { "epoch": 890.9868421052631, "grad_norm": 1.5697293281555176, "learning_rate": 0.0001, "loss": 0.0113, "step": 135430 }, { "epoch": 891.0526315789474, "grad_norm": 1.4963572025299072, "learning_rate": 0.0001, "loss": 0.0126, "step": 135440 }, { "epoch": 891.1184210526316, "grad_norm": 1.3538835048675537, "learning_rate": 0.0001, "loss": 0.0118, "step": 135450 }, { "epoch": 891.1842105263158, "grad_norm": 1.3306502103805542, "learning_rate": 0.0001, "loss": 0.0145, "step": 135460 }, { "epoch": 891.25, "grad_norm": 1.8533531427383423, "learning_rate": 0.0001, "loss": 0.0136, "step": 135470 }, { "epoch": 891.3157894736842, "grad_norm": 1.7770261764526367, "learning_rate": 0.0001, "loss": 0.012, "step": 135480 }, { "epoch": 891.3815789473684, "grad_norm": 1.4658820629119873, "learning_rate": 0.0001, "loss": 0.0132, "step": 135490 }, { "epoch": 891.4473684210526, "grad_norm": 1.8140426874160767, "learning_rate": 0.0001, "loss": 0.0128, "step": 135500 }, { "epoch": 891.5131578947369, "grad_norm": 1.8695498704910278, "learning_rate": 0.0001, "loss": 0.014, "step": 135510 }, { "epoch": 891.578947368421, "grad_norm": 1.5460034608840942, "learning_rate": 0.0001, "loss": 0.0141, "step": 135520 }, { "epoch": 891.6447368421053, "grad_norm": 1.848440408706665, "learning_rate": 0.0001, "loss": 0.0127, "step": 135530 }, { "epoch": 891.7105263157895, "grad_norm": 1.9472659826278687, "learning_rate": 0.0001, "loss": 0.0126, "step": 135540 }, { "epoch": 891.7763157894736, "grad_norm": 1.4797568321228027, "learning_rate": 0.0001, "loss": 0.0119, "step": 135550 }, { "epoch": 891.8421052631579, "grad_norm": 1.6556957960128784, "learning_rate": 0.0001, "loss": 0.0143, "step": 135560 }, { "epoch": 891.9078947368421, "grad_norm": 1.1036927700042725, "learning_rate": 0.0001, "loss": 0.0124, "step": 135570 }, { "epoch": 891.9736842105264, "grad_norm": 1.70611572265625, "learning_rate": 0.0001, "loss": 0.0107, "step": 135580 }, { "epoch": 892.0394736842105, "grad_norm": 1.8966376781463623, "learning_rate": 0.0001, "loss": 0.0125, "step": 135590 }, { "epoch": 892.1052631578947, "grad_norm": 1.4789830446243286, "learning_rate": 0.0001, "loss": 0.011, "step": 135600 }, { "epoch": 892.171052631579, "grad_norm": 1.3025023937225342, "learning_rate": 0.0001, "loss": 0.0124, "step": 135610 }, { "epoch": 892.2368421052631, "grad_norm": 1.5626308917999268, "learning_rate": 0.0001, "loss": 0.012, "step": 135620 }, { "epoch": 892.3026315789474, "grad_norm": 1.781040072441101, "learning_rate": 0.0001, "loss": 0.0163, "step": 135630 }, { "epoch": 892.3684210526316, "grad_norm": 1.3218640089035034, "learning_rate": 0.0001, "loss": 0.0111, "step": 135640 }, { "epoch": 892.4342105263158, "grad_norm": 1.7405555248260498, "learning_rate": 0.0001, "loss": 0.0123, "step": 135650 }, { "epoch": 892.5, "grad_norm": 1.6167594194412231, "learning_rate": 0.0001, "loss": 0.0111, "step": 135660 }, { "epoch": 892.5657894736842, "grad_norm": 1.3801953792572021, "learning_rate": 0.0001, "loss": 0.0193, "step": 135670 }, { "epoch": 892.6315789473684, "grad_norm": 1.4360356330871582, "learning_rate": 0.0001, "loss": 0.0105, "step": 135680 }, { "epoch": 892.6973684210526, "grad_norm": 1.5405759811401367, "learning_rate": 0.0001, "loss": 0.0139, "step": 135690 }, { "epoch": 892.7631578947369, "grad_norm": 1.3726738691329956, "learning_rate": 0.0001, "loss": 0.0123, "step": 135700 }, { "epoch": 892.828947368421, "grad_norm": 1.6514209508895874, "learning_rate": 0.0001, "loss": 0.0148, "step": 135710 }, { "epoch": 892.8947368421053, "grad_norm": 1.3907421827316284, "learning_rate": 0.0001, "loss": 0.0132, "step": 135720 }, { "epoch": 892.9605263157895, "grad_norm": 1.1810506582260132, "learning_rate": 0.0001, "loss": 0.0099, "step": 135730 }, { "epoch": 893.0263157894736, "grad_norm": 1.4756494760513306, "learning_rate": 0.0001, "loss": 0.012, "step": 135740 }, { "epoch": 893.0921052631579, "grad_norm": 1.3359744548797607, "learning_rate": 0.0001, "loss": 0.0168, "step": 135750 }, { "epoch": 893.1578947368421, "grad_norm": 1.1488641500473022, "learning_rate": 0.0001, "loss": 0.0135, "step": 135760 }, { "epoch": 893.2236842105264, "grad_norm": 1.720204472541809, "learning_rate": 0.0001, "loss": 0.015, "step": 135770 }, { "epoch": 893.2894736842105, "grad_norm": 1.7267961502075195, "learning_rate": 0.0001, "loss": 0.0146, "step": 135780 }, { "epoch": 893.3552631578947, "grad_norm": 1.4325425624847412, "learning_rate": 0.0001, "loss": 0.0144, "step": 135790 }, { "epoch": 893.421052631579, "grad_norm": 1.387343168258667, "learning_rate": 0.0001, "loss": 0.0124, "step": 135800 }, { "epoch": 893.4868421052631, "grad_norm": 1.2187607288360596, "learning_rate": 0.0001, "loss": 0.0098, "step": 135810 }, { "epoch": 893.5526315789474, "grad_norm": 1.554042935371399, "learning_rate": 0.0001, "loss": 0.0125, "step": 135820 }, { "epoch": 893.6184210526316, "grad_norm": 1.400560736656189, "learning_rate": 0.0001, "loss": 0.0124, "step": 135830 }, { "epoch": 893.6842105263158, "grad_norm": 1.8502637147903442, "learning_rate": 0.0001, "loss": 0.013, "step": 135840 }, { "epoch": 893.75, "grad_norm": 1.3692166805267334, "learning_rate": 0.0001, "loss": 0.0127, "step": 135850 }, { "epoch": 893.8157894736842, "grad_norm": 1.0646566152572632, "learning_rate": 0.0001, "loss": 0.0123, "step": 135860 }, { "epoch": 893.8815789473684, "grad_norm": 1.4186389446258545, "learning_rate": 0.0001, "loss": 0.0111, "step": 135870 }, { "epoch": 893.9473684210526, "grad_norm": 1.642809510231018, "learning_rate": 0.0001, "loss": 0.0158, "step": 135880 }, { "epoch": 894.0131578947369, "grad_norm": 1.1756027936935425, "learning_rate": 0.0001, "loss": 0.0104, "step": 135890 }, { "epoch": 894.078947368421, "grad_norm": 1.2103203535079956, "learning_rate": 0.0001, "loss": 0.0142, "step": 135900 }, { "epoch": 894.1447368421053, "grad_norm": 1.1064486503601074, "learning_rate": 0.0001, "loss": 0.0166, "step": 135910 }, { "epoch": 894.2105263157895, "grad_norm": 1.2170182466506958, "learning_rate": 0.0001, "loss": 0.011, "step": 135920 }, { "epoch": 894.2763157894736, "grad_norm": 1.49740731716156, "learning_rate": 0.0001, "loss": 0.0149, "step": 135930 }, { "epoch": 894.3421052631579, "grad_norm": 1.6581743955612183, "learning_rate": 0.0001, "loss": 0.0109, "step": 135940 }, { "epoch": 894.4078947368421, "grad_norm": 1.7010221481323242, "learning_rate": 0.0001, "loss": 0.0122, "step": 135950 }, { "epoch": 894.4736842105264, "grad_norm": 1.725363850593567, "learning_rate": 0.0001, "loss": 0.0108, "step": 135960 }, { "epoch": 894.5394736842105, "grad_norm": 1.5271588563919067, "learning_rate": 0.0001, "loss": 0.0128, "step": 135970 }, { "epoch": 894.6052631578947, "grad_norm": 1.2828328609466553, "learning_rate": 0.0001, "loss": 0.0139, "step": 135980 }, { "epoch": 894.671052631579, "grad_norm": 1.5177345275878906, "learning_rate": 0.0001, "loss": 0.0138, "step": 135990 }, { "epoch": 894.7368421052631, "grad_norm": 1.2821431159973145, "learning_rate": 0.0001, "loss": 0.0144, "step": 136000 }, { "epoch": 894.8026315789474, "grad_norm": 1.5015323162078857, "learning_rate": 0.0001, "loss": 0.0131, "step": 136010 }, { "epoch": 894.8684210526316, "grad_norm": 1.3415594100952148, "learning_rate": 0.0001, "loss": 0.013, "step": 136020 }, { "epoch": 894.9342105263158, "grad_norm": 0.9590960741043091, "learning_rate": 0.0001, "loss": 0.0111, "step": 136030 }, { "epoch": 895.0, "grad_norm": 1.5746740102767944, "learning_rate": 0.0001, "loss": 0.0121, "step": 136040 }, { "epoch": 895.0657894736842, "grad_norm": 1.6459190845489502, "learning_rate": 0.0001, "loss": 0.0118, "step": 136050 }, { "epoch": 895.1315789473684, "grad_norm": 1.4756171703338623, "learning_rate": 0.0001, "loss": 0.0137, "step": 136060 }, { "epoch": 895.1973684210526, "grad_norm": 1.286449909210205, "learning_rate": 0.0001, "loss": 0.0124, "step": 136070 }, { "epoch": 895.2631578947369, "grad_norm": 1.477866291999817, "learning_rate": 0.0001, "loss": 0.0134, "step": 136080 }, { "epoch": 895.328947368421, "grad_norm": 1.406766414642334, "learning_rate": 0.0001, "loss": 0.0134, "step": 136090 }, { "epoch": 895.3947368421053, "grad_norm": 1.1255842447280884, "learning_rate": 0.0001, "loss": 0.0113, "step": 136100 }, { "epoch": 895.4605263157895, "grad_norm": 1.4592841863632202, "learning_rate": 0.0001, "loss": 0.013, "step": 136110 }, { "epoch": 895.5263157894736, "grad_norm": 1.4940675497055054, "learning_rate": 0.0001, "loss": 0.0116, "step": 136120 }, { "epoch": 895.5921052631579, "grad_norm": 1.84965980052948, "learning_rate": 0.0001, "loss": 0.0136, "step": 136130 }, { "epoch": 895.6578947368421, "grad_norm": 1.5009565353393555, "learning_rate": 0.0001, "loss": 0.0099, "step": 136140 }, { "epoch": 895.7236842105264, "grad_norm": 1.4633913040161133, "learning_rate": 0.0001, "loss": 0.0158, "step": 136150 }, { "epoch": 895.7894736842105, "grad_norm": 1.1136289834976196, "learning_rate": 0.0001, "loss": 0.015, "step": 136160 }, { "epoch": 895.8552631578947, "grad_norm": 1.296632170677185, "learning_rate": 0.0001, "loss": 0.0111, "step": 136170 }, { "epoch": 895.921052631579, "grad_norm": 1.3291099071502686, "learning_rate": 0.0001, "loss": 0.0114, "step": 136180 }, { "epoch": 895.9868421052631, "grad_norm": 1.339752197265625, "learning_rate": 0.0001, "loss": 0.0156, "step": 136190 }, { "epoch": 896.0526315789474, "grad_norm": 1.5336767435073853, "learning_rate": 0.0001, "loss": 0.0128, "step": 136200 }, { "epoch": 896.1184210526316, "grad_norm": 1.3189661502838135, "learning_rate": 0.0001, "loss": 0.0101, "step": 136210 }, { "epoch": 896.1842105263158, "grad_norm": 1.4493547677993774, "learning_rate": 0.0001, "loss": 0.0149, "step": 136220 }, { "epoch": 896.25, "grad_norm": 1.173412799835205, "learning_rate": 0.0001, "loss": 0.0116, "step": 136230 }, { "epoch": 896.3157894736842, "grad_norm": 1.1726124286651611, "learning_rate": 0.0001, "loss": 0.0112, "step": 136240 }, { "epoch": 896.3815789473684, "grad_norm": 1.1879676580429077, "learning_rate": 0.0001, "loss": 0.0122, "step": 136250 }, { "epoch": 896.4473684210526, "grad_norm": 1.527592420578003, "learning_rate": 0.0001, "loss": 0.0186, "step": 136260 }, { "epoch": 896.5131578947369, "grad_norm": 1.3317402601242065, "learning_rate": 0.0001, "loss": 0.0113, "step": 136270 }, { "epoch": 896.578947368421, "grad_norm": 1.5904461145401, "learning_rate": 0.0001, "loss": 0.0192, "step": 136280 }, { "epoch": 896.6447368421053, "grad_norm": 1.8804268836975098, "learning_rate": 0.0001, "loss": 0.0139, "step": 136290 }, { "epoch": 896.7105263157895, "grad_norm": 1.4546760320663452, "learning_rate": 0.0001, "loss": 0.0116, "step": 136300 }, { "epoch": 896.7763157894736, "grad_norm": 1.2235292196273804, "learning_rate": 0.0001, "loss": 0.0137, "step": 136310 }, { "epoch": 896.8421052631579, "grad_norm": 1.8659155368804932, "learning_rate": 0.0001, "loss": 0.0135, "step": 136320 }, { "epoch": 896.9078947368421, "grad_norm": 1.4064891338348389, "learning_rate": 0.0001, "loss": 0.0109, "step": 136330 }, { "epoch": 896.9736842105264, "grad_norm": 1.8056575059890747, "learning_rate": 0.0001, "loss": 0.0092, "step": 136340 }, { "epoch": 897.0394736842105, "grad_norm": 1.5917731523513794, "learning_rate": 0.0001, "loss": 0.0111, "step": 136350 }, { "epoch": 897.1052631578947, "grad_norm": 1.6437656879425049, "learning_rate": 0.0001, "loss": 0.0091, "step": 136360 }, { "epoch": 897.171052631579, "grad_norm": 1.7257434129714966, "learning_rate": 0.0001, "loss": 0.0102, "step": 136370 }, { "epoch": 897.2368421052631, "grad_norm": 1.2154171466827393, "learning_rate": 0.0001, "loss": 0.0118, "step": 136380 }, { "epoch": 897.3026315789474, "grad_norm": 1.5774444341659546, "learning_rate": 0.0001, "loss": 0.0156, "step": 136390 }, { "epoch": 897.3684210526316, "grad_norm": 1.4335474967956543, "learning_rate": 0.0001, "loss": 0.0105, "step": 136400 }, { "epoch": 897.4342105263158, "grad_norm": 1.1196736097335815, "learning_rate": 0.0001, "loss": 0.0194, "step": 136410 }, { "epoch": 897.5, "grad_norm": 1.6899220943450928, "learning_rate": 0.0001, "loss": 0.0126, "step": 136420 }, { "epoch": 897.5657894736842, "grad_norm": 1.5995908975601196, "learning_rate": 0.0001, "loss": 0.0112, "step": 136430 }, { "epoch": 897.6315789473684, "grad_norm": 1.6034572124481201, "learning_rate": 0.0001, "loss": 0.0138, "step": 136440 }, { "epoch": 897.6973684210526, "grad_norm": 1.2521603107452393, "learning_rate": 0.0001, "loss": 0.0174, "step": 136450 }, { "epoch": 897.7631578947369, "grad_norm": 1.4977096319198608, "learning_rate": 0.0001, "loss": 0.0129, "step": 136460 }, { "epoch": 897.828947368421, "grad_norm": 1.3376282453536987, "learning_rate": 0.0001, "loss": 0.0126, "step": 136470 }, { "epoch": 897.8947368421053, "grad_norm": 1.5600872039794922, "learning_rate": 0.0001, "loss": 0.0131, "step": 136480 }, { "epoch": 897.9605263157895, "grad_norm": 1.2424918413162231, "learning_rate": 0.0001, "loss": 0.0117, "step": 136490 }, { "epoch": 898.0263157894736, "grad_norm": 1.353691816329956, "learning_rate": 0.0001, "loss": 0.0091, "step": 136500 }, { "epoch": 898.0921052631579, "grad_norm": 1.0788476467132568, "learning_rate": 0.0001, "loss": 0.0126, "step": 136510 }, { "epoch": 898.1578947368421, "grad_norm": 1.1362645626068115, "learning_rate": 0.0001, "loss": 0.014, "step": 136520 }, { "epoch": 898.2236842105264, "grad_norm": 1.0858073234558105, "learning_rate": 0.0001, "loss": 0.0123, "step": 136530 }, { "epoch": 898.2894736842105, "grad_norm": 1.6977238655090332, "learning_rate": 0.0001, "loss": 0.0157, "step": 136540 }, { "epoch": 898.3552631578947, "grad_norm": 1.468193769454956, "learning_rate": 0.0001, "loss": 0.0136, "step": 136550 }, { "epoch": 898.421052631579, "grad_norm": 1.0804797410964966, "learning_rate": 0.0001, "loss": 0.0187, "step": 136560 }, { "epoch": 898.4868421052631, "grad_norm": 1.333512544631958, "learning_rate": 0.0001, "loss": 0.0127, "step": 136570 }, { "epoch": 898.5526315789474, "grad_norm": 2.08994197845459, "learning_rate": 0.0001, "loss": 0.013, "step": 136580 }, { "epoch": 898.6184210526316, "grad_norm": 1.4953819513320923, "learning_rate": 0.0001, "loss": 0.0107, "step": 136590 }, { "epoch": 898.6842105263158, "grad_norm": 1.2844116687774658, "learning_rate": 0.0001, "loss": 0.0114, "step": 136600 }, { "epoch": 898.75, "grad_norm": 1.0682504177093506, "learning_rate": 0.0001, "loss": 0.013, "step": 136610 }, { "epoch": 898.8157894736842, "grad_norm": 1.2170464992523193, "learning_rate": 0.0001, "loss": 0.0111, "step": 136620 }, { "epoch": 898.8815789473684, "grad_norm": 1.5195752382278442, "learning_rate": 0.0001, "loss": 0.0178, "step": 136630 }, { "epoch": 898.9473684210526, "grad_norm": 1.192264199256897, "learning_rate": 0.0001, "loss": 0.0093, "step": 136640 }, { "epoch": 899.0131578947369, "grad_norm": 1.152502179145813, "learning_rate": 0.0001, "loss": 0.0136, "step": 136650 }, { "epoch": 899.078947368421, "grad_norm": 1.2217391729354858, "learning_rate": 0.0001, "loss": 0.0132, "step": 136660 }, { "epoch": 899.1447368421053, "grad_norm": 1.5831010341644287, "learning_rate": 0.0001, "loss": 0.0123, "step": 136670 }, { "epoch": 899.2105263157895, "grad_norm": 1.2466293573379517, "learning_rate": 0.0001, "loss": 0.0107, "step": 136680 }, { "epoch": 899.2763157894736, "grad_norm": 0.9670991897583008, "learning_rate": 0.0001, "loss": 0.0156, "step": 136690 }, { "epoch": 899.3421052631579, "grad_norm": 1.8186942338943481, "learning_rate": 0.0001, "loss": 0.0115, "step": 136700 }, { "epoch": 899.4078947368421, "grad_norm": 1.9102139472961426, "learning_rate": 0.0001, "loss": 0.012, "step": 136710 }, { "epoch": 899.4736842105264, "grad_norm": 1.3122998476028442, "learning_rate": 0.0001, "loss": 0.014, "step": 136720 }, { "epoch": 899.5394736842105, "grad_norm": 1.783566951751709, "learning_rate": 0.0001, "loss": 0.0118, "step": 136730 }, { "epoch": 899.6052631578947, "grad_norm": 1.9589207172393799, "learning_rate": 0.0001, "loss": 0.0139, "step": 136740 }, { "epoch": 899.671052631579, "grad_norm": 1.2571213245391846, "learning_rate": 0.0001, "loss": 0.0103, "step": 136750 }, { "epoch": 899.7368421052631, "grad_norm": 1.3850572109222412, "learning_rate": 0.0001, "loss": 0.0146, "step": 136760 }, { "epoch": 899.8026315789474, "grad_norm": 1.2849969863891602, "learning_rate": 0.0001, "loss": 0.0143, "step": 136770 }, { "epoch": 899.8684210526316, "grad_norm": 1.5583264827728271, "learning_rate": 0.0001, "loss": 0.013, "step": 136780 }, { "epoch": 899.9342105263158, "grad_norm": 1.4792670011520386, "learning_rate": 0.0001, "loss": 0.0125, "step": 136790 }, { "epoch": 900.0, "grad_norm": 1.5621827840805054, "learning_rate": 0.0001, "loss": 0.0148, "step": 136800 }, { "epoch": 900.0657894736842, "grad_norm": 1.3813788890838623, "learning_rate": 0.0001, "loss": 0.011, "step": 136810 }, { "epoch": 900.1315789473684, "grad_norm": 1.4203070402145386, "learning_rate": 0.0001, "loss": 0.0117, "step": 136820 }, { "epoch": 900.1973684210526, "grad_norm": 1.7066304683685303, "learning_rate": 0.0001, "loss": 0.0128, "step": 136830 }, { "epoch": 900.2631578947369, "grad_norm": 1.672236442565918, "learning_rate": 0.0001, "loss": 0.0133, "step": 136840 }, { "epoch": 900.328947368421, "grad_norm": 1.3709981441497803, "learning_rate": 0.0001, "loss": 0.014, "step": 136850 }, { "epoch": 900.3947368421053, "grad_norm": 1.8081141710281372, "learning_rate": 0.0001, "loss": 0.0106, "step": 136860 }, { "epoch": 900.4605263157895, "grad_norm": 1.2462434768676758, "learning_rate": 0.0001, "loss": 0.0142, "step": 136870 }, { "epoch": 900.5263157894736, "grad_norm": 1.4033108949661255, "learning_rate": 0.0001, "loss": 0.0175, "step": 136880 }, { "epoch": 900.5921052631579, "grad_norm": 1.2590340375900269, "learning_rate": 0.0001, "loss": 0.0102, "step": 136890 }, { "epoch": 900.6578947368421, "grad_norm": 1.1290626525878906, "learning_rate": 0.0001, "loss": 0.0128, "step": 136900 }, { "epoch": 900.7236842105264, "grad_norm": 1.2451903820037842, "learning_rate": 0.0001, "loss": 0.0124, "step": 136910 }, { "epoch": 900.7894736842105, "grad_norm": 1.6430151462554932, "learning_rate": 0.0001, "loss": 0.0145, "step": 136920 }, { "epoch": 900.8552631578947, "grad_norm": 1.564396858215332, "learning_rate": 0.0001, "loss": 0.0092, "step": 136930 }, { "epoch": 900.921052631579, "grad_norm": 1.5965279340744019, "learning_rate": 0.0001, "loss": 0.0153, "step": 136940 }, { "epoch": 900.9868421052631, "grad_norm": 1.166930913925171, "learning_rate": 0.0001, "loss": 0.014, "step": 136950 }, { "epoch": 901.0526315789474, "grad_norm": 1.4417942762374878, "learning_rate": 0.0001, "loss": 0.0111, "step": 136960 }, { "epoch": 901.1184210526316, "grad_norm": 1.4586693048477173, "learning_rate": 0.0001, "loss": 0.0116, "step": 136970 }, { "epoch": 901.1842105263158, "grad_norm": 0.9686650037765503, "learning_rate": 0.0001, "loss": 0.0111, "step": 136980 }, { "epoch": 901.25, "grad_norm": 2.0463461875915527, "learning_rate": 0.0001, "loss": 0.0121, "step": 136990 }, { "epoch": 901.3157894736842, "grad_norm": 1.8337023258209229, "learning_rate": 0.0001, "loss": 0.0119, "step": 137000 }, { "epoch": 901.3815789473684, "grad_norm": 1.2212655544281006, "learning_rate": 0.0001, "loss": 0.0151, "step": 137010 }, { "epoch": 901.4473684210526, "grad_norm": 1.2059239149093628, "learning_rate": 0.0001, "loss": 0.0173, "step": 137020 }, { "epoch": 901.5131578947369, "grad_norm": 1.5050055980682373, "learning_rate": 0.0001, "loss": 0.0115, "step": 137030 }, { "epoch": 901.578947368421, "grad_norm": 1.2973823547363281, "learning_rate": 0.0001, "loss": 0.0142, "step": 137040 }, { "epoch": 901.6447368421053, "grad_norm": 1.4415000677108765, "learning_rate": 0.0001, "loss": 0.0135, "step": 137050 }, { "epoch": 901.7105263157895, "grad_norm": 1.5922696590423584, "learning_rate": 0.0001, "loss": 0.0145, "step": 137060 }, { "epoch": 901.7763157894736, "grad_norm": 1.654266357421875, "learning_rate": 0.0001, "loss": 0.0109, "step": 137070 }, { "epoch": 901.8421052631579, "grad_norm": 1.130575180053711, "learning_rate": 0.0001, "loss": 0.0161, "step": 137080 }, { "epoch": 901.9078947368421, "grad_norm": 1.5245037078857422, "learning_rate": 0.0001, "loss": 0.012, "step": 137090 }, { "epoch": 901.9736842105264, "grad_norm": 1.3042939901351929, "learning_rate": 0.0001, "loss": 0.0105, "step": 137100 }, { "epoch": 902.0394736842105, "grad_norm": 1.3441702127456665, "learning_rate": 0.0001, "loss": 0.012, "step": 137110 }, { "epoch": 902.1052631578947, "grad_norm": 1.2353383302688599, "learning_rate": 0.0001, "loss": 0.0124, "step": 137120 }, { "epoch": 902.171052631579, "grad_norm": 0.90091872215271, "learning_rate": 0.0001, "loss": 0.0125, "step": 137130 }, { "epoch": 902.2368421052631, "grad_norm": 1.4741153717041016, "learning_rate": 0.0001, "loss": 0.0118, "step": 137140 }, { "epoch": 902.3026315789474, "grad_norm": 1.536306619644165, "learning_rate": 0.0001, "loss": 0.0111, "step": 137150 }, { "epoch": 902.3684210526316, "grad_norm": 1.0488187074661255, "learning_rate": 0.0001, "loss": 0.011, "step": 137160 }, { "epoch": 902.4342105263158, "grad_norm": 1.3359413146972656, "learning_rate": 0.0001, "loss": 0.0118, "step": 137170 }, { "epoch": 902.5, "grad_norm": 1.4493563175201416, "learning_rate": 0.0001, "loss": 0.0158, "step": 137180 }, { "epoch": 902.5657894736842, "grad_norm": 1.7194744348526, "learning_rate": 0.0001, "loss": 0.0137, "step": 137190 }, { "epoch": 902.6315789473684, "grad_norm": 1.3892627954483032, "learning_rate": 0.0001, "loss": 0.0122, "step": 137200 }, { "epoch": 902.6973684210526, "grad_norm": 1.135801911354065, "learning_rate": 0.0001, "loss": 0.0112, "step": 137210 }, { "epoch": 902.7631578947369, "grad_norm": 1.5216295719146729, "learning_rate": 0.0001, "loss": 0.0136, "step": 137220 }, { "epoch": 902.828947368421, "grad_norm": 1.4562965631484985, "learning_rate": 0.0001, "loss": 0.0179, "step": 137230 }, { "epoch": 902.8947368421053, "grad_norm": 1.2142530679702759, "learning_rate": 0.0001, "loss": 0.0156, "step": 137240 }, { "epoch": 902.9605263157895, "grad_norm": 1.3931152820587158, "learning_rate": 0.0001, "loss": 0.0126, "step": 137250 }, { "epoch": 903.0263157894736, "grad_norm": 1.4093098640441895, "learning_rate": 0.0001, "loss": 0.0146, "step": 137260 }, { "epoch": 903.0921052631579, "grad_norm": 1.6305261850357056, "learning_rate": 0.0001, "loss": 0.0143, "step": 137270 }, { "epoch": 903.1578947368421, "grad_norm": 1.293936014175415, "learning_rate": 0.0001, "loss": 0.0154, "step": 137280 }, { "epoch": 903.2236842105264, "grad_norm": 2.0206539630889893, "learning_rate": 0.0001, "loss": 0.0188, "step": 137290 }, { "epoch": 903.2894736842105, "grad_norm": 1.6147409677505493, "learning_rate": 0.0001, "loss": 0.0102, "step": 137300 }, { "epoch": 903.3552631578947, "grad_norm": 1.5837059020996094, "learning_rate": 0.0001, "loss": 0.0099, "step": 137310 }, { "epoch": 903.421052631579, "grad_norm": 1.0943297147750854, "learning_rate": 0.0001, "loss": 0.0137, "step": 137320 }, { "epoch": 903.4868421052631, "grad_norm": 1.5545313358306885, "learning_rate": 0.0001, "loss": 0.0091, "step": 137330 }, { "epoch": 903.5526315789474, "grad_norm": 1.4945975542068481, "learning_rate": 0.0001, "loss": 0.0126, "step": 137340 }, { "epoch": 903.6184210526316, "grad_norm": 1.3893635272979736, "learning_rate": 0.0001, "loss": 0.0129, "step": 137350 }, { "epoch": 903.6842105263158, "grad_norm": 1.354582667350769, "learning_rate": 0.0001, "loss": 0.0117, "step": 137360 }, { "epoch": 903.75, "grad_norm": 0.9499492645263672, "learning_rate": 0.0001, "loss": 0.0153, "step": 137370 }, { "epoch": 903.8157894736842, "grad_norm": 1.3264191150665283, "learning_rate": 0.0001, "loss": 0.0141, "step": 137380 }, { "epoch": 903.8815789473684, "grad_norm": 1.3340685367584229, "learning_rate": 0.0001, "loss": 0.0142, "step": 137390 }, { "epoch": 903.9473684210526, "grad_norm": 1.704738974571228, "learning_rate": 0.0001, "loss": 0.0117, "step": 137400 }, { "epoch": 904.0131578947369, "grad_norm": 1.538528561592102, "learning_rate": 0.0001, "loss": 0.0101, "step": 137410 }, { "epoch": 904.078947368421, "grad_norm": 1.1702182292938232, "learning_rate": 0.0001, "loss": 0.0143, "step": 137420 }, { "epoch": 904.1447368421053, "grad_norm": 1.5792450904846191, "learning_rate": 0.0001, "loss": 0.0122, "step": 137430 }, { "epoch": 904.2105263157895, "grad_norm": 1.491194725036621, "learning_rate": 0.0001, "loss": 0.0108, "step": 137440 }, { "epoch": 904.2763157894736, "grad_norm": 1.6175085306167603, "learning_rate": 0.0001, "loss": 0.0104, "step": 137450 }, { "epoch": 904.3421052631579, "grad_norm": 1.2738581895828247, "learning_rate": 0.0001, "loss": 0.0114, "step": 137460 }, { "epoch": 904.4078947368421, "grad_norm": 0.9621702432632446, "learning_rate": 0.0001, "loss": 0.0133, "step": 137470 }, { "epoch": 904.4736842105264, "grad_norm": 1.5605860948562622, "learning_rate": 0.0001, "loss": 0.016, "step": 137480 }, { "epoch": 904.5394736842105, "grad_norm": 1.2345356941223145, "learning_rate": 0.0001, "loss": 0.0182, "step": 137490 }, { "epoch": 904.6052631578947, "grad_norm": 1.2982230186462402, "learning_rate": 0.0001, "loss": 0.0116, "step": 137500 }, { "epoch": 904.671052631579, "grad_norm": 1.0492901802062988, "learning_rate": 0.0001, "loss": 0.0128, "step": 137510 }, { "epoch": 904.7368421052631, "grad_norm": 1.1177693605422974, "learning_rate": 0.0001, "loss": 0.0132, "step": 137520 }, { "epoch": 904.8026315789474, "grad_norm": 1.358174204826355, "learning_rate": 0.0001, "loss": 0.0122, "step": 137530 }, { "epoch": 904.8684210526316, "grad_norm": 1.4534047842025757, "learning_rate": 0.0001, "loss": 0.0131, "step": 137540 }, { "epoch": 904.9342105263158, "grad_norm": 1.269898772239685, "learning_rate": 0.0001, "loss": 0.0151, "step": 137550 }, { "epoch": 905.0, "grad_norm": 1.7915019989013672, "learning_rate": 0.0001, "loss": 0.0113, "step": 137560 }, { "epoch": 905.0657894736842, "grad_norm": 1.3274778127670288, "learning_rate": 0.0001, "loss": 0.0155, "step": 137570 }, { "epoch": 905.1315789473684, "grad_norm": 1.3918880224227905, "learning_rate": 0.0001, "loss": 0.012, "step": 137580 }, { "epoch": 905.1973684210526, "grad_norm": 1.484703540802002, "learning_rate": 0.0001, "loss": 0.0104, "step": 137590 }, { "epoch": 905.2631578947369, "grad_norm": 1.3646690845489502, "learning_rate": 0.0001, "loss": 0.0107, "step": 137600 }, { "epoch": 905.328947368421, "grad_norm": 1.1839032173156738, "learning_rate": 0.0001, "loss": 0.0144, "step": 137610 }, { "epoch": 905.3947368421053, "grad_norm": 1.0138155221939087, "learning_rate": 0.0001, "loss": 0.0135, "step": 137620 }, { "epoch": 905.4605263157895, "grad_norm": 1.556128978729248, "learning_rate": 0.0001, "loss": 0.0141, "step": 137630 }, { "epoch": 905.5263157894736, "grad_norm": 1.3826580047607422, "learning_rate": 0.0001, "loss": 0.0144, "step": 137640 }, { "epoch": 905.5921052631579, "grad_norm": 1.3536105155944824, "learning_rate": 0.0001, "loss": 0.0114, "step": 137650 }, { "epoch": 905.6578947368421, "grad_norm": 1.3558907508850098, "learning_rate": 0.0001, "loss": 0.0113, "step": 137660 }, { "epoch": 905.7236842105264, "grad_norm": 1.3178945779800415, "learning_rate": 0.0001, "loss": 0.0115, "step": 137670 }, { "epoch": 905.7894736842105, "grad_norm": 1.452436923980713, "learning_rate": 0.0001, "loss": 0.0127, "step": 137680 }, { "epoch": 905.8552631578947, "grad_norm": 1.611968994140625, "learning_rate": 0.0001, "loss": 0.0124, "step": 137690 }, { "epoch": 905.921052631579, "grad_norm": 2.011895179748535, "learning_rate": 0.0001, "loss": 0.0126, "step": 137700 }, { "epoch": 905.9868421052631, "grad_norm": 0.9391298890113831, "learning_rate": 0.0001, "loss": 0.0172, "step": 137710 }, { "epoch": 906.0526315789474, "grad_norm": 1.3184714317321777, "learning_rate": 0.0001, "loss": 0.0135, "step": 137720 }, { "epoch": 906.1184210526316, "grad_norm": 1.3964930772781372, "learning_rate": 0.0001, "loss": 0.0171, "step": 137730 }, { "epoch": 906.1842105263158, "grad_norm": 1.0390980243682861, "learning_rate": 0.0001, "loss": 0.0113, "step": 137740 }, { "epoch": 906.25, "grad_norm": 1.1643089056015015, "learning_rate": 0.0001, "loss": 0.0122, "step": 137750 }, { "epoch": 906.3157894736842, "grad_norm": 1.4913126230239868, "learning_rate": 0.0001, "loss": 0.0159, "step": 137760 }, { "epoch": 906.3815789473684, "grad_norm": 1.6082104444503784, "learning_rate": 0.0001, "loss": 0.011, "step": 137770 }, { "epoch": 906.4473684210526, "grad_norm": 1.2848265171051025, "learning_rate": 0.0001, "loss": 0.0151, "step": 137780 }, { "epoch": 906.5131578947369, "grad_norm": 1.450252652168274, "learning_rate": 0.0001, "loss": 0.014, "step": 137790 }, { "epoch": 906.578947368421, "grad_norm": 1.7463042736053467, "learning_rate": 0.0001, "loss": 0.0117, "step": 137800 }, { "epoch": 906.6447368421053, "grad_norm": 1.0073528289794922, "learning_rate": 0.0001, "loss": 0.012, "step": 137810 }, { "epoch": 906.7105263157895, "grad_norm": 1.2799254655838013, "learning_rate": 0.0001, "loss": 0.0135, "step": 137820 }, { "epoch": 906.7763157894736, "grad_norm": 1.4224095344543457, "learning_rate": 0.0001, "loss": 0.0108, "step": 137830 }, { "epoch": 906.8421052631579, "grad_norm": 1.8810076713562012, "learning_rate": 0.0001, "loss": 0.0138, "step": 137840 }, { "epoch": 906.9078947368421, "grad_norm": 1.6474609375, "learning_rate": 0.0001, "loss": 0.0122, "step": 137850 }, { "epoch": 906.9736842105264, "grad_norm": 1.5459538698196411, "learning_rate": 0.0001, "loss": 0.0123, "step": 137860 }, { "epoch": 907.0394736842105, "grad_norm": 1.3834182024002075, "learning_rate": 0.0001, "loss": 0.0113, "step": 137870 }, { "epoch": 907.1052631578947, "grad_norm": 1.1827235221862793, "learning_rate": 0.0001, "loss": 0.0111, "step": 137880 }, { "epoch": 907.171052631579, "grad_norm": 1.3506709337234497, "learning_rate": 0.0001, "loss": 0.013, "step": 137890 }, { "epoch": 907.2368421052631, "grad_norm": 1.5885815620422363, "learning_rate": 0.0001, "loss": 0.0148, "step": 137900 }, { "epoch": 907.3026315789474, "grad_norm": 1.523469090461731, "learning_rate": 0.0001, "loss": 0.0101, "step": 137910 }, { "epoch": 907.3684210526316, "grad_norm": 1.1538394689559937, "learning_rate": 0.0001, "loss": 0.0114, "step": 137920 }, { "epoch": 907.4342105263158, "grad_norm": 1.4521716833114624, "learning_rate": 0.0001, "loss": 0.0103, "step": 137930 }, { "epoch": 907.5, "grad_norm": 1.38550865650177, "learning_rate": 0.0001, "loss": 0.0108, "step": 137940 }, { "epoch": 907.5657894736842, "grad_norm": 1.5177801847457886, "learning_rate": 0.0001, "loss": 0.0108, "step": 137950 }, { "epoch": 907.6315789473684, "grad_norm": 1.8020741939544678, "learning_rate": 0.0001, "loss": 0.0142, "step": 137960 }, { "epoch": 907.6973684210526, "grad_norm": 1.3022581338882446, "learning_rate": 0.0001, "loss": 0.0118, "step": 137970 }, { "epoch": 907.7631578947369, "grad_norm": 1.365189552307129, "learning_rate": 0.0001, "loss": 0.0169, "step": 137980 }, { "epoch": 907.828947368421, "grad_norm": 1.4811402559280396, "learning_rate": 0.0001, "loss": 0.0133, "step": 137990 }, { "epoch": 907.8947368421053, "grad_norm": 1.4946105480194092, "learning_rate": 0.0001, "loss": 0.0133, "step": 138000 }, { "epoch": 907.9605263157895, "grad_norm": 1.205735683441162, "learning_rate": 0.0001, "loss": 0.0111, "step": 138010 }, { "epoch": 908.0263157894736, "grad_norm": 1.633504033088684, "learning_rate": 0.0001, "loss": 0.0189, "step": 138020 }, { "epoch": 908.0921052631579, "grad_norm": 1.595725417137146, "learning_rate": 0.0001, "loss": 0.0156, "step": 138030 }, { "epoch": 908.1578947368421, "grad_norm": 1.3449290990829468, "learning_rate": 0.0001, "loss": 0.0133, "step": 138040 }, { "epoch": 908.2236842105264, "grad_norm": 1.2172980308532715, "learning_rate": 0.0001, "loss": 0.0121, "step": 138050 }, { "epoch": 908.2894736842105, "grad_norm": 1.0972955226898193, "learning_rate": 0.0001, "loss": 0.0126, "step": 138060 }, { "epoch": 908.3552631578947, "grad_norm": 1.4131301641464233, "learning_rate": 0.0001, "loss": 0.0155, "step": 138070 }, { "epoch": 908.421052631579, "grad_norm": 1.247426152229309, "learning_rate": 0.0001, "loss": 0.0111, "step": 138080 }, { "epoch": 908.4868421052631, "grad_norm": 1.1184269189834595, "learning_rate": 0.0001, "loss": 0.0127, "step": 138090 }, { "epoch": 908.5526315789474, "grad_norm": 1.528631567955017, "learning_rate": 0.0001, "loss": 0.0111, "step": 138100 }, { "epoch": 908.6184210526316, "grad_norm": 1.4223675727844238, "learning_rate": 0.0001, "loss": 0.0144, "step": 138110 }, { "epoch": 908.6842105263158, "grad_norm": 1.4975579977035522, "learning_rate": 0.0001, "loss": 0.0121, "step": 138120 }, { "epoch": 908.75, "grad_norm": 1.4215736389160156, "learning_rate": 0.0001, "loss": 0.0103, "step": 138130 }, { "epoch": 908.8157894736842, "grad_norm": 1.5480419397354126, "learning_rate": 0.0001, "loss": 0.0111, "step": 138140 }, { "epoch": 908.8815789473684, "grad_norm": 1.4687353372573853, "learning_rate": 0.0001, "loss": 0.0141, "step": 138150 }, { "epoch": 908.9473684210526, "grad_norm": 1.275105357170105, "learning_rate": 0.0001, "loss": 0.0151, "step": 138160 }, { "epoch": 909.0131578947369, "grad_norm": 1.3548073768615723, "learning_rate": 0.0001, "loss": 0.0127, "step": 138170 }, { "epoch": 909.078947368421, "grad_norm": 1.7003448009490967, "learning_rate": 0.0001, "loss": 0.0134, "step": 138180 }, { "epoch": 909.1447368421053, "grad_norm": 1.7095146179199219, "learning_rate": 0.0001, "loss": 0.0111, "step": 138190 }, { "epoch": 909.2105263157895, "grad_norm": 1.661028504371643, "learning_rate": 0.0001, "loss": 0.0134, "step": 138200 }, { "epoch": 909.2763157894736, "grad_norm": 1.4898855686187744, "learning_rate": 0.0001, "loss": 0.0118, "step": 138210 }, { "epoch": 909.3421052631579, "grad_norm": 1.7449891567230225, "learning_rate": 0.0001, "loss": 0.0125, "step": 138220 }, { "epoch": 909.4078947368421, "grad_norm": 1.7558820247650146, "learning_rate": 0.0001, "loss": 0.0157, "step": 138230 }, { "epoch": 909.4736842105264, "grad_norm": 1.04487144947052, "learning_rate": 0.0001, "loss": 0.0103, "step": 138240 }, { "epoch": 909.5394736842105, "grad_norm": 1.5297691822052002, "learning_rate": 0.0001, "loss": 0.0133, "step": 138250 }, { "epoch": 909.6052631578947, "grad_norm": 1.4045600891113281, "learning_rate": 0.0001, "loss": 0.0118, "step": 138260 }, { "epoch": 909.671052631579, "grad_norm": 1.662475347518921, "learning_rate": 0.0001, "loss": 0.0167, "step": 138270 }, { "epoch": 909.7368421052631, "grad_norm": 1.5686781406402588, "learning_rate": 0.0001, "loss": 0.0126, "step": 138280 }, { "epoch": 909.8026315789474, "grad_norm": 1.3931483030319214, "learning_rate": 0.0001, "loss": 0.0112, "step": 138290 }, { "epoch": 909.8684210526316, "grad_norm": 1.247011661529541, "learning_rate": 0.0001, "loss": 0.0134, "step": 138300 }, { "epoch": 909.9342105263158, "grad_norm": 0.9878817200660706, "learning_rate": 0.0001, "loss": 0.0108, "step": 138310 }, { "epoch": 910.0, "grad_norm": 1.4103188514709473, "learning_rate": 0.0001, "loss": 0.0136, "step": 138320 }, { "epoch": 910.0657894736842, "grad_norm": 1.7705857753753662, "learning_rate": 0.0001, "loss": 0.0144, "step": 138330 }, { "epoch": 910.1315789473684, "grad_norm": 1.7299087047576904, "learning_rate": 0.0001, "loss": 0.015, "step": 138340 }, { "epoch": 910.1973684210526, "grad_norm": 1.74335515499115, "learning_rate": 0.0001, "loss": 0.012, "step": 138350 }, { "epoch": 910.2631578947369, "grad_norm": 1.4239544868469238, "learning_rate": 0.0001, "loss": 0.013, "step": 138360 }, { "epoch": 910.328947368421, "grad_norm": 1.6527003049850464, "learning_rate": 0.0001, "loss": 0.0138, "step": 138370 }, { "epoch": 910.3947368421053, "grad_norm": 1.3713083267211914, "learning_rate": 0.0001, "loss": 0.0135, "step": 138380 }, { "epoch": 910.4605263157895, "grad_norm": 1.7656965255737305, "learning_rate": 0.0001, "loss": 0.0138, "step": 138390 }, { "epoch": 910.5263157894736, "grad_norm": 1.8003026247024536, "learning_rate": 0.0001, "loss": 0.0135, "step": 138400 }, { "epoch": 910.5921052631579, "grad_norm": 1.7717723846435547, "learning_rate": 0.0001, "loss": 0.0147, "step": 138410 }, { "epoch": 910.6578947368421, "grad_norm": 1.5578970909118652, "learning_rate": 0.0001, "loss": 0.0111, "step": 138420 }, { "epoch": 910.7236842105264, "grad_norm": 1.9100022315979004, "learning_rate": 0.0001, "loss": 0.0128, "step": 138430 }, { "epoch": 910.7894736842105, "grad_norm": 1.3807684183120728, "learning_rate": 0.0001, "loss": 0.0126, "step": 138440 }, { "epoch": 910.8552631578947, "grad_norm": 1.316465973854065, "learning_rate": 0.0001, "loss": 0.0136, "step": 138450 }, { "epoch": 910.921052631579, "grad_norm": 1.5799978971481323, "learning_rate": 0.0001, "loss": 0.0128, "step": 138460 }, { "epoch": 910.9868421052631, "grad_norm": 1.4615864753723145, "learning_rate": 0.0001, "loss": 0.0123, "step": 138470 }, { "epoch": 911.0526315789474, "grad_norm": 1.7710715532302856, "learning_rate": 0.0001, "loss": 0.0115, "step": 138480 }, { "epoch": 911.1184210526316, "grad_norm": 1.7027298212051392, "learning_rate": 0.0001, "loss": 0.0152, "step": 138490 }, { "epoch": 911.1842105263158, "grad_norm": 1.3432354927062988, "learning_rate": 0.0001, "loss": 0.0122, "step": 138500 }, { "epoch": 911.25, "grad_norm": 1.3911269903182983, "learning_rate": 0.0001, "loss": 0.016, "step": 138510 }, { "epoch": 911.3157894736842, "grad_norm": 1.1689019203186035, "learning_rate": 0.0001, "loss": 0.0147, "step": 138520 }, { "epoch": 911.3815789473684, "grad_norm": 1.2442231178283691, "learning_rate": 0.0001, "loss": 0.011, "step": 138530 }, { "epoch": 911.4473684210526, "grad_norm": 1.6549965143203735, "learning_rate": 0.0001, "loss": 0.0123, "step": 138540 }, { "epoch": 911.5131578947369, "grad_norm": 1.4899065494537354, "learning_rate": 0.0001, "loss": 0.0162, "step": 138550 }, { "epoch": 911.578947368421, "grad_norm": 0.9165178537368774, "learning_rate": 0.0001, "loss": 0.016, "step": 138560 }, { "epoch": 911.6447368421053, "grad_norm": 1.6142669916152954, "learning_rate": 0.0001, "loss": 0.0128, "step": 138570 }, { "epoch": 911.7105263157895, "grad_norm": 1.6722619533538818, "learning_rate": 0.0001, "loss": 0.0133, "step": 138580 }, { "epoch": 911.7763157894736, "grad_norm": 1.0755369663238525, "learning_rate": 0.0001, "loss": 0.013, "step": 138590 }, { "epoch": 911.8421052631579, "grad_norm": 1.2508374452590942, "learning_rate": 0.0001, "loss": 0.0123, "step": 138600 }, { "epoch": 911.9078947368421, "grad_norm": 1.2711622714996338, "learning_rate": 0.0001, "loss": 0.0125, "step": 138610 }, { "epoch": 911.9736842105264, "grad_norm": 1.5062087774276733, "learning_rate": 0.0001, "loss": 0.0118, "step": 138620 }, { "epoch": 912.0394736842105, "grad_norm": 1.661399006843567, "learning_rate": 0.0001, "loss": 0.0184, "step": 138630 }, { "epoch": 912.1052631578947, "grad_norm": 1.3508739471435547, "learning_rate": 0.0001, "loss": 0.0135, "step": 138640 }, { "epoch": 912.171052631579, "grad_norm": 1.5670504570007324, "learning_rate": 0.0001, "loss": 0.0112, "step": 138650 }, { "epoch": 912.2368421052631, "grad_norm": 1.6385771036148071, "learning_rate": 0.0001, "loss": 0.0126, "step": 138660 }, { "epoch": 912.3026315789474, "grad_norm": 1.5147217512130737, "learning_rate": 0.0001, "loss": 0.0136, "step": 138670 }, { "epoch": 912.3684210526316, "grad_norm": 1.2077417373657227, "learning_rate": 0.0001, "loss": 0.0128, "step": 138680 }, { "epoch": 912.4342105263158, "grad_norm": 1.3119935989379883, "learning_rate": 0.0001, "loss": 0.0173, "step": 138690 }, { "epoch": 912.5, "grad_norm": 1.1021575927734375, "learning_rate": 0.0001, "loss": 0.0116, "step": 138700 }, { "epoch": 912.5657894736842, "grad_norm": 1.426323652267456, "learning_rate": 0.0001, "loss": 0.0098, "step": 138710 }, { "epoch": 912.6315789473684, "grad_norm": 1.6330442428588867, "learning_rate": 0.0001, "loss": 0.0117, "step": 138720 }, { "epoch": 912.6973684210526, "grad_norm": 1.4236085414886475, "learning_rate": 0.0001, "loss": 0.0154, "step": 138730 }, { "epoch": 912.7631578947369, "grad_norm": 1.355277180671692, "learning_rate": 0.0001, "loss": 0.0114, "step": 138740 }, { "epoch": 912.828947368421, "grad_norm": 1.5005743503570557, "learning_rate": 0.0001, "loss": 0.0115, "step": 138750 }, { "epoch": 912.8947368421053, "grad_norm": 1.5072940587997437, "learning_rate": 0.0001, "loss": 0.0125, "step": 138760 }, { "epoch": 912.9605263157895, "grad_norm": 1.1911933422088623, "learning_rate": 0.0001, "loss": 0.0139, "step": 138770 }, { "epoch": 913.0263157894736, "grad_norm": 1.1521300077438354, "learning_rate": 0.0001, "loss": 0.0097, "step": 138780 }, { "epoch": 913.0921052631579, "grad_norm": 1.5421594381332397, "learning_rate": 0.0001, "loss": 0.0135, "step": 138790 }, { "epoch": 913.1578947368421, "grad_norm": 1.527529239654541, "learning_rate": 0.0001, "loss": 0.0128, "step": 138800 }, { "epoch": 913.2236842105264, "grad_norm": 1.5731478929519653, "learning_rate": 0.0001, "loss": 0.011, "step": 138810 }, { "epoch": 913.2894736842105, "grad_norm": 1.587997555732727, "learning_rate": 0.0001, "loss": 0.0096, "step": 138820 }, { "epoch": 913.3552631578947, "grad_norm": 1.3150404691696167, "learning_rate": 0.0001, "loss": 0.0126, "step": 138830 }, { "epoch": 913.421052631579, "grad_norm": 1.43244206905365, "learning_rate": 0.0001, "loss": 0.0119, "step": 138840 }, { "epoch": 913.4868421052631, "grad_norm": 1.5471535921096802, "learning_rate": 0.0001, "loss": 0.0139, "step": 138850 }, { "epoch": 913.5526315789474, "grad_norm": 1.3914350271224976, "learning_rate": 0.0001, "loss": 0.0144, "step": 138860 }, { "epoch": 913.6184210526316, "grad_norm": 1.3478312492370605, "learning_rate": 0.0001, "loss": 0.017, "step": 138870 }, { "epoch": 913.6842105263158, "grad_norm": 1.3018279075622559, "learning_rate": 0.0001, "loss": 0.0147, "step": 138880 }, { "epoch": 913.75, "grad_norm": 1.7028915882110596, "learning_rate": 0.0001, "loss": 0.0128, "step": 138890 }, { "epoch": 913.8157894736842, "grad_norm": 1.696677803993225, "learning_rate": 0.0001, "loss": 0.0122, "step": 138900 }, { "epoch": 913.8815789473684, "grad_norm": 1.502360463142395, "learning_rate": 0.0001, "loss": 0.0141, "step": 138910 }, { "epoch": 913.9473684210526, "grad_norm": 1.7690080404281616, "learning_rate": 0.0001, "loss": 0.0119, "step": 138920 }, { "epoch": 914.0131578947369, "grad_norm": 2.084611415863037, "learning_rate": 0.0001, "loss": 0.0142, "step": 138930 }, { "epoch": 914.078947368421, "grad_norm": 1.812525749206543, "learning_rate": 0.0001, "loss": 0.0121, "step": 138940 }, { "epoch": 914.1447368421053, "grad_norm": 1.3929774761199951, "learning_rate": 0.0001, "loss": 0.0159, "step": 138950 }, { "epoch": 914.2105263157895, "grad_norm": 1.70052170753479, "learning_rate": 0.0001, "loss": 0.012, "step": 138960 }, { "epoch": 914.2763157894736, "grad_norm": 1.5800435543060303, "learning_rate": 0.0001, "loss": 0.0123, "step": 138970 }, { "epoch": 914.3421052631579, "grad_norm": 1.0789227485656738, "learning_rate": 0.0001, "loss": 0.0102, "step": 138980 }, { "epoch": 914.4078947368421, "grad_norm": 1.3242721557617188, "learning_rate": 0.0001, "loss": 0.0115, "step": 138990 }, { "epoch": 914.4736842105264, "grad_norm": 1.3798093795776367, "learning_rate": 0.0001, "loss": 0.0127, "step": 139000 }, { "epoch": 914.5394736842105, "grad_norm": 1.5201542377471924, "learning_rate": 0.0001, "loss": 0.0139, "step": 139010 }, { "epoch": 914.6052631578947, "grad_norm": 1.3700060844421387, "learning_rate": 0.0001, "loss": 0.0139, "step": 139020 }, { "epoch": 914.671052631579, "grad_norm": 1.6138060092926025, "learning_rate": 0.0001, "loss": 0.01, "step": 139030 }, { "epoch": 914.7368421052631, "grad_norm": 1.1125797033309937, "learning_rate": 0.0001, "loss": 0.0134, "step": 139040 }, { "epoch": 914.8026315789474, "grad_norm": 1.2498359680175781, "learning_rate": 0.0001, "loss": 0.0093, "step": 139050 }, { "epoch": 914.8684210526316, "grad_norm": 1.881245732307434, "learning_rate": 0.0001, "loss": 0.0136, "step": 139060 }, { "epoch": 914.9342105263158, "grad_norm": 1.6802150011062622, "learning_rate": 0.0001, "loss": 0.0143, "step": 139070 }, { "epoch": 915.0, "grad_norm": 1.336390495300293, "learning_rate": 0.0001, "loss": 0.0148, "step": 139080 }, { "epoch": 915.0657894736842, "grad_norm": 1.7827732563018799, "learning_rate": 0.0001, "loss": 0.0141, "step": 139090 }, { "epoch": 915.1315789473684, "grad_norm": 1.5607959032058716, "learning_rate": 0.0001, "loss": 0.0133, "step": 139100 }, { "epoch": 915.1973684210526, "grad_norm": 1.5003547668457031, "learning_rate": 0.0001, "loss": 0.0128, "step": 139110 }, { "epoch": 915.2631578947369, "grad_norm": 2.0289008617401123, "learning_rate": 0.0001, "loss": 0.014, "step": 139120 }, { "epoch": 915.328947368421, "grad_norm": 1.5664457082748413, "learning_rate": 0.0001, "loss": 0.012, "step": 139130 }, { "epoch": 915.3947368421053, "grad_norm": 1.1241542100906372, "learning_rate": 0.0001, "loss": 0.0132, "step": 139140 }, { "epoch": 915.4605263157895, "grad_norm": 1.5218309164047241, "learning_rate": 0.0001, "loss": 0.0117, "step": 139150 }, { "epoch": 915.5263157894736, "grad_norm": 1.4993892908096313, "learning_rate": 0.0001, "loss": 0.0164, "step": 139160 }, { "epoch": 915.5921052631579, "grad_norm": 1.3447693586349487, "learning_rate": 0.0001, "loss": 0.0099, "step": 139170 }, { "epoch": 915.6578947368421, "grad_norm": 1.2081048488616943, "learning_rate": 0.0001, "loss": 0.0104, "step": 139180 }, { "epoch": 915.7236842105264, "grad_norm": 1.1162351369857788, "learning_rate": 0.0001, "loss": 0.0132, "step": 139190 }, { "epoch": 915.7894736842105, "grad_norm": 1.6127893924713135, "learning_rate": 0.0001, "loss": 0.0113, "step": 139200 }, { "epoch": 915.8552631578947, "grad_norm": 1.6666847467422485, "learning_rate": 0.0001, "loss": 0.0125, "step": 139210 }, { "epoch": 915.921052631579, "grad_norm": 1.9765604734420776, "learning_rate": 0.0001, "loss": 0.0138, "step": 139220 }, { "epoch": 915.9868421052631, "grad_norm": 2.000361919403076, "learning_rate": 0.0001, "loss": 0.0134, "step": 139230 }, { "epoch": 916.0526315789474, "grad_norm": 2.741786479949951, "learning_rate": 0.0001, "loss": 0.0131, "step": 139240 }, { "epoch": 916.1184210526316, "grad_norm": 1.8683977127075195, "learning_rate": 0.0001, "loss": 0.0133, "step": 139250 }, { "epoch": 916.1842105263158, "grad_norm": 1.8972703218460083, "learning_rate": 0.0001, "loss": 0.0114, "step": 139260 }, { "epoch": 916.25, "grad_norm": 1.7690210342407227, "learning_rate": 0.0001, "loss": 0.0125, "step": 139270 }, { "epoch": 916.3157894736842, "grad_norm": 1.5095206499099731, "learning_rate": 0.0001, "loss": 0.0132, "step": 139280 }, { "epoch": 916.3815789473684, "grad_norm": 1.7461563348770142, "learning_rate": 0.0001, "loss": 0.0131, "step": 139290 }, { "epoch": 916.4473684210526, "grad_norm": 1.6880900859832764, "learning_rate": 0.0001, "loss": 0.0105, "step": 139300 }, { "epoch": 916.5131578947369, "grad_norm": 1.7043485641479492, "learning_rate": 0.0001, "loss": 0.013, "step": 139310 }, { "epoch": 916.578947368421, "grad_norm": 1.2781754732131958, "learning_rate": 0.0001, "loss": 0.013, "step": 139320 }, { "epoch": 916.6447368421053, "grad_norm": 1.496767520904541, "learning_rate": 0.0001, "loss": 0.0124, "step": 139330 }, { "epoch": 916.7105263157895, "grad_norm": 1.6569653749465942, "learning_rate": 0.0001, "loss": 0.0108, "step": 139340 }, { "epoch": 916.7763157894736, "grad_norm": 1.3319404125213623, "learning_rate": 0.0001, "loss": 0.0112, "step": 139350 }, { "epoch": 916.8421052631579, "grad_norm": 1.6403552293777466, "learning_rate": 0.0001, "loss": 0.0123, "step": 139360 }, { "epoch": 916.9078947368421, "grad_norm": 1.086984634399414, "learning_rate": 0.0001, "loss": 0.016, "step": 139370 }, { "epoch": 916.9736842105264, "grad_norm": 1.4459831714630127, "learning_rate": 0.0001, "loss": 0.0127, "step": 139380 }, { "epoch": 917.0394736842105, "grad_norm": 1.429258942604065, "learning_rate": 0.0001, "loss": 0.009, "step": 139390 }, { "epoch": 917.1052631578947, "grad_norm": 1.5260236263275146, "learning_rate": 0.0001, "loss": 0.0093, "step": 139400 }, { "epoch": 917.171052631579, "grad_norm": 1.5625975131988525, "learning_rate": 0.0001, "loss": 0.0137, "step": 139410 }, { "epoch": 917.2368421052631, "grad_norm": 1.6665716171264648, "learning_rate": 0.0001, "loss": 0.011, "step": 139420 }, { "epoch": 917.3026315789474, "grad_norm": 1.5926222801208496, "learning_rate": 0.0001, "loss": 0.0115, "step": 139430 }, { "epoch": 917.3684210526316, "grad_norm": 1.6732916831970215, "learning_rate": 0.0001, "loss": 0.0149, "step": 139440 }, { "epoch": 917.4342105263158, "grad_norm": 1.1698634624481201, "learning_rate": 0.0001, "loss": 0.0148, "step": 139450 }, { "epoch": 917.5, "grad_norm": 1.17655348777771, "learning_rate": 0.0001, "loss": 0.0095, "step": 139460 }, { "epoch": 917.5657894736842, "grad_norm": 1.5024595260620117, "learning_rate": 0.0001, "loss": 0.0138, "step": 139470 }, { "epoch": 917.6315789473684, "grad_norm": 1.7623326778411865, "learning_rate": 0.0001, "loss": 0.0148, "step": 139480 }, { "epoch": 917.6973684210526, "grad_norm": 1.4878108501434326, "learning_rate": 0.0001, "loss": 0.0153, "step": 139490 }, { "epoch": 917.7631578947369, "grad_norm": 0.8942447304725647, "learning_rate": 0.0001, "loss": 0.0116, "step": 139500 }, { "epoch": 917.828947368421, "grad_norm": 1.19545316696167, "learning_rate": 0.0001, "loss": 0.0138, "step": 139510 }, { "epoch": 917.8947368421053, "grad_norm": 1.199537754058838, "learning_rate": 0.0001, "loss": 0.0149, "step": 139520 }, { "epoch": 917.9605263157895, "grad_norm": 1.0654898881912231, "learning_rate": 0.0001, "loss": 0.0112, "step": 139530 }, { "epoch": 918.0263157894736, "grad_norm": 1.0037107467651367, "learning_rate": 0.0001, "loss": 0.0114, "step": 139540 }, { "epoch": 918.0921052631579, "grad_norm": 1.397589087486267, "learning_rate": 0.0001, "loss": 0.0147, "step": 139550 }, { "epoch": 918.1578947368421, "grad_norm": 1.1872353553771973, "learning_rate": 0.0001, "loss": 0.0092, "step": 139560 }, { "epoch": 918.2236842105264, "grad_norm": 1.4479858875274658, "learning_rate": 0.0001, "loss": 0.0161, "step": 139570 }, { "epoch": 918.2894736842105, "grad_norm": 1.1936354637145996, "learning_rate": 0.0001, "loss": 0.0109, "step": 139580 }, { "epoch": 918.3552631578947, "grad_norm": 1.6665639877319336, "learning_rate": 0.0001, "loss": 0.0185, "step": 139590 }, { "epoch": 918.421052631579, "grad_norm": 1.7882617712020874, "learning_rate": 0.0001, "loss": 0.0127, "step": 139600 }, { "epoch": 918.4868421052631, "grad_norm": 1.5377490520477295, "learning_rate": 0.0001, "loss": 0.014, "step": 139610 }, { "epoch": 918.5526315789474, "grad_norm": 1.0899631977081299, "learning_rate": 0.0001, "loss": 0.0124, "step": 139620 }, { "epoch": 918.6184210526316, "grad_norm": 1.455590844154358, "learning_rate": 0.0001, "loss": 0.0108, "step": 139630 }, { "epoch": 918.6842105263158, "grad_norm": 1.4054707288742065, "learning_rate": 0.0001, "loss": 0.0124, "step": 139640 }, { "epoch": 918.75, "grad_norm": 1.4696006774902344, "learning_rate": 0.0001, "loss": 0.0125, "step": 139650 }, { "epoch": 918.8157894736842, "grad_norm": 1.6419366598129272, "learning_rate": 0.0001, "loss": 0.0152, "step": 139660 }, { "epoch": 918.8815789473684, "grad_norm": 0.9751062393188477, "learning_rate": 0.0001, "loss": 0.0131, "step": 139670 }, { "epoch": 918.9473684210526, "grad_norm": 1.3601417541503906, "learning_rate": 0.0001, "loss": 0.013, "step": 139680 }, { "epoch": 919.0131578947369, "grad_norm": 1.5738176107406616, "learning_rate": 0.0001, "loss": 0.0114, "step": 139690 }, { "epoch": 919.078947368421, "grad_norm": 1.1163963079452515, "learning_rate": 0.0001, "loss": 0.0103, "step": 139700 }, { "epoch": 919.1447368421053, "grad_norm": 1.2433674335479736, "learning_rate": 0.0001, "loss": 0.0159, "step": 139710 }, { "epoch": 919.2105263157895, "grad_norm": 1.4025108814239502, "learning_rate": 0.0001, "loss": 0.0113, "step": 139720 }, { "epoch": 919.2763157894736, "grad_norm": 1.8090484142303467, "learning_rate": 0.0001, "loss": 0.0158, "step": 139730 }, { "epoch": 919.3421052631579, "grad_norm": 1.3938413858413696, "learning_rate": 0.0001, "loss": 0.0107, "step": 139740 }, { "epoch": 919.4078947368421, "grad_norm": 1.2968835830688477, "learning_rate": 0.0001, "loss": 0.0107, "step": 139750 }, { "epoch": 919.4736842105264, "grad_norm": 1.0437525510787964, "learning_rate": 0.0001, "loss": 0.0131, "step": 139760 }, { "epoch": 919.5394736842105, "grad_norm": 1.5137275457382202, "learning_rate": 0.0001, "loss": 0.0133, "step": 139770 }, { "epoch": 919.6052631578947, "grad_norm": 1.550502061843872, "learning_rate": 0.0001, "loss": 0.0104, "step": 139780 }, { "epoch": 919.671052631579, "grad_norm": 1.2623409032821655, "learning_rate": 0.0001, "loss": 0.015, "step": 139790 }, { "epoch": 919.7368421052631, "grad_norm": 1.3493156433105469, "learning_rate": 0.0001, "loss": 0.0139, "step": 139800 }, { "epoch": 919.8026315789474, "grad_norm": 1.7911083698272705, "learning_rate": 0.0001, "loss": 0.0128, "step": 139810 }, { "epoch": 919.8684210526316, "grad_norm": 1.8874084949493408, "learning_rate": 0.0001, "loss": 0.0145, "step": 139820 }, { "epoch": 919.9342105263158, "grad_norm": 1.876002311706543, "learning_rate": 0.0001, "loss": 0.0146, "step": 139830 }, { "epoch": 920.0, "grad_norm": 1.6498265266418457, "learning_rate": 0.0001, "loss": 0.0114, "step": 139840 }, { "epoch": 920.0657894736842, "grad_norm": 1.4134352207183838, "learning_rate": 0.0001, "loss": 0.0107, "step": 139850 }, { "epoch": 920.1315789473684, "grad_norm": 1.479222059249878, "learning_rate": 0.0001, "loss": 0.0116, "step": 139860 }, { "epoch": 920.1973684210526, "grad_norm": 1.4688893556594849, "learning_rate": 0.0001, "loss": 0.0118, "step": 139870 }, { "epoch": 920.2631578947369, "grad_norm": 1.6507142782211304, "learning_rate": 0.0001, "loss": 0.0138, "step": 139880 }, { "epoch": 920.328947368421, "grad_norm": 1.2691552639007568, "learning_rate": 0.0001, "loss": 0.016, "step": 139890 }, { "epoch": 920.3947368421053, "grad_norm": 1.326647162437439, "learning_rate": 0.0001, "loss": 0.0129, "step": 139900 }, { "epoch": 920.4605263157895, "grad_norm": 1.6964616775512695, "learning_rate": 0.0001, "loss": 0.0114, "step": 139910 }, { "epoch": 920.5263157894736, "grad_norm": 1.4971848726272583, "learning_rate": 0.0001, "loss": 0.012, "step": 139920 }, { "epoch": 920.5921052631579, "grad_norm": 1.4924442768096924, "learning_rate": 0.0001, "loss": 0.0109, "step": 139930 }, { "epoch": 920.6578947368421, "grad_norm": 1.220154047012329, "learning_rate": 0.0001, "loss": 0.0119, "step": 139940 }, { "epoch": 920.7236842105264, "grad_norm": 1.845448613166809, "learning_rate": 0.0001, "loss": 0.0111, "step": 139950 }, { "epoch": 920.7894736842105, "grad_norm": 1.6297473907470703, "learning_rate": 0.0001, "loss": 0.0176, "step": 139960 }, { "epoch": 920.8552631578947, "grad_norm": 1.2699599266052246, "learning_rate": 0.0001, "loss": 0.0093, "step": 139970 }, { "epoch": 920.921052631579, "grad_norm": 1.4922304153442383, "learning_rate": 0.0001, "loss": 0.0126, "step": 139980 }, { "epoch": 920.9868421052631, "grad_norm": 1.1437015533447266, "learning_rate": 0.0001, "loss": 0.0134, "step": 139990 }, { "epoch": 921.0526315789474, "grad_norm": 1.1137590408325195, "learning_rate": 0.0001, "loss": 0.0134, "step": 140000 }, { "epoch": 921.1184210526316, "grad_norm": 1.1301674842834473, "learning_rate": 0.0001, "loss": 0.0142, "step": 140010 }, { "epoch": 921.1842105263158, "grad_norm": 1.1687880754470825, "learning_rate": 0.0001, "loss": 0.0127, "step": 140020 }, { "epoch": 921.25, "grad_norm": 1.3458476066589355, "learning_rate": 0.0001, "loss": 0.0095, "step": 140030 }, { "epoch": 921.3157894736842, "grad_norm": 1.302821397781372, "learning_rate": 0.0001, "loss": 0.0134, "step": 140040 }, { "epoch": 921.3815789473684, "grad_norm": 1.375687599182129, "learning_rate": 0.0001, "loss": 0.0103, "step": 140050 }, { "epoch": 921.4473684210526, "grad_norm": 1.5599430799484253, "learning_rate": 0.0001, "loss": 0.0132, "step": 140060 }, { "epoch": 921.5131578947369, "grad_norm": 1.2661031484603882, "learning_rate": 0.0001, "loss": 0.0144, "step": 140070 }, { "epoch": 921.578947368421, "grad_norm": 1.5500391721725464, "learning_rate": 0.0001, "loss": 0.0133, "step": 140080 }, { "epoch": 921.6447368421053, "grad_norm": 1.065709114074707, "learning_rate": 0.0001, "loss": 0.0135, "step": 140090 }, { "epoch": 921.7105263157895, "grad_norm": 1.4404852390289307, "learning_rate": 0.0001, "loss": 0.0126, "step": 140100 }, { "epoch": 921.7763157894736, "grad_norm": 1.3273744583129883, "learning_rate": 0.0001, "loss": 0.0121, "step": 140110 }, { "epoch": 921.8421052631579, "grad_norm": 1.0675231218338013, "learning_rate": 0.0001, "loss": 0.0165, "step": 140120 }, { "epoch": 921.9078947368421, "grad_norm": 1.2102406024932861, "learning_rate": 0.0001, "loss": 0.0157, "step": 140130 }, { "epoch": 921.9736842105264, "grad_norm": 1.3003069162368774, "learning_rate": 0.0001, "loss": 0.0156, "step": 140140 }, { "epoch": 922.0394736842105, "grad_norm": 1.2986212968826294, "learning_rate": 0.0001, "loss": 0.0105, "step": 140150 }, { "epoch": 922.1052631578947, "grad_norm": 1.6067005395889282, "learning_rate": 0.0001, "loss": 0.0166, "step": 140160 }, { "epoch": 922.171052631579, "grad_norm": 1.3793567419052124, "learning_rate": 0.0001, "loss": 0.0156, "step": 140170 }, { "epoch": 922.2368421052631, "grad_norm": 1.508712649345398, "learning_rate": 0.0001, "loss": 0.0151, "step": 140180 }, { "epoch": 922.3026315789474, "grad_norm": 1.5980473756790161, "learning_rate": 0.0001, "loss": 0.0115, "step": 140190 }, { "epoch": 922.3684210526316, "grad_norm": 1.6205062866210938, "learning_rate": 0.0001, "loss": 0.0114, "step": 140200 }, { "epoch": 922.4342105263158, "grad_norm": 1.0307178497314453, "learning_rate": 0.0001, "loss": 0.0164, "step": 140210 }, { "epoch": 922.5, "grad_norm": 1.6184449195861816, "learning_rate": 0.0001, "loss": 0.0128, "step": 140220 }, { "epoch": 922.5657894736842, "grad_norm": 0.9445580840110779, "learning_rate": 0.0001, "loss": 0.0093, "step": 140230 }, { "epoch": 922.6315789473684, "grad_norm": 1.5479834079742432, "learning_rate": 0.0001, "loss": 0.0094, "step": 140240 }, { "epoch": 922.6973684210526, "grad_norm": 1.111137866973877, "learning_rate": 0.0001, "loss": 0.0166, "step": 140250 }, { "epoch": 922.7631578947369, "grad_norm": 1.2569369077682495, "learning_rate": 0.0001, "loss": 0.0121, "step": 140260 }, { "epoch": 922.828947368421, "grad_norm": 1.729651689529419, "learning_rate": 0.0001, "loss": 0.0122, "step": 140270 }, { "epoch": 922.8947368421053, "grad_norm": 1.031311273574829, "learning_rate": 0.0001, "loss": 0.0122, "step": 140280 }, { "epoch": 922.9605263157895, "grad_norm": 1.2946250438690186, "learning_rate": 0.0001, "loss": 0.0119, "step": 140290 }, { "epoch": 923.0263157894736, "grad_norm": 1.417431116104126, "learning_rate": 0.0001, "loss": 0.0169, "step": 140300 }, { "epoch": 923.0921052631579, "grad_norm": 1.4283459186553955, "learning_rate": 0.0001, "loss": 0.0137, "step": 140310 }, { "epoch": 923.1578947368421, "grad_norm": 1.280181646347046, "learning_rate": 0.0001, "loss": 0.0124, "step": 140320 }, { "epoch": 923.2236842105264, "grad_norm": 1.4239641427993774, "learning_rate": 0.0001, "loss": 0.0108, "step": 140330 }, { "epoch": 923.2894736842105, "grad_norm": 1.437967300415039, "learning_rate": 0.0001, "loss": 0.0148, "step": 140340 }, { "epoch": 923.3552631578947, "grad_norm": 1.706946849822998, "learning_rate": 0.0001, "loss": 0.0101, "step": 140350 }, { "epoch": 923.421052631579, "grad_norm": 1.4727646112442017, "learning_rate": 0.0001, "loss": 0.011, "step": 140360 }, { "epoch": 923.4868421052631, "grad_norm": 1.3501956462860107, "learning_rate": 0.0001, "loss": 0.0137, "step": 140370 }, { "epoch": 923.5526315789474, "grad_norm": 1.6014302968978882, "learning_rate": 0.0001, "loss": 0.0112, "step": 140380 }, { "epoch": 923.6184210526316, "grad_norm": 1.618423342704773, "learning_rate": 0.0001, "loss": 0.0127, "step": 140390 }, { "epoch": 923.6842105263158, "grad_norm": 1.7549856901168823, "learning_rate": 0.0001, "loss": 0.0116, "step": 140400 }, { "epoch": 923.75, "grad_norm": 1.6823674440383911, "learning_rate": 0.0001, "loss": 0.0104, "step": 140410 }, { "epoch": 923.8157894736842, "grad_norm": 1.6983723640441895, "learning_rate": 0.0001, "loss": 0.0128, "step": 140420 }, { "epoch": 923.8815789473684, "grad_norm": 1.3535070419311523, "learning_rate": 0.0001, "loss": 0.0133, "step": 140430 }, { "epoch": 923.9473684210526, "grad_norm": 1.3047149181365967, "learning_rate": 0.0001, "loss": 0.0153, "step": 140440 }, { "epoch": 924.0131578947369, "grad_norm": 1.6401749849319458, "learning_rate": 0.0001, "loss": 0.0171, "step": 140450 }, { "epoch": 924.078947368421, "grad_norm": 1.387373447418213, "learning_rate": 0.0001, "loss": 0.0169, "step": 140460 }, { "epoch": 924.1447368421053, "grad_norm": 1.8001471757888794, "learning_rate": 0.0001, "loss": 0.0151, "step": 140470 }, { "epoch": 924.2105263157895, "grad_norm": 2.0925965309143066, "learning_rate": 0.0001, "loss": 0.017, "step": 140480 }, { "epoch": 924.2763157894736, "grad_norm": 1.6762840747833252, "learning_rate": 0.0001, "loss": 0.0104, "step": 140490 }, { "epoch": 924.3421052631579, "grad_norm": 1.5177218914031982, "learning_rate": 0.0001, "loss": 0.0138, "step": 140500 }, { "epoch": 924.4078947368421, "grad_norm": 1.8086873292922974, "learning_rate": 0.0001, "loss": 0.0093, "step": 140510 }, { "epoch": 924.4736842105264, "grad_norm": 1.5275958776474, "learning_rate": 0.0001, "loss": 0.0119, "step": 140520 }, { "epoch": 924.5394736842105, "grad_norm": 1.122280240058899, "learning_rate": 0.0001, "loss": 0.01, "step": 140530 }, { "epoch": 924.6052631578947, "grad_norm": 0.9004603028297424, "learning_rate": 0.0001, "loss": 0.0136, "step": 140540 }, { "epoch": 924.671052631579, "grad_norm": 1.4018385410308838, "learning_rate": 0.0001, "loss": 0.0095, "step": 140550 }, { "epoch": 924.7368421052631, "grad_norm": 1.7982769012451172, "learning_rate": 0.0001, "loss": 0.0147, "step": 140560 }, { "epoch": 924.8026315789474, "grad_norm": 1.0945266485214233, "learning_rate": 0.0001, "loss": 0.0108, "step": 140570 }, { "epoch": 924.8684210526316, "grad_norm": 1.3988112211227417, "learning_rate": 0.0001, "loss": 0.0126, "step": 140580 }, { "epoch": 924.9342105263158, "grad_norm": 1.5785489082336426, "learning_rate": 0.0001, "loss": 0.0162, "step": 140590 }, { "epoch": 925.0, "grad_norm": 1.3862922191619873, "learning_rate": 0.0001, "loss": 0.0112, "step": 140600 }, { "epoch": 925.0657894736842, "grad_norm": 1.2992768287658691, "learning_rate": 0.0001, "loss": 0.0145, "step": 140610 }, { "epoch": 925.1315789473684, "grad_norm": 1.1412370204925537, "learning_rate": 0.0001, "loss": 0.0137, "step": 140620 }, { "epoch": 925.1973684210526, "grad_norm": 1.227541208267212, "learning_rate": 0.0001, "loss": 0.0131, "step": 140630 }, { "epoch": 925.2631578947369, "grad_norm": 1.506393551826477, "learning_rate": 0.0001, "loss": 0.0112, "step": 140640 }, { "epoch": 925.328947368421, "grad_norm": 1.3699759244918823, "learning_rate": 0.0001, "loss": 0.0132, "step": 140650 }, { "epoch": 925.3947368421053, "grad_norm": 1.3094017505645752, "learning_rate": 0.0001, "loss": 0.0112, "step": 140660 }, { "epoch": 925.4605263157895, "grad_norm": 1.0756523609161377, "learning_rate": 0.0001, "loss": 0.0121, "step": 140670 }, { "epoch": 925.5263157894736, "grad_norm": 1.4943516254425049, "learning_rate": 0.0001, "loss": 0.0112, "step": 140680 }, { "epoch": 925.5921052631579, "grad_norm": 1.8764899969100952, "learning_rate": 0.0001, "loss": 0.0128, "step": 140690 }, { "epoch": 925.6578947368421, "grad_norm": 2.0713157653808594, "learning_rate": 0.0001, "loss": 0.0148, "step": 140700 }, { "epoch": 925.7236842105264, "grad_norm": 1.718097448348999, "learning_rate": 0.0001, "loss": 0.0157, "step": 140710 }, { "epoch": 925.7894736842105, "grad_norm": 1.6375759840011597, "learning_rate": 0.0001, "loss": 0.0147, "step": 140720 }, { "epoch": 925.8552631578947, "grad_norm": 1.3302497863769531, "learning_rate": 0.0001, "loss": 0.0129, "step": 140730 }, { "epoch": 925.921052631579, "grad_norm": 1.442976951599121, "learning_rate": 0.0001, "loss": 0.0103, "step": 140740 }, { "epoch": 925.9868421052631, "grad_norm": 1.638070821762085, "learning_rate": 0.0001, "loss": 0.0091, "step": 140750 }, { "epoch": 926.0526315789474, "grad_norm": 1.5399519205093384, "learning_rate": 0.0001, "loss": 0.0126, "step": 140760 }, { "epoch": 926.1184210526316, "grad_norm": 1.6064544916152954, "learning_rate": 0.0001, "loss": 0.0142, "step": 140770 }, { "epoch": 926.1842105263158, "grad_norm": 1.525118350982666, "learning_rate": 0.0001, "loss": 0.0104, "step": 140780 }, { "epoch": 926.25, "grad_norm": 1.809870958328247, "learning_rate": 0.0001, "loss": 0.0157, "step": 140790 }, { "epoch": 926.3157894736842, "grad_norm": 1.6349273920059204, "learning_rate": 0.0001, "loss": 0.014, "step": 140800 }, { "epoch": 926.3815789473684, "grad_norm": 1.3716164827346802, "learning_rate": 0.0001, "loss": 0.0096, "step": 140810 }, { "epoch": 926.4473684210526, "grad_norm": 1.8077031373977661, "learning_rate": 0.0001, "loss": 0.0187, "step": 140820 }, { "epoch": 926.5131578947369, "grad_norm": 1.211745023727417, "learning_rate": 0.0001, "loss": 0.0116, "step": 140830 }, { "epoch": 926.578947368421, "grad_norm": 1.4823260307312012, "learning_rate": 0.0001, "loss": 0.0094, "step": 140840 }, { "epoch": 926.6447368421053, "grad_norm": 1.5969178676605225, "learning_rate": 0.0001, "loss": 0.0114, "step": 140850 }, { "epoch": 926.7105263157895, "grad_norm": 1.5605825185775757, "learning_rate": 0.0001, "loss": 0.0157, "step": 140860 }, { "epoch": 926.7763157894736, "grad_norm": 1.3450006246566772, "learning_rate": 0.0001, "loss": 0.0135, "step": 140870 }, { "epoch": 926.8421052631579, "grad_norm": 1.1673775911331177, "learning_rate": 0.0001, "loss": 0.011, "step": 140880 }, { "epoch": 926.9078947368421, "grad_norm": 1.5943653583526611, "learning_rate": 0.0001, "loss": 0.0132, "step": 140890 }, { "epoch": 926.9736842105264, "grad_norm": 1.4158649444580078, "learning_rate": 0.0001, "loss": 0.0128, "step": 140900 }, { "epoch": 927.0394736842105, "grad_norm": 1.3863821029663086, "learning_rate": 0.0001, "loss": 0.0122, "step": 140910 }, { "epoch": 927.1052631578947, "grad_norm": 1.3356341123580933, "learning_rate": 0.0001, "loss": 0.0163, "step": 140920 }, { "epoch": 927.171052631579, "grad_norm": 1.6285247802734375, "learning_rate": 0.0001, "loss": 0.0133, "step": 140930 }, { "epoch": 927.2368421052631, "grad_norm": 1.7014178037643433, "learning_rate": 0.0001, "loss": 0.0151, "step": 140940 }, { "epoch": 927.3026315789474, "grad_norm": 1.345711588859558, "learning_rate": 0.0001, "loss": 0.0114, "step": 140950 }, { "epoch": 927.3684210526316, "grad_norm": 1.5862200260162354, "learning_rate": 0.0001, "loss": 0.0114, "step": 140960 }, { "epoch": 927.4342105263158, "grad_norm": 1.4304310083389282, "learning_rate": 0.0001, "loss": 0.0123, "step": 140970 }, { "epoch": 927.5, "grad_norm": 1.417829155921936, "learning_rate": 0.0001, "loss": 0.0093, "step": 140980 }, { "epoch": 927.5657894736842, "grad_norm": 1.2577985525131226, "learning_rate": 0.0001, "loss": 0.012, "step": 140990 }, { "epoch": 927.6315789473684, "grad_norm": 1.2360106706619263, "learning_rate": 0.0001, "loss": 0.0144, "step": 141000 }, { "epoch": 927.6973684210526, "grad_norm": 0.9809899926185608, "learning_rate": 0.0001, "loss": 0.0126, "step": 141010 }, { "epoch": 927.7631578947369, "grad_norm": 1.0680444240570068, "learning_rate": 0.0001, "loss": 0.0154, "step": 141020 }, { "epoch": 927.828947368421, "grad_norm": 1.046910047531128, "learning_rate": 0.0001, "loss": 0.0136, "step": 141030 }, { "epoch": 927.8947368421053, "grad_norm": 1.3194239139556885, "learning_rate": 0.0001, "loss": 0.0126, "step": 141040 }, { "epoch": 927.9605263157895, "grad_norm": 1.1889054775238037, "learning_rate": 0.0001, "loss": 0.0117, "step": 141050 }, { "epoch": 928.0263157894736, "grad_norm": 1.4184948205947876, "learning_rate": 0.0001, "loss": 0.012, "step": 141060 }, { "epoch": 928.0921052631579, "grad_norm": 1.5508875846862793, "learning_rate": 0.0001, "loss": 0.0161, "step": 141070 }, { "epoch": 928.1578947368421, "grad_norm": 1.817280650138855, "learning_rate": 0.0001, "loss": 0.0105, "step": 141080 }, { "epoch": 928.2236842105264, "grad_norm": 1.2994272708892822, "learning_rate": 0.0001, "loss": 0.0114, "step": 141090 }, { "epoch": 928.2894736842105, "grad_norm": 1.3926613330841064, "learning_rate": 0.0001, "loss": 0.0103, "step": 141100 }, { "epoch": 928.3552631578947, "grad_norm": 1.4988833665847778, "learning_rate": 0.0001, "loss": 0.014, "step": 141110 }, { "epoch": 928.421052631579, "grad_norm": 1.4457064867019653, "learning_rate": 0.0001, "loss": 0.014, "step": 141120 }, { "epoch": 928.4868421052631, "grad_norm": 1.121883511543274, "learning_rate": 0.0001, "loss": 0.0124, "step": 141130 }, { "epoch": 928.5526315789474, "grad_norm": 1.1948912143707275, "learning_rate": 0.0001, "loss": 0.0159, "step": 141140 }, { "epoch": 928.6184210526316, "grad_norm": 1.5110472440719604, "learning_rate": 0.0001, "loss": 0.0115, "step": 141150 }, { "epoch": 928.6842105263158, "grad_norm": 1.5295865535736084, "learning_rate": 0.0001, "loss": 0.0138, "step": 141160 }, { "epoch": 928.75, "grad_norm": 1.3557610511779785, "learning_rate": 0.0001, "loss": 0.0116, "step": 141170 }, { "epoch": 928.8157894736842, "grad_norm": 1.5905766487121582, "learning_rate": 0.0001, "loss": 0.0165, "step": 141180 }, { "epoch": 928.8815789473684, "grad_norm": 1.190719723701477, "learning_rate": 0.0001, "loss": 0.0094, "step": 141190 }, { "epoch": 928.9473684210526, "grad_norm": 1.4718337059020996, "learning_rate": 0.0001, "loss": 0.014, "step": 141200 }, { "epoch": 929.0131578947369, "grad_norm": 1.2290788888931274, "learning_rate": 0.0001, "loss": 0.0106, "step": 141210 }, { "epoch": 929.078947368421, "grad_norm": 1.4272830486297607, "learning_rate": 0.0001, "loss": 0.0154, "step": 141220 }, { "epoch": 929.1447368421053, "grad_norm": 1.29914391040802, "learning_rate": 0.0001, "loss": 0.0158, "step": 141230 }, { "epoch": 929.2105263157895, "grad_norm": 1.8265337944030762, "learning_rate": 0.0001, "loss": 0.0128, "step": 141240 }, { "epoch": 929.2763157894736, "grad_norm": 1.6330009698867798, "learning_rate": 0.0001, "loss": 0.0093, "step": 141250 }, { "epoch": 929.3421052631579, "grad_norm": 1.7865843772888184, "learning_rate": 0.0001, "loss": 0.0097, "step": 141260 }, { "epoch": 929.4078947368421, "grad_norm": 1.2944718599319458, "learning_rate": 0.0001, "loss": 0.016, "step": 141270 }, { "epoch": 929.4736842105264, "grad_norm": 1.240435242652893, "learning_rate": 0.0001, "loss": 0.0133, "step": 141280 }, { "epoch": 929.5394736842105, "grad_norm": 1.638891339302063, "learning_rate": 0.0001, "loss": 0.0121, "step": 141290 }, { "epoch": 929.6052631578947, "grad_norm": 1.5075310468673706, "learning_rate": 0.0001, "loss": 0.0159, "step": 141300 }, { "epoch": 929.671052631579, "grad_norm": 1.4079954624176025, "learning_rate": 0.0001, "loss": 0.0115, "step": 141310 }, { "epoch": 929.7368421052631, "grad_norm": 1.3926321268081665, "learning_rate": 0.0001, "loss": 0.0136, "step": 141320 }, { "epoch": 929.8026315789474, "grad_norm": 1.8196150064468384, "learning_rate": 0.0001, "loss": 0.0123, "step": 141330 }, { "epoch": 929.8684210526316, "grad_norm": 1.4948174953460693, "learning_rate": 0.0001, "loss": 0.0119, "step": 141340 }, { "epoch": 929.9342105263158, "grad_norm": 1.4137743711471558, "learning_rate": 0.0001, "loss": 0.0108, "step": 141350 }, { "epoch": 930.0, "grad_norm": 1.4617292881011963, "learning_rate": 0.0001, "loss": 0.0126, "step": 141360 }, { "epoch": 930.0657894736842, "grad_norm": 1.8712068796157837, "learning_rate": 0.0001, "loss": 0.0171, "step": 141370 }, { "epoch": 930.1315789473684, "grad_norm": 1.4566041231155396, "learning_rate": 0.0001, "loss": 0.0137, "step": 141380 }, { "epoch": 930.1973684210526, "grad_norm": 1.177861213684082, "learning_rate": 0.0001, "loss": 0.0164, "step": 141390 }, { "epoch": 930.2631578947369, "grad_norm": 1.319422721862793, "learning_rate": 0.0001, "loss": 0.0131, "step": 141400 }, { "epoch": 930.328947368421, "grad_norm": 1.7622625827789307, "learning_rate": 0.0001, "loss": 0.0127, "step": 141410 }, { "epoch": 930.3947368421053, "grad_norm": 1.316861629486084, "learning_rate": 0.0001, "loss": 0.0109, "step": 141420 }, { "epoch": 930.4605263157895, "grad_norm": 1.4308351278305054, "learning_rate": 0.0001, "loss": 0.0139, "step": 141430 }, { "epoch": 930.5263157894736, "grad_norm": 1.3135432004928589, "learning_rate": 0.0001, "loss": 0.0119, "step": 141440 }, { "epoch": 930.5921052631579, "grad_norm": 1.2552953958511353, "learning_rate": 0.0001, "loss": 0.0143, "step": 141450 }, { "epoch": 930.6578947368421, "grad_norm": 1.6387211084365845, "learning_rate": 0.0001, "loss": 0.0104, "step": 141460 }, { "epoch": 930.7236842105264, "grad_norm": 1.5372658967971802, "learning_rate": 0.0001, "loss": 0.0116, "step": 141470 }, { "epoch": 930.7894736842105, "grad_norm": 1.3278604745864868, "learning_rate": 0.0001, "loss": 0.0111, "step": 141480 }, { "epoch": 930.8552631578947, "grad_norm": 1.3118553161621094, "learning_rate": 0.0001, "loss": 0.0091, "step": 141490 }, { "epoch": 930.921052631579, "grad_norm": 1.7731788158416748, "learning_rate": 0.0001, "loss": 0.009, "step": 141500 }, { "epoch": 930.9868421052631, "grad_norm": 1.5901663303375244, "learning_rate": 0.0001, "loss": 0.0158, "step": 141510 }, { "epoch": 931.0526315789474, "grad_norm": 1.4870980978012085, "learning_rate": 0.0001, "loss": 0.0132, "step": 141520 }, { "epoch": 931.1184210526316, "grad_norm": 1.551031470298767, "learning_rate": 0.0001, "loss": 0.0176, "step": 141530 }, { "epoch": 931.1842105263158, "grad_norm": 1.3098033666610718, "learning_rate": 0.0001, "loss": 0.0096, "step": 141540 }, { "epoch": 931.25, "grad_norm": 1.191756248474121, "learning_rate": 0.0001, "loss": 0.0116, "step": 141550 }, { "epoch": 931.3157894736842, "grad_norm": 1.6066068410873413, "learning_rate": 0.0001, "loss": 0.0092, "step": 141560 }, { "epoch": 931.3815789473684, "grad_norm": 1.6230320930480957, "learning_rate": 0.0001, "loss": 0.0115, "step": 141570 }, { "epoch": 931.4473684210526, "grad_norm": 1.1993556022644043, "learning_rate": 0.0001, "loss": 0.016, "step": 141580 }, { "epoch": 931.5131578947369, "grad_norm": 1.4690593481063843, "learning_rate": 0.0001, "loss": 0.0096, "step": 141590 }, { "epoch": 931.578947368421, "grad_norm": 1.556823968887329, "learning_rate": 0.0001, "loss": 0.0157, "step": 141600 }, { "epoch": 931.6447368421053, "grad_norm": 1.3611570596694946, "learning_rate": 0.0001, "loss": 0.0137, "step": 141610 }, { "epoch": 931.7105263157895, "grad_norm": 1.3724467754364014, "learning_rate": 0.0001, "loss": 0.011, "step": 141620 }, { "epoch": 931.7763157894736, "grad_norm": 1.7785652875900269, "learning_rate": 0.0001, "loss": 0.0125, "step": 141630 }, { "epoch": 931.8421052631579, "grad_norm": 1.1540836095809937, "learning_rate": 0.0001, "loss": 0.0137, "step": 141640 }, { "epoch": 931.9078947368421, "grad_norm": 1.7136765718460083, "learning_rate": 0.0001, "loss": 0.0111, "step": 141650 }, { "epoch": 931.9736842105264, "grad_norm": 1.3514200448989868, "learning_rate": 0.0001, "loss": 0.0132, "step": 141660 }, { "epoch": 932.0394736842105, "grad_norm": 1.4610313177108765, "learning_rate": 0.0001, "loss": 0.0143, "step": 141670 }, { "epoch": 932.1052631578947, "grad_norm": 1.673702597618103, "learning_rate": 0.0001, "loss": 0.0137, "step": 141680 }, { "epoch": 932.171052631579, "grad_norm": 1.4545506238937378, "learning_rate": 0.0001, "loss": 0.0111, "step": 141690 }, { "epoch": 932.2368421052631, "grad_norm": 1.631951093673706, "learning_rate": 0.0001, "loss": 0.0118, "step": 141700 }, { "epoch": 932.3026315789474, "grad_norm": 1.2247966527938843, "learning_rate": 0.0001, "loss": 0.0131, "step": 141710 }, { "epoch": 932.3684210526316, "grad_norm": 1.5268510580062866, "learning_rate": 0.0001, "loss": 0.0122, "step": 141720 }, { "epoch": 932.4342105263158, "grad_norm": 1.529741883277893, "learning_rate": 0.0001, "loss": 0.0122, "step": 141730 }, { "epoch": 932.5, "grad_norm": 1.568448781967163, "learning_rate": 0.0001, "loss": 0.0149, "step": 141740 }, { "epoch": 932.5657894736842, "grad_norm": 1.3017265796661377, "learning_rate": 0.0001, "loss": 0.0129, "step": 141750 }, { "epoch": 932.6315789473684, "grad_norm": 1.2066559791564941, "learning_rate": 0.0001, "loss": 0.014, "step": 141760 }, { "epoch": 932.6973684210526, "grad_norm": 1.7837214469909668, "learning_rate": 0.0001, "loss": 0.0128, "step": 141770 }, { "epoch": 932.7631578947369, "grad_norm": 1.5741633176803589, "learning_rate": 0.0001, "loss": 0.0113, "step": 141780 }, { "epoch": 932.828947368421, "grad_norm": 1.559424638748169, "learning_rate": 0.0001, "loss": 0.0112, "step": 141790 }, { "epoch": 932.8947368421053, "grad_norm": 1.5000436305999756, "learning_rate": 0.0001, "loss": 0.0125, "step": 141800 }, { "epoch": 932.9605263157895, "grad_norm": 1.4638454914093018, "learning_rate": 0.0001, "loss": 0.0134, "step": 141810 }, { "epoch": 933.0263157894736, "grad_norm": 1.0126944780349731, "learning_rate": 0.0001, "loss": 0.0137, "step": 141820 }, { "epoch": 933.0921052631579, "grad_norm": 1.3479516506195068, "learning_rate": 0.0001, "loss": 0.009, "step": 141830 }, { "epoch": 933.1578947368421, "grad_norm": 1.1482173204421997, "learning_rate": 0.0001, "loss": 0.0103, "step": 141840 }, { "epoch": 933.2236842105264, "grad_norm": 1.1056677103042603, "learning_rate": 0.0001, "loss": 0.0154, "step": 141850 }, { "epoch": 933.2894736842105, "grad_norm": 1.467520833015442, "learning_rate": 0.0001, "loss": 0.0122, "step": 141860 }, { "epoch": 933.3552631578947, "grad_norm": 1.4259006977081299, "learning_rate": 0.0001, "loss": 0.0125, "step": 141870 }, { "epoch": 933.421052631579, "grad_norm": 1.511313557624817, "learning_rate": 0.0001, "loss": 0.0111, "step": 141880 }, { "epoch": 933.4868421052631, "grad_norm": 1.834061861038208, "learning_rate": 0.0001, "loss": 0.0168, "step": 141890 }, { "epoch": 933.5526315789474, "grad_norm": 1.3244798183441162, "learning_rate": 0.0001, "loss": 0.0142, "step": 141900 }, { "epoch": 933.6184210526316, "grad_norm": 1.6316534280776978, "learning_rate": 0.0001, "loss": 0.019, "step": 141910 }, { "epoch": 933.6842105263158, "grad_norm": 1.6212153434753418, "learning_rate": 0.0001, "loss": 0.0166, "step": 141920 }, { "epoch": 933.75, "grad_norm": 1.214281439781189, "learning_rate": 0.0001, "loss": 0.0125, "step": 141930 }, { "epoch": 933.8157894736842, "grad_norm": 1.1815201044082642, "learning_rate": 0.0001, "loss": 0.0126, "step": 141940 }, { "epoch": 933.8815789473684, "grad_norm": 1.664067029953003, "learning_rate": 0.0001, "loss": 0.0094, "step": 141950 }, { "epoch": 933.9473684210526, "grad_norm": 1.7517896890640259, "learning_rate": 0.0001, "loss": 0.0098, "step": 141960 }, { "epoch": 934.0131578947369, "grad_norm": 0.9390572309494019, "learning_rate": 0.0001, "loss": 0.0101, "step": 141970 }, { "epoch": 934.078947368421, "grad_norm": 1.323191523551941, "learning_rate": 0.0001, "loss": 0.0129, "step": 141980 }, { "epoch": 934.1447368421053, "grad_norm": 1.3296594619750977, "learning_rate": 0.0001, "loss": 0.0137, "step": 141990 }, { "epoch": 934.2105263157895, "grad_norm": 0.8828091025352478, "learning_rate": 0.0001, "loss": 0.0151, "step": 142000 }, { "epoch": 934.2763157894736, "grad_norm": 1.5086578130722046, "learning_rate": 0.0001, "loss": 0.0116, "step": 142010 }, { "epoch": 934.3421052631579, "grad_norm": 1.257354736328125, "learning_rate": 0.0001, "loss": 0.0124, "step": 142020 }, { "epoch": 934.4078947368421, "grad_norm": 1.4444516897201538, "learning_rate": 0.0001, "loss": 0.0156, "step": 142030 }, { "epoch": 934.4736842105264, "grad_norm": 1.1941909790039062, "learning_rate": 0.0001, "loss": 0.0122, "step": 142040 }, { "epoch": 934.5394736842105, "grad_norm": 1.5200201272964478, "learning_rate": 0.0001, "loss": 0.0144, "step": 142050 }, { "epoch": 934.6052631578947, "grad_norm": 1.4314849376678467, "learning_rate": 0.0001, "loss": 0.014, "step": 142060 }, { "epoch": 934.671052631579, "grad_norm": 1.2724697589874268, "learning_rate": 0.0001, "loss": 0.0097, "step": 142070 }, { "epoch": 934.7368421052631, "grad_norm": 1.53291916847229, "learning_rate": 0.0001, "loss": 0.0165, "step": 142080 }, { "epoch": 934.8026315789474, "grad_norm": 1.8280553817749023, "learning_rate": 0.0001, "loss": 0.0149, "step": 142090 }, { "epoch": 934.8684210526316, "grad_norm": 1.480710506439209, "learning_rate": 0.0001, "loss": 0.0115, "step": 142100 }, { "epoch": 934.9342105263158, "grad_norm": 1.3061007261276245, "learning_rate": 0.0001, "loss": 0.0103, "step": 142110 }, { "epoch": 935.0, "grad_norm": 1.6194288730621338, "learning_rate": 0.0001, "loss": 0.0122, "step": 142120 }, { "epoch": 935.0657894736842, "grad_norm": 1.3964086771011353, "learning_rate": 0.0001, "loss": 0.0123, "step": 142130 }, { "epoch": 935.1315789473684, "grad_norm": 1.805552363395691, "learning_rate": 0.0001, "loss": 0.0118, "step": 142140 }, { "epoch": 935.1973684210526, "grad_norm": 1.8745197057724, "learning_rate": 0.0001, "loss": 0.0122, "step": 142150 }, { "epoch": 935.2631578947369, "grad_norm": 1.4629144668579102, "learning_rate": 0.0001, "loss": 0.0135, "step": 142160 }, { "epoch": 935.328947368421, "grad_norm": 1.151237964630127, "learning_rate": 0.0001, "loss": 0.012, "step": 142170 }, { "epoch": 935.3947368421053, "grad_norm": 1.5426056385040283, "learning_rate": 0.0001, "loss": 0.0127, "step": 142180 }, { "epoch": 935.4605263157895, "grad_norm": 1.4828308820724487, "learning_rate": 0.0001, "loss": 0.0142, "step": 142190 }, { "epoch": 935.5263157894736, "grad_norm": 1.2218953371047974, "learning_rate": 0.0001, "loss": 0.015, "step": 142200 }, { "epoch": 935.5921052631579, "grad_norm": 1.393984317779541, "learning_rate": 0.0001, "loss": 0.0133, "step": 142210 }, { "epoch": 935.6578947368421, "grad_norm": 1.353838324546814, "learning_rate": 0.0001, "loss": 0.0106, "step": 142220 }, { "epoch": 935.7236842105264, "grad_norm": 1.35507333278656, "learning_rate": 0.0001, "loss": 0.0122, "step": 142230 }, { "epoch": 935.7894736842105, "grad_norm": 1.2441374063491821, "learning_rate": 0.0001, "loss": 0.0157, "step": 142240 }, { "epoch": 935.8552631578947, "grad_norm": 1.6730870008468628, "learning_rate": 0.0001, "loss": 0.0146, "step": 142250 }, { "epoch": 935.921052631579, "grad_norm": 1.4806852340698242, "learning_rate": 0.0001, "loss": 0.0107, "step": 142260 }, { "epoch": 935.9868421052631, "grad_norm": 1.90867018699646, "learning_rate": 0.0001, "loss": 0.0136, "step": 142270 }, { "epoch": 936.0526315789474, "grad_norm": 1.5950438976287842, "learning_rate": 0.0001, "loss": 0.012, "step": 142280 }, { "epoch": 936.1184210526316, "grad_norm": 1.601334810256958, "learning_rate": 0.0001, "loss": 0.0109, "step": 142290 }, { "epoch": 936.1842105263158, "grad_norm": 1.3305855989456177, "learning_rate": 0.0001, "loss": 0.0091, "step": 142300 }, { "epoch": 936.25, "grad_norm": 1.6058878898620605, "learning_rate": 0.0001, "loss": 0.0111, "step": 142310 }, { "epoch": 936.3157894736842, "grad_norm": 1.342387318611145, "learning_rate": 0.0001, "loss": 0.0132, "step": 142320 }, { "epoch": 936.3815789473684, "grad_norm": 1.3538199663162231, "learning_rate": 0.0001, "loss": 0.0137, "step": 142330 }, { "epoch": 936.4473684210526, "grad_norm": 1.6003621816635132, "learning_rate": 0.0001, "loss": 0.0157, "step": 142340 }, { "epoch": 936.5131578947369, "grad_norm": 1.2866758108139038, "learning_rate": 0.0001, "loss": 0.0158, "step": 142350 }, { "epoch": 936.578947368421, "grad_norm": 1.5813109874725342, "learning_rate": 0.0001, "loss": 0.0116, "step": 142360 }, { "epoch": 936.6447368421053, "grad_norm": 1.3714712858200073, "learning_rate": 0.0001, "loss": 0.0148, "step": 142370 }, { "epoch": 936.7105263157895, "grad_norm": 1.719346046447754, "learning_rate": 0.0001, "loss": 0.0131, "step": 142380 }, { "epoch": 936.7763157894736, "grad_norm": 1.220705270767212, "learning_rate": 0.0001, "loss": 0.0141, "step": 142390 }, { "epoch": 936.8421052631579, "grad_norm": 1.4808167219161987, "learning_rate": 0.0001, "loss": 0.0143, "step": 142400 }, { "epoch": 936.9078947368421, "grad_norm": 1.0775530338287354, "learning_rate": 0.0001, "loss": 0.0122, "step": 142410 }, { "epoch": 936.9736842105264, "grad_norm": 1.405487060546875, "learning_rate": 0.0001, "loss": 0.0102, "step": 142420 }, { "epoch": 937.0394736842105, "grad_norm": 1.5948731899261475, "learning_rate": 0.0001, "loss": 0.0134, "step": 142430 }, { "epoch": 937.1052631578947, "grad_norm": 1.0740313529968262, "learning_rate": 0.0001, "loss": 0.0116, "step": 142440 }, { "epoch": 937.171052631579, "grad_norm": 1.1838213205337524, "learning_rate": 0.0001, "loss": 0.0144, "step": 142450 }, { "epoch": 937.2368421052631, "grad_norm": 1.1072313785552979, "learning_rate": 0.0001, "loss": 0.0148, "step": 142460 }, { "epoch": 937.3026315789474, "grad_norm": 1.3354060649871826, "learning_rate": 0.0001, "loss": 0.0128, "step": 142470 }, { "epoch": 937.3684210526316, "grad_norm": 1.3944811820983887, "learning_rate": 0.0001, "loss": 0.0096, "step": 142480 }, { "epoch": 937.4342105263158, "grad_norm": 1.3308361768722534, "learning_rate": 0.0001, "loss": 0.0112, "step": 142490 }, { "epoch": 937.5, "grad_norm": 1.5977680683135986, "learning_rate": 0.0001, "loss": 0.0119, "step": 142500 }, { "epoch": 937.5657894736842, "grad_norm": 1.5698448419570923, "learning_rate": 0.0001, "loss": 0.0127, "step": 142510 }, { "epoch": 937.6315789473684, "grad_norm": 1.5888651609420776, "learning_rate": 0.0001, "loss": 0.012, "step": 142520 }, { "epoch": 937.6973684210526, "grad_norm": 1.3004058599472046, "learning_rate": 0.0001, "loss": 0.0119, "step": 142530 }, { "epoch": 937.7631578947369, "grad_norm": 1.5981618165969849, "learning_rate": 0.0001, "loss": 0.0154, "step": 142540 }, { "epoch": 937.828947368421, "grad_norm": 1.626956820487976, "learning_rate": 0.0001, "loss": 0.013, "step": 142550 }, { "epoch": 937.8947368421053, "grad_norm": 1.6659276485443115, "learning_rate": 0.0001, "loss": 0.014, "step": 142560 }, { "epoch": 937.9605263157895, "grad_norm": 1.7948960065841675, "learning_rate": 0.0001, "loss": 0.0121, "step": 142570 }, { "epoch": 938.0263157894736, "grad_norm": 1.5478763580322266, "learning_rate": 0.0001, "loss": 0.0132, "step": 142580 }, { "epoch": 938.0921052631579, "grad_norm": 1.7184059619903564, "learning_rate": 0.0001, "loss": 0.0092, "step": 142590 }, { "epoch": 938.1578947368421, "grad_norm": 1.9129999876022339, "learning_rate": 0.0001, "loss": 0.0117, "step": 142600 }, { "epoch": 938.2236842105264, "grad_norm": 1.6790847778320312, "learning_rate": 0.0001, "loss": 0.0123, "step": 142610 }, { "epoch": 938.2894736842105, "grad_norm": 1.6907256841659546, "learning_rate": 0.0001, "loss": 0.0121, "step": 142620 }, { "epoch": 938.3552631578947, "grad_norm": 1.2637823820114136, "learning_rate": 0.0001, "loss": 0.0121, "step": 142630 }, { "epoch": 938.421052631579, "grad_norm": 1.4729118347167969, "learning_rate": 0.0001, "loss": 0.0138, "step": 142640 }, { "epoch": 938.4868421052631, "grad_norm": 1.211166501045227, "learning_rate": 0.0001, "loss": 0.0122, "step": 142650 }, { "epoch": 938.5526315789474, "grad_norm": 1.3932963609695435, "learning_rate": 0.0001, "loss": 0.0117, "step": 142660 }, { "epoch": 938.6184210526316, "grad_norm": 1.4409351348876953, "learning_rate": 0.0001, "loss": 0.0176, "step": 142670 }, { "epoch": 938.6842105263158, "grad_norm": 1.7204813957214355, "learning_rate": 0.0001, "loss": 0.0104, "step": 142680 }, { "epoch": 938.75, "grad_norm": 1.1596821546554565, "learning_rate": 0.0001, "loss": 0.0104, "step": 142690 }, { "epoch": 938.8157894736842, "grad_norm": 1.24693763256073, "learning_rate": 0.0001, "loss": 0.0122, "step": 142700 }, { "epoch": 938.8815789473684, "grad_norm": 1.307036280632019, "learning_rate": 0.0001, "loss": 0.0152, "step": 142710 }, { "epoch": 938.9473684210526, "grad_norm": 1.429268479347229, "learning_rate": 0.0001, "loss": 0.0141, "step": 142720 }, { "epoch": 939.0131578947369, "grad_norm": 1.4425063133239746, "learning_rate": 0.0001, "loss": 0.013, "step": 142730 }, { "epoch": 939.078947368421, "grad_norm": 1.1702826023101807, "learning_rate": 0.0001, "loss": 0.013, "step": 142740 }, { "epoch": 939.1447368421053, "grad_norm": 1.3890191316604614, "learning_rate": 0.0001, "loss": 0.0121, "step": 142750 }, { "epoch": 939.2105263157895, "grad_norm": 1.415703296661377, "learning_rate": 0.0001, "loss": 0.0163, "step": 142760 }, { "epoch": 939.2763157894736, "grad_norm": 1.0454028844833374, "learning_rate": 0.0001, "loss": 0.0111, "step": 142770 }, { "epoch": 939.3421052631579, "grad_norm": 1.1426876783370972, "learning_rate": 0.0001, "loss": 0.0113, "step": 142780 }, { "epoch": 939.4078947368421, "grad_norm": 1.0766730308532715, "learning_rate": 0.0001, "loss": 0.0142, "step": 142790 }, { "epoch": 939.4736842105264, "grad_norm": 1.5364714860916138, "learning_rate": 0.0001, "loss": 0.013, "step": 142800 }, { "epoch": 939.5394736842105, "grad_norm": 1.1485539674758911, "learning_rate": 0.0001, "loss": 0.0137, "step": 142810 }, { "epoch": 939.6052631578947, "grad_norm": 1.9935736656188965, "learning_rate": 0.0001, "loss": 0.0179, "step": 142820 }, { "epoch": 939.671052631579, "grad_norm": 1.5685068368911743, "learning_rate": 0.0001, "loss": 0.0108, "step": 142830 }, { "epoch": 939.7368421052631, "grad_norm": 1.736770749092102, "learning_rate": 0.0001, "loss": 0.0097, "step": 142840 }, { "epoch": 939.8026315789474, "grad_norm": 1.2837681770324707, "learning_rate": 0.0001, "loss": 0.0131, "step": 142850 }, { "epoch": 939.8684210526316, "grad_norm": 1.1016682386398315, "learning_rate": 0.0001, "loss": 0.0118, "step": 142860 }, { "epoch": 939.9342105263158, "grad_norm": 1.5285701751708984, "learning_rate": 0.0001, "loss": 0.0098, "step": 142870 }, { "epoch": 940.0, "grad_norm": 1.4347033500671387, "learning_rate": 0.0001, "loss": 0.0143, "step": 142880 }, { "epoch": 940.0657894736842, "grad_norm": 1.4728671312332153, "learning_rate": 0.0001, "loss": 0.0121, "step": 142890 }, { "epoch": 940.1315789473684, "grad_norm": 1.3725216388702393, "learning_rate": 0.0001, "loss": 0.011, "step": 142900 }, { "epoch": 940.1973684210526, "grad_norm": 1.4424808025360107, "learning_rate": 0.0001, "loss": 0.0122, "step": 142910 }, { "epoch": 940.2631578947369, "grad_norm": 1.5270509719848633, "learning_rate": 0.0001, "loss": 0.0117, "step": 142920 }, { "epoch": 940.328947368421, "grad_norm": 1.1576521396636963, "learning_rate": 0.0001, "loss": 0.0131, "step": 142930 }, { "epoch": 940.3947368421053, "grad_norm": 1.4317865371704102, "learning_rate": 0.0001, "loss": 0.0133, "step": 142940 }, { "epoch": 940.4605263157895, "grad_norm": 1.6862213611602783, "learning_rate": 0.0001, "loss": 0.0106, "step": 142950 }, { "epoch": 940.5263157894736, "grad_norm": 1.473896861076355, "learning_rate": 0.0001, "loss": 0.014, "step": 142960 }, { "epoch": 940.5921052631579, "grad_norm": 1.825333833694458, "learning_rate": 0.0001, "loss": 0.0109, "step": 142970 }, { "epoch": 940.6578947368421, "grad_norm": 1.521349310874939, "learning_rate": 0.0001, "loss": 0.0127, "step": 142980 }, { "epoch": 940.7236842105264, "grad_norm": 1.3264482021331787, "learning_rate": 0.0001, "loss": 0.0105, "step": 142990 }, { "epoch": 940.7894736842105, "grad_norm": 1.8943246603012085, "learning_rate": 0.0001, "loss": 0.0135, "step": 143000 }, { "epoch": 940.8552631578947, "grad_norm": 1.5507373809814453, "learning_rate": 0.0001, "loss": 0.0174, "step": 143010 }, { "epoch": 940.921052631579, "grad_norm": 1.6063262224197388, "learning_rate": 0.0001, "loss": 0.0131, "step": 143020 }, { "epoch": 940.9868421052631, "grad_norm": 1.2471667528152466, "learning_rate": 0.0001, "loss": 0.0143, "step": 143030 }, { "epoch": 941.0526315789474, "grad_norm": 1.6128287315368652, "learning_rate": 0.0001, "loss": 0.014, "step": 143040 }, { "epoch": 941.1184210526316, "grad_norm": 1.5207875967025757, "learning_rate": 0.0001, "loss": 0.0137, "step": 143050 }, { "epoch": 941.1842105263158, "grad_norm": 1.127669334411621, "learning_rate": 0.0001, "loss": 0.0123, "step": 143060 }, { "epoch": 941.25, "grad_norm": 1.3232378959655762, "learning_rate": 0.0001, "loss": 0.0148, "step": 143070 }, { "epoch": 941.3157894736842, "grad_norm": 1.5373835563659668, "learning_rate": 0.0001, "loss": 0.0099, "step": 143080 }, { "epoch": 941.3815789473684, "grad_norm": 1.0874077081680298, "learning_rate": 0.0001, "loss": 0.0171, "step": 143090 }, { "epoch": 941.4473684210526, "grad_norm": 1.7526788711547852, "learning_rate": 0.0001, "loss": 0.0128, "step": 143100 }, { "epoch": 941.5131578947369, "grad_norm": 1.6757112741470337, "learning_rate": 0.0001, "loss": 0.0102, "step": 143110 }, { "epoch": 941.578947368421, "grad_norm": 1.1240613460540771, "learning_rate": 0.0001, "loss": 0.012, "step": 143120 }, { "epoch": 941.6447368421053, "grad_norm": 1.295961856842041, "learning_rate": 0.0001, "loss": 0.0127, "step": 143130 }, { "epoch": 941.7105263157895, "grad_norm": 1.3748321533203125, "learning_rate": 0.0001, "loss": 0.0112, "step": 143140 }, { "epoch": 941.7763157894736, "grad_norm": 1.3531829118728638, "learning_rate": 0.0001, "loss": 0.0109, "step": 143150 }, { "epoch": 941.8421052631579, "grad_norm": 1.5042628049850464, "learning_rate": 0.0001, "loss": 0.0154, "step": 143160 }, { "epoch": 941.9078947368421, "grad_norm": 1.4157888889312744, "learning_rate": 0.0001, "loss": 0.0139, "step": 143170 }, { "epoch": 941.9736842105264, "grad_norm": 1.4722492694854736, "learning_rate": 0.0001, "loss": 0.0116, "step": 143180 }, { "epoch": 942.0394736842105, "grad_norm": 1.4252771139144897, "learning_rate": 0.0001, "loss": 0.0104, "step": 143190 }, { "epoch": 942.1052631578947, "grad_norm": 1.5617536306381226, "learning_rate": 0.0001, "loss": 0.0097, "step": 143200 }, { "epoch": 942.171052631579, "grad_norm": 1.9337931871414185, "learning_rate": 0.0001, "loss": 0.0124, "step": 143210 }, { "epoch": 942.2368421052631, "grad_norm": 1.6294033527374268, "learning_rate": 0.0001, "loss": 0.0107, "step": 143220 }, { "epoch": 942.3026315789474, "grad_norm": 1.7949275970458984, "learning_rate": 0.0001, "loss": 0.015, "step": 143230 }, { "epoch": 942.3684210526316, "grad_norm": 1.7420669794082642, "learning_rate": 0.0001, "loss": 0.0122, "step": 143240 }, { "epoch": 942.4342105263158, "grad_norm": 1.3666108846664429, "learning_rate": 0.0001, "loss": 0.0117, "step": 143250 }, { "epoch": 942.5, "grad_norm": 1.6398895978927612, "learning_rate": 0.0001, "loss": 0.0126, "step": 143260 }, { "epoch": 942.5657894736842, "grad_norm": 1.8092608451843262, "learning_rate": 0.0001, "loss": 0.0113, "step": 143270 }, { "epoch": 942.6315789473684, "grad_norm": 1.7967216968536377, "learning_rate": 0.0001, "loss": 0.0113, "step": 143280 }, { "epoch": 942.6973684210526, "grad_norm": 1.4965492486953735, "learning_rate": 0.0001, "loss": 0.0123, "step": 143290 }, { "epoch": 942.7631578947369, "grad_norm": 1.3182657957077026, "learning_rate": 0.0001, "loss": 0.0145, "step": 143300 }, { "epoch": 942.828947368421, "grad_norm": 1.2108184099197388, "learning_rate": 0.0001, "loss": 0.0132, "step": 143310 }, { "epoch": 942.8947368421053, "grad_norm": 1.6284220218658447, "learning_rate": 0.0001, "loss": 0.0144, "step": 143320 }, { "epoch": 942.9605263157895, "grad_norm": 1.536023736000061, "learning_rate": 0.0001, "loss": 0.0133, "step": 143330 }, { "epoch": 943.0263157894736, "grad_norm": 1.6681175231933594, "learning_rate": 0.0001, "loss": 0.0177, "step": 143340 }, { "epoch": 943.0921052631579, "grad_norm": 1.4800498485565186, "learning_rate": 0.0001, "loss": 0.0114, "step": 143350 }, { "epoch": 943.1578947368421, "grad_norm": 1.4893473386764526, "learning_rate": 0.0001, "loss": 0.0106, "step": 143360 }, { "epoch": 943.2236842105264, "grad_norm": 1.9233239889144897, "learning_rate": 0.0001, "loss": 0.0105, "step": 143370 }, { "epoch": 943.2894736842105, "grad_norm": 1.5304410457611084, "learning_rate": 0.0001, "loss": 0.0109, "step": 143380 }, { "epoch": 943.3552631578947, "grad_norm": 2.3179738521575928, "learning_rate": 0.0001, "loss": 0.0107, "step": 143390 }, { "epoch": 943.421052631579, "grad_norm": 2.1960482597351074, "learning_rate": 0.0001, "loss": 0.0165, "step": 143400 }, { "epoch": 943.4868421052631, "grad_norm": 1.3325834274291992, "learning_rate": 0.0001, "loss": 0.0108, "step": 143410 }, { "epoch": 943.5526315789474, "grad_norm": 1.677672266960144, "learning_rate": 0.0001, "loss": 0.0151, "step": 143420 }, { "epoch": 943.6184210526316, "grad_norm": 1.687840461730957, "learning_rate": 0.0001, "loss": 0.0158, "step": 143430 }, { "epoch": 943.6842105263158, "grad_norm": 1.7811003923416138, "learning_rate": 0.0001, "loss": 0.0155, "step": 143440 }, { "epoch": 943.75, "grad_norm": 1.7852048873901367, "learning_rate": 0.0001, "loss": 0.0111, "step": 143450 }, { "epoch": 943.8157894736842, "grad_norm": 1.610106348991394, "learning_rate": 0.0001, "loss": 0.0147, "step": 143460 }, { "epoch": 943.8815789473684, "grad_norm": 1.5656158924102783, "learning_rate": 0.0001, "loss": 0.0109, "step": 143470 }, { "epoch": 943.9473684210526, "grad_norm": 1.6237123012542725, "learning_rate": 0.0001, "loss": 0.0119, "step": 143480 }, { "epoch": 944.0131578947369, "grad_norm": 1.3824607133865356, "learning_rate": 0.0001, "loss": 0.0128, "step": 143490 }, { "epoch": 944.078947368421, "grad_norm": 1.7143323421478271, "learning_rate": 0.0001, "loss": 0.015, "step": 143500 }, { "epoch": 944.1447368421053, "grad_norm": 1.2250030040740967, "learning_rate": 0.0001, "loss": 0.0101, "step": 143510 }, { "epoch": 944.2105263157895, "grad_norm": 1.6763790845870972, "learning_rate": 0.0001, "loss": 0.0123, "step": 143520 }, { "epoch": 944.2763157894736, "grad_norm": 1.683943748474121, "learning_rate": 0.0001, "loss": 0.012, "step": 143530 }, { "epoch": 944.3421052631579, "grad_norm": 1.4590421915054321, "learning_rate": 0.0001, "loss": 0.013, "step": 143540 }, { "epoch": 944.4078947368421, "grad_norm": 1.7539594173431396, "learning_rate": 0.0001, "loss": 0.0168, "step": 143550 }, { "epoch": 944.4736842105264, "grad_norm": 1.1736458539962769, "learning_rate": 0.0001, "loss": 0.0119, "step": 143560 }, { "epoch": 944.5394736842105, "grad_norm": 1.3157012462615967, "learning_rate": 0.0001, "loss": 0.0134, "step": 143570 }, { "epoch": 944.6052631578947, "grad_norm": 1.2335083484649658, "learning_rate": 0.0001, "loss": 0.015, "step": 143580 }, { "epoch": 944.671052631579, "grad_norm": 1.308646321296692, "learning_rate": 0.0001, "loss": 0.0113, "step": 143590 }, { "epoch": 944.7368421052631, "grad_norm": 1.2457836866378784, "learning_rate": 0.0001, "loss": 0.0135, "step": 143600 }, { "epoch": 944.8026315789474, "grad_norm": 1.2897307872772217, "learning_rate": 0.0001, "loss": 0.0131, "step": 143610 }, { "epoch": 944.8684210526316, "grad_norm": 1.2018862962722778, "learning_rate": 0.0001, "loss": 0.0098, "step": 143620 }, { "epoch": 944.9342105263158, "grad_norm": 1.3597670793533325, "learning_rate": 0.0001, "loss": 0.0132, "step": 143630 }, { "epoch": 945.0, "grad_norm": 1.859485149383545, "learning_rate": 0.0001, "loss": 0.0104, "step": 143640 }, { "epoch": 945.0657894736842, "grad_norm": 1.7388427257537842, "learning_rate": 0.0001, "loss": 0.0092, "step": 143650 }, { "epoch": 945.1315789473684, "grad_norm": 1.303515911102295, "learning_rate": 0.0001, "loss": 0.0114, "step": 143660 }, { "epoch": 945.1973684210526, "grad_norm": 0.9242700338363647, "learning_rate": 0.0001, "loss": 0.0164, "step": 143670 }, { "epoch": 945.2631578947369, "grad_norm": 1.5926463603973389, "learning_rate": 0.0001, "loss": 0.013, "step": 143680 }, { "epoch": 945.328947368421, "grad_norm": 1.188036561012268, "learning_rate": 0.0001, "loss": 0.0155, "step": 143690 }, { "epoch": 945.3947368421053, "grad_norm": 1.8620529174804688, "learning_rate": 0.0001, "loss": 0.0121, "step": 143700 }, { "epoch": 945.4605263157895, "grad_norm": 1.776767373085022, "learning_rate": 0.0001, "loss": 0.0111, "step": 143710 }, { "epoch": 945.5263157894736, "grad_norm": 1.845749020576477, "learning_rate": 0.0001, "loss": 0.0132, "step": 143720 }, { "epoch": 945.5921052631579, "grad_norm": 1.4647225141525269, "learning_rate": 0.0001, "loss": 0.0105, "step": 143730 }, { "epoch": 945.6578947368421, "grad_norm": 1.5426726341247559, "learning_rate": 0.0001, "loss": 0.0121, "step": 143740 }, { "epoch": 945.7236842105264, "grad_norm": 1.1208350658416748, "learning_rate": 0.0001, "loss": 0.0143, "step": 143750 }, { "epoch": 945.7894736842105, "grad_norm": 1.4940259456634521, "learning_rate": 0.0001, "loss": 0.0096, "step": 143760 }, { "epoch": 945.8552631578947, "grad_norm": 1.7078272104263306, "learning_rate": 0.0001, "loss": 0.0131, "step": 143770 }, { "epoch": 945.921052631579, "grad_norm": 1.3273991346359253, "learning_rate": 0.0001, "loss": 0.0157, "step": 143780 }, { "epoch": 945.9868421052631, "grad_norm": 1.400191068649292, "learning_rate": 0.0001, "loss": 0.0136, "step": 143790 }, { "epoch": 946.0526315789474, "grad_norm": 1.6773120164871216, "learning_rate": 0.0001, "loss": 0.0117, "step": 143800 }, { "epoch": 946.1184210526316, "grad_norm": 1.6568883657455444, "learning_rate": 0.0001, "loss": 0.013, "step": 143810 }, { "epoch": 946.1842105263158, "grad_norm": 1.2729883193969727, "learning_rate": 0.0001, "loss": 0.0128, "step": 143820 }, { "epoch": 946.25, "grad_norm": 1.1144345998764038, "learning_rate": 0.0001, "loss": 0.0109, "step": 143830 }, { "epoch": 946.3157894736842, "grad_norm": 1.2792203426361084, "learning_rate": 0.0001, "loss": 0.0175, "step": 143840 }, { "epoch": 946.3815789473684, "grad_norm": 1.751556158065796, "learning_rate": 0.0001, "loss": 0.0131, "step": 143850 }, { "epoch": 946.4473684210526, "grad_norm": 1.3958170413970947, "learning_rate": 0.0001, "loss": 0.0131, "step": 143860 }, { "epoch": 946.5131578947369, "grad_norm": 1.5868499279022217, "learning_rate": 0.0001, "loss": 0.0138, "step": 143870 }, { "epoch": 946.578947368421, "grad_norm": 1.8744982481002808, "learning_rate": 0.0001, "loss": 0.0093, "step": 143880 }, { "epoch": 946.6447368421053, "grad_norm": 1.5942676067352295, "learning_rate": 0.0001, "loss": 0.0126, "step": 143890 }, { "epoch": 946.7105263157895, "grad_norm": 1.7196407318115234, "learning_rate": 0.0001, "loss": 0.0136, "step": 143900 }, { "epoch": 946.7763157894736, "grad_norm": 1.1515663862228394, "learning_rate": 0.0001, "loss": 0.0114, "step": 143910 }, { "epoch": 946.8421052631579, "grad_norm": 1.2266743183135986, "learning_rate": 0.0001, "loss": 0.0146, "step": 143920 }, { "epoch": 946.9078947368421, "grad_norm": 1.7339338064193726, "learning_rate": 0.0001, "loss": 0.0135, "step": 143930 }, { "epoch": 946.9736842105264, "grad_norm": 1.2800790071487427, "learning_rate": 0.0001, "loss": 0.0122, "step": 143940 }, { "epoch": 947.0394736842105, "grad_norm": 1.6817286014556885, "learning_rate": 0.0001, "loss": 0.0115, "step": 143950 }, { "epoch": 947.1052631578947, "grad_norm": 1.3232215642929077, "learning_rate": 0.0001, "loss": 0.011, "step": 143960 }, { "epoch": 947.171052631579, "grad_norm": 1.6168769598007202, "learning_rate": 0.0001, "loss": 0.0129, "step": 143970 }, { "epoch": 947.2368421052631, "grad_norm": 1.2187737226486206, "learning_rate": 0.0001, "loss": 0.0118, "step": 143980 }, { "epoch": 947.3026315789474, "grad_norm": 1.709048867225647, "learning_rate": 0.0001, "loss": 0.0145, "step": 143990 }, { "epoch": 947.3684210526316, "grad_norm": 1.7222493886947632, "learning_rate": 0.0001, "loss": 0.0112, "step": 144000 }, { "epoch": 947.4342105263158, "grad_norm": 1.4299023151397705, "learning_rate": 0.0001, "loss": 0.0124, "step": 144010 }, { "epoch": 947.5, "grad_norm": 1.8092154264450073, "learning_rate": 0.0001, "loss": 0.012, "step": 144020 }, { "epoch": 947.5657894736842, "grad_norm": 1.3097598552703857, "learning_rate": 0.0001, "loss": 0.0118, "step": 144030 }, { "epoch": 947.6315789473684, "grad_norm": 1.5132373571395874, "learning_rate": 0.0001, "loss": 0.011, "step": 144040 }, { "epoch": 947.6973684210526, "grad_norm": 1.9871983528137207, "learning_rate": 0.0001, "loss": 0.0156, "step": 144050 }, { "epoch": 947.7631578947369, "grad_norm": 1.487114667892456, "learning_rate": 0.0001, "loss": 0.016, "step": 144060 }, { "epoch": 947.828947368421, "grad_norm": 2.0508594512939453, "learning_rate": 0.0001, "loss": 0.0111, "step": 144070 }, { "epoch": 947.8947368421053, "grad_norm": 2.003190279006958, "learning_rate": 0.0001, "loss": 0.0148, "step": 144080 }, { "epoch": 947.9605263157895, "grad_norm": 1.3370134830474854, "learning_rate": 0.0001, "loss": 0.0126, "step": 144090 }, { "epoch": 948.0263157894736, "grad_norm": 1.2263221740722656, "learning_rate": 0.0001, "loss": 0.0091, "step": 144100 }, { "epoch": 948.0921052631579, "grad_norm": 1.3517407178878784, "learning_rate": 0.0001, "loss": 0.0092, "step": 144110 }, { "epoch": 948.1578947368421, "grad_norm": 1.5721372365951538, "learning_rate": 0.0001, "loss": 0.0111, "step": 144120 }, { "epoch": 948.2236842105264, "grad_norm": 1.418186902999878, "learning_rate": 0.0001, "loss": 0.0151, "step": 144130 }, { "epoch": 948.2894736842105, "grad_norm": 0.9587410688400269, "learning_rate": 0.0001, "loss": 0.0143, "step": 144140 }, { "epoch": 948.3552631578947, "grad_norm": 1.379968523979187, "learning_rate": 0.0001, "loss": 0.0109, "step": 144150 }, { "epoch": 948.421052631579, "grad_norm": 1.3250998258590698, "learning_rate": 0.0001, "loss": 0.0161, "step": 144160 }, { "epoch": 948.4868421052631, "grad_norm": 1.4317200183868408, "learning_rate": 0.0001, "loss": 0.0108, "step": 144170 }, { "epoch": 948.5526315789474, "grad_norm": 1.513695240020752, "learning_rate": 0.0001, "loss": 0.0121, "step": 144180 }, { "epoch": 948.6184210526316, "grad_norm": 1.3731719255447388, "learning_rate": 0.0001, "loss": 0.0119, "step": 144190 }, { "epoch": 948.6842105263158, "grad_norm": 1.4426262378692627, "learning_rate": 0.0001, "loss": 0.0146, "step": 144200 }, { "epoch": 948.75, "grad_norm": 1.617365837097168, "learning_rate": 0.0001, "loss": 0.014, "step": 144210 }, { "epoch": 948.8157894736842, "grad_norm": 1.6515859365463257, "learning_rate": 0.0001, "loss": 0.0125, "step": 144220 }, { "epoch": 948.8815789473684, "grad_norm": 1.0057196617126465, "learning_rate": 0.0001, "loss": 0.0132, "step": 144230 }, { "epoch": 948.9473684210526, "grad_norm": 1.423248529434204, "learning_rate": 0.0001, "loss": 0.0158, "step": 144240 }, { "epoch": 949.0131578947369, "grad_norm": 1.6734216213226318, "learning_rate": 0.0001, "loss": 0.0136, "step": 144250 }, { "epoch": 949.078947368421, "grad_norm": 1.4063547849655151, "learning_rate": 0.0001, "loss": 0.0094, "step": 144260 }, { "epoch": 949.1447368421053, "grad_norm": 1.102011799812317, "learning_rate": 0.0001, "loss": 0.014, "step": 144270 }, { "epoch": 949.2105263157895, "grad_norm": 1.5956454277038574, "learning_rate": 0.0001, "loss": 0.0112, "step": 144280 }, { "epoch": 949.2763157894736, "grad_norm": 1.4580620527267456, "learning_rate": 0.0001, "loss": 0.0097, "step": 144290 }, { "epoch": 949.3421052631579, "grad_norm": 1.9934154748916626, "learning_rate": 0.0001, "loss": 0.0112, "step": 144300 }, { "epoch": 949.4078947368421, "grad_norm": 1.672674298286438, "learning_rate": 0.0001, "loss": 0.0106, "step": 144310 }, { "epoch": 949.4736842105264, "grad_norm": 1.4748916625976562, "learning_rate": 0.0001, "loss": 0.0106, "step": 144320 }, { "epoch": 949.5394736842105, "grad_norm": 1.8999953269958496, "learning_rate": 0.0001, "loss": 0.0165, "step": 144330 }, { "epoch": 949.6052631578947, "grad_norm": 1.4446938037872314, "learning_rate": 0.0001, "loss": 0.013, "step": 144340 }, { "epoch": 949.671052631579, "grad_norm": 0.9010093808174133, "learning_rate": 0.0001, "loss": 0.0107, "step": 144350 }, { "epoch": 949.7368421052631, "grad_norm": 1.2622226476669312, "learning_rate": 0.0001, "loss": 0.0123, "step": 144360 }, { "epoch": 949.8026315789474, "grad_norm": 1.3343383073806763, "learning_rate": 0.0001, "loss": 0.0162, "step": 144370 }, { "epoch": 949.8684210526316, "grad_norm": 1.1784496307373047, "learning_rate": 0.0001, "loss": 0.0174, "step": 144380 }, { "epoch": 949.9342105263158, "grad_norm": 1.3936336040496826, "learning_rate": 0.0001, "loss": 0.0107, "step": 144390 }, { "epoch": 950.0, "grad_norm": 1.337624430656433, "learning_rate": 0.0001, "loss": 0.0124, "step": 144400 }, { "epoch": 950.0657894736842, "grad_norm": 1.4488574266433716, "learning_rate": 0.0001, "loss": 0.0127, "step": 144410 }, { "epoch": 950.1315789473684, "grad_norm": 1.416540265083313, "learning_rate": 0.0001, "loss": 0.0168, "step": 144420 }, { "epoch": 950.1973684210526, "grad_norm": 1.3935821056365967, "learning_rate": 0.0001, "loss": 0.015, "step": 144430 }, { "epoch": 950.2631578947369, "grad_norm": 1.3549907207489014, "learning_rate": 0.0001, "loss": 0.0113, "step": 144440 }, { "epoch": 950.328947368421, "grad_norm": 1.7179275751113892, "learning_rate": 0.0001, "loss": 0.0114, "step": 144450 }, { "epoch": 950.3947368421053, "grad_norm": 1.5958245992660522, "learning_rate": 0.0001, "loss": 0.0142, "step": 144460 }, { "epoch": 950.4605263157895, "grad_norm": 1.4015578031539917, "learning_rate": 0.0001, "loss": 0.0129, "step": 144470 }, { "epoch": 950.5263157894736, "grad_norm": 1.825136661529541, "learning_rate": 0.0001, "loss": 0.0108, "step": 144480 }, { "epoch": 950.5921052631579, "grad_norm": 1.1952670812606812, "learning_rate": 0.0001, "loss": 0.0108, "step": 144490 }, { "epoch": 950.6578947368421, "grad_norm": 1.1748610734939575, "learning_rate": 0.0001, "loss": 0.0103, "step": 144500 }, { "epoch": 950.7236842105264, "grad_norm": 1.770904779434204, "learning_rate": 0.0001, "loss": 0.0107, "step": 144510 }, { "epoch": 950.7894736842105, "grad_norm": 1.9536930322647095, "learning_rate": 0.0001, "loss": 0.0131, "step": 144520 }, { "epoch": 950.8552631578947, "grad_norm": 1.2646310329437256, "learning_rate": 0.0001, "loss": 0.012, "step": 144530 }, { "epoch": 950.921052631579, "grad_norm": 1.567901372909546, "learning_rate": 0.0001, "loss": 0.0144, "step": 144540 }, { "epoch": 950.9868421052631, "grad_norm": 1.4253491163253784, "learning_rate": 0.0001, "loss": 0.0145, "step": 144550 }, { "epoch": 951.0526315789474, "grad_norm": 1.6365058422088623, "learning_rate": 0.0001, "loss": 0.0114, "step": 144560 }, { "epoch": 951.1184210526316, "grad_norm": 1.1850507259368896, "learning_rate": 0.0001, "loss": 0.0152, "step": 144570 }, { "epoch": 951.1842105263158, "grad_norm": 1.20968759059906, "learning_rate": 0.0001, "loss": 0.0148, "step": 144580 }, { "epoch": 951.25, "grad_norm": 1.5817043781280518, "learning_rate": 0.0001, "loss": 0.012, "step": 144590 }, { "epoch": 951.3157894736842, "grad_norm": 1.7622101306915283, "learning_rate": 0.0001, "loss": 0.0112, "step": 144600 }, { "epoch": 951.3815789473684, "grad_norm": 1.4524427652359009, "learning_rate": 0.0001, "loss": 0.0117, "step": 144610 }, { "epoch": 951.4473684210526, "grad_norm": 1.1648727655410767, "learning_rate": 0.0001, "loss": 0.0098, "step": 144620 }, { "epoch": 951.5131578947369, "grad_norm": 1.7303866147994995, "learning_rate": 0.0001, "loss": 0.0134, "step": 144630 }, { "epoch": 951.578947368421, "grad_norm": 1.799951195716858, "learning_rate": 0.0001, "loss": 0.0135, "step": 144640 }, { "epoch": 951.6447368421053, "grad_norm": 1.3391791582107544, "learning_rate": 0.0001, "loss": 0.0152, "step": 144650 }, { "epoch": 951.7105263157895, "grad_norm": 1.3566992282867432, "learning_rate": 0.0001, "loss": 0.0124, "step": 144660 }, { "epoch": 951.7763157894736, "grad_norm": 1.187279224395752, "learning_rate": 0.0001, "loss": 0.0165, "step": 144670 }, { "epoch": 951.8421052631579, "grad_norm": 1.2211791276931763, "learning_rate": 0.0001, "loss": 0.0093, "step": 144680 }, { "epoch": 951.9078947368421, "grad_norm": 1.6981370449066162, "learning_rate": 0.0001, "loss": 0.012, "step": 144690 }, { "epoch": 951.9736842105264, "grad_norm": 1.8051636219024658, "learning_rate": 0.0001, "loss": 0.015, "step": 144700 }, { "epoch": 952.0394736842105, "grad_norm": 1.5342509746551514, "learning_rate": 0.0001, "loss": 0.0109, "step": 144710 }, { "epoch": 952.1052631578947, "grad_norm": 1.5157448053359985, "learning_rate": 0.0001, "loss": 0.0126, "step": 144720 }, { "epoch": 952.171052631579, "grad_norm": 1.4711228609085083, "learning_rate": 0.0001, "loss": 0.0136, "step": 144730 }, { "epoch": 952.2368421052631, "grad_norm": 0.8827593922615051, "learning_rate": 0.0001, "loss": 0.0097, "step": 144740 }, { "epoch": 952.3026315789474, "grad_norm": 1.273924469947815, "learning_rate": 0.0001, "loss": 0.0157, "step": 144750 }, { "epoch": 952.3684210526316, "grad_norm": 1.365013837814331, "learning_rate": 0.0001, "loss": 0.0115, "step": 144760 }, { "epoch": 952.4342105263158, "grad_norm": 1.6369978189468384, "learning_rate": 0.0001, "loss": 0.0126, "step": 144770 }, { "epoch": 952.5, "grad_norm": 1.8425822257995605, "learning_rate": 0.0001, "loss": 0.0118, "step": 144780 }, { "epoch": 952.5657894736842, "grad_norm": 1.4679661989212036, "learning_rate": 0.0001, "loss": 0.0126, "step": 144790 }, { "epoch": 952.6315789473684, "grad_norm": 0.9240910410881042, "learning_rate": 0.0001, "loss": 0.0156, "step": 144800 }, { "epoch": 952.6973684210526, "grad_norm": 1.0614218711853027, "learning_rate": 0.0001, "loss": 0.0125, "step": 144810 }, { "epoch": 952.7631578947369, "grad_norm": 1.4719860553741455, "learning_rate": 0.0001, "loss": 0.0125, "step": 144820 }, { "epoch": 952.828947368421, "grad_norm": 1.415068507194519, "learning_rate": 0.0001, "loss": 0.0151, "step": 144830 }, { "epoch": 952.8947368421053, "grad_norm": 1.702304482460022, "learning_rate": 0.0001, "loss": 0.0103, "step": 144840 }, { "epoch": 952.9605263157895, "grad_norm": 1.1662315130233765, "learning_rate": 0.0001, "loss": 0.0152, "step": 144850 }, { "epoch": 953.0263157894736, "grad_norm": 1.5275951623916626, "learning_rate": 0.0001, "loss": 0.0128, "step": 144860 }, { "epoch": 953.0921052631579, "grad_norm": 1.8739529848098755, "learning_rate": 0.0001, "loss": 0.0091, "step": 144870 }, { "epoch": 953.1578947368421, "grad_norm": 1.7075833082199097, "learning_rate": 0.0001, "loss": 0.0102, "step": 144880 }, { "epoch": 953.2236842105264, "grad_norm": 1.3376739025115967, "learning_rate": 0.0001, "loss": 0.0173, "step": 144890 }, { "epoch": 953.2894736842105, "grad_norm": 1.3498797416687012, "learning_rate": 0.0001, "loss": 0.0122, "step": 144900 }, { "epoch": 953.3552631578947, "grad_norm": 1.0870524644851685, "learning_rate": 0.0001, "loss": 0.0127, "step": 144910 }, { "epoch": 953.421052631579, "grad_norm": 1.723137617111206, "learning_rate": 0.0001, "loss": 0.0138, "step": 144920 }, { "epoch": 953.4868421052631, "grad_norm": 1.0866833925247192, "learning_rate": 0.0001, "loss": 0.0172, "step": 144930 }, { "epoch": 953.5526315789474, "grad_norm": 1.1602250337600708, "learning_rate": 0.0001, "loss": 0.0095, "step": 144940 }, { "epoch": 953.6184210526316, "grad_norm": 0.9500905871391296, "learning_rate": 0.0001, "loss": 0.0094, "step": 144950 }, { "epoch": 953.6842105263158, "grad_norm": 1.0570287704467773, "learning_rate": 0.0001, "loss": 0.0146, "step": 144960 }, { "epoch": 953.75, "grad_norm": 1.3297175168991089, "learning_rate": 0.0001, "loss": 0.0108, "step": 144970 }, { "epoch": 953.8157894736842, "grad_norm": 1.29165780544281, "learning_rate": 0.0001, "loss": 0.0138, "step": 144980 }, { "epoch": 953.8815789473684, "grad_norm": 1.283292531967163, "learning_rate": 0.0001, "loss": 0.0141, "step": 144990 }, { "epoch": 953.9473684210526, "grad_norm": 1.3549120426177979, "learning_rate": 0.0001, "loss": 0.0143, "step": 145000 }, { "epoch": 954.0131578947369, "grad_norm": 1.3875700235366821, "learning_rate": 0.0001, "loss": 0.0153, "step": 145010 }, { "epoch": 954.078947368421, "grad_norm": 1.2770893573760986, "learning_rate": 0.0001, "loss": 0.0172, "step": 145020 }, { "epoch": 954.1447368421053, "grad_norm": 1.6285204887390137, "learning_rate": 0.0001, "loss": 0.0116, "step": 145030 }, { "epoch": 954.2105263157895, "grad_norm": 1.372247576713562, "learning_rate": 0.0001, "loss": 0.0188, "step": 145040 }, { "epoch": 954.2763157894736, "grad_norm": 1.4712305068969727, "learning_rate": 0.0001, "loss": 0.0116, "step": 145050 }, { "epoch": 954.3421052631579, "grad_norm": 1.5197123289108276, "learning_rate": 0.0001, "loss": 0.0131, "step": 145060 }, { "epoch": 954.4078947368421, "grad_norm": 1.2523831129074097, "learning_rate": 0.0001, "loss": 0.0146, "step": 145070 }, { "epoch": 954.4736842105264, "grad_norm": 1.7410987615585327, "learning_rate": 0.0001, "loss": 0.0122, "step": 145080 }, { "epoch": 954.5394736842105, "grad_norm": 1.6944526433944702, "learning_rate": 0.0001, "loss": 0.0132, "step": 145090 }, { "epoch": 954.6052631578947, "grad_norm": 1.084529161453247, "learning_rate": 0.0001, "loss": 0.0105, "step": 145100 }, { "epoch": 954.671052631579, "grad_norm": 1.1032780408859253, "learning_rate": 0.0001, "loss": 0.0098, "step": 145110 }, { "epoch": 954.7368421052631, "grad_norm": 1.1970444917678833, "learning_rate": 0.0001, "loss": 0.0121, "step": 145120 }, { "epoch": 954.8026315789474, "grad_norm": 1.647812008857727, "learning_rate": 0.0001, "loss": 0.0157, "step": 145130 }, { "epoch": 954.8684210526316, "grad_norm": 1.2446496486663818, "learning_rate": 0.0001, "loss": 0.0102, "step": 145140 }, { "epoch": 954.9342105263158, "grad_norm": 1.5534170866012573, "learning_rate": 0.0001, "loss": 0.0128, "step": 145150 }, { "epoch": 955.0, "grad_norm": 1.6952259540557861, "learning_rate": 0.0001, "loss": 0.0116, "step": 145160 }, { "epoch": 955.0657894736842, "grad_norm": 1.5540090799331665, "learning_rate": 0.0001, "loss": 0.0131, "step": 145170 }, { "epoch": 955.1315789473684, "grad_norm": 1.7242704629898071, "learning_rate": 0.0001, "loss": 0.0127, "step": 145180 }, { "epoch": 955.1973684210526, "grad_norm": 1.2982877492904663, "learning_rate": 0.0001, "loss": 0.0107, "step": 145190 }, { "epoch": 955.2631578947369, "grad_norm": 1.328855276107788, "learning_rate": 0.0001, "loss": 0.0132, "step": 145200 }, { "epoch": 955.328947368421, "grad_norm": 1.3164223432540894, "learning_rate": 0.0001, "loss": 0.0113, "step": 145210 }, { "epoch": 955.3947368421053, "grad_norm": 1.4932812452316284, "learning_rate": 0.0001, "loss": 0.0124, "step": 145220 }, { "epoch": 955.4605263157895, "grad_norm": 1.3652372360229492, "learning_rate": 0.0001, "loss": 0.0139, "step": 145230 }, { "epoch": 955.5263157894736, "grad_norm": 1.3231068849563599, "learning_rate": 0.0001, "loss": 0.0151, "step": 145240 }, { "epoch": 955.5921052631579, "grad_norm": 1.2710057497024536, "learning_rate": 0.0001, "loss": 0.0172, "step": 145250 }, { "epoch": 955.6578947368421, "grad_norm": 1.1268020868301392, "learning_rate": 0.0001, "loss": 0.0099, "step": 145260 }, { "epoch": 955.7236842105264, "grad_norm": 0.977324366569519, "learning_rate": 0.0001, "loss": 0.0153, "step": 145270 }, { "epoch": 955.7894736842105, "grad_norm": 1.4563744068145752, "learning_rate": 0.0001, "loss": 0.0125, "step": 145280 }, { "epoch": 955.8552631578947, "grad_norm": 1.7069534063339233, "learning_rate": 0.0001, "loss": 0.0128, "step": 145290 }, { "epoch": 955.921052631579, "grad_norm": 1.6860591173171997, "learning_rate": 0.0001, "loss": 0.0125, "step": 145300 }, { "epoch": 955.9868421052631, "grad_norm": 1.3755966424942017, "learning_rate": 0.0001, "loss": 0.0119, "step": 145310 }, { "epoch": 956.0526315789474, "grad_norm": 1.669144630432129, "learning_rate": 0.0001, "loss": 0.012, "step": 145320 }, { "epoch": 956.1184210526316, "grad_norm": 1.478506326675415, "learning_rate": 0.0001, "loss": 0.0151, "step": 145330 }, { "epoch": 956.1842105263158, "grad_norm": 1.8350815773010254, "learning_rate": 0.0001, "loss": 0.0106, "step": 145340 }, { "epoch": 956.25, "grad_norm": 1.5976841449737549, "learning_rate": 0.0001, "loss": 0.012, "step": 145350 }, { "epoch": 956.3157894736842, "grad_norm": 1.4955472946166992, "learning_rate": 0.0001, "loss": 0.0127, "step": 145360 }, { "epoch": 956.3815789473684, "grad_norm": 1.406859278678894, "learning_rate": 0.0001, "loss": 0.0154, "step": 145370 }, { "epoch": 956.4473684210526, "grad_norm": 1.0477579832077026, "learning_rate": 0.0001, "loss": 0.0111, "step": 145380 }, { "epoch": 956.5131578947369, "grad_norm": 1.3253393173217773, "learning_rate": 0.0001, "loss": 0.0116, "step": 145390 }, { "epoch": 956.578947368421, "grad_norm": 1.3341525793075562, "learning_rate": 0.0001, "loss": 0.0167, "step": 145400 }, { "epoch": 956.6447368421053, "grad_norm": 1.6217737197875977, "learning_rate": 0.0001, "loss": 0.0109, "step": 145410 }, { "epoch": 956.7105263157895, "grad_norm": 1.1204935312271118, "learning_rate": 0.0001, "loss": 0.0113, "step": 145420 }, { "epoch": 956.7763157894736, "grad_norm": 1.1048858165740967, "learning_rate": 0.0001, "loss": 0.0131, "step": 145430 }, { "epoch": 956.8421052631579, "grad_norm": 1.6052756309509277, "learning_rate": 0.0001, "loss": 0.0115, "step": 145440 }, { "epoch": 956.9078947368421, "grad_norm": 1.673340082168579, "learning_rate": 0.0001, "loss": 0.0127, "step": 145450 }, { "epoch": 956.9736842105264, "grad_norm": 1.6135447025299072, "learning_rate": 0.0001, "loss": 0.0147, "step": 145460 }, { "epoch": 957.0394736842105, "grad_norm": 1.237823247909546, "learning_rate": 0.0001, "loss": 0.0129, "step": 145470 }, { "epoch": 957.1052631578947, "grad_norm": 1.7634410858154297, "learning_rate": 0.0001, "loss": 0.0113, "step": 145480 }, { "epoch": 957.171052631579, "grad_norm": 1.6129812002182007, "learning_rate": 0.0001, "loss": 0.0118, "step": 145490 }, { "epoch": 957.2368421052631, "grad_norm": 1.0390609502792358, "learning_rate": 0.0001, "loss": 0.012, "step": 145500 }, { "epoch": 957.3026315789474, "grad_norm": 1.5541448593139648, "learning_rate": 0.0001, "loss": 0.0114, "step": 145510 }, { "epoch": 957.3684210526316, "grad_norm": 1.79496431350708, "learning_rate": 0.0001, "loss": 0.0118, "step": 145520 }, { "epoch": 957.4342105263158, "grad_norm": 1.237593173980713, "learning_rate": 0.0001, "loss": 0.011, "step": 145530 }, { "epoch": 957.5, "grad_norm": 1.5593042373657227, "learning_rate": 0.0001, "loss": 0.0111, "step": 145540 }, { "epoch": 957.5657894736842, "grad_norm": 1.525202751159668, "learning_rate": 0.0001, "loss": 0.0143, "step": 145550 }, { "epoch": 957.6315789473684, "grad_norm": 1.446565866470337, "learning_rate": 0.0001, "loss": 0.0129, "step": 145560 }, { "epoch": 957.6973684210526, "grad_norm": 1.4043258428573608, "learning_rate": 0.0001, "loss": 0.0144, "step": 145570 }, { "epoch": 957.7631578947369, "grad_norm": 1.3251537084579468, "learning_rate": 0.0001, "loss": 0.0116, "step": 145580 }, { "epoch": 957.828947368421, "grad_norm": 1.8778581619262695, "learning_rate": 0.0001, "loss": 0.0181, "step": 145590 }, { "epoch": 957.8947368421053, "grad_norm": 1.765238642692566, "learning_rate": 0.0001, "loss": 0.0129, "step": 145600 }, { "epoch": 957.9605263157895, "grad_norm": 1.7529053688049316, "learning_rate": 0.0001, "loss": 0.0105, "step": 145610 }, { "epoch": 958.0263157894736, "grad_norm": 1.9110678434371948, "learning_rate": 0.0001, "loss": 0.0175, "step": 145620 }, { "epoch": 958.0921052631579, "grad_norm": 1.3961327075958252, "learning_rate": 0.0001, "loss": 0.0103, "step": 145630 }, { "epoch": 958.1578947368421, "grad_norm": 1.1564886569976807, "learning_rate": 0.0001, "loss": 0.0143, "step": 145640 }, { "epoch": 958.2236842105264, "grad_norm": 1.4513537883758545, "learning_rate": 0.0001, "loss": 0.0145, "step": 145650 }, { "epoch": 958.2894736842105, "grad_norm": 1.2860608100891113, "learning_rate": 0.0001, "loss": 0.0107, "step": 145660 }, { "epoch": 958.3552631578947, "grad_norm": 1.5611672401428223, "learning_rate": 0.0001, "loss": 0.0113, "step": 145670 }, { "epoch": 958.421052631579, "grad_norm": 0.8927909731864929, "learning_rate": 0.0001, "loss": 0.0108, "step": 145680 }, { "epoch": 958.4868421052631, "grad_norm": 1.2326154708862305, "learning_rate": 0.0001, "loss": 0.0117, "step": 145690 }, { "epoch": 958.5526315789474, "grad_norm": 1.4309897422790527, "learning_rate": 0.0001, "loss": 0.0146, "step": 145700 }, { "epoch": 958.6184210526316, "grad_norm": 1.3762943744659424, "learning_rate": 0.0001, "loss": 0.0128, "step": 145710 }, { "epoch": 958.6842105263158, "grad_norm": 1.5198594331741333, "learning_rate": 0.0001, "loss": 0.0146, "step": 145720 }, { "epoch": 958.75, "grad_norm": 1.1636725664138794, "learning_rate": 0.0001, "loss": 0.0101, "step": 145730 }, { "epoch": 958.8157894736842, "grad_norm": 1.1536329984664917, "learning_rate": 0.0001, "loss": 0.0153, "step": 145740 }, { "epoch": 958.8815789473684, "grad_norm": 1.4075701236724854, "learning_rate": 0.0001, "loss": 0.0195, "step": 145750 }, { "epoch": 958.9473684210526, "grad_norm": 1.3107317686080933, "learning_rate": 0.0001, "loss": 0.0119, "step": 145760 }, { "epoch": 959.0131578947369, "grad_norm": 1.4035686254501343, "learning_rate": 0.0001, "loss": 0.0089, "step": 145770 }, { "epoch": 959.078947368421, "grad_norm": 1.8461743593215942, "learning_rate": 0.0001, "loss": 0.0125, "step": 145780 }, { "epoch": 959.1447368421053, "grad_norm": 1.7626031637191772, "learning_rate": 0.0001, "loss": 0.0129, "step": 145790 }, { "epoch": 959.2105263157895, "grad_norm": 1.5684274435043335, "learning_rate": 0.0001, "loss": 0.0127, "step": 145800 }, { "epoch": 959.2763157894736, "grad_norm": 0.7678616046905518, "learning_rate": 0.0001, "loss": 0.0157, "step": 145810 }, { "epoch": 959.3421052631579, "grad_norm": 1.1576056480407715, "learning_rate": 0.0001, "loss": 0.0161, "step": 145820 }, { "epoch": 959.4078947368421, "grad_norm": 1.4541449546813965, "learning_rate": 0.0001, "loss": 0.01, "step": 145830 }, { "epoch": 959.4736842105264, "grad_norm": 1.1155459880828857, "learning_rate": 0.0001, "loss": 0.0117, "step": 145840 }, { "epoch": 959.5394736842105, "grad_norm": 1.6566085815429688, "learning_rate": 0.0001, "loss": 0.0162, "step": 145850 }, { "epoch": 959.6052631578947, "grad_norm": 1.9134405851364136, "learning_rate": 0.0001, "loss": 0.0151, "step": 145860 }, { "epoch": 959.671052631579, "grad_norm": 1.1450986862182617, "learning_rate": 0.0001, "loss": 0.0122, "step": 145870 }, { "epoch": 959.7368421052631, "grad_norm": 1.9663121700286865, "learning_rate": 0.0001, "loss": 0.01, "step": 145880 }, { "epoch": 959.8026315789474, "grad_norm": 1.3300862312316895, "learning_rate": 0.0001, "loss": 0.0153, "step": 145890 }, { "epoch": 959.8684210526316, "grad_norm": 1.6087288856506348, "learning_rate": 0.0001, "loss": 0.0111, "step": 145900 }, { "epoch": 959.9342105263158, "grad_norm": 1.5065524578094482, "learning_rate": 0.0001, "loss": 0.0098, "step": 145910 }, { "epoch": 960.0, "grad_norm": 1.5808082818984985, "learning_rate": 0.0001, "loss": 0.0134, "step": 145920 }, { "epoch": 960.0657894736842, "grad_norm": 1.9430251121520996, "learning_rate": 0.0001, "loss": 0.0136, "step": 145930 }, { "epoch": 960.1315789473684, "grad_norm": 1.2913750410079956, "learning_rate": 0.0001, "loss": 0.0142, "step": 145940 }, { "epoch": 960.1973684210526, "grad_norm": 1.473825454711914, "learning_rate": 0.0001, "loss": 0.0144, "step": 145950 }, { "epoch": 960.2631578947369, "grad_norm": 1.444980263710022, "learning_rate": 0.0001, "loss": 0.0117, "step": 145960 }, { "epoch": 960.328947368421, "grad_norm": 2.0332744121551514, "learning_rate": 0.0001, "loss": 0.0115, "step": 145970 }, { "epoch": 960.3947368421053, "grad_norm": 1.503909707069397, "learning_rate": 0.0001, "loss": 0.0161, "step": 145980 }, { "epoch": 960.4605263157895, "grad_norm": 1.628747820854187, "learning_rate": 0.0001, "loss": 0.0115, "step": 145990 }, { "epoch": 960.5263157894736, "grad_norm": 1.500073790550232, "learning_rate": 0.0001, "loss": 0.0109, "step": 146000 }, { "epoch": 960.5921052631579, "grad_norm": 1.3012508153915405, "learning_rate": 0.0001, "loss": 0.0096, "step": 146010 }, { "epoch": 960.6578947368421, "grad_norm": 1.4325915575027466, "learning_rate": 0.0001, "loss": 0.0118, "step": 146020 }, { "epoch": 960.7236842105264, "grad_norm": 1.5121066570281982, "learning_rate": 0.0001, "loss": 0.0115, "step": 146030 }, { "epoch": 960.7894736842105, "grad_norm": 1.6552338600158691, "learning_rate": 0.0001, "loss": 0.0126, "step": 146040 }, { "epoch": 960.8552631578947, "grad_norm": 1.7799111604690552, "learning_rate": 0.0001, "loss": 0.0119, "step": 146050 }, { "epoch": 960.921052631579, "grad_norm": 1.4746809005737305, "learning_rate": 0.0001, "loss": 0.0152, "step": 146060 }, { "epoch": 960.9868421052631, "grad_norm": 1.55177640914917, "learning_rate": 0.0001, "loss": 0.0123, "step": 146070 }, { "epoch": 961.0526315789474, "grad_norm": 1.7075321674346924, "learning_rate": 0.0001, "loss": 0.0123, "step": 146080 }, { "epoch": 961.1184210526316, "grad_norm": 1.4229788780212402, "learning_rate": 0.0001, "loss": 0.0123, "step": 146090 }, { "epoch": 961.1842105263158, "grad_norm": 1.150720477104187, "learning_rate": 0.0001, "loss": 0.0142, "step": 146100 }, { "epoch": 961.25, "grad_norm": 1.5681869983673096, "learning_rate": 0.0001, "loss": 0.0154, "step": 146110 }, { "epoch": 961.3157894736842, "grad_norm": 1.1834959983825684, "learning_rate": 0.0001, "loss": 0.0113, "step": 146120 }, { "epoch": 961.3815789473684, "grad_norm": 1.3831743001937866, "learning_rate": 0.0001, "loss": 0.0111, "step": 146130 }, { "epoch": 961.4473684210526, "grad_norm": 1.3153448104858398, "learning_rate": 0.0001, "loss": 0.0138, "step": 146140 }, { "epoch": 961.5131578947369, "grad_norm": 1.4852346181869507, "learning_rate": 0.0001, "loss": 0.014, "step": 146150 }, { "epoch": 961.578947368421, "grad_norm": 1.662226915359497, "learning_rate": 0.0001, "loss": 0.0106, "step": 146160 }, { "epoch": 961.6447368421053, "grad_norm": 1.852256178855896, "learning_rate": 0.0001, "loss": 0.0147, "step": 146170 }, { "epoch": 961.7105263157895, "grad_norm": 1.5282212495803833, "learning_rate": 0.0001, "loss": 0.0127, "step": 146180 }, { "epoch": 961.7763157894736, "grad_norm": 1.1737781763076782, "learning_rate": 0.0001, "loss": 0.0142, "step": 146190 }, { "epoch": 961.8421052631579, "grad_norm": 1.8943148851394653, "learning_rate": 0.0001, "loss": 0.0101, "step": 146200 }, { "epoch": 961.9078947368421, "grad_norm": 2.8926284313201904, "learning_rate": 0.0001, "loss": 0.011, "step": 146210 }, { "epoch": 961.9736842105264, "grad_norm": 1.3079416751861572, "learning_rate": 0.0001, "loss": 0.011, "step": 146220 }, { "epoch": 962.0394736842105, "grad_norm": 1.5680485963821411, "learning_rate": 0.0001, "loss": 0.016, "step": 146230 }, { "epoch": 962.1052631578947, "grad_norm": 1.711248755455017, "learning_rate": 0.0001, "loss": 0.0161, "step": 146240 }, { "epoch": 962.171052631579, "grad_norm": 1.401138186454773, "learning_rate": 0.0001, "loss": 0.0149, "step": 146250 }, { "epoch": 962.2368421052631, "grad_norm": 1.432957410812378, "learning_rate": 0.0001, "loss": 0.01, "step": 146260 }, { "epoch": 962.3026315789474, "grad_norm": 1.302053451538086, "learning_rate": 0.0001, "loss": 0.0094, "step": 146270 }, { "epoch": 962.3684210526316, "grad_norm": 1.7854537963867188, "learning_rate": 0.0001, "loss": 0.0114, "step": 146280 }, { "epoch": 962.4342105263158, "grad_norm": 1.461837887763977, "learning_rate": 0.0001, "loss": 0.0125, "step": 146290 }, { "epoch": 962.5, "grad_norm": 1.472184419631958, "learning_rate": 0.0001, "loss": 0.0111, "step": 146300 }, { "epoch": 962.5657894736842, "grad_norm": 1.409206509590149, "learning_rate": 0.0001, "loss": 0.0126, "step": 146310 }, { "epoch": 962.6315789473684, "grad_norm": 1.425111174583435, "learning_rate": 0.0001, "loss": 0.0132, "step": 146320 }, { "epoch": 962.6973684210526, "grad_norm": 1.834800124168396, "learning_rate": 0.0001, "loss": 0.0113, "step": 146330 }, { "epoch": 962.7631578947369, "grad_norm": 1.7083640098571777, "learning_rate": 0.0001, "loss": 0.0164, "step": 146340 }, { "epoch": 962.828947368421, "grad_norm": 1.6246857643127441, "learning_rate": 0.0001, "loss": 0.012, "step": 146350 }, { "epoch": 962.8947368421053, "grad_norm": 1.7152533531188965, "learning_rate": 0.0001, "loss": 0.0092, "step": 146360 }, { "epoch": 962.9605263157895, "grad_norm": 1.1464489698410034, "learning_rate": 0.0001, "loss": 0.0097, "step": 146370 }, { "epoch": 963.0263157894736, "grad_norm": 1.7645777463912964, "learning_rate": 0.0001, "loss": 0.0127, "step": 146380 }, { "epoch": 963.0921052631579, "grad_norm": 1.2153825759887695, "learning_rate": 0.0001, "loss": 0.0128, "step": 146390 }, { "epoch": 963.1578947368421, "grad_norm": 1.2603603601455688, "learning_rate": 0.0001, "loss": 0.0145, "step": 146400 }, { "epoch": 963.2236842105264, "grad_norm": 1.3645339012145996, "learning_rate": 0.0001, "loss": 0.0112, "step": 146410 }, { "epoch": 963.2894736842105, "grad_norm": 1.1454076766967773, "learning_rate": 0.0001, "loss": 0.0115, "step": 146420 }, { "epoch": 963.3552631578947, "grad_norm": 1.9815889596939087, "learning_rate": 0.0001, "loss": 0.0153, "step": 146430 }, { "epoch": 963.421052631579, "grad_norm": 1.2753314971923828, "learning_rate": 0.0001, "loss": 0.0102, "step": 146440 }, { "epoch": 963.4868421052631, "grad_norm": 1.7454370260238647, "learning_rate": 0.0001, "loss": 0.0108, "step": 146450 }, { "epoch": 963.5526315789474, "grad_norm": 1.764730453491211, "learning_rate": 0.0001, "loss": 0.0118, "step": 146460 }, { "epoch": 963.6184210526316, "grad_norm": 1.2820197343826294, "learning_rate": 0.0001, "loss": 0.0096, "step": 146470 }, { "epoch": 963.6842105263158, "grad_norm": 1.4363720417022705, "learning_rate": 0.0001, "loss": 0.0114, "step": 146480 }, { "epoch": 963.75, "grad_norm": 1.4961787462234497, "learning_rate": 0.0001, "loss": 0.0167, "step": 146490 }, { "epoch": 963.8157894736842, "grad_norm": 1.5756654739379883, "learning_rate": 0.0001, "loss": 0.0089, "step": 146500 }, { "epoch": 963.8815789473684, "grad_norm": 1.646254062652588, "learning_rate": 0.0001, "loss": 0.015, "step": 146510 }, { "epoch": 963.9473684210526, "grad_norm": 1.5755784511566162, "learning_rate": 0.0001, "loss": 0.012, "step": 146520 }, { "epoch": 964.0131578947369, "grad_norm": 1.4199655055999756, "learning_rate": 0.0001, "loss": 0.0126, "step": 146530 }, { "epoch": 964.078947368421, "grad_norm": 1.4975237846374512, "learning_rate": 0.0001, "loss": 0.0144, "step": 146540 }, { "epoch": 964.1447368421053, "grad_norm": 1.7370744943618774, "learning_rate": 0.0001, "loss": 0.0104, "step": 146550 }, { "epoch": 964.2105263157895, "grad_norm": 1.5334609746932983, "learning_rate": 0.0001, "loss": 0.009, "step": 146560 }, { "epoch": 964.2763157894736, "grad_norm": 1.8282099962234497, "learning_rate": 0.0001, "loss": 0.0119, "step": 146570 }, { "epoch": 964.3421052631579, "grad_norm": 1.1261954307556152, "learning_rate": 0.0001, "loss": 0.0153, "step": 146580 }, { "epoch": 964.4078947368421, "grad_norm": 1.6709855794906616, "learning_rate": 0.0001, "loss": 0.0097, "step": 146590 }, { "epoch": 964.4736842105264, "grad_norm": 1.2392370700836182, "learning_rate": 0.0001, "loss": 0.01, "step": 146600 }, { "epoch": 964.5394736842105, "grad_norm": 1.430283546447754, "learning_rate": 0.0001, "loss": 0.012, "step": 146610 }, { "epoch": 964.6052631578947, "grad_norm": 1.796518325805664, "learning_rate": 0.0001, "loss": 0.012, "step": 146620 }, { "epoch": 964.671052631579, "grad_norm": 1.4250129461288452, "learning_rate": 0.0001, "loss": 0.0129, "step": 146630 }, { "epoch": 964.7368421052631, "grad_norm": 1.289918065071106, "learning_rate": 0.0001, "loss": 0.0115, "step": 146640 }, { "epoch": 964.8026315789474, "grad_norm": 1.3194031715393066, "learning_rate": 0.0001, "loss": 0.0105, "step": 146650 }, { "epoch": 964.8684210526316, "grad_norm": 1.1667028665542603, "learning_rate": 0.0001, "loss": 0.0181, "step": 146660 }, { "epoch": 964.9342105263158, "grad_norm": 1.6349010467529297, "learning_rate": 0.0001, "loss": 0.0136, "step": 146670 }, { "epoch": 965.0, "grad_norm": 1.4492801427841187, "learning_rate": 0.0001, "loss": 0.0147, "step": 146680 }, { "epoch": 965.0657894736842, "grad_norm": 1.7804207801818848, "learning_rate": 0.0001, "loss": 0.0115, "step": 146690 }, { "epoch": 965.1315789473684, "grad_norm": 1.6974762678146362, "learning_rate": 0.0001, "loss": 0.0118, "step": 146700 }, { "epoch": 965.1973684210526, "grad_norm": 1.674976110458374, "learning_rate": 0.0001, "loss": 0.0152, "step": 146710 }, { "epoch": 965.2631578947369, "grad_norm": 1.2790719270706177, "learning_rate": 0.0001, "loss": 0.0149, "step": 146720 }, { "epoch": 965.328947368421, "grad_norm": 1.5121515989303589, "learning_rate": 0.0001, "loss": 0.0111, "step": 146730 }, { "epoch": 965.3947368421053, "grad_norm": 1.2567799091339111, "learning_rate": 0.0001, "loss": 0.0157, "step": 146740 }, { "epoch": 965.4605263157895, "grad_norm": 1.1431355476379395, "learning_rate": 0.0001, "loss": 0.011, "step": 146750 }, { "epoch": 965.5263157894736, "grad_norm": 1.87212073802948, "learning_rate": 0.0001, "loss": 0.0133, "step": 146760 }, { "epoch": 965.5921052631579, "grad_norm": 1.5779833793640137, "learning_rate": 0.0001, "loss": 0.0099, "step": 146770 }, { "epoch": 965.6578947368421, "grad_norm": 2.415614366531372, "learning_rate": 0.0001, "loss": 0.0101, "step": 146780 }, { "epoch": 965.7236842105264, "grad_norm": 2.006774663925171, "learning_rate": 0.0001, "loss": 0.0118, "step": 146790 }, { "epoch": 965.7894736842105, "grad_norm": 1.3689771890640259, "learning_rate": 0.0001, "loss": 0.0133, "step": 146800 }, { "epoch": 965.8552631578947, "grad_norm": 1.3501091003417969, "learning_rate": 0.0001, "loss": 0.0147, "step": 146810 }, { "epoch": 965.921052631579, "grad_norm": 1.698964238166809, "learning_rate": 0.0001, "loss": 0.0142, "step": 146820 }, { "epoch": 965.9868421052631, "grad_norm": 1.6736857891082764, "learning_rate": 0.0001, "loss": 0.0111, "step": 146830 }, { "epoch": 966.0526315789474, "grad_norm": 1.3352298736572266, "learning_rate": 0.0001, "loss": 0.0111, "step": 146840 }, { "epoch": 966.1184210526316, "grad_norm": 1.4930994510650635, "learning_rate": 0.0001, "loss": 0.0159, "step": 146850 }, { "epoch": 966.1842105263158, "grad_norm": 1.532651662826538, "learning_rate": 0.0001, "loss": 0.0113, "step": 146860 }, { "epoch": 966.25, "grad_norm": 1.677344560623169, "learning_rate": 0.0001, "loss": 0.0099, "step": 146870 }, { "epoch": 966.3157894736842, "grad_norm": 0.9670716524124146, "learning_rate": 0.0001, "loss": 0.013, "step": 146880 }, { "epoch": 966.3815789473684, "grad_norm": 1.593184471130371, "learning_rate": 0.0001, "loss": 0.0177, "step": 146890 }, { "epoch": 966.4473684210526, "grad_norm": 1.3298667669296265, "learning_rate": 0.0001, "loss": 0.0122, "step": 146900 }, { "epoch": 966.5131578947369, "grad_norm": 1.322838306427002, "learning_rate": 0.0001, "loss": 0.0116, "step": 146910 }, { "epoch": 966.578947368421, "grad_norm": 1.648777961730957, "learning_rate": 0.0001, "loss": 0.0093, "step": 146920 }, { "epoch": 966.6447368421053, "grad_norm": 1.4365330934524536, "learning_rate": 0.0001, "loss": 0.0101, "step": 146930 }, { "epoch": 966.7105263157895, "grad_norm": 1.783467173576355, "learning_rate": 0.0001, "loss": 0.0094, "step": 146940 }, { "epoch": 966.7763157894736, "grad_norm": 1.3505505323410034, "learning_rate": 0.0001, "loss": 0.0121, "step": 146950 }, { "epoch": 966.8421052631579, "grad_norm": 1.447203516960144, "learning_rate": 0.0001, "loss": 0.014, "step": 146960 }, { "epoch": 966.9078947368421, "grad_norm": 1.2757216691970825, "learning_rate": 0.0001, "loss": 0.0118, "step": 146970 }, { "epoch": 966.9736842105264, "grad_norm": 1.5441949367523193, "learning_rate": 0.0001, "loss": 0.0162, "step": 146980 }, { "epoch": 967.0394736842105, "grad_norm": 1.5181576013565063, "learning_rate": 0.0001, "loss": 0.0182, "step": 146990 }, { "epoch": 967.1052631578947, "grad_norm": 1.607051134109497, "learning_rate": 0.0001, "loss": 0.012, "step": 147000 }, { "epoch": 967.171052631579, "grad_norm": 1.5414844751358032, "learning_rate": 0.0001, "loss": 0.0149, "step": 147010 }, { "epoch": 967.2368421052631, "grad_norm": 1.4863561391830444, "learning_rate": 0.0001, "loss": 0.0187, "step": 147020 }, { "epoch": 967.3026315789474, "grad_norm": 1.480576515197754, "learning_rate": 0.0001, "loss": 0.0113, "step": 147030 }, { "epoch": 967.3684210526316, "grad_norm": 1.2013987302780151, "learning_rate": 0.0001, "loss": 0.011, "step": 147040 }, { "epoch": 967.4342105263158, "grad_norm": 1.2304400205612183, "learning_rate": 0.0001, "loss": 0.0114, "step": 147050 }, { "epoch": 967.5, "grad_norm": 1.4754537343978882, "learning_rate": 0.0001, "loss": 0.0112, "step": 147060 }, { "epoch": 967.5657894736842, "grad_norm": 1.6374653577804565, "learning_rate": 0.0001, "loss": 0.0087, "step": 147070 }, { "epoch": 967.6315789473684, "grad_norm": 1.3144187927246094, "learning_rate": 0.0001, "loss": 0.0109, "step": 147080 }, { "epoch": 967.6973684210526, "grad_norm": 1.1635949611663818, "learning_rate": 0.0001, "loss": 0.0168, "step": 147090 }, { "epoch": 967.7631578947369, "grad_norm": 1.281469464302063, "learning_rate": 0.0001, "loss": 0.0148, "step": 147100 }, { "epoch": 967.828947368421, "grad_norm": 1.4628785848617554, "learning_rate": 0.0001, "loss": 0.0098, "step": 147110 }, { "epoch": 967.8947368421053, "grad_norm": 1.1609374284744263, "learning_rate": 0.0001, "loss": 0.0147, "step": 147120 }, { "epoch": 967.9605263157895, "grad_norm": 1.3046172857284546, "learning_rate": 0.0001, "loss": 0.0093, "step": 147130 }, { "epoch": 968.0263157894736, "grad_norm": 1.1413912773132324, "learning_rate": 0.0001, "loss": 0.0144, "step": 147140 }, { "epoch": 968.0921052631579, "grad_norm": 1.5787296295166016, "learning_rate": 0.0001, "loss": 0.0166, "step": 147150 }, { "epoch": 968.1578947368421, "grad_norm": 1.8189481496810913, "learning_rate": 0.0001, "loss": 0.0117, "step": 147160 }, { "epoch": 968.2236842105264, "grad_norm": 1.4441338777542114, "learning_rate": 0.0001, "loss": 0.0151, "step": 147170 }, { "epoch": 968.2894736842105, "grad_norm": 1.3795517683029175, "learning_rate": 0.0001, "loss": 0.011, "step": 147180 }, { "epoch": 968.3552631578947, "grad_norm": 1.445265531539917, "learning_rate": 0.0001, "loss": 0.0124, "step": 147190 }, { "epoch": 968.421052631579, "grad_norm": 1.4881938695907593, "learning_rate": 0.0001, "loss": 0.0108, "step": 147200 }, { "epoch": 968.4868421052631, "grad_norm": 1.770487666130066, "learning_rate": 0.0001, "loss": 0.0166, "step": 147210 }, { "epoch": 968.5526315789474, "grad_norm": 1.3956928253173828, "learning_rate": 0.0001, "loss": 0.013, "step": 147220 }, { "epoch": 968.6184210526316, "grad_norm": 1.3888128995895386, "learning_rate": 0.0001, "loss": 0.0107, "step": 147230 }, { "epoch": 968.6842105263158, "grad_norm": 1.2823011875152588, "learning_rate": 0.0001, "loss": 0.0115, "step": 147240 }, { "epoch": 968.75, "grad_norm": 1.3157142400741577, "learning_rate": 0.0001, "loss": 0.0111, "step": 147250 }, { "epoch": 968.8157894736842, "grad_norm": 1.1003183126449585, "learning_rate": 0.0001, "loss": 0.011, "step": 147260 }, { "epoch": 968.8815789473684, "grad_norm": 0.9868209958076477, "learning_rate": 0.0001, "loss": 0.0102, "step": 147270 }, { "epoch": 968.9473684210526, "grad_norm": 1.6370291709899902, "learning_rate": 0.0001, "loss": 0.0101, "step": 147280 }, { "epoch": 969.0131578947369, "grad_norm": 1.1752780675888062, "learning_rate": 0.0001, "loss": 0.0174, "step": 147290 }, { "epoch": 969.078947368421, "grad_norm": 1.4114341735839844, "learning_rate": 0.0001, "loss": 0.0099, "step": 147300 }, { "epoch": 969.1447368421053, "grad_norm": 1.7374705076217651, "learning_rate": 0.0001, "loss": 0.012, "step": 147310 }, { "epoch": 969.2105263157895, "grad_norm": 1.5393075942993164, "learning_rate": 0.0001, "loss": 0.0125, "step": 147320 }, { "epoch": 969.2763157894736, "grad_norm": 1.3518707752227783, "learning_rate": 0.0001, "loss": 0.0134, "step": 147330 }, { "epoch": 969.3421052631579, "grad_norm": 1.4987953901290894, "learning_rate": 0.0001, "loss": 0.0119, "step": 147340 }, { "epoch": 969.4078947368421, "grad_norm": 1.6200541257858276, "learning_rate": 0.0001, "loss": 0.0131, "step": 147350 }, { "epoch": 969.4736842105264, "grad_norm": 1.6995288133621216, "learning_rate": 0.0001, "loss": 0.015, "step": 147360 }, { "epoch": 969.5394736842105, "grad_norm": 1.8662109375, "learning_rate": 0.0001, "loss": 0.011, "step": 147370 }, { "epoch": 969.6052631578947, "grad_norm": 1.0755977630615234, "learning_rate": 0.0001, "loss": 0.0109, "step": 147380 }, { "epoch": 969.671052631579, "grad_norm": 1.3271609544754028, "learning_rate": 0.0001, "loss": 0.0128, "step": 147390 }, { "epoch": 969.7368421052631, "grad_norm": 1.553574800491333, "learning_rate": 0.0001, "loss": 0.0107, "step": 147400 }, { "epoch": 969.8026315789474, "grad_norm": 1.4803956747055054, "learning_rate": 0.0001, "loss": 0.0141, "step": 147410 }, { "epoch": 969.8684210526316, "grad_norm": 1.763496994972229, "learning_rate": 0.0001, "loss": 0.0158, "step": 147420 }, { "epoch": 969.9342105263158, "grad_norm": 1.5042250156402588, "learning_rate": 0.0001, "loss": 0.0123, "step": 147430 }, { "epoch": 970.0, "grad_norm": 1.3998404741287231, "learning_rate": 0.0001, "loss": 0.0166, "step": 147440 }, { "epoch": 970.0657894736842, "grad_norm": 1.3475927114486694, "learning_rate": 0.0001, "loss": 0.0161, "step": 147450 }, { "epoch": 970.1315789473684, "grad_norm": 1.3875066041946411, "learning_rate": 0.0001, "loss": 0.0122, "step": 147460 }, { "epoch": 970.1973684210526, "grad_norm": 1.1543000936508179, "learning_rate": 0.0001, "loss": 0.0151, "step": 147470 }, { "epoch": 970.2631578947369, "grad_norm": 1.498604655265808, "learning_rate": 0.0001, "loss": 0.0113, "step": 147480 }, { "epoch": 970.328947368421, "grad_norm": 1.7043155431747437, "learning_rate": 0.0001, "loss": 0.0117, "step": 147490 }, { "epoch": 970.3947368421053, "grad_norm": 1.3155089616775513, "learning_rate": 0.0001, "loss": 0.0123, "step": 147500 }, { "epoch": 970.4605263157895, "grad_norm": 1.3444044589996338, "learning_rate": 0.0001, "loss": 0.0109, "step": 147510 }, { "epoch": 970.5263157894736, "grad_norm": 1.5282385349273682, "learning_rate": 0.0001, "loss": 0.0111, "step": 147520 }, { "epoch": 970.5921052631579, "grad_norm": 0.9587247967720032, "learning_rate": 0.0001, "loss": 0.01, "step": 147530 }, { "epoch": 970.6578947368421, "grad_norm": 1.206623911857605, "learning_rate": 0.0001, "loss": 0.0093, "step": 147540 }, { "epoch": 970.7236842105264, "grad_norm": 1.6749485731124878, "learning_rate": 0.0001, "loss": 0.0146, "step": 147550 }, { "epoch": 970.7894736842105, "grad_norm": 1.4845036268234253, "learning_rate": 0.0001, "loss": 0.014, "step": 147560 }, { "epoch": 970.8552631578947, "grad_norm": 1.6600147485733032, "learning_rate": 0.0001, "loss": 0.0097, "step": 147570 }, { "epoch": 970.921052631579, "grad_norm": 1.5025229454040527, "learning_rate": 0.0001, "loss": 0.0156, "step": 147580 }, { "epoch": 970.9868421052631, "grad_norm": 1.4404798746109009, "learning_rate": 0.0001, "loss": 0.0127, "step": 147590 }, { "epoch": 971.0526315789474, "grad_norm": 1.226597547531128, "learning_rate": 0.0001, "loss": 0.0137, "step": 147600 }, { "epoch": 971.1184210526316, "grad_norm": 1.1608972549438477, "learning_rate": 0.0001, "loss": 0.0119, "step": 147610 }, { "epoch": 971.1842105263158, "grad_norm": 1.481155276298523, "learning_rate": 0.0001, "loss": 0.0096, "step": 147620 }, { "epoch": 971.25, "grad_norm": 1.2279810905456543, "learning_rate": 0.0001, "loss": 0.0089, "step": 147630 }, { "epoch": 971.3157894736842, "grad_norm": 1.4153274297714233, "learning_rate": 0.0001, "loss": 0.0136, "step": 147640 }, { "epoch": 971.3815789473684, "grad_norm": 1.5268696546554565, "learning_rate": 0.0001, "loss": 0.0113, "step": 147650 }, { "epoch": 971.4473684210526, "grad_norm": 1.9090111255645752, "learning_rate": 0.0001, "loss": 0.011, "step": 147660 }, { "epoch": 971.5131578947369, "grad_norm": 1.3774234056472778, "learning_rate": 0.0001, "loss": 0.0117, "step": 147670 }, { "epoch": 971.578947368421, "grad_norm": 1.52627694606781, "learning_rate": 0.0001, "loss": 0.0131, "step": 147680 }, { "epoch": 971.6447368421053, "grad_norm": 1.4807273149490356, "learning_rate": 0.0001, "loss": 0.0131, "step": 147690 }, { "epoch": 971.7105263157895, "grad_norm": 1.6417969465255737, "learning_rate": 0.0001, "loss": 0.0095, "step": 147700 }, { "epoch": 971.7763157894736, "grad_norm": 1.059058427810669, "learning_rate": 0.0001, "loss": 0.0177, "step": 147710 }, { "epoch": 971.8421052631579, "grad_norm": 1.4774737358093262, "learning_rate": 0.0001, "loss": 0.0129, "step": 147720 }, { "epoch": 971.9078947368421, "grad_norm": 1.7502398490905762, "learning_rate": 0.0001, "loss": 0.015, "step": 147730 }, { "epoch": 971.9736842105264, "grad_norm": 1.2715688943862915, "learning_rate": 0.0001, "loss": 0.0143, "step": 147740 }, { "epoch": 972.0394736842105, "grad_norm": 1.445356011390686, "learning_rate": 0.0001, "loss": 0.0154, "step": 147750 }, { "epoch": 972.1052631578947, "grad_norm": 1.6749519109725952, "learning_rate": 0.0001, "loss": 0.0117, "step": 147760 }, { "epoch": 972.171052631579, "grad_norm": 1.7593351602554321, "learning_rate": 0.0001, "loss": 0.0129, "step": 147770 }, { "epoch": 972.2368421052631, "grad_norm": 1.412654995918274, "learning_rate": 0.0001, "loss": 0.0122, "step": 147780 }, { "epoch": 972.3026315789474, "grad_norm": 1.6535223722457886, "learning_rate": 0.0001, "loss": 0.0118, "step": 147790 }, { "epoch": 972.3684210526316, "grad_norm": 1.3171342611312866, "learning_rate": 0.0001, "loss": 0.0139, "step": 147800 }, { "epoch": 972.4342105263158, "grad_norm": 1.3113056421279907, "learning_rate": 0.0001, "loss": 0.0092, "step": 147810 }, { "epoch": 972.5, "grad_norm": 1.5580596923828125, "learning_rate": 0.0001, "loss": 0.0155, "step": 147820 }, { "epoch": 972.5657894736842, "grad_norm": 1.2772572040557861, "learning_rate": 0.0001, "loss": 0.0127, "step": 147830 }, { "epoch": 972.6315789473684, "grad_norm": 1.8694837093353271, "learning_rate": 0.0001, "loss": 0.0096, "step": 147840 }, { "epoch": 972.6973684210526, "grad_norm": 1.4273405075073242, "learning_rate": 0.0001, "loss": 0.0107, "step": 147850 }, { "epoch": 972.7631578947369, "grad_norm": 1.05485999584198, "learning_rate": 0.0001, "loss": 0.0136, "step": 147860 }, { "epoch": 972.828947368421, "grad_norm": 1.2725838422775269, "learning_rate": 0.0001, "loss": 0.0106, "step": 147870 }, { "epoch": 972.8947368421053, "grad_norm": 1.5273832082748413, "learning_rate": 0.0001, "loss": 0.0154, "step": 147880 }, { "epoch": 972.9605263157895, "grad_norm": 1.802731990814209, "learning_rate": 0.0001, "loss": 0.0161, "step": 147890 }, { "epoch": 973.0263157894736, "grad_norm": 2.0030815601348877, "learning_rate": 0.0001, "loss": 0.0121, "step": 147900 }, { "epoch": 973.0921052631579, "grad_norm": 1.6667901277542114, "learning_rate": 0.0001, "loss": 0.0096, "step": 147910 }, { "epoch": 973.1578947368421, "grad_norm": 1.411292314529419, "learning_rate": 0.0001, "loss": 0.0104, "step": 147920 }, { "epoch": 973.2236842105264, "grad_norm": 1.3853094577789307, "learning_rate": 0.0001, "loss": 0.0102, "step": 147930 }, { "epoch": 973.2894736842105, "grad_norm": 1.671151041984558, "learning_rate": 0.0001, "loss": 0.0093, "step": 147940 }, { "epoch": 973.3552631578947, "grad_norm": 1.3852155208587646, "learning_rate": 0.0001, "loss": 0.0143, "step": 147950 }, { "epoch": 973.421052631579, "grad_norm": 1.6994231939315796, "learning_rate": 0.0001, "loss": 0.0128, "step": 147960 }, { "epoch": 973.4868421052631, "grad_norm": 1.38526451587677, "learning_rate": 0.0001, "loss": 0.0151, "step": 147970 }, { "epoch": 973.5526315789474, "grad_norm": 1.2532227039337158, "learning_rate": 0.0001, "loss": 0.0109, "step": 147980 }, { "epoch": 973.6184210526316, "grad_norm": 1.2585467100143433, "learning_rate": 0.0001, "loss": 0.0156, "step": 147990 }, { "epoch": 973.6842105263158, "grad_norm": 1.5529725551605225, "learning_rate": 0.0001, "loss": 0.0129, "step": 148000 }, { "epoch": 973.75, "grad_norm": 1.135758638381958, "learning_rate": 0.0001, "loss": 0.0148, "step": 148010 }, { "epoch": 973.8157894736842, "grad_norm": 1.2251116037368774, "learning_rate": 0.0001, "loss": 0.0115, "step": 148020 }, { "epoch": 973.8815789473684, "grad_norm": 1.3509619235992432, "learning_rate": 0.0001, "loss": 0.0154, "step": 148030 }, { "epoch": 973.9473684210526, "grad_norm": 1.584121584892273, "learning_rate": 0.0001, "loss": 0.0143, "step": 148040 }, { "epoch": 974.0131578947369, "grad_norm": 1.1675597429275513, "learning_rate": 0.0001, "loss": 0.0141, "step": 148050 }, { "epoch": 974.078947368421, "grad_norm": 1.0744413137435913, "learning_rate": 0.0001, "loss": 0.0095, "step": 148060 }, { "epoch": 974.1447368421053, "grad_norm": 1.147948980331421, "learning_rate": 0.0001, "loss": 0.0143, "step": 148070 }, { "epoch": 974.2105263157895, "grad_norm": 1.3302816152572632, "learning_rate": 0.0001, "loss": 0.0105, "step": 148080 }, { "epoch": 974.2763157894736, "grad_norm": 1.6206319332122803, "learning_rate": 0.0001, "loss": 0.0104, "step": 148090 }, { "epoch": 974.3421052631579, "grad_norm": 1.1370067596435547, "learning_rate": 0.0001, "loss": 0.0133, "step": 148100 }, { "epoch": 974.4078947368421, "grad_norm": 1.134735107421875, "learning_rate": 0.0001, "loss": 0.011, "step": 148110 }, { "epoch": 974.4736842105264, "grad_norm": 1.6671812534332275, "learning_rate": 0.0001, "loss": 0.0163, "step": 148120 }, { "epoch": 974.5394736842105, "grad_norm": 1.7804760932922363, "learning_rate": 0.0001, "loss": 0.0113, "step": 148130 }, { "epoch": 974.6052631578947, "grad_norm": 1.1172572374343872, "learning_rate": 0.0001, "loss": 0.0125, "step": 148140 }, { "epoch": 974.671052631579, "grad_norm": 2.158763885498047, "learning_rate": 0.0001, "loss": 0.0149, "step": 148150 }, { "epoch": 974.7368421052631, "grad_norm": 2.2015163898468018, "learning_rate": 0.0001, "loss": 0.0121, "step": 148160 }, { "epoch": 974.8026315789474, "grad_norm": 2.2617924213409424, "learning_rate": 0.0001, "loss": 0.0135, "step": 148170 }, { "epoch": 974.8684210526316, "grad_norm": 1.7784277200698853, "learning_rate": 0.0001, "loss": 0.0139, "step": 148180 }, { "epoch": 974.9342105263158, "grad_norm": 2.0498239994049072, "learning_rate": 0.0001, "loss": 0.0101, "step": 148190 }, { "epoch": 975.0, "grad_norm": 1.7214184999465942, "learning_rate": 0.0001, "loss": 0.0178, "step": 148200 }, { "epoch": 975.0657894736842, "grad_norm": 1.034379482269287, "learning_rate": 0.0001, "loss": 0.0137, "step": 148210 }, { "epoch": 975.1315789473684, "grad_norm": 1.1248064041137695, "learning_rate": 0.0001, "loss": 0.0086, "step": 148220 }, { "epoch": 975.1973684210526, "grad_norm": 1.54465651512146, "learning_rate": 0.0001, "loss": 0.0147, "step": 148230 }, { "epoch": 975.2631578947369, "grad_norm": 1.6255682706832886, "learning_rate": 0.0001, "loss": 0.0141, "step": 148240 }, { "epoch": 975.328947368421, "grad_norm": 1.6225600242614746, "learning_rate": 0.0001, "loss": 0.0139, "step": 148250 }, { "epoch": 975.3947368421053, "grad_norm": 1.8067172765731812, "learning_rate": 0.0001, "loss": 0.011, "step": 148260 }, { "epoch": 975.4605263157895, "grad_norm": 1.0957801342010498, "learning_rate": 0.0001, "loss": 0.014, "step": 148270 }, { "epoch": 975.5263157894736, "grad_norm": 1.2162786722183228, "learning_rate": 0.0001, "loss": 0.0104, "step": 148280 }, { "epoch": 975.5921052631579, "grad_norm": 1.7078667879104614, "learning_rate": 0.0001, "loss": 0.0088, "step": 148290 }, { "epoch": 975.6578947368421, "grad_norm": 1.1618760824203491, "learning_rate": 0.0001, "loss": 0.0093, "step": 148300 }, { "epoch": 975.7236842105264, "grad_norm": 1.4966576099395752, "learning_rate": 0.0001, "loss": 0.0131, "step": 148310 }, { "epoch": 975.7894736842105, "grad_norm": 1.4711058139801025, "learning_rate": 0.0001, "loss": 0.0129, "step": 148320 }, { "epoch": 975.8552631578947, "grad_norm": 1.477014422416687, "learning_rate": 0.0001, "loss": 0.0131, "step": 148330 }, { "epoch": 975.921052631579, "grad_norm": 1.6359400749206543, "learning_rate": 0.0001, "loss": 0.0148, "step": 148340 }, { "epoch": 975.9868421052631, "grad_norm": 1.2173631191253662, "learning_rate": 0.0001, "loss": 0.0172, "step": 148350 }, { "epoch": 976.0526315789474, "grad_norm": 1.2909126281738281, "learning_rate": 0.0001, "loss": 0.0128, "step": 148360 }, { "epoch": 976.1184210526316, "grad_norm": 1.310628890991211, "learning_rate": 0.0001, "loss": 0.0144, "step": 148370 }, { "epoch": 976.1842105263158, "grad_norm": 1.2669199705123901, "learning_rate": 0.0001, "loss": 0.0136, "step": 148380 }, { "epoch": 976.25, "grad_norm": 1.512033224105835, "learning_rate": 0.0001, "loss": 0.012, "step": 148390 }, { "epoch": 976.3157894736842, "grad_norm": 1.6078258752822876, "learning_rate": 0.0001, "loss": 0.0163, "step": 148400 }, { "epoch": 976.3815789473684, "grad_norm": 1.3590980768203735, "learning_rate": 0.0001, "loss": 0.0107, "step": 148410 }, { "epoch": 976.4473684210526, "grad_norm": 1.2529633045196533, "learning_rate": 0.0001, "loss": 0.0111, "step": 148420 }, { "epoch": 976.5131578947369, "grad_norm": 1.2092303037643433, "learning_rate": 0.0001, "loss": 0.0092, "step": 148430 }, { "epoch": 976.578947368421, "grad_norm": 1.0834916830062866, "learning_rate": 0.0001, "loss": 0.0139, "step": 148440 }, { "epoch": 976.6447368421053, "grad_norm": 1.3337218761444092, "learning_rate": 0.0001, "loss": 0.0156, "step": 148450 }, { "epoch": 976.7105263157895, "grad_norm": 1.5682755708694458, "learning_rate": 0.0001, "loss": 0.0139, "step": 148460 }, { "epoch": 976.7763157894736, "grad_norm": 1.6976463794708252, "learning_rate": 0.0001, "loss": 0.011, "step": 148470 }, { "epoch": 976.8421052631579, "grad_norm": 1.269697666168213, "learning_rate": 0.0001, "loss": 0.0126, "step": 148480 }, { "epoch": 976.9078947368421, "grad_norm": 1.1741251945495605, "learning_rate": 0.0001, "loss": 0.0115, "step": 148490 }, { "epoch": 976.9736842105264, "grad_norm": 1.5746853351593018, "learning_rate": 0.0001, "loss": 0.01, "step": 148500 }, { "epoch": 977.0394736842105, "grad_norm": 1.4988516569137573, "learning_rate": 0.0001, "loss": 0.0122, "step": 148510 }, { "epoch": 977.1052631578947, "grad_norm": 1.6659159660339355, "learning_rate": 0.0001, "loss": 0.0109, "step": 148520 }, { "epoch": 977.171052631579, "grad_norm": 1.3423070907592773, "learning_rate": 0.0001, "loss": 0.0116, "step": 148530 }, { "epoch": 977.2368421052631, "grad_norm": 1.5832008123397827, "learning_rate": 0.0001, "loss": 0.0107, "step": 148540 }, { "epoch": 977.3026315789474, "grad_norm": 1.220393419265747, "learning_rate": 0.0001, "loss": 0.015, "step": 148550 }, { "epoch": 977.3684210526316, "grad_norm": 1.3856291770935059, "learning_rate": 0.0001, "loss": 0.0108, "step": 148560 }, { "epoch": 977.4342105263158, "grad_norm": 1.7218319177627563, "learning_rate": 0.0001, "loss": 0.0108, "step": 148570 }, { "epoch": 977.5, "grad_norm": 1.307869553565979, "learning_rate": 0.0001, "loss": 0.0093, "step": 148580 }, { "epoch": 977.5657894736842, "grad_norm": 1.6109209060668945, "learning_rate": 0.0001, "loss": 0.0091, "step": 148590 }, { "epoch": 977.6315789473684, "grad_norm": 1.2205017805099487, "learning_rate": 0.0001, "loss": 0.0132, "step": 148600 }, { "epoch": 977.6973684210526, "grad_norm": 1.5837448835372925, "learning_rate": 0.0001, "loss": 0.0127, "step": 148610 }, { "epoch": 977.7631578947369, "grad_norm": 1.9771199226379395, "learning_rate": 0.0001, "loss": 0.0186, "step": 148620 }, { "epoch": 977.828947368421, "grad_norm": 1.2704018354415894, "learning_rate": 0.0001, "loss": 0.0137, "step": 148630 }, { "epoch": 977.8947368421053, "grad_norm": 1.6559406518936157, "learning_rate": 0.0001, "loss": 0.0139, "step": 148640 }, { "epoch": 977.9605263157895, "grad_norm": 1.5510497093200684, "learning_rate": 0.0001, "loss": 0.0199, "step": 148650 }, { "epoch": 978.0263157894736, "grad_norm": 1.2982193231582642, "learning_rate": 0.0001, "loss": 0.0087, "step": 148660 }, { "epoch": 978.0921052631579, "grad_norm": 1.2089903354644775, "learning_rate": 0.0001, "loss": 0.0116, "step": 148670 }, { "epoch": 978.1578947368421, "grad_norm": 1.4413942098617554, "learning_rate": 0.0001, "loss": 0.0116, "step": 148680 }, { "epoch": 978.2236842105264, "grad_norm": 1.2277593612670898, "learning_rate": 0.0001, "loss": 0.0134, "step": 148690 }, { "epoch": 978.2894736842105, "grad_norm": 1.1378295421600342, "learning_rate": 0.0001, "loss": 0.0121, "step": 148700 }, { "epoch": 978.3552631578947, "grad_norm": 1.404553771018982, "learning_rate": 0.0001, "loss": 0.0149, "step": 148710 }, { "epoch": 978.421052631579, "grad_norm": 1.2895848751068115, "learning_rate": 0.0001, "loss": 0.0129, "step": 148720 }, { "epoch": 978.4868421052631, "grad_norm": 1.4186259508132935, "learning_rate": 0.0001, "loss": 0.0148, "step": 148730 }, { "epoch": 978.5526315789474, "grad_norm": 1.492699146270752, "learning_rate": 0.0001, "loss": 0.0109, "step": 148740 }, { "epoch": 978.6184210526316, "grad_norm": 1.8126243352890015, "learning_rate": 0.0001, "loss": 0.013, "step": 148750 }, { "epoch": 978.6842105263158, "grad_norm": 1.6072263717651367, "learning_rate": 0.0001, "loss": 0.0131, "step": 148760 }, { "epoch": 978.75, "grad_norm": 1.8063782453536987, "learning_rate": 0.0001, "loss": 0.0112, "step": 148770 }, { "epoch": 978.8157894736842, "grad_norm": 1.3292694091796875, "learning_rate": 0.0001, "loss": 0.0138, "step": 148780 }, { "epoch": 978.8815789473684, "grad_norm": 1.247031569480896, "learning_rate": 0.0001, "loss": 0.0111, "step": 148790 }, { "epoch": 978.9473684210526, "grad_norm": 1.5129297971725464, "learning_rate": 0.0001, "loss": 0.0151, "step": 148800 }, { "epoch": 979.0131578947369, "grad_norm": 1.0632494688034058, "learning_rate": 0.0001, "loss": 0.0125, "step": 148810 }, { "epoch": 979.078947368421, "grad_norm": 1.4399453401565552, "learning_rate": 0.0001, "loss": 0.0126, "step": 148820 }, { "epoch": 979.1447368421053, "grad_norm": 1.4980601072311401, "learning_rate": 0.0001, "loss": 0.0107, "step": 148830 }, { "epoch": 979.2105263157895, "grad_norm": 1.7658112049102783, "learning_rate": 0.0001, "loss": 0.0109, "step": 148840 }, { "epoch": 979.2763157894736, "grad_norm": 1.488180160522461, "learning_rate": 0.0001, "loss": 0.018, "step": 148850 }, { "epoch": 979.3421052631579, "grad_norm": 1.1573694944381714, "learning_rate": 0.0001, "loss": 0.0121, "step": 148860 }, { "epoch": 979.4078947368421, "grad_norm": 1.3995567560195923, "learning_rate": 0.0001, "loss": 0.0117, "step": 148870 }, { "epoch": 979.4736842105264, "grad_norm": 1.2901344299316406, "learning_rate": 0.0001, "loss": 0.013, "step": 148880 }, { "epoch": 979.5394736842105, "grad_norm": 1.3864270448684692, "learning_rate": 0.0001, "loss": 0.0138, "step": 148890 }, { "epoch": 979.6052631578947, "grad_norm": 1.7551182508468628, "learning_rate": 0.0001, "loss": 0.0137, "step": 148900 }, { "epoch": 979.671052631579, "grad_norm": 1.278478741645813, "learning_rate": 0.0001, "loss": 0.0145, "step": 148910 }, { "epoch": 979.7368421052631, "grad_norm": 1.2677310705184937, "learning_rate": 0.0001, "loss": 0.0106, "step": 148920 }, { "epoch": 979.8026315789474, "grad_norm": 1.306099772453308, "learning_rate": 0.0001, "loss": 0.0104, "step": 148930 }, { "epoch": 979.8684210526316, "grad_norm": 1.2859331369400024, "learning_rate": 0.0001, "loss": 0.0131, "step": 148940 }, { "epoch": 979.9342105263158, "grad_norm": 1.2553855180740356, "learning_rate": 0.0001, "loss": 0.0132, "step": 148950 }, { "epoch": 980.0, "grad_norm": 1.1915796995162964, "learning_rate": 0.0001, "loss": 0.0135, "step": 148960 }, { "epoch": 980.0657894736842, "grad_norm": 1.3734837770462036, "learning_rate": 0.0001, "loss": 0.0118, "step": 148970 }, { "epoch": 980.1315789473684, "grad_norm": 1.3836429119110107, "learning_rate": 0.0001, "loss": 0.0118, "step": 148980 }, { "epoch": 980.1973684210526, "grad_norm": 1.320733666419983, "learning_rate": 0.0001, "loss": 0.0115, "step": 148990 }, { "epoch": 980.2631578947369, "grad_norm": 1.908693552017212, "learning_rate": 0.0001, "loss": 0.0123, "step": 149000 }, { "epoch": 980.328947368421, "grad_norm": 1.131699800491333, "learning_rate": 0.0001, "loss": 0.0105, "step": 149010 }, { "epoch": 980.3947368421053, "grad_norm": 1.1357362270355225, "learning_rate": 0.0001, "loss": 0.0106, "step": 149020 }, { "epoch": 980.4605263157895, "grad_norm": 1.3366466760635376, "learning_rate": 0.0001, "loss": 0.0133, "step": 149030 }, { "epoch": 980.5263157894736, "grad_norm": 1.1714330911636353, "learning_rate": 0.0001, "loss": 0.0111, "step": 149040 }, { "epoch": 980.5921052631579, "grad_norm": 1.7560023069381714, "learning_rate": 0.0001, "loss": 0.0157, "step": 149050 }, { "epoch": 980.6578947368421, "grad_norm": 1.6549501419067383, "learning_rate": 0.0001, "loss": 0.0134, "step": 149060 }, { "epoch": 980.7236842105264, "grad_norm": 1.3449074029922485, "learning_rate": 0.0001, "loss": 0.0129, "step": 149070 }, { "epoch": 980.7894736842105, "grad_norm": 1.4124568700790405, "learning_rate": 0.0001, "loss": 0.015, "step": 149080 }, { "epoch": 980.8552631578947, "grad_norm": 1.3587265014648438, "learning_rate": 0.0001, "loss": 0.0141, "step": 149090 }, { "epoch": 980.921052631579, "grad_norm": 1.5125738382339478, "learning_rate": 0.0001, "loss": 0.0156, "step": 149100 }, { "epoch": 980.9868421052631, "grad_norm": 1.8645241260528564, "learning_rate": 0.0001, "loss": 0.0127, "step": 149110 }, { "epoch": 981.0526315789474, "grad_norm": 1.5971870422363281, "learning_rate": 0.0001, "loss": 0.0115, "step": 149120 }, { "epoch": 981.1184210526316, "grad_norm": 1.537646770477295, "learning_rate": 0.0001, "loss": 0.0119, "step": 149130 }, { "epoch": 981.1842105263158, "grad_norm": 1.5686954259872437, "learning_rate": 0.0001, "loss": 0.0136, "step": 149140 }, { "epoch": 981.25, "grad_norm": 1.3695727586746216, "learning_rate": 0.0001, "loss": 0.0124, "step": 149150 }, { "epoch": 981.3157894736842, "grad_norm": 1.2189819812774658, "learning_rate": 0.0001, "loss": 0.0127, "step": 149160 }, { "epoch": 981.3815789473684, "grad_norm": 1.8461825847625732, "learning_rate": 0.0001, "loss": 0.0121, "step": 149170 }, { "epoch": 981.4473684210526, "grad_norm": 1.7531802654266357, "learning_rate": 0.0001, "loss": 0.0131, "step": 149180 }, { "epoch": 981.5131578947369, "grad_norm": 0.956636369228363, "learning_rate": 0.0001, "loss": 0.015, "step": 149190 }, { "epoch": 981.578947368421, "grad_norm": 1.5300142765045166, "learning_rate": 0.0001, "loss": 0.0148, "step": 149200 }, { "epoch": 981.6447368421053, "grad_norm": 1.8545398712158203, "learning_rate": 0.0001, "loss": 0.0112, "step": 149210 }, { "epoch": 981.7105263157895, "grad_norm": 1.5533498525619507, "learning_rate": 0.0001, "loss": 0.0092, "step": 149220 }, { "epoch": 981.7763157894736, "grad_norm": 1.0046589374542236, "learning_rate": 0.0001, "loss": 0.0091, "step": 149230 }, { "epoch": 981.8421052631579, "grad_norm": 1.5125248432159424, "learning_rate": 0.0001, "loss": 0.0143, "step": 149240 }, { "epoch": 981.9078947368421, "grad_norm": 1.5883243083953857, "learning_rate": 0.0001, "loss": 0.0116, "step": 149250 }, { "epoch": 981.9736842105264, "grad_norm": 1.0684183835983276, "learning_rate": 0.0001, "loss": 0.0135, "step": 149260 }, { "epoch": 982.0394736842105, "grad_norm": 1.3719843626022339, "learning_rate": 0.0001, "loss": 0.0142, "step": 149270 }, { "epoch": 982.1052631578947, "grad_norm": 1.2638099193572998, "learning_rate": 0.0001, "loss": 0.0117, "step": 149280 }, { "epoch": 982.171052631579, "grad_norm": 1.6688660383224487, "learning_rate": 0.0001, "loss": 0.0098, "step": 149290 }, { "epoch": 982.2368421052631, "grad_norm": 1.2436734437942505, "learning_rate": 0.0001, "loss": 0.0098, "step": 149300 }, { "epoch": 982.3026315789474, "grad_norm": 1.2058970928192139, "learning_rate": 0.0001, "loss": 0.0142, "step": 149310 }, { "epoch": 982.3684210526316, "grad_norm": 1.2958214282989502, "learning_rate": 0.0001, "loss": 0.0127, "step": 149320 }, { "epoch": 982.4342105263158, "grad_norm": 1.265516757965088, "learning_rate": 0.0001, "loss": 0.0138, "step": 149330 }, { "epoch": 982.5, "grad_norm": 1.3872530460357666, "learning_rate": 0.0001, "loss": 0.0103, "step": 149340 }, { "epoch": 982.5657894736842, "grad_norm": 1.3350600004196167, "learning_rate": 0.0001, "loss": 0.0162, "step": 149350 }, { "epoch": 982.6315789473684, "grad_norm": 1.589593768119812, "learning_rate": 0.0001, "loss": 0.012, "step": 149360 }, { "epoch": 982.6973684210526, "grad_norm": 1.9380300045013428, "learning_rate": 0.0001, "loss": 0.0112, "step": 149370 }, { "epoch": 982.7631578947369, "grad_norm": 1.6364264488220215, "learning_rate": 0.0001, "loss": 0.0117, "step": 149380 }, { "epoch": 982.828947368421, "grad_norm": 1.1714802980422974, "learning_rate": 0.0001, "loss": 0.016, "step": 149390 }, { "epoch": 982.8947368421053, "grad_norm": 1.3114198446273804, "learning_rate": 0.0001, "loss": 0.0102, "step": 149400 }, { "epoch": 982.9605263157895, "grad_norm": 1.6908690929412842, "learning_rate": 0.0001, "loss": 0.0146, "step": 149410 }, { "epoch": 983.0263157894736, "grad_norm": 1.1037362813949585, "learning_rate": 0.0001, "loss": 0.0125, "step": 149420 }, { "epoch": 983.0921052631579, "grad_norm": 1.6432560682296753, "learning_rate": 0.0001, "loss": 0.0117, "step": 149430 }, { "epoch": 983.1578947368421, "grad_norm": 1.3174971342086792, "learning_rate": 0.0001, "loss": 0.0104, "step": 149440 }, { "epoch": 983.2236842105264, "grad_norm": 1.5800226926803589, "learning_rate": 0.0001, "loss": 0.011, "step": 149450 }, { "epoch": 983.2894736842105, "grad_norm": 1.355060338973999, "learning_rate": 0.0001, "loss": 0.013, "step": 149460 }, { "epoch": 983.3552631578947, "grad_norm": 1.6307930946350098, "learning_rate": 0.0001, "loss": 0.0128, "step": 149470 }, { "epoch": 983.421052631579, "grad_norm": 1.4467440843582153, "learning_rate": 0.0001, "loss": 0.014, "step": 149480 }, { "epoch": 983.4868421052631, "grad_norm": 1.287651538848877, "learning_rate": 0.0001, "loss": 0.0149, "step": 149490 }, { "epoch": 983.5526315789474, "grad_norm": 1.436430811882019, "learning_rate": 0.0001, "loss": 0.0117, "step": 149500 }, { "epoch": 983.6184210526316, "grad_norm": 1.3026747703552246, "learning_rate": 0.0001, "loss": 0.0127, "step": 149510 }, { "epoch": 983.6842105263158, "grad_norm": 1.3903110027313232, "learning_rate": 0.0001, "loss": 0.0101, "step": 149520 }, { "epoch": 983.75, "grad_norm": 1.2656772136688232, "learning_rate": 0.0001, "loss": 0.0139, "step": 149530 }, { "epoch": 983.8157894736842, "grad_norm": 1.4965262413024902, "learning_rate": 0.0001, "loss": 0.0125, "step": 149540 }, { "epoch": 983.8815789473684, "grad_norm": 1.4337387084960938, "learning_rate": 0.0001, "loss": 0.0132, "step": 149550 }, { "epoch": 983.9473684210526, "grad_norm": 1.4213181734085083, "learning_rate": 0.0001, "loss": 0.0176, "step": 149560 }, { "epoch": 984.0131578947369, "grad_norm": 1.2158756256103516, "learning_rate": 0.0001, "loss": 0.0112, "step": 149570 }, { "epoch": 984.078947368421, "grad_norm": 1.2056255340576172, "learning_rate": 0.0001, "loss": 0.0117, "step": 149580 }, { "epoch": 984.1447368421053, "grad_norm": 1.652414083480835, "learning_rate": 0.0001, "loss": 0.0115, "step": 149590 }, { "epoch": 984.2105263157895, "grad_norm": 1.578309178352356, "learning_rate": 0.0001, "loss": 0.0174, "step": 149600 }, { "epoch": 984.2763157894736, "grad_norm": 0.9312256574630737, "learning_rate": 0.0001, "loss": 0.0088, "step": 149610 }, { "epoch": 984.3421052631579, "grad_norm": 1.5326902866363525, "learning_rate": 0.0001, "loss": 0.0118, "step": 149620 }, { "epoch": 984.4078947368421, "grad_norm": 1.6090646982192993, "learning_rate": 0.0001, "loss": 0.0139, "step": 149630 }, { "epoch": 984.4736842105264, "grad_norm": 1.7849041223526, "learning_rate": 0.0001, "loss": 0.0138, "step": 149640 }, { "epoch": 984.5394736842105, "grad_norm": 1.650628924369812, "learning_rate": 0.0001, "loss": 0.0097, "step": 149650 }, { "epoch": 984.6052631578947, "grad_norm": 1.4490805864334106, "learning_rate": 0.0001, "loss": 0.0141, "step": 149660 }, { "epoch": 984.671052631579, "grad_norm": 1.4444150924682617, "learning_rate": 0.0001, "loss": 0.0109, "step": 149670 }, { "epoch": 984.7368421052631, "grad_norm": 1.3940842151641846, "learning_rate": 0.0001, "loss": 0.0146, "step": 149680 }, { "epoch": 984.8026315789474, "grad_norm": 1.2905131578445435, "learning_rate": 0.0001, "loss": 0.0123, "step": 149690 }, { "epoch": 984.8684210526316, "grad_norm": 1.8640186786651611, "learning_rate": 0.0001, "loss": 0.0127, "step": 149700 }, { "epoch": 984.9342105263158, "grad_norm": 1.4812307357788086, "learning_rate": 0.0001, "loss": 0.0126, "step": 149710 }, { "epoch": 985.0, "grad_norm": 1.407413125038147, "learning_rate": 0.0001, "loss": 0.0119, "step": 149720 }, { "epoch": 985.0657894736842, "grad_norm": 1.5222636461257935, "learning_rate": 0.0001, "loss": 0.013, "step": 149730 }, { "epoch": 985.1315789473684, "grad_norm": 1.767130732536316, "learning_rate": 0.0001, "loss": 0.0106, "step": 149740 }, { "epoch": 985.1973684210526, "grad_norm": 1.1940948963165283, "learning_rate": 0.0001, "loss": 0.0111, "step": 149750 }, { "epoch": 985.2631578947369, "grad_norm": 1.418509602546692, "learning_rate": 0.0001, "loss": 0.0111, "step": 149760 }, { "epoch": 985.328947368421, "grad_norm": 1.5080914497375488, "learning_rate": 0.0001, "loss": 0.0118, "step": 149770 }, { "epoch": 985.3947368421053, "grad_norm": 1.3442906141281128, "learning_rate": 0.0001, "loss": 0.0119, "step": 149780 }, { "epoch": 985.4605263157895, "grad_norm": 1.079620122909546, "learning_rate": 0.0001, "loss": 0.0195, "step": 149790 }, { "epoch": 985.5263157894736, "grad_norm": 1.8455467224121094, "learning_rate": 0.0001, "loss": 0.0091, "step": 149800 }, { "epoch": 985.5921052631579, "grad_norm": 1.5994020700454712, "learning_rate": 0.0001, "loss": 0.014, "step": 149810 }, { "epoch": 985.6578947368421, "grad_norm": 1.6615289449691772, "learning_rate": 0.0001, "loss": 0.0147, "step": 149820 }, { "epoch": 985.7236842105264, "grad_norm": 1.236717939376831, "learning_rate": 0.0001, "loss": 0.0117, "step": 149830 }, { "epoch": 985.7894736842105, "grad_norm": 1.6099796295166016, "learning_rate": 0.0001, "loss": 0.0171, "step": 149840 }, { "epoch": 985.8552631578947, "grad_norm": 1.5190376043319702, "learning_rate": 0.0001, "loss": 0.0127, "step": 149850 }, { "epoch": 985.921052631579, "grad_norm": 1.0994973182678223, "learning_rate": 0.0001, "loss": 0.0095, "step": 149860 }, { "epoch": 985.9868421052631, "grad_norm": 1.6973941326141357, "learning_rate": 0.0001, "loss": 0.0091, "step": 149870 }, { "epoch": 986.0526315789474, "grad_norm": 1.8240749835968018, "learning_rate": 0.0001, "loss": 0.0124, "step": 149880 }, { "epoch": 986.1184210526316, "grad_norm": 1.1718368530273438, "learning_rate": 0.0001, "loss": 0.0127, "step": 149890 }, { "epoch": 986.1842105263158, "grad_norm": 1.5903854370117188, "learning_rate": 0.0001, "loss": 0.0122, "step": 149900 }, { "epoch": 986.25, "grad_norm": 1.7092971801757812, "learning_rate": 0.0001, "loss": 0.0146, "step": 149910 }, { "epoch": 986.3157894736842, "grad_norm": 1.6867542266845703, "learning_rate": 0.0001, "loss": 0.0113, "step": 149920 }, { "epoch": 986.3815789473684, "grad_norm": 1.505871295928955, "learning_rate": 0.0001, "loss": 0.012, "step": 149930 }, { "epoch": 986.4473684210526, "grad_norm": 1.8139506578445435, "learning_rate": 0.0001, "loss": 0.0089, "step": 149940 }, { "epoch": 986.5131578947369, "grad_norm": 2.1161956787109375, "learning_rate": 0.0001, "loss": 0.0103, "step": 149950 }, { "epoch": 986.578947368421, "grad_norm": 1.5547980070114136, "learning_rate": 0.0001, "loss": 0.0104, "step": 149960 }, { "epoch": 986.6447368421053, "grad_norm": 1.4911216497421265, "learning_rate": 0.0001, "loss": 0.0119, "step": 149970 }, { "epoch": 986.7105263157895, "grad_norm": 1.2470301389694214, "learning_rate": 0.0001, "loss": 0.0138, "step": 149980 }, { "epoch": 986.7763157894736, "grad_norm": 1.218785285949707, "learning_rate": 0.0001, "loss": 0.0128, "step": 149990 }, { "epoch": 986.8421052631579, "grad_norm": 1.2006551027297974, "learning_rate": 0.0001, "loss": 0.018, "step": 150000 }, { "epoch": 986.9078947368421, "grad_norm": 1.6455953121185303, "learning_rate": 0.0001, "loss": 0.0136, "step": 150010 }, { "epoch": 986.9736842105264, "grad_norm": 1.3704603910446167, "learning_rate": 0.0001, "loss": 0.0125, "step": 150020 }, { "epoch": 987.0394736842105, "grad_norm": 1.2430366277694702, "learning_rate": 0.0001, "loss": 0.0094, "step": 150030 }, { "epoch": 987.1052631578947, "grad_norm": 1.4726755619049072, "learning_rate": 0.0001, "loss": 0.0136, "step": 150040 }, { "epoch": 987.171052631579, "grad_norm": 1.321221113204956, "learning_rate": 0.0001, "loss": 0.013, "step": 150050 }, { "epoch": 987.2368421052631, "grad_norm": 1.7870943546295166, "learning_rate": 0.0001, "loss": 0.0127, "step": 150060 }, { "epoch": 987.3026315789474, "grad_norm": 1.494058609008789, "learning_rate": 0.0001, "loss": 0.011, "step": 150070 }, { "epoch": 987.3684210526316, "grad_norm": 1.5886286497116089, "learning_rate": 0.0001, "loss": 0.0096, "step": 150080 }, { "epoch": 987.4342105263158, "grad_norm": 1.1114692687988281, "learning_rate": 0.0001, "loss": 0.0143, "step": 150090 }, { "epoch": 987.5, "grad_norm": 1.556789755821228, "learning_rate": 0.0001, "loss": 0.0139, "step": 150100 }, { "epoch": 987.5657894736842, "grad_norm": 1.16757333278656, "learning_rate": 0.0001, "loss": 0.0109, "step": 150110 }, { "epoch": 987.6315789473684, "grad_norm": 1.2130249738693237, "learning_rate": 0.0001, "loss": 0.0122, "step": 150120 }, { "epoch": 987.6973684210526, "grad_norm": 0.9313134551048279, "learning_rate": 0.0001, "loss": 0.0137, "step": 150130 }, { "epoch": 987.7631578947369, "grad_norm": 1.5864447355270386, "learning_rate": 0.0001, "loss": 0.0133, "step": 150140 }, { "epoch": 987.828947368421, "grad_norm": 1.1899210214614868, "learning_rate": 0.0001, "loss": 0.015, "step": 150150 }, { "epoch": 987.8947368421053, "grad_norm": 1.2414551973342896, "learning_rate": 0.0001, "loss": 0.0106, "step": 150160 }, { "epoch": 987.9605263157895, "grad_norm": 1.5280718803405762, "learning_rate": 0.0001, "loss": 0.0131, "step": 150170 }, { "epoch": 988.0263157894736, "grad_norm": 1.301979899406433, "learning_rate": 0.0001, "loss": 0.0165, "step": 150180 }, { "epoch": 988.0921052631579, "grad_norm": 1.261670470237732, "learning_rate": 0.0001, "loss": 0.0126, "step": 150190 }, { "epoch": 988.1578947368421, "grad_norm": 1.6259251832962036, "learning_rate": 0.0001, "loss": 0.0164, "step": 150200 }, { "epoch": 988.2236842105264, "grad_norm": 1.6117069721221924, "learning_rate": 0.0001, "loss": 0.013, "step": 150210 }, { "epoch": 988.2894736842105, "grad_norm": 1.657075047492981, "learning_rate": 0.0001, "loss": 0.0094, "step": 150220 }, { "epoch": 988.3552631578947, "grad_norm": 1.0397224426269531, "learning_rate": 0.0001, "loss": 0.011, "step": 150230 }, { "epoch": 988.421052631579, "grad_norm": 0.8817715048789978, "learning_rate": 0.0001, "loss": 0.0148, "step": 150240 }, { "epoch": 988.4868421052631, "grad_norm": 1.1489909887313843, "learning_rate": 0.0001, "loss": 0.0109, "step": 150250 }, { "epoch": 988.5526315789474, "grad_norm": 1.4623209238052368, "learning_rate": 0.0001, "loss": 0.0129, "step": 150260 }, { "epoch": 988.6184210526316, "grad_norm": 1.8367887735366821, "learning_rate": 0.0001, "loss": 0.0114, "step": 150270 }, { "epoch": 988.6842105263158, "grad_norm": 1.4946058988571167, "learning_rate": 0.0001, "loss": 0.0105, "step": 150280 }, { "epoch": 988.75, "grad_norm": 1.7829647064208984, "learning_rate": 0.0001, "loss": 0.0168, "step": 150290 }, { "epoch": 988.8157894736842, "grad_norm": 1.5618789196014404, "learning_rate": 0.0001, "loss": 0.0144, "step": 150300 }, { "epoch": 988.8815789473684, "grad_norm": 1.4280078411102295, "learning_rate": 0.0001, "loss": 0.0123, "step": 150310 }, { "epoch": 988.9473684210526, "grad_norm": 1.9054640531539917, "learning_rate": 0.0001, "loss": 0.0088, "step": 150320 }, { "epoch": 989.0131578947369, "grad_norm": 1.1327086687088013, "learning_rate": 0.0001, "loss": 0.0113, "step": 150330 }, { "epoch": 989.078947368421, "grad_norm": 1.70881986618042, "learning_rate": 0.0001, "loss": 0.0102, "step": 150340 }, { "epoch": 989.1447368421053, "grad_norm": 1.4324415922164917, "learning_rate": 0.0001, "loss": 0.0131, "step": 150350 }, { "epoch": 989.2105263157895, "grad_norm": 1.1831026077270508, "learning_rate": 0.0001, "loss": 0.0129, "step": 150360 }, { "epoch": 989.2763157894736, "grad_norm": 1.564550757408142, "learning_rate": 0.0001, "loss": 0.0131, "step": 150370 }, { "epoch": 989.3421052631579, "grad_norm": 1.5176376104354858, "learning_rate": 0.0001, "loss": 0.0141, "step": 150380 }, { "epoch": 989.4078947368421, "grad_norm": 1.4770851135253906, "learning_rate": 0.0001, "loss": 0.0117, "step": 150390 }, { "epoch": 989.4736842105264, "grad_norm": 1.3715091943740845, "learning_rate": 0.0001, "loss": 0.0097, "step": 150400 }, { "epoch": 989.5394736842105, "grad_norm": 1.5333682298660278, "learning_rate": 0.0001, "loss": 0.0101, "step": 150410 }, { "epoch": 989.6052631578947, "grad_norm": 1.7286620140075684, "learning_rate": 0.0001, "loss": 0.0139, "step": 150420 }, { "epoch": 989.671052631579, "grad_norm": 1.385377049446106, "learning_rate": 0.0001, "loss": 0.0151, "step": 150430 }, { "epoch": 989.7368421052631, "grad_norm": 1.5773863792419434, "learning_rate": 0.0001, "loss": 0.0154, "step": 150440 }, { "epoch": 989.8026315789474, "grad_norm": 1.7188481092453003, "learning_rate": 0.0001, "loss": 0.0119, "step": 150450 }, { "epoch": 989.8684210526316, "grad_norm": 1.652669072151184, "learning_rate": 0.0001, "loss": 0.0135, "step": 150460 }, { "epoch": 989.9342105263158, "grad_norm": 1.9299861192703247, "learning_rate": 0.0001, "loss": 0.0095, "step": 150470 }, { "epoch": 990.0, "grad_norm": 1.7060363292694092, "learning_rate": 0.0001, "loss": 0.0142, "step": 150480 }, { "epoch": 990.0657894736842, "grad_norm": 1.4454219341278076, "learning_rate": 0.0001, "loss": 0.0115, "step": 150490 }, { "epoch": 990.1315789473684, "grad_norm": 1.4385212659835815, "learning_rate": 0.0001, "loss": 0.0131, "step": 150500 }, { "epoch": 990.1973684210526, "grad_norm": 1.308791995048523, "learning_rate": 0.0001, "loss": 0.014, "step": 150510 }, { "epoch": 990.2631578947369, "grad_norm": 1.4078327417373657, "learning_rate": 0.0001, "loss": 0.0096, "step": 150520 }, { "epoch": 990.328947368421, "grad_norm": 1.3313803672790527, "learning_rate": 0.0001, "loss": 0.0118, "step": 150530 }, { "epoch": 990.3947368421053, "grad_norm": 1.2752584218978882, "learning_rate": 0.0001, "loss": 0.0129, "step": 150540 }, { "epoch": 990.4605263157895, "grad_norm": 1.4208213090896606, "learning_rate": 0.0001, "loss": 0.014, "step": 150550 }, { "epoch": 990.5263157894736, "grad_norm": 1.3404821157455444, "learning_rate": 0.0001, "loss": 0.0114, "step": 150560 }, { "epoch": 990.5921052631579, "grad_norm": 1.1281729936599731, "learning_rate": 0.0001, "loss": 0.0132, "step": 150570 }, { "epoch": 990.6578947368421, "grad_norm": 1.126156210899353, "learning_rate": 0.0001, "loss": 0.0136, "step": 150580 }, { "epoch": 990.7236842105264, "grad_norm": 1.2623995542526245, "learning_rate": 0.0001, "loss": 0.0149, "step": 150590 }, { "epoch": 990.7894736842105, "grad_norm": 1.3634233474731445, "learning_rate": 0.0001, "loss": 0.0127, "step": 150600 }, { "epoch": 990.8552631578947, "grad_norm": 1.1328572034835815, "learning_rate": 0.0001, "loss": 0.0129, "step": 150610 }, { "epoch": 990.921052631579, "grad_norm": 1.6245137453079224, "learning_rate": 0.0001, "loss": 0.0111, "step": 150620 }, { "epoch": 990.9868421052631, "grad_norm": 1.5357036590576172, "learning_rate": 0.0001, "loss": 0.0146, "step": 150630 }, { "epoch": 991.0526315789474, "grad_norm": 1.739688754081726, "learning_rate": 0.0001, "loss": 0.0126, "step": 150640 }, { "epoch": 991.1184210526316, "grad_norm": 1.4747097492218018, "learning_rate": 0.0001, "loss": 0.0106, "step": 150650 }, { "epoch": 991.1842105263158, "grad_norm": 1.767410159111023, "learning_rate": 0.0001, "loss": 0.0124, "step": 150660 }, { "epoch": 991.25, "grad_norm": 1.9347885847091675, "learning_rate": 0.0001, "loss": 0.0092, "step": 150670 }, { "epoch": 991.3157894736842, "grad_norm": 1.6673017740249634, "learning_rate": 0.0001, "loss": 0.0146, "step": 150680 }, { "epoch": 991.3815789473684, "grad_norm": 1.443229079246521, "learning_rate": 0.0001, "loss": 0.0109, "step": 150690 }, { "epoch": 991.4473684210526, "grad_norm": 1.3499728441238403, "learning_rate": 0.0001, "loss": 0.0124, "step": 150700 }, { "epoch": 991.5131578947369, "grad_norm": 1.2295962572097778, "learning_rate": 0.0001, "loss": 0.0124, "step": 150710 }, { "epoch": 991.578947368421, "grad_norm": 1.2315282821655273, "learning_rate": 0.0001, "loss": 0.0157, "step": 150720 }, { "epoch": 991.6447368421053, "grad_norm": 1.0323805809020996, "learning_rate": 0.0001, "loss": 0.0097, "step": 150730 }, { "epoch": 991.7105263157895, "grad_norm": 1.6446186304092407, "learning_rate": 0.0001, "loss": 0.0122, "step": 150740 }, { "epoch": 991.7763157894736, "grad_norm": 1.46724271774292, "learning_rate": 0.0001, "loss": 0.0108, "step": 150750 }, { "epoch": 991.8421052631579, "grad_norm": 1.6427642107009888, "learning_rate": 0.0001, "loss": 0.0156, "step": 150760 }, { "epoch": 991.9078947368421, "grad_norm": 1.356147050857544, "learning_rate": 0.0001, "loss": 0.016, "step": 150770 }, { "epoch": 991.9736842105264, "grad_norm": 1.5404000282287598, "learning_rate": 0.0001, "loss": 0.0139, "step": 150780 }, { "epoch": 992.0394736842105, "grad_norm": 1.3328317403793335, "learning_rate": 0.0001, "loss": 0.0128, "step": 150790 }, { "epoch": 992.1052631578947, "grad_norm": 1.589895486831665, "learning_rate": 0.0001, "loss": 0.0127, "step": 150800 }, { "epoch": 992.171052631579, "grad_norm": 1.6749306917190552, "learning_rate": 0.0001, "loss": 0.0135, "step": 150810 }, { "epoch": 992.2368421052631, "grad_norm": 1.4400546550750732, "learning_rate": 0.0001, "loss": 0.0178, "step": 150820 }, { "epoch": 992.3026315789474, "grad_norm": 1.675971269607544, "learning_rate": 0.0001, "loss": 0.0148, "step": 150830 }, { "epoch": 992.3684210526316, "grad_norm": 1.7681766748428345, "learning_rate": 0.0001, "loss": 0.0116, "step": 150840 }, { "epoch": 992.4342105263158, "grad_norm": 1.710736632347107, "learning_rate": 0.0001, "loss": 0.0134, "step": 150850 }, { "epoch": 992.5, "grad_norm": 1.5905210971832275, "learning_rate": 0.0001, "loss": 0.0106, "step": 150860 }, { "epoch": 992.5657894736842, "grad_norm": 1.3418775796890259, "learning_rate": 0.0001, "loss": 0.012, "step": 150870 }, { "epoch": 992.6315789473684, "grad_norm": 1.3310821056365967, "learning_rate": 0.0001, "loss": 0.0131, "step": 150880 }, { "epoch": 992.6973684210526, "grad_norm": 1.5187244415283203, "learning_rate": 0.0001, "loss": 0.0132, "step": 150890 }, { "epoch": 992.7631578947369, "grad_norm": 1.67141592502594, "learning_rate": 0.0001, "loss": 0.0126, "step": 150900 }, { "epoch": 992.828947368421, "grad_norm": 1.786939263343811, "learning_rate": 0.0001, "loss": 0.0113, "step": 150910 }, { "epoch": 992.8947368421053, "grad_norm": 1.3193559646606445, "learning_rate": 0.0001, "loss": 0.009, "step": 150920 }, { "epoch": 992.9605263157895, "grad_norm": 1.5809931755065918, "learning_rate": 0.0001, "loss": 0.0114, "step": 150930 }, { "epoch": 993.0263157894736, "grad_norm": 1.606534481048584, "learning_rate": 0.0001, "loss": 0.0112, "step": 150940 }, { "epoch": 993.0921052631579, "grad_norm": 1.4113054275512695, "learning_rate": 0.0001, "loss": 0.0102, "step": 150950 }, { "epoch": 993.1578947368421, "grad_norm": 1.784018874168396, "learning_rate": 0.0001, "loss": 0.0088, "step": 150960 }, { "epoch": 993.2236842105264, "grad_norm": 1.4295483827590942, "learning_rate": 0.0001, "loss": 0.0128, "step": 150970 }, { "epoch": 993.2894736842105, "grad_norm": 1.035048246383667, "learning_rate": 0.0001, "loss": 0.0106, "step": 150980 }, { "epoch": 993.3552631578947, "grad_norm": 1.3610515594482422, "learning_rate": 0.0001, "loss": 0.0103, "step": 150990 }, { "epoch": 993.421052631579, "grad_norm": 1.46303391456604, "learning_rate": 0.0001, "loss": 0.0094, "step": 151000 }, { "epoch": 993.4868421052631, "grad_norm": 1.6361628770828247, "learning_rate": 0.0001, "loss": 0.0151, "step": 151010 }, { "epoch": 993.5526315789474, "grad_norm": 1.1918847560882568, "learning_rate": 0.0001, "loss": 0.0116, "step": 151020 }, { "epoch": 993.6184210526316, "grad_norm": 1.3161606788635254, "learning_rate": 0.0001, "loss": 0.0143, "step": 151030 }, { "epoch": 993.6842105263158, "grad_norm": 1.8725234270095825, "learning_rate": 0.0001, "loss": 0.0151, "step": 151040 }, { "epoch": 993.75, "grad_norm": 1.4631168842315674, "learning_rate": 0.0001, "loss": 0.0119, "step": 151050 }, { "epoch": 993.8157894736842, "grad_norm": 1.8344992399215698, "learning_rate": 0.0001, "loss": 0.0157, "step": 151060 }, { "epoch": 993.8815789473684, "grad_norm": 1.4148918390274048, "learning_rate": 0.0001, "loss": 0.0141, "step": 151070 }, { "epoch": 993.9473684210526, "grad_norm": 1.3776973485946655, "learning_rate": 0.0001, "loss": 0.0108, "step": 151080 }, { "epoch": 994.0131578947369, "grad_norm": 1.7172436714172363, "learning_rate": 0.0001, "loss": 0.017, "step": 151090 }, { "epoch": 994.078947368421, "grad_norm": 1.3327549695968628, "learning_rate": 0.0001, "loss": 0.0114, "step": 151100 }, { "epoch": 994.1447368421053, "grad_norm": 1.399843454360962, "learning_rate": 0.0001, "loss": 0.0111, "step": 151110 }, { "epoch": 994.2105263157895, "grad_norm": 1.5593435764312744, "learning_rate": 0.0001, "loss": 0.0162, "step": 151120 }, { "epoch": 994.2763157894736, "grad_norm": 1.7604174613952637, "learning_rate": 0.0001, "loss": 0.0095, "step": 151130 }, { "epoch": 994.3421052631579, "grad_norm": 1.0974109172821045, "learning_rate": 0.0001, "loss": 0.013, "step": 151140 }, { "epoch": 994.4078947368421, "grad_norm": 1.7593547105789185, "learning_rate": 0.0001, "loss": 0.0135, "step": 151150 }, { "epoch": 994.4736842105264, "grad_norm": 1.0651047229766846, "learning_rate": 0.0001, "loss": 0.0124, "step": 151160 }, { "epoch": 994.5394736842105, "grad_norm": 1.6693073511123657, "learning_rate": 0.0001, "loss": 0.013, "step": 151170 }, { "epoch": 994.6052631578947, "grad_norm": 0.9420177340507507, "learning_rate": 0.0001, "loss": 0.0114, "step": 151180 }, { "epoch": 994.671052631579, "grad_norm": 1.2496325969696045, "learning_rate": 0.0001, "loss": 0.0117, "step": 151190 }, { "epoch": 994.7368421052631, "grad_norm": 1.3631855249404907, "learning_rate": 0.0001, "loss": 0.0159, "step": 151200 }, { "epoch": 994.8026315789474, "grad_norm": 1.4928463697433472, "learning_rate": 0.0001, "loss": 0.0094, "step": 151210 }, { "epoch": 994.8684210526316, "grad_norm": 1.4182361364364624, "learning_rate": 0.0001, "loss": 0.016, "step": 151220 }, { "epoch": 994.9342105263158, "grad_norm": 0.9115750193595886, "learning_rate": 0.0001, "loss": 0.0111, "step": 151230 }, { "epoch": 995.0, "grad_norm": 1.1750556230545044, "learning_rate": 0.0001, "loss": 0.0102, "step": 151240 }, { "epoch": 995.0657894736842, "grad_norm": 1.8061397075653076, "learning_rate": 0.0001, "loss": 0.0104, "step": 151250 }, { "epoch": 995.1315789473684, "grad_norm": 1.4021267890930176, "learning_rate": 0.0001, "loss": 0.0115, "step": 151260 }, { "epoch": 995.1973684210526, "grad_norm": 1.1544603109359741, "learning_rate": 0.0001, "loss": 0.0107, "step": 151270 }, { "epoch": 995.2631578947369, "grad_norm": 1.6426676511764526, "learning_rate": 0.0001, "loss": 0.0122, "step": 151280 }, { "epoch": 995.328947368421, "grad_norm": 1.7162460088729858, "learning_rate": 0.0001, "loss": 0.0135, "step": 151290 }, { "epoch": 995.3947368421053, "grad_norm": 1.52632474899292, "learning_rate": 0.0001, "loss": 0.0099, "step": 151300 }, { "epoch": 995.4605263157895, "grad_norm": 1.4776155948638916, "learning_rate": 0.0001, "loss": 0.0135, "step": 151310 }, { "epoch": 995.5263157894736, "grad_norm": 1.3478753566741943, "learning_rate": 0.0001, "loss": 0.0132, "step": 151320 }, { "epoch": 995.5921052631579, "grad_norm": 1.5133973360061646, "learning_rate": 0.0001, "loss": 0.0097, "step": 151330 }, { "epoch": 995.6578947368421, "grad_norm": 0.9473016262054443, "learning_rate": 0.0001, "loss": 0.0111, "step": 151340 }, { "epoch": 995.7236842105264, "grad_norm": 1.3779287338256836, "learning_rate": 0.0001, "loss": 0.0118, "step": 151350 }, { "epoch": 995.7894736842105, "grad_norm": 1.5093903541564941, "learning_rate": 0.0001, "loss": 0.0159, "step": 151360 }, { "epoch": 995.8552631578947, "grad_norm": 1.3894121646881104, "learning_rate": 0.0001, "loss": 0.0154, "step": 151370 }, { "epoch": 995.921052631579, "grad_norm": 1.4173896312713623, "learning_rate": 0.0001, "loss": 0.0172, "step": 151380 }, { "epoch": 995.9868421052631, "grad_norm": 1.232253074645996, "learning_rate": 0.0001, "loss": 0.0108, "step": 151390 }, { "epoch": 996.0526315789474, "grad_norm": 1.7945772409439087, "learning_rate": 0.0001, "loss": 0.0151, "step": 151400 }, { "epoch": 996.1184210526316, "grad_norm": 1.6902289390563965, "learning_rate": 0.0001, "loss": 0.0121, "step": 151410 }, { "epoch": 996.1842105263158, "grad_norm": 1.929242730140686, "learning_rate": 0.0001, "loss": 0.0122, "step": 151420 }, { "epoch": 996.25, "grad_norm": 1.7786908149719238, "learning_rate": 0.0001, "loss": 0.0132, "step": 151430 }, { "epoch": 996.3157894736842, "grad_norm": 1.780649185180664, "learning_rate": 0.0001, "loss": 0.009, "step": 151440 }, { "epoch": 996.3815789473684, "grad_norm": 1.7586150169372559, "learning_rate": 0.0001, "loss": 0.0124, "step": 151450 }, { "epoch": 996.4473684210526, "grad_norm": 1.3901731967926025, "learning_rate": 0.0001, "loss": 0.0128, "step": 151460 }, { "epoch": 996.5131578947369, "grad_norm": 1.6284728050231934, "learning_rate": 0.0001, "loss": 0.0113, "step": 151470 }, { "epoch": 996.578947368421, "grad_norm": 1.3464546203613281, "learning_rate": 0.0001, "loss": 0.0133, "step": 151480 }, { "epoch": 996.6447368421053, "grad_norm": 1.387977957725525, "learning_rate": 0.0001, "loss": 0.0127, "step": 151490 }, { "epoch": 996.7105263157895, "grad_norm": 1.6005827188491821, "learning_rate": 0.0001, "loss": 0.0106, "step": 151500 }, { "epoch": 996.7763157894736, "grad_norm": 1.0715209245681763, "learning_rate": 0.0001, "loss": 0.0146, "step": 151510 }, { "epoch": 996.8421052631579, "grad_norm": 1.541865348815918, "learning_rate": 0.0001, "loss": 0.0108, "step": 151520 }, { "epoch": 996.9078947368421, "grad_norm": 1.439843773841858, "learning_rate": 0.0001, "loss": 0.0159, "step": 151530 }, { "epoch": 996.9736842105264, "grad_norm": 1.3821494579315186, "learning_rate": 0.0001, "loss": 0.0119, "step": 151540 }, { "epoch": 997.0394736842105, "grad_norm": 1.8014514446258545, "learning_rate": 0.0001, "loss": 0.0108, "step": 151550 }, { "epoch": 997.1052631578947, "grad_norm": 1.1635822057724, "learning_rate": 0.0001, "loss": 0.0123, "step": 151560 }, { "epoch": 997.171052631579, "grad_norm": 1.4869998693466187, "learning_rate": 0.0001, "loss": 0.0127, "step": 151570 }, { "epoch": 997.2368421052631, "grad_norm": 1.4698817729949951, "learning_rate": 0.0001, "loss": 0.0163, "step": 151580 }, { "epoch": 997.3026315789474, "grad_norm": 1.3093069791793823, "learning_rate": 0.0001, "loss": 0.0107, "step": 151590 }, { "epoch": 997.3684210526316, "grad_norm": 1.3072715997695923, "learning_rate": 0.0001, "loss": 0.0094, "step": 151600 }, { "epoch": 997.4342105263158, "grad_norm": 1.621806025505066, "learning_rate": 0.0001, "loss": 0.0142, "step": 151610 }, { "epoch": 997.5, "grad_norm": 1.141465425491333, "learning_rate": 0.0001, "loss": 0.0102, "step": 151620 }, { "epoch": 997.5657894736842, "grad_norm": 1.0901288986206055, "learning_rate": 0.0001, "loss": 0.0136, "step": 151630 }, { "epoch": 997.6315789473684, "grad_norm": 1.2546563148498535, "learning_rate": 0.0001, "loss": 0.0154, "step": 151640 }, { "epoch": 997.6973684210526, "grad_norm": 0.9590385556221008, "learning_rate": 0.0001, "loss": 0.0174, "step": 151650 }, { "epoch": 997.7631578947369, "grad_norm": 1.036413550376892, "learning_rate": 0.0001, "loss": 0.0122, "step": 151660 }, { "epoch": 997.828947368421, "grad_norm": 1.7532174587249756, "learning_rate": 0.0001, "loss": 0.0094, "step": 151670 }, { "epoch": 997.8947368421053, "grad_norm": 1.3207814693450928, "learning_rate": 0.0001, "loss": 0.009, "step": 151680 }, { "epoch": 997.9605263157895, "grad_norm": 1.0295336246490479, "learning_rate": 0.0001, "loss": 0.0166, "step": 151690 }, { "epoch": 998.0263157894736, "grad_norm": 1.4919713735580444, "learning_rate": 0.0001, "loss": 0.0112, "step": 151700 }, { "epoch": 998.0921052631579, "grad_norm": 1.3339554071426392, "learning_rate": 0.0001, "loss": 0.0122, "step": 151710 }, { "epoch": 998.1578947368421, "grad_norm": 1.5584386587142944, "learning_rate": 0.0001, "loss": 0.0136, "step": 151720 }, { "epoch": 998.2236842105264, "grad_norm": 1.3535948991775513, "learning_rate": 0.0001, "loss": 0.0111, "step": 151730 }, { "epoch": 998.2894736842105, "grad_norm": 1.6197847127914429, "learning_rate": 0.0001, "loss": 0.014, "step": 151740 }, { "epoch": 998.3552631578947, "grad_norm": 1.4435943365097046, "learning_rate": 0.0001, "loss": 0.012, "step": 151750 }, { "epoch": 998.421052631579, "grad_norm": 1.331613540649414, "learning_rate": 0.0001, "loss": 0.0145, "step": 151760 }, { "epoch": 998.4868421052631, "grad_norm": 1.4891773462295532, "learning_rate": 0.0001, "loss": 0.0133, "step": 151770 }, { "epoch": 998.5526315789474, "grad_norm": 1.5479437112808228, "learning_rate": 0.0001, "loss": 0.0115, "step": 151780 }, { "epoch": 998.6184210526316, "grad_norm": 1.4966397285461426, "learning_rate": 0.0001, "loss": 0.0118, "step": 151790 }, { "epoch": 998.6842105263158, "grad_norm": 0.9907501935958862, "learning_rate": 0.0001, "loss": 0.0156, "step": 151800 }, { "epoch": 998.75, "grad_norm": 1.8723785877227783, "learning_rate": 0.0001, "loss": 0.0124, "step": 151810 }, { "epoch": 998.8157894736842, "grad_norm": 1.7294631004333496, "learning_rate": 0.0001, "loss": 0.0112, "step": 151820 }, { "epoch": 998.8815789473684, "grad_norm": 1.0277752876281738, "learning_rate": 0.0001, "loss": 0.01, "step": 151830 }, { "epoch": 998.9473684210526, "grad_norm": 1.5054616928100586, "learning_rate": 0.0001, "loss": 0.0107, "step": 151840 }, { "epoch": 999.0131578947369, "grad_norm": 1.5700194835662842, "learning_rate": 0.0001, "loss": 0.0142, "step": 151850 }, { "epoch": 999.078947368421, "grad_norm": 1.4150317907333374, "learning_rate": 0.0001, "loss": 0.0163, "step": 151860 }, { "epoch": 999.1447368421053, "grad_norm": 1.2375253438949585, "learning_rate": 0.0001, "loss": 0.0129, "step": 151870 }, { "epoch": 999.2105263157895, "grad_norm": 1.1589634418487549, "learning_rate": 0.0001, "loss": 0.0138, "step": 151880 }, { "epoch": 999.2763157894736, "grad_norm": 1.2178140878677368, "learning_rate": 0.0001, "loss": 0.0107, "step": 151890 }, { "epoch": 999.3421052631579, "grad_norm": 1.0464675426483154, "learning_rate": 0.0001, "loss": 0.0108, "step": 151900 }, { "epoch": 999.4078947368421, "grad_norm": 1.6230353116989136, "learning_rate": 0.0001, "loss": 0.0145, "step": 151910 }, { "epoch": 999.4736842105264, "grad_norm": 1.5046865940093994, "learning_rate": 0.0001, "loss": 0.0129, "step": 151920 }, { "epoch": 999.5394736842105, "grad_norm": 1.0328960418701172, "learning_rate": 0.0001, "loss": 0.0093, "step": 151930 }, { "epoch": 999.6052631578947, "grad_norm": 1.220300555229187, "learning_rate": 0.0001, "loss": 0.0197, "step": 151940 }, { "epoch": 999.671052631579, "grad_norm": 1.7804561853408813, "learning_rate": 0.0001, "loss": 0.0101, "step": 151950 }, { "epoch": 999.7368421052631, "grad_norm": 1.2691751718521118, "learning_rate": 0.0001, "loss": 0.0131, "step": 151960 }, { "epoch": 999.8026315789474, "grad_norm": 1.5368170738220215, "learning_rate": 0.0001, "loss": 0.0098, "step": 151970 }, { "epoch": 999.8684210526316, "grad_norm": 1.1863113641738892, "learning_rate": 0.0001, "loss": 0.0126, "step": 151980 }, { "epoch": 999.9342105263158, "grad_norm": 1.6633069515228271, "learning_rate": 0.0001, "loss": 0.0119, "step": 151990 }, { "epoch": 1000.0, "grad_norm": 1.5345690250396729, "learning_rate": 0.0001, "loss": 0.0094, "step": 152000 }, { "epoch": 1000.0657894736842, "grad_norm": 1.1567022800445557, "learning_rate": 0.0001, "loss": 0.0163, "step": 152010 }, { "epoch": 1000.1315789473684, "grad_norm": 1.4405932426452637, "learning_rate": 0.0001, "loss": 0.0151, "step": 152020 }, { "epoch": 1000.1973684210526, "grad_norm": 1.638080358505249, "learning_rate": 0.0001, "loss": 0.0127, "step": 152030 }, { "epoch": 1000.2631578947369, "grad_norm": 1.3180837631225586, "learning_rate": 0.0001, "loss": 0.0101, "step": 152040 }, { "epoch": 1000.328947368421, "grad_norm": 1.6041427850723267, "learning_rate": 0.0001, "loss": 0.011, "step": 152050 }, { "epoch": 1000.3947368421053, "grad_norm": 1.2026602029800415, "learning_rate": 0.0001, "loss": 0.0123, "step": 152060 }, { "epoch": 1000.4605263157895, "grad_norm": 1.7016782760620117, "learning_rate": 0.0001, "loss": 0.0139, "step": 152070 }, { "epoch": 1000.5263157894736, "grad_norm": 1.8837673664093018, "learning_rate": 0.0001, "loss": 0.0103, "step": 152080 }, { "epoch": 1000.5921052631579, "grad_norm": 1.1273716688156128, "learning_rate": 0.0001, "loss": 0.0131, "step": 152090 }, { "epoch": 1000.6578947368421, "grad_norm": 1.3600468635559082, "learning_rate": 0.0001, "loss": 0.0157, "step": 152100 }, { "epoch": 1000.7236842105264, "grad_norm": 1.5845402479171753, "learning_rate": 0.0001, "loss": 0.0104, "step": 152110 }, { "epoch": 1000.7894736842105, "grad_norm": 2.0189807415008545, "learning_rate": 0.0001, "loss": 0.0121, "step": 152120 }, { "epoch": 1000.8552631578947, "grad_norm": 1.299301266670227, "learning_rate": 0.0001, "loss": 0.0141, "step": 152130 }, { "epoch": 1000.921052631579, "grad_norm": 1.4232205152511597, "learning_rate": 0.0001, "loss": 0.0112, "step": 152140 }, { "epoch": 1000.9868421052631, "grad_norm": 1.3693761825561523, "learning_rate": 0.0001, "loss": 0.011, "step": 152150 }, { "epoch": 1001.0526315789474, "grad_norm": 1.3892920017242432, "learning_rate": 0.0001, "loss": 0.0097, "step": 152160 }, { "epoch": 1001.1184210526316, "grad_norm": 1.1482216119766235, "learning_rate": 0.0001, "loss": 0.0174, "step": 152170 }, { "epoch": 1001.1842105263158, "grad_norm": 1.4066424369812012, "learning_rate": 0.0001, "loss": 0.0129, "step": 152180 }, { "epoch": 1001.25, "grad_norm": 1.0863428115844727, "learning_rate": 0.0001, "loss": 0.0127, "step": 152190 }, { "epoch": 1001.3157894736842, "grad_norm": 1.3137198686599731, "learning_rate": 0.0001, "loss": 0.0115, "step": 152200 }, { "epoch": 1001.3815789473684, "grad_norm": 1.3804404735565186, "learning_rate": 0.0001, "loss": 0.0129, "step": 152210 }, { "epoch": 1001.4473684210526, "grad_norm": 1.7825998067855835, "learning_rate": 0.0001, "loss": 0.0132, "step": 152220 }, { "epoch": 1001.5131578947369, "grad_norm": 1.385115623474121, "learning_rate": 0.0001, "loss": 0.0138, "step": 152230 }, { "epoch": 1001.578947368421, "grad_norm": 1.6626554727554321, "learning_rate": 0.0001, "loss": 0.013, "step": 152240 }, { "epoch": 1001.6447368421053, "grad_norm": 1.4887330532073975, "learning_rate": 0.0001, "loss": 0.0114, "step": 152250 }, { "epoch": 1001.7105263157895, "grad_norm": 1.317061185836792, "learning_rate": 0.0001, "loss": 0.011, "step": 152260 }, { "epoch": 1001.7763157894736, "grad_norm": 1.2880586385726929, "learning_rate": 0.0001, "loss": 0.0147, "step": 152270 }, { "epoch": 1001.8421052631579, "grad_norm": 1.32847261428833, "learning_rate": 0.0001, "loss": 0.0088, "step": 152280 }, { "epoch": 1001.9078947368421, "grad_norm": 1.3873234987258911, "learning_rate": 0.0001, "loss": 0.0155, "step": 152290 }, { "epoch": 1001.9736842105264, "grad_norm": 1.2566382884979248, "learning_rate": 0.0001, "loss": 0.0117, "step": 152300 }, { "epoch": 1002.0394736842105, "grad_norm": 1.728257656097412, "learning_rate": 0.0001, "loss": 0.0122, "step": 152310 }, { "epoch": 1002.1052631578947, "grad_norm": 1.6938295364379883, "learning_rate": 0.0001, "loss": 0.011, "step": 152320 }, { "epoch": 1002.171052631579, "grad_norm": 1.4881001710891724, "learning_rate": 0.0001, "loss": 0.0109, "step": 152330 }, { "epoch": 1002.2368421052631, "grad_norm": 1.4940035343170166, "learning_rate": 0.0001, "loss": 0.0145, "step": 152340 }, { "epoch": 1002.3026315789474, "grad_norm": 1.4851409196853638, "learning_rate": 0.0001, "loss": 0.0104, "step": 152350 }, { "epoch": 1002.3684210526316, "grad_norm": 1.8183948993682861, "learning_rate": 0.0001, "loss": 0.0186, "step": 152360 }, { "epoch": 1002.4342105263158, "grad_norm": 1.5807534456253052, "learning_rate": 0.0001, "loss": 0.0111, "step": 152370 }, { "epoch": 1002.5, "grad_norm": 1.4670714139938354, "learning_rate": 0.0001, "loss": 0.0122, "step": 152380 }, { "epoch": 1002.5657894736842, "grad_norm": 1.6731432676315308, "learning_rate": 0.0001, "loss": 0.0099, "step": 152390 }, { "epoch": 1002.6315789473684, "grad_norm": 1.1301766633987427, "learning_rate": 0.0001, "loss": 0.0133, "step": 152400 }, { "epoch": 1002.6973684210526, "grad_norm": 1.3771156072616577, "learning_rate": 0.0001, "loss": 0.0138, "step": 152410 }, { "epoch": 1002.7631578947369, "grad_norm": 1.5047788619995117, "learning_rate": 0.0001, "loss": 0.0123, "step": 152420 }, { "epoch": 1002.828947368421, "grad_norm": 1.266355276107788, "learning_rate": 0.0001, "loss": 0.012, "step": 152430 }, { "epoch": 1002.8947368421053, "grad_norm": 1.3311737775802612, "learning_rate": 0.0001, "loss": 0.0119, "step": 152440 }, { "epoch": 1002.9605263157895, "grad_norm": 1.4275450706481934, "learning_rate": 0.0001, "loss": 0.0128, "step": 152450 }, { "epoch": 1003.0263157894736, "grad_norm": 1.430137276649475, "learning_rate": 0.0001, "loss": 0.0178, "step": 152460 }, { "epoch": 1003.0921052631579, "grad_norm": 1.4255601167678833, "learning_rate": 0.0001, "loss": 0.0115, "step": 152470 }, { "epoch": 1003.1578947368421, "grad_norm": 1.353299856185913, "learning_rate": 0.0001, "loss": 0.0134, "step": 152480 }, { "epoch": 1003.2236842105264, "grad_norm": 1.1773455142974854, "learning_rate": 0.0001, "loss": 0.0121, "step": 152490 }, { "epoch": 1003.2894736842105, "grad_norm": 1.8373653888702393, "learning_rate": 0.0001, "loss": 0.0096, "step": 152500 }, { "epoch": 1003.3552631578947, "grad_norm": 1.8845508098602295, "learning_rate": 0.0001, "loss": 0.0153, "step": 152510 }, { "epoch": 1003.421052631579, "grad_norm": 1.5461983680725098, "learning_rate": 0.0001, "loss": 0.0137, "step": 152520 }, { "epoch": 1003.4868421052631, "grad_norm": 1.4085617065429688, "learning_rate": 0.0001, "loss": 0.0099, "step": 152530 }, { "epoch": 1003.5526315789474, "grad_norm": 1.5273512601852417, "learning_rate": 0.0001, "loss": 0.0173, "step": 152540 }, { "epoch": 1003.6184210526316, "grad_norm": 1.55617094039917, "learning_rate": 0.0001, "loss": 0.013, "step": 152550 }, { "epoch": 1003.6842105263158, "grad_norm": 1.7651923894882202, "learning_rate": 0.0001, "loss": 0.0111, "step": 152560 }, { "epoch": 1003.75, "grad_norm": 2.1127805709838867, "learning_rate": 0.0001, "loss": 0.0107, "step": 152570 }, { "epoch": 1003.8157894736842, "grad_norm": 1.4607994556427002, "learning_rate": 0.0001, "loss": 0.0129, "step": 152580 }, { "epoch": 1003.8815789473684, "grad_norm": 1.5692371129989624, "learning_rate": 0.0001, "loss": 0.0115, "step": 152590 }, { "epoch": 1003.9473684210526, "grad_norm": 1.3831056356430054, "learning_rate": 0.0001, "loss": 0.0117, "step": 152600 }, { "epoch": 1004.0131578947369, "grad_norm": 1.3211033344268799, "learning_rate": 0.0001, "loss": 0.0114, "step": 152610 }, { "epoch": 1004.078947368421, "grad_norm": 1.4140509366989136, "learning_rate": 0.0001, "loss": 0.0115, "step": 152620 }, { "epoch": 1004.1447368421053, "grad_norm": 1.3360843658447266, "learning_rate": 0.0001, "loss": 0.0103, "step": 152630 }, { "epoch": 1004.2105263157895, "grad_norm": 1.6318049430847168, "learning_rate": 0.0001, "loss": 0.0121, "step": 152640 }, { "epoch": 1004.2763157894736, "grad_norm": 1.0978399515151978, "learning_rate": 0.0001, "loss": 0.0137, "step": 152650 }, { "epoch": 1004.3421052631579, "grad_norm": 1.4020347595214844, "learning_rate": 0.0001, "loss": 0.0124, "step": 152660 }, { "epoch": 1004.4078947368421, "grad_norm": 1.66563880443573, "learning_rate": 0.0001, "loss": 0.0102, "step": 152670 }, { "epoch": 1004.4736842105264, "grad_norm": 1.6429922580718994, "learning_rate": 0.0001, "loss": 0.0092, "step": 152680 }, { "epoch": 1004.5394736842105, "grad_norm": 1.5053846836090088, "learning_rate": 0.0001, "loss": 0.0111, "step": 152690 }, { "epoch": 1004.6052631578947, "grad_norm": 1.4411985874176025, "learning_rate": 0.0001, "loss": 0.012, "step": 152700 }, { "epoch": 1004.671052631579, "grad_norm": 1.0861884355545044, "learning_rate": 0.0001, "loss": 0.0166, "step": 152710 }, { "epoch": 1004.7368421052631, "grad_norm": 1.725504994392395, "learning_rate": 0.0001, "loss": 0.0141, "step": 152720 }, { "epoch": 1004.8026315789474, "grad_norm": 1.6039748191833496, "learning_rate": 0.0001, "loss": 0.0118, "step": 152730 }, { "epoch": 1004.8684210526316, "grad_norm": 1.3901615142822266, "learning_rate": 0.0001, "loss": 0.013, "step": 152740 }, { "epoch": 1004.9342105263158, "grad_norm": 1.6065644025802612, "learning_rate": 0.0001, "loss": 0.0148, "step": 152750 }, { "epoch": 1005.0, "grad_norm": 2.0869600772857666, "learning_rate": 0.0001, "loss": 0.0139, "step": 152760 }, { "epoch": 1005.0657894736842, "grad_norm": 1.8810038566589355, "learning_rate": 0.0001, "loss": 0.0178, "step": 152770 }, { "epoch": 1005.1315789473684, "grad_norm": 1.230737566947937, "learning_rate": 0.0001, "loss": 0.0091, "step": 152780 }, { "epoch": 1005.1973684210526, "grad_norm": 1.5230940580368042, "learning_rate": 0.0001, "loss": 0.0153, "step": 152790 }, { "epoch": 1005.2631578947369, "grad_norm": 1.895009994506836, "learning_rate": 0.0001, "loss": 0.0158, "step": 152800 }, { "epoch": 1005.328947368421, "grad_norm": 1.1264668703079224, "learning_rate": 0.0001, "loss": 0.0151, "step": 152810 }, { "epoch": 1005.3947368421053, "grad_norm": 1.802573800086975, "learning_rate": 0.0001, "loss": 0.0141, "step": 152820 }, { "epoch": 1005.4605263157895, "grad_norm": 1.5978095531463623, "learning_rate": 0.0001, "loss": 0.0138, "step": 152830 }, { "epoch": 1005.5263157894736, "grad_norm": 1.7466065883636475, "learning_rate": 0.0001, "loss": 0.0087, "step": 152840 }, { "epoch": 1005.5921052631579, "grad_norm": 1.5217915773391724, "learning_rate": 0.0001, "loss": 0.0133, "step": 152850 }, { "epoch": 1005.6578947368421, "grad_norm": 1.3356971740722656, "learning_rate": 0.0001, "loss": 0.011, "step": 152860 }, { "epoch": 1005.7236842105264, "grad_norm": 1.6171499490737915, "learning_rate": 0.0001, "loss": 0.0112, "step": 152870 }, { "epoch": 1005.7894736842105, "grad_norm": 1.3846542835235596, "learning_rate": 0.0001, "loss": 0.0097, "step": 152880 }, { "epoch": 1005.8552631578947, "grad_norm": 1.1711145639419556, "learning_rate": 0.0001, "loss": 0.0109, "step": 152890 }, { "epoch": 1005.921052631579, "grad_norm": 1.1516964435577393, "learning_rate": 0.0001, "loss": 0.011, "step": 152900 }, { "epoch": 1005.9868421052631, "grad_norm": 1.707839846611023, "learning_rate": 0.0001, "loss": 0.0109, "step": 152910 }, { "epoch": 1006.0526315789474, "grad_norm": 1.4264591932296753, "learning_rate": 0.0001, "loss": 0.0114, "step": 152920 }, { "epoch": 1006.1184210526316, "grad_norm": 1.2010518312454224, "learning_rate": 0.0001, "loss": 0.0126, "step": 152930 }, { "epoch": 1006.1842105263158, "grad_norm": 1.5498546361923218, "learning_rate": 0.0001, "loss": 0.0111, "step": 152940 }, { "epoch": 1006.25, "grad_norm": 1.3190948963165283, "learning_rate": 0.0001, "loss": 0.011, "step": 152950 }, { "epoch": 1006.3157894736842, "grad_norm": 1.5601227283477783, "learning_rate": 0.0001, "loss": 0.0121, "step": 152960 }, { "epoch": 1006.3815789473684, "grad_norm": 1.5625396966934204, "learning_rate": 0.0001, "loss": 0.0122, "step": 152970 }, { "epoch": 1006.4473684210526, "grad_norm": 2.0783395767211914, "learning_rate": 0.0001, "loss": 0.015, "step": 152980 }, { "epoch": 1006.5131578947369, "grad_norm": 1.1399238109588623, "learning_rate": 0.0001, "loss": 0.0115, "step": 152990 }, { "epoch": 1006.578947368421, "grad_norm": 1.452682614326477, "learning_rate": 0.0001, "loss": 0.0125, "step": 153000 }, { "epoch": 1006.6447368421053, "grad_norm": 1.5503461360931396, "learning_rate": 0.0001, "loss": 0.0135, "step": 153010 }, { "epoch": 1006.7105263157895, "grad_norm": 1.6053524017333984, "learning_rate": 0.0001, "loss": 0.0135, "step": 153020 }, { "epoch": 1006.7763157894736, "grad_norm": 1.8017010688781738, "learning_rate": 0.0001, "loss": 0.0107, "step": 153030 }, { "epoch": 1006.8421052631579, "grad_norm": 1.8186235427856445, "learning_rate": 0.0001, "loss": 0.0147, "step": 153040 }, { "epoch": 1006.9078947368421, "grad_norm": 1.390514612197876, "learning_rate": 0.0001, "loss": 0.0098, "step": 153050 }, { "epoch": 1006.9736842105264, "grad_norm": 0.9616032242774963, "learning_rate": 0.0001, "loss": 0.0121, "step": 153060 }, { "epoch": 1007.0394736842105, "grad_norm": 1.2807796001434326, "learning_rate": 0.0001, "loss": 0.0128, "step": 153070 }, { "epoch": 1007.1052631578947, "grad_norm": 1.4000318050384521, "learning_rate": 0.0001, "loss": 0.0121, "step": 153080 }, { "epoch": 1007.171052631579, "grad_norm": 1.488100528717041, "learning_rate": 0.0001, "loss": 0.0106, "step": 153090 }, { "epoch": 1007.2368421052631, "grad_norm": 1.385945200920105, "learning_rate": 0.0001, "loss": 0.0143, "step": 153100 }, { "epoch": 1007.3026315789474, "grad_norm": 1.6753846406936646, "learning_rate": 0.0001, "loss": 0.0134, "step": 153110 }, { "epoch": 1007.3684210526316, "grad_norm": 1.6224547624588013, "learning_rate": 0.0001, "loss": 0.0129, "step": 153120 }, { "epoch": 1007.4342105263158, "grad_norm": 1.36086905002594, "learning_rate": 0.0001, "loss": 0.0132, "step": 153130 }, { "epoch": 1007.5, "grad_norm": 1.5125747919082642, "learning_rate": 0.0001, "loss": 0.0132, "step": 153140 }, { "epoch": 1007.5657894736842, "grad_norm": 1.711441159248352, "learning_rate": 0.0001, "loss": 0.0116, "step": 153150 }, { "epoch": 1007.6315789473684, "grad_norm": 1.3555200099945068, "learning_rate": 0.0001, "loss": 0.0125, "step": 153160 }, { "epoch": 1007.6973684210526, "grad_norm": 1.3760069608688354, "learning_rate": 0.0001, "loss": 0.0155, "step": 153170 }, { "epoch": 1007.7631578947369, "grad_norm": 1.1885364055633545, "learning_rate": 0.0001, "loss": 0.0097, "step": 153180 }, { "epoch": 1007.828947368421, "grad_norm": 1.469584345817566, "learning_rate": 0.0001, "loss": 0.0148, "step": 153190 }, { "epoch": 1007.8947368421053, "grad_norm": 1.11402428150177, "learning_rate": 0.0001, "loss": 0.0086, "step": 153200 }, { "epoch": 1007.9605263157895, "grad_norm": 1.3182264566421509, "learning_rate": 0.0001, "loss": 0.0123, "step": 153210 }, { "epoch": 1008.0263157894736, "grad_norm": 1.510223627090454, "learning_rate": 0.0001, "loss": 0.0123, "step": 153220 }, { "epoch": 1008.0921052631579, "grad_norm": 1.3849061727523804, "learning_rate": 0.0001, "loss": 0.013, "step": 153230 }, { "epoch": 1008.1578947368421, "grad_norm": 1.365692377090454, "learning_rate": 0.0001, "loss": 0.0124, "step": 153240 }, { "epoch": 1008.2236842105264, "grad_norm": 1.1066579818725586, "learning_rate": 0.0001, "loss": 0.0114, "step": 153250 }, { "epoch": 1008.2894736842105, "grad_norm": 1.5905228853225708, "learning_rate": 0.0001, "loss": 0.0112, "step": 153260 }, { "epoch": 1008.3552631578947, "grad_norm": 1.2469727993011475, "learning_rate": 0.0001, "loss": 0.0126, "step": 153270 }, { "epoch": 1008.421052631579, "grad_norm": 1.1468486785888672, "learning_rate": 0.0001, "loss": 0.0134, "step": 153280 }, { "epoch": 1008.4868421052631, "grad_norm": 1.2394890785217285, "learning_rate": 0.0001, "loss": 0.0113, "step": 153290 }, { "epoch": 1008.5526315789474, "grad_norm": 1.4912736415863037, "learning_rate": 0.0001, "loss": 0.011, "step": 153300 }, { "epoch": 1008.6184210526316, "grad_norm": 1.4924973249435425, "learning_rate": 0.0001, "loss": 0.0122, "step": 153310 }, { "epoch": 1008.6842105263158, "grad_norm": 1.3490755558013916, "learning_rate": 0.0001, "loss": 0.0091, "step": 153320 }, { "epoch": 1008.75, "grad_norm": 1.435987114906311, "learning_rate": 0.0001, "loss": 0.0123, "step": 153330 }, { "epoch": 1008.8157894736842, "grad_norm": 1.8214837312698364, "learning_rate": 0.0001, "loss": 0.0134, "step": 153340 }, { "epoch": 1008.8815789473684, "grad_norm": 1.1891599893569946, "learning_rate": 0.0001, "loss": 0.0137, "step": 153350 }, { "epoch": 1008.9473684210526, "grad_norm": 1.3621939420700073, "learning_rate": 0.0001, "loss": 0.0144, "step": 153360 }, { "epoch": 1009.0131578947369, "grad_norm": 1.4373114109039307, "learning_rate": 0.0001, "loss": 0.0158, "step": 153370 }, { "epoch": 1009.078947368421, "grad_norm": 1.4989900588989258, "learning_rate": 0.0001, "loss": 0.013, "step": 153380 }, { "epoch": 1009.1447368421053, "grad_norm": 1.9005622863769531, "learning_rate": 0.0001, "loss": 0.0101, "step": 153390 }, { "epoch": 1009.2105263157895, "grad_norm": 1.5609956979751587, "learning_rate": 0.0001, "loss": 0.0173, "step": 153400 }, { "epoch": 1009.2763157894736, "grad_norm": 1.8356720209121704, "learning_rate": 0.0001, "loss": 0.0166, "step": 153410 }, { "epoch": 1009.3421052631579, "grad_norm": 1.2861074209213257, "learning_rate": 0.0001, "loss": 0.0104, "step": 153420 }, { "epoch": 1009.4078947368421, "grad_norm": 1.9659264087677002, "learning_rate": 0.0001, "loss": 0.0106, "step": 153430 }, { "epoch": 1009.4736842105264, "grad_norm": 1.1312843561172485, "learning_rate": 0.0001, "loss": 0.0125, "step": 153440 }, { "epoch": 1009.5394736842105, "grad_norm": 1.2358123064041138, "learning_rate": 0.0001, "loss": 0.0107, "step": 153450 }, { "epoch": 1009.6052631578947, "grad_norm": 1.3213691711425781, "learning_rate": 0.0001, "loss": 0.0119, "step": 153460 }, { "epoch": 1009.671052631579, "grad_norm": 1.1391417980194092, "learning_rate": 0.0001, "loss": 0.0104, "step": 153470 }, { "epoch": 1009.7368421052631, "grad_norm": 1.0275081396102905, "learning_rate": 0.0001, "loss": 0.0118, "step": 153480 }, { "epoch": 1009.8026315789474, "grad_norm": 1.3866429328918457, "learning_rate": 0.0001, "loss": 0.0151, "step": 153490 }, { "epoch": 1009.8684210526316, "grad_norm": 1.5349925756454468, "learning_rate": 0.0001, "loss": 0.0126, "step": 153500 }, { "epoch": 1009.9342105263158, "grad_norm": 1.7790460586547852, "learning_rate": 0.0001, "loss": 0.0095, "step": 153510 }, { "epoch": 1010.0, "grad_norm": 1.3616803884506226, "learning_rate": 0.0001, "loss": 0.0115, "step": 153520 }, { "epoch": 1010.0657894736842, "grad_norm": 1.5561631917953491, "learning_rate": 0.0001, "loss": 0.0097, "step": 153530 }, { "epoch": 1010.1315789473684, "grad_norm": 1.8692247867584229, "learning_rate": 0.0001, "loss": 0.0123, "step": 153540 }, { "epoch": 1010.1973684210526, "grad_norm": 1.0889755487442017, "learning_rate": 0.0001, "loss": 0.0107, "step": 153550 }, { "epoch": 1010.2631578947369, "grad_norm": 1.3444809913635254, "learning_rate": 0.0001, "loss": 0.018, "step": 153560 }, { "epoch": 1010.328947368421, "grad_norm": 1.7446584701538086, "learning_rate": 0.0001, "loss": 0.0104, "step": 153570 }, { "epoch": 1010.3947368421053, "grad_norm": 1.2040010690689087, "learning_rate": 0.0001, "loss": 0.0137, "step": 153580 }, { "epoch": 1010.4605263157895, "grad_norm": 1.413554310798645, "learning_rate": 0.0001, "loss": 0.015, "step": 153590 }, { "epoch": 1010.5263157894736, "grad_norm": 1.580216884613037, "learning_rate": 0.0001, "loss": 0.0129, "step": 153600 }, { "epoch": 1010.5921052631579, "grad_norm": 1.4029887914657593, "learning_rate": 0.0001, "loss": 0.0131, "step": 153610 }, { "epoch": 1010.6578947368421, "grad_norm": 1.7225489616394043, "learning_rate": 0.0001, "loss": 0.0099, "step": 153620 }, { "epoch": 1010.7236842105264, "grad_norm": 1.4950037002563477, "learning_rate": 0.0001, "loss": 0.0127, "step": 153630 }, { "epoch": 1010.7894736842105, "grad_norm": 1.4830204248428345, "learning_rate": 0.0001, "loss": 0.0115, "step": 153640 }, { "epoch": 1010.8552631578947, "grad_norm": 1.4951541423797607, "learning_rate": 0.0001, "loss": 0.012, "step": 153650 }, { "epoch": 1010.921052631579, "grad_norm": 1.7469168901443481, "learning_rate": 0.0001, "loss": 0.0122, "step": 153660 }, { "epoch": 1010.9868421052631, "grad_norm": 1.2211881875991821, "learning_rate": 0.0001, "loss": 0.0118, "step": 153670 }, { "epoch": 1011.0526315789474, "grad_norm": 1.1614576578140259, "learning_rate": 0.0001, "loss": 0.012, "step": 153680 }, { "epoch": 1011.1184210526316, "grad_norm": 1.168620228767395, "learning_rate": 0.0001, "loss": 0.0133, "step": 153690 }, { "epoch": 1011.1842105263158, "grad_norm": 1.4936285018920898, "learning_rate": 0.0001, "loss": 0.0143, "step": 153700 }, { "epoch": 1011.25, "grad_norm": 1.2096457481384277, "learning_rate": 0.0001, "loss": 0.0162, "step": 153710 }, { "epoch": 1011.3157894736842, "grad_norm": 1.1890270709991455, "learning_rate": 0.0001, "loss": 0.0152, "step": 153720 }, { "epoch": 1011.3815789473684, "grad_norm": 1.3755735158920288, "learning_rate": 0.0001, "loss": 0.0107, "step": 153730 }, { "epoch": 1011.4473684210526, "grad_norm": 1.1856269836425781, "learning_rate": 0.0001, "loss": 0.014, "step": 153740 }, { "epoch": 1011.5131578947369, "grad_norm": 1.7879890203475952, "learning_rate": 0.0001, "loss": 0.0139, "step": 153750 }, { "epoch": 1011.578947368421, "grad_norm": 0.9019443392753601, "learning_rate": 0.0001, "loss": 0.0094, "step": 153760 }, { "epoch": 1011.6447368421053, "grad_norm": 1.4425671100616455, "learning_rate": 0.0001, "loss": 0.0135, "step": 153770 }, { "epoch": 1011.7105263157895, "grad_norm": 1.0872328281402588, "learning_rate": 0.0001, "loss": 0.0171, "step": 153780 }, { "epoch": 1011.7763157894736, "grad_norm": 1.8106046915054321, "learning_rate": 0.0001, "loss": 0.01, "step": 153790 }, { "epoch": 1011.8421052631579, "grad_norm": 1.7438851594924927, "learning_rate": 0.0001, "loss": 0.0096, "step": 153800 }, { "epoch": 1011.9078947368421, "grad_norm": 1.2830860614776611, "learning_rate": 0.0001, "loss": 0.0121, "step": 153810 }, { "epoch": 1011.9736842105264, "grad_norm": 1.25044846534729, "learning_rate": 0.0001, "loss": 0.0108, "step": 153820 }, { "epoch": 1012.0394736842105, "grad_norm": 1.2767047882080078, "learning_rate": 0.0001, "loss": 0.0118, "step": 153830 }, { "epoch": 1012.1052631578947, "grad_norm": 1.2083529233932495, "learning_rate": 0.0001, "loss": 0.0098, "step": 153840 }, { "epoch": 1012.171052631579, "grad_norm": 1.3572767972946167, "learning_rate": 0.0001, "loss": 0.0102, "step": 153850 }, { "epoch": 1012.2368421052631, "grad_norm": 1.3276201486587524, "learning_rate": 0.0001, "loss": 0.0135, "step": 153860 }, { "epoch": 1012.3026315789474, "grad_norm": 1.6305973529815674, "learning_rate": 0.0001, "loss": 0.0093, "step": 153870 }, { "epoch": 1012.3684210526316, "grad_norm": 1.2604589462280273, "learning_rate": 0.0001, "loss": 0.0155, "step": 153880 }, { "epoch": 1012.4342105263158, "grad_norm": 1.3941543102264404, "learning_rate": 0.0001, "loss": 0.0177, "step": 153890 }, { "epoch": 1012.5, "grad_norm": 1.694881558418274, "learning_rate": 0.0001, "loss": 0.0121, "step": 153900 }, { "epoch": 1012.5657894736842, "grad_norm": 1.5868791341781616, "learning_rate": 0.0001, "loss": 0.0158, "step": 153910 }, { "epoch": 1012.6315789473684, "grad_norm": 1.2527388334274292, "learning_rate": 0.0001, "loss": 0.0118, "step": 153920 }, { "epoch": 1012.6973684210526, "grad_norm": 1.6309040784835815, "learning_rate": 0.0001, "loss": 0.0123, "step": 153930 }, { "epoch": 1012.7631578947369, "grad_norm": 1.7905900478363037, "learning_rate": 0.0001, "loss": 0.0121, "step": 153940 }, { "epoch": 1012.828947368421, "grad_norm": 1.0173609256744385, "learning_rate": 0.0001, "loss": 0.0135, "step": 153950 }, { "epoch": 1012.8947368421053, "grad_norm": 1.3872674703598022, "learning_rate": 0.0001, "loss": 0.0132, "step": 153960 }, { "epoch": 1012.9605263157895, "grad_norm": 1.5388840436935425, "learning_rate": 0.0001, "loss": 0.0114, "step": 153970 }, { "epoch": 1013.0263157894736, "grad_norm": 1.6480644941329956, "learning_rate": 0.0001, "loss": 0.0128, "step": 153980 }, { "epoch": 1013.0921052631579, "grad_norm": 1.1007527112960815, "learning_rate": 0.0001, "loss": 0.0096, "step": 153990 }, { "epoch": 1013.1578947368421, "grad_norm": 1.6032131910324097, "learning_rate": 0.0001, "loss": 0.01, "step": 154000 }, { "epoch": 1013.2236842105264, "grad_norm": 1.3920129537582397, "learning_rate": 0.0001, "loss": 0.013, "step": 154010 }, { "epoch": 1013.2894736842105, "grad_norm": 1.2013909816741943, "learning_rate": 0.0001, "loss": 0.0154, "step": 154020 }, { "epoch": 1013.3552631578947, "grad_norm": 1.2499034404754639, "learning_rate": 0.0001, "loss": 0.0126, "step": 154030 }, { "epoch": 1013.421052631579, "grad_norm": 1.3107779026031494, "learning_rate": 0.0001, "loss": 0.0137, "step": 154040 }, { "epoch": 1013.4868421052631, "grad_norm": 0.995363712310791, "learning_rate": 0.0001, "loss": 0.0115, "step": 154050 }, { "epoch": 1013.5526315789474, "grad_norm": 1.1709951162338257, "learning_rate": 0.0001, "loss": 0.0111, "step": 154060 }, { "epoch": 1013.6184210526316, "grad_norm": 1.3044215440750122, "learning_rate": 0.0001, "loss": 0.016, "step": 154070 }, { "epoch": 1013.6842105263158, "grad_norm": 1.1679370403289795, "learning_rate": 0.0001, "loss": 0.0122, "step": 154080 }, { "epoch": 1013.75, "grad_norm": 1.4870814085006714, "learning_rate": 0.0001, "loss": 0.0115, "step": 154090 }, { "epoch": 1013.8157894736842, "grad_norm": 1.4119811058044434, "learning_rate": 0.0001, "loss": 0.012, "step": 154100 }, { "epoch": 1013.8815789473684, "grad_norm": 1.3381848335266113, "learning_rate": 0.0001, "loss": 0.0139, "step": 154110 }, { "epoch": 1013.9473684210526, "grad_norm": 1.2412316799163818, "learning_rate": 0.0001, "loss": 0.0115, "step": 154120 }, { "epoch": 1014.0131578947369, "grad_norm": 1.5553112030029297, "learning_rate": 0.0001, "loss": 0.0169, "step": 154130 }, { "epoch": 1014.078947368421, "grad_norm": 1.3604353666305542, "learning_rate": 0.0001, "loss": 0.0114, "step": 154140 }, { "epoch": 1014.1447368421053, "grad_norm": 1.5805790424346924, "learning_rate": 0.0001, "loss": 0.0141, "step": 154150 }, { "epoch": 1014.2105263157895, "grad_norm": 1.2888113260269165, "learning_rate": 0.0001, "loss": 0.0108, "step": 154160 }, { "epoch": 1014.2763157894736, "grad_norm": 0.9960551857948303, "learning_rate": 0.0001, "loss": 0.0098, "step": 154170 }, { "epoch": 1014.3421052631579, "grad_norm": 1.5314582586288452, "learning_rate": 0.0001, "loss": 0.0118, "step": 154180 }, { "epoch": 1014.4078947368421, "grad_norm": 1.5947290658950806, "learning_rate": 0.0001, "loss": 0.011, "step": 154190 }, { "epoch": 1014.4736842105264, "grad_norm": 1.6806923151016235, "learning_rate": 0.0001, "loss": 0.0093, "step": 154200 }, { "epoch": 1014.5394736842105, "grad_norm": 1.3362884521484375, "learning_rate": 0.0001, "loss": 0.0115, "step": 154210 }, { "epoch": 1014.6052631578947, "grad_norm": 1.2426234483718872, "learning_rate": 0.0001, "loss": 0.0149, "step": 154220 }, { "epoch": 1014.671052631579, "grad_norm": 1.3661725521087646, "learning_rate": 0.0001, "loss": 0.0178, "step": 154230 }, { "epoch": 1014.7368421052631, "grad_norm": 1.7163219451904297, "learning_rate": 0.0001, "loss": 0.0124, "step": 154240 }, { "epoch": 1014.8026315789474, "grad_norm": 1.086313009262085, "learning_rate": 0.0001, "loss": 0.0132, "step": 154250 }, { "epoch": 1014.8684210526316, "grad_norm": 1.533553123474121, "learning_rate": 0.0001, "loss": 0.0129, "step": 154260 }, { "epoch": 1014.9342105263158, "grad_norm": 1.3820143938064575, "learning_rate": 0.0001, "loss": 0.013, "step": 154270 }, { "epoch": 1015.0, "grad_norm": 1.4424455165863037, "learning_rate": 0.0001, "loss": 0.0138, "step": 154280 }, { "epoch": 1015.0657894736842, "grad_norm": 1.3563019037246704, "learning_rate": 0.0001, "loss": 0.0126, "step": 154290 }, { "epoch": 1015.1315789473684, "grad_norm": 1.2366578578948975, "learning_rate": 0.0001, "loss": 0.0122, "step": 154300 }, { "epoch": 1015.1973684210526, "grad_norm": 1.6944180727005005, "learning_rate": 0.0001, "loss": 0.0135, "step": 154310 }, { "epoch": 1015.2631578947369, "grad_norm": 1.7917026281356812, "learning_rate": 0.0001, "loss": 0.0127, "step": 154320 }, { "epoch": 1015.328947368421, "grad_norm": 1.6567447185516357, "learning_rate": 0.0001, "loss": 0.0103, "step": 154330 }, { "epoch": 1015.3947368421053, "grad_norm": 1.3662488460540771, "learning_rate": 0.0001, "loss": 0.013, "step": 154340 }, { "epoch": 1015.4605263157895, "grad_norm": 1.3360621929168701, "learning_rate": 0.0001, "loss": 0.0104, "step": 154350 }, { "epoch": 1015.5263157894736, "grad_norm": 1.2195957899093628, "learning_rate": 0.0001, "loss": 0.0122, "step": 154360 }, { "epoch": 1015.5921052631579, "grad_norm": 1.6565126180648804, "learning_rate": 0.0001, "loss": 0.0173, "step": 154370 }, { "epoch": 1015.6578947368421, "grad_norm": 1.5284736156463623, "learning_rate": 0.0001, "loss": 0.0091, "step": 154380 }, { "epoch": 1015.7236842105264, "grad_norm": 1.3636149168014526, "learning_rate": 0.0001, "loss": 0.0136, "step": 154390 }, { "epoch": 1015.7894736842105, "grad_norm": 1.4908701181411743, "learning_rate": 0.0001, "loss": 0.0106, "step": 154400 }, { "epoch": 1015.8552631578947, "grad_norm": 1.1945390701293945, "learning_rate": 0.0001, "loss": 0.0144, "step": 154410 }, { "epoch": 1015.921052631579, "grad_norm": 1.6412758827209473, "learning_rate": 0.0001, "loss": 0.0116, "step": 154420 }, { "epoch": 1015.9868421052631, "grad_norm": 1.7158865928649902, "learning_rate": 0.0001, "loss": 0.0135, "step": 154430 }, { "epoch": 1016.0526315789474, "grad_norm": 1.636702060699463, "learning_rate": 0.0001, "loss": 0.0137, "step": 154440 }, { "epoch": 1016.1184210526316, "grad_norm": 1.2439113855361938, "learning_rate": 0.0001, "loss": 0.0134, "step": 154450 }, { "epoch": 1016.1842105263158, "grad_norm": 1.7015265226364136, "learning_rate": 0.0001, "loss": 0.0152, "step": 154460 }, { "epoch": 1016.25, "grad_norm": 1.6470087766647339, "learning_rate": 0.0001, "loss": 0.012, "step": 154470 }, { "epoch": 1016.3157894736842, "grad_norm": 1.3456132411956787, "learning_rate": 0.0001, "loss": 0.013, "step": 154480 }, { "epoch": 1016.3815789473684, "grad_norm": 1.1026002168655396, "learning_rate": 0.0001, "loss": 0.0104, "step": 154490 }, { "epoch": 1016.4473684210526, "grad_norm": 1.6196143627166748, "learning_rate": 0.0001, "loss": 0.0128, "step": 154500 }, { "epoch": 1016.5131578947369, "grad_norm": 1.7849818468093872, "learning_rate": 0.0001, "loss": 0.0108, "step": 154510 }, { "epoch": 1016.578947368421, "grad_norm": 1.3578969240188599, "learning_rate": 0.0001, "loss": 0.0094, "step": 154520 }, { "epoch": 1016.6447368421053, "grad_norm": 1.551443099975586, "learning_rate": 0.0001, "loss": 0.0137, "step": 154530 }, { "epoch": 1016.7105263157895, "grad_norm": 1.5172971487045288, "learning_rate": 0.0001, "loss": 0.0175, "step": 154540 }, { "epoch": 1016.7763157894736, "grad_norm": 1.6664410829544067, "learning_rate": 0.0001, "loss": 0.0173, "step": 154550 }, { "epoch": 1016.8421052631579, "grad_norm": 1.4044041633605957, "learning_rate": 0.0001, "loss": 0.0113, "step": 154560 }, { "epoch": 1016.9078947368421, "grad_norm": 1.1968185901641846, "learning_rate": 0.0001, "loss": 0.0102, "step": 154570 }, { "epoch": 1016.9736842105264, "grad_norm": 1.2950619459152222, "learning_rate": 0.0001, "loss": 0.0089, "step": 154580 }, { "epoch": 1017.0394736842105, "grad_norm": 1.6938825845718384, "learning_rate": 0.0001, "loss": 0.0124, "step": 154590 }, { "epoch": 1017.1052631578947, "grad_norm": 1.5722523927688599, "learning_rate": 0.0001, "loss": 0.0139, "step": 154600 }, { "epoch": 1017.171052631579, "grad_norm": 1.3833427429199219, "learning_rate": 0.0001, "loss": 0.011, "step": 154610 }, { "epoch": 1017.2368421052631, "grad_norm": 1.5014735460281372, "learning_rate": 0.0001, "loss": 0.0122, "step": 154620 }, { "epoch": 1017.3026315789474, "grad_norm": 1.273349642753601, "learning_rate": 0.0001, "loss": 0.0113, "step": 154630 }, { "epoch": 1017.3684210526316, "grad_norm": 1.290976643562317, "learning_rate": 0.0001, "loss": 0.0136, "step": 154640 }, { "epoch": 1017.4342105263158, "grad_norm": 1.3881967067718506, "learning_rate": 0.0001, "loss": 0.0109, "step": 154650 }, { "epoch": 1017.5, "grad_norm": 1.403576135635376, "learning_rate": 0.0001, "loss": 0.0121, "step": 154660 }, { "epoch": 1017.5657894736842, "grad_norm": 1.331785798072815, "learning_rate": 0.0001, "loss": 0.0145, "step": 154670 }, { "epoch": 1017.6315789473684, "grad_norm": 1.2251665592193604, "learning_rate": 0.0001, "loss": 0.0119, "step": 154680 }, { "epoch": 1017.6973684210526, "grad_norm": 1.6073216199874878, "learning_rate": 0.0001, "loss": 0.0153, "step": 154690 }, { "epoch": 1017.7631578947369, "grad_norm": 1.7544001340866089, "learning_rate": 0.0001, "loss": 0.0105, "step": 154700 }, { "epoch": 1017.828947368421, "grad_norm": 1.2697007656097412, "learning_rate": 0.0001, "loss": 0.0086, "step": 154710 }, { "epoch": 1017.8947368421053, "grad_norm": 1.3907967805862427, "learning_rate": 0.0001, "loss": 0.0143, "step": 154720 }, { "epoch": 1017.9605263157895, "grad_norm": 1.7824379205703735, "learning_rate": 0.0001, "loss": 0.0142, "step": 154730 }, { "epoch": 1018.0263157894736, "grad_norm": 1.7746797800064087, "learning_rate": 0.0001, "loss": 0.0095, "step": 154740 }, { "epoch": 1018.0921052631579, "grad_norm": 1.6190232038497925, "learning_rate": 0.0001, "loss": 0.0133, "step": 154750 }, { "epoch": 1018.1578947368421, "grad_norm": 1.5060510635375977, "learning_rate": 0.0001, "loss": 0.0132, "step": 154760 }, { "epoch": 1018.2236842105264, "grad_norm": 1.8229182958602905, "learning_rate": 0.0001, "loss": 0.0162, "step": 154770 }, { "epoch": 1018.2894736842105, "grad_norm": 1.5584462881088257, "learning_rate": 0.0001, "loss": 0.0149, "step": 154780 }, { "epoch": 1018.3552631578947, "grad_norm": 0.8637866377830505, "learning_rate": 0.0001, "loss": 0.0087, "step": 154790 }, { "epoch": 1018.421052631579, "grad_norm": 1.3876789808273315, "learning_rate": 0.0001, "loss": 0.0107, "step": 154800 }, { "epoch": 1018.4868421052631, "grad_norm": 0.8624855279922485, "learning_rate": 0.0001, "loss": 0.0092, "step": 154810 }, { "epoch": 1018.5526315789474, "grad_norm": 1.9611936807632446, "learning_rate": 0.0001, "loss": 0.0121, "step": 154820 }, { "epoch": 1018.6184210526316, "grad_norm": 1.7147067785263062, "learning_rate": 0.0001, "loss": 0.0097, "step": 154830 }, { "epoch": 1018.6842105263158, "grad_norm": 1.5242652893066406, "learning_rate": 0.0001, "loss": 0.018, "step": 154840 }, { "epoch": 1018.75, "grad_norm": 1.4496244192123413, "learning_rate": 0.0001, "loss": 0.0095, "step": 154850 }, { "epoch": 1018.8157894736842, "grad_norm": 1.5985974073410034, "learning_rate": 0.0001, "loss": 0.0147, "step": 154860 }, { "epoch": 1018.8815789473684, "grad_norm": 1.2858387231826782, "learning_rate": 0.0001, "loss": 0.012, "step": 154870 }, { "epoch": 1018.9473684210526, "grad_norm": 1.1089131832122803, "learning_rate": 0.0001, "loss": 0.0123, "step": 154880 }, { "epoch": 1019.0131578947369, "grad_norm": 1.5584036111831665, "learning_rate": 0.0001, "loss": 0.0125, "step": 154890 }, { "epoch": 1019.078947368421, "grad_norm": 1.1327495574951172, "learning_rate": 0.0001, "loss": 0.0126, "step": 154900 }, { "epoch": 1019.1447368421053, "grad_norm": 1.227458119392395, "learning_rate": 0.0001, "loss": 0.0117, "step": 154910 }, { "epoch": 1019.2105263157895, "grad_norm": 1.0661594867706299, "learning_rate": 0.0001, "loss": 0.0113, "step": 154920 }, { "epoch": 1019.2763157894736, "grad_norm": 1.4789010286331177, "learning_rate": 0.0001, "loss": 0.0125, "step": 154930 }, { "epoch": 1019.3421052631579, "grad_norm": 1.3471680879592896, "learning_rate": 0.0001, "loss": 0.0145, "step": 154940 }, { "epoch": 1019.4078947368421, "grad_norm": 1.815901517868042, "learning_rate": 0.0001, "loss": 0.0158, "step": 154950 }, { "epoch": 1019.4736842105264, "grad_norm": 1.6797853708267212, "learning_rate": 0.0001, "loss": 0.01, "step": 154960 }, { "epoch": 1019.5394736842105, "grad_norm": 1.3961758613586426, "learning_rate": 0.0001, "loss": 0.0145, "step": 154970 }, { "epoch": 1019.6052631578947, "grad_norm": 1.1569780111312866, "learning_rate": 0.0001, "loss": 0.0109, "step": 154980 }, { "epoch": 1019.671052631579, "grad_norm": 1.1948788166046143, "learning_rate": 0.0001, "loss": 0.0124, "step": 154990 }, { "epoch": 1019.7368421052631, "grad_norm": 1.806797742843628, "learning_rate": 0.0001, "loss": 0.0103, "step": 155000 }, { "epoch": 1019.8026315789474, "grad_norm": 1.576991081237793, "learning_rate": 0.0001, "loss": 0.0166, "step": 155010 }, { "epoch": 1019.8684210526316, "grad_norm": 1.8808319568634033, "learning_rate": 0.0001, "loss": 0.0163, "step": 155020 }, { "epoch": 1019.9342105263158, "grad_norm": 1.8742523193359375, "learning_rate": 0.0001, "loss": 0.0105, "step": 155030 }, { "epoch": 1020.0, "grad_norm": 2.0547053813934326, "learning_rate": 0.0001, "loss": 0.0101, "step": 155040 }, { "epoch": 1020.0657894736842, "grad_norm": 1.5953959226608276, "learning_rate": 0.0001, "loss": 0.0158, "step": 155050 }, { "epoch": 1020.1315789473684, "grad_norm": 1.3166053295135498, "learning_rate": 0.0001, "loss": 0.011, "step": 155060 }, { "epoch": 1020.1973684210526, "grad_norm": 1.2125540971755981, "learning_rate": 0.0001, "loss": 0.0117, "step": 155070 }, { "epoch": 1020.2631578947369, "grad_norm": 1.5531022548675537, "learning_rate": 0.0001, "loss": 0.0122, "step": 155080 }, { "epoch": 1020.328947368421, "grad_norm": 1.2674193382263184, "learning_rate": 0.0001, "loss": 0.0125, "step": 155090 }, { "epoch": 1020.3947368421053, "grad_norm": 1.1785848140716553, "learning_rate": 0.0001, "loss": 0.0103, "step": 155100 }, { "epoch": 1020.4605263157895, "grad_norm": 1.27052903175354, "learning_rate": 0.0001, "loss": 0.0123, "step": 155110 }, { "epoch": 1020.5263157894736, "grad_norm": 1.40677809715271, "learning_rate": 0.0001, "loss": 0.0156, "step": 155120 }, { "epoch": 1020.5921052631579, "grad_norm": 1.7009413242340088, "learning_rate": 0.0001, "loss": 0.0145, "step": 155130 }, { "epoch": 1020.6578947368421, "grad_norm": 1.3469345569610596, "learning_rate": 0.0001, "loss": 0.0114, "step": 155140 }, { "epoch": 1020.7236842105264, "grad_norm": 1.3070282936096191, "learning_rate": 0.0001, "loss": 0.0123, "step": 155150 }, { "epoch": 1020.7894736842105, "grad_norm": 1.1965770721435547, "learning_rate": 0.0001, "loss": 0.0115, "step": 155160 }, { "epoch": 1020.8552631578947, "grad_norm": 1.1749569177627563, "learning_rate": 0.0001, "loss": 0.0112, "step": 155170 }, { "epoch": 1020.921052631579, "grad_norm": 1.325510859489441, "learning_rate": 0.0001, "loss": 0.0143, "step": 155180 }, { "epoch": 1020.9868421052631, "grad_norm": 1.3588881492614746, "learning_rate": 0.0001, "loss": 0.01, "step": 155190 }, { "epoch": 1021.0526315789474, "grad_norm": 1.8664320707321167, "learning_rate": 0.0001, "loss": 0.0109, "step": 155200 }, { "epoch": 1021.1184210526316, "grad_norm": 1.4277405738830566, "learning_rate": 0.0001, "loss": 0.0118, "step": 155210 }, { "epoch": 1021.1842105263158, "grad_norm": 1.230811357498169, "learning_rate": 0.0001, "loss": 0.014, "step": 155220 }, { "epoch": 1021.25, "grad_norm": 1.7906233072280884, "learning_rate": 0.0001, "loss": 0.0127, "step": 155230 }, { "epoch": 1021.3157894736842, "grad_norm": 1.1447935104370117, "learning_rate": 0.0001, "loss": 0.0099, "step": 155240 }, { "epoch": 1021.3815789473684, "grad_norm": 1.5616029500961304, "learning_rate": 0.0001, "loss": 0.0126, "step": 155250 }, { "epoch": 1021.4473684210526, "grad_norm": 1.9566364288330078, "learning_rate": 0.0001, "loss": 0.0105, "step": 155260 }, { "epoch": 1021.5131578947369, "grad_norm": 1.8154263496398926, "learning_rate": 0.0001, "loss": 0.0131, "step": 155270 }, { "epoch": 1021.578947368421, "grad_norm": 1.9096550941467285, "learning_rate": 0.0001, "loss": 0.0181, "step": 155280 }, { "epoch": 1021.6447368421053, "grad_norm": 1.883499026298523, "learning_rate": 0.0001, "loss": 0.0128, "step": 155290 }, { "epoch": 1021.7105263157895, "grad_norm": 1.7113292217254639, "learning_rate": 0.0001, "loss": 0.0166, "step": 155300 }, { "epoch": 1021.7763157894736, "grad_norm": 1.6300617456436157, "learning_rate": 0.0001, "loss": 0.0141, "step": 155310 }, { "epoch": 1021.8421052631579, "grad_norm": 1.55172598361969, "learning_rate": 0.0001, "loss": 0.0115, "step": 155320 }, { "epoch": 1021.9078947368421, "grad_norm": 1.4255715608596802, "learning_rate": 0.0001, "loss": 0.0104, "step": 155330 }, { "epoch": 1021.9736842105264, "grad_norm": 1.4711692333221436, "learning_rate": 0.0001, "loss": 0.0097, "step": 155340 }, { "epoch": 1022.0394736842105, "grad_norm": 1.4721198081970215, "learning_rate": 0.0001, "loss": 0.0112, "step": 155350 }, { "epoch": 1022.1052631578947, "grad_norm": 1.4370471239089966, "learning_rate": 0.0001, "loss": 0.0115, "step": 155360 }, { "epoch": 1022.171052631579, "grad_norm": 1.6619291305541992, "learning_rate": 0.0001, "loss": 0.0106, "step": 155370 }, { "epoch": 1022.2368421052631, "grad_norm": 1.8503707647323608, "learning_rate": 0.0001, "loss": 0.0157, "step": 155380 }, { "epoch": 1022.3026315789474, "grad_norm": 1.4189090728759766, "learning_rate": 0.0001, "loss": 0.0124, "step": 155390 }, { "epoch": 1022.3684210526316, "grad_norm": 1.9012892246246338, "learning_rate": 0.0001, "loss": 0.0153, "step": 155400 }, { "epoch": 1022.4342105263158, "grad_norm": 1.1739012002944946, "learning_rate": 0.0001, "loss": 0.0133, "step": 155410 }, { "epoch": 1022.5, "grad_norm": 1.828318476676941, "learning_rate": 0.0001, "loss": 0.0084, "step": 155420 }, { "epoch": 1022.5657894736842, "grad_norm": 1.987284779548645, "learning_rate": 0.0001, "loss": 0.0133, "step": 155430 }, { "epoch": 1022.6315789473684, "grad_norm": 1.6498397588729858, "learning_rate": 0.0001, "loss": 0.0104, "step": 155440 }, { "epoch": 1022.6973684210526, "grad_norm": 1.3640129566192627, "learning_rate": 0.0001, "loss": 0.0118, "step": 155450 }, { "epoch": 1022.7631578947369, "grad_norm": 1.6436846256256104, "learning_rate": 0.0001, "loss": 0.0151, "step": 155460 }, { "epoch": 1022.828947368421, "grad_norm": 1.4848088026046753, "learning_rate": 0.0001, "loss": 0.0105, "step": 155470 }, { "epoch": 1022.8947368421053, "grad_norm": 1.5140533447265625, "learning_rate": 0.0001, "loss": 0.0108, "step": 155480 }, { "epoch": 1022.9605263157895, "grad_norm": 1.102616786956787, "learning_rate": 0.0001, "loss": 0.0119, "step": 155490 }, { "epoch": 1023.0263157894736, "grad_norm": 1.0315169095993042, "learning_rate": 0.0001, "loss": 0.0098, "step": 155500 }, { "epoch": 1023.0921052631579, "grad_norm": 1.166774868965149, "learning_rate": 0.0001, "loss": 0.013, "step": 155510 }, { "epoch": 1023.1578947368421, "grad_norm": 1.6017858982086182, "learning_rate": 0.0001, "loss": 0.0123, "step": 155520 }, { "epoch": 1023.2236842105264, "grad_norm": 1.2849953174591064, "learning_rate": 0.0001, "loss": 0.0105, "step": 155530 }, { "epoch": 1023.2894736842105, "grad_norm": 0.9910774827003479, "learning_rate": 0.0001, "loss": 0.0105, "step": 155540 }, { "epoch": 1023.3552631578947, "grad_norm": 1.694897174835205, "learning_rate": 0.0001, "loss": 0.0106, "step": 155550 }, { "epoch": 1023.421052631579, "grad_norm": 1.4341158866882324, "learning_rate": 0.0001, "loss": 0.0122, "step": 155560 }, { "epoch": 1023.4868421052631, "grad_norm": 1.382759928703308, "learning_rate": 0.0001, "loss": 0.009, "step": 155570 }, { "epoch": 1023.5526315789474, "grad_norm": 1.3107826709747314, "learning_rate": 0.0001, "loss": 0.0142, "step": 155580 }, { "epoch": 1023.6184210526316, "grad_norm": 1.5516494512557983, "learning_rate": 0.0001, "loss": 0.0129, "step": 155590 }, { "epoch": 1023.6842105263158, "grad_norm": 1.2369507551193237, "learning_rate": 0.0001, "loss": 0.0136, "step": 155600 }, { "epoch": 1023.75, "grad_norm": 1.7611979246139526, "learning_rate": 0.0001, "loss": 0.0163, "step": 155610 }, { "epoch": 1023.8157894736842, "grad_norm": 1.8483986854553223, "learning_rate": 0.0001, "loss": 0.0113, "step": 155620 }, { "epoch": 1023.8815789473684, "grad_norm": 1.7276570796966553, "learning_rate": 0.0001, "loss": 0.0116, "step": 155630 }, { "epoch": 1023.9473684210526, "grad_norm": 1.418904185295105, "learning_rate": 0.0001, "loss": 0.0176, "step": 155640 }, { "epoch": 1024.0131578947369, "grad_norm": 1.5527732372283936, "learning_rate": 0.0001, "loss": 0.0114, "step": 155650 }, { "epoch": 1024.078947368421, "grad_norm": 1.1616294384002686, "learning_rate": 0.0001, "loss": 0.0197, "step": 155660 }, { "epoch": 1024.1447368421052, "grad_norm": 1.4885900020599365, "learning_rate": 0.0001, "loss": 0.0122, "step": 155670 }, { "epoch": 1024.2105263157894, "grad_norm": 0.9834877252578735, "learning_rate": 0.0001, "loss": 0.011, "step": 155680 }, { "epoch": 1024.2763157894738, "grad_norm": 1.2369673252105713, "learning_rate": 0.0001, "loss": 0.0142, "step": 155690 }, { "epoch": 1024.342105263158, "grad_norm": 1.3597097396850586, "learning_rate": 0.0001, "loss": 0.0108, "step": 155700 }, { "epoch": 1024.407894736842, "grad_norm": 1.1322755813598633, "learning_rate": 0.0001, "loss": 0.014, "step": 155710 }, { "epoch": 1024.4736842105262, "grad_norm": 1.3093804121017456, "learning_rate": 0.0001, "loss": 0.0115, "step": 155720 }, { "epoch": 1024.5394736842106, "grad_norm": 1.296202301979065, "learning_rate": 0.0001, "loss": 0.0149, "step": 155730 }, { "epoch": 1024.6052631578948, "grad_norm": 1.630517601966858, "learning_rate": 0.0001, "loss": 0.0104, "step": 155740 }, { "epoch": 1024.671052631579, "grad_norm": 1.5312964916229248, "learning_rate": 0.0001, "loss": 0.0132, "step": 155750 }, { "epoch": 1024.7368421052631, "grad_norm": 1.6595672369003296, "learning_rate": 0.0001, "loss": 0.0094, "step": 155760 }, { "epoch": 1024.8026315789473, "grad_norm": 1.5563298463821411, "learning_rate": 0.0001, "loss": 0.0086, "step": 155770 }, { "epoch": 1024.8684210526317, "grad_norm": 1.0908845663070679, "learning_rate": 0.0001, "loss": 0.0133, "step": 155780 }, { "epoch": 1024.9342105263158, "grad_norm": 1.1521327495574951, "learning_rate": 0.0001, "loss": 0.0122, "step": 155790 }, { "epoch": 1025.0, "grad_norm": 1.822587490081787, "learning_rate": 0.0001, "loss": 0.0106, "step": 155800 }, { "epoch": 1025.0657894736842, "grad_norm": 1.556991457939148, "learning_rate": 0.0001, "loss": 0.0141, "step": 155810 }, { "epoch": 1025.1315789473683, "grad_norm": 1.473217487335205, "learning_rate": 0.0001, "loss": 0.0106, "step": 155820 }, { "epoch": 1025.1973684210527, "grad_norm": 1.6251413822174072, "learning_rate": 0.0001, "loss": 0.0109, "step": 155830 }, { "epoch": 1025.2631578947369, "grad_norm": 1.760446548461914, "learning_rate": 0.0001, "loss": 0.0107, "step": 155840 }, { "epoch": 1025.328947368421, "grad_norm": 1.6307578086853027, "learning_rate": 0.0001, "loss": 0.015, "step": 155850 }, { "epoch": 1025.3947368421052, "grad_norm": 1.5826685428619385, "learning_rate": 0.0001, "loss": 0.0102, "step": 155860 }, { "epoch": 1025.4605263157894, "grad_norm": 1.542696237564087, "learning_rate": 0.0001, "loss": 0.015, "step": 155870 }, { "epoch": 1025.5263157894738, "grad_norm": 1.4685333967208862, "learning_rate": 0.0001, "loss": 0.011, "step": 155880 }, { "epoch": 1025.592105263158, "grad_norm": 1.2119169235229492, "learning_rate": 0.0001, "loss": 0.0148, "step": 155890 }, { "epoch": 1025.657894736842, "grad_norm": 1.6685466766357422, "learning_rate": 0.0001, "loss": 0.0128, "step": 155900 }, { "epoch": 1025.7236842105262, "grad_norm": 1.85104501247406, "learning_rate": 0.0001, "loss": 0.0101, "step": 155910 }, { "epoch": 1025.7894736842106, "grad_norm": 1.0697617530822754, "learning_rate": 0.0001, "loss": 0.0132, "step": 155920 }, { "epoch": 1025.8552631578948, "grad_norm": 1.3053607940673828, "learning_rate": 0.0001, "loss": 0.0159, "step": 155930 }, { "epoch": 1025.921052631579, "grad_norm": 1.6305280923843384, "learning_rate": 0.0001, "loss": 0.0113, "step": 155940 }, { "epoch": 1025.9868421052631, "grad_norm": 1.625167727470398, "learning_rate": 0.0001, "loss": 0.0106, "step": 155950 }, { "epoch": 1026.0526315789473, "grad_norm": 1.804389476776123, "learning_rate": 0.0001, "loss": 0.0105, "step": 155960 }, { "epoch": 1026.1184210526317, "grad_norm": 1.4287550449371338, "learning_rate": 0.0001, "loss": 0.0109, "step": 155970 }, { "epoch": 1026.1842105263158, "grad_norm": 1.7832714319229126, "learning_rate": 0.0001, "loss": 0.0092, "step": 155980 }, { "epoch": 1026.25, "grad_norm": 1.6458430290222168, "learning_rate": 0.0001, "loss": 0.0136, "step": 155990 }, { "epoch": 1026.3157894736842, "grad_norm": 1.6489216089248657, "learning_rate": 0.0001, "loss": 0.0128, "step": 156000 }, { "epoch": 1026.3815789473683, "grad_norm": 1.4507073163986206, "learning_rate": 0.0001, "loss": 0.0151, "step": 156010 }, { "epoch": 1026.4473684210527, "grad_norm": 1.0675874948501587, "learning_rate": 0.0001, "loss": 0.0141, "step": 156020 }, { "epoch": 1026.5131578947369, "grad_norm": 1.458503246307373, "learning_rate": 0.0001, "loss": 0.0115, "step": 156030 }, { "epoch": 1026.578947368421, "grad_norm": 1.5939960479736328, "learning_rate": 0.0001, "loss": 0.016, "step": 156040 }, { "epoch": 1026.6447368421052, "grad_norm": 1.4954296350479126, "learning_rate": 0.0001, "loss": 0.0109, "step": 156050 }, { "epoch": 1026.7105263157894, "grad_norm": 1.7538517713546753, "learning_rate": 0.0001, "loss": 0.0128, "step": 156060 }, { "epoch": 1026.7763157894738, "grad_norm": 1.3879808187484741, "learning_rate": 0.0001, "loss": 0.0115, "step": 156070 }, { "epoch": 1026.842105263158, "grad_norm": 1.0332587957382202, "learning_rate": 0.0001, "loss": 0.012, "step": 156080 }, { "epoch": 1026.907894736842, "grad_norm": 0.9590634107589722, "learning_rate": 0.0001, "loss": 0.0122, "step": 156090 }, { "epoch": 1026.9736842105262, "grad_norm": 1.5962104797363281, "learning_rate": 0.0001, "loss": 0.0114, "step": 156100 }, { "epoch": 1027.0394736842106, "grad_norm": 1.4015371799468994, "learning_rate": 0.0001, "loss": 0.0117, "step": 156110 }, { "epoch": 1027.1052631578948, "grad_norm": 1.308194637298584, "learning_rate": 0.0001, "loss": 0.0113, "step": 156120 }, { "epoch": 1027.171052631579, "grad_norm": 0.9533730745315552, "learning_rate": 0.0001, "loss": 0.0157, "step": 156130 }, { "epoch": 1027.2368421052631, "grad_norm": 1.4011067152023315, "learning_rate": 0.0001, "loss": 0.0138, "step": 156140 }, { "epoch": 1027.3026315789473, "grad_norm": 1.7002278566360474, "learning_rate": 0.0001, "loss": 0.0111, "step": 156150 }, { "epoch": 1027.3684210526317, "grad_norm": 1.068581461906433, "learning_rate": 0.0001, "loss": 0.0105, "step": 156160 }, { "epoch": 1027.4342105263158, "grad_norm": 1.1607362031936646, "learning_rate": 0.0001, "loss": 0.0141, "step": 156170 }, { "epoch": 1027.5, "grad_norm": 1.4788697957992554, "learning_rate": 0.0001, "loss": 0.0138, "step": 156180 }, { "epoch": 1027.5657894736842, "grad_norm": 1.7062106132507324, "learning_rate": 0.0001, "loss": 0.0111, "step": 156190 }, { "epoch": 1027.6315789473683, "grad_norm": 1.5117179155349731, "learning_rate": 0.0001, "loss": 0.0146, "step": 156200 }, { "epoch": 1027.6973684210527, "grad_norm": 1.4721914529800415, "learning_rate": 0.0001, "loss": 0.0133, "step": 156210 }, { "epoch": 1027.7631578947369, "grad_norm": 1.260057806968689, "learning_rate": 0.0001, "loss": 0.0113, "step": 156220 }, { "epoch": 1027.828947368421, "grad_norm": 1.5354093313217163, "learning_rate": 0.0001, "loss": 0.0106, "step": 156230 }, { "epoch": 1027.8947368421052, "grad_norm": 1.1288377046585083, "learning_rate": 0.0001, "loss": 0.0129, "step": 156240 }, { "epoch": 1027.9605263157894, "grad_norm": 1.2869876623153687, "learning_rate": 0.0001, "loss": 0.014, "step": 156250 }, { "epoch": 1028.0263157894738, "grad_norm": 1.3343485593795776, "learning_rate": 0.0001, "loss": 0.0143, "step": 156260 }, { "epoch": 1028.092105263158, "grad_norm": 1.1431965827941895, "learning_rate": 0.0001, "loss": 0.0109, "step": 156270 }, { "epoch": 1028.157894736842, "grad_norm": 1.375058650970459, "learning_rate": 0.0001, "loss": 0.0178, "step": 156280 }, { "epoch": 1028.2236842105262, "grad_norm": 1.2232561111450195, "learning_rate": 0.0001, "loss": 0.011, "step": 156290 }, { "epoch": 1028.2894736842106, "grad_norm": 1.176246166229248, "learning_rate": 0.0001, "loss": 0.0131, "step": 156300 }, { "epoch": 1028.3552631578948, "grad_norm": 1.4144911766052246, "learning_rate": 0.0001, "loss": 0.0134, "step": 156310 }, { "epoch": 1028.421052631579, "grad_norm": 1.6198796033859253, "learning_rate": 0.0001, "loss": 0.0106, "step": 156320 }, { "epoch": 1028.4868421052631, "grad_norm": 1.1314548254013062, "learning_rate": 0.0001, "loss": 0.0128, "step": 156330 }, { "epoch": 1028.5526315789473, "grad_norm": 1.3994193077087402, "learning_rate": 0.0001, "loss": 0.0103, "step": 156340 }, { "epoch": 1028.6184210526317, "grad_norm": 1.426994800567627, "learning_rate": 0.0001, "loss": 0.0152, "step": 156350 }, { "epoch": 1028.6842105263158, "grad_norm": 1.4191983938217163, "learning_rate": 0.0001, "loss": 0.0138, "step": 156360 }, { "epoch": 1028.75, "grad_norm": 1.1034897565841675, "learning_rate": 0.0001, "loss": 0.012, "step": 156370 }, { "epoch": 1028.8157894736842, "grad_norm": 1.7325797080993652, "learning_rate": 0.0001, "loss": 0.0091, "step": 156380 }, { "epoch": 1028.8815789473683, "grad_norm": 1.6963586807250977, "learning_rate": 0.0001, "loss": 0.013, "step": 156390 }, { "epoch": 1028.9473684210527, "grad_norm": 1.7776670455932617, "learning_rate": 0.0001, "loss": 0.0137, "step": 156400 }, { "epoch": 1029.0131578947369, "grad_norm": 1.2211240530014038, "learning_rate": 0.0001, "loss": 0.0093, "step": 156410 }, { "epoch": 1029.078947368421, "grad_norm": 1.4897273778915405, "learning_rate": 0.0001, "loss": 0.0149, "step": 156420 }, { "epoch": 1029.1447368421052, "grad_norm": 1.218395709991455, "learning_rate": 0.0001, "loss": 0.013, "step": 156430 }, { "epoch": 1029.2105263157894, "grad_norm": 1.3754785060882568, "learning_rate": 0.0001, "loss": 0.0124, "step": 156440 }, { "epoch": 1029.2763157894738, "grad_norm": 1.207905650138855, "learning_rate": 0.0001, "loss": 0.0106, "step": 156450 }, { "epoch": 1029.342105263158, "grad_norm": 1.548630952835083, "learning_rate": 0.0001, "loss": 0.012, "step": 156460 }, { "epoch": 1029.407894736842, "grad_norm": 1.3136160373687744, "learning_rate": 0.0001, "loss": 0.0125, "step": 156470 }, { "epoch": 1029.4736842105262, "grad_norm": 1.3996254205703735, "learning_rate": 0.0001, "loss": 0.0098, "step": 156480 }, { "epoch": 1029.5394736842106, "grad_norm": 1.0270999670028687, "learning_rate": 0.0001, "loss": 0.0158, "step": 156490 }, { "epoch": 1029.6052631578948, "grad_norm": 1.384529709815979, "learning_rate": 0.0001, "loss": 0.0139, "step": 156500 }, { "epoch": 1029.671052631579, "grad_norm": 1.464526653289795, "learning_rate": 0.0001, "loss": 0.0133, "step": 156510 }, { "epoch": 1029.7368421052631, "grad_norm": 1.8850371837615967, "learning_rate": 0.0001, "loss": 0.0099, "step": 156520 }, { "epoch": 1029.8026315789473, "grad_norm": 1.910714864730835, "learning_rate": 0.0001, "loss": 0.0162, "step": 156530 }, { "epoch": 1029.8684210526317, "grad_norm": 1.867448329925537, "learning_rate": 0.0001, "loss": 0.0097, "step": 156540 }, { "epoch": 1029.9342105263158, "grad_norm": 1.6496548652648926, "learning_rate": 0.0001, "loss": 0.0141, "step": 156550 }, { "epoch": 1030.0, "grad_norm": 1.3961589336395264, "learning_rate": 0.0001, "loss": 0.0119, "step": 156560 }, { "epoch": 1030.0657894736842, "grad_norm": 1.6133633852005005, "learning_rate": 0.0001, "loss": 0.011, "step": 156570 }, { "epoch": 1030.1315789473683, "grad_norm": 2.037630558013916, "learning_rate": 0.0001, "loss": 0.0121, "step": 156580 }, { "epoch": 1030.1973684210527, "grad_norm": 1.490082025527954, "learning_rate": 0.0001, "loss": 0.0127, "step": 156590 }, { "epoch": 1030.2631578947369, "grad_norm": 1.3560906648635864, "learning_rate": 0.0001, "loss": 0.0131, "step": 156600 }, { "epoch": 1030.328947368421, "grad_norm": 1.6531617641448975, "learning_rate": 0.0001, "loss": 0.0118, "step": 156610 }, { "epoch": 1030.3947368421052, "grad_norm": 1.4421679973602295, "learning_rate": 0.0001, "loss": 0.0111, "step": 156620 }, { "epoch": 1030.4605263157894, "grad_norm": 1.6645219326019287, "learning_rate": 0.0001, "loss": 0.0099, "step": 156630 }, { "epoch": 1030.5263157894738, "grad_norm": 1.2244065999984741, "learning_rate": 0.0001, "loss": 0.0158, "step": 156640 }, { "epoch": 1030.592105263158, "grad_norm": 1.3549281358718872, "learning_rate": 0.0001, "loss": 0.0137, "step": 156650 }, { "epoch": 1030.657894736842, "grad_norm": 1.68430757522583, "learning_rate": 0.0001, "loss": 0.0108, "step": 156660 }, { "epoch": 1030.7236842105262, "grad_norm": 1.4212641716003418, "learning_rate": 0.0001, "loss": 0.0114, "step": 156670 }, { "epoch": 1030.7894736842106, "grad_norm": 1.2633494138717651, "learning_rate": 0.0001, "loss": 0.0107, "step": 156680 }, { "epoch": 1030.8552631578948, "grad_norm": 1.5187357664108276, "learning_rate": 0.0001, "loss": 0.0103, "step": 156690 }, { "epoch": 1030.921052631579, "grad_norm": 1.671228051185608, "learning_rate": 0.0001, "loss": 0.0114, "step": 156700 }, { "epoch": 1030.9868421052631, "grad_norm": 1.351223111152649, "learning_rate": 0.0001, "loss": 0.0166, "step": 156710 }, { "epoch": 1031.0526315789473, "grad_norm": 1.5529645681381226, "learning_rate": 0.0001, "loss": 0.0157, "step": 156720 }, { "epoch": 1031.1184210526317, "grad_norm": 1.2730921506881714, "learning_rate": 0.0001, "loss": 0.0105, "step": 156730 }, { "epoch": 1031.1842105263158, "grad_norm": 1.6821335554122925, "learning_rate": 0.0001, "loss": 0.015, "step": 156740 }, { "epoch": 1031.25, "grad_norm": 1.7149409055709839, "learning_rate": 0.0001, "loss": 0.0116, "step": 156750 }, { "epoch": 1031.3157894736842, "grad_norm": 1.3939000368118286, "learning_rate": 0.0001, "loss": 0.0112, "step": 156760 }, { "epoch": 1031.3815789473683, "grad_norm": 1.6725808382034302, "learning_rate": 0.0001, "loss": 0.0092, "step": 156770 }, { "epoch": 1031.4473684210527, "grad_norm": 1.5340347290039062, "learning_rate": 0.0001, "loss": 0.0113, "step": 156780 }, { "epoch": 1031.5131578947369, "grad_norm": 1.5184276103973389, "learning_rate": 0.0001, "loss": 0.01, "step": 156790 }, { "epoch": 1031.578947368421, "grad_norm": 1.2978358268737793, "learning_rate": 0.0001, "loss": 0.0115, "step": 156800 }, { "epoch": 1031.6447368421052, "grad_norm": 1.6736880540847778, "learning_rate": 0.0001, "loss": 0.0104, "step": 156810 }, { "epoch": 1031.7105263157894, "grad_norm": 1.3117183446884155, "learning_rate": 0.0001, "loss": 0.0127, "step": 156820 }, { "epoch": 1031.7763157894738, "grad_norm": 1.7468225955963135, "learning_rate": 0.0001, "loss": 0.0116, "step": 156830 }, { "epoch": 1031.842105263158, "grad_norm": 1.5734254121780396, "learning_rate": 0.0001, "loss": 0.0138, "step": 156840 }, { "epoch": 1031.907894736842, "grad_norm": 1.2425897121429443, "learning_rate": 0.0001, "loss": 0.0147, "step": 156850 }, { "epoch": 1031.9736842105262, "grad_norm": 1.3100208044052124, "learning_rate": 0.0001, "loss": 0.018, "step": 156860 }, { "epoch": 1032.0394736842106, "grad_norm": 1.3039387464523315, "learning_rate": 0.0001, "loss": 0.0127, "step": 156870 }, { "epoch": 1032.1052631578948, "grad_norm": 1.319002389907837, "learning_rate": 0.0001, "loss": 0.013, "step": 156880 }, { "epoch": 1032.171052631579, "grad_norm": 1.8581159114837646, "learning_rate": 0.0001, "loss": 0.0138, "step": 156890 }, { "epoch": 1032.2368421052631, "grad_norm": 1.2123470306396484, "learning_rate": 0.0001, "loss": 0.0118, "step": 156900 }, { "epoch": 1032.3026315789473, "grad_norm": 1.2396160364151, "learning_rate": 0.0001, "loss": 0.0111, "step": 156910 }, { "epoch": 1032.3684210526317, "grad_norm": 1.0413413047790527, "learning_rate": 0.0001, "loss": 0.0113, "step": 156920 }, { "epoch": 1032.4342105263158, "grad_norm": 1.3097667694091797, "learning_rate": 0.0001, "loss": 0.0103, "step": 156930 }, { "epoch": 1032.5, "grad_norm": 1.5635261535644531, "learning_rate": 0.0001, "loss": 0.0119, "step": 156940 }, { "epoch": 1032.5657894736842, "grad_norm": 1.261675477027893, "learning_rate": 0.0001, "loss": 0.0165, "step": 156950 }, { "epoch": 1032.6315789473683, "grad_norm": 1.428641676902771, "learning_rate": 0.0001, "loss": 0.012, "step": 156960 }, { "epoch": 1032.6973684210527, "grad_norm": 1.3594074249267578, "learning_rate": 0.0001, "loss": 0.0098, "step": 156970 }, { "epoch": 1032.7631578947369, "grad_norm": 1.1234440803527832, "learning_rate": 0.0001, "loss": 0.0098, "step": 156980 }, { "epoch": 1032.828947368421, "grad_norm": 1.512653112411499, "learning_rate": 0.0001, "loss": 0.0146, "step": 156990 }, { "epoch": 1032.8947368421052, "grad_norm": 1.6033166646957397, "learning_rate": 0.0001, "loss": 0.0102, "step": 157000 }, { "epoch": 1032.9605263157894, "grad_norm": 1.522045373916626, "learning_rate": 0.0001, "loss": 0.0127, "step": 157010 }, { "epoch": 1033.0263157894738, "grad_norm": 1.346534013748169, "learning_rate": 0.0001, "loss": 0.0179, "step": 157020 }, { "epoch": 1033.092105263158, "grad_norm": 1.4805207252502441, "learning_rate": 0.0001, "loss": 0.0156, "step": 157030 }, { "epoch": 1033.157894736842, "grad_norm": 1.7296535968780518, "learning_rate": 0.0001, "loss": 0.0158, "step": 157040 }, { "epoch": 1033.2236842105262, "grad_norm": 1.7885081768035889, "learning_rate": 0.0001, "loss": 0.0106, "step": 157050 }, { "epoch": 1033.2894736842106, "grad_norm": 1.6877658367156982, "learning_rate": 0.0001, "loss": 0.0157, "step": 157060 }, { "epoch": 1033.3552631578948, "grad_norm": 1.5065356492996216, "learning_rate": 0.0001, "loss": 0.0135, "step": 157070 }, { "epoch": 1033.421052631579, "grad_norm": 1.5357332229614258, "learning_rate": 0.0001, "loss": 0.0091, "step": 157080 }, { "epoch": 1033.4868421052631, "grad_norm": 1.382460117340088, "learning_rate": 0.0001, "loss": 0.0145, "step": 157090 }, { "epoch": 1033.5526315789473, "grad_norm": 1.316792607307434, "learning_rate": 0.0001, "loss": 0.0118, "step": 157100 }, { "epoch": 1033.6184210526317, "grad_norm": 1.4901647567749023, "learning_rate": 0.0001, "loss": 0.0099, "step": 157110 }, { "epoch": 1033.6842105263158, "grad_norm": 1.0761529207229614, "learning_rate": 0.0001, "loss": 0.0099, "step": 157120 }, { "epoch": 1033.75, "grad_norm": 1.2063227891921997, "learning_rate": 0.0001, "loss": 0.0113, "step": 157130 }, { "epoch": 1033.8157894736842, "grad_norm": 1.5815986394882202, "learning_rate": 0.0001, "loss": 0.0094, "step": 157140 }, { "epoch": 1033.8815789473683, "grad_norm": 1.4186062812805176, "learning_rate": 0.0001, "loss": 0.0121, "step": 157150 }, { "epoch": 1033.9473684210527, "grad_norm": 1.4563757181167603, "learning_rate": 0.0001, "loss": 0.0112, "step": 157160 }, { "epoch": 1034.0131578947369, "grad_norm": 1.4438974857330322, "learning_rate": 0.0001, "loss": 0.0116, "step": 157170 }, { "epoch": 1034.078947368421, "grad_norm": 1.0900474786758423, "learning_rate": 0.0001, "loss": 0.0122, "step": 157180 }, { "epoch": 1034.1447368421052, "grad_norm": 0.9760515093803406, "learning_rate": 0.0001, "loss": 0.0106, "step": 157190 }, { "epoch": 1034.2105263157894, "grad_norm": 1.5168094635009766, "learning_rate": 0.0001, "loss": 0.0171, "step": 157200 }, { "epoch": 1034.2763157894738, "grad_norm": 1.1529297828674316, "learning_rate": 0.0001, "loss": 0.0136, "step": 157210 }, { "epoch": 1034.342105263158, "grad_norm": 1.6949020624160767, "learning_rate": 0.0001, "loss": 0.0112, "step": 157220 }, { "epoch": 1034.407894736842, "grad_norm": 1.1364623308181763, "learning_rate": 0.0001, "loss": 0.0129, "step": 157230 }, { "epoch": 1034.4736842105262, "grad_norm": 1.6084938049316406, "learning_rate": 0.0001, "loss": 0.0111, "step": 157240 }, { "epoch": 1034.5394736842106, "grad_norm": 1.3256322145462036, "learning_rate": 0.0001, "loss": 0.0137, "step": 157250 }, { "epoch": 1034.6052631578948, "grad_norm": 1.3800621032714844, "learning_rate": 0.0001, "loss": 0.014, "step": 157260 }, { "epoch": 1034.671052631579, "grad_norm": 1.3513680696487427, "learning_rate": 0.0001, "loss": 0.012, "step": 157270 }, { "epoch": 1034.7368421052631, "grad_norm": 1.7929927110671997, "learning_rate": 0.0001, "loss": 0.0115, "step": 157280 }, { "epoch": 1034.8026315789473, "grad_norm": 1.276706576347351, "learning_rate": 0.0001, "loss": 0.012, "step": 157290 }, { "epoch": 1034.8684210526317, "grad_norm": 1.4190692901611328, "learning_rate": 0.0001, "loss": 0.0123, "step": 157300 }, { "epoch": 1034.9342105263158, "grad_norm": 1.2199997901916504, "learning_rate": 0.0001, "loss": 0.0093, "step": 157310 }, { "epoch": 1035.0, "grad_norm": 1.5361427068710327, "learning_rate": 0.0001, "loss": 0.0133, "step": 157320 }, { "epoch": 1035.0657894736842, "grad_norm": 0.9096554517745972, "learning_rate": 0.0001, "loss": 0.0121, "step": 157330 }, { "epoch": 1035.1315789473683, "grad_norm": 1.5520751476287842, "learning_rate": 0.0001, "loss": 0.0107, "step": 157340 }, { "epoch": 1035.1973684210527, "grad_norm": 1.4161170721054077, "learning_rate": 0.0001, "loss": 0.0114, "step": 157350 }, { "epoch": 1035.2631578947369, "grad_norm": 1.1327383518218994, "learning_rate": 0.0001, "loss": 0.014, "step": 157360 }, { "epoch": 1035.328947368421, "grad_norm": 1.3394123315811157, "learning_rate": 0.0001, "loss": 0.0106, "step": 157370 }, { "epoch": 1035.3947368421052, "grad_norm": 1.401236891746521, "learning_rate": 0.0001, "loss": 0.0141, "step": 157380 }, { "epoch": 1035.4605263157894, "grad_norm": 1.3542617559432983, "learning_rate": 0.0001, "loss": 0.0182, "step": 157390 }, { "epoch": 1035.5263157894738, "grad_norm": 1.7596817016601562, "learning_rate": 0.0001, "loss": 0.0135, "step": 157400 }, { "epoch": 1035.592105263158, "grad_norm": 1.6763525009155273, "learning_rate": 0.0001, "loss": 0.0124, "step": 157410 }, { "epoch": 1035.657894736842, "grad_norm": 1.65891695022583, "learning_rate": 0.0001, "loss": 0.0114, "step": 157420 }, { "epoch": 1035.7236842105262, "grad_norm": 1.6971220970153809, "learning_rate": 0.0001, "loss": 0.0119, "step": 157430 }, { "epoch": 1035.7894736842106, "grad_norm": 1.1559258699417114, "learning_rate": 0.0001, "loss": 0.013, "step": 157440 }, { "epoch": 1035.8552631578948, "grad_norm": 1.4354703426361084, "learning_rate": 0.0001, "loss": 0.012, "step": 157450 }, { "epoch": 1035.921052631579, "grad_norm": 1.4139165878295898, "learning_rate": 0.0001, "loss": 0.0132, "step": 157460 }, { "epoch": 1035.9868421052631, "grad_norm": 1.5953519344329834, "learning_rate": 0.0001, "loss": 0.0095, "step": 157470 }, { "epoch": 1036.0526315789473, "grad_norm": 1.839040994644165, "learning_rate": 0.0001, "loss": 0.0111, "step": 157480 }, { "epoch": 1036.1184210526317, "grad_norm": 1.1936633586883545, "learning_rate": 0.0001, "loss": 0.0115, "step": 157490 }, { "epoch": 1036.1842105263158, "grad_norm": 1.2816351652145386, "learning_rate": 0.0001, "loss": 0.0136, "step": 157500 }, { "epoch": 1036.25, "grad_norm": 1.359246850013733, "learning_rate": 0.0001, "loss": 0.0119, "step": 157510 }, { "epoch": 1036.3157894736842, "grad_norm": 0.893639087677002, "learning_rate": 0.0001, "loss": 0.019, "step": 157520 }, { "epoch": 1036.3815789473683, "grad_norm": 1.4343905448913574, "learning_rate": 0.0001, "loss": 0.0087, "step": 157530 }, { "epoch": 1036.4473684210527, "grad_norm": 1.2456212043762207, "learning_rate": 0.0001, "loss": 0.0169, "step": 157540 }, { "epoch": 1036.5131578947369, "grad_norm": 1.5194368362426758, "learning_rate": 0.0001, "loss": 0.0111, "step": 157550 }, { "epoch": 1036.578947368421, "grad_norm": 1.4550708532333374, "learning_rate": 0.0001, "loss": 0.0124, "step": 157560 }, { "epoch": 1036.6447368421052, "grad_norm": 1.4140545129776, "learning_rate": 0.0001, "loss": 0.0114, "step": 157570 }, { "epoch": 1036.7105263157894, "grad_norm": 1.2190308570861816, "learning_rate": 0.0001, "loss": 0.0085, "step": 157580 }, { "epoch": 1036.7763157894738, "grad_norm": 1.1116377115249634, "learning_rate": 0.0001, "loss": 0.0088, "step": 157590 }, { "epoch": 1036.842105263158, "grad_norm": 1.616862416267395, "learning_rate": 0.0001, "loss": 0.0148, "step": 157600 }, { "epoch": 1036.907894736842, "grad_norm": 1.5413142442703247, "learning_rate": 0.0001, "loss": 0.0132, "step": 157610 }, { "epoch": 1036.9736842105262, "grad_norm": 1.8604730367660522, "learning_rate": 0.0001, "loss": 0.0129, "step": 157620 }, { "epoch": 1037.0394736842106, "grad_norm": 1.2884116172790527, "learning_rate": 0.0001, "loss": 0.0123, "step": 157630 }, { "epoch": 1037.1052631578948, "grad_norm": 1.117967128753662, "learning_rate": 0.0001, "loss": 0.0133, "step": 157640 }, { "epoch": 1037.171052631579, "grad_norm": 1.2200502157211304, "learning_rate": 0.0001, "loss": 0.0117, "step": 157650 }, { "epoch": 1037.2368421052631, "grad_norm": 1.4886925220489502, "learning_rate": 0.0001, "loss": 0.0138, "step": 157660 }, { "epoch": 1037.3026315789473, "grad_norm": 1.8598284721374512, "learning_rate": 0.0001, "loss": 0.0121, "step": 157670 }, { "epoch": 1037.3684210526317, "grad_norm": 1.6539548635482788, "learning_rate": 0.0001, "loss": 0.0153, "step": 157680 }, { "epoch": 1037.4342105263158, "grad_norm": 1.2234892845153809, "learning_rate": 0.0001, "loss": 0.0093, "step": 157690 }, { "epoch": 1037.5, "grad_norm": 1.3269321918487549, "learning_rate": 0.0001, "loss": 0.0087, "step": 157700 }, { "epoch": 1037.5657894736842, "grad_norm": 1.8102898597717285, "learning_rate": 0.0001, "loss": 0.014, "step": 157710 }, { "epoch": 1037.6315789473683, "grad_norm": 1.9416228532791138, "learning_rate": 0.0001, "loss": 0.0111, "step": 157720 }, { "epoch": 1037.6973684210527, "grad_norm": 1.9852598905563354, "learning_rate": 0.0001, "loss": 0.0119, "step": 157730 }, { "epoch": 1037.7631578947369, "grad_norm": 1.8172234296798706, "learning_rate": 0.0001, "loss": 0.013, "step": 157740 }, { "epoch": 1037.828947368421, "grad_norm": 1.6453596353530884, "learning_rate": 0.0001, "loss": 0.013, "step": 157750 }, { "epoch": 1037.8947368421052, "grad_norm": 1.4241582155227661, "learning_rate": 0.0001, "loss": 0.0109, "step": 157760 }, { "epoch": 1037.9605263157894, "grad_norm": 1.6064234972000122, "learning_rate": 0.0001, "loss": 0.0105, "step": 157770 }, { "epoch": 1038.0263157894738, "grad_norm": 1.1900696754455566, "learning_rate": 0.0001, "loss": 0.0158, "step": 157780 }, { "epoch": 1038.092105263158, "grad_norm": 1.6124560832977295, "learning_rate": 0.0001, "loss": 0.0088, "step": 157790 }, { "epoch": 1038.157894736842, "grad_norm": 1.3375846147537231, "learning_rate": 0.0001, "loss": 0.0131, "step": 157800 }, { "epoch": 1038.2236842105262, "grad_norm": 1.4350812435150146, "learning_rate": 0.0001, "loss": 0.0104, "step": 157810 }, { "epoch": 1038.2894736842106, "grad_norm": 1.3966648578643799, "learning_rate": 0.0001, "loss": 0.0105, "step": 157820 }, { "epoch": 1038.3552631578948, "grad_norm": 1.716967225074768, "learning_rate": 0.0001, "loss": 0.0113, "step": 157830 }, { "epoch": 1038.421052631579, "grad_norm": 1.518107295036316, "learning_rate": 0.0001, "loss": 0.0135, "step": 157840 }, { "epoch": 1038.4868421052631, "grad_norm": 1.3582267761230469, "learning_rate": 0.0001, "loss": 0.0124, "step": 157850 }, { "epoch": 1038.5526315789473, "grad_norm": 1.945196509361267, "learning_rate": 0.0001, "loss": 0.0133, "step": 157860 }, { "epoch": 1038.6184210526317, "grad_norm": 1.6505303382873535, "learning_rate": 0.0001, "loss": 0.0126, "step": 157870 }, { "epoch": 1038.6842105263158, "grad_norm": 1.3663477897644043, "learning_rate": 0.0001, "loss": 0.0105, "step": 157880 }, { "epoch": 1038.75, "grad_norm": 1.691145420074463, "learning_rate": 0.0001, "loss": 0.0112, "step": 157890 }, { "epoch": 1038.8157894736842, "grad_norm": 1.961331844329834, "learning_rate": 0.0001, "loss": 0.014, "step": 157900 }, { "epoch": 1038.8815789473683, "grad_norm": 1.3105170726776123, "learning_rate": 0.0001, "loss": 0.0146, "step": 157910 }, { "epoch": 1038.9473684210527, "grad_norm": 1.4074170589447021, "learning_rate": 0.0001, "loss": 0.0121, "step": 157920 }, { "epoch": 1039.0131578947369, "grad_norm": 1.6809163093566895, "learning_rate": 0.0001, "loss": 0.0148, "step": 157930 }, { "epoch": 1039.078947368421, "grad_norm": 1.8154910802841187, "learning_rate": 0.0001, "loss": 0.0116, "step": 157940 }, { "epoch": 1039.1447368421052, "grad_norm": 1.5296578407287598, "learning_rate": 0.0001, "loss": 0.0122, "step": 157950 }, { "epoch": 1039.2105263157894, "grad_norm": 1.404313325881958, "learning_rate": 0.0001, "loss": 0.0138, "step": 157960 }, { "epoch": 1039.2763157894738, "grad_norm": 1.3957929611206055, "learning_rate": 0.0001, "loss": 0.0119, "step": 157970 }, { "epoch": 1039.342105263158, "grad_norm": 1.5887868404388428, "learning_rate": 0.0001, "loss": 0.0139, "step": 157980 }, { "epoch": 1039.407894736842, "grad_norm": 1.5880521535873413, "learning_rate": 0.0001, "loss": 0.0128, "step": 157990 }, { "epoch": 1039.4736842105262, "grad_norm": 1.36805260181427, "learning_rate": 0.0001, "loss": 0.0119, "step": 158000 }, { "epoch": 1039.5394736842106, "grad_norm": 1.5408259630203247, "learning_rate": 0.0001, "loss": 0.0115, "step": 158010 }, { "epoch": 1039.6052631578948, "grad_norm": 1.5390907526016235, "learning_rate": 0.0001, "loss": 0.0117, "step": 158020 }, { "epoch": 1039.671052631579, "grad_norm": 1.9756340980529785, "learning_rate": 0.0001, "loss": 0.0105, "step": 158030 }, { "epoch": 1039.7368421052631, "grad_norm": 1.2324614524841309, "learning_rate": 0.0001, "loss": 0.0127, "step": 158040 }, { "epoch": 1039.8026315789473, "grad_norm": 1.9043035507202148, "learning_rate": 0.0001, "loss": 0.013, "step": 158050 }, { "epoch": 1039.8684210526317, "grad_norm": 1.3596006631851196, "learning_rate": 0.0001, "loss": 0.0104, "step": 158060 }, { "epoch": 1039.9342105263158, "grad_norm": 1.2376317977905273, "learning_rate": 0.0001, "loss": 0.0137, "step": 158070 }, { "epoch": 1040.0, "grad_norm": 1.3085041046142578, "learning_rate": 0.0001, "loss": 0.0112, "step": 158080 }, { "epoch": 1040.0657894736842, "grad_norm": 1.7103922367095947, "learning_rate": 0.0001, "loss": 0.0112, "step": 158090 }, { "epoch": 1040.1315789473683, "grad_norm": 1.6226006746292114, "learning_rate": 0.0001, "loss": 0.0091, "step": 158100 }, { "epoch": 1040.1973684210527, "grad_norm": 1.1583794355392456, "learning_rate": 0.0001, "loss": 0.011, "step": 158110 }, { "epoch": 1040.2631578947369, "grad_norm": 1.2284246683120728, "learning_rate": 0.0001, "loss": 0.0139, "step": 158120 }, { "epoch": 1040.328947368421, "grad_norm": 1.5068079233169556, "learning_rate": 0.0001, "loss": 0.0124, "step": 158130 }, { "epoch": 1040.3947368421052, "grad_norm": 1.8247507810592651, "learning_rate": 0.0001, "loss": 0.0127, "step": 158140 }, { "epoch": 1040.4605263157894, "grad_norm": 1.756651520729065, "learning_rate": 0.0001, "loss": 0.0187, "step": 158150 }, { "epoch": 1040.5263157894738, "grad_norm": 1.3239336013793945, "learning_rate": 0.0001, "loss": 0.0137, "step": 158160 }, { "epoch": 1040.592105263158, "grad_norm": 1.3193601369857788, "learning_rate": 0.0001, "loss": 0.013, "step": 158170 }, { "epoch": 1040.657894736842, "grad_norm": 1.4295481443405151, "learning_rate": 0.0001, "loss": 0.0132, "step": 158180 }, { "epoch": 1040.7236842105262, "grad_norm": 0.9728693962097168, "learning_rate": 0.0001, "loss": 0.0121, "step": 158190 }, { "epoch": 1040.7894736842106, "grad_norm": 1.4464870691299438, "learning_rate": 0.0001, "loss": 0.014, "step": 158200 }, { "epoch": 1040.8552631578948, "grad_norm": 1.5151629447937012, "learning_rate": 0.0001, "loss": 0.0094, "step": 158210 }, { "epoch": 1040.921052631579, "grad_norm": 1.1581887006759644, "learning_rate": 0.0001, "loss": 0.0111, "step": 158220 }, { "epoch": 1040.9868421052631, "grad_norm": 1.4577194452285767, "learning_rate": 0.0001, "loss": 0.0112, "step": 158230 }, { "epoch": 1041.0526315789473, "grad_norm": 1.371056318283081, "learning_rate": 0.0001, "loss": 0.0113, "step": 158240 }, { "epoch": 1041.1184210526317, "grad_norm": 1.7210532426834106, "learning_rate": 0.0001, "loss": 0.0161, "step": 158250 }, { "epoch": 1041.1842105263158, "grad_norm": 1.6913596391677856, "learning_rate": 0.0001, "loss": 0.0095, "step": 158260 }, { "epoch": 1041.25, "grad_norm": 2.069917678833008, "learning_rate": 0.0001, "loss": 0.0114, "step": 158270 }, { "epoch": 1041.3157894736842, "grad_norm": 1.570471167564392, "learning_rate": 0.0001, "loss": 0.0117, "step": 158280 }, { "epoch": 1041.3815789473683, "grad_norm": 1.9331351518630981, "learning_rate": 0.0001, "loss": 0.014, "step": 158290 }, { "epoch": 1041.4473684210527, "grad_norm": 1.5450923442840576, "learning_rate": 0.0001, "loss": 0.0105, "step": 158300 }, { "epoch": 1041.5131578947369, "grad_norm": 1.5898889303207397, "learning_rate": 0.0001, "loss": 0.0132, "step": 158310 }, { "epoch": 1041.578947368421, "grad_norm": 1.512526273727417, "learning_rate": 0.0001, "loss": 0.0165, "step": 158320 }, { "epoch": 1041.6447368421052, "grad_norm": 1.7502212524414062, "learning_rate": 0.0001, "loss": 0.0092, "step": 158330 }, { "epoch": 1041.7105263157894, "grad_norm": 1.631378173828125, "learning_rate": 0.0001, "loss": 0.0131, "step": 158340 }, { "epoch": 1041.7763157894738, "grad_norm": 1.8054161071777344, "learning_rate": 0.0001, "loss": 0.0122, "step": 158350 }, { "epoch": 1041.842105263158, "grad_norm": 1.6814848184585571, "learning_rate": 0.0001, "loss": 0.013, "step": 158360 }, { "epoch": 1041.907894736842, "grad_norm": 1.4372057914733887, "learning_rate": 0.0001, "loss": 0.0136, "step": 158370 }, { "epoch": 1041.9736842105262, "grad_norm": 1.885439157485962, "learning_rate": 0.0001, "loss": 0.0111, "step": 158380 }, { "epoch": 1042.0394736842106, "grad_norm": 1.2094141244888306, "learning_rate": 0.0001, "loss": 0.0121, "step": 158390 }, { "epoch": 1042.1052631578948, "grad_norm": 2.079016923904419, "learning_rate": 0.0001, "loss": 0.0113, "step": 158400 }, { "epoch": 1042.171052631579, "grad_norm": 1.63840651512146, "learning_rate": 0.0001, "loss": 0.0114, "step": 158410 }, { "epoch": 1042.2368421052631, "grad_norm": 1.867723822593689, "learning_rate": 0.0001, "loss": 0.014, "step": 158420 }, { "epoch": 1042.3026315789473, "grad_norm": 2.023374319076538, "learning_rate": 0.0001, "loss": 0.0111, "step": 158430 }, { "epoch": 1042.3684210526317, "grad_norm": 1.435120701789856, "learning_rate": 0.0001, "loss": 0.0177, "step": 158440 }, { "epoch": 1042.4342105263158, "grad_norm": 1.7571313381195068, "learning_rate": 0.0001, "loss": 0.009, "step": 158450 }, { "epoch": 1042.5, "grad_norm": 1.7285493612289429, "learning_rate": 0.0001, "loss": 0.0101, "step": 158460 }, { "epoch": 1042.5657894736842, "grad_norm": 1.319225549697876, "learning_rate": 0.0001, "loss": 0.0102, "step": 158470 }, { "epoch": 1042.6315789473683, "grad_norm": 1.6855412721633911, "learning_rate": 0.0001, "loss": 0.0109, "step": 158480 }, { "epoch": 1042.6973684210527, "grad_norm": 1.4051624536514282, "learning_rate": 0.0001, "loss": 0.0135, "step": 158490 }, { "epoch": 1042.7631578947369, "grad_norm": 1.5305041074752808, "learning_rate": 0.0001, "loss": 0.0115, "step": 158500 }, { "epoch": 1042.828947368421, "grad_norm": 1.7131940126419067, "learning_rate": 0.0001, "loss": 0.0112, "step": 158510 }, { "epoch": 1042.8947368421052, "grad_norm": 1.3515018224716187, "learning_rate": 0.0001, "loss": 0.0131, "step": 158520 }, { "epoch": 1042.9605263157894, "grad_norm": 1.7281314134597778, "learning_rate": 0.0001, "loss": 0.0157, "step": 158530 }, { "epoch": 1043.0263157894738, "grad_norm": 1.4285470247268677, "learning_rate": 0.0001, "loss": 0.0136, "step": 158540 }, { "epoch": 1043.092105263158, "grad_norm": 1.4512666463851929, "learning_rate": 0.0001, "loss": 0.0101, "step": 158550 }, { "epoch": 1043.157894736842, "grad_norm": 1.230194091796875, "learning_rate": 0.0001, "loss": 0.0135, "step": 158560 }, { "epoch": 1043.2236842105262, "grad_norm": 1.429254174232483, "learning_rate": 0.0001, "loss": 0.0092, "step": 158570 }, { "epoch": 1043.2894736842106, "grad_norm": 1.1491645574569702, "learning_rate": 0.0001, "loss": 0.0098, "step": 158580 }, { "epoch": 1043.3552631578948, "grad_norm": 1.4873498678207397, "learning_rate": 0.0001, "loss": 0.0112, "step": 158590 }, { "epoch": 1043.421052631579, "grad_norm": 1.524107813835144, "learning_rate": 0.0001, "loss": 0.0137, "step": 158600 }, { "epoch": 1043.4868421052631, "grad_norm": 1.8413498401641846, "learning_rate": 0.0001, "loss": 0.0126, "step": 158610 }, { "epoch": 1043.5526315789473, "grad_norm": 1.0793919563293457, "learning_rate": 0.0001, "loss": 0.0131, "step": 158620 }, { "epoch": 1043.6184210526317, "grad_norm": 1.445238709449768, "learning_rate": 0.0001, "loss": 0.0108, "step": 158630 }, { "epoch": 1043.6842105263158, "grad_norm": 1.1889067888259888, "learning_rate": 0.0001, "loss": 0.0138, "step": 158640 }, { "epoch": 1043.75, "grad_norm": 1.1043494939804077, "learning_rate": 0.0001, "loss": 0.0144, "step": 158650 }, { "epoch": 1043.8157894736842, "grad_norm": 1.3971781730651855, "learning_rate": 0.0001, "loss": 0.016, "step": 158660 }, { "epoch": 1043.8815789473683, "grad_norm": 1.401375651359558, "learning_rate": 0.0001, "loss": 0.0151, "step": 158670 }, { "epoch": 1043.9473684210527, "grad_norm": 1.3657453060150146, "learning_rate": 0.0001, "loss": 0.0124, "step": 158680 }, { "epoch": 1044.0131578947369, "grad_norm": 1.8035210371017456, "learning_rate": 0.0001, "loss": 0.011, "step": 158690 }, { "epoch": 1044.078947368421, "grad_norm": 1.6932759284973145, "learning_rate": 0.0001, "loss": 0.016, "step": 158700 }, { "epoch": 1044.1447368421052, "grad_norm": 1.5507817268371582, "learning_rate": 0.0001, "loss": 0.0108, "step": 158710 }, { "epoch": 1044.2105263157894, "grad_norm": 1.6556707620620728, "learning_rate": 0.0001, "loss": 0.0106, "step": 158720 }, { "epoch": 1044.2763157894738, "grad_norm": 1.5637801885604858, "learning_rate": 0.0001, "loss": 0.0134, "step": 158730 }, { "epoch": 1044.342105263158, "grad_norm": 1.4585589170455933, "learning_rate": 0.0001, "loss": 0.0145, "step": 158740 }, { "epoch": 1044.407894736842, "grad_norm": 1.5095961093902588, "learning_rate": 0.0001, "loss": 0.0121, "step": 158750 }, { "epoch": 1044.4736842105262, "grad_norm": 1.8371423482894897, "learning_rate": 0.0001, "loss": 0.0113, "step": 158760 }, { "epoch": 1044.5394736842106, "grad_norm": 1.5094377994537354, "learning_rate": 0.0001, "loss": 0.0125, "step": 158770 }, { "epoch": 1044.6052631578948, "grad_norm": 1.4789891242980957, "learning_rate": 0.0001, "loss": 0.0096, "step": 158780 }, { "epoch": 1044.671052631579, "grad_norm": 1.5392721891403198, "learning_rate": 0.0001, "loss": 0.0098, "step": 158790 }, { "epoch": 1044.7368421052631, "grad_norm": 1.521986961364746, "learning_rate": 0.0001, "loss": 0.0125, "step": 158800 }, { "epoch": 1044.8026315789473, "grad_norm": 1.5505789518356323, "learning_rate": 0.0001, "loss": 0.0134, "step": 158810 }, { "epoch": 1044.8684210526317, "grad_norm": 1.6674299240112305, "learning_rate": 0.0001, "loss": 0.0107, "step": 158820 }, { "epoch": 1044.9342105263158, "grad_norm": 1.7163983583450317, "learning_rate": 0.0001, "loss": 0.0158, "step": 158830 }, { "epoch": 1045.0, "grad_norm": 1.6686619520187378, "learning_rate": 0.0001, "loss": 0.0155, "step": 158840 }, { "epoch": 1045.0657894736842, "grad_norm": 1.743304967880249, "learning_rate": 0.0001, "loss": 0.0109, "step": 158850 }, { "epoch": 1045.1315789473683, "grad_norm": 1.2511515617370605, "learning_rate": 0.0001, "loss": 0.0111, "step": 158860 }, { "epoch": 1045.1973684210527, "grad_norm": 1.5086755752563477, "learning_rate": 0.0001, "loss": 0.0118, "step": 158870 }, { "epoch": 1045.2631578947369, "grad_norm": 1.4490851163864136, "learning_rate": 0.0001, "loss": 0.02, "step": 158880 }, { "epoch": 1045.328947368421, "grad_norm": 1.720654845237732, "learning_rate": 0.0001, "loss": 0.0109, "step": 158890 }, { "epoch": 1045.3947368421052, "grad_norm": 1.7828748226165771, "learning_rate": 0.0001, "loss": 0.0143, "step": 158900 }, { "epoch": 1045.4605263157894, "grad_norm": 1.4694024324417114, "learning_rate": 0.0001, "loss": 0.0135, "step": 158910 }, { "epoch": 1045.5263157894738, "grad_norm": 1.4871903657913208, "learning_rate": 0.0001, "loss": 0.0142, "step": 158920 }, { "epoch": 1045.592105263158, "grad_norm": 1.3572988510131836, "learning_rate": 0.0001, "loss": 0.0091, "step": 158930 }, { "epoch": 1045.657894736842, "grad_norm": 1.2970061302185059, "learning_rate": 0.0001, "loss": 0.0116, "step": 158940 }, { "epoch": 1045.7236842105262, "grad_norm": 1.4383758306503296, "learning_rate": 0.0001, "loss": 0.0141, "step": 158950 }, { "epoch": 1045.7894736842106, "grad_norm": 1.6673113107681274, "learning_rate": 0.0001, "loss": 0.012, "step": 158960 }, { "epoch": 1045.8552631578948, "grad_norm": 1.232186198234558, "learning_rate": 0.0001, "loss": 0.0122, "step": 158970 }, { "epoch": 1045.921052631579, "grad_norm": 1.1804075241088867, "learning_rate": 0.0001, "loss": 0.011, "step": 158980 }, { "epoch": 1045.9868421052631, "grad_norm": 1.2231640815734863, "learning_rate": 0.0001, "loss": 0.0111, "step": 158990 }, { "epoch": 1046.0526315789473, "grad_norm": 1.599390983581543, "learning_rate": 0.0001, "loss": 0.0143, "step": 159000 }, { "epoch": 1046.1184210526317, "grad_norm": 1.7361749410629272, "learning_rate": 0.0001, "loss": 0.0146, "step": 159010 }, { "epoch": 1046.1842105263158, "grad_norm": 1.216265320777893, "learning_rate": 0.0001, "loss": 0.0155, "step": 159020 }, { "epoch": 1046.25, "grad_norm": 1.3671875, "learning_rate": 0.0001, "loss": 0.0108, "step": 159030 }, { "epoch": 1046.3157894736842, "grad_norm": 1.1507537364959717, "learning_rate": 0.0001, "loss": 0.0111, "step": 159040 }, { "epoch": 1046.3815789473683, "grad_norm": 1.260697603225708, "learning_rate": 0.0001, "loss": 0.0173, "step": 159050 }, { "epoch": 1046.4473684210527, "grad_norm": 1.0328161716461182, "learning_rate": 0.0001, "loss": 0.01, "step": 159060 }, { "epoch": 1046.5131578947369, "grad_norm": 1.2085192203521729, "learning_rate": 0.0001, "loss": 0.0099, "step": 159070 }, { "epoch": 1046.578947368421, "grad_norm": 1.0078643560409546, "learning_rate": 0.0001, "loss": 0.0144, "step": 159080 }, { "epoch": 1046.6447368421052, "grad_norm": 1.1550662517547607, "learning_rate": 0.0001, "loss": 0.0142, "step": 159090 }, { "epoch": 1046.7105263157894, "grad_norm": 1.086584210395813, "learning_rate": 0.0001, "loss": 0.0153, "step": 159100 }, { "epoch": 1046.7763157894738, "grad_norm": 1.5127767324447632, "learning_rate": 0.0001, "loss": 0.0153, "step": 159110 }, { "epoch": 1046.842105263158, "grad_norm": 1.3369392156600952, "learning_rate": 0.0001, "loss": 0.0124, "step": 159120 }, { "epoch": 1046.907894736842, "grad_norm": 1.4154279232025146, "learning_rate": 0.0001, "loss": 0.0091, "step": 159130 }, { "epoch": 1046.9736842105262, "grad_norm": 1.3218357563018799, "learning_rate": 0.0001, "loss": 0.0105, "step": 159140 }, { "epoch": 1047.0394736842106, "grad_norm": 1.1471985578536987, "learning_rate": 0.0001, "loss": 0.0103, "step": 159150 }, { "epoch": 1047.1052631578948, "grad_norm": 1.6164253950119019, "learning_rate": 0.0001, "loss": 0.0135, "step": 159160 }, { "epoch": 1047.171052631579, "grad_norm": 1.4827115535736084, "learning_rate": 0.0001, "loss": 0.0138, "step": 159170 }, { "epoch": 1047.2368421052631, "grad_norm": 1.6059455871582031, "learning_rate": 0.0001, "loss": 0.0111, "step": 159180 }, { "epoch": 1047.3026315789473, "grad_norm": 0.9835669994354248, "learning_rate": 0.0001, "loss": 0.0173, "step": 159190 }, { "epoch": 1047.3684210526317, "grad_norm": 1.3717358112335205, "learning_rate": 0.0001, "loss": 0.0111, "step": 159200 }, { "epoch": 1047.4342105263158, "grad_norm": 1.2006266117095947, "learning_rate": 0.0001, "loss": 0.0117, "step": 159210 }, { "epoch": 1047.5, "grad_norm": 1.6425414085388184, "learning_rate": 0.0001, "loss": 0.0141, "step": 159220 }, { "epoch": 1047.5657894736842, "grad_norm": 1.2627134323120117, "learning_rate": 0.0001, "loss": 0.0105, "step": 159230 }, { "epoch": 1047.6315789473683, "grad_norm": 1.1589570045471191, "learning_rate": 0.0001, "loss": 0.0121, "step": 159240 }, { "epoch": 1047.6973684210527, "grad_norm": 1.157839298248291, "learning_rate": 0.0001, "loss": 0.0119, "step": 159250 }, { "epoch": 1047.7631578947369, "grad_norm": 1.4980897903442383, "learning_rate": 0.0001, "loss": 0.0117, "step": 159260 }, { "epoch": 1047.828947368421, "grad_norm": 1.6261473894119263, "learning_rate": 0.0001, "loss": 0.0143, "step": 159270 }, { "epoch": 1047.8947368421052, "grad_norm": 1.2751567363739014, "learning_rate": 0.0001, "loss": 0.0148, "step": 159280 }, { "epoch": 1047.9605263157894, "grad_norm": 1.2339987754821777, "learning_rate": 0.0001, "loss": 0.0123, "step": 159290 }, { "epoch": 1048.0263157894738, "grad_norm": 1.5421853065490723, "learning_rate": 0.0001, "loss": 0.0132, "step": 159300 }, { "epoch": 1048.092105263158, "grad_norm": 1.1424325704574585, "learning_rate": 0.0001, "loss": 0.0138, "step": 159310 }, { "epoch": 1048.157894736842, "grad_norm": 1.2080458402633667, "learning_rate": 0.0001, "loss": 0.0126, "step": 159320 }, { "epoch": 1048.2236842105262, "grad_norm": 1.0781906843185425, "learning_rate": 0.0001, "loss": 0.0152, "step": 159330 }, { "epoch": 1048.2894736842106, "grad_norm": 1.519226312637329, "learning_rate": 0.0001, "loss": 0.0108, "step": 159340 }, { "epoch": 1048.3552631578948, "grad_norm": 1.722835898399353, "learning_rate": 0.0001, "loss": 0.013, "step": 159350 }, { "epoch": 1048.421052631579, "grad_norm": 1.5287854671478271, "learning_rate": 0.0001, "loss": 0.009, "step": 159360 }, { "epoch": 1048.4868421052631, "grad_norm": 1.5539305210113525, "learning_rate": 0.0001, "loss": 0.0121, "step": 159370 }, { "epoch": 1048.5526315789473, "grad_norm": 1.130693793296814, "learning_rate": 0.0001, "loss": 0.0109, "step": 159380 }, { "epoch": 1048.6184210526317, "grad_norm": 1.2412267923355103, "learning_rate": 0.0001, "loss": 0.0148, "step": 159390 }, { "epoch": 1048.6842105263158, "grad_norm": 1.3745673894882202, "learning_rate": 0.0001, "loss": 0.0118, "step": 159400 }, { "epoch": 1048.75, "grad_norm": 1.7784640789031982, "learning_rate": 0.0001, "loss": 0.0128, "step": 159410 }, { "epoch": 1048.8157894736842, "grad_norm": 1.4466731548309326, "learning_rate": 0.0001, "loss": 0.0119, "step": 159420 }, { "epoch": 1048.8815789473683, "grad_norm": 1.174634337425232, "learning_rate": 0.0001, "loss": 0.0121, "step": 159430 }, { "epoch": 1048.9473684210527, "grad_norm": 1.6108568906784058, "learning_rate": 0.0001, "loss": 0.0138, "step": 159440 }, { "epoch": 1049.0131578947369, "grad_norm": 1.2908968925476074, "learning_rate": 0.0001, "loss": 0.0137, "step": 159450 }, { "epoch": 1049.078947368421, "grad_norm": 1.4883376359939575, "learning_rate": 0.0001, "loss": 0.0154, "step": 159460 }, { "epoch": 1049.1447368421052, "grad_norm": 1.1438546180725098, "learning_rate": 0.0001, "loss": 0.0137, "step": 159470 }, { "epoch": 1049.2105263157894, "grad_norm": 1.324489712715149, "learning_rate": 0.0001, "loss": 0.0091, "step": 159480 }, { "epoch": 1049.2763157894738, "grad_norm": 1.2800798416137695, "learning_rate": 0.0001, "loss": 0.0097, "step": 159490 }, { "epoch": 1049.342105263158, "grad_norm": 1.3248025178909302, "learning_rate": 0.0001, "loss": 0.016, "step": 159500 }, { "epoch": 1049.407894736842, "grad_norm": 1.1701107025146484, "learning_rate": 0.0001, "loss": 0.0151, "step": 159510 }, { "epoch": 1049.4736842105262, "grad_norm": 0.9646726250648499, "learning_rate": 0.0001, "loss": 0.0124, "step": 159520 }, { "epoch": 1049.5394736842106, "grad_norm": 1.783889651298523, "learning_rate": 0.0001, "loss": 0.0129, "step": 159530 }, { "epoch": 1049.6052631578948, "grad_norm": 1.4467648267745972, "learning_rate": 0.0001, "loss": 0.0095, "step": 159540 }, { "epoch": 1049.671052631579, "grad_norm": 1.5368505716323853, "learning_rate": 0.0001, "loss": 0.0107, "step": 159550 }, { "epoch": 1049.7368421052631, "grad_norm": 1.3587666749954224, "learning_rate": 0.0001, "loss": 0.013, "step": 159560 }, { "epoch": 1049.8026315789473, "grad_norm": 1.5176007747650146, "learning_rate": 0.0001, "loss": 0.0119, "step": 159570 }, { "epoch": 1049.8684210526317, "grad_norm": 1.7935137748718262, "learning_rate": 0.0001, "loss": 0.0116, "step": 159580 }, { "epoch": 1049.9342105263158, "grad_norm": 1.3631306886672974, "learning_rate": 0.0001, "loss": 0.0142, "step": 159590 }, { "epoch": 1050.0, "grad_norm": 2.1023502349853516, "learning_rate": 0.0001, "loss": 0.0106, "step": 159600 }, { "epoch": 1050.0657894736842, "grad_norm": 1.830657720565796, "learning_rate": 0.0001, "loss": 0.0112, "step": 159610 }, { "epoch": 1050.1315789473683, "grad_norm": 1.9746367931365967, "learning_rate": 0.0001, "loss": 0.0122, "step": 159620 }, { "epoch": 1050.1973684210527, "grad_norm": 1.712441325187683, "learning_rate": 0.0001, "loss": 0.011, "step": 159630 }, { "epoch": 1050.2631578947369, "grad_norm": 1.660301923751831, "learning_rate": 0.0001, "loss": 0.0104, "step": 159640 }, { "epoch": 1050.328947368421, "grad_norm": 1.9503580331802368, "learning_rate": 0.0001, "loss": 0.0095, "step": 159650 }, { "epoch": 1050.3947368421052, "grad_norm": 1.1767072677612305, "learning_rate": 0.0001, "loss": 0.0133, "step": 159660 }, { "epoch": 1050.4605263157894, "grad_norm": 1.5259640216827393, "learning_rate": 0.0001, "loss": 0.0141, "step": 159670 }, { "epoch": 1050.5263157894738, "grad_norm": 1.5043864250183105, "learning_rate": 0.0001, "loss": 0.0148, "step": 159680 }, { "epoch": 1050.592105263158, "grad_norm": 1.5704478025436401, "learning_rate": 0.0001, "loss": 0.0101, "step": 159690 }, { "epoch": 1050.657894736842, "grad_norm": 1.335822343826294, "learning_rate": 0.0001, "loss": 0.0116, "step": 159700 }, { "epoch": 1050.7236842105262, "grad_norm": 1.1853405237197876, "learning_rate": 0.0001, "loss": 0.0115, "step": 159710 }, { "epoch": 1050.7894736842106, "grad_norm": 1.4888699054718018, "learning_rate": 0.0001, "loss": 0.0149, "step": 159720 }, { "epoch": 1050.8552631578948, "grad_norm": 1.440589427947998, "learning_rate": 0.0001, "loss": 0.0123, "step": 159730 }, { "epoch": 1050.921052631579, "grad_norm": 1.469982385635376, "learning_rate": 0.0001, "loss": 0.0155, "step": 159740 }, { "epoch": 1050.9868421052631, "grad_norm": 1.1813398599624634, "learning_rate": 0.0001, "loss": 0.0123, "step": 159750 }, { "epoch": 1051.0526315789473, "grad_norm": 1.1136564016342163, "learning_rate": 0.0001, "loss": 0.0094, "step": 159760 }, { "epoch": 1051.1184210526317, "grad_norm": 1.5759347677230835, "learning_rate": 0.0001, "loss": 0.0141, "step": 159770 }, { "epoch": 1051.1842105263158, "grad_norm": 1.3830227851867676, "learning_rate": 0.0001, "loss": 0.0085, "step": 159780 }, { "epoch": 1051.25, "grad_norm": 1.3127741813659668, "learning_rate": 0.0001, "loss": 0.0105, "step": 159790 }, { "epoch": 1051.3157894736842, "grad_norm": 1.491729497909546, "learning_rate": 0.0001, "loss": 0.0137, "step": 159800 }, { "epoch": 1051.3815789473683, "grad_norm": 1.5107343196868896, "learning_rate": 0.0001, "loss": 0.0129, "step": 159810 }, { "epoch": 1051.4473684210527, "grad_norm": 1.8397160768508911, "learning_rate": 0.0001, "loss": 0.0181, "step": 159820 }, { "epoch": 1051.5131578947369, "grad_norm": 1.7483631372451782, "learning_rate": 0.0001, "loss": 0.0111, "step": 159830 }, { "epoch": 1051.578947368421, "grad_norm": 1.736409306526184, "learning_rate": 0.0001, "loss": 0.0098, "step": 159840 }, { "epoch": 1051.6447368421052, "grad_norm": 1.334708333015442, "learning_rate": 0.0001, "loss": 0.0107, "step": 159850 }, { "epoch": 1051.7105263157894, "grad_norm": 1.5546878576278687, "learning_rate": 0.0001, "loss": 0.0095, "step": 159860 }, { "epoch": 1051.7763157894738, "grad_norm": 1.3758968114852905, "learning_rate": 0.0001, "loss": 0.014, "step": 159870 }, { "epoch": 1051.842105263158, "grad_norm": 1.0804482698440552, "learning_rate": 0.0001, "loss": 0.0124, "step": 159880 }, { "epoch": 1051.907894736842, "grad_norm": 1.4755898714065552, "learning_rate": 0.0001, "loss": 0.0158, "step": 159890 }, { "epoch": 1051.9736842105262, "grad_norm": 1.7614325284957886, "learning_rate": 0.0001, "loss": 0.0139, "step": 159900 }, { "epoch": 1052.0394736842106, "grad_norm": 1.5634264945983887, "learning_rate": 0.0001, "loss": 0.0141, "step": 159910 }, { "epoch": 1052.1052631578948, "grad_norm": 1.8151336908340454, "learning_rate": 0.0001, "loss": 0.0116, "step": 159920 }, { "epoch": 1052.171052631579, "grad_norm": 1.329181432723999, "learning_rate": 0.0001, "loss": 0.0095, "step": 159930 }, { "epoch": 1052.2368421052631, "grad_norm": 1.4303241968154907, "learning_rate": 0.0001, "loss": 0.0128, "step": 159940 }, { "epoch": 1052.3026315789473, "grad_norm": 1.3706966638565063, "learning_rate": 0.0001, "loss": 0.0114, "step": 159950 }, { "epoch": 1052.3684210526317, "grad_norm": 1.4427188634872437, "learning_rate": 0.0001, "loss": 0.012, "step": 159960 }, { "epoch": 1052.4342105263158, "grad_norm": 1.3754792213439941, "learning_rate": 0.0001, "loss": 0.0121, "step": 159970 }, { "epoch": 1052.5, "grad_norm": 1.7764042615890503, "learning_rate": 0.0001, "loss": 0.0089, "step": 159980 }, { "epoch": 1052.5657894736842, "grad_norm": 1.3410223722457886, "learning_rate": 0.0001, "loss": 0.0109, "step": 159990 }, { "epoch": 1052.6315789473683, "grad_norm": 1.4474624395370483, "learning_rate": 0.0001, "loss": 0.0157, "step": 160000 }, { "epoch": 1052.6973684210527, "grad_norm": 1.575502872467041, "learning_rate": 0.0001, "loss": 0.0131, "step": 160010 }, { "epoch": 1052.7631578947369, "grad_norm": 1.5733150243759155, "learning_rate": 0.0001, "loss": 0.0107, "step": 160020 }, { "epoch": 1052.828947368421, "grad_norm": 1.6411106586456299, "learning_rate": 0.0001, "loss": 0.0109, "step": 160030 }, { "epoch": 1052.8947368421052, "grad_norm": 1.1675375699996948, "learning_rate": 0.0001, "loss": 0.0179, "step": 160040 }, { "epoch": 1052.9605263157894, "grad_norm": 1.3264087438583374, "learning_rate": 0.0001, "loss": 0.0111, "step": 160050 }, { "epoch": 1053.0263157894738, "grad_norm": 1.4881078004837036, "learning_rate": 0.0001, "loss": 0.0136, "step": 160060 }, { "epoch": 1053.092105263158, "grad_norm": 1.6291868686676025, "learning_rate": 0.0001, "loss": 0.0166, "step": 160070 }, { "epoch": 1053.157894736842, "grad_norm": 0.878490149974823, "learning_rate": 0.0001, "loss": 0.0106, "step": 160080 }, { "epoch": 1053.2236842105262, "grad_norm": 1.8097612857818604, "learning_rate": 0.0001, "loss": 0.013, "step": 160090 }, { "epoch": 1053.2894736842106, "grad_norm": 1.3416379690170288, "learning_rate": 0.0001, "loss": 0.0091, "step": 160100 }, { "epoch": 1053.3552631578948, "grad_norm": 1.4165873527526855, "learning_rate": 0.0001, "loss": 0.011, "step": 160110 }, { "epoch": 1053.421052631579, "grad_norm": 1.777976632118225, "learning_rate": 0.0001, "loss": 0.0223, "step": 160120 }, { "epoch": 1053.4868421052631, "grad_norm": 1.4001412391662598, "learning_rate": 0.0001, "loss": 0.0106, "step": 160130 }, { "epoch": 1053.5526315789473, "grad_norm": 1.7302961349487305, "learning_rate": 0.0001, "loss": 0.0088, "step": 160140 }, { "epoch": 1053.6184210526317, "grad_norm": 1.5897679328918457, "learning_rate": 0.0001, "loss": 0.012, "step": 160150 }, { "epoch": 1053.6842105263158, "grad_norm": 1.3565458059310913, "learning_rate": 0.0001, "loss": 0.0132, "step": 160160 }, { "epoch": 1053.75, "grad_norm": 1.622744083404541, "learning_rate": 0.0001, "loss": 0.0101, "step": 160170 }, { "epoch": 1053.8157894736842, "grad_norm": 1.46808660030365, "learning_rate": 0.0001, "loss": 0.0099, "step": 160180 }, { "epoch": 1053.8815789473683, "grad_norm": 1.6729135513305664, "learning_rate": 0.0001, "loss": 0.0127, "step": 160190 }, { "epoch": 1053.9473684210527, "grad_norm": 1.2136597633361816, "learning_rate": 0.0001, "loss": 0.0119, "step": 160200 }, { "epoch": 1054.0131578947369, "grad_norm": 1.2219034433364868, "learning_rate": 0.0001, "loss": 0.012, "step": 160210 }, { "epoch": 1054.078947368421, "grad_norm": 1.1649565696716309, "learning_rate": 0.0001, "loss": 0.0092, "step": 160220 }, { "epoch": 1054.1447368421052, "grad_norm": 1.5032843351364136, "learning_rate": 0.0001, "loss": 0.0102, "step": 160230 }, { "epoch": 1054.2105263157894, "grad_norm": 1.4096457958221436, "learning_rate": 0.0001, "loss": 0.0112, "step": 160240 }, { "epoch": 1054.2763157894738, "grad_norm": 0.886164665222168, "learning_rate": 0.0001, "loss": 0.0117, "step": 160250 }, { "epoch": 1054.342105263158, "grad_norm": 1.744012475013733, "learning_rate": 0.0001, "loss": 0.014, "step": 160260 }, { "epoch": 1054.407894736842, "grad_norm": 1.4068022966384888, "learning_rate": 0.0001, "loss": 0.012, "step": 160270 }, { "epoch": 1054.4736842105262, "grad_norm": 1.3838777542114258, "learning_rate": 0.0001, "loss": 0.0135, "step": 160280 }, { "epoch": 1054.5394736842106, "grad_norm": 1.5521020889282227, "learning_rate": 0.0001, "loss": 0.0109, "step": 160290 }, { "epoch": 1054.6052631578948, "grad_norm": 1.4675061702728271, "learning_rate": 0.0001, "loss": 0.0136, "step": 160300 }, { "epoch": 1054.671052631579, "grad_norm": 1.6701167821884155, "learning_rate": 0.0001, "loss": 0.0124, "step": 160310 }, { "epoch": 1054.7368421052631, "grad_norm": 1.8575412034988403, "learning_rate": 0.0001, "loss": 0.0137, "step": 160320 }, { "epoch": 1054.8026315789473, "grad_norm": 1.5862356424331665, "learning_rate": 0.0001, "loss": 0.0135, "step": 160330 }, { "epoch": 1054.8684210526317, "grad_norm": 1.245897650718689, "learning_rate": 0.0001, "loss": 0.0099, "step": 160340 }, { "epoch": 1054.9342105263158, "grad_norm": 1.4184620380401611, "learning_rate": 0.0001, "loss": 0.0127, "step": 160350 }, { "epoch": 1055.0, "grad_norm": 1.5277787446975708, "learning_rate": 0.0001, "loss": 0.0148, "step": 160360 }, { "epoch": 1055.0657894736842, "grad_norm": 1.516723394393921, "learning_rate": 0.0001, "loss": 0.0101, "step": 160370 }, { "epoch": 1055.1315789473683, "grad_norm": 1.3988479375839233, "learning_rate": 0.0001, "loss": 0.0099, "step": 160380 }, { "epoch": 1055.1973684210527, "grad_norm": 1.3486920595169067, "learning_rate": 0.0001, "loss": 0.0141, "step": 160390 }, { "epoch": 1055.2631578947369, "grad_norm": 1.6566227674484253, "learning_rate": 0.0001, "loss": 0.0129, "step": 160400 }, { "epoch": 1055.328947368421, "grad_norm": 1.2007200717926025, "learning_rate": 0.0001, "loss": 0.009, "step": 160410 }, { "epoch": 1055.3947368421052, "grad_norm": 1.3052823543548584, "learning_rate": 0.0001, "loss": 0.0104, "step": 160420 }, { "epoch": 1055.4605263157894, "grad_norm": 1.3119632005691528, "learning_rate": 0.0001, "loss": 0.0114, "step": 160430 }, { "epoch": 1055.5263157894738, "grad_norm": 1.2278542518615723, "learning_rate": 0.0001, "loss": 0.0141, "step": 160440 }, { "epoch": 1055.592105263158, "grad_norm": 1.4525973796844482, "learning_rate": 0.0001, "loss": 0.0123, "step": 160450 }, { "epoch": 1055.657894736842, "grad_norm": 1.6416499614715576, "learning_rate": 0.0001, "loss": 0.0122, "step": 160460 }, { "epoch": 1055.7236842105262, "grad_norm": 1.5439658164978027, "learning_rate": 0.0001, "loss": 0.0147, "step": 160470 }, { "epoch": 1055.7894736842106, "grad_norm": 1.741185188293457, "learning_rate": 0.0001, "loss": 0.015, "step": 160480 }, { "epoch": 1055.8552631578948, "grad_norm": 1.628616213798523, "learning_rate": 0.0001, "loss": 0.011, "step": 160490 }, { "epoch": 1055.921052631579, "grad_norm": 1.50462007522583, "learning_rate": 0.0001, "loss": 0.0147, "step": 160500 }, { "epoch": 1055.9868421052631, "grad_norm": 1.5637738704681396, "learning_rate": 0.0001, "loss": 0.0142, "step": 160510 }, { "epoch": 1056.0526315789473, "grad_norm": 1.434662103652954, "learning_rate": 0.0001, "loss": 0.0122, "step": 160520 }, { "epoch": 1056.1184210526317, "grad_norm": 1.5718008279800415, "learning_rate": 0.0001, "loss": 0.0091, "step": 160530 }, { "epoch": 1056.1842105263158, "grad_norm": 1.2471888065338135, "learning_rate": 0.0001, "loss": 0.0141, "step": 160540 }, { "epoch": 1056.25, "grad_norm": 1.1273481845855713, "learning_rate": 0.0001, "loss": 0.0129, "step": 160550 }, { "epoch": 1056.3157894736842, "grad_norm": 1.4010478258132935, "learning_rate": 0.0001, "loss": 0.0115, "step": 160560 }, { "epoch": 1056.3815789473683, "grad_norm": 1.5407569408416748, "learning_rate": 0.0001, "loss": 0.0128, "step": 160570 }, { "epoch": 1056.4473684210527, "grad_norm": 1.5937808752059937, "learning_rate": 0.0001, "loss": 0.0116, "step": 160580 }, { "epoch": 1056.5131578947369, "grad_norm": 1.1202754974365234, "learning_rate": 0.0001, "loss": 0.0137, "step": 160590 }, { "epoch": 1056.578947368421, "grad_norm": 1.527391791343689, "learning_rate": 0.0001, "loss": 0.0127, "step": 160600 }, { "epoch": 1056.6447368421052, "grad_norm": 1.5386635065078735, "learning_rate": 0.0001, "loss": 0.014, "step": 160610 }, { "epoch": 1056.7105263157894, "grad_norm": 1.0406460762023926, "learning_rate": 0.0001, "loss": 0.0133, "step": 160620 }, { "epoch": 1056.7763157894738, "grad_norm": 1.5442757606506348, "learning_rate": 0.0001, "loss": 0.0139, "step": 160630 }, { "epoch": 1056.842105263158, "grad_norm": 1.8559290170669556, "learning_rate": 0.0001, "loss": 0.0135, "step": 160640 }, { "epoch": 1056.907894736842, "grad_norm": 1.6003379821777344, "learning_rate": 0.0001, "loss": 0.0086, "step": 160650 }, { "epoch": 1056.9736842105262, "grad_norm": 1.152891993522644, "learning_rate": 0.0001, "loss": 0.0127, "step": 160660 }, { "epoch": 1057.0394736842106, "grad_norm": 1.0844024419784546, "learning_rate": 0.0001, "loss": 0.0108, "step": 160670 }, { "epoch": 1057.1052631578948, "grad_norm": 1.6941516399383545, "learning_rate": 0.0001, "loss": 0.0121, "step": 160680 }, { "epoch": 1057.171052631579, "grad_norm": 1.082102656364441, "learning_rate": 0.0001, "loss": 0.01, "step": 160690 }, { "epoch": 1057.2368421052631, "grad_norm": 1.4877318143844604, "learning_rate": 0.0001, "loss": 0.0139, "step": 160700 }, { "epoch": 1057.3026315789473, "grad_norm": 1.263750433921814, "learning_rate": 0.0001, "loss": 0.0165, "step": 160710 }, { "epoch": 1057.3684210526317, "grad_norm": 1.6741440296173096, "learning_rate": 0.0001, "loss": 0.0117, "step": 160720 }, { "epoch": 1057.4342105263158, "grad_norm": 1.0663251876831055, "learning_rate": 0.0001, "loss": 0.0123, "step": 160730 }, { "epoch": 1057.5, "grad_norm": 2.0402801036834717, "learning_rate": 0.0001, "loss": 0.0104, "step": 160740 }, { "epoch": 1057.5657894736842, "grad_norm": 1.4927095174789429, "learning_rate": 0.0001, "loss": 0.0128, "step": 160750 }, { "epoch": 1057.6315789473683, "grad_norm": 1.4704010486602783, "learning_rate": 0.0001, "loss": 0.0139, "step": 160760 }, { "epoch": 1057.6973684210527, "grad_norm": 1.190820574760437, "learning_rate": 0.0001, "loss": 0.0133, "step": 160770 }, { "epoch": 1057.7631578947369, "grad_norm": 1.3196176290512085, "learning_rate": 0.0001, "loss": 0.0145, "step": 160780 }, { "epoch": 1057.828947368421, "grad_norm": 1.0885636806488037, "learning_rate": 0.0001, "loss": 0.0102, "step": 160790 }, { "epoch": 1057.8947368421052, "grad_norm": 1.236764907836914, "learning_rate": 0.0001, "loss": 0.0095, "step": 160800 }, { "epoch": 1057.9605263157894, "grad_norm": 1.234329104423523, "learning_rate": 0.0001, "loss": 0.0106, "step": 160810 }, { "epoch": 1058.0263157894738, "grad_norm": 1.44073486328125, "learning_rate": 0.0001, "loss": 0.0161, "step": 160820 }, { "epoch": 1058.092105263158, "grad_norm": 1.654761791229248, "learning_rate": 0.0001, "loss": 0.0139, "step": 160830 }, { "epoch": 1058.157894736842, "grad_norm": 1.4949485063552856, "learning_rate": 0.0001, "loss": 0.013, "step": 160840 }, { "epoch": 1058.2236842105262, "grad_norm": 1.336172342300415, "learning_rate": 0.0001, "loss": 0.0098, "step": 160850 }, { "epoch": 1058.2894736842106, "grad_norm": 1.4596340656280518, "learning_rate": 0.0001, "loss": 0.0125, "step": 160860 }, { "epoch": 1058.3552631578948, "grad_norm": 1.3481577634811401, "learning_rate": 0.0001, "loss": 0.0133, "step": 160870 }, { "epoch": 1058.421052631579, "grad_norm": 1.3937913179397583, "learning_rate": 0.0001, "loss": 0.011, "step": 160880 }, { "epoch": 1058.4868421052631, "grad_norm": 1.549499750137329, "learning_rate": 0.0001, "loss": 0.0144, "step": 160890 }, { "epoch": 1058.5526315789473, "grad_norm": 1.7696127891540527, "learning_rate": 0.0001, "loss": 0.0121, "step": 160900 }, { "epoch": 1058.6184210526317, "grad_norm": 1.485115647315979, "learning_rate": 0.0001, "loss": 0.0129, "step": 160910 }, { "epoch": 1058.6842105263158, "grad_norm": 1.844874620437622, "learning_rate": 0.0001, "loss": 0.0101, "step": 160920 }, { "epoch": 1058.75, "grad_norm": 1.3366740942001343, "learning_rate": 0.0001, "loss": 0.0127, "step": 160930 }, { "epoch": 1058.8157894736842, "grad_norm": 1.5893878936767578, "learning_rate": 0.0001, "loss": 0.0098, "step": 160940 }, { "epoch": 1058.8815789473683, "grad_norm": 1.4551315307617188, "learning_rate": 0.0001, "loss": 0.0145, "step": 160950 }, { "epoch": 1058.9473684210527, "grad_norm": 1.762495994567871, "learning_rate": 0.0001, "loss": 0.0116, "step": 160960 }, { "epoch": 1059.0131578947369, "grad_norm": 1.5721734762191772, "learning_rate": 0.0001, "loss": 0.0109, "step": 160970 }, { "epoch": 1059.078947368421, "grad_norm": 1.346191644668579, "learning_rate": 0.0001, "loss": 0.0116, "step": 160980 }, { "epoch": 1059.1447368421052, "grad_norm": 1.1540589332580566, "learning_rate": 0.0001, "loss": 0.0089, "step": 160990 }, { "epoch": 1059.2105263157894, "grad_norm": 1.6196191310882568, "learning_rate": 0.0001, "loss": 0.0141, "step": 161000 }, { "epoch": 1059.2763157894738, "grad_norm": 1.4569833278656006, "learning_rate": 0.0001, "loss": 0.0105, "step": 161010 }, { "epoch": 1059.342105263158, "grad_norm": 1.6615846157073975, "learning_rate": 0.0001, "loss": 0.0112, "step": 161020 }, { "epoch": 1059.407894736842, "grad_norm": 1.1541098356246948, "learning_rate": 0.0001, "loss": 0.0236, "step": 161030 }, { "epoch": 1059.4736842105262, "grad_norm": 1.2335140705108643, "learning_rate": 0.0001, "loss": 0.0113, "step": 161040 }, { "epoch": 1059.5394736842106, "grad_norm": 1.4359241724014282, "learning_rate": 0.0001, "loss": 0.0131, "step": 161050 }, { "epoch": 1059.6052631578948, "grad_norm": 1.7024576663970947, "learning_rate": 0.0001, "loss": 0.0103, "step": 161060 }, { "epoch": 1059.671052631579, "grad_norm": 1.374062180519104, "learning_rate": 0.0001, "loss": 0.0098, "step": 161070 }, { "epoch": 1059.7368421052631, "grad_norm": 1.2119286060333252, "learning_rate": 0.0001, "loss": 0.0116, "step": 161080 }, { "epoch": 1059.8026315789473, "grad_norm": 0.7505359053611755, "learning_rate": 0.0001, "loss": 0.0102, "step": 161090 }, { "epoch": 1059.8684210526317, "grad_norm": 1.058053970336914, "learning_rate": 0.0001, "loss": 0.0125, "step": 161100 }, { "epoch": 1059.9342105263158, "grad_norm": 1.1006895303726196, "learning_rate": 0.0001, "loss": 0.0135, "step": 161110 }, { "epoch": 1060.0, "grad_norm": 1.5168373584747314, "learning_rate": 0.0001, "loss": 0.0125, "step": 161120 }, { "epoch": 1060.0657894736842, "grad_norm": 1.2642347812652588, "learning_rate": 0.0001, "loss": 0.0142, "step": 161130 }, { "epoch": 1060.1315789473683, "grad_norm": 1.2011600732803345, "learning_rate": 0.0001, "loss": 0.0109, "step": 161140 }, { "epoch": 1060.1973684210527, "grad_norm": 1.587056279182434, "learning_rate": 0.0001, "loss": 0.0117, "step": 161150 }, { "epoch": 1060.2631578947369, "grad_norm": 1.4672787189483643, "learning_rate": 0.0001, "loss": 0.0118, "step": 161160 }, { "epoch": 1060.328947368421, "grad_norm": 0.9551035761833191, "learning_rate": 0.0001, "loss": 0.0121, "step": 161170 }, { "epoch": 1060.3947368421052, "grad_norm": 1.5429145097732544, "learning_rate": 0.0001, "loss": 0.0135, "step": 161180 }, { "epoch": 1060.4605263157894, "grad_norm": 1.0742647647857666, "learning_rate": 0.0001, "loss": 0.0092, "step": 161190 }, { "epoch": 1060.5263157894738, "grad_norm": 1.2162845134735107, "learning_rate": 0.0001, "loss": 0.0141, "step": 161200 }, { "epoch": 1060.592105263158, "grad_norm": 1.6566680669784546, "learning_rate": 0.0001, "loss": 0.0127, "step": 161210 }, { "epoch": 1060.657894736842, "grad_norm": 1.9096992015838623, "learning_rate": 0.0001, "loss": 0.0123, "step": 161220 }, { "epoch": 1060.7236842105262, "grad_norm": 1.4127943515777588, "learning_rate": 0.0001, "loss": 0.013, "step": 161230 }, { "epoch": 1060.7894736842106, "grad_norm": 1.3619556427001953, "learning_rate": 0.0001, "loss": 0.0114, "step": 161240 }, { "epoch": 1060.8552631578948, "grad_norm": 1.5038174390792847, "learning_rate": 0.0001, "loss": 0.01, "step": 161250 }, { "epoch": 1060.921052631579, "grad_norm": 1.5671097040176392, "learning_rate": 0.0001, "loss": 0.0152, "step": 161260 }, { "epoch": 1060.9868421052631, "grad_norm": 1.603278398513794, "learning_rate": 0.0001, "loss": 0.0133, "step": 161270 }, { "epoch": 1061.0526315789473, "grad_norm": 1.7856098413467407, "learning_rate": 0.0001, "loss": 0.0139, "step": 161280 }, { "epoch": 1061.1184210526317, "grad_norm": 1.4903985261917114, "learning_rate": 0.0001, "loss": 0.0153, "step": 161290 }, { "epoch": 1061.1842105263158, "grad_norm": 1.6055147647857666, "learning_rate": 0.0001, "loss": 0.0097, "step": 161300 }, { "epoch": 1061.25, "grad_norm": 1.5112183094024658, "learning_rate": 0.0001, "loss": 0.0145, "step": 161310 }, { "epoch": 1061.3157894736842, "grad_norm": 1.05936598777771, "learning_rate": 0.0001, "loss": 0.014, "step": 161320 }, { "epoch": 1061.3815789473683, "grad_norm": 1.662822961807251, "learning_rate": 0.0001, "loss": 0.0116, "step": 161330 }, { "epoch": 1061.4473684210527, "grad_norm": 1.438206434249878, "learning_rate": 0.0001, "loss": 0.0105, "step": 161340 }, { "epoch": 1061.5131578947369, "grad_norm": 1.3106322288513184, "learning_rate": 0.0001, "loss": 0.0107, "step": 161350 }, { "epoch": 1061.578947368421, "grad_norm": 1.474400520324707, "learning_rate": 0.0001, "loss": 0.0107, "step": 161360 }, { "epoch": 1061.6447368421052, "grad_norm": 1.7567291259765625, "learning_rate": 0.0001, "loss": 0.0107, "step": 161370 }, { "epoch": 1061.7105263157894, "grad_norm": 1.6144593954086304, "learning_rate": 0.0001, "loss": 0.0122, "step": 161380 }, { "epoch": 1061.7763157894738, "grad_norm": 1.5332543849945068, "learning_rate": 0.0001, "loss": 0.0127, "step": 161390 }, { "epoch": 1061.842105263158, "grad_norm": 1.062935709953308, "learning_rate": 0.0001, "loss": 0.0097, "step": 161400 }, { "epoch": 1061.907894736842, "grad_norm": 1.6957693099975586, "learning_rate": 0.0001, "loss": 0.0162, "step": 161410 }, { "epoch": 1061.9736842105262, "grad_norm": 1.8408470153808594, "learning_rate": 0.0001, "loss": 0.011, "step": 161420 }, { "epoch": 1062.0394736842106, "grad_norm": 1.6729111671447754, "learning_rate": 0.0001, "loss": 0.0115, "step": 161430 }, { "epoch": 1062.1052631578948, "grad_norm": 1.6977276802062988, "learning_rate": 0.0001, "loss": 0.0125, "step": 161440 }, { "epoch": 1062.171052631579, "grad_norm": 1.600317120552063, "learning_rate": 0.0001, "loss": 0.0131, "step": 161450 }, { "epoch": 1062.2368421052631, "grad_norm": 1.3275880813598633, "learning_rate": 0.0001, "loss": 0.013, "step": 161460 }, { "epoch": 1062.3026315789473, "grad_norm": 1.0776962041854858, "learning_rate": 0.0001, "loss": 0.0171, "step": 161470 }, { "epoch": 1062.3684210526317, "grad_norm": 1.1092742681503296, "learning_rate": 0.0001, "loss": 0.0125, "step": 161480 }, { "epoch": 1062.4342105263158, "grad_norm": 1.384753704071045, "learning_rate": 0.0001, "loss": 0.0087, "step": 161490 }, { "epoch": 1062.5, "grad_norm": 1.3685612678527832, "learning_rate": 0.0001, "loss": 0.0113, "step": 161500 }, { "epoch": 1062.5657894736842, "grad_norm": 1.3193219900131226, "learning_rate": 0.0001, "loss": 0.0147, "step": 161510 }, { "epoch": 1062.6315789473683, "grad_norm": 1.0703301429748535, "learning_rate": 0.0001, "loss": 0.0103, "step": 161520 }, { "epoch": 1062.6973684210527, "grad_norm": 1.1646745204925537, "learning_rate": 0.0001, "loss": 0.0136, "step": 161530 }, { "epoch": 1062.7631578947369, "grad_norm": 1.428592324256897, "learning_rate": 0.0001, "loss": 0.012, "step": 161540 }, { "epoch": 1062.828947368421, "grad_norm": 1.307466745376587, "learning_rate": 0.0001, "loss": 0.0116, "step": 161550 }, { "epoch": 1062.8947368421052, "grad_norm": 1.0814881324768066, "learning_rate": 0.0001, "loss": 0.0135, "step": 161560 }, { "epoch": 1062.9605263157894, "grad_norm": 1.0240415334701538, "learning_rate": 0.0001, "loss": 0.011, "step": 161570 }, { "epoch": 1063.0263157894738, "grad_norm": 1.7580387592315674, "learning_rate": 0.0001, "loss": 0.0111, "step": 161580 }, { "epoch": 1063.092105263158, "grad_norm": 1.7004621028900146, "learning_rate": 0.0001, "loss": 0.0139, "step": 161590 }, { "epoch": 1063.157894736842, "grad_norm": 1.830851435661316, "learning_rate": 0.0001, "loss": 0.0122, "step": 161600 }, { "epoch": 1063.2236842105262, "grad_norm": 1.7205827236175537, "learning_rate": 0.0001, "loss": 0.0134, "step": 161610 }, { "epoch": 1063.2894736842106, "grad_norm": 1.568482756614685, "learning_rate": 0.0001, "loss": 0.0119, "step": 161620 }, { "epoch": 1063.3552631578948, "grad_norm": 1.1884335279464722, "learning_rate": 0.0001, "loss": 0.0134, "step": 161630 }, { "epoch": 1063.421052631579, "grad_norm": 1.3221039772033691, "learning_rate": 0.0001, "loss": 0.0125, "step": 161640 }, { "epoch": 1063.4868421052631, "grad_norm": 1.4599745273590088, "learning_rate": 0.0001, "loss": 0.0103, "step": 161650 }, { "epoch": 1063.5526315789473, "grad_norm": 1.3359135389328003, "learning_rate": 0.0001, "loss": 0.0138, "step": 161660 }, { "epoch": 1063.6184210526317, "grad_norm": 1.157245397567749, "learning_rate": 0.0001, "loss": 0.0135, "step": 161670 }, { "epoch": 1063.6842105263158, "grad_norm": 1.5303438901901245, "learning_rate": 0.0001, "loss": 0.0123, "step": 161680 }, { "epoch": 1063.75, "grad_norm": 1.330174446105957, "learning_rate": 0.0001, "loss": 0.0128, "step": 161690 }, { "epoch": 1063.8157894736842, "grad_norm": 1.6845674514770508, "learning_rate": 0.0001, "loss": 0.0108, "step": 161700 }, { "epoch": 1063.8815789473683, "grad_norm": 1.5322730541229248, "learning_rate": 0.0001, "loss": 0.0151, "step": 161710 }, { "epoch": 1063.9473684210527, "grad_norm": 1.4840977191925049, "learning_rate": 0.0001, "loss": 0.0116, "step": 161720 }, { "epoch": 1064.0131578947369, "grad_norm": 1.4347169399261475, "learning_rate": 0.0001, "loss": 0.012, "step": 161730 }, { "epoch": 1064.078947368421, "grad_norm": 1.5822269916534424, "learning_rate": 0.0001, "loss": 0.0127, "step": 161740 }, { "epoch": 1064.1447368421052, "grad_norm": 1.4758433103561401, "learning_rate": 0.0001, "loss": 0.0087, "step": 161750 }, { "epoch": 1064.2105263157894, "grad_norm": 1.219545602798462, "learning_rate": 0.0001, "loss": 0.016, "step": 161760 }, { "epoch": 1064.2763157894738, "grad_norm": 1.6016795635223389, "learning_rate": 0.0001, "loss": 0.0107, "step": 161770 }, { "epoch": 1064.342105263158, "grad_norm": 1.3839439153671265, "learning_rate": 0.0001, "loss": 0.0146, "step": 161780 }, { "epoch": 1064.407894736842, "grad_norm": 1.4383037090301514, "learning_rate": 0.0001, "loss": 0.0105, "step": 161790 }, { "epoch": 1064.4736842105262, "grad_norm": 1.3182348012924194, "learning_rate": 0.0001, "loss": 0.0126, "step": 161800 }, { "epoch": 1064.5394736842106, "grad_norm": 1.871334195137024, "learning_rate": 0.0001, "loss": 0.0165, "step": 161810 }, { "epoch": 1064.6052631578948, "grad_norm": 1.5968903303146362, "learning_rate": 0.0001, "loss": 0.0095, "step": 161820 }, { "epoch": 1064.671052631579, "grad_norm": 1.164229154586792, "learning_rate": 0.0001, "loss": 0.012, "step": 161830 }, { "epoch": 1064.7368421052631, "grad_norm": 1.3378428220748901, "learning_rate": 0.0001, "loss": 0.0117, "step": 161840 }, { "epoch": 1064.8026315789473, "grad_norm": 1.1138004064559937, "learning_rate": 0.0001, "loss": 0.0101, "step": 161850 }, { "epoch": 1064.8684210526317, "grad_norm": 1.6792356967926025, "learning_rate": 0.0001, "loss": 0.0142, "step": 161860 }, { "epoch": 1064.9342105263158, "grad_norm": 2.0092391967773438, "learning_rate": 0.0001, "loss": 0.0126, "step": 161870 }, { "epoch": 1065.0, "grad_norm": 1.3511275053024292, "learning_rate": 0.0001, "loss": 0.0118, "step": 161880 }, { "epoch": 1065.0657894736842, "grad_norm": 1.8538490533828735, "learning_rate": 0.0001, "loss": 0.0159, "step": 161890 }, { "epoch": 1065.1315789473683, "grad_norm": 1.912077784538269, "learning_rate": 0.0001, "loss": 0.0123, "step": 161900 }, { "epoch": 1065.1973684210527, "grad_norm": 1.5841495990753174, "learning_rate": 0.0001, "loss": 0.0104, "step": 161910 }, { "epoch": 1065.2631578947369, "grad_norm": 1.8093572854995728, "learning_rate": 0.0001, "loss": 0.0146, "step": 161920 }, { "epoch": 1065.328947368421, "grad_norm": 1.4541956186294556, "learning_rate": 0.0001, "loss": 0.0124, "step": 161930 }, { "epoch": 1065.3947368421052, "grad_norm": 1.4342955350875854, "learning_rate": 0.0001, "loss": 0.0125, "step": 161940 }, { "epoch": 1065.4605263157894, "grad_norm": 1.3292162418365479, "learning_rate": 0.0001, "loss": 0.0103, "step": 161950 }, { "epoch": 1065.5263157894738, "grad_norm": 1.5645962953567505, "learning_rate": 0.0001, "loss": 0.0153, "step": 161960 }, { "epoch": 1065.592105263158, "grad_norm": 1.5531268119812012, "learning_rate": 0.0001, "loss": 0.0119, "step": 161970 }, { "epoch": 1065.657894736842, "grad_norm": 1.1033347845077515, "learning_rate": 0.0001, "loss": 0.009, "step": 161980 }, { "epoch": 1065.7236842105262, "grad_norm": 1.7340312004089355, "learning_rate": 0.0001, "loss": 0.0122, "step": 161990 }, { "epoch": 1065.7894736842106, "grad_norm": 1.4827330112457275, "learning_rate": 0.0001, "loss": 0.0124, "step": 162000 }, { "epoch": 1065.8552631578948, "grad_norm": 1.6523792743682861, "learning_rate": 0.0001, "loss": 0.011, "step": 162010 }, { "epoch": 1065.921052631579, "grad_norm": 1.7534973621368408, "learning_rate": 0.0001, "loss": 0.0117, "step": 162020 }, { "epoch": 1065.9868421052631, "grad_norm": 1.8149296045303345, "learning_rate": 0.0001, "loss": 0.0104, "step": 162030 }, { "epoch": 1066.0526315789473, "grad_norm": 1.8257046937942505, "learning_rate": 0.0001, "loss": 0.0104, "step": 162040 }, { "epoch": 1066.1184210526317, "grad_norm": 0.9733861088752747, "learning_rate": 0.0001, "loss": 0.0111, "step": 162050 }, { "epoch": 1066.1842105263158, "grad_norm": 1.3563339710235596, "learning_rate": 0.0001, "loss": 0.0123, "step": 162060 }, { "epoch": 1066.25, "grad_norm": 1.7763030529022217, "learning_rate": 0.0001, "loss": 0.0139, "step": 162070 }, { "epoch": 1066.3157894736842, "grad_norm": 1.2144804000854492, "learning_rate": 0.0001, "loss": 0.0155, "step": 162080 }, { "epoch": 1066.3815789473683, "grad_norm": 1.3875300884246826, "learning_rate": 0.0001, "loss": 0.0143, "step": 162090 }, { "epoch": 1066.4473684210527, "grad_norm": 1.5381523370742798, "learning_rate": 0.0001, "loss": 0.0113, "step": 162100 }, { "epoch": 1066.5131578947369, "grad_norm": 1.6119158267974854, "learning_rate": 0.0001, "loss": 0.0104, "step": 162110 }, { "epoch": 1066.578947368421, "grad_norm": 1.6898705959320068, "learning_rate": 0.0001, "loss": 0.0103, "step": 162120 }, { "epoch": 1066.6447368421052, "grad_norm": 1.313508152961731, "learning_rate": 0.0001, "loss": 0.0125, "step": 162130 }, { "epoch": 1066.7105263157894, "grad_norm": 1.3110461235046387, "learning_rate": 0.0001, "loss": 0.009, "step": 162140 }, { "epoch": 1066.7763157894738, "grad_norm": 1.0794556140899658, "learning_rate": 0.0001, "loss": 0.0121, "step": 162150 }, { "epoch": 1066.842105263158, "grad_norm": 1.5851826667785645, "learning_rate": 0.0001, "loss": 0.013, "step": 162160 }, { "epoch": 1066.907894736842, "grad_norm": 1.4900065660476685, "learning_rate": 0.0001, "loss": 0.0131, "step": 162170 }, { "epoch": 1066.9736842105262, "grad_norm": 1.3451790809631348, "learning_rate": 0.0001, "loss": 0.013, "step": 162180 }, { "epoch": 1067.0394736842106, "grad_norm": 1.2189077138900757, "learning_rate": 0.0001, "loss": 0.013, "step": 162190 }, { "epoch": 1067.1052631578948, "grad_norm": 1.2872014045715332, "learning_rate": 0.0001, "loss": 0.0109, "step": 162200 }, { "epoch": 1067.171052631579, "grad_norm": 1.4349291324615479, "learning_rate": 0.0001, "loss": 0.0118, "step": 162210 }, { "epoch": 1067.2368421052631, "grad_norm": 1.3393529653549194, "learning_rate": 0.0001, "loss": 0.011, "step": 162220 }, { "epoch": 1067.3026315789473, "grad_norm": 1.103103518486023, "learning_rate": 0.0001, "loss": 0.0105, "step": 162230 }, { "epoch": 1067.3684210526317, "grad_norm": 1.623225450515747, "learning_rate": 0.0001, "loss": 0.0125, "step": 162240 }, { "epoch": 1067.4342105263158, "grad_norm": 1.389825701713562, "learning_rate": 0.0001, "loss": 0.0104, "step": 162250 }, { "epoch": 1067.5, "grad_norm": 1.5744502544403076, "learning_rate": 0.0001, "loss": 0.0137, "step": 162260 }, { "epoch": 1067.5657894736842, "grad_norm": 1.45872962474823, "learning_rate": 0.0001, "loss": 0.0123, "step": 162270 }, { "epoch": 1067.6315789473683, "grad_norm": 1.8123719692230225, "learning_rate": 0.0001, "loss": 0.0146, "step": 162280 }, { "epoch": 1067.6973684210527, "grad_norm": 1.8922522068023682, "learning_rate": 0.0001, "loss": 0.0111, "step": 162290 }, { "epoch": 1067.7631578947369, "grad_norm": 1.8291364908218384, "learning_rate": 0.0001, "loss": 0.0115, "step": 162300 }, { "epoch": 1067.828947368421, "grad_norm": 1.8422110080718994, "learning_rate": 0.0001, "loss": 0.015, "step": 162310 }, { "epoch": 1067.8947368421052, "grad_norm": 1.8187830448150635, "learning_rate": 0.0001, "loss": 0.013, "step": 162320 }, { "epoch": 1067.9605263157894, "grad_norm": 1.257315993309021, "learning_rate": 0.0001, "loss": 0.0136, "step": 162330 }, { "epoch": 1068.0263157894738, "grad_norm": 1.4478521347045898, "learning_rate": 0.0001, "loss": 0.0111, "step": 162340 }, { "epoch": 1068.092105263158, "grad_norm": 1.5165292024612427, "learning_rate": 0.0001, "loss": 0.0167, "step": 162350 }, { "epoch": 1068.157894736842, "grad_norm": 1.8047367334365845, "learning_rate": 0.0001, "loss": 0.0112, "step": 162360 }, { "epoch": 1068.2236842105262, "grad_norm": 1.468492865562439, "learning_rate": 0.0001, "loss": 0.011, "step": 162370 }, { "epoch": 1068.2894736842106, "grad_norm": 1.8699233531951904, "learning_rate": 0.0001, "loss": 0.012, "step": 162380 }, { "epoch": 1068.3552631578948, "grad_norm": 1.7651070356369019, "learning_rate": 0.0001, "loss": 0.0127, "step": 162390 }, { "epoch": 1068.421052631579, "grad_norm": 1.8358877897262573, "learning_rate": 0.0001, "loss": 0.0133, "step": 162400 }, { "epoch": 1068.4868421052631, "grad_norm": 1.4638806581497192, "learning_rate": 0.0001, "loss": 0.0145, "step": 162410 }, { "epoch": 1068.5526315789473, "grad_norm": 1.485566258430481, "learning_rate": 0.0001, "loss": 0.012, "step": 162420 }, { "epoch": 1068.6184210526317, "grad_norm": 1.0476014614105225, "learning_rate": 0.0001, "loss": 0.0131, "step": 162430 }, { "epoch": 1068.6842105263158, "grad_norm": 1.3670213222503662, "learning_rate": 0.0001, "loss": 0.013, "step": 162440 }, { "epoch": 1068.75, "grad_norm": 1.3932701349258423, "learning_rate": 0.0001, "loss": 0.0122, "step": 162450 }, { "epoch": 1068.8157894736842, "grad_norm": 1.5732263326644897, "learning_rate": 0.0001, "loss": 0.0115, "step": 162460 }, { "epoch": 1068.8815789473683, "grad_norm": 1.5280956029891968, "learning_rate": 0.0001, "loss": 0.0106, "step": 162470 }, { "epoch": 1068.9473684210527, "grad_norm": 1.3281357288360596, "learning_rate": 0.0001, "loss": 0.0088, "step": 162480 }, { "epoch": 1069.0131578947369, "grad_norm": 1.7512460947036743, "learning_rate": 0.0001, "loss": 0.0145, "step": 162490 }, { "epoch": 1069.078947368421, "grad_norm": 1.7881953716278076, "learning_rate": 0.0001, "loss": 0.0132, "step": 162500 }, { "epoch": 1069.1447368421052, "grad_norm": 1.5743775367736816, "learning_rate": 0.0001, "loss": 0.0103, "step": 162510 }, { "epoch": 1069.2105263157894, "grad_norm": 1.4240962266921997, "learning_rate": 0.0001, "loss": 0.0139, "step": 162520 }, { "epoch": 1069.2763157894738, "grad_norm": 1.7160513401031494, "learning_rate": 0.0001, "loss": 0.0167, "step": 162530 }, { "epoch": 1069.342105263158, "grad_norm": 1.6025669574737549, "learning_rate": 0.0001, "loss": 0.0097, "step": 162540 }, { "epoch": 1069.407894736842, "grad_norm": 1.3773527145385742, "learning_rate": 0.0001, "loss": 0.0129, "step": 162550 }, { "epoch": 1069.4736842105262, "grad_norm": 1.4455845355987549, "learning_rate": 0.0001, "loss": 0.0136, "step": 162560 }, { "epoch": 1069.5394736842106, "grad_norm": 1.2951381206512451, "learning_rate": 0.0001, "loss": 0.0108, "step": 162570 }, { "epoch": 1069.6052631578948, "grad_norm": 1.517127513885498, "learning_rate": 0.0001, "loss": 0.0108, "step": 162580 }, { "epoch": 1069.671052631579, "grad_norm": 1.678545355796814, "learning_rate": 0.0001, "loss": 0.0121, "step": 162590 }, { "epoch": 1069.7368421052631, "grad_norm": 1.5365478992462158, "learning_rate": 0.0001, "loss": 0.0098, "step": 162600 }, { "epoch": 1069.8026315789473, "grad_norm": 1.3553171157836914, "learning_rate": 0.0001, "loss": 0.012, "step": 162610 }, { "epoch": 1069.8684210526317, "grad_norm": 1.8265208005905151, "learning_rate": 0.0001, "loss": 0.0114, "step": 162620 }, { "epoch": 1069.9342105263158, "grad_norm": 1.3780943155288696, "learning_rate": 0.0001, "loss": 0.0115, "step": 162630 }, { "epoch": 1070.0, "grad_norm": 1.2502001523971558, "learning_rate": 0.0001, "loss": 0.0133, "step": 162640 }, { "epoch": 1070.0657894736842, "grad_norm": 1.420009732246399, "learning_rate": 0.0001, "loss": 0.0124, "step": 162650 }, { "epoch": 1070.1315789473683, "grad_norm": 1.248957633972168, "learning_rate": 0.0001, "loss": 0.01, "step": 162660 }, { "epoch": 1070.1973684210527, "grad_norm": 1.4800387620925903, "learning_rate": 0.0001, "loss": 0.0108, "step": 162670 }, { "epoch": 1070.2631578947369, "grad_norm": 1.2427479028701782, "learning_rate": 0.0001, "loss": 0.0117, "step": 162680 }, { "epoch": 1070.328947368421, "grad_norm": 1.4010977745056152, "learning_rate": 0.0001, "loss": 0.0141, "step": 162690 }, { "epoch": 1070.3947368421052, "grad_norm": 1.6397697925567627, "learning_rate": 0.0001, "loss": 0.0149, "step": 162700 }, { "epoch": 1070.4605263157894, "grad_norm": 1.1707018613815308, "learning_rate": 0.0001, "loss": 0.0092, "step": 162710 }, { "epoch": 1070.5263157894738, "grad_norm": 1.283128023147583, "learning_rate": 0.0001, "loss": 0.0128, "step": 162720 }, { "epoch": 1070.592105263158, "grad_norm": 1.268645167350769, "learning_rate": 0.0001, "loss": 0.0099, "step": 162730 }, { "epoch": 1070.657894736842, "grad_norm": 1.0086090564727783, "learning_rate": 0.0001, "loss": 0.0098, "step": 162740 }, { "epoch": 1070.7236842105262, "grad_norm": 1.3251621723175049, "learning_rate": 0.0001, "loss": 0.0166, "step": 162750 }, { "epoch": 1070.7894736842106, "grad_norm": 1.0547462701797485, "learning_rate": 0.0001, "loss": 0.0098, "step": 162760 }, { "epoch": 1070.8552631578948, "grad_norm": 1.5680545568466187, "learning_rate": 0.0001, "loss": 0.0117, "step": 162770 }, { "epoch": 1070.921052631579, "grad_norm": 1.1608572006225586, "learning_rate": 0.0001, "loss": 0.0185, "step": 162780 }, { "epoch": 1070.9868421052631, "grad_norm": 1.118967056274414, "learning_rate": 0.0001, "loss": 0.0132, "step": 162790 }, { "epoch": 1071.0526315789473, "grad_norm": 1.1909410953521729, "learning_rate": 0.0001, "loss": 0.0112, "step": 162800 }, { "epoch": 1071.1184210526317, "grad_norm": 1.5186103582382202, "learning_rate": 0.0001, "loss": 0.0135, "step": 162810 }, { "epoch": 1071.1842105263158, "grad_norm": 1.1622649431228638, "learning_rate": 0.0001, "loss": 0.0105, "step": 162820 }, { "epoch": 1071.25, "grad_norm": 1.2634302377700806, "learning_rate": 0.0001, "loss": 0.012, "step": 162830 }, { "epoch": 1071.3157894736842, "grad_norm": 1.4343174695968628, "learning_rate": 0.0001, "loss": 0.0134, "step": 162840 }, { "epoch": 1071.3815789473683, "grad_norm": 1.4336086511611938, "learning_rate": 0.0001, "loss": 0.0122, "step": 162850 }, { "epoch": 1071.4473684210527, "grad_norm": 1.4302364587783813, "learning_rate": 0.0001, "loss": 0.0148, "step": 162860 }, { "epoch": 1071.5131578947369, "grad_norm": 1.1784566640853882, "learning_rate": 0.0001, "loss": 0.0145, "step": 162870 }, { "epoch": 1071.578947368421, "grad_norm": 1.6038438081741333, "learning_rate": 0.0001, "loss": 0.0139, "step": 162880 }, { "epoch": 1071.6447368421052, "grad_norm": 1.1297212839126587, "learning_rate": 0.0001, "loss": 0.0159, "step": 162890 }, { "epoch": 1071.7105263157894, "grad_norm": 1.1182271242141724, "learning_rate": 0.0001, "loss": 0.0126, "step": 162900 }, { "epoch": 1071.7763157894738, "grad_norm": 1.1178569793701172, "learning_rate": 0.0001, "loss": 0.0136, "step": 162910 }, { "epoch": 1071.842105263158, "grad_norm": 1.4341624975204468, "learning_rate": 0.0001, "loss": 0.0089, "step": 162920 }, { "epoch": 1071.907894736842, "grad_norm": 1.897962212562561, "learning_rate": 0.0001, "loss": 0.0133, "step": 162930 }, { "epoch": 1071.9736842105262, "grad_norm": 1.144287347793579, "learning_rate": 0.0001, "loss": 0.0127, "step": 162940 }, { "epoch": 1072.0394736842106, "grad_norm": 1.6453337669372559, "learning_rate": 0.0001, "loss": 0.0105, "step": 162950 }, { "epoch": 1072.1052631578948, "grad_norm": 1.4387258291244507, "learning_rate": 0.0001, "loss": 0.0122, "step": 162960 }, { "epoch": 1072.171052631579, "grad_norm": 1.5387721061706543, "learning_rate": 0.0001, "loss": 0.0111, "step": 162970 }, { "epoch": 1072.2368421052631, "grad_norm": 1.1898126602172852, "learning_rate": 0.0001, "loss": 0.0105, "step": 162980 }, { "epoch": 1072.3026315789473, "grad_norm": 0.8838199377059937, "learning_rate": 0.0001, "loss": 0.0124, "step": 162990 }, { "epoch": 1072.3684210526317, "grad_norm": 1.5964339971542358, "learning_rate": 0.0001, "loss": 0.0134, "step": 163000 }, { "epoch": 1072.4342105263158, "grad_norm": 1.3046627044677734, "learning_rate": 0.0001, "loss": 0.012, "step": 163010 }, { "epoch": 1072.5, "grad_norm": 1.2337905168533325, "learning_rate": 0.0001, "loss": 0.0125, "step": 163020 }, { "epoch": 1072.5657894736842, "grad_norm": 1.1616069078445435, "learning_rate": 0.0001, "loss": 0.0126, "step": 163030 }, { "epoch": 1072.6315789473683, "grad_norm": 1.1819647550582886, "learning_rate": 0.0001, "loss": 0.0138, "step": 163040 }, { "epoch": 1072.6973684210527, "grad_norm": 1.419175386428833, "learning_rate": 0.0001, "loss": 0.016, "step": 163050 }, { "epoch": 1072.7631578947369, "grad_norm": 1.2347959280014038, "learning_rate": 0.0001, "loss": 0.0126, "step": 163060 }, { "epoch": 1072.828947368421, "grad_norm": 1.499179482460022, "learning_rate": 0.0001, "loss": 0.0094, "step": 163070 }, { "epoch": 1072.8947368421052, "grad_norm": 1.5363789796829224, "learning_rate": 0.0001, "loss": 0.0126, "step": 163080 }, { "epoch": 1072.9605263157894, "grad_norm": 1.2667474746704102, "learning_rate": 0.0001, "loss": 0.0138, "step": 163090 }, { "epoch": 1073.0263157894738, "grad_norm": 1.5002894401550293, "learning_rate": 0.0001, "loss": 0.0113, "step": 163100 }, { "epoch": 1073.092105263158, "grad_norm": 1.478769302368164, "learning_rate": 0.0001, "loss": 0.0103, "step": 163110 }, { "epoch": 1073.157894736842, "grad_norm": 1.4134228229522705, "learning_rate": 0.0001, "loss": 0.0114, "step": 163120 }, { "epoch": 1073.2236842105262, "grad_norm": 1.402923583984375, "learning_rate": 0.0001, "loss": 0.0102, "step": 163130 }, { "epoch": 1073.2894736842106, "grad_norm": 1.6175434589385986, "learning_rate": 0.0001, "loss": 0.0176, "step": 163140 }, { "epoch": 1073.3552631578948, "grad_norm": 1.3090664148330688, "learning_rate": 0.0001, "loss": 0.0106, "step": 163150 }, { "epoch": 1073.421052631579, "grad_norm": 1.4237815141677856, "learning_rate": 0.0001, "loss": 0.0121, "step": 163160 }, { "epoch": 1073.4868421052631, "grad_norm": 1.2653119564056396, "learning_rate": 0.0001, "loss": 0.0095, "step": 163170 }, { "epoch": 1073.5526315789473, "grad_norm": 1.2795823812484741, "learning_rate": 0.0001, "loss": 0.0138, "step": 163180 }, { "epoch": 1073.6184210526317, "grad_norm": 1.7209384441375732, "learning_rate": 0.0001, "loss": 0.0106, "step": 163190 }, { "epoch": 1073.6842105263158, "grad_norm": 1.5749536752700806, "learning_rate": 0.0001, "loss": 0.0133, "step": 163200 }, { "epoch": 1073.75, "grad_norm": 1.840319037437439, "learning_rate": 0.0001, "loss": 0.0166, "step": 163210 }, { "epoch": 1073.8157894736842, "grad_norm": 1.4070910215377808, "learning_rate": 0.0001, "loss": 0.0148, "step": 163220 }, { "epoch": 1073.8815789473683, "grad_norm": 1.4190356731414795, "learning_rate": 0.0001, "loss": 0.0119, "step": 163230 }, { "epoch": 1073.9473684210527, "grad_norm": 1.4047889709472656, "learning_rate": 0.0001, "loss": 0.0111, "step": 163240 }, { "epoch": 1074.0131578947369, "grad_norm": 1.4801007509231567, "learning_rate": 0.0001, "loss": 0.0118, "step": 163250 }, { "epoch": 1074.078947368421, "grad_norm": 1.1926954984664917, "learning_rate": 0.0001, "loss": 0.0175, "step": 163260 }, { "epoch": 1074.1447368421052, "grad_norm": 1.188342809677124, "learning_rate": 0.0001, "loss": 0.0107, "step": 163270 }, { "epoch": 1074.2105263157894, "grad_norm": 1.1768733263015747, "learning_rate": 0.0001, "loss": 0.0119, "step": 163280 }, { "epoch": 1074.2763157894738, "grad_norm": 1.1675142049789429, "learning_rate": 0.0001, "loss": 0.0113, "step": 163290 }, { "epoch": 1074.342105263158, "grad_norm": 1.275773525238037, "learning_rate": 0.0001, "loss": 0.0119, "step": 163300 }, { "epoch": 1074.407894736842, "grad_norm": 0.9738022089004517, "learning_rate": 0.0001, "loss": 0.0114, "step": 163310 }, { "epoch": 1074.4736842105262, "grad_norm": 1.8680578470230103, "learning_rate": 0.0001, "loss": 0.0133, "step": 163320 }, { "epoch": 1074.5394736842106, "grad_norm": 1.4932405948638916, "learning_rate": 0.0001, "loss": 0.0143, "step": 163330 }, { "epoch": 1074.6052631578948, "grad_norm": 1.3541828393936157, "learning_rate": 0.0001, "loss": 0.0127, "step": 163340 }, { "epoch": 1074.671052631579, "grad_norm": 1.6812117099761963, "learning_rate": 0.0001, "loss": 0.0117, "step": 163350 }, { "epoch": 1074.7368421052631, "grad_norm": 1.8424828052520752, "learning_rate": 0.0001, "loss": 0.0092, "step": 163360 }, { "epoch": 1074.8026315789473, "grad_norm": 1.4340758323669434, "learning_rate": 0.0001, "loss": 0.0114, "step": 163370 }, { "epoch": 1074.8684210526317, "grad_norm": 1.485103964805603, "learning_rate": 0.0001, "loss": 0.0125, "step": 163380 }, { "epoch": 1074.9342105263158, "grad_norm": 1.1952072381973267, "learning_rate": 0.0001, "loss": 0.0132, "step": 163390 }, { "epoch": 1075.0, "grad_norm": 1.0952770709991455, "learning_rate": 0.0001, "loss": 0.0141, "step": 163400 }, { "epoch": 1075.0657894736842, "grad_norm": 1.346866250038147, "learning_rate": 0.0001, "loss": 0.0111, "step": 163410 }, { "epoch": 1075.1315789473683, "grad_norm": 1.2295862436294556, "learning_rate": 0.0001, "loss": 0.0136, "step": 163420 }, { "epoch": 1075.1973684210527, "grad_norm": 1.7625447511672974, "learning_rate": 0.0001, "loss": 0.0142, "step": 163430 }, { "epoch": 1075.2631578947369, "grad_norm": 1.3077967166900635, "learning_rate": 0.0001, "loss": 0.0115, "step": 163440 }, { "epoch": 1075.328947368421, "grad_norm": 1.7703052759170532, "learning_rate": 0.0001, "loss": 0.0127, "step": 163450 }, { "epoch": 1075.3947368421052, "grad_norm": 1.5097556114196777, "learning_rate": 0.0001, "loss": 0.0136, "step": 163460 }, { "epoch": 1075.4605263157894, "grad_norm": 1.2666733264923096, "learning_rate": 0.0001, "loss": 0.0126, "step": 163470 }, { "epoch": 1075.5263157894738, "grad_norm": 1.0516818761825562, "learning_rate": 0.0001, "loss": 0.0113, "step": 163480 }, { "epoch": 1075.592105263158, "grad_norm": 1.3805603981018066, "learning_rate": 0.0001, "loss": 0.0094, "step": 163490 }, { "epoch": 1075.657894736842, "grad_norm": 1.3761082887649536, "learning_rate": 0.0001, "loss": 0.009, "step": 163500 }, { "epoch": 1075.7236842105262, "grad_norm": 1.675133466720581, "learning_rate": 0.0001, "loss": 0.012, "step": 163510 }, { "epoch": 1075.7894736842106, "grad_norm": 1.2231335639953613, "learning_rate": 0.0001, "loss": 0.0112, "step": 163520 }, { "epoch": 1075.8552631578948, "grad_norm": 0.8638436794281006, "learning_rate": 0.0001, "loss": 0.0146, "step": 163530 }, { "epoch": 1075.921052631579, "grad_norm": 1.307335376739502, "learning_rate": 0.0001, "loss": 0.0113, "step": 163540 }, { "epoch": 1075.9868421052631, "grad_norm": 1.3619062900543213, "learning_rate": 0.0001, "loss": 0.0161, "step": 163550 }, { "epoch": 1076.0526315789473, "grad_norm": 0.8782891035079956, "learning_rate": 0.0001, "loss": 0.0102, "step": 163560 }, { "epoch": 1076.1184210526317, "grad_norm": 1.4723190069198608, "learning_rate": 0.0001, "loss": 0.0133, "step": 163570 }, { "epoch": 1076.1842105263158, "grad_norm": 1.4185415506362915, "learning_rate": 0.0001, "loss": 0.0138, "step": 163580 }, { "epoch": 1076.25, "grad_norm": 1.6796464920043945, "learning_rate": 0.0001, "loss": 0.0106, "step": 163590 }, { "epoch": 1076.3157894736842, "grad_norm": 1.3258624076843262, "learning_rate": 0.0001, "loss": 0.0134, "step": 163600 }, { "epoch": 1076.3815789473683, "grad_norm": 1.6085623502731323, "learning_rate": 0.0001, "loss": 0.0128, "step": 163610 }, { "epoch": 1076.4473684210527, "grad_norm": 1.3876878023147583, "learning_rate": 0.0001, "loss": 0.0105, "step": 163620 }, { "epoch": 1076.5131578947369, "grad_norm": 1.8652887344360352, "learning_rate": 0.0001, "loss": 0.0105, "step": 163630 }, { "epoch": 1076.578947368421, "grad_norm": 1.6167093515396118, "learning_rate": 0.0001, "loss": 0.0129, "step": 163640 }, { "epoch": 1076.6447368421052, "grad_norm": 1.5053414106369019, "learning_rate": 0.0001, "loss": 0.0168, "step": 163650 }, { "epoch": 1076.7105263157894, "grad_norm": 1.7849254608154297, "learning_rate": 0.0001, "loss": 0.013, "step": 163660 }, { "epoch": 1076.7763157894738, "grad_norm": 1.336386799812317, "learning_rate": 0.0001, "loss": 0.0107, "step": 163670 }, { "epoch": 1076.842105263158, "grad_norm": 1.8124892711639404, "learning_rate": 0.0001, "loss": 0.0093, "step": 163680 }, { "epoch": 1076.907894736842, "grad_norm": 1.462072491645813, "learning_rate": 0.0001, "loss": 0.0125, "step": 163690 }, { "epoch": 1076.9736842105262, "grad_norm": 1.8796708583831787, "learning_rate": 0.0001, "loss": 0.015, "step": 163700 }, { "epoch": 1077.0394736842106, "grad_norm": 1.402754783630371, "learning_rate": 0.0001, "loss": 0.0111, "step": 163710 }, { "epoch": 1077.1052631578948, "grad_norm": 1.0947943925857544, "learning_rate": 0.0001, "loss": 0.0121, "step": 163720 }, { "epoch": 1077.171052631579, "grad_norm": 1.675283432006836, "learning_rate": 0.0001, "loss": 0.0147, "step": 163730 }, { "epoch": 1077.2368421052631, "grad_norm": 1.4936466217041016, "learning_rate": 0.0001, "loss": 0.0175, "step": 163740 }, { "epoch": 1077.3026315789473, "grad_norm": 1.3408342599868774, "learning_rate": 0.0001, "loss": 0.0101, "step": 163750 }, { "epoch": 1077.3684210526317, "grad_norm": 1.3923789262771606, "learning_rate": 0.0001, "loss": 0.0107, "step": 163760 }, { "epoch": 1077.4342105263158, "grad_norm": 1.699630618095398, "learning_rate": 0.0001, "loss": 0.0165, "step": 163770 }, { "epoch": 1077.5, "grad_norm": 1.9220176935195923, "learning_rate": 0.0001, "loss": 0.0096, "step": 163780 }, { "epoch": 1077.5657894736842, "grad_norm": 1.3235267400741577, "learning_rate": 0.0001, "loss": 0.0108, "step": 163790 }, { "epoch": 1077.6315789473683, "grad_norm": 1.7419575452804565, "learning_rate": 0.0001, "loss": 0.0094, "step": 163800 }, { "epoch": 1077.6973684210527, "grad_norm": 1.5587135553359985, "learning_rate": 0.0001, "loss": 0.0145, "step": 163810 }, { "epoch": 1077.7631578947369, "grad_norm": 1.2391424179077148, "learning_rate": 0.0001, "loss": 0.012, "step": 163820 }, { "epoch": 1077.828947368421, "grad_norm": 1.4531630277633667, "learning_rate": 0.0001, "loss": 0.0104, "step": 163830 }, { "epoch": 1077.8947368421052, "grad_norm": 1.300506591796875, "learning_rate": 0.0001, "loss": 0.0104, "step": 163840 }, { "epoch": 1077.9605263157894, "grad_norm": 1.2238130569458008, "learning_rate": 0.0001, "loss": 0.0126, "step": 163850 }, { "epoch": 1078.0263157894738, "grad_norm": 1.8424835205078125, "learning_rate": 0.0001, "loss": 0.0107, "step": 163860 }, { "epoch": 1078.092105263158, "grad_norm": 1.9030340909957886, "learning_rate": 0.0001, "loss": 0.0143, "step": 163870 }, { "epoch": 1078.157894736842, "grad_norm": 1.3268013000488281, "learning_rate": 0.0001, "loss": 0.0115, "step": 163880 }, { "epoch": 1078.2236842105262, "grad_norm": 1.2713903188705444, "learning_rate": 0.0001, "loss": 0.0151, "step": 163890 }, { "epoch": 1078.2894736842106, "grad_norm": 1.2535009384155273, "learning_rate": 0.0001, "loss": 0.0104, "step": 163900 }, { "epoch": 1078.3552631578948, "grad_norm": 1.4391664266586304, "learning_rate": 0.0001, "loss": 0.0086, "step": 163910 }, { "epoch": 1078.421052631579, "grad_norm": 1.583138108253479, "learning_rate": 0.0001, "loss": 0.01, "step": 163920 }, { "epoch": 1078.4868421052631, "grad_norm": 1.2359567880630493, "learning_rate": 0.0001, "loss": 0.0134, "step": 163930 }, { "epoch": 1078.5526315789473, "grad_norm": 1.3311368227005005, "learning_rate": 0.0001, "loss": 0.012, "step": 163940 }, { "epoch": 1078.6184210526317, "grad_norm": 1.0801398754119873, "learning_rate": 0.0001, "loss": 0.0138, "step": 163950 }, { "epoch": 1078.6842105263158, "grad_norm": 1.5094949007034302, "learning_rate": 0.0001, "loss": 0.0144, "step": 163960 }, { "epoch": 1078.75, "grad_norm": 1.315773367881775, "learning_rate": 0.0001, "loss": 0.0121, "step": 163970 }, { "epoch": 1078.8157894736842, "grad_norm": 1.5636934041976929, "learning_rate": 0.0001, "loss": 0.0101, "step": 163980 }, { "epoch": 1078.8815789473683, "grad_norm": 1.1148691177368164, "learning_rate": 0.0001, "loss": 0.0168, "step": 163990 }, { "epoch": 1078.9473684210527, "grad_norm": 1.7785229682922363, "learning_rate": 0.0001, "loss": 0.0125, "step": 164000 }, { "epoch": 1079.0131578947369, "grad_norm": 1.3737281560897827, "learning_rate": 0.0001, "loss": 0.0084, "step": 164010 }, { "epoch": 1079.078947368421, "grad_norm": 1.243870735168457, "learning_rate": 0.0001, "loss": 0.0119, "step": 164020 }, { "epoch": 1079.1447368421052, "grad_norm": 1.505688190460205, "learning_rate": 0.0001, "loss": 0.0128, "step": 164030 }, { "epoch": 1079.2105263157894, "grad_norm": 1.4186818599700928, "learning_rate": 0.0001, "loss": 0.0116, "step": 164040 }, { "epoch": 1079.2763157894738, "grad_norm": 1.3556034564971924, "learning_rate": 0.0001, "loss": 0.0144, "step": 164050 }, { "epoch": 1079.342105263158, "grad_norm": 1.789128303527832, "learning_rate": 0.0001, "loss": 0.0117, "step": 164060 }, { "epoch": 1079.407894736842, "grad_norm": 1.4291408061981201, "learning_rate": 0.0001, "loss": 0.0119, "step": 164070 }, { "epoch": 1079.4736842105262, "grad_norm": 1.7344779968261719, "learning_rate": 0.0001, "loss": 0.0124, "step": 164080 }, { "epoch": 1079.5394736842106, "grad_norm": 1.2531548738479614, "learning_rate": 0.0001, "loss": 0.0126, "step": 164090 }, { "epoch": 1079.6052631578948, "grad_norm": 1.9899601936340332, "learning_rate": 0.0001, "loss": 0.0127, "step": 164100 }, { "epoch": 1079.671052631579, "grad_norm": 1.522618055343628, "learning_rate": 0.0001, "loss": 0.0104, "step": 164110 }, { "epoch": 1079.7368421052631, "grad_norm": 1.4068292379379272, "learning_rate": 0.0001, "loss": 0.0159, "step": 164120 }, { "epoch": 1079.8026315789473, "grad_norm": 1.4026659727096558, "learning_rate": 0.0001, "loss": 0.0128, "step": 164130 }, { "epoch": 1079.8684210526317, "grad_norm": 2.008085250854492, "learning_rate": 0.0001, "loss": 0.0101, "step": 164140 }, { "epoch": 1079.9342105263158, "grad_norm": 1.0722002983093262, "learning_rate": 0.0001, "loss": 0.0094, "step": 164150 }, { "epoch": 1080.0, "grad_norm": 1.6833829879760742, "learning_rate": 0.0001, "loss": 0.0131, "step": 164160 }, { "epoch": 1080.0657894736842, "grad_norm": 1.8805797100067139, "learning_rate": 0.0001, "loss": 0.011, "step": 164170 }, { "epoch": 1080.1315789473683, "grad_norm": 1.457138180732727, "learning_rate": 0.0001, "loss": 0.0133, "step": 164180 }, { "epoch": 1080.1973684210527, "grad_norm": 1.603441834449768, "learning_rate": 0.0001, "loss": 0.011, "step": 164190 }, { "epoch": 1080.2631578947369, "grad_norm": 1.8470001220703125, "learning_rate": 0.0001, "loss": 0.009, "step": 164200 }, { "epoch": 1080.328947368421, "grad_norm": 1.40768563747406, "learning_rate": 0.0001, "loss": 0.0103, "step": 164210 }, { "epoch": 1080.3947368421052, "grad_norm": 1.57893705368042, "learning_rate": 0.0001, "loss": 0.0125, "step": 164220 }, { "epoch": 1080.4605263157894, "grad_norm": 1.1438038349151611, "learning_rate": 0.0001, "loss": 0.0134, "step": 164230 }, { "epoch": 1080.5263157894738, "grad_norm": 1.5442379713058472, "learning_rate": 0.0001, "loss": 0.0124, "step": 164240 }, { "epoch": 1080.592105263158, "grad_norm": 1.4736543893814087, "learning_rate": 0.0001, "loss": 0.0124, "step": 164250 }, { "epoch": 1080.657894736842, "grad_norm": 1.5882315635681152, "learning_rate": 0.0001, "loss": 0.0097, "step": 164260 }, { "epoch": 1080.7236842105262, "grad_norm": 1.7513219118118286, "learning_rate": 0.0001, "loss": 0.0145, "step": 164270 }, { "epoch": 1080.7894736842106, "grad_norm": 1.6143101453781128, "learning_rate": 0.0001, "loss": 0.0145, "step": 164280 }, { "epoch": 1080.8552631578948, "grad_norm": 1.4994951486587524, "learning_rate": 0.0001, "loss": 0.0131, "step": 164290 }, { "epoch": 1080.921052631579, "grad_norm": 1.352810263633728, "learning_rate": 0.0001, "loss": 0.0129, "step": 164300 }, { "epoch": 1080.9868421052631, "grad_norm": 0.9762588739395142, "learning_rate": 0.0001, "loss": 0.0128, "step": 164310 }, { "epoch": 1081.0526315789473, "grad_norm": 1.5785064697265625, "learning_rate": 0.0001, "loss": 0.0091, "step": 164320 }, { "epoch": 1081.1184210526317, "grad_norm": 1.7829941511154175, "learning_rate": 0.0001, "loss": 0.0134, "step": 164330 }, { "epoch": 1081.1842105263158, "grad_norm": 1.3608695268630981, "learning_rate": 0.0001, "loss": 0.0124, "step": 164340 }, { "epoch": 1081.25, "grad_norm": 1.5107371807098389, "learning_rate": 0.0001, "loss": 0.0129, "step": 164350 }, { "epoch": 1081.3157894736842, "grad_norm": 1.4806904792785645, "learning_rate": 0.0001, "loss": 0.0174, "step": 164360 }, { "epoch": 1081.3815789473683, "grad_norm": 1.2276611328125, "learning_rate": 0.0001, "loss": 0.012, "step": 164370 }, { "epoch": 1081.4473684210527, "grad_norm": 1.265260100364685, "learning_rate": 0.0001, "loss": 0.0093, "step": 164380 }, { "epoch": 1081.5131578947369, "grad_norm": 1.3750488758087158, "learning_rate": 0.0001, "loss": 0.009, "step": 164390 }, { "epoch": 1081.578947368421, "grad_norm": 1.104292869567871, "learning_rate": 0.0001, "loss": 0.0133, "step": 164400 }, { "epoch": 1081.6447368421052, "grad_norm": 1.573429822921753, "learning_rate": 0.0001, "loss": 0.0103, "step": 164410 }, { "epoch": 1081.7105263157894, "grad_norm": 1.7831203937530518, "learning_rate": 0.0001, "loss": 0.013, "step": 164420 }, { "epoch": 1081.7763157894738, "grad_norm": 1.265946626663208, "learning_rate": 0.0001, "loss": 0.0126, "step": 164430 }, { "epoch": 1081.842105263158, "grad_norm": 1.021244764328003, "learning_rate": 0.0001, "loss": 0.0142, "step": 164440 }, { "epoch": 1081.907894736842, "grad_norm": 1.3533190488815308, "learning_rate": 0.0001, "loss": 0.0125, "step": 164450 }, { "epoch": 1081.9736842105262, "grad_norm": 1.6037650108337402, "learning_rate": 0.0001, "loss": 0.0141, "step": 164460 }, { "epoch": 1082.0394736842106, "grad_norm": 2.156939744949341, "learning_rate": 0.0001, "loss": 0.0118, "step": 164470 }, { "epoch": 1082.1052631578948, "grad_norm": 1.3939895629882812, "learning_rate": 0.0001, "loss": 0.0093, "step": 164480 }, { "epoch": 1082.171052631579, "grad_norm": 1.2106181383132935, "learning_rate": 0.0001, "loss": 0.0114, "step": 164490 }, { "epoch": 1082.2368421052631, "grad_norm": 1.6697113513946533, "learning_rate": 0.0001, "loss": 0.0088, "step": 164500 }, { "epoch": 1082.3026315789473, "grad_norm": 1.4099509716033936, "learning_rate": 0.0001, "loss": 0.0138, "step": 164510 }, { "epoch": 1082.3684210526317, "grad_norm": 2.0556154251098633, "learning_rate": 0.0001, "loss": 0.0135, "step": 164520 }, { "epoch": 1082.4342105263158, "grad_norm": 1.5012454986572266, "learning_rate": 0.0001, "loss": 0.0147, "step": 164530 }, { "epoch": 1082.5, "grad_norm": 1.2190136909484863, "learning_rate": 0.0001, "loss": 0.0146, "step": 164540 }, { "epoch": 1082.5657894736842, "grad_norm": 1.5415446758270264, "learning_rate": 0.0001, "loss": 0.0109, "step": 164550 }, { "epoch": 1082.6315789473683, "grad_norm": 1.6513171195983887, "learning_rate": 0.0001, "loss": 0.0122, "step": 164560 }, { "epoch": 1082.6973684210527, "grad_norm": 1.6129233837127686, "learning_rate": 0.0001, "loss": 0.0135, "step": 164570 }, { "epoch": 1082.7631578947369, "grad_norm": 1.3817050457000732, "learning_rate": 0.0001, "loss": 0.0139, "step": 164580 }, { "epoch": 1082.828947368421, "grad_norm": 1.0258394479751587, "learning_rate": 0.0001, "loss": 0.0119, "step": 164590 }, { "epoch": 1082.8947368421052, "grad_norm": 1.331117868423462, "learning_rate": 0.0001, "loss": 0.0114, "step": 164600 }, { "epoch": 1082.9605263157894, "grad_norm": 1.3673145771026611, "learning_rate": 0.0001, "loss": 0.0103, "step": 164610 }, { "epoch": 1083.0263157894738, "grad_norm": 1.4203897714614868, "learning_rate": 0.0001, "loss": 0.0118, "step": 164620 }, { "epoch": 1083.092105263158, "grad_norm": 1.3213309049606323, "learning_rate": 0.0001, "loss": 0.0114, "step": 164630 }, { "epoch": 1083.157894736842, "grad_norm": 1.4811047315597534, "learning_rate": 0.0001, "loss": 0.0108, "step": 164640 }, { "epoch": 1083.2236842105262, "grad_norm": 1.2464594841003418, "learning_rate": 0.0001, "loss": 0.0125, "step": 164650 }, { "epoch": 1083.2894736842106, "grad_norm": 1.5731980800628662, "learning_rate": 0.0001, "loss": 0.0117, "step": 164660 }, { "epoch": 1083.3552631578948, "grad_norm": 1.2429035902023315, "learning_rate": 0.0001, "loss": 0.0139, "step": 164670 }, { "epoch": 1083.421052631579, "grad_norm": 1.3985689878463745, "learning_rate": 0.0001, "loss": 0.0114, "step": 164680 }, { "epoch": 1083.4868421052631, "grad_norm": 1.2472527027130127, "learning_rate": 0.0001, "loss": 0.0113, "step": 164690 }, { "epoch": 1083.5526315789473, "grad_norm": 1.2804405689239502, "learning_rate": 0.0001, "loss": 0.0147, "step": 164700 }, { "epoch": 1083.6184210526317, "grad_norm": 1.2757971286773682, "learning_rate": 0.0001, "loss": 0.0147, "step": 164710 }, { "epoch": 1083.6842105263158, "grad_norm": 1.7072198390960693, "learning_rate": 0.0001, "loss": 0.0126, "step": 164720 }, { "epoch": 1083.75, "grad_norm": 1.631351113319397, "learning_rate": 0.0001, "loss": 0.0149, "step": 164730 }, { "epoch": 1083.8157894736842, "grad_norm": 1.3846536874771118, "learning_rate": 0.0001, "loss": 0.0104, "step": 164740 }, { "epoch": 1083.8815789473683, "grad_norm": 0.9723584055900574, "learning_rate": 0.0001, "loss": 0.0115, "step": 164750 }, { "epoch": 1083.9473684210527, "grad_norm": 1.0762685537338257, "learning_rate": 0.0001, "loss": 0.0114, "step": 164760 }, { "epoch": 1084.0131578947369, "grad_norm": 1.2368509769439697, "learning_rate": 0.0001, "loss": 0.0121, "step": 164770 }, { "epoch": 1084.078947368421, "grad_norm": 1.2991316318511963, "learning_rate": 0.0001, "loss": 0.0124, "step": 164780 }, { "epoch": 1084.1447368421052, "grad_norm": 1.5925788879394531, "learning_rate": 0.0001, "loss": 0.0095, "step": 164790 }, { "epoch": 1084.2105263157894, "grad_norm": 1.0585451126098633, "learning_rate": 0.0001, "loss": 0.0109, "step": 164800 }, { "epoch": 1084.2763157894738, "grad_norm": 1.5395172834396362, "learning_rate": 0.0001, "loss": 0.014, "step": 164810 }, { "epoch": 1084.342105263158, "grad_norm": 1.554571270942688, "learning_rate": 0.0001, "loss": 0.0095, "step": 164820 }, { "epoch": 1084.407894736842, "grad_norm": 0.9215600490570068, "learning_rate": 0.0001, "loss": 0.0176, "step": 164830 }, { "epoch": 1084.4736842105262, "grad_norm": 1.540976643562317, "learning_rate": 0.0001, "loss": 0.0122, "step": 164840 }, { "epoch": 1084.5394736842106, "grad_norm": 1.377983570098877, "learning_rate": 0.0001, "loss": 0.0108, "step": 164850 }, { "epoch": 1084.6052631578948, "grad_norm": 1.561614751815796, "learning_rate": 0.0001, "loss": 0.0133, "step": 164860 }, { "epoch": 1084.671052631579, "grad_norm": 1.458068609237671, "learning_rate": 0.0001, "loss": 0.0113, "step": 164870 }, { "epoch": 1084.7368421052631, "grad_norm": 1.2859094142913818, "learning_rate": 0.0001, "loss": 0.0102, "step": 164880 }, { "epoch": 1084.8026315789473, "grad_norm": 1.7852593660354614, "learning_rate": 0.0001, "loss": 0.0145, "step": 164890 }, { "epoch": 1084.8684210526317, "grad_norm": 1.7374995946884155, "learning_rate": 0.0001, "loss": 0.0112, "step": 164900 }, { "epoch": 1084.9342105263158, "grad_norm": 1.4000270366668701, "learning_rate": 0.0001, "loss": 0.0158, "step": 164910 }, { "epoch": 1085.0, "grad_norm": 1.6536649465560913, "learning_rate": 0.0001, "loss": 0.0131, "step": 164920 }, { "epoch": 1085.0657894736842, "grad_norm": 1.3526870012283325, "learning_rate": 0.0001, "loss": 0.011, "step": 164930 }, { "epoch": 1085.1315789473683, "grad_norm": 1.4710947275161743, "learning_rate": 0.0001, "loss": 0.0103, "step": 164940 }, { "epoch": 1085.1973684210527, "grad_norm": 1.0669223070144653, "learning_rate": 0.0001, "loss": 0.0125, "step": 164950 }, { "epoch": 1085.2631578947369, "grad_norm": 1.337748646736145, "learning_rate": 0.0001, "loss": 0.0108, "step": 164960 }, { "epoch": 1085.328947368421, "grad_norm": 1.198097825050354, "learning_rate": 0.0001, "loss": 0.0109, "step": 164970 }, { "epoch": 1085.3947368421052, "grad_norm": 0.9705106019973755, "learning_rate": 0.0001, "loss": 0.0167, "step": 164980 }, { "epoch": 1085.4605263157894, "grad_norm": 1.5918744802474976, "learning_rate": 0.0001, "loss": 0.0111, "step": 164990 }, { "epoch": 1085.5263157894738, "grad_norm": 0.9629369974136353, "learning_rate": 0.0001, "loss": 0.014, "step": 165000 }, { "epoch": 1085.592105263158, "grad_norm": 1.1806031465530396, "learning_rate": 0.0001, "loss": 0.013, "step": 165010 }, { "epoch": 1085.657894736842, "grad_norm": 1.1955924034118652, "learning_rate": 0.0001, "loss": 0.0122, "step": 165020 }, { "epoch": 1085.7236842105262, "grad_norm": 1.2440309524536133, "learning_rate": 0.0001, "loss": 0.0161, "step": 165030 }, { "epoch": 1085.7894736842106, "grad_norm": 1.5001050233840942, "learning_rate": 0.0001, "loss": 0.0107, "step": 165040 }, { "epoch": 1085.8552631578948, "grad_norm": 1.4358972311019897, "learning_rate": 0.0001, "loss": 0.0114, "step": 165050 }, { "epoch": 1085.921052631579, "grad_norm": 1.4467926025390625, "learning_rate": 0.0001, "loss": 0.0128, "step": 165060 }, { "epoch": 1085.9868421052631, "grad_norm": 1.4611868858337402, "learning_rate": 0.0001, "loss": 0.0117, "step": 165070 }, { "epoch": 1086.0526315789473, "grad_norm": 1.539191484451294, "learning_rate": 0.0001, "loss": 0.0138, "step": 165080 }, { "epoch": 1086.1184210526317, "grad_norm": 1.4840162992477417, "learning_rate": 0.0001, "loss": 0.0102, "step": 165090 }, { "epoch": 1086.1842105263158, "grad_norm": 1.213793158531189, "learning_rate": 0.0001, "loss": 0.0103, "step": 165100 }, { "epoch": 1086.25, "grad_norm": 1.633858323097229, "learning_rate": 0.0001, "loss": 0.0086, "step": 165110 }, { "epoch": 1086.3157894736842, "grad_norm": 1.6051216125488281, "learning_rate": 0.0001, "loss": 0.0144, "step": 165120 }, { "epoch": 1086.3815789473683, "grad_norm": 0.998378336429596, "learning_rate": 0.0001, "loss": 0.0096, "step": 165130 }, { "epoch": 1086.4473684210527, "grad_norm": 1.10728919506073, "learning_rate": 0.0001, "loss": 0.012, "step": 165140 }, { "epoch": 1086.5131578947369, "grad_norm": 1.3455561399459839, "learning_rate": 0.0001, "loss": 0.0166, "step": 165150 }, { "epoch": 1086.578947368421, "grad_norm": 1.2437083721160889, "learning_rate": 0.0001, "loss": 0.0109, "step": 165160 }, { "epoch": 1086.6447368421052, "grad_norm": 1.4532579183578491, "learning_rate": 0.0001, "loss": 0.0161, "step": 165170 }, { "epoch": 1086.7105263157894, "grad_norm": 1.8517767190933228, "learning_rate": 0.0001, "loss": 0.0115, "step": 165180 }, { "epoch": 1086.7763157894738, "grad_norm": 1.7717700004577637, "learning_rate": 0.0001, "loss": 0.013, "step": 165190 }, { "epoch": 1086.842105263158, "grad_norm": 1.6031941175460815, "learning_rate": 0.0001, "loss": 0.0128, "step": 165200 }, { "epoch": 1086.907894736842, "grad_norm": 1.7972100973129272, "learning_rate": 0.0001, "loss": 0.0131, "step": 165210 }, { "epoch": 1086.9736842105262, "grad_norm": 1.357358694076538, "learning_rate": 0.0001, "loss": 0.015, "step": 165220 }, { "epoch": 1087.0394736842106, "grad_norm": 1.4680668115615845, "learning_rate": 0.0001, "loss": 0.0089, "step": 165230 }, { "epoch": 1087.1052631578948, "grad_norm": 1.2661741971969604, "learning_rate": 0.0001, "loss": 0.0123, "step": 165240 }, { "epoch": 1087.171052631579, "grad_norm": 1.3654754161834717, "learning_rate": 0.0001, "loss": 0.0087, "step": 165250 }, { "epoch": 1087.2368421052631, "grad_norm": 1.2223159074783325, "learning_rate": 0.0001, "loss": 0.0103, "step": 165260 }, { "epoch": 1087.3026315789473, "grad_norm": 1.2137699127197266, "learning_rate": 0.0001, "loss": 0.0156, "step": 165270 }, { "epoch": 1087.3684210526317, "grad_norm": 1.6231437921524048, "learning_rate": 0.0001, "loss": 0.0108, "step": 165280 }, { "epoch": 1087.4342105263158, "grad_norm": 1.1302458047866821, "learning_rate": 0.0001, "loss": 0.0146, "step": 165290 }, { "epoch": 1087.5, "grad_norm": 1.3748127222061157, "learning_rate": 0.0001, "loss": 0.0156, "step": 165300 }, { "epoch": 1087.5657894736842, "grad_norm": 1.3103293180465698, "learning_rate": 0.0001, "loss": 0.0117, "step": 165310 }, { "epoch": 1087.6315789473683, "grad_norm": 1.3452244997024536, "learning_rate": 0.0001, "loss": 0.0091, "step": 165320 }, { "epoch": 1087.6973684210527, "grad_norm": 1.3867508172988892, "learning_rate": 0.0001, "loss": 0.0152, "step": 165330 }, { "epoch": 1087.7631578947369, "grad_norm": 1.1633646488189697, "learning_rate": 0.0001, "loss": 0.012, "step": 165340 }, { "epoch": 1087.828947368421, "grad_norm": 1.878833293914795, "learning_rate": 0.0001, "loss": 0.0114, "step": 165350 }, { "epoch": 1087.8947368421052, "grad_norm": 1.4127379655838013, "learning_rate": 0.0001, "loss": 0.0117, "step": 165360 }, { "epoch": 1087.9605263157894, "grad_norm": 1.424811601638794, "learning_rate": 0.0001, "loss": 0.0128, "step": 165370 }, { "epoch": 1088.0263157894738, "grad_norm": 1.1730930805206299, "learning_rate": 0.0001, "loss": 0.0128, "step": 165380 }, { "epoch": 1088.092105263158, "grad_norm": 1.6059743165969849, "learning_rate": 0.0001, "loss": 0.009, "step": 165390 }, { "epoch": 1088.157894736842, "grad_norm": 1.1737128496170044, "learning_rate": 0.0001, "loss": 0.0126, "step": 165400 }, { "epoch": 1088.2236842105262, "grad_norm": 1.4487857818603516, "learning_rate": 0.0001, "loss": 0.0089, "step": 165410 }, { "epoch": 1088.2894736842106, "grad_norm": 1.886069655418396, "learning_rate": 0.0001, "loss": 0.0101, "step": 165420 }, { "epoch": 1088.3552631578948, "grad_norm": 1.8147330284118652, "learning_rate": 0.0001, "loss": 0.0173, "step": 165430 }, { "epoch": 1088.421052631579, "grad_norm": 1.5590232610702515, "learning_rate": 0.0001, "loss": 0.0091, "step": 165440 }, { "epoch": 1088.4868421052631, "grad_norm": 1.5535558462142944, "learning_rate": 0.0001, "loss": 0.0174, "step": 165450 }, { "epoch": 1088.5526315789473, "grad_norm": 1.556895136833191, "learning_rate": 0.0001, "loss": 0.0131, "step": 165460 }, { "epoch": 1088.6184210526317, "grad_norm": 1.508806824684143, "learning_rate": 0.0001, "loss": 0.0159, "step": 165470 }, { "epoch": 1088.6842105263158, "grad_norm": 1.462488055229187, "learning_rate": 0.0001, "loss": 0.0123, "step": 165480 }, { "epoch": 1088.75, "grad_norm": 1.7212415933609009, "learning_rate": 0.0001, "loss": 0.0122, "step": 165490 }, { "epoch": 1088.8157894736842, "grad_norm": 1.5248275995254517, "learning_rate": 0.0001, "loss": 0.0125, "step": 165500 }, { "epoch": 1088.8815789473683, "grad_norm": 1.2914873361587524, "learning_rate": 0.0001, "loss": 0.0091, "step": 165510 }, { "epoch": 1088.9473684210527, "grad_norm": 1.0807191133499146, "learning_rate": 0.0001, "loss": 0.0115, "step": 165520 }, { "epoch": 1089.0131578947369, "grad_norm": 1.766732096672058, "learning_rate": 0.0001, "loss": 0.0144, "step": 165530 }, { "epoch": 1089.078947368421, "grad_norm": 0.976284921169281, "learning_rate": 0.0001, "loss": 0.0106, "step": 165540 }, { "epoch": 1089.1447368421052, "grad_norm": 1.4836204051971436, "learning_rate": 0.0001, "loss": 0.0129, "step": 165550 }, { "epoch": 1089.2105263157894, "grad_norm": 1.4264962673187256, "learning_rate": 0.0001, "loss": 0.0129, "step": 165560 }, { "epoch": 1089.2763157894738, "grad_norm": 1.3446599245071411, "learning_rate": 0.0001, "loss": 0.0119, "step": 165570 }, { "epoch": 1089.342105263158, "grad_norm": 1.4021323919296265, "learning_rate": 0.0001, "loss": 0.0129, "step": 165580 }, { "epoch": 1089.407894736842, "grad_norm": 1.6331367492675781, "learning_rate": 0.0001, "loss": 0.0136, "step": 165590 }, { "epoch": 1089.4736842105262, "grad_norm": 1.265962839126587, "learning_rate": 0.0001, "loss": 0.0126, "step": 165600 }, { "epoch": 1089.5394736842106, "grad_norm": 1.5624558925628662, "learning_rate": 0.0001, "loss": 0.014, "step": 165610 }, { "epoch": 1089.6052631578948, "grad_norm": 1.1024720668792725, "learning_rate": 0.0001, "loss": 0.01, "step": 165620 }, { "epoch": 1089.671052631579, "grad_norm": 1.1519724130630493, "learning_rate": 0.0001, "loss": 0.014, "step": 165630 }, { "epoch": 1089.7368421052631, "grad_norm": 1.4128562211990356, "learning_rate": 0.0001, "loss": 0.0126, "step": 165640 }, { "epoch": 1089.8026315789473, "grad_norm": 1.4924006462097168, "learning_rate": 0.0001, "loss": 0.0117, "step": 165650 }, { "epoch": 1089.8684210526317, "grad_norm": 1.5597420930862427, "learning_rate": 0.0001, "loss": 0.0122, "step": 165660 }, { "epoch": 1089.9342105263158, "grad_norm": 1.639220118522644, "learning_rate": 0.0001, "loss": 0.0114, "step": 165670 }, { "epoch": 1090.0, "grad_norm": 1.5140665769577026, "learning_rate": 0.0001, "loss": 0.0108, "step": 165680 }, { "epoch": 1090.0657894736842, "grad_norm": 1.270851969718933, "learning_rate": 0.0001, "loss": 0.01, "step": 165690 }, { "epoch": 1090.1315789473683, "grad_norm": 1.7233413457870483, "learning_rate": 0.0001, "loss": 0.0122, "step": 165700 }, { "epoch": 1090.1973684210527, "grad_norm": 1.3921279907226562, "learning_rate": 0.0001, "loss": 0.0141, "step": 165710 }, { "epoch": 1090.2631578947369, "grad_norm": 1.3358805179595947, "learning_rate": 0.0001, "loss": 0.0098, "step": 165720 }, { "epoch": 1090.328947368421, "grad_norm": 1.4811556339263916, "learning_rate": 0.0001, "loss": 0.0118, "step": 165730 }, { "epoch": 1090.3947368421052, "grad_norm": 1.0746419429779053, "learning_rate": 0.0001, "loss": 0.0109, "step": 165740 }, { "epoch": 1090.4605263157894, "grad_norm": 1.8669034242630005, "learning_rate": 0.0001, "loss": 0.0095, "step": 165750 }, { "epoch": 1090.5263157894738, "grad_norm": 1.446641206741333, "learning_rate": 0.0001, "loss": 0.013, "step": 165760 }, { "epoch": 1090.592105263158, "grad_norm": 1.7217438220977783, "learning_rate": 0.0001, "loss": 0.0105, "step": 165770 }, { "epoch": 1090.657894736842, "grad_norm": 1.4421684741973877, "learning_rate": 0.0001, "loss": 0.0158, "step": 165780 }, { "epoch": 1090.7236842105262, "grad_norm": 1.138798475265503, "learning_rate": 0.0001, "loss": 0.0114, "step": 165790 }, { "epoch": 1090.7894736842106, "grad_norm": 1.8441143035888672, "learning_rate": 0.0001, "loss": 0.0085, "step": 165800 }, { "epoch": 1090.8552631578948, "grad_norm": 1.5726478099822998, "learning_rate": 0.0001, "loss": 0.0141, "step": 165810 }, { "epoch": 1090.921052631579, "grad_norm": 1.2904640436172485, "learning_rate": 0.0001, "loss": 0.0154, "step": 165820 }, { "epoch": 1090.9868421052631, "grad_norm": 1.2396938800811768, "learning_rate": 0.0001, "loss": 0.0158, "step": 165830 }, { "epoch": 1091.0526315789473, "grad_norm": 1.4778826236724854, "learning_rate": 0.0001, "loss": 0.01, "step": 165840 }, { "epoch": 1091.1184210526317, "grad_norm": 1.664660096168518, "learning_rate": 0.0001, "loss": 0.0122, "step": 165850 }, { "epoch": 1091.1842105263158, "grad_norm": 1.5865709781646729, "learning_rate": 0.0001, "loss": 0.0111, "step": 165860 }, { "epoch": 1091.25, "grad_norm": 1.0427453517913818, "learning_rate": 0.0001, "loss": 0.0148, "step": 165870 }, { "epoch": 1091.3157894736842, "grad_norm": 1.2357078790664673, "learning_rate": 0.0001, "loss": 0.0119, "step": 165880 }, { "epoch": 1091.3815789473683, "grad_norm": 1.4096928834915161, "learning_rate": 0.0001, "loss": 0.0123, "step": 165890 }, { "epoch": 1091.4473684210527, "grad_norm": 1.4465137720108032, "learning_rate": 0.0001, "loss": 0.0136, "step": 165900 }, { "epoch": 1091.5131578947369, "grad_norm": 1.7366447448730469, "learning_rate": 0.0001, "loss": 0.0103, "step": 165910 }, { "epoch": 1091.578947368421, "grad_norm": 1.529957890510559, "learning_rate": 0.0001, "loss": 0.0095, "step": 165920 }, { "epoch": 1091.6447368421052, "grad_norm": 1.3462722301483154, "learning_rate": 0.0001, "loss": 0.0098, "step": 165930 }, { "epoch": 1091.7105263157894, "grad_norm": 1.6319557428359985, "learning_rate": 0.0001, "loss": 0.0136, "step": 165940 }, { "epoch": 1091.7763157894738, "grad_norm": 1.6598601341247559, "learning_rate": 0.0001, "loss": 0.0118, "step": 165950 }, { "epoch": 1091.842105263158, "grad_norm": 1.6428472995758057, "learning_rate": 0.0001, "loss": 0.012, "step": 165960 }, { "epoch": 1091.907894736842, "grad_norm": 1.102553129196167, "learning_rate": 0.0001, "loss": 0.014, "step": 165970 }, { "epoch": 1091.9736842105262, "grad_norm": 1.2240105867385864, "learning_rate": 0.0001, "loss": 0.0146, "step": 165980 }, { "epoch": 1092.0394736842106, "grad_norm": 1.3639631271362305, "learning_rate": 0.0001, "loss": 0.0092, "step": 165990 }, { "epoch": 1092.1052631578948, "grad_norm": 0.9769591093063354, "learning_rate": 0.0001, "loss": 0.0163, "step": 166000 }, { "epoch": 1092.171052631579, "grad_norm": 1.5706126689910889, "learning_rate": 0.0001, "loss": 0.0152, "step": 166010 }, { "epoch": 1092.2368421052631, "grad_norm": 1.837856411933899, "learning_rate": 0.0001, "loss": 0.0134, "step": 166020 }, { "epoch": 1092.3026315789473, "grad_norm": 1.3645119667053223, "learning_rate": 0.0001, "loss": 0.0105, "step": 166030 }, { "epoch": 1092.3684210526317, "grad_norm": 1.7149280309677124, "learning_rate": 0.0001, "loss": 0.0123, "step": 166040 }, { "epoch": 1092.4342105263158, "grad_norm": 1.7272965908050537, "learning_rate": 0.0001, "loss": 0.012, "step": 166050 }, { "epoch": 1092.5, "grad_norm": 1.2746608257293701, "learning_rate": 0.0001, "loss": 0.0105, "step": 166060 }, { "epoch": 1092.5657894736842, "grad_norm": 1.4876036643981934, "learning_rate": 0.0001, "loss": 0.0088, "step": 166070 }, { "epoch": 1092.6315789473683, "grad_norm": 1.7753520011901855, "learning_rate": 0.0001, "loss": 0.0089, "step": 166080 }, { "epoch": 1092.6973684210527, "grad_norm": 1.604466438293457, "learning_rate": 0.0001, "loss": 0.0126, "step": 166090 }, { "epoch": 1092.7631578947369, "grad_norm": 1.610136866569519, "learning_rate": 0.0001, "loss": 0.0108, "step": 166100 }, { "epoch": 1092.828947368421, "grad_norm": 1.5873267650604248, "learning_rate": 0.0001, "loss": 0.0183, "step": 166110 }, { "epoch": 1092.8947368421052, "grad_norm": 1.7402626276016235, "learning_rate": 0.0001, "loss": 0.0101, "step": 166120 }, { "epoch": 1092.9605263157894, "grad_norm": 2.147258758544922, "learning_rate": 0.0001, "loss": 0.0115, "step": 166130 }, { "epoch": 1093.0263157894738, "grad_norm": 1.6034692525863647, "learning_rate": 0.0001, "loss": 0.0173, "step": 166140 }, { "epoch": 1093.092105263158, "grad_norm": 1.3929206132888794, "learning_rate": 0.0001, "loss": 0.0124, "step": 166150 }, { "epoch": 1093.157894736842, "grad_norm": 1.3902682065963745, "learning_rate": 0.0001, "loss": 0.0104, "step": 166160 }, { "epoch": 1093.2236842105262, "grad_norm": 1.3697487115859985, "learning_rate": 0.0001, "loss": 0.0091, "step": 166170 }, { "epoch": 1093.2894736842106, "grad_norm": 1.4502626657485962, "learning_rate": 0.0001, "loss": 0.0111, "step": 166180 }, { "epoch": 1093.3552631578948, "grad_norm": 1.6344611644744873, "learning_rate": 0.0001, "loss": 0.0151, "step": 166190 }, { "epoch": 1093.421052631579, "grad_norm": 1.5592288970947266, "learning_rate": 0.0001, "loss": 0.0126, "step": 166200 }, { "epoch": 1093.4868421052631, "grad_norm": 1.1533082723617554, "learning_rate": 0.0001, "loss": 0.012, "step": 166210 }, { "epoch": 1093.5526315789473, "grad_norm": 1.4330893754959106, "learning_rate": 0.0001, "loss": 0.0126, "step": 166220 }, { "epoch": 1093.6184210526317, "grad_norm": 1.4221845865249634, "learning_rate": 0.0001, "loss": 0.0127, "step": 166230 }, { "epoch": 1093.6842105263158, "grad_norm": 1.6580349206924438, "learning_rate": 0.0001, "loss": 0.0095, "step": 166240 }, { "epoch": 1093.75, "grad_norm": 1.2322660684585571, "learning_rate": 0.0001, "loss": 0.0134, "step": 166250 }, { "epoch": 1093.8157894736842, "grad_norm": 1.6158740520477295, "learning_rate": 0.0001, "loss": 0.0157, "step": 166260 }, { "epoch": 1093.8815789473683, "grad_norm": 1.1862428188323975, "learning_rate": 0.0001, "loss": 0.0125, "step": 166270 }, { "epoch": 1093.9473684210527, "grad_norm": 1.2747864723205566, "learning_rate": 0.0001, "loss": 0.0115, "step": 166280 }, { "epoch": 1094.0131578947369, "grad_norm": 1.7071504592895508, "learning_rate": 0.0001, "loss": 0.0136, "step": 166290 }, { "epoch": 1094.078947368421, "grad_norm": 1.0806270837783813, "learning_rate": 0.0001, "loss": 0.0126, "step": 166300 }, { "epoch": 1094.1447368421052, "grad_norm": 1.2753689289093018, "learning_rate": 0.0001, "loss": 0.0122, "step": 166310 }, { "epoch": 1094.2105263157894, "grad_norm": 1.2107656002044678, "learning_rate": 0.0001, "loss": 0.0088, "step": 166320 }, { "epoch": 1094.2763157894738, "grad_norm": 1.50320565700531, "learning_rate": 0.0001, "loss": 0.0123, "step": 166330 }, { "epoch": 1094.342105263158, "grad_norm": 1.0307321548461914, "learning_rate": 0.0001, "loss": 0.0134, "step": 166340 }, { "epoch": 1094.407894736842, "grad_norm": 1.1050355434417725, "learning_rate": 0.0001, "loss": 0.0098, "step": 166350 }, { "epoch": 1094.4736842105262, "grad_norm": 1.4789271354675293, "learning_rate": 0.0001, "loss": 0.0112, "step": 166360 }, { "epoch": 1094.5394736842106, "grad_norm": 1.291195034980774, "learning_rate": 0.0001, "loss": 0.0138, "step": 166370 }, { "epoch": 1094.6052631578948, "grad_norm": 1.6110239028930664, "learning_rate": 0.0001, "loss": 0.0101, "step": 166380 }, { "epoch": 1094.671052631579, "grad_norm": 0.9803904294967651, "learning_rate": 0.0001, "loss": 0.0103, "step": 166390 }, { "epoch": 1094.7368421052631, "grad_norm": 1.4679360389709473, "learning_rate": 0.0001, "loss": 0.0195, "step": 166400 }, { "epoch": 1094.8026315789473, "grad_norm": 0.9569622278213501, "learning_rate": 0.0001, "loss": 0.0125, "step": 166410 }, { "epoch": 1094.8684210526317, "grad_norm": 1.0452888011932373, "learning_rate": 0.0001, "loss": 0.0134, "step": 166420 }, { "epoch": 1094.9342105263158, "grad_norm": 1.6585036516189575, "learning_rate": 0.0001, "loss": 0.0122, "step": 166430 }, { "epoch": 1095.0, "grad_norm": 1.0534617900848389, "learning_rate": 0.0001, "loss": 0.0113, "step": 166440 }, { "epoch": 1095.0657894736842, "grad_norm": 1.54884672164917, "learning_rate": 0.0001, "loss": 0.0153, "step": 166450 }, { "epoch": 1095.1315789473683, "grad_norm": 1.1612799167633057, "learning_rate": 0.0001, "loss": 0.0125, "step": 166460 }, { "epoch": 1095.1973684210527, "grad_norm": 1.7929613590240479, "learning_rate": 0.0001, "loss": 0.0109, "step": 166470 }, { "epoch": 1095.2631578947369, "grad_norm": 1.6924402713775635, "learning_rate": 0.0001, "loss": 0.0087, "step": 166480 }, { "epoch": 1095.328947368421, "grad_norm": 1.479387640953064, "learning_rate": 0.0001, "loss": 0.0108, "step": 166490 }, { "epoch": 1095.3947368421052, "grad_norm": 1.67505943775177, "learning_rate": 0.0001, "loss": 0.0148, "step": 166500 }, { "epoch": 1095.4605263157894, "grad_norm": 1.776941180229187, "learning_rate": 0.0001, "loss": 0.0124, "step": 166510 }, { "epoch": 1095.5263157894738, "grad_norm": 1.4454549551010132, "learning_rate": 0.0001, "loss": 0.0112, "step": 166520 }, { "epoch": 1095.592105263158, "grad_norm": 1.6048623323440552, "learning_rate": 0.0001, "loss": 0.0136, "step": 166530 }, { "epoch": 1095.657894736842, "grad_norm": 1.6015207767486572, "learning_rate": 0.0001, "loss": 0.0149, "step": 166540 }, { "epoch": 1095.7236842105262, "grad_norm": 0.9944424033164978, "learning_rate": 0.0001, "loss": 0.0106, "step": 166550 }, { "epoch": 1095.7894736842106, "grad_norm": 1.6775282621383667, "learning_rate": 0.0001, "loss": 0.0143, "step": 166560 }, { "epoch": 1095.8552631578948, "grad_norm": 1.3585195541381836, "learning_rate": 0.0001, "loss": 0.0143, "step": 166570 }, { "epoch": 1095.921052631579, "grad_norm": 1.4623297452926636, "learning_rate": 0.0001, "loss": 0.0111, "step": 166580 }, { "epoch": 1095.9868421052631, "grad_norm": 1.045769453048706, "learning_rate": 0.0001, "loss": 0.0102, "step": 166590 }, { "epoch": 1096.0526315789473, "grad_norm": 1.5254689455032349, "learning_rate": 0.0001, "loss": 0.0105, "step": 166600 }, { "epoch": 1096.1184210526317, "grad_norm": 1.62606942653656, "learning_rate": 0.0001, "loss": 0.0118, "step": 166610 }, { "epoch": 1096.1842105263158, "grad_norm": 1.5092500448226929, "learning_rate": 0.0001, "loss": 0.0129, "step": 166620 }, { "epoch": 1096.25, "grad_norm": 1.1631393432617188, "learning_rate": 0.0001, "loss": 0.0137, "step": 166630 }, { "epoch": 1096.3157894736842, "grad_norm": 0.794577956199646, "learning_rate": 0.0001, "loss": 0.0089, "step": 166640 }, { "epoch": 1096.3815789473683, "grad_norm": 1.2738101482391357, "learning_rate": 0.0001, "loss": 0.0132, "step": 166650 }, { "epoch": 1096.4473684210527, "grad_norm": 1.3755043745040894, "learning_rate": 0.0001, "loss": 0.0107, "step": 166660 }, { "epoch": 1096.5131578947369, "grad_norm": 1.3972396850585938, "learning_rate": 0.0001, "loss": 0.0112, "step": 166670 }, { "epoch": 1096.578947368421, "grad_norm": 1.1577624082565308, "learning_rate": 0.0001, "loss": 0.0119, "step": 166680 }, { "epoch": 1096.6447368421052, "grad_norm": 1.0222452878952026, "learning_rate": 0.0001, "loss": 0.0087, "step": 166690 }, { "epoch": 1096.7105263157894, "grad_norm": 1.46125066280365, "learning_rate": 0.0001, "loss": 0.0123, "step": 166700 }, { "epoch": 1096.7763157894738, "grad_norm": 1.2499809265136719, "learning_rate": 0.0001, "loss": 0.0137, "step": 166710 }, { "epoch": 1096.842105263158, "grad_norm": 1.304382562637329, "learning_rate": 0.0001, "loss": 0.0103, "step": 166720 }, { "epoch": 1096.907894736842, "grad_norm": 1.5604616403579712, "learning_rate": 0.0001, "loss": 0.0183, "step": 166730 }, { "epoch": 1096.9736842105262, "grad_norm": 1.4762097597122192, "learning_rate": 0.0001, "loss": 0.0141, "step": 166740 }, { "epoch": 1097.0394736842106, "grad_norm": 1.3802794218063354, "learning_rate": 0.0001, "loss": 0.0141, "step": 166750 }, { "epoch": 1097.1052631578948, "grad_norm": 1.2838270664215088, "learning_rate": 0.0001, "loss": 0.0102, "step": 166760 }, { "epoch": 1097.171052631579, "grad_norm": 1.5859814882278442, "learning_rate": 0.0001, "loss": 0.009, "step": 166770 }, { "epoch": 1097.2368421052631, "grad_norm": 1.4090368747711182, "learning_rate": 0.0001, "loss": 0.0152, "step": 166780 }, { "epoch": 1097.3026315789473, "grad_norm": 1.103900671005249, "learning_rate": 0.0001, "loss": 0.016, "step": 166790 }, { "epoch": 1097.3684210526317, "grad_norm": 1.5841877460479736, "learning_rate": 0.0001, "loss": 0.0121, "step": 166800 }, { "epoch": 1097.4342105263158, "grad_norm": 1.5840206146240234, "learning_rate": 0.0001, "loss": 0.0146, "step": 166810 }, { "epoch": 1097.5, "grad_norm": 1.6696346998214722, "learning_rate": 0.0001, "loss": 0.014, "step": 166820 }, { "epoch": 1097.5657894736842, "grad_norm": 1.504384994506836, "learning_rate": 0.0001, "loss": 0.0106, "step": 166830 }, { "epoch": 1097.6315789473683, "grad_norm": 1.310981273651123, "learning_rate": 0.0001, "loss": 0.0106, "step": 166840 }, { "epoch": 1097.6973684210527, "grad_norm": 1.3006662130355835, "learning_rate": 0.0001, "loss": 0.0109, "step": 166850 }, { "epoch": 1097.7631578947369, "grad_norm": 1.267500638961792, "learning_rate": 0.0001, "loss": 0.014, "step": 166860 }, { "epoch": 1097.828947368421, "grad_norm": 1.4883512258529663, "learning_rate": 0.0001, "loss": 0.0111, "step": 166870 }, { "epoch": 1097.8947368421052, "grad_norm": 1.8088767528533936, "learning_rate": 0.0001, "loss": 0.0132, "step": 166880 }, { "epoch": 1097.9605263157894, "grad_norm": 1.3055027723312378, "learning_rate": 0.0001, "loss": 0.0107, "step": 166890 }, { "epoch": 1098.0263157894738, "grad_norm": 1.5253461599349976, "learning_rate": 0.0001, "loss": 0.0107, "step": 166900 }, { "epoch": 1098.092105263158, "grad_norm": 1.4432029724121094, "learning_rate": 0.0001, "loss": 0.0149, "step": 166910 }, { "epoch": 1098.157894736842, "grad_norm": 1.1757055521011353, "learning_rate": 0.0001, "loss": 0.0129, "step": 166920 }, { "epoch": 1098.2236842105262, "grad_norm": 1.6685264110565186, "learning_rate": 0.0001, "loss": 0.0128, "step": 166930 }, { "epoch": 1098.2894736842106, "grad_norm": 0.9126343727111816, "learning_rate": 0.0001, "loss": 0.0172, "step": 166940 }, { "epoch": 1098.3552631578948, "grad_norm": 1.405931830406189, "learning_rate": 0.0001, "loss": 0.0106, "step": 166950 }, { "epoch": 1098.421052631579, "grad_norm": 1.3227843046188354, "learning_rate": 0.0001, "loss": 0.0106, "step": 166960 }, { "epoch": 1098.4868421052631, "grad_norm": 1.7417831420898438, "learning_rate": 0.0001, "loss": 0.0135, "step": 166970 }, { "epoch": 1098.5526315789473, "grad_norm": 1.347971796989441, "learning_rate": 0.0001, "loss": 0.0112, "step": 166980 }, { "epoch": 1098.6184210526317, "grad_norm": 1.2755424976348877, "learning_rate": 0.0001, "loss": 0.0133, "step": 166990 }, { "epoch": 1098.6842105263158, "grad_norm": 1.8860573768615723, "learning_rate": 0.0001, "loss": 0.0149, "step": 167000 }, { "epoch": 1098.75, "grad_norm": 1.5681626796722412, "learning_rate": 0.0001, "loss": 0.0127, "step": 167010 }, { "epoch": 1098.8157894736842, "grad_norm": 1.4094926118850708, "learning_rate": 0.0001, "loss": 0.0107, "step": 167020 }, { "epoch": 1098.8815789473683, "grad_norm": 1.704079270362854, "learning_rate": 0.0001, "loss": 0.0122, "step": 167030 }, { "epoch": 1098.9473684210527, "grad_norm": 1.405632734298706, "learning_rate": 0.0001, "loss": 0.0096, "step": 167040 }, { "epoch": 1099.0131578947369, "grad_norm": 1.5796996355056763, "learning_rate": 0.0001, "loss": 0.0127, "step": 167050 }, { "epoch": 1099.078947368421, "grad_norm": 1.617271065711975, "learning_rate": 0.0001, "loss": 0.0142, "step": 167060 }, { "epoch": 1099.1447368421052, "grad_norm": 1.1125123500823975, "learning_rate": 0.0001, "loss": 0.0119, "step": 167070 }, { "epoch": 1099.2105263157894, "grad_norm": 1.4883276224136353, "learning_rate": 0.0001, "loss": 0.0112, "step": 167080 }, { "epoch": 1099.2763157894738, "grad_norm": 1.39487886428833, "learning_rate": 0.0001, "loss": 0.0091, "step": 167090 }, { "epoch": 1099.342105263158, "grad_norm": 1.1458114385604858, "learning_rate": 0.0001, "loss": 0.0101, "step": 167100 }, { "epoch": 1099.407894736842, "grad_norm": 1.32555091381073, "learning_rate": 0.0001, "loss": 0.0134, "step": 167110 }, { "epoch": 1099.4736842105262, "grad_norm": 1.3902103900909424, "learning_rate": 0.0001, "loss": 0.0134, "step": 167120 }, { "epoch": 1099.5394736842106, "grad_norm": 1.8346928358078003, "learning_rate": 0.0001, "loss": 0.0153, "step": 167130 }, { "epoch": 1099.6052631578948, "grad_norm": 1.4643206596374512, "learning_rate": 0.0001, "loss": 0.0085, "step": 167140 }, { "epoch": 1099.671052631579, "grad_norm": 1.406584620475769, "learning_rate": 0.0001, "loss": 0.0155, "step": 167150 }, { "epoch": 1099.7368421052631, "grad_norm": 1.197719693183899, "learning_rate": 0.0001, "loss": 0.0113, "step": 167160 }, { "epoch": 1099.8026315789473, "grad_norm": 1.8573246002197266, "learning_rate": 0.0001, "loss": 0.0161, "step": 167170 }, { "epoch": 1099.8684210526317, "grad_norm": 2.2929847240448, "learning_rate": 0.0001, "loss": 0.0143, "step": 167180 }, { "epoch": 1099.9342105263158, "grad_norm": 1.58701753616333, "learning_rate": 0.0001, "loss": 0.0108, "step": 167190 }, { "epoch": 1100.0, "grad_norm": 1.4329122304916382, "learning_rate": 0.0001, "loss": 0.0096, "step": 167200 }, { "epoch": 1100.0657894736842, "grad_norm": 1.351431965827942, "learning_rate": 0.0001, "loss": 0.0133, "step": 167210 }, { "epoch": 1100.1315789473683, "grad_norm": 1.8028192520141602, "learning_rate": 0.0001, "loss": 0.0131, "step": 167220 }, { "epoch": 1100.1973684210527, "grad_norm": 1.447674036026001, "learning_rate": 0.0001, "loss": 0.0113, "step": 167230 }, { "epoch": 1100.2631578947369, "grad_norm": 1.4083786010742188, "learning_rate": 0.0001, "loss": 0.0142, "step": 167240 }, { "epoch": 1100.328947368421, "grad_norm": 1.7963893413543701, "learning_rate": 0.0001, "loss": 0.0092, "step": 167250 }, { "epoch": 1100.3947368421052, "grad_norm": 1.5986237525939941, "learning_rate": 0.0001, "loss": 0.0091, "step": 167260 }, { "epoch": 1100.4605263157894, "grad_norm": 2.023581027984619, "learning_rate": 0.0001, "loss": 0.0107, "step": 167270 }, { "epoch": 1100.5263157894738, "grad_norm": 1.3811672925949097, "learning_rate": 0.0001, "loss": 0.0091, "step": 167280 }, { "epoch": 1100.592105263158, "grad_norm": 1.8793818950653076, "learning_rate": 0.0001, "loss": 0.0131, "step": 167290 }, { "epoch": 1100.657894736842, "grad_norm": 1.522210717201233, "learning_rate": 0.0001, "loss": 0.0155, "step": 167300 }, { "epoch": 1100.7236842105262, "grad_norm": 1.3759573698043823, "learning_rate": 0.0001, "loss": 0.0133, "step": 167310 }, { "epoch": 1100.7894736842106, "grad_norm": 1.1284765005111694, "learning_rate": 0.0001, "loss": 0.013, "step": 167320 }, { "epoch": 1100.8552631578948, "grad_norm": 1.527465581893921, "learning_rate": 0.0001, "loss": 0.0115, "step": 167330 }, { "epoch": 1100.921052631579, "grad_norm": 1.7264997959136963, "learning_rate": 0.0001, "loss": 0.0093, "step": 167340 }, { "epoch": 1100.9868421052631, "grad_norm": 1.7134140729904175, "learning_rate": 0.0001, "loss": 0.0144, "step": 167350 }, { "epoch": 1101.0526315789473, "grad_norm": 2.675178050994873, "learning_rate": 0.0001, "loss": 0.009, "step": 167360 }, { "epoch": 1101.1184210526317, "grad_norm": 1.7636257410049438, "learning_rate": 0.0001, "loss": 0.0154, "step": 167370 }, { "epoch": 1101.1842105263158, "grad_norm": 1.4904543161392212, "learning_rate": 0.0001, "loss": 0.0142, "step": 167380 }, { "epoch": 1101.25, "grad_norm": 1.7888332605361938, "learning_rate": 0.0001, "loss": 0.0139, "step": 167390 }, { "epoch": 1101.3157894736842, "grad_norm": 1.8926657438278198, "learning_rate": 0.0001, "loss": 0.0112, "step": 167400 }, { "epoch": 1101.3815789473683, "grad_norm": 1.425889253616333, "learning_rate": 0.0001, "loss": 0.0102, "step": 167410 }, { "epoch": 1101.4473684210527, "grad_norm": 1.3788384199142456, "learning_rate": 0.0001, "loss": 0.0089, "step": 167420 }, { "epoch": 1101.5131578947369, "grad_norm": 1.2699275016784668, "learning_rate": 0.0001, "loss": 0.0135, "step": 167430 }, { "epoch": 1101.578947368421, "grad_norm": 1.5366098880767822, "learning_rate": 0.0001, "loss": 0.0118, "step": 167440 }, { "epoch": 1101.6447368421052, "grad_norm": 1.3372799158096313, "learning_rate": 0.0001, "loss": 0.0124, "step": 167450 }, { "epoch": 1101.7105263157894, "grad_norm": 0.842868983745575, "learning_rate": 0.0001, "loss": 0.0104, "step": 167460 }, { "epoch": 1101.7763157894738, "grad_norm": 1.5648956298828125, "learning_rate": 0.0001, "loss": 0.0162, "step": 167470 }, { "epoch": 1101.842105263158, "grad_norm": 1.5513055324554443, "learning_rate": 0.0001, "loss": 0.0086, "step": 167480 }, { "epoch": 1101.907894736842, "grad_norm": 1.3863922357559204, "learning_rate": 0.0001, "loss": 0.013, "step": 167490 }, { "epoch": 1101.9736842105262, "grad_norm": 1.3045580387115479, "learning_rate": 0.0001, "loss": 0.0119, "step": 167500 }, { "epoch": 1102.0394736842106, "grad_norm": 1.3648046255111694, "learning_rate": 0.0001, "loss": 0.0138, "step": 167510 }, { "epoch": 1102.1052631578948, "grad_norm": 1.3023098707199097, "learning_rate": 0.0001, "loss": 0.0105, "step": 167520 }, { "epoch": 1102.171052631579, "grad_norm": 1.0242477655410767, "learning_rate": 0.0001, "loss": 0.0119, "step": 167530 }, { "epoch": 1102.2368421052631, "grad_norm": 1.1322613954544067, "learning_rate": 0.0001, "loss": 0.0124, "step": 167540 }, { "epoch": 1102.3026315789473, "grad_norm": 1.3990461826324463, "learning_rate": 0.0001, "loss": 0.0099, "step": 167550 }, { "epoch": 1102.3684210526317, "grad_norm": 1.6040375232696533, "learning_rate": 0.0001, "loss": 0.0129, "step": 167560 }, { "epoch": 1102.4342105263158, "grad_norm": 1.6251918077468872, "learning_rate": 0.0001, "loss": 0.0143, "step": 167570 }, { "epoch": 1102.5, "grad_norm": 1.0371012687683105, "learning_rate": 0.0001, "loss": 0.0116, "step": 167580 }, { "epoch": 1102.5657894736842, "grad_norm": 1.248453974723816, "learning_rate": 0.0001, "loss": 0.0088, "step": 167590 }, { "epoch": 1102.6315789473683, "grad_norm": 1.5092899799346924, "learning_rate": 0.0001, "loss": 0.0154, "step": 167600 }, { "epoch": 1102.6973684210527, "grad_norm": 1.2704168558120728, "learning_rate": 0.0001, "loss": 0.0132, "step": 167610 }, { "epoch": 1102.7631578947369, "grad_norm": 1.3765087127685547, "learning_rate": 0.0001, "loss": 0.0111, "step": 167620 }, { "epoch": 1102.828947368421, "grad_norm": 1.2688064575195312, "learning_rate": 0.0001, "loss": 0.0102, "step": 167630 }, { "epoch": 1102.8947368421052, "grad_norm": 1.456045150756836, "learning_rate": 0.0001, "loss": 0.0169, "step": 167640 }, { "epoch": 1102.9605263157894, "grad_norm": 1.5235086679458618, "learning_rate": 0.0001, "loss": 0.0135, "step": 167650 }, { "epoch": 1103.0263157894738, "grad_norm": 1.4980957508087158, "learning_rate": 0.0001, "loss": 0.01, "step": 167660 }, { "epoch": 1103.092105263158, "grad_norm": 1.195415735244751, "learning_rate": 0.0001, "loss": 0.0149, "step": 167670 }, { "epoch": 1103.157894736842, "grad_norm": 1.489119052886963, "learning_rate": 0.0001, "loss": 0.0129, "step": 167680 }, { "epoch": 1103.2236842105262, "grad_norm": 1.424919843673706, "learning_rate": 0.0001, "loss": 0.0131, "step": 167690 }, { "epoch": 1103.2894736842106, "grad_norm": 1.656851053237915, "learning_rate": 0.0001, "loss": 0.01, "step": 167700 }, { "epoch": 1103.3552631578948, "grad_norm": 1.6487185955047607, "learning_rate": 0.0001, "loss": 0.0135, "step": 167710 }, { "epoch": 1103.421052631579, "grad_norm": 1.50303053855896, "learning_rate": 0.0001, "loss": 0.0121, "step": 167720 }, { "epoch": 1103.4868421052631, "grad_norm": 1.3340214490890503, "learning_rate": 0.0001, "loss": 0.0122, "step": 167730 }, { "epoch": 1103.5526315789473, "grad_norm": 1.3123503923416138, "learning_rate": 0.0001, "loss": 0.0161, "step": 167740 }, { "epoch": 1103.6184210526317, "grad_norm": 1.2396618127822876, "learning_rate": 0.0001, "loss": 0.0159, "step": 167750 }, { "epoch": 1103.6842105263158, "grad_norm": 1.2005534172058105, "learning_rate": 0.0001, "loss": 0.0089, "step": 167760 }, { "epoch": 1103.75, "grad_norm": 1.631316065788269, "learning_rate": 0.0001, "loss": 0.0095, "step": 167770 }, { "epoch": 1103.8157894736842, "grad_norm": 1.4022537469863892, "learning_rate": 0.0001, "loss": 0.0099, "step": 167780 }, { "epoch": 1103.8815789473683, "grad_norm": 1.6048204898834229, "learning_rate": 0.0001, "loss": 0.0091, "step": 167790 }, { "epoch": 1103.9473684210527, "grad_norm": 1.4558807611465454, "learning_rate": 0.0001, "loss": 0.013, "step": 167800 }, { "epoch": 1104.0131578947369, "grad_norm": 1.1043295860290527, "learning_rate": 0.0001, "loss": 0.0139, "step": 167810 }, { "epoch": 1104.078947368421, "grad_norm": 1.207062840461731, "learning_rate": 0.0001, "loss": 0.0118, "step": 167820 }, { "epoch": 1104.1447368421052, "grad_norm": 1.382075309753418, "learning_rate": 0.0001, "loss": 0.0116, "step": 167830 }, { "epoch": 1104.2105263157894, "grad_norm": 1.6232759952545166, "learning_rate": 0.0001, "loss": 0.0129, "step": 167840 }, { "epoch": 1104.2763157894738, "grad_norm": 1.2651135921478271, "learning_rate": 0.0001, "loss": 0.0109, "step": 167850 }, { "epoch": 1104.342105263158, "grad_norm": 1.2123544216156006, "learning_rate": 0.0001, "loss": 0.0118, "step": 167860 }, { "epoch": 1104.407894736842, "grad_norm": 1.5511642694473267, "learning_rate": 0.0001, "loss": 0.0121, "step": 167870 }, { "epoch": 1104.4736842105262, "grad_norm": 1.456842303276062, "learning_rate": 0.0001, "loss": 0.0145, "step": 167880 }, { "epoch": 1104.5394736842106, "grad_norm": 1.5504719018936157, "learning_rate": 0.0001, "loss": 0.0097, "step": 167890 }, { "epoch": 1104.6052631578948, "grad_norm": 1.3860597610473633, "learning_rate": 0.0001, "loss": 0.0097, "step": 167900 }, { "epoch": 1104.671052631579, "grad_norm": 1.4825711250305176, "learning_rate": 0.0001, "loss": 0.0137, "step": 167910 }, { "epoch": 1104.7368421052631, "grad_norm": 1.7693836688995361, "learning_rate": 0.0001, "loss": 0.0125, "step": 167920 }, { "epoch": 1104.8026315789473, "grad_norm": 1.0815422534942627, "learning_rate": 0.0001, "loss": 0.0102, "step": 167930 }, { "epoch": 1104.8684210526317, "grad_norm": 1.4852182865142822, "learning_rate": 0.0001, "loss": 0.0122, "step": 167940 }, { "epoch": 1104.9342105263158, "grad_norm": 1.2653374671936035, "learning_rate": 0.0001, "loss": 0.0158, "step": 167950 }, { "epoch": 1105.0, "grad_norm": 1.4999711513519287, "learning_rate": 0.0001, "loss": 0.0116, "step": 167960 }, { "epoch": 1105.0657894736842, "grad_norm": 1.164743423461914, "learning_rate": 0.0001, "loss": 0.0133, "step": 167970 }, { "epoch": 1105.1315789473683, "grad_norm": 1.2985079288482666, "learning_rate": 0.0001, "loss": 0.0132, "step": 167980 }, { "epoch": 1105.1973684210527, "grad_norm": 1.6571602821350098, "learning_rate": 0.0001, "loss": 0.0123, "step": 167990 }, { "epoch": 1105.2631578947369, "grad_norm": 1.5083800554275513, "learning_rate": 0.0001, "loss": 0.015, "step": 168000 }, { "epoch": 1105.328947368421, "grad_norm": 1.5938159227371216, "learning_rate": 0.0001, "loss": 0.01, "step": 168010 }, { "epoch": 1105.3947368421052, "grad_norm": 1.4237415790557861, "learning_rate": 0.0001, "loss": 0.0119, "step": 168020 }, { "epoch": 1105.4605263157894, "grad_norm": 1.5032387971878052, "learning_rate": 0.0001, "loss": 0.0159, "step": 168030 }, { "epoch": 1105.5263157894738, "grad_norm": 1.7382616996765137, "learning_rate": 0.0001, "loss": 0.012, "step": 168040 }, { "epoch": 1105.592105263158, "grad_norm": 1.63155996799469, "learning_rate": 0.0001, "loss": 0.0114, "step": 168050 }, { "epoch": 1105.657894736842, "grad_norm": 1.9969507455825806, "learning_rate": 0.0001, "loss": 0.0101, "step": 168060 }, { "epoch": 1105.7236842105262, "grad_norm": 1.3090893030166626, "learning_rate": 0.0001, "loss": 0.0108, "step": 168070 }, { "epoch": 1105.7894736842106, "grad_norm": 1.6002167463302612, "learning_rate": 0.0001, "loss": 0.0134, "step": 168080 }, { "epoch": 1105.8552631578948, "grad_norm": 1.6301344633102417, "learning_rate": 0.0001, "loss": 0.0092, "step": 168090 }, { "epoch": 1105.921052631579, "grad_norm": 1.3788474798202515, "learning_rate": 0.0001, "loss": 0.0114, "step": 168100 }, { "epoch": 1105.9868421052631, "grad_norm": 1.316789150238037, "learning_rate": 0.0001, "loss": 0.0098, "step": 168110 }, { "epoch": 1106.0526315789473, "grad_norm": 1.096588373184204, "learning_rate": 0.0001, "loss": 0.011, "step": 168120 }, { "epoch": 1106.1184210526317, "grad_norm": 1.42216157913208, "learning_rate": 0.0001, "loss": 0.011, "step": 168130 }, { "epoch": 1106.1842105263158, "grad_norm": 1.1248395442962646, "learning_rate": 0.0001, "loss": 0.0121, "step": 168140 }, { "epoch": 1106.25, "grad_norm": 1.3110040426254272, "learning_rate": 0.0001, "loss": 0.0129, "step": 168150 }, { "epoch": 1106.3157894736842, "grad_norm": 1.4693145751953125, "learning_rate": 0.0001, "loss": 0.015, "step": 168160 }, { "epoch": 1106.3815789473683, "grad_norm": 1.7376418113708496, "learning_rate": 0.0001, "loss": 0.0117, "step": 168170 }, { "epoch": 1106.4473684210527, "grad_norm": 1.3882269859313965, "learning_rate": 0.0001, "loss": 0.013, "step": 168180 }, { "epoch": 1106.5131578947369, "grad_norm": 1.1639032363891602, "learning_rate": 0.0001, "loss": 0.0098, "step": 168190 }, { "epoch": 1106.578947368421, "grad_norm": 1.456305980682373, "learning_rate": 0.0001, "loss": 0.0088, "step": 168200 }, { "epoch": 1106.6447368421052, "grad_norm": 1.477148175239563, "learning_rate": 0.0001, "loss": 0.0151, "step": 168210 }, { "epoch": 1106.7105263157894, "grad_norm": 1.1775243282318115, "learning_rate": 0.0001, "loss": 0.016, "step": 168220 }, { "epoch": 1106.7763157894738, "grad_norm": 1.0600346326828003, "learning_rate": 0.0001, "loss": 0.014, "step": 168230 }, { "epoch": 1106.842105263158, "grad_norm": 1.0137680768966675, "learning_rate": 0.0001, "loss": 0.0124, "step": 168240 }, { "epoch": 1106.907894736842, "grad_norm": 1.7013343572616577, "learning_rate": 0.0001, "loss": 0.011, "step": 168250 }, { "epoch": 1106.9736842105262, "grad_norm": 1.247897744178772, "learning_rate": 0.0001, "loss": 0.0098, "step": 168260 }, { "epoch": 1107.0394736842106, "grad_norm": 1.2925105094909668, "learning_rate": 0.0001, "loss": 0.0097, "step": 168270 }, { "epoch": 1107.1052631578948, "grad_norm": 1.776870608329773, "learning_rate": 0.0001, "loss": 0.0118, "step": 168280 }, { "epoch": 1107.171052631579, "grad_norm": 1.5858936309814453, "learning_rate": 0.0001, "loss": 0.0142, "step": 168290 }, { "epoch": 1107.2368421052631, "grad_norm": 1.770964503288269, "learning_rate": 0.0001, "loss": 0.0141, "step": 168300 }, { "epoch": 1107.3026315789473, "grad_norm": 1.3491787910461426, "learning_rate": 0.0001, "loss": 0.0113, "step": 168310 }, { "epoch": 1107.3684210526317, "grad_norm": 1.1099905967712402, "learning_rate": 0.0001, "loss": 0.0136, "step": 168320 }, { "epoch": 1107.4342105263158, "grad_norm": 1.342236876487732, "learning_rate": 0.0001, "loss": 0.0102, "step": 168330 }, { "epoch": 1107.5, "grad_norm": 1.2689276933670044, "learning_rate": 0.0001, "loss": 0.014, "step": 168340 }, { "epoch": 1107.5657894736842, "grad_norm": 1.449759602546692, "learning_rate": 0.0001, "loss": 0.0148, "step": 168350 }, { "epoch": 1107.6315789473683, "grad_norm": 1.236303687095642, "learning_rate": 0.0001, "loss": 0.0118, "step": 168360 }, { "epoch": 1107.6973684210527, "grad_norm": 1.6018871068954468, "learning_rate": 0.0001, "loss": 0.013, "step": 168370 }, { "epoch": 1107.7631578947369, "grad_norm": 1.3352609872817993, "learning_rate": 0.0001, "loss": 0.0134, "step": 168380 }, { "epoch": 1107.828947368421, "grad_norm": 1.6517969369888306, "learning_rate": 0.0001, "loss": 0.0085, "step": 168390 }, { "epoch": 1107.8947368421052, "grad_norm": 1.3381794691085815, "learning_rate": 0.0001, "loss": 0.0126, "step": 168400 }, { "epoch": 1107.9605263157894, "grad_norm": 1.2769230604171753, "learning_rate": 0.0001, "loss": 0.0109, "step": 168410 }, { "epoch": 1108.0263157894738, "grad_norm": 1.1368772983551025, "learning_rate": 0.0001, "loss": 0.0153, "step": 168420 }, { "epoch": 1108.092105263158, "grad_norm": 1.3501834869384766, "learning_rate": 0.0001, "loss": 0.0121, "step": 168430 }, { "epoch": 1108.157894736842, "grad_norm": 1.000313401222229, "learning_rate": 0.0001, "loss": 0.0122, "step": 168440 }, { "epoch": 1108.2236842105262, "grad_norm": 1.3006250858306885, "learning_rate": 0.0001, "loss": 0.017, "step": 168450 }, { "epoch": 1108.2894736842106, "grad_norm": 1.14395272731781, "learning_rate": 0.0001, "loss": 0.011, "step": 168460 }, { "epoch": 1108.3552631578948, "grad_norm": 1.5796458721160889, "learning_rate": 0.0001, "loss": 0.0135, "step": 168470 }, { "epoch": 1108.421052631579, "grad_norm": 1.693658709526062, "learning_rate": 0.0001, "loss": 0.0117, "step": 168480 }, { "epoch": 1108.4868421052631, "grad_norm": 1.0315207242965698, "learning_rate": 0.0001, "loss": 0.0116, "step": 168490 }, { "epoch": 1108.5526315789473, "grad_norm": 1.4093637466430664, "learning_rate": 0.0001, "loss": 0.0117, "step": 168500 }, { "epoch": 1108.6184210526317, "grad_norm": 1.4869264364242554, "learning_rate": 0.0001, "loss": 0.0116, "step": 168510 }, { "epoch": 1108.6842105263158, "grad_norm": 1.0517921447753906, "learning_rate": 0.0001, "loss": 0.0098, "step": 168520 }, { "epoch": 1108.75, "grad_norm": 1.4689826965332031, "learning_rate": 0.0001, "loss": 0.0119, "step": 168530 }, { "epoch": 1108.8157894736842, "grad_norm": 1.4757477045059204, "learning_rate": 0.0001, "loss": 0.0099, "step": 168540 }, { "epoch": 1108.8815789473683, "grad_norm": 1.4261175394058228, "learning_rate": 0.0001, "loss": 0.0104, "step": 168550 }, { "epoch": 1108.9473684210527, "grad_norm": 1.473541021347046, "learning_rate": 0.0001, "loss": 0.0118, "step": 168560 }, { "epoch": 1109.0131578947369, "grad_norm": 1.4317662715911865, "learning_rate": 0.0001, "loss": 0.0135, "step": 168570 }, { "epoch": 1109.078947368421, "grad_norm": 1.1377863883972168, "learning_rate": 0.0001, "loss": 0.01, "step": 168580 }, { "epoch": 1109.1447368421052, "grad_norm": 1.52302086353302, "learning_rate": 0.0001, "loss": 0.0102, "step": 168590 }, { "epoch": 1109.2105263157894, "grad_norm": 1.1749576330184937, "learning_rate": 0.0001, "loss": 0.014, "step": 168600 }, { "epoch": 1109.2763157894738, "grad_norm": 1.4506345987319946, "learning_rate": 0.0001, "loss": 0.0129, "step": 168610 }, { "epoch": 1109.342105263158, "grad_norm": 1.0959988832473755, "learning_rate": 0.0001, "loss": 0.0119, "step": 168620 }, { "epoch": 1109.407894736842, "grad_norm": 1.532886028289795, "learning_rate": 0.0001, "loss": 0.0109, "step": 168630 }, { "epoch": 1109.4736842105262, "grad_norm": 1.4911798238754272, "learning_rate": 0.0001, "loss": 0.0162, "step": 168640 }, { "epoch": 1109.5394736842106, "grad_norm": 1.4498531818389893, "learning_rate": 0.0001, "loss": 0.0103, "step": 168650 }, { "epoch": 1109.6052631578948, "grad_norm": 1.50972580909729, "learning_rate": 0.0001, "loss": 0.0142, "step": 168660 }, { "epoch": 1109.671052631579, "grad_norm": 1.4765651226043701, "learning_rate": 0.0001, "loss": 0.01, "step": 168670 }, { "epoch": 1109.7368421052631, "grad_norm": 1.5896750688552856, "learning_rate": 0.0001, "loss": 0.0183, "step": 168680 }, { "epoch": 1109.8026315789473, "grad_norm": 1.2282251119613647, "learning_rate": 0.0001, "loss": 0.0105, "step": 168690 }, { "epoch": 1109.8684210526317, "grad_norm": 1.4490240812301636, "learning_rate": 0.0001, "loss": 0.0098, "step": 168700 }, { "epoch": 1109.9342105263158, "grad_norm": 1.697117567062378, "learning_rate": 0.0001, "loss": 0.0117, "step": 168710 }, { "epoch": 1110.0, "grad_norm": 1.850149154663086, "learning_rate": 0.0001, "loss": 0.0148, "step": 168720 }, { "epoch": 1110.0657894736842, "grad_norm": 1.5853415727615356, "learning_rate": 0.0001, "loss": 0.0106, "step": 168730 }, { "epoch": 1110.1315789473683, "grad_norm": 1.5656813383102417, "learning_rate": 0.0001, "loss": 0.0126, "step": 168740 }, { "epoch": 1110.1973684210527, "grad_norm": 1.2929410934448242, "learning_rate": 0.0001, "loss": 0.0116, "step": 168750 }, { "epoch": 1110.2631578947369, "grad_norm": 1.832512617111206, "learning_rate": 0.0001, "loss": 0.0125, "step": 168760 }, { "epoch": 1110.328947368421, "grad_norm": 1.8971842527389526, "learning_rate": 0.0001, "loss": 0.0121, "step": 168770 }, { "epoch": 1110.3947368421052, "grad_norm": 1.8517152070999146, "learning_rate": 0.0001, "loss": 0.0101, "step": 168780 }, { "epoch": 1110.4605263157894, "grad_norm": 1.5486677885055542, "learning_rate": 0.0001, "loss": 0.0114, "step": 168790 }, { "epoch": 1110.5263157894738, "grad_norm": 1.8011716604232788, "learning_rate": 0.0001, "loss": 0.0102, "step": 168800 }, { "epoch": 1110.592105263158, "grad_norm": 1.639919638633728, "learning_rate": 0.0001, "loss": 0.0154, "step": 168810 }, { "epoch": 1110.657894736842, "grad_norm": 1.4280189275741577, "learning_rate": 0.0001, "loss": 0.0104, "step": 168820 }, { "epoch": 1110.7236842105262, "grad_norm": 1.495295763015747, "learning_rate": 0.0001, "loss": 0.0139, "step": 168830 }, { "epoch": 1110.7894736842106, "grad_norm": 1.6399013996124268, "learning_rate": 0.0001, "loss": 0.0116, "step": 168840 }, { "epoch": 1110.8552631578948, "grad_norm": 1.6096668243408203, "learning_rate": 0.0001, "loss": 0.0143, "step": 168850 }, { "epoch": 1110.921052631579, "grad_norm": 1.6937774419784546, "learning_rate": 0.0001, "loss": 0.0127, "step": 168860 }, { "epoch": 1110.9868421052631, "grad_norm": 1.1561319828033447, "learning_rate": 0.0001, "loss": 0.0123, "step": 168870 }, { "epoch": 1111.0526315789473, "grad_norm": 1.2814611196517944, "learning_rate": 0.0001, "loss": 0.0114, "step": 168880 }, { "epoch": 1111.1184210526317, "grad_norm": 1.6794798374176025, "learning_rate": 0.0001, "loss": 0.0156, "step": 168890 }, { "epoch": 1111.1842105263158, "grad_norm": 1.3954558372497559, "learning_rate": 0.0001, "loss": 0.0095, "step": 168900 }, { "epoch": 1111.25, "grad_norm": 1.4197022914886475, "learning_rate": 0.0001, "loss": 0.0134, "step": 168910 }, { "epoch": 1111.3157894736842, "grad_norm": 1.6257072687149048, "learning_rate": 0.0001, "loss": 0.0117, "step": 168920 }, { "epoch": 1111.3815789473683, "grad_norm": 1.7730110883712769, "learning_rate": 0.0001, "loss": 0.0094, "step": 168930 }, { "epoch": 1111.4473684210527, "grad_norm": 1.336553692817688, "learning_rate": 0.0001, "loss": 0.0093, "step": 168940 }, { "epoch": 1111.5131578947369, "grad_norm": 1.4853148460388184, "learning_rate": 0.0001, "loss": 0.0095, "step": 168950 }, { "epoch": 1111.578947368421, "grad_norm": 1.2767854928970337, "learning_rate": 0.0001, "loss": 0.0116, "step": 168960 }, { "epoch": 1111.6447368421052, "grad_norm": 1.675093173980713, "learning_rate": 0.0001, "loss": 0.0102, "step": 168970 }, { "epoch": 1111.7105263157894, "grad_norm": 1.4276494979858398, "learning_rate": 0.0001, "loss": 0.0134, "step": 168980 }, { "epoch": 1111.7763157894738, "grad_norm": 1.5569959878921509, "learning_rate": 0.0001, "loss": 0.0122, "step": 168990 }, { "epoch": 1111.842105263158, "grad_norm": 1.133559226989746, "learning_rate": 0.0001, "loss": 0.011, "step": 169000 }, { "epoch": 1111.907894736842, "grad_norm": 1.6298394203186035, "learning_rate": 0.0001, "loss": 0.0121, "step": 169010 }, { "epoch": 1111.9736842105262, "grad_norm": 1.751237392425537, "learning_rate": 0.0001, "loss": 0.0164, "step": 169020 }, { "epoch": 1112.0394736842106, "grad_norm": 1.234555959701538, "learning_rate": 0.0001, "loss": 0.0133, "step": 169030 }, { "epoch": 1112.1052631578948, "grad_norm": 1.4506733417510986, "learning_rate": 0.0001, "loss": 0.0144, "step": 169040 }, { "epoch": 1112.171052631579, "grad_norm": 1.477787971496582, "learning_rate": 0.0001, "loss": 0.0102, "step": 169050 }, { "epoch": 1112.2368421052631, "grad_norm": 1.748376727104187, "learning_rate": 0.0001, "loss": 0.0092, "step": 169060 }, { "epoch": 1112.3026315789473, "grad_norm": 1.4568819999694824, "learning_rate": 0.0001, "loss": 0.0137, "step": 169070 }, { "epoch": 1112.3684210526317, "grad_norm": 1.3692138195037842, "learning_rate": 0.0001, "loss": 0.0116, "step": 169080 }, { "epoch": 1112.4342105263158, "grad_norm": 1.7720632553100586, "learning_rate": 0.0001, "loss": 0.0129, "step": 169090 }, { "epoch": 1112.5, "grad_norm": 1.2173435688018799, "learning_rate": 0.0001, "loss": 0.0118, "step": 169100 }, { "epoch": 1112.5657894736842, "grad_norm": 1.5653233528137207, "learning_rate": 0.0001, "loss": 0.0115, "step": 169110 }, { "epoch": 1112.6315789473683, "grad_norm": 1.8369771242141724, "learning_rate": 0.0001, "loss": 0.0121, "step": 169120 }, { "epoch": 1112.6973684210527, "grad_norm": 1.5784415006637573, "learning_rate": 0.0001, "loss": 0.0139, "step": 169130 }, { "epoch": 1112.7631578947369, "grad_norm": 1.540791630744934, "learning_rate": 0.0001, "loss": 0.0138, "step": 169140 }, { "epoch": 1112.828947368421, "grad_norm": 1.4910887479782104, "learning_rate": 0.0001, "loss": 0.014, "step": 169150 }, { "epoch": 1112.8947368421052, "grad_norm": 1.9962286949157715, "learning_rate": 0.0001, "loss": 0.0092, "step": 169160 }, { "epoch": 1112.9605263157894, "grad_norm": 1.6608365774154663, "learning_rate": 0.0001, "loss": 0.0092, "step": 169170 }, { "epoch": 1113.0263157894738, "grad_norm": 1.3056550025939941, "learning_rate": 0.0001, "loss": 0.0142, "step": 169180 }, { "epoch": 1113.092105263158, "grad_norm": 1.6061893701553345, "learning_rate": 0.0001, "loss": 0.0095, "step": 169190 }, { "epoch": 1113.157894736842, "grad_norm": 1.50796377658844, "learning_rate": 0.0001, "loss": 0.0133, "step": 169200 }, { "epoch": 1113.2236842105262, "grad_norm": 1.3886024951934814, "learning_rate": 0.0001, "loss": 0.0122, "step": 169210 }, { "epoch": 1113.2894736842106, "grad_norm": 1.2486958503723145, "learning_rate": 0.0001, "loss": 0.0105, "step": 169220 }, { "epoch": 1113.3552631578948, "grad_norm": 1.3067364692687988, "learning_rate": 0.0001, "loss": 0.0134, "step": 169230 }, { "epoch": 1113.421052631579, "grad_norm": 1.4713319540023804, "learning_rate": 0.0001, "loss": 0.0091, "step": 169240 }, { "epoch": 1113.4868421052631, "grad_norm": 1.4945214986801147, "learning_rate": 0.0001, "loss": 0.0151, "step": 169250 }, { "epoch": 1113.5526315789473, "grad_norm": 1.2026489973068237, "learning_rate": 0.0001, "loss": 0.0132, "step": 169260 }, { "epoch": 1113.6184210526317, "grad_norm": 0.9973546266555786, "learning_rate": 0.0001, "loss": 0.0121, "step": 169270 }, { "epoch": 1113.6842105263158, "grad_norm": 1.1964064836502075, "learning_rate": 0.0001, "loss": 0.011, "step": 169280 }, { "epoch": 1113.75, "grad_norm": 1.0441210269927979, "learning_rate": 0.0001, "loss": 0.012, "step": 169290 }, { "epoch": 1113.8157894736842, "grad_norm": 1.4554322957992554, "learning_rate": 0.0001, "loss": 0.0105, "step": 169300 }, { "epoch": 1113.8815789473683, "grad_norm": 1.180950403213501, "learning_rate": 0.0001, "loss": 0.0129, "step": 169310 }, { "epoch": 1113.9473684210527, "grad_norm": 1.3222458362579346, "learning_rate": 0.0001, "loss": 0.014, "step": 169320 }, { "epoch": 1114.0131578947369, "grad_norm": 1.5997955799102783, "learning_rate": 0.0001, "loss": 0.0111, "step": 169330 }, { "epoch": 1114.078947368421, "grad_norm": 1.402403712272644, "learning_rate": 0.0001, "loss": 0.0112, "step": 169340 }, { "epoch": 1114.1447368421052, "grad_norm": 1.3807166814804077, "learning_rate": 0.0001, "loss": 0.0146, "step": 169350 }, { "epoch": 1114.2105263157894, "grad_norm": 1.292227029800415, "learning_rate": 0.0001, "loss": 0.0128, "step": 169360 }, { "epoch": 1114.2763157894738, "grad_norm": 1.3389638662338257, "learning_rate": 0.0001, "loss": 0.0132, "step": 169370 }, { "epoch": 1114.342105263158, "grad_norm": 1.2354730367660522, "learning_rate": 0.0001, "loss": 0.0116, "step": 169380 }, { "epoch": 1114.407894736842, "grad_norm": 1.4566526412963867, "learning_rate": 0.0001, "loss": 0.0116, "step": 169390 }, { "epoch": 1114.4736842105262, "grad_norm": 1.581222414970398, "learning_rate": 0.0001, "loss": 0.0109, "step": 169400 }, { "epoch": 1114.5394736842106, "grad_norm": 1.48920476436615, "learning_rate": 0.0001, "loss": 0.0123, "step": 169410 }, { "epoch": 1114.6052631578948, "grad_norm": 1.2189992666244507, "learning_rate": 0.0001, "loss": 0.0106, "step": 169420 }, { "epoch": 1114.671052631579, "grad_norm": 1.2250370979309082, "learning_rate": 0.0001, "loss": 0.0136, "step": 169430 }, { "epoch": 1114.7368421052631, "grad_norm": 1.3349400758743286, "learning_rate": 0.0001, "loss": 0.0133, "step": 169440 }, { "epoch": 1114.8026315789473, "grad_norm": 1.3280614614486694, "learning_rate": 0.0001, "loss": 0.0147, "step": 169450 }, { "epoch": 1114.8684210526317, "grad_norm": 1.4008328914642334, "learning_rate": 0.0001, "loss": 0.0107, "step": 169460 }, { "epoch": 1114.9342105263158, "grad_norm": 1.325437307357788, "learning_rate": 0.0001, "loss": 0.0104, "step": 169470 }, { "epoch": 1115.0, "grad_norm": 1.802746057510376, "learning_rate": 0.0001, "loss": 0.0113, "step": 169480 }, { "epoch": 1115.0657894736842, "grad_norm": 1.4286895990371704, "learning_rate": 0.0001, "loss": 0.01, "step": 169490 }, { "epoch": 1115.1315789473683, "grad_norm": 1.6384358406066895, "learning_rate": 0.0001, "loss": 0.0174, "step": 169500 }, { "epoch": 1115.1973684210527, "grad_norm": 1.7851396799087524, "learning_rate": 0.0001, "loss": 0.0125, "step": 169510 }, { "epoch": 1115.2631578947369, "grad_norm": 1.2830597162246704, "learning_rate": 0.0001, "loss": 0.0091, "step": 169520 }, { "epoch": 1115.328947368421, "grad_norm": 1.3063321113586426, "learning_rate": 0.0001, "loss": 0.0089, "step": 169530 }, { "epoch": 1115.3947368421052, "grad_norm": 1.8285315036773682, "learning_rate": 0.0001, "loss": 0.0137, "step": 169540 }, { "epoch": 1115.4605263157894, "grad_norm": 1.4708929061889648, "learning_rate": 0.0001, "loss": 0.0153, "step": 169550 }, { "epoch": 1115.5263157894738, "grad_norm": 1.3793264627456665, "learning_rate": 0.0001, "loss": 0.0136, "step": 169560 }, { "epoch": 1115.592105263158, "grad_norm": 1.7567390203475952, "learning_rate": 0.0001, "loss": 0.0115, "step": 169570 }, { "epoch": 1115.657894736842, "grad_norm": 1.5252485275268555, "learning_rate": 0.0001, "loss": 0.0131, "step": 169580 }, { "epoch": 1115.7236842105262, "grad_norm": 1.772055983543396, "learning_rate": 0.0001, "loss": 0.0128, "step": 169590 }, { "epoch": 1115.7894736842106, "grad_norm": 1.9675220251083374, "learning_rate": 0.0001, "loss": 0.0153, "step": 169600 }, { "epoch": 1115.8552631578948, "grad_norm": 1.4860175848007202, "learning_rate": 0.0001, "loss": 0.0095, "step": 169610 }, { "epoch": 1115.921052631579, "grad_norm": 1.406925082206726, "learning_rate": 0.0001, "loss": 0.0094, "step": 169620 }, { "epoch": 1115.9868421052631, "grad_norm": 1.3292471170425415, "learning_rate": 0.0001, "loss": 0.0104, "step": 169630 }, { "epoch": 1116.0526315789473, "grad_norm": 1.3767120838165283, "learning_rate": 0.0001, "loss": 0.0153, "step": 169640 }, { "epoch": 1116.1184210526317, "grad_norm": 1.5912526845932007, "learning_rate": 0.0001, "loss": 0.0086, "step": 169650 }, { "epoch": 1116.1842105263158, "grad_norm": 1.4553953409194946, "learning_rate": 0.0001, "loss": 0.0126, "step": 169660 }, { "epoch": 1116.25, "grad_norm": 1.3948149681091309, "learning_rate": 0.0001, "loss": 0.0109, "step": 169670 }, { "epoch": 1116.3157894736842, "grad_norm": 1.9322839975357056, "learning_rate": 0.0001, "loss": 0.0129, "step": 169680 }, { "epoch": 1116.3815789473683, "grad_norm": 1.7892696857452393, "learning_rate": 0.0001, "loss": 0.0115, "step": 169690 }, { "epoch": 1116.4473684210527, "grad_norm": 1.6753932237625122, "learning_rate": 0.0001, "loss": 0.0109, "step": 169700 }, { "epoch": 1116.5131578947369, "grad_norm": 1.0534920692443848, "learning_rate": 0.0001, "loss": 0.0149, "step": 169710 }, { "epoch": 1116.578947368421, "grad_norm": 1.5431909561157227, "learning_rate": 0.0001, "loss": 0.0125, "step": 169720 }, { "epoch": 1116.6447368421052, "grad_norm": 1.5141205787658691, "learning_rate": 0.0001, "loss": 0.0101, "step": 169730 }, { "epoch": 1116.7105263157894, "grad_norm": 1.4476648569107056, "learning_rate": 0.0001, "loss": 0.0129, "step": 169740 }, { "epoch": 1116.7763157894738, "grad_norm": 1.4536819458007812, "learning_rate": 0.0001, "loss": 0.0135, "step": 169750 }, { "epoch": 1116.842105263158, "grad_norm": 1.4118138551712036, "learning_rate": 0.0001, "loss": 0.0132, "step": 169760 }, { "epoch": 1116.907894736842, "grad_norm": 1.041900634765625, "learning_rate": 0.0001, "loss": 0.0122, "step": 169770 }, { "epoch": 1116.9736842105262, "grad_norm": 1.008051872253418, "learning_rate": 0.0001, "loss": 0.0094, "step": 169780 }, { "epoch": 1117.0394736842106, "grad_norm": 1.4811947345733643, "learning_rate": 0.0001, "loss": 0.0112, "step": 169790 }, { "epoch": 1117.1052631578948, "grad_norm": 1.6438745260238647, "learning_rate": 0.0001, "loss": 0.0163, "step": 169800 }, { "epoch": 1117.171052631579, "grad_norm": 1.54276442527771, "learning_rate": 0.0001, "loss": 0.0134, "step": 169810 }, { "epoch": 1117.2368421052631, "grad_norm": 1.522742748260498, "learning_rate": 0.0001, "loss": 0.0146, "step": 169820 }, { "epoch": 1117.3026315789473, "grad_norm": 1.4891278743743896, "learning_rate": 0.0001, "loss": 0.0103, "step": 169830 }, { "epoch": 1117.3684210526317, "grad_norm": 1.1740275621414185, "learning_rate": 0.0001, "loss": 0.0157, "step": 169840 }, { "epoch": 1117.4342105263158, "grad_norm": 1.6081308126449585, "learning_rate": 0.0001, "loss": 0.0085, "step": 169850 }, { "epoch": 1117.5, "grad_norm": 2.162123680114746, "learning_rate": 0.0001, "loss": 0.0112, "step": 169860 }, { "epoch": 1117.5657894736842, "grad_norm": 1.7517104148864746, "learning_rate": 0.0001, "loss": 0.0145, "step": 169870 }, { "epoch": 1117.6315789473683, "grad_norm": 1.9084384441375732, "learning_rate": 0.0001, "loss": 0.0138, "step": 169880 }, { "epoch": 1117.6973684210527, "grad_norm": 1.5847758054733276, "learning_rate": 0.0001, "loss": 0.0086, "step": 169890 }, { "epoch": 1117.7631578947369, "grad_norm": 1.6151893138885498, "learning_rate": 0.0001, "loss": 0.0108, "step": 169900 }, { "epoch": 1117.828947368421, "grad_norm": 1.4719817638397217, "learning_rate": 0.0001, "loss": 0.0081, "step": 169910 }, { "epoch": 1117.8947368421052, "grad_norm": 1.2248108386993408, "learning_rate": 0.0001, "loss": 0.012, "step": 169920 }, { "epoch": 1117.9605263157894, "grad_norm": 1.2840923070907593, "learning_rate": 0.0001, "loss": 0.0112, "step": 169930 }, { "epoch": 1118.0263157894738, "grad_norm": 1.4715420007705688, "learning_rate": 0.0001, "loss": 0.0112, "step": 169940 }, { "epoch": 1118.092105263158, "grad_norm": 1.7400858402252197, "learning_rate": 0.0001, "loss": 0.0142, "step": 169950 }, { "epoch": 1118.157894736842, "grad_norm": 1.3266772031784058, "learning_rate": 0.0001, "loss": 0.0138, "step": 169960 }, { "epoch": 1118.2236842105262, "grad_norm": 1.2406309843063354, "learning_rate": 0.0001, "loss": 0.0117, "step": 169970 }, { "epoch": 1118.2894736842106, "grad_norm": 1.4495481252670288, "learning_rate": 0.0001, "loss": 0.0105, "step": 169980 }, { "epoch": 1118.3552631578948, "grad_norm": 1.0100713968276978, "learning_rate": 0.0001, "loss": 0.0088, "step": 169990 }, { "epoch": 1118.421052631579, "grad_norm": 1.4102951288223267, "learning_rate": 0.0001, "loss": 0.0176, "step": 170000 }, { "epoch": 1118.4868421052631, "grad_norm": 1.4634556770324707, "learning_rate": 0.0001, "loss": 0.0129, "step": 170010 }, { "epoch": 1118.5526315789473, "grad_norm": 1.2906899452209473, "learning_rate": 0.0001, "loss": 0.0129, "step": 170020 }, { "epoch": 1118.6184210526317, "grad_norm": 1.555329442024231, "learning_rate": 0.0001, "loss": 0.0112, "step": 170030 }, { "epoch": 1118.6842105263158, "grad_norm": 1.2989139556884766, "learning_rate": 0.0001, "loss": 0.0103, "step": 170040 }, { "epoch": 1118.75, "grad_norm": 1.817284107208252, "learning_rate": 0.0001, "loss": 0.0114, "step": 170050 }, { "epoch": 1118.8157894736842, "grad_norm": 1.780975580215454, "learning_rate": 0.0001, "loss": 0.0115, "step": 170060 }, { "epoch": 1118.8815789473683, "grad_norm": 1.196070909500122, "learning_rate": 0.0001, "loss": 0.0111, "step": 170070 }, { "epoch": 1118.9473684210527, "grad_norm": 1.5898948907852173, "learning_rate": 0.0001, "loss": 0.0108, "step": 170080 }, { "epoch": 1119.0131578947369, "grad_norm": 1.1025989055633545, "learning_rate": 0.0001, "loss": 0.0168, "step": 170090 }, { "epoch": 1119.078947368421, "grad_norm": 1.3173507452011108, "learning_rate": 0.0001, "loss": 0.0112, "step": 170100 }, { "epoch": 1119.1447368421052, "grad_norm": 1.4970636367797852, "learning_rate": 0.0001, "loss": 0.0126, "step": 170110 }, { "epoch": 1119.2105263157894, "grad_norm": 1.6795488595962524, "learning_rate": 0.0001, "loss": 0.0103, "step": 170120 }, { "epoch": 1119.2763157894738, "grad_norm": 1.6872907876968384, "learning_rate": 0.0001, "loss": 0.0125, "step": 170130 }, { "epoch": 1119.342105263158, "grad_norm": 1.5851812362670898, "learning_rate": 0.0001, "loss": 0.0096, "step": 170140 }, { "epoch": 1119.407894736842, "grad_norm": 1.6236610412597656, "learning_rate": 0.0001, "loss": 0.0128, "step": 170150 }, { "epoch": 1119.4736842105262, "grad_norm": 1.1425617933273315, "learning_rate": 0.0001, "loss": 0.0102, "step": 170160 }, { "epoch": 1119.5394736842106, "grad_norm": 1.5649824142456055, "learning_rate": 0.0001, "loss": 0.0126, "step": 170170 }, { "epoch": 1119.6052631578948, "grad_norm": 1.6085736751556396, "learning_rate": 0.0001, "loss": 0.0127, "step": 170180 }, { "epoch": 1119.671052631579, "grad_norm": 1.2844477891921997, "learning_rate": 0.0001, "loss": 0.01, "step": 170190 }, { "epoch": 1119.7368421052631, "grad_norm": 1.3880079984664917, "learning_rate": 0.0001, "loss": 0.0131, "step": 170200 }, { "epoch": 1119.8026315789473, "grad_norm": 1.1607855558395386, "learning_rate": 0.0001, "loss": 0.013, "step": 170210 }, { "epoch": 1119.8684210526317, "grad_norm": 1.7450129985809326, "learning_rate": 0.0001, "loss": 0.013, "step": 170220 }, { "epoch": 1119.9342105263158, "grad_norm": 1.250399112701416, "learning_rate": 0.0001, "loss": 0.0121, "step": 170230 }, { "epoch": 1120.0, "grad_norm": 1.2086886167526245, "learning_rate": 0.0001, "loss": 0.0143, "step": 170240 }, { "epoch": 1120.0657894736842, "grad_norm": 1.774343490600586, "learning_rate": 0.0001, "loss": 0.0165, "step": 170250 }, { "epoch": 1120.1315789473683, "grad_norm": 1.2730540037155151, "learning_rate": 0.0001, "loss": 0.0103, "step": 170260 }, { "epoch": 1120.1973684210527, "grad_norm": 1.3110556602478027, "learning_rate": 0.0001, "loss": 0.0152, "step": 170270 }, { "epoch": 1120.2631578947369, "grad_norm": 1.3895059823989868, "learning_rate": 0.0001, "loss": 0.0099, "step": 170280 }, { "epoch": 1120.328947368421, "grad_norm": 1.1246583461761475, "learning_rate": 0.0001, "loss": 0.0115, "step": 170290 }, { "epoch": 1120.3947368421052, "grad_norm": 1.2428512573242188, "learning_rate": 0.0001, "loss": 0.0113, "step": 170300 }, { "epoch": 1120.4605263157894, "grad_norm": 1.3374249935150146, "learning_rate": 0.0001, "loss": 0.0124, "step": 170310 }, { "epoch": 1120.5263157894738, "grad_norm": 1.0783573389053345, "learning_rate": 0.0001, "loss": 0.0133, "step": 170320 }, { "epoch": 1120.592105263158, "grad_norm": 1.7121515274047852, "learning_rate": 0.0001, "loss": 0.0127, "step": 170330 }, { "epoch": 1120.657894736842, "grad_norm": 1.5456488132476807, "learning_rate": 0.0001, "loss": 0.0112, "step": 170340 }, { "epoch": 1120.7236842105262, "grad_norm": 1.4039525985717773, "learning_rate": 0.0001, "loss": 0.01, "step": 170350 }, { "epoch": 1120.7894736842106, "grad_norm": 1.3274470567703247, "learning_rate": 0.0001, "loss": 0.0106, "step": 170360 }, { "epoch": 1120.8552631578948, "grad_norm": 1.4200854301452637, "learning_rate": 0.0001, "loss": 0.0143, "step": 170370 }, { "epoch": 1120.921052631579, "grad_norm": 1.441306233406067, "learning_rate": 0.0001, "loss": 0.0085, "step": 170380 }, { "epoch": 1120.9868421052631, "grad_norm": 1.5838850736618042, "learning_rate": 0.0001, "loss": 0.0124, "step": 170390 }, { "epoch": 1121.0526315789473, "grad_norm": 1.356797695159912, "learning_rate": 0.0001, "loss": 0.0154, "step": 170400 }, { "epoch": 1121.1184210526317, "grad_norm": 1.2258808612823486, "learning_rate": 0.0001, "loss": 0.0114, "step": 170410 }, { "epoch": 1121.1842105263158, "grad_norm": 1.4692000150680542, "learning_rate": 0.0001, "loss": 0.0095, "step": 170420 }, { "epoch": 1121.25, "grad_norm": 1.429694652557373, "learning_rate": 0.0001, "loss": 0.0129, "step": 170430 }, { "epoch": 1121.3157894736842, "grad_norm": 1.3754403591156006, "learning_rate": 0.0001, "loss": 0.0106, "step": 170440 }, { "epoch": 1121.3815789473683, "grad_norm": 1.4996263980865479, "learning_rate": 0.0001, "loss": 0.0143, "step": 170450 }, { "epoch": 1121.4473684210527, "grad_norm": 1.6897811889648438, "learning_rate": 0.0001, "loss": 0.0147, "step": 170460 }, { "epoch": 1121.5131578947369, "grad_norm": 1.4464340209960938, "learning_rate": 0.0001, "loss": 0.0101, "step": 170470 }, { "epoch": 1121.578947368421, "grad_norm": 1.3356677293777466, "learning_rate": 0.0001, "loss": 0.0154, "step": 170480 }, { "epoch": 1121.6447368421052, "grad_norm": 1.4754902124404907, "learning_rate": 0.0001, "loss": 0.0115, "step": 170490 }, { "epoch": 1121.7105263157894, "grad_norm": 1.5740251541137695, "learning_rate": 0.0001, "loss": 0.0105, "step": 170500 }, { "epoch": 1121.7763157894738, "grad_norm": 1.4689189195632935, "learning_rate": 0.0001, "loss": 0.0111, "step": 170510 }, { "epoch": 1121.842105263158, "grad_norm": 1.7968095541000366, "learning_rate": 0.0001, "loss": 0.0118, "step": 170520 }, { "epoch": 1121.907894736842, "grad_norm": 1.8498066663742065, "learning_rate": 0.0001, "loss": 0.0114, "step": 170530 }, { "epoch": 1121.9736842105262, "grad_norm": 1.0910992622375488, "learning_rate": 0.0001, "loss": 0.0161, "step": 170540 }, { "epoch": 1122.0394736842106, "grad_norm": 1.8909739255905151, "learning_rate": 0.0001, "loss": 0.0121, "step": 170550 }, { "epoch": 1122.1052631578948, "grad_norm": 1.391100287437439, "learning_rate": 0.0001, "loss": 0.0106, "step": 170560 }, { "epoch": 1122.171052631579, "grad_norm": 1.324644923210144, "learning_rate": 0.0001, "loss": 0.0147, "step": 170570 }, { "epoch": 1122.2368421052631, "grad_norm": 1.1243089437484741, "learning_rate": 0.0001, "loss": 0.0107, "step": 170580 }, { "epoch": 1122.3026315789473, "grad_norm": 1.353118896484375, "learning_rate": 0.0001, "loss": 0.0118, "step": 170590 }, { "epoch": 1122.3684210526317, "grad_norm": 1.2155033349990845, "learning_rate": 0.0001, "loss": 0.011, "step": 170600 }, { "epoch": 1122.4342105263158, "grad_norm": 1.1201235055923462, "learning_rate": 0.0001, "loss": 0.012, "step": 170610 }, { "epoch": 1122.5, "grad_norm": 1.2959197759628296, "learning_rate": 0.0001, "loss": 0.0118, "step": 170620 }, { "epoch": 1122.5657894736842, "grad_norm": 1.3009095191955566, "learning_rate": 0.0001, "loss": 0.0132, "step": 170630 }, { "epoch": 1122.6315789473683, "grad_norm": 1.3822851181030273, "learning_rate": 0.0001, "loss": 0.0105, "step": 170640 }, { "epoch": 1122.6973684210527, "grad_norm": 1.3294426202774048, "learning_rate": 0.0001, "loss": 0.0118, "step": 170650 }, { "epoch": 1122.7631578947369, "grad_norm": 1.2707031965255737, "learning_rate": 0.0001, "loss": 0.0103, "step": 170660 }, { "epoch": 1122.828947368421, "grad_norm": 1.6670013666152954, "learning_rate": 0.0001, "loss": 0.012, "step": 170670 }, { "epoch": 1122.8947368421052, "grad_norm": 1.6879326105117798, "learning_rate": 0.0001, "loss": 0.016, "step": 170680 }, { "epoch": 1122.9605263157894, "grad_norm": 1.3115787506103516, "learning_rate": 0.0001, "loss": 0.014, "step": 170690 }, { "epoch": 1123.0263157894738, "grad_norm": 1.5094155073165894, "learning_rate": 0.0001, "loss": 0.0134, "step": 170700 }, { "epoch": 1123.092105263158, "grad_norm": 1.4270988702774048, "learning_rate": 0.0001, "loss": 0.0129, "step": 170710 }, { "epoch": 1123.157894736842, "grad_norm": 1.5450108051300049, "learning_rate": 0.0001, "loss": 0.0132, "step": 170720 }, { "epoch": 1123.2236842105262, "grad_norm": 1.3741587400436401, "learning_rate": 0.0001, "loss": 0.0124, "step": 170730 }, { "epoch": 1123.2894736842106, "grad_norm": 1.7292277812957764, "learning_rate": 0.0001, "loss": 0.0097, "step": 170740 }, { "epoch": 1123.3552631578948, "grad_norm": 1.463628888130188, "learning_rate": 0.0001, "loss": 0.0162, "step": 170750 }, { "epoch": 1123.421052631579, "grad_norm": 1.498267412185669, "learning_rate": 0.0001, "loss": 0.0119, "step": 170760 }, { "epoch": 1123.4868421052631, "grad_norm": 1.6789813041687012, "learning_rate": 0.0001, "loss": 0.0092, "step": 170770 }, { "epoch": 1123.5526315789473, "grad_norm": 1.1183961629867554, "learning_rate": 0.0001, "loss": 0.0145, "step": 170780 }, { "epoch": 1123.6184210526317, "grad_norm": 1.3252918720245361, "learning_rate": 0.0001, "loss": 0.0115, "step": 170790 }, { "epoch": 1123.6842105263158, "grad_norm": 1.5301803350448608, "learning_rate": 0.0001, "loss": 0.0139, "step": 170800 }, { "epoch": 1123.75, "grad_norm": 2.0554189682006836, "learning_rate": 0.0001, "loss": 0.0111, "step": 170810 }, { "epoch": 1123.8157894736842, "grad_norm": 1.55497145652771, "learning_rate": 0.0001, "loss": 0.0106, "step": 170820 }, { "epoch": 1123.8815789473683, "grad_norm": 1.5932466983795166, "learning_rate": 0.0001, "loss": 0.012, "step": 170830 }, { "epoch": 1123.9473684210527, "grad_norm": 1.6371444463729858, "learning_rate": 0.0001, "loss": 0.0095, "step": 170840 }, { "epoch": 1124.0131578947369, "grad_norm": 1.4656105041503906, "learning_rate": 0.0001, "loss": 0.0113, "step": 170850 }, { "epoch": 1124.078947368421, "grad_norm": 1.4742274284362793, "learning_rate": 0.0001, "loss": 0.0116, "step": 170860 }, { "epoch": 1124.1447368421052, "grad_norm": 1.4724267721176147, "learning_rate": 0.0001, "loss": 0.0128, "step": 170870 }, { "epoch": 1124.2105263157894, "grad_norm": 1.501273512840271, "learning_rate": 0.0001, "loss": 0.0117, "step": 170880 }, { "epoch": 1124.2763157894738, "grad_norm": 1.8740414381027222, "learning_rate": 0.0001, "loss": 0.0123, "step": 170890 }, { "epoch": 1124.342105263158, "grad_norm": 1.7555168867111206, "learning_rate": 0.0001, "loss": 0.0129, "step": 170900 }, { "epoch": 1124.407894736842, "grad_norm": 1.371199369430542, "learning_rate": 0.0001, "loss": 0.0134, "step": 170910 }, { "epoch": 1124.4736842105262, "grad_norm": 1.0386911630630493, "learning_rate": 0.0001, "loss": 0.0103, "step": 170920 }, { "epoch": 1124.5394736842106, "grad_norm": 1.4979910850524902, "learning_rate": 0.0001, "loss": 0.014, "step": 170930 }, { "epoch": 1124.6052631578948, "grad_norm": 1.3778436183929443, "learning_rate": 0.0001, "loss": 0.013, "step": 170940 }, { "epoch": 1124.671052631579, "grad_norm": 1.1849713325500488, "learning_rate": 0.0001, "loss": 0.0108, "step": 170950 }, { "epoch": 1124.7368421052631, "grad_norm": 1.591064453125, "learning_rate": 0.0001, "loss": 0.0103, "step": 170960 }, { "epoch": 1124.8026315789473, "grad_norm": 1.8046817779541016, "learning_rate": 0.0001, "loss": 0.0121, "step": 170970 }, { "epoch": 1124.8684210526317, "grad_norm": 1.1676143407821655, "learning_rate": 0.0001, "loss": 0.0114, "step": 170980 }, { "epoch": 1124.9342105263158, "grad_norm": 1.354720115661621, "learning_rate": 0.0001, "loss": 0.0123, "step": 170990 }, { "epoch": 1125.0, "grad_norm": 1.761888027191162, "learning_rate": 0.0001, "loss": 0.0129, "step": 171000 }, { "epoch": 1125.0657894736842, "grad_norm": 1.4722589254379272, "learning_rate": 0.0001, "loss": 0.0095, "step": 171010 }, { "epoch": 1125.1315789473683, "grad_norm": 1.4212062358856201, "learning_rate": 0.0001, "loss": 0.0135, "step": 171020 }, { "epoch": 1125.1973684210527, "grad_norm": 1.4538253545761108, "learning_rate": 0.0001, "loss": 0.0128, "step": 171030 }, { "epoch": 1125.2631578947369, "grad_norm": 1.34641432762146, "learning_rate": 0.0001, "loss": 0.0132, "step": 171040 }, { "epoch": 1125.328947368421, "grad_norm": 1.500372290611267, "learning_rate": 0.0001, "loss": 0.0136, "step": 171050 }, { "epoch": 1125.3947368421052, "grad_norm": 1.4117169380187988, "learning_rate": 0.0001, "loss": 0.0115, "step": 171060 }, { "epoch": 1125.4605263157894, "grad_norm": 1.1288031339645386, "learning_rate": 0.0001, "loss": 0.0113, "step": 171070 }, { "epoch": 1125.5263157894738, "grad_norm": 1.649530291557312, "learning_rate": 0.0001, "loss": 0.0139, "step": 171080 }, { "epoch": 1125.592105263158, "grad_norm": 1.2815924882888794, "learning_rate": 0.0001, "loss": 0.0091, "step": 171090 }, { "epoch": 1125.657894736842, "grad_norm": 1.2180702686309814, "learning_rate": 0.0001, "loss": 0.0134, "step": 171100 }, { "epoch": 1125.7236842105262, "grad_norm": 1.5676703453063965, "learning_rate": 0.0001, "loss": 0.0136, "step": 171110 }, { "epoch": 1125.7894736842106, "grad_norm": 1.6592521667480469, "learning_rate": 0.0001, "loss": 0.0104, "step": 171120 }, { "epoch": 1125.8552631578948, "grad_norm": 1.706538438796997, "learning_rate": 0.0001, "loss": 0.0099, "step": 171130 }, { "epoch": 1125.921052631579, "grad_norm": 1.1440454721450806, "learning_rate": 0.0001, "loss": 0.0102, "step": 171140 }, { "epoch": 1125.9868421052631, "grad_norm": 1.594955325126648, "learning_rate": 0.0001, "loss": 0.0168, "step": 171150 }, { "epoch": 1126.0526315789473, "grad_norm": 1.2544904947280884, "learning_rate": 0.0001, "loss": 0.014, "step": 171160 }, { "epoch": 1126.1184210526317, "grad_norm": 1.4270612001419067, "learning_rate": 0.0001, "loss": 0.011, "step": 171170 }, { "epoch": 1126.1842105263158, "grad_norm": 1.4647289514541626, "learning_rate": 0.0001, "loss": 0.0124, "step": 171180 }, { "epoch": 1126.25, "grad_norm": 1.7346749305725098, "learning_rate": 0.0001, "loss": 0.0157, "step": 171190 }, { "epoch": 1126.3157894736842, "grad_norm": 1.4942398071289062, "learning_rate": 0.0001, "loss": 0.013, "step": 171200 }, { "epoch": 1126.3815789473683, "grad_norm": 1.1811250448226929, "learning_rate": 0.0001, "loss": 0.0121, "step": 171210 }, { "epoch": 1126.4473684210527, "grad_norm": 1.5562204122543335, "learning_rate": 0.0001, "loss": 0.0101, "step": 171220 }, { "epoch": 1126.5131578947369, "grad_norm": 1.2395974397659302, "learning_rate": 0.0001, "loss": 0.0129, "step": 171230 }, { "epoch": 1126.578947368421, "grad_norm": 1.0670456886291504, "learning_rate": 0.0001, "loss": 0.0158, "step": 171240 }, { "epoch": 1126.6447368421052, "grad_norm": 1.3755578994750977, "learning_rate": 0.0001, "loss": 0.0098, "step": 171250 }, { "epoch": 1126.7105263157894, "grad_norm": 1.587870478630066, "learning_rate": 0.0001, "loss": 0.0108, "step": 171260 }, { "epoch": 1126.7763157894738, "grad_norm": 1.397241234779358, "learning_rate": 0.0001, "loss": 0.0096, "step": 171270 }, { "epoch": 1126.842105263158, "grad_norm": 1.815962553024292, "learning_rate": 0.0001, "loss": 0.0092, "step": 171280 }, { "epoch": 1126.907894736842, "grad_norm": 1.3865633010864258, "learning_rate": 0.0001, "loss": 0.0111, "step": 171290 }, { "epoch": 1126.9736842105262, "grad_norm": 1.441246747970581, "learning_rate": 0.0001, "loss": 0.0138, "step": 171300 }, { "epoch": 1127.0394736842106, "grad_norm": 1.4186714887619019, "learning_rate": 0.0001, "loss": 0.0112, "step": 171310 }, { "epoch": 1127.1052631578948, "grad_norm": 1.6188372373580933, "learning_rate": 0.0001, "loss": 0.0109, "step": 171320 }, { "epoch": 1127.171052631579, "grad_norm": 1.2745546102523804, "learning_rate": 0.0001, "loss": 0.0107, "step": 171330 }, { "epoch": 1127.2368421052631, "grad_norm": 1.4431205987930298, "learning_rate": 0.0001, "loss": 0.0095, "step": 171340 }, { "epoch": 1127.3026315789473, "grad_norm": 1.4948945045471191, "learning_rate": 0.0001, "loss": 0.0136, "step": 171350 }, { "epoch": 1127.3684210526317, "grad_norm": 0.938871443271637, "learning_rate": 0.0001, "loss": 0.0126, "step": 171360 }, { "epoch": 1127.4342105263158, "grad_norm": 1.327417016029358, "learning_rate": 0.0001, "loss": 0.0145, "step": 171370 }, { "epoch": 1127.5, "grad_norm": 1.2583355903625488, "learning_rate": 0.0001, "loss": 0.0147, "step": 171380 }, { "epoch": 1127.5657894736842, "grad_norm": 1.6173036098480225, "learning_rate": 0.0001, "loss": 0.0111, "step": 171390 }, { "epoch": 1127.6315789473683, "grad_norm": 1.4119755029678345, "learning_rate": 0.0001, "loss": 0.0127, "step": 171400 }, { "epoch": 1127.6973684210527, "grad_norm": 1.2898746728897095, "learning_rate": 0.0001, "loss": 0.0174, "step": 171410 }, { "epoch": 1127.7631578947369, "grad_norm": 1.4681363105773926, "learning_rate": 0.0001, "loss": 0.0126, "step": 171420 }, { "epoch": 1127.828947368421, "grad_norm": 1.468137502670288, "learning_rate": 0.0001, "loss": 0.0122, "step": 171430 }, { "epoch": 1127.8947368421052, "grad_norm": 1.7962485551834106, "learning_rate": 0.0001, "loss": 0.0083, "step": 171440 }, { "epoch": 1127.9605263157894, "grad_norm": 1.3090723752975464, "learning_rate": 0.0001, "loss": 0.012, "step": 171450 }, { "epoch": 1128.0263157894738, "grad_norm": 1.6048089265823364, "learning_rate": 0.0001, "loss": 0.0093, "step": 171460 }, { "epoch": 1128.092105263158, "grad_norm": 1.4217885732650757, "learning_rate": 0.0001, "loss": 0.0121, "step": 171470 }, { "epoch": 1128.157894736842, "grad_norm": 1.200107455253601, "learning_rate": 0.0001, "loss": 0.0127, "step": 171480 }, { "epoch": 1128.2236842105262, "grad_norm": 1.4987956285476685, "learning_rate": 0.0001, "loss": 0.0099, "step": 171490 }, { "epoch": 1128.2894736842106, "grad_norm": 1.6591331958770752, "learning_rate": 0.0001, "loss": 0.0125, "step": 171500 }, { "epoch": 1128.3552631578948, "grad_norm": 1.3644421100616455, "learning_rate": 0.0001, "loss": 0.0116, "step": 171510 }, { "epoch": 1128.421052631579, "grad_norm": 1.5181941986083984, "learning_rate": 0.0001, "loss": 0.0138, "step": 171520 }, { "epoch": 1128.4868421052631, "grad_norm": 1.6223392486572266, "learning_rate": 0.0001, "loss": 0.0125, "step": 171530 }, { "epoch": 1128.5526315789473, "grad_norm": 1.586948275566101, "learning_rate": 0.0001, "loss": 0.0102, "step": 171540 }, { "epoch": 1128.6184210526317, "grad_norm": 1.6386512517929077, "learning_rate": 0.0001, "loss": 0.0101, "step": 171550 }, { "epoch": 1128.6842105263158, "grad_norm": 1.3267613649368286, "learning_rate": 0.0001, "loss": 0.0171, "step": 171560 }, { "epoch": 1128.75, "grad_norm": 1.3705363273620605, "learning_rate": 0.0001, "loss": 0.0115, "step": 171570 }, { "epoch": 1128.8157894736842, "grad_norm": 1.7341153621673584, "learning_rate": 0.0001, "loss": 0.0084, "step": 171580 }, { "epoch": 1128.8815789473683, "grad_norm": 1.5364067554473877, "learning_rate": 0.0001, "loss": 0.0127, "step": 171590 }, { "epoch": 1128.9473684210527, "grad_norm": 1.69677734375, "learning_rate": 0.0001, "loss": 0.013, "step": 171600 }, { "epoch": 1129.0131578947369, "grad_norm": 0.9868074655532837, "learning_rate": 0.0001, "loss": 0.0135, "step": 171610 }, { "epoch": 1129.078947368421, "grad_norm": 1.4041619300842285, "learning_rate": 0.0001, "loss": 0.0101, "step": 171620 }, { "epoch": 1129.1447368421052, "grad_norm": 1.4678597450256348, "learning_rate": 0.0001, "loss": 0.0104, "step": 171630 }, { "epoch": 1129.2105263157894, "grad_norm": 1.206837773323059, "learning_rate": 0.0001, "loss": 0.0138, "step": 171640 }, { "epoch": 1129.2763157894738, "grad_norm": 1.1508547067642212, "learning_rate": 0.0001, "loss": 0.0167, "step": 171650 }, { "epoch": 1129.342105263158, "grad_norm": 1.638597011566162, "learning_rate": 0.0001, "loss": 0.0148, "step": 171660 }, { "epoch": 1129.407894736842, "grad_norm": 1.2495654821395874, "learning_rate": 0.0001, "loss": 0.0157, "step": 171670 }, { "epoch": 1129.4736842105262, "grad_norm": 1.7412585020065308, "learning_rate": 0.0001, "loss": 0.0111, "step": 171680 }, { "epoch": 1129.5394736842106, "grad_norm": 1.5609853267669678, "learning_rate": 0.0001, "loss": 0.0129, "step": 171690 }, { "epoch": 1129.6052631578948, "grad_norm": 1.5312888622283936, "learning_rate": 0.0001, "loss": 0.0124, "step": 171700 }, { "epoch": 1129.671052631579, "grad_norm": 1.7082549333572388, "learning_rate": 0.0001, "loss": 0.01, "step": 171710 }, { "epoch": 1129.7368421052631, "grad_norm": 1.3983269929885864, "learning_rate": 0.0001, "loss": 0.0099, "step": 171720 }, { "epoch": 1129.8026315789473, "grad_norm": 1.5392043590545654, "learning_rate": 0.0001, "loss": 0.0107, "step": 171730 }, { "epoch": 1129.8684210526317, "grad_norm": 1.7234010696411133, "learning_rate": 0.0001, "loss": 0.0094, "step": 171740 }, { "epoch": 1129.9342105263158, "grad_norm": 1.4749034643173218, "learning_rate": 0.0001, "loss": 0.013, "step": 171750 }, { "epoch": 1130.0, "grad_norm": 1.2531743049621582, "learning_rate": 0.0001, "loss": 0.0097, "step": 171760 }, { "epoch": 1130.0657894736842, "grad_norm": 1.206955909729004, "learning_rate": 0.0001, "loss": 0.0115, "step": 171770 }, { "epoch": 1130.1315789473683, "grad_norm": 1.5257858037948608, "learning_rate": 0.0001, "loss": 0.0178, "step": 171780 }, { "epoch": 1130.1973684210527, "grad_norm": 1.755685567855835, "learning_rate": 0.0001, "loss": 0.0144, "step": 171790 }, { "epoch": 1130.2631578947369, "grad_norm": 1.2300875186920166, "learning_rate": 0.0001, "loss": 0.0109, "step": 171800 }, { "epoch": 1130.328947368421, "grad_norm": 1.4258850812911987, "learning_rate": 0.0001, "loss": 0.0153, "step": 171810 }, { "epoch": 1130.3947368421052, "grad_norm": 1.7315689325332642, "learning_rate": 0.0001, "loss": 0.0115, "step": 171820 }, { "epoch": 1130.4605263157894, "grad_norm": 1.647549033164978, "learning_rate": 0.0001, "loss": 0.0121, "step": 171830 }, { "epoch": 1130.5263157894738, "grad_norm": 1.3540818691253662, "learning_rate": 0.0001, "loss": 0.009, "step": 171840 }, { "epoch": 1130.592105263158, "grad_norm": 1.7434369325637817, "learning_rate": 0.0001, "loss": 0.0128, "step": 171850 }, { "epoch": 1130.657894736842, "grad_norm": 2.223470449447632, "learning_rate": 0.0001, "loss": 0.0091, "step": 171860 }, { "epoch": 1130.7236842105262, "grad_norm": 1.673007607460022, "learning_rate": 0.0001, "loss": 0.0127, "step": 171870 }, { "epoch": 1130.7894736842106, "grad_norm": 1.292779564857483, "learning_rate": 0.0001, "loss": 0.0088, "step": 171880 }, { "epoch": 1130.8552631578948, "grad_norm": 1.6728299856185913, "learning_rate": 0.0001, "loss": 0.0095, "step": 171890 }, { "epoch": 1130.921052631579, "grad_norm": 0.9987917542457581, "learning_rate": 0.0001, "loss": 0.0128, "step": 171900 }, { "epoch": 1130.9868421052631, "grad_norm": 1.4222102165222168, "learning_rate": 0.0001, "loss": 0.0122, "step": 171910 }, { "epoch": 1131.0526315789473, "grad_norm": 1.0951389074325562, "learning_rate": 0.0001, "loss": 0.0104, "step": 171920 }, { "epoch": 1131.1184210526317, "grad_norm": 1.0953434705734253, "learning_rate": 0.0001, "loss": 0.0101, "step": 171930 }, { "epoch": 1131.1842105263158, "grad_norm": 1.297682285308838, "learning_rate": 0.0001, "loss": 0.0087, "step": 171940 }, { "epoch": 1131.25, "grad_norm": 1.420255422592163, "learning_rate": 0.0001, "loss": 0.0127, "step": 171950 }, { "epoch": 1131.3157894736842, "grad_norm": 1.5227311849594116, "learning_rate": 0.0001, "loss": 0.0124, "step": 171960 }, { "epoch": 1131.3815789473683, "grad_norm": 1.5650485754013062, "learning_rate": 0.0001, "loss": 0.0118, "step": 171970 }, { "epoch": 1131.4473684210527, "grad_norm": 1.5252916812896729, "learning_rate": 0.0001, "loss": 0.0112, "step": 171980 }, { "epoch": 1131.5131578947369, "grad_norm": 1.5693285465240479, "learning_rate": 0.0001, "loss": 0.0171, "step": 171990 }, { "epoch": 1131.578947368421, "grad_norm": 1.593724012374878, "learning_rate": 0.0001, "loss": 0.01, "step": 172000 }, { "epoch": 1131.6447368421052, "grad_norm": 1.0831918716430664, "learning_rate": 0.0001, "loss": 0.0125, "step": 172010 }, { "epoch": 1131.7105263157894, "grad_norm": 1.0409125089645386, "learning_rate": 0.0001, "loss": 0.0126, "step": 172020 }, { "epoch": 1131.7763157894738, "grad_norm": 1.5190855264663696, "learning_rate": 0.0001, "loss": 0.0124, "step": 172030 }, { "epoch": 1131.842105263158, "grad_norm": 1.0497446060180664, "learning_rate": 0.0001, "loss": 0.0163, "step": 172040 }, { "epoch": 1131.907894736842, "grad_norm": 1.8740745782852173, "learning_rate": 0.0001, "loss": 0.0097, "step": 172050 }, { "epoch": 1131.9736842105262, "grad_norm": 1.730207920074463, "learning_rate": 0.0001, "loss": 0.0113, "step": 172060 }, { "epoch": 1132.0394736842106, "grad_norm": 1.4963394403457642, "learning_rate": 0.0001, "loss": 0.0182, "step": 172070 }, { "epoch": 1132.1052631578948, "grad_norm": 1.57485830783844, "learning_rate": 0.0001, "loss": 0.014, "step": 172080 }, { "epoch": 1132.171052631579, "grad_norm": 1.3174645900726318, "learning_rate": 0.0001, "loss": 0.013, "step": 172090 }, { "epoch": 1132.2368421052631, "grad_norm": 1.7730622291564941, "learning_rate": 0.0001, "loss": 0.0105, "step": 172100 }, { "epoch": 1132.3026315789473, "grad_norm": 1.454184651374817, "learning_rate": 0.0001, "loss": 0.0111, "step": 172110 }, { "epoch": 1132.3684210526317, "grad_norm": 1.0961520671844482, "learning_rate": 0.0001, "loss": 0.0111, "step": 172120 }, { "epoch": 1132.4342105263158, "grad_norm": 1.3876346349716187, "learning_rate": 0.0001, "loss": 0.0119, "step": 172130 }, { "epoch": 1132.5, "grad_norm": 1.5954856872558594, "learning_rate": 0.0001, "loss": 0.0138, "step": 172140 }, { "epoch": 1132.5657894736842, "grad_norm": 1.2539081573486328, "learning_rate": 0.0001, "loss": 0.0118, "step": 172150 }, { "epoch": 1132.6315789473683, "grad_norm": 1.657205581665039, "learning_rate": 0.0001, "loss": 0.0106, "step": 172160 }, { "epoch": 1132.6973684210527, "grad_norm": 1.0597645044326782, "learning_rate": 0.0001, "loss": 0.0118, "step": 172170 }, { "epoch": 1132.7631578947369, "grad_norm": 1.2958935499191284, "learning_rate": 0.0001, "loss": 0.0122, "step": 172180 }, { "epoch": 1132.828947368421, "grad_norm": 1.1143966913223267, "learning_rate": 0.0001, "loss": 0.0142, "step": 172190 }, { "epoch": 1132.8947368421052, "grad_norm": 1.2535138130187988, "learning_rate": 0.0001, "loss": 0.0144, "step": 172200 }, { "epoch": 1132.9605263157894, "grad_norm": 1.4817274808883667, "learning_rate": 0.0001, "loss": 0.0102, "step": 172210 }, { "epoch": 1133.0263157894738, "grad_norm": 1.37044095993042, "learning_rate": 0.0001, "loss": 0.0089, "step": 172220 }, { "epoch": 1133.092105263158, "grad_norm": 1.9101403951644897, "learning_rate": 0.0001, "loss": 0.0096, "step": 172230 }, { "epoch": 1133.157894736842, "grad_norm": 1.5137667655944824, "learning_rate": 0.0001, "loss": 0.0146, "step": 172240 }, { "epoch": 1133.2236842105262, "grad_norm": 1.5738131999969482, "learning_rate": 0.0001, "loss": 0.0147, "step": 172250 }, { "epoch": 1133.2894736842106, "grad_norm": 1.6964776515960693, "learning_rate": 0.0001, "loss": 0.0126, "step": 172260 }, { "epoch": 1133.3552631578948, "grad_norm": 1.6617194414138794, "learning_rate": 0.0001, "loss": 0.0136, "step": 172270 }, { "epoch": 1133.421052631579, "grad_norm": 1.4189282655715942, "learning_rate": 0.0001, "loss": 0.0118, "step": 172280 }, { "epoch": 1133.4868421052631, "grad_norm": 1.6105929613113403, "learning_rate": 0.0001, "loss": 0.0112, "step": 172290 }, { "epoch": 1133.5526315789473, "grad_norm": 1.4369432926177979, "learning_rate": 0.0001, "loss": 0.0108, "step": 172300 }, { "epoch": 1133.6184210526317, "grad_norm": 0.9058899879455566, "learning_rate": 0.0001, "loss": 0.0135, "step": 172310 }, { "epoch": 1133.6842105263158, "grad_norm": 1.2638312578201294, "learning_rate": 0.0001, "loss": 0.0138, "step": 172320 }, { "epoch": 1133.75, "grad_norm": 1.3913240432739258, "learning_rate": 0.0001, "loss": 0.0095, "step": 172330 }, { "epoch": 1133.8157894736842, "grad_norm": 1.6232179403305054, "learning_rate": 0.0001, "loss": 0.0087, "step": 172340 }, { "epoch": 1133.8815789473683, "grad_norm": 1.577711582183838, "learning_rate": 0.0001, "loss": 0.0129, "step": 172350 }, { "epoch": 1133.9473684210527, "grad_norm": 1.1043808460235596, "learning_rate": 0.0001, "loss": 0.0141, "step": 172360 }, { "epoch": 1134.0131578947369, "grad_norm": 1.7212305068969727, "learning_rate": 0.0001, "loss": 0.0135, "step": 172370 }, { "epoch": 1134.078947368421, "grad_norm": 1.2320806980133057, "learning_rate": 0.0001, "loss": 0.0087, "step": 172380 }, { "epoch": 1134.1447368421052, "grad_norm": 1.3838047981262207, "learning_rate": 0.0001, "loss": 0.0113, "step": 172390 }, { "epoch": 1134.2105263157894, "grad_norm": 1.305981159210205, "learning_rate": 0.0001, "loss": 0.0118, "step": 172400 }, { "epoch": 1134.2763157894738, "grad_norm": 1.6906261444091797, "learning_rate": 0.0001, "loss": 0.0117, "step": 172410 }, { "epoch": 1134.342105263158, "grad_norm": 1.5937745571136475, "learning_rate": 0.0001, "loss": 0.0142, "step": 172420 }, { "epoch": 1134.407894736842, "grad_norm": 1.7839034795761108, "learning_rate": 0.0001, "loss": 0.0147, "step": 172430 }, { "epoch": 1134.4736842105262, "grad_norm": 1.4391299486160278, "learning_rate": 0.0001, "loss": 0.0125, "step": 172440 }, { "epoch": 1134.5394736842106, "grad_norm": 1.6815577745437622, "learning_rate": 0.0001, "loss": 0.0097, "step": 172450 }, { "epoch": 1134.6052631578948, "grad_norm": 1.5546783208847046, "learning_rate": 0.0001, "loss": 0.0107, "step": 172460 }, { "epoch": 1134.671052631579, "grad_norm": 1.169229507446289, "learning_rate": 0.0001, "loss": 0.0156, "step": 172470 }, { "epoch": 1134.7368421052631, "grad_norm": 1.2734931707382202, "learning_rate": 0.0001, "loss": 0.0091, "step": 172480 }, { "epoch": 1134.8026315789473, "grad_norm": 1.17262864112854, "learning_rate": 0.0001, "loss": 0.0106, "step": 172490 }, { "epoch": 1134.8684210526317, "grad_norm": 1.0629934072494507, "learning_rate": 0.0001, "loss": 0.0104, "step": 172500 }, { "epoch": 1134.9342105263158, "grad_norm": 1.3263142108917236, "learning_rate": 0.0001, "loss": 0.0167, "step": 172510 }, { "epoch": 1135.0, "grad_norm": 1.2105752229690552, "learning_rate": 0.0001, "loss": 0.0141, "step": 172520 }, { "epoch": 1135.0657894736842, "grad_norm": 1.476367473602295, "learning_rate": 0.0001, "loss": 0.0143, "step": 172530 }, { "epoch": 1135.1315789473683, "grad_norm": 1.2388933897018433, "learning_rate": 0.0001, "loss": 0.012, "step": 172540 }, { "epoch": 1135.1973684210527, "grad_norm": 1.0688279867172241, "learning_rate": 0.0001, "loss": 0.0094, "step": 172550 }, { "epoch": 1135.2631578947369, "grad_norm": 1.178054690361023, "learning_rate": 0.0001, "loss": 0.0149, "step": 172560 }, { "epoch": 1135.328947368421, "grad_norm": 1.032576560974121, "learning_rate": 0.0001, "loss": 0.0167, "step": 172570 }, { "epoch": 1135.3947368421052, "grad_norm": 1.3976858854293823, "learning_rate": 0.0001, "loss": 0.01, "step": 172580 }, { "epoch": 1135.4605263157894, "grad_norm": 1.5133326053619385, "learning_rate": 0.0001, "loss": 0.0153, "step": 172590 }, { "epoch": 1135.5263157894738, "grad_norm": 1.390443205833435, "learning_rate": 0.0001, "loss": 0.0099, "step": 172600 }, { "epoch": 1135.592105263158, "grad_norm": 1.416909098625183, "learning_rate": 0.0001, "loss": 0.0139, "step": 172610 }, { "epoch": 1135.657894736842, "grad_norm": 1.2227009534835815, "learning_rate": 0.0001, "loss": 0.012, "step": 172620 }, { "epoch": 1135.7236842105262, "grad_norm": 1.4084209203720093, "learning_rate": 0.0001, "loss": 0.0131, "step": 172630 }, { "epoch": 1135.7894736842106, "grad_norm": 1.7002519369125366, "learning_rate": 0.0001, "loss": 0.0093, "step": 172640 }, { "epoch": 1135.8552631578948, "grad_norm": 1.4184927940368652, "learning_rate": 0.0001, "loss": 0.0103, "step": 172650 }, { "epoch": 1135.921052631579, "grad_norm": 1.3448197841644287, "learning_rate": 0.0001, "loss": 0.0086, "step": 172660 }, { "epoch": 1135.9868421052631, "grad_norm": 1.3222978115081787, "learning_rate": 0.0001, "loss": 0.014, "step": 172670 }, { "epoch": 1136.0526315789473, "grad_norm": 1.4807777404785156, "learning_rate": 0.0001, "loss": 0.0093, "step": 172680 }, { "epoch": 1136.1184210526317, "grad_norm": 1.5764248371124268, "learning_rate": 0.0001, "loss": 0.0117, "step": 172690 }, { "epoch": 1136.1842105263158, "grad_norm": 1.5645744800567627, "learning_rate": 0.0001, "loss": 0.0113, "step": 172700 }, { "epoch": 1136.25, "grad_norm": 1.7363853454589844, "learning_rate": 0.0001, "loss": 0.0139, "step": 172710 }, { "epoch": 1136.3157894736842, "grad_norm": 1.4895364046096802, "learning_rate": 0.0001, "loss": 0.0102, "step": 172720 }, { "epoch": 1136.3815789473683, "grad_norm": 1.5989742279052734, "learning_rate": 0.0001, "loss": 0.0135, "step": 172730 }, { "epoch": 1136.4473684210527, "grad_norm": 1.4875613451004028, "learning_rate": 0.0001, "loss": 0.014, "step": 172740 }, { "epoch": 1136.5131578947369, "grad_norm": 1.6299102306365967, "learning_rate": 0.0001, "loss": 0.0154, "step": 172750 }, { "epoch": 1136.578947368421, "grad_norm": 1.8922697305679321, "learning_rate": 0.0001, "loss": 0.0113, "step": 172760 }, { "epoch": 1136.6447368421052, "grad_norm": 1.1792153120040894, "learning_rate": 0.0001, "loss": 0.0141, "step": 172770 }, { "epoch": 1136.7105263157894, "grad_norm": 1.3434832096099854, "learning_rate": 0.0001, "loss": 0.0112, "step": 172780 }, { "epoch": 1136.7763157894738, "grad_norm": 1.6354548931121826, "learning_rate": 0.0001, "loss": 0.0135, "step": 172790 }, { "epoch": 1136.842105263158, "grad_norm": 1.4578118324279785, "learning_rate": 0.0001, "loss": 0.0114, "step": 172800 }, { "epoch": 1136.907894736842, "grad_norm": 1.5455387830734253, "learning_rate": 0.0001, "loss": 0.0105, "step": 172810 }, { "epoch": 1136.9736842105262, "grad_norm": 1.4084256887435913, "learning_rate": 0.0001, "loss": 0.0091, "step": 172820 }, { "epoch": 1137.0394736842106, "grad_norm": 1.491653323173523, "learning_rate": 0.0001, "loss": 0.0129, "step": 172830 }, { "epoch": 1137.1052631578948, "grad_norm": 1.6616017818450928, "learning_rate": 0.0001, "loss": 0.0103, "step": 172840 }, { "epoch": 1137.171052631579, "grad_norm": 1.9078140258789062, "learning_rate": 0.0001, "loss": 0.0102, "step": 172850 }, { "epoch": 1137.2368421052631, "grad_norm": 1.716423511505127, "learning_rate": 0.0001, "loss": 0.0109, "step": 172860 }, { "epoch": 1137.3026315789473, "grad_norm": 1.2187440395355225, "learning_rate": 0.0001, "loss": 0.0145, "step": 172870 }, { "epoch": 1137.3684210526317, "grad_norm": 1.1854326725006104, "learning_rate": 0.0001, "loss": 0.0085, "step": 172880 }, { "epoch": 1137.4342105263158, "grad_norm": 1.896477460861206, "learning_rate": 0.0001, "loss": 0.0159, "step": 172890 }, { "epoch": 1137.5, "grad_norm": 1.6854041814804077, "learning_rate": 0.0001, "loss": 0.0132, "step": 172900 }, { "epoch": 1137.5657894736842, "grad_norm": 1.7503631114959717, "learning_rate": 0.0001, "loss": 0.0122, "step": 172910 }, { "epoch": 1137.6315789473683, "grad_norm": 1.5235346555709839, "learning_rate": 0.0001, "loss": 0.0104, "step": 172920 }, { "epoch": 1137.6973684210527, "grad_norm": 1.1079225540161133, "learning_rate": 0.0001, "loss": 0.0134, "step": 172930 }, { "epoch": 1137.7631578947369, "grad_norm": 1.4252431392669678, "learning_rate": 0.0001, "loss": 0.0099, "step": 172940 }, { "epoch": 1137.828947368421, "grad_norm": 1.5713491439819336, "learning_rate": 0.0001, "loss": 0.0099, "step": 172950 }, { "epoch": 1137.8947368421052, "grad_norm": 1.6197320222854614, "learning_rate": 0.0001, "loss": 0.0145, "step": 172960 }, { "epoch": 1137.9605263157894, "grad_norm": 1.7062314748764038, "learning_rate": 0.0001, "loss": 0.0092, "step": 172970 }, { "epoch": 1138.0263157894738, "grad_norm": 1.1301143169403076, "learning_rate": 0.0001, "loss": 0.0107, "step": 172980 }, { "epoch": 1138.092105263158, "grad_norm": 1.2121385335922241, "learning_rate": 0.0001, "loss": 0.011, "step": 172990 }, { "epoch": 1138.157894736842, "grad_norm": 1.735736608505249, "learning_rate": 0.0001, "loss": 0.0098, "step": 173000 }, { "epoch": 1138.2236842105262, "grad_norm": 1.307931900024414, "learning_rate": 0.0001, "loss": 0.0103, "step": 173010 }, { "epoch": 1138.2894736842106, "grad_norm": 1.7532097101211548, "learning_rate": 0.0001, "loss": 0.012, "step": 173020 }, { "epoch": 1138.3552631578948, "grad_norm": 1.7267072200775146, "learning_rate": 0.0001, "loss": 0.0144, "step": 173030 }, { "epoch": 1138.421052631579, "grad_norm": 1.0284768342971802, "learning_rate": 0.0001, "loss": 0.0115, "step": 173040 }, { "epoch": 1138.4868421052631, "grad_norm": 1.5435481071472168, "learning_rate": 0.0001, "loss": 0.0124, "step": 173050 }, { "epoch": 1138.5526315789473, "grad_norm": 0.9520408511161804, "learning_rate": 0.0001, "loss": 0.0128, "step": 173060 }, { "epoch": 1138.6184210526317, "grad_norm": 1.413148045539856, "learning_rate": 0.0001, "loss": 0.0089, "step": 173070 }, { "epoch": 1138.6842105263158, "grad_norm": 1.7109636068344116, "learning_rate": 0.0001, "loss": 0.0187, "step": 173080 }, { "epoch": 1138.75, "grad_norm": 1.3477548360824585, "learning_rate": 0.0001, "loss": 0.012, "step": 173090 }, { "epoch": 1138.8157894736842, "grad_norm": 1.4685393571853638, "learning_rate": 0.0001, "loss": 0.0114, "step": 173100 }, { "epoch": 1138.8815789473683, "grad_norm": 1.2334438562393188, "learning_rate": 0.0001, "loss": 0.0102, "step": 173110 }, { "epoch": 1138.9473684210527, "grad_norm": 1.7441747188568115, "learning_rate": 0.0001, "loss": 0.0119, "step": 173120 }, { "epoch": 1139.0131578947369, "grad_norm": 2.123462438583374, "learning_rate": 0.0001, "loss": 0.0133, "step": 173130 }, { "epoch": 1139.078947368421, "grad_norm": 1.6932549476623535, "learning_rate": 0.0001, "loss": 0.0121, "step": 173140 }, { "epoch": 1139.1447368421052, "grad_norm": 1.3027451038360596, "learning_rate": 0.0001, "loss": 0.0089, "step": 173150 }, { "epoch": 1139.2105263157894, "grad_norm": 1.0819048881530762, "learning_rate": 0.0001, "loss": 0.0136, "step": 173160 }, { "epoch": 1139.2763157894738, "grad_norm": 1.6305080652236938, "learning_rate": 0.0001, "loss": 0.0107, "step": 173170 }, { "epoch": 1139.342105263158, "grad_norm": 1.2649600505828857, "learning_rate": 0.0001, "loss": 0.011, "step": 173180 }, { "epoch": 1139.407894736842, "grad_norm": 1.6186362504959106, "learning_rate": 0.0001, "loss": 0.012, "step": 173190 }, { "epoch": 1139.4736842105262, "grad_norm": 1.475329875946045, "learning_rate": 0.0001, "loss": 0.0141, "step": 173200 }, { "epoch": 1139.5394736842106, "grad_norm": 1.3346686363220215, "learning_rate": 0.0001, "loss": 0.014, "step": 173210 }, { "epoch": 1139.6052631578948, "grad_norm": 1.2059470415115356, "learning_rate": 0.0001, "loss": 0.0119, "step": 173220 }, { "epoch": 1139.671052631579, "grad_norm": 1.3640931844711304, "learning_rate": 0.0001, "loss": 0.012, "step": 173230 }, { "epoch": 1139.7368421052631, "grad_norm": 1.3013370037078857, "learning_rate": 0.0001, "loss": 0.0133, "step": 173240 }, { "epoch": 1139.8026315789473, "grad_norm": 1.3176252841949463, "learning_rate": 0.0001, "loss": 0.0131, "step": 173250 }, { "epoch": 1139.8684210526317, "grad_norm": 1.215875506401062, "learning_rate": 0.0001, "loss": 0.0113, "step": 173260 }, { "epoch": 1139.9342105263158, "grad_norm": 1.4835245609283447, "learning_rate": 0.0001, "loss": 0.0106, "step": 173270 }, { "epoch": 1140.0, "grad_norm": 1.4689252376556396, "learning_rate": 0.0001, "loss": 0.0102, "step": 173280 }, { "epoch": 1140.0657894736842, "grad_norm": 1.5303544998168945, "learning_rate": 0.0001, "loss": 0.0099, "step": 173290 }, { "epoch": 1140.1315789473683, "grad_norm": 1.2356352806091309, "learning_rate": 0.0001, "loss": 0.014, "step": 173300 }, { "epoch": 1140.1973684210527, "grad_norm": 1.354560136795044, "learning_rate": 0.0001, "loss": 0.0126, "step": 173310 }, { "epoch": 1140.2631578947369, "grad_norm": 1.2851897478103638, "learning_rate": 0.0001, "loss": 0.013, "step": 173320 }, { "epoch": 1140.328947368421, "grad_norm": 1.1471362113952637, "learning_rate": 0.0001, "loss": 0.0126, "step": 173330 }, { "epoch": 1140.3947368421052, "grad_norm": 1.5411367416381836, "learning_rate": 0.0001, "loss": 0.0129, "step": 173340 }, { "epoch": 1140.4605263157894, "grad_norm": 1.1367428302764893, "learning_rate": 0.0001, "loss": 0.0093, "step": 173350 }, { "epoch": 1140.5263157894738, "grad_norm": 1.0028557777404785, "learning_rate": 0.0001, "loss": 0.0144, "step": 173360 }, { "epoch": 1140.592105263158, "grad_norm": 1.092563271522522, "learning_rate": 0.0001, "loss": 0.0108, "step": 173370 }, { "epoch": 1140.657894736842, "grad_norm": 1.6976338624954224, "learning_rate": 0.0001, "loss": 0.0105, "step": 173380 }, { "epoch": 1140.7236842105262, "grad_norm": 1.3804683685302734, "learning_rate": 0.0001, "loss": 0.0158, "step": 173390 }, { "epoch": 1140.7894736842106, "grad_norm": 1.4358649253845215, "learning_rate": 0.0001, "loss": 0.013, "step": 173400 }, { "epoch": 1140.8552631578948, "grad_norm": 1.0825127363204956, "learning_rate": 0.0001, "loss": 0.0114, "step": 173410 }, { "epoch": 1140.921052631579, "grad_norm": 1.483611822128296, "learning_rate": 0.0001, "loss": 0.0103, "step": 173420 }, { "epoch": 1140.9868421052631, "grad_norm": 1.5787732601165771, "learning_rate": 0.0001, "loss": 0.0108, "step": 173430 }, { "epoch": 1141.0526315789473, "grad_norm": 1.1135996580123901, "learning_rate": 0.0001, "loss": 0.0111, "step": 173440 }, { "epoch": 1141.1184210526317, "grad_norm": 1.1760056018829346, "learning_rate": 0.0001, "loss": 0.0135, "step": 173450 }, { "epoch": 1141.1842105263158, "grad_norm": 1.5632156133651733, "learning_rate": 0.0001, "loss": 0.0132, "step": 173460 }, { "epoch": 1141.25, "grad_norm": 1.1224418878555298, "learning_rate": 0.0001, "loss": 0.0116, "step": 173470 }, { "epoch": 1141.3157894736842, "grad_norm": 1.7102373838424683, "learning_rate": 0.0001, "loss": 0.0104, "step": 173480 }, { "epoch": 1141.3815789473683, "grad_norm": 1.5489684343338013, "learning_rate": 0.0001, "loss": 0.0155, "step": 173490 }, { "epoch": 1141.4473684210527, "grad_norm": 1.6051194667816162, "learning_rate": 0.0001, "loss": 0.0119, "step": 173500 }, { "epoch": 1141.5131578947369, "grad_norm": 1.7198774814605713, "learning_rate": 0.0001, "loss": 0.0138, "step": 173510 }, { "epoch": 1141.578947368421, "grad_norm": 1.626919150352478, "learning_rate": 0.0001, "loss": 0.0112, "step": 173520 }, { "epoch": 1141.6447368421052, "grad_norm": 1.2027056217193604, "learning_rate": 0.0001, "loss": 0.0123, "step": 173530 }, { "epoch": 1141.7105263157894, "grad_norm": 1.2697997093200684, "learning_rate": 0.0001, "loss": 0.0121, "step": 173540 }, { "epoch": 1141.7763157894738, "grad_norm": 1.2478845119476318, "learning_rate": 0.0001, "loss": 0.0123, "step": 173550 }, { "epoch": 1141.842105263158, "grad_norm": 1.3443061113357544, "learning_rate": 0.0001, "loss": 0.0106, "step": 173560 }, { "epoch": 1141.907894736842, "grad_norm": 1.552095890045166, "learning_rate": 0.0001, "loss": 0.0112, "step": 173570 }, { "epoch": 1141.9736842105262, "grad_norm": 1.4631935358047485, "learning_rate": 0.0001, "loss": 0.0114, "step": 173580 }, { "epoch": 1142.0394736842106, "grad_norm": 1.4461356401443481, "learning_rate": 0.0001, "loss": 0.0109, "step": 173590 }, { "epoch": 1142.1052631578948, "grad_norm": 1.6377668380737305, "learning_rate": 0.0001, "loss": 0.0159, "step": 173600 }, { "epoch": 1142.171052631579, "grad_norm": 1.628415584564209, "learning_rate": 0.0001, "loss": 0.0102, "step": 173610 }, { "epoch": 1142.2368421052631, "grad_norm": 1.092022180557251, "learning_rate": 0.0001, "loss": 0.0103, "step": 173620 }, { "epoch": 1142.3026315789473, "grad_norm": 1.4750525951385498, "learning_rate": 0.0001, "loss": 0.0122, "step": 173630 }, { "epoch": 1142.3684210526317, "grad_norm": 1.507574200630188, "learning_rate": 0.0001, "loss": 0.0103, "step": 173640 }, { "epoch": 1142.4342105263158, "grad_norm": 1.357633352279663, "learning_rate": 0.0001, "loss": 0.014, "step": 173650 }, { "epoch": 1142.5, "grad_norm": 1.8231059312820435, "learning_rate": 0.0001, "loss": 0.0107, "step": 173660 }, { "epoch": 1142.5657894736842, "grad_norm": 1.7393079996109009, "learning_rate": 0.0001, "loss": 0.0137, "step": 173670 }, { "epoch": 1142.6315789473683, "grad_norm": 1.3889869451522827, "learning_rate": 0.0001, "loss": 0.0107, "step": 173680 }, { "epoch": 1142.6973684210527, "grad_norm": 1.7997220754623413, "learning_rate": 0.0001, "loss": 0.0108, "step": 173690 }, { "epoch": 1142.7631578947369, "grad_norm": 1.5419992208480835, "learning_rate": 0.0001, "loss": 0.0092, "step": 173700 }, { "epoch": 1142.828947368421, "grad_norm": 1.3483463525772095, "learning_rate": 0.0001, "loss": 0.0193, "step": 173710 }, { "epoch": 1142.8947368421052, "grad_norm": 1.2836644649505615, "learning_rate": 0.0001, "loss": 0.0104, "step": 173720 }, { "epoch": 1142.9605263157894, "grad_norm": 1.40969979763031, "learning_rate": 0.0001, "loss": 0.0115, "step": 173730 }, { "epoch": 1143.0263157894738, "grad_norm": 1.3250373601913452, "learning_rate": 0.0001, "loss": 0.0167, "step": 173740 }, { "epoch": 1143.092105263158, "grad_norm": 1.4046112298965454, "learning_rate": 0.0001, "loss": 0.0091, "step": 173750 }, { "epoch": 1143.157894736842, "grad_norm": 1.8919271230697632, "learning_rate": 0.0001, "loss": 0.0131, "step": 173760 }, { "epoch": 1143.2236842105262, "grad_norm": 1.265720248222351, "learning_rate": 0.0001, "loss": 0.0104, "step": 173770 }, { "epoch": 1143.2894736842106, "grad_norm": 1.300895094871521, "learning_rate": 0.0001, "loss": 0.0099, "step": 173780 }, { "epoch": 1143.3552631578948, "grad_norm": 1.6607540845870972, "learning_rate": 0.0001, "loss": 0.0124, "step": 173790 }, { "epoch": 1143.421052631579, "grad_norm": 1.09311842918396, "learning_rate": 0.0001, "loss": 0.009, "step": 173800 }, { "epoch": 1143.4868421052631, "grad_norm": 1.1789919137954712, "learning_rate": 0.0001, "loss": 0.0137, "step": 173810 }, { "epoch": 1143.5526315789473, "grad_norm": 1.2974152565002441, "learning_rate": 0.0001, "loss": 0.0141, "step": 173820 }, { "epoch": 1143.6184210526317, "grad_norm": 1.476278305053711, "learning_rate": 0.0001, "loss": 0.0131, "step": 173830 }, { "epoch": 1143.6842105263158, "grad_norm": 1.2272350788116455, "learning_rate": 0.0001, "loss": 0.0134, "step": 173840 }, { "epoch": 1143.75, "grad_norm": 1.3216954469680786, "learning_rate": 0.0001, "loss": 0.0108, "step": 173850 }, { "epoch": 1143.8157894736842, "grad_norm": 1.2814610004425049, "learning_rate": 0.0001, "loss": 0.0138, "step": 173860 }, { "epoch": 1143.8815789473683, "grad_norm": 1.580427885055542, "learning_rate": 0.0001, "loss": 0.0131, "step": 173870 }, { "epoch": 1143.9473684210527, "grad_norm": 1.4495477676391602, "learning_rate": 0.0001, "loss": 0.0087, "step": 173880 }, { "epoch": 1144.0131578947369, "grad_norm": 1.3981302976608276, "learning_rate": 0.0001, "loss": 0.0132, "step": 173890 }, { "epoch": 1144.078947368421, "grad_norm": 1.4392577409744263, "learning_rate": 0.0001, "loss": 0.0143, "step": 173900 }, { "epoch": 1144.1447368421052, "grad_norm": 1.008037805557251, "learning_rate": 0.0001, "loss": 0.0141, "step": 173910 }, { "epoch": 1144.2105263157894, "grad_norm": 1.6474742889404297, "learning_rate": 0.0001, "loss": 0.0117, "step": 173920 }, { "epoch": 1144.2763157894738, "grad_norm": 1.482295036315918, "learning_rate": 0.0001, "loss": 0.0103, "step": 173930 }, { "epoch": 1144.342105263158, "grad_norm": 1.6493650674819946, "learning_rate": 0.0001, "loss": 0.0104, "step": 173940 }, { "epoch": 1144.407894736842, "grad_norm": 1.4570465087890625, "learning_rate": 0.0001, "loss": 0.011, "step": 173950 }, { "epoch": 1144.4736842105262, "grad_norm": 1.6098135709762573, "learning_rate": 0.0001, "loss": 0.0134, "step": 173960 }, { "epoch": 1144.5394736842106, "grad_norm": 1.775958776473999, "learning_rate": 0.0001, "loss": 0.0111, "step": 173970 }, { "epoch": 1144.6052631578948, "grad_norm": 1.4067511558532715, "learning_rate": 0.0001, "loss": 0.0109, "step": 173980 }, { "epoch": 1144.671052631579, "grad_norm": 1.305610179901123, "learning_rate": 0.0001, "loss": 0.0095, "step": 173990 }, { "epoch": 1144.7368421052631, "grad_norm": 1.5099265575408936, "learning_rate": 0.0001, "loss": 0.0124, "step": 174000 }, { "epoch": 1144.8026315789473, "grad_norm": 1.2229963541030884, "learning_rate": 0.0001, "loss": 0.0108, "step": 174010 }, { "epoch": 1144.8684210526317, "grad_norm": 1.6126185655593872, "learning_rate": 0.0001, "loss": 0.0095, "step": 174020 }, { "epoch": 1144.9342105263158, "grad_norm": 1.6708210706710815, "learning_rate": 0.0001, "loss": 0.0146, "step": 174030 }, { "epoch": 1145.0, "grad_norm": 1.3984320163726807, "learning_rate": 0.0001, "loss": 0.0137, "step": 174040 }, { "epoch": 1145.0657894736842, "grad_norm": 1.3829288482666016, "learning_rate": 0.0001, "loss": 0.0124, "step": 174050 }, { "epoch": 1145.1315789473683, "grad_norm": 1.6654620170593262, "learning_rate": 0.0001, "loss": 0.0084, "step": 174060 }, { "epoch": 1145.1973684210527, "grad_norm": 1.1202178001403809, "learning_rate": 0.0001, "loss": 0.0167, "step": 174070 }, { "epoch": 1145.2631578947369, "grad_norm": 1.755071759223938, "learning_rate": 0.0001, "loss": 0.0112, "step": 174080 }, { "epoch": 1145.328947368421, "grad_norm": 1.3332911729812622, "learning_rate": 0.0001, "loss": 0.009, "step": 174090 }, { "epoch": 1145.3947368421052, "grad_norm": 1.2902302742004395, "learning_rate": 0.0001, "loss": 0.0094, "step": 174100 }, { "epoch": 1145.4605263157894, "grad_norm": 1.4811331033706665, "learning_rate": 0.0001, "loss": 0.0122, "step": 174110 }, { "epoch": 1145.5263157894738, "grad_norm": 1.7309186458587646, "learning_rate": 0.0001, "loss": 0.0123, "step": 174120 }, { "epoch": 1145.592105263158, "grad_norm": 1.2340848445892334, "learning_rate": 0.0001, "loss": 0.0143, "step": 174130 }, { "epoch": 1145.657894736842, "grad_norm": 1.0942107439041138, "learning_rate": 0.0001, "loss": 0.0131, "step": 174140 }, { "epoch": 1145.7236842105262, "grad_norm": 1.392307996749878, "learning_rate": 0.0001, "loss": 0.0152, "step": 174150 }, { "epoch": 1145.7894736842106, "grad_norm": 1.686576247215271, "learning_rate": 0.0001, "loss": 0.0113, "step": 174160 }, { "epoch": 1145.8552631578948, "grad_norm": 1.6361463069915771, "learning_rate": 0.0001, "loss": 0.0118, "step": 174170 }, { "epoch": 1145.921052631579, "grad_norm": 1.614984154701233, "learning_rate": 0.0001, "loss": 0.0097, "step": 174180 }, { "epoch": 1145.9868421052631, "grad_norm": 1.417463779449463, "learning_rate": 0.0001, "loss": 0.0119, "step": 174190 }, { "epoch": 1146.0526315789473, "grad_norm": 1.6755043268203735, "learning_rate": 0.0001, "loss": 0.0101, "step": 174200 }, { "epoch": 1146.1184210526317, "grad_norm": 1.4927635192871094, "learning_rate": 0.0001, "loss": 0.0101, "step": 174210 }, { "epoch": 1146.1842105263158, "grad_norm": 1.5919910669326782, "learning_rate": 0.0001, "loss": 0.0095, "step": 174220 }, { "epoch": 1146.25, "grad_norm": 1.5357474088668823, "learning_rate": 0.0001, "loss": 0.0111, "step": 174230 }, { "epoch": 1146.3157894736842, "grad_norm": 1.2204885482788086, "learning_rate": 0.0001, "loss": 0.0093, "step": 174240 }, { "epoch": 1146.3815789473683, "grad_norm": 1.3896278142929077, "learning_rate": 0.0001, "loss": 0.0129, "step": 174250 }, { "epoch": 1146.4473684210527, "grad_norm": 1.85769784450531, "learning_rate": 0.0001, "loss": 0.0118, "step": 174260 }, { "epoch": 1146.5131578947369, "grad_norm": 1.6888995170593262, "learning_rate": 0.0001, "loss": 0.0103, "step": 174270 }, { "epoch": 1146.578947368421, "grad_norm": 1.503872036933899, "learning_rate": 0.0001, "loss": 0.0131, "step": 174280 }, { "epoch": 1146.6447368421052, "grad_norm": 1.2702854871749878, "learning_rate": 0.0001, "loss": 0.0146, "step": 174290 }, { "epoch": 1146.7105263157894, "grad_norm": 1.892910361289978, "learning_rate": 0.0001, "loss": 0.0097, "step": 174300 }, { "epoch": 1146.7763157894738, "grad_norm": 1.7979884147644043, "learning_rate": 0.0001, "loss": 0.0119, "step": 174310 }, { "epoch": 1146.842105263158, "grad_norm": 1.4010487794876099, "learning_rate": 0.0001, "loss": 0.0117, "step": 174320 }, { "epoch": 1146.907894736842, "grad_norm": 1.4760991334915161, "learning_rate": 0.0001, "loss": 0.0118, "step": 174330 }, { "epoch": 1146.9736842105262, "grad_norm": 1.532047152519226, "learning_rate": 0.0001, "loss": 0.0187, "step": 174340 }, { "epoch": 1147.0394736842106, "grad_norm": 1.2078194618225098, "learning_rate": 0.0001, "loss": 0.0119, "step": 174350 }, { "epoch": 1147.1052631578948, "grad_norm": 1.3872032165527344, "learning_rate": 0.0001, "loss": 0.0109, "step": 174360 }, { "epoch": 1147.171052631579, "grad_norm": 1.0449318885803223, "learning_rate": 0.0001, "loss": 0.0109, "step": 174370 }, { "epoch": 1147.2368421052631, "grad_norm": 1.582435965538025, "learning_rate": 0.0001, "loss": 0.0113, "step": 174380 }, { "epoch": 1147.3026315789473, "grad_norm": 1.2556771039962769, "learning_rate": 0.0001, "loss": 0.0114, "step": 174390 }, { "epoch": 1147.3684210526317, "grad_norm": 1.103916049003601, "learning_rate": 0.0001, "loss": 0.0112, "step": 174400 }, { "epoch": 1147.4342105263158, "grad_norm": 1.7527799606323242, "learning_rate": 0.0001, "loss": 0.0111, "step": 174410 }, { "epoch": 1147.5, "grad_norm": 1.1061375141143799, "learning_rate": 0.0001, "loss": 0.0145, "step": 174420 }, { "epoch": 1147.5657894736842, "grad_norm": 1.3181756734848022, "learning_rate": 0.0001, "loss": 0.0162, "step": 174430 }, { "epoch": 1147.6315789473683, "grad_norm": 1.1068283319473267, "learning_rate": 0.0001, "loss": 0.0112, "step": 174440 }, { "epoch": 1147.6973684210527, "grad_norm": 1.621151089668274, "learning_rate": 0.0001, "loss": 0.0106, "step": 174450 }, { "epoch": 1147.7631578947369, "grad_norm": 1.6957591772079468, "learning_rate": 0.0001, "loss": 0.0108, "step": 174460 }, { "epoch": 1147.828947368421, "grad_norm": 1.4605997800827026, "learning_rate": 0.0001, "loss": 0.0108, "step": 174470 }, { "epoch": 1147.8947368421052, "grad_norm": 1.6140074729919434, "learning_rate": 0.0001, "loss": 0.0115, "step": 174480 }, { "epoch": 1147.9605263157894, "grad_norm": 1.1421561241149902, "learning_rate": 0.0001, "loss": 0.0172, "step": 174490 }, { "epoch": 1148.0263157894738, "grad_norm": 1.5305228233337402, "learning_rate": 0.0001, "loss": 0.0105, "step": 174500 }, { "epoch": 1148.092105263158, "grad_norm": 1.462479829788208, "learning_rate": 0.0001, "loss": 0.0131, "step": 174510 }, { "epoch": 1148.157894736842, "grad_norm": 1.7832708358764648, "learning_rate": 0.0001, "loss": 0.0121, "step": 174520 }, { "epoch": 1148.2236842105262, "grad_norm": 1.6386204957962036, "learning_rate": 0.0001, "loss": 0.0122, "step": 174530 }, { "epoch": 1148.2894736842106, "grad_norm": 1.6399437189102173, "learning_rate": 0.0001, "loss": 0.0129, "step": 174540 }, { "epoch": 1148.3552631578948, "grad_norm": 1.9055781364440918, "learning_rate": 0.0001, "loss": 0.0141, "step": 174550 }, { "epoch": 1148.421052631579, "grad_norm": 0.9460023641586304, "learning_rate": 0.0001, "loss": 0.0126, "step": 174560 }, { "epoch": 1148.4868421052631, "grad_norm": 1.172697901725769, "learning_rate": 0.0001, "loss": 0.0101, "step": 174570 }, { "epoch": 1148.5526315789473, "grad_norm": 1.567223310470581, "learning_rate": 0.0001, "loss": 0.0084, "step": 174580 }, { "epoch": 1148.6184210526317, "grad_norm": 1.6761503219604492, "learning_rate": 0.0001, "loss": 0.0111, "step": 174590 }, { "epoch": 1148.6842105263158, "grad_norm": 1.8621057271957397, "learning_rate": 0.0001, "loss": 0.0142, "step": 174600 }, { "epoch": 1148.75, "grad_norm": 1.671093463897705, "learning_rate": 0.0001, "loss": 0.013, "step": 174610 }, { "epoch": 1148.8157894736842, "grad_norm": 1.3536454439163208, "learning_rate": 0.0001, "loss": 0.0097, "step": 174620 }, { "epoch": 1148.8815789473683, "grad_norm": 1.004010558128357, "learning_rate": 0.0001, "loss": 0.0129, "step": 174630 }, { "epoch": 1148.9473684210527, "grad_norm": 1.4722177982330322, "learning_rate": 0.0001, "loss": 0.0124, "step": 174640 }, { "epoch": 1149.0131578947369, "grad_norm": 1.4859020709991455, "learning_rate": 0.0001, "loss": 0.0119, "step": 174650 }, { "epoch": 1149.078947368421, "grad_norm": 1.199258804321289, "learning_rate": 0.0001, "loss": 0.0165, "step": 174660 }, { "epoch": 1149.1447368421052, "grad_norm": 1.181323766708374, "learning_rate": 0.0001, "loss": 0.01, "step": 174670 }, { "epoch": 1149.2105263157894, "grad_norm": 1.3144196271896362, "learning_rate": 0.0001, "loss": 0.0089, "step": 174680 }, { "epoch": 1149.2763157894738, "grad_norm": 1.4934375286102295, "learning_rate": 0.0001, "loss": 0.0125, "step": 174690 }, { "epoch": 1149.342105263158, "grad_norm": 1.6656079292297363, "learning_rate": 0.0001, "loss": 0.0098, "step": 174700 }, { "epoch": 1149.407894736842, "grad_norm": 1.3578672409057617, "learning_rate": 0.0001, "loss": 0.0105, "step": 174710 }, { "epoch": 1149.4736842105262, "grad_norm": 1.1275330781936646, "learning_rate": 0.0001, "loss": 0.009, "step": 174720 }, { "epoch": 1149.5394736842106, "grad_norm": 1.3341012001037598, "learning_rate": 0.0001, "loss": 0.0093, "step": 174730 }, { "epoch": 1149.6052631578948, "grad_norm": 1.5208731889724731, "learning_rate": 0.0001, "loss": 0.0162, "step": 174740 }, { "epoch": 1149.671052631579, "grad_norm": 1.4287173748016357, "learning_rate": 0.0001, "loss": 0.0126, "step": 174750 }, { "epoch": 1149.7368421052631, "grad_norm": 1.6421583890914917, "learning_rate": 0.0001, "loss": 0.0147, "step": 174760 }, { "epoch": 1149.8026315789473, "grad_norm": 1.0263359546661377, "learning_rate": 0.0001, "loss": 0.0141, "step": 174770 }, { "epoch": 1149.8684210526317, "grad_norm": 1.538923740386963, "learning_rate": 0.0001, "loss": 0.0116, "step": 174780 }, { "epoch": 1149.9342105263158, "grad_norm": 1.4687045812606812, "learning_rate": 0.0001, "loss": 0.0126, "step": 174790 }, { "epoch": 1150.0, "grad_norm": 1.4639639854431152, "learning_rate": 0.0001, "loss": 0.0148, "step": 174800 }, { "epoch": 1150.0657894736842, "grad_norm": 1.3452421426773071, "learning_rate": 0.0001, "loss": 0.0137, "step": 174810 }, { "epoch": 1150.1315789473683, "grad_norm": 1.1853818893432617, "learning_rate": 0.0001, "loss": 0.0124, "step": 174820 }, { "epoch": 1150.1973684210527, "grad_norm": 1.4535669088363647, "learning_rate": 0.0001, "loss": 0.0094, "step": 174830 }, { "epoch": 1150.2631578947369, "grad_norm": 0.9974392652511597, "learning_rate": 0.0001, "loss": 0.0147, "step": 174840 }, { "epoch": 1150.328947368421, "grad_norm": 1.332427978515625, "learning_rate": 0.0001, "loss": 0.0102, "step": 174850 }, { "epoch": 1150.3947368421052, "grad_norm": 1.3708852529525757, "learning_rate": 0.0001, "loss": 0.0107, "step": 174860 }, { "epoch": 1150.4605263157894, "grad_norm": 0.8997687101364136, "learning_rate": 0.0001, "loss": 0.0139, "step": 174870 }, { "epoch": 1150.5263157894738, "grad_norm": 1.3388043642044067, "learning_rate": 0.0001, "loss": 0.0146, "step": 174880 }, { "epoch": 1150.592105263158, "grad_norm": 1.4439517259597778, "learning_rate": 0.0001, "loss": 0.0128, "step": 174890 }, { "epoch": 1150.657894736842, "grad_norm": 1.4301255941390991, "learning_rate": 0.0001, "loss": 0.0113, "step": 174900 }, { "epoch": 1150.7236842105262, "grad_norm": 1.424272894859314, "learning_rate": 0.0001, "loss": 0.0124, "step": 174910 }, { "epoch": 1150.7894736842106, "grad_norm": 1.1851893663406372, "learning_rate": 0.0001, "loss": 0.0108, "step": 174920 }, { "epoch": 1150.8552631578948, "grad_norm": 1.1358842849731445, "learning_rate": 0.0001, "loss": 0.0108, "step": 174930 }, { "epoch": 1150.921052631579, "grad_norm": 1.545984148979187, "learning_rate": 0.0001, "loss": 0.0129, "step": 174940 }, { "epoch": 1150.9868421052631, "grad_norm": 1.3543682098388672, "learning_rate": 0.0001, "loss": 0.0114, "step": 174950 }, { "epoch": 1151.0526315789473, "grad_norm": 1.769384741783142, "learning_rate": 0.0001, "loss": 0.0115, "step": 174960 }, { "epoch": 1151.1184210526317, "grad_norm": 1.7081431150436401, "learning_rate": 0.0001, "loss": 0.0131, "step": 174970 }, { "epoch": 1151.1842105263158, "grad_norm": 1.8476265668869019, "learning_rate": 0.0001, "loss": 0.0162, "step": 174980 }, { "epoch": 1151.25, "grad_norm": 1.178856611251831, "learning_rate": 0.0001, "loss": 0.0106, "step": 174990 }, { "epoch": 1151.3157894736842, "grad_norm": 1.3947796821594238, "learning_rate": 0.0001, "loss": 0.0121, "step": 175000 }, { "epoch": 1151.3815789473683, "grad_norm": 1.354361653327942, "learning_rate": 0.0001, "loss": 0.01, "step": 175010 }, { "epoch": 1151.4473684210527, "grad_norm": 1.2064828872680664, "learning_rate": 0.0001, "loss": 0.0099, "step": 175020 }, { "epoch": 1151.5131578947369, "grad_norm": 1.678396224975586, "learning_rate": 0.0001, "loss": 0.0154, "step": 175030 }, { "epoch": 1151.578947368421, "grad_norm": 1.5952624082565308, "learning_rate": 0.0001, "loss": 0.0117, "step": 175040 }, { "epoch": 1151.6447368421052, "grad_norm": 1.706534504890442, "learning_rate": 0.0001, "loss": 0.0111, "step": 175050 }, { "epoch": 1151.7105263157894, "grad_norm": 1.3436297178268433, "learning_rate": 0.0001, "loss": 0.0133, "step": 175060 }, { "epoch": 1151.7763157894738, "grad_norm": 1.584829330444336, "learning_rate": 0.0001, "loss": 0.0147, "step": 175070 }, { "epoch": 1151.842105263158, "grad_norm": 1.4648338556289673, "learning_rate": 0.0001, "loss": 0.0118, "step": 175080 }, { "epoch": 1151.907894736842, "grad_norm": 1.788306474685669, "learning_rate": 0.0001, "loss": 0.0098, "step": 175090 }, { "epoch": 1151.9736842105262, "grad_norm": 1.3746306896209717, "learning_rate": 0.0001, "loss": 0.0117, "step": 175100 }, { "epoch": 1152.0394736842106, "grad_norm": 1.5717719793319702, "learning_rate": 0.0001, "loss": 0.0136, "step": 175110 }, { "epoch": 1152.1052631578948, "grad_norm": 1.7276618480682373, "learning_rate": 0.0001, "loss": 0.0129, "step": 175120 }, { "epoch": 1152.171052631579, "grad_norm": 1.35426664352417, "learning_rate": 0.0001, "loss": 0.0133, "step": 175130 }, { "epoch": 1152.2368421052631, "grad_norm": 1.194170355796814, "learning_rate": 0.0001, "loss": 0.0134, "step": 175140 }, { "epoch": 1152.3026315789473, "grad_norm": 1.3372957706451416, "learning_rate": 0.0001, "loss": 0.016, "step": 175150 }, { "epoch": 1152.3684210526317, "grad_norm": 1.1894408464431763, "learning_rate": 0.0001, "loss": 0.0123, "step": 175160 }, { "epoch": 1152.4342105263158, "grad_norm": 1.4860903024673462, "learning_rate": 0.0001, "loss": 0.0117, "step": 175170 }, { "epoch": 1152.5, "grad_norm": 1.5848817825317383, "learning_rate": 0.0001, "loss": 0.0094, "step": 175180 }, { "epoch": 1152.5657894736842, "grad_norm": 1.2035346031188965, "learning_rate": 0.0001, "loss": 0.0113, "step": 175190 }, { "epoch": 1152.6315789473683, "grad_norm": 1.4949374198913574, "learning_rate": 0.0001, "loss": 0.0103, "step": 175200 }, { "epoch": 1152.6973684210527, "grad_norm": 1.5129493474960327, "learning_rate": 0.0001, "loss": 0.011, "step": 175210 }, { "epoch": 1152.7631578947369, "grad_norm": 1.4521971940994263, "learning_rate": 0.0001, "loss": 0.0104, "step": 175220 }, { "epoch": 1152.828947368421, "grad_norm": 1.61952543258667, "learning_rate": 0.0001, "loss": 0.0119, "step": 175230 }, { "epoch": 1152.8947368421052, "grad_norm": 1.4537110328674316, "learning_rate": 0.0001, "loss": 0.0121, "step": 175240 }, { "epoch": 1152.9605263157894, "grad_norm": 1.594971776008606, "learning_rate": 0.0001, "loss": 0.0146, "step": 175250 }, { "epoch": 1153.0263157894738, "grad_norm": 1.3130197525024414, "learning_rate": 0.0001, "loss": 0.0109, "step": 175260 }, { "epoch": 1153.092105263158, "grad_norm": 1.6987690925598145, "learning_rate": 0.0001, "loss": 0.0093, "step": 175270 }, { "epoch": 1153.157894736842, "grad_norm": 1.1290346384048462, "learning_rate": 0.0001, "loss": 0.0157, "step": 175280 }, { "epoch": 1153.2236842105262, "grad_norm": 1.823441505432129, "learning_rate": 0.0001, "loss": 0.0102, "step": 175290 }, { "epoch": 1153.2894736842106, "grad_norm": 1.3163156509399414, "learning_rate": 0.0001, "loss": 0.0109, "step": 175300 }, { "epoch": 1153.3552631578948, "grad_norm": 1.2488690614700317, "learning_rate": 0.0001, "loss": 0.0119, "step": 175310 }, { "epoch": 1153.421052631579, "grad_norm": 1.4925785064697266, "learning_rate": 0.0001, "loss": 0.0129, "step": 175320 }, { "epoch": 1153.4868421052631, "grad_norm": 1.6326755285263062, "learning_rate": 0.0001, "loss": 0.018, "step": 175330 }, { "epoch": 1153.5526315789473, "grad_norm": 1.432363510131836, "learning_rate": 0.0001, "loss": 0.0107, "step": 175340 }, { "epoch": 1153.6184210526317, "grad_norm": 1.6722933053970337, "learning_rate": 0.0001, "loss": 0.0148, "step": 175350 }, { "epoch": 1153.6842105263158, "grad_norm": 1.4648281335830688, "learning_rate": 0.0001, "loss": 0.014, "step": 175360 }, { "epoch": 1153.75, "grad_norm": 1.4035356044769287, "learning_rate": 0.0001, "loss": 0.0107, "step": 175370 }, { "epoch": 1153.8157894736842, "grad_norm": 1.5853582620620728, "learning_rate": 0.0001, "loss": 0.0111, "step": 175380 }, { "epoch": 1153.8815789473683, "grad_norm": 1.5304068326950073, "learning_rate": 0.0001, "loss": 0.0164, "step": 175390 }, { "epoch": 1153.9473684210527, "grad_norm": 1.317885398864746, "learning_rate": 0.0001, "loss": 0.0116, "step": 175400 }, { "epoch": 1154.0131578947369, "grad_norm": 1.5530861616134644, "learning_rate": 0.0001, "loss": 0.0097, "step": 175410 }, { "epoch": 1154.078947368421, "grad_norm": 1.4733555316925049, "learning_rate": 0.0001, "loss": 0.0133, "step": 175420 }, { "epoch": 1154.1447368421052, "grad_norm": 1.7174029350280762, "learning_rate": 0.0001, "loss": 0.0127, "step": 175430 }, { "epoch": 1154.2105263157894, "grad_norm": 1.594877004623413, "learning_rate": 0.0001, "loss": 0.0119, "step": 175440 }, { "epoch": 1154.2763157894738, "grad_norm": 1.3447816371917725, "learning_rate": 0.0001, "loss": 0.0124, "step": 175450 }, { "epoch": 1154.342105263158, "grad_norm": 1.695220947265625, "learning_rate": 0.0001, "loss": 0.012, "step": 175460 }, { "epoch": 1154.407894736842, "grad_norm": 1.5623142719268799, "learning_rate": 0.0001, "loss": 0.0139, "step": 175470 }, { "epoch": 1154.4736842105262, "grad_norm": 1.36543869972229, "learning_rate": 0.0001, "loss": 0.0145, "step": 175480 }, { "epoch": 1154.5394736842106, "grad_norm": 1.1202443838119507, "learning_rate": 0.0001, "loss": 0.0141, "step": 175490 }, { "epoch": 1154.6052631578948, "grad_norm": 0.7722774147987366, "learning_rate": 0.0001, "loss": 0.0139, "step": 175500 }, { "epoch": 1154.671052631579, "grad_norm": 1.355323076248169, "learning_rate": 0.0001, "loss": 0.0115, "step": 175510 }, { "epoch": 1154.7368421052631, "grad_norm": 1.3672009706497192, "learning_rate": 0.0001, "loss": 0.0161, "step": 175520 }, { "epoch": 1154.8026315789473, "grad_norm": 1.466096043586731, "learning_rate": 0.0001, "loss": 0.0168, "step": 175530 }, { "epoch": 1154.8684210526317, "grad_norm": 1.4460487365722656, "learning_rate": 0.0001, "loss": 0.0186, "step": 175540 }, { "epoch": 1154.9342105263158, "grad_norm": 1.3295724391937256, "learning_rate": 0.0001, "loss": 0.0143, "step": 175550 }, { "epoch": 1155.0, "grad_norm": 1.3536489009857178, "learning_rate": 0.0001, "loss": 0.0131, "step": 175560 }, { "epoch": 1155.0657894736842, "grad_norm": 1.5138098001480103, "learning_rate": 0.0001, "loss": 0.0143, "step": 175570 }, { "epoch": 1155.1315789473683, "grad_norm": 1.5392322540283203, "learning_rate": 0.0001, "loss": 0.0116, "step": 175580 }, { "epoch": 1155.1973684210527, "grad_norm": 1.426003098487854, "learning_rate": 0.0001, "loss": 0.0093, "step": 175590 }, { "epoch": 1155.2631578947369, "grad_norm": 1.9277855157852173, "learning_rate": 0.0001, "loss": 0.0111, "step": 175600 }, { "epoch": 1155.328947368421, "grad_norm": 1.7536050081253052, "learning_rate": 0.0001, "loss": 0.014, "step": 175610 }, { "epoch": 1155.3947368421052, "grad_norm": 1.021134614944458, "learning_rate": 0.0001, "loss": 0.0112, "step": 175620 }, { "epoch": 1155.4605263157894, "grad_norm": 1.509833574295044, "learning_rate": 0.0001, "loss": 0.0129, "step": 175630 }, { "epoch": 1155.5263157894738, "grad_norm": 1.2765495777130127, "learning_rate": 0.0001, "loss": 0.0116, "step": 175640 }, { "epoch": 1155.592105263158, "grad_norm": 1.4467591047286987, "learning_rate": 0.0001, "loss": 0.015, "step": 175650 }, { "epoch": 1155.657894736842, "grad_norm": 1.6781113147735596, "learning_rate": 0.0001, "loss": 0.0171, "step": 175660 }, { "epoch": 1155.7236842105262, "grad_norm": 1.3353177309036255, "learning_rate": 0.0001, "loss": 0.0123, "step": 175670 }, { "epoch": 1155.7894736842106, "grad_norm": 1.9913198947906494, "learning_rate": 0.0001, "loss": 0.0117, "step": 175680 }, { "epoch": 1155.8552631578948, "grad_norm": 1.6918903589248657, "learning_rate": 0.0001, "loss": 0.0109, "step": 175690 }, { "epoch": 1155.921052631579, "grad_norm": 1.367912769317627, "learning_rate": 0.0001, "loss": 0.0144, "step": 175700 }, { "epoch": 1155.9868421052631, "grad_norm": 1.6351057291030884, "learning_rate": 0.0001, "loss": 0.0104, "step": 175710 }, { "epoch": 1156.0526315789473, "grad_norm": 1.875104308128357, "learning_rate": 0.0001, "loss": 0.0102, "step": 175720 }, { "epoch": 1156.1184210526317, "grad_norm": 1.3919380903244019, "learning_rate": 0.0001, "loss": 0.0136, "step": 175730 }, { "epoch": 1156.1842105263158, "grad_norm": 1.4868285655975342, "learning_rate": 0.0001, "loss": 0.0092, "step": 175740 }, { "epoch": 1156.25, "grad_norm": 1.3439351320266724, "learning_rate": 0.0001, "loss": 0.0128, "step": 175750 }, { "epoch": 1156.3157894736842, "grad_norm": 1.4571852684020996, "learning_rate": 0.0001, "loss": 0.0148, "step": 175760 }, { "epoch": 1156.3815789473683, "grad_norm": 1.1616448163986206, "learning_rate": 0.0001, "loss": 0.0101, "step": 175770 }, { "epoch": 1156.4473684210527, "grad_norm": 1.0528664588928223, "learning_rate": 0.0001, "loss": 0.01, "step": 175780 }, { "epoch": 1156.5131578947369, "grad_norm": 1.2203112840652466, "learning_rate": 0.0001, "loss": 0.0133, "step": 175790 }, { "epoch": 1156.578947368421, "grad_norm": 1.2898924350738525, "learning_rate": 0.0001, "loss": 0.0081, "step": 175800 }, { "epoch": 1156.6447368421052, "grad_norm": 1.3277580738067627, "learning_rate": 0.0001, "loss": 0.0136, "step": 175810 }, { "epoch": 1156.7105263157894, "grad_norm": 1.639156460762024, "learning_rate": 0.0001, "loss": 0.0109, "step": 175820 }, { "epoch": 1156.7763157894738, "grad_norm": 1.2745779752731323, "learning_rate": 0.0001, "loss": 0.0105, "step": 175830 }, { "epoch": 1156.842105263158, "grad_norm": 1.1166177988052368, "learning_rate": 0.0001, "loss": 0.0128, "step": 175840 }, { "epoch": 1156.907894736842, "grad_norm": 1.109490156173706, "learning_rate": 0.0001, "loss": 0.0181, "step": 175850 }, { "epoch": 1156.9736842105262, "grad_norm": 1.409898042678833, "learning_rate": 0.0001, "loss": 0.011, "step": 175860 }, { "epoch": 1157.0394736842106, "grad_norm": 1.4515776634216309, "learning_rate": 0.0001, "loss": 0.0101, "step": 175870 }, { "epoch": 1157.1052631578948, "grad_norm": 1.1081136465072632, "learning_rate": 0.0001, "loss": 0.009, "step": 175880 }, { "epoch": 1157.171052631579, "grad_norm": 1.3675696849822998, "learning_rate": 0.0001, "loss": 0.0115, "step": 175890 }, { "epoch": 1157.2368421052631, "grad_norm": 1.2639594078063965, "learning_rate": 0.0001, "loss": 0.0132, "step": 175900 }, { "epoch": 1157.3026315789473, "grad_norm": 1.1321505308151245, "learning_rate": 0.0001, "loss": 0.0134, "step": 175910 }, { "epoch": 1157.3684210526317, "grad_norm": 1.2572224140167236, "learning_rate": 0.0001, "loss": 0.0116, "step": 175920 }, { "epoch": 1157.4342105263158, "grad_norm": 1.262131929397583, "learning_rate": 0.0001, "loss": 0.0122, "step": 175930 }, { "epoch": 1157.5, "grad_norm": 1.7479702234268188, "learning_rate": 0.0001, "loss": 0.0147, "step": 175940 }, { "epoch": 1157.5657894736842, "grad_norm": 1.520777702331543, "learning_rate": 0.0001, "loss": 0.0114, "step": 175950 }, { "epoch": 1157.6315789473683, "grad_norm": 1.5218570232391357, "learning_rate": 0.0001, "loss": 0.0119, "step": 175960 }, { "epoch": 1157.6973684210527, "grad_norm": 1.6535159349441528, "learning_rate": 0.0001, "loss": 0.0132, "step": 175970 }, { "epoch": 1157.7631578947369, "grad_norm": 1.3221880197525024, "learning_rate": 0.0001, "loss": 0.0097, "step": 175980 }, { "epoch": 1157.828947368421, "grad_norm": 1.5733702182769775, "learning_rate": 0.0001, "loss": 0.015, "step": 175990 }, { "epoch": 1157.8947368421052, "grad_norm": 1.408676266670227, "learning_rate": 0.0001, "loss": 0.0153, "step": 176000 }, { "epoch": 1157.9605263157894, "grad_norm": 1.624991536140442, "learning_rate": 0.0001, "loss": 0.0104, "step": 176010 }, { "epoch": 1158.0263157894738, "grad_norm": 1.2167068719863892, "learning_rate": 0.0001, "loss": 0.0098, "step": 176020 }, { "epoch": 1158.092105263158, "grad_norm": 1.4019873142242432, "learning_rate": 0.0001, "loss": 0.0101, "step": 176030 }, { "epoch": 1158.157894736842, "grad_norm": 1.7521976232528687, "learning_rate": 0.0001, "loss": 0.0087, "step": 176040 }, { "epoch": 1158.2236842105262, "grad_norm": 1.1590869426727295, "learning_rate": 0.0001, "loss": 0.0138, "step": 176050 }, { "epoch": 1158.2894736842106, "grad_norm": 1.4554139375686646, "learning_rate": 0.0001, "loss": 0.0113, "step": 176060 }, { "epoch": 1158.3552631578948, "grad_norm": 1.4550379514694214, "learning_rate": 0.0001, "loss": 0.0119, "step": 176070 }, { "epoch": 1158.421052631579, "grad_norm": 1.7416101694107056, "learning_rate": 0.0001, "loss": 0.0143, "step": 176080 }, { "epoch": 1158.4868421052631, "grad_norm": 1.472220540046692, "learning_rate": 0.0001, "loss": 0.0099, "step": 176090 }, { "epoch": 1158.5526315789473, "grad_norm": 0.9296644330024719, "learning_rate": 0.0001, "loss": 0.0137, "step": 176100 }, { "epoch": 1158.6184210526317, "grad_norm": 1.3504987955093384, "learning_rate": 0.0001, "loss": 0.0131, "step": 176110 }, { "epoch": 1158.6842105263158, "grad_norm": 0.7618977427482605, "learning_rate": 0.0001, "loss": 0.0099, "step": 176120 }, { "epoch": 1158.75, "grad_norm": 1.4062368869781494, "learning_rate": 0.0001, "loss": 0.014, "step": 176130 }, { "epoch": 1158.8157894736842, "grad_norm": 1.087605357170105, "learning_rate": 0.0001, "loss": 0.0091, "step": 176140 }, { "epoch": 1158.8815789473683, "grad_norm": 1.8479574918746948, "learning_rate": 0.0001, "loss": 0.0106, "step": 176150 }, { "epoch": 1158.9473684210527, "grad_norm": 1.4705570936203003, "learning_rate": 0.0001, "loss": 0.0139, "step": 176160 }, { "epoch": 1159.0131578947369, "grad_norm": 1.7680943012237549, "learning_rate": 0.0001, "loss": 0.0163, "step": 176170 }, { "epoch": 1159.078947368421, "grad_norm": 1.560385823249817, "learning_rate": 0.0001, "loss": 0.0096, "step": 176180 }, { "epoch": 1159.1447368421052, "grad_norm": 1.4508188962936401, "learning_rate": 0.0001, "loss": 0.013, "step": 176190 }, { "epoch": 1159.2105263157894, "grad_norm": 1.79560387134552, "learning_rate": 0.0001, "loss": 0.0108, "step": 176200 }, { "epoch": 1159.2763157894738, "grad_norm": 1.7232240438461304, "learning_rate": 0.0001, "loss": 0.0088, "step": 176210 }, { "epoch": 1159.342105263158, "grad_norm": 1.7553902864456177, "learning_rate": 0.0001, "loss": 0.0162, "step": 176220 }, { "epoch": 1159.407894736842, "grad_norm": 1.4313784837722778, "learning_rate": 0.0001, "loss": 0.0119, "step": 176230 }, { "epoch": 1159.4736842105262, "grad_norm": 1.5134189128875732, "learning_rate": 0.0001, "loss": 0.012, "step": 176240 }, { "epoch": 1159.5394736842106, "grad_norm": 1.3664851188659668, "learning_rate": 0.0001, "loss": 0.0116, "step": 176250 }, { "epoch": 1159.6052631578948, "grad_norm": 1.393532395362854, "learning_rate": 0.0001, "loss": 0.012, "step": 176260 }, { "epoch": 1159.671052631579, "grad_norm": 1.7098374366760254, "learning_rate": 0.0001, "loss": 0.0104, "step": 176270 }, { "epoch": 1159.7368421052631, "grad_norm": 1.764613389968872, "learning_rate": 0.0001, "loss": 0.0113, "step": 176280 }, { "epoch": 1159.8026315789473, "grad_norm": 1.3870724439620972, "learning_rate": 0.0001, "loss": 0.0117, "step": 176290 }, { "epoch": 1159.8684210526317, "grad_norm": 1.9134773015975952, "learning_rate": 0.0001, "loss": 0.01, "step": 176300 }, { "epoch": 1159.9342105263158, "grad_norm": 1.5394302606582642, "learning_rate": 0.0001, "loss": 0.0161, "step": 176310 }, { "epoch": 1160.0, "grad_norm": 1.4557392597198486, "learning_rate": 0.0001, "loss": 0.0136, "step": 176320 }, { "epoch": 1160.0657894736842, "grad_norm": 1.3363618850708008, "learning_rate": 0.0001, "loss": 0.0096, "step": 176330 }, { "epoch": 1160.1315789473683, "grad_norm": 1.5180895328521729, "learning_rate": 0.0001, "loss": 0.0118, "step": 176340 }, { "epoch": 1160.1973684210527, "grad_norm": 1.453987717628479, "learning_rate": 0.0001, "loss": 0.012, "step": 176350 }, { "epoch": 1160.2631578947369, "grad_norm": 1.448307752609253, "learning_rate": 0.0001, "loss": 0.0117, "step": 176360 }, { "epoch": 1160.328947368421, "grad_norm": 1.3806403875350952, "learning_rate": 0.0001, "loss": 0.0097, "step": 176370 }, { "epoch": 1160.3947368421052, "grad_norm": 1.7019217014312744, "learning_rate": 0.0001, "loss": 0.0137, "step": 176380 }, { "epoch": 1160.4605263157894, "grad_norm": 1.5181480646133423, "learning_rate": 0.0001, "loss": 0.0112, "step": 176390 }, { "epoch": 1160.5263157894738, "grad_norm": 1.278814673423767, "learning_rate": 0.0001, "loss": 0.0087, "step": 176400 }, { "epoch": 1160.592105263158, "grad_norm": 1.5402175188064575, "learning_rate": 0.0001, "loss": 0.0109, "step": 176410 }, { "epoch": 1160.657894736842, "grad_norm": 1.316131353378296, "learning_rate": 0.0001, "loss": 0.0128, "step": 176420 }, { "epoch": 1160.7236842105262, "grad_norm": 1.316079020500183, "learning_rate": 0.0001, "loss": 0.0157, "step": 176430 }, { "epoch": 1160.7894736842106, "grad_norm": 1.8491196632385254, "learning_rate": 0.0001, "loss": 0.0126, "step": 176440 }, { "epoch": 1160.8552631578948, "grad_norm": 1.4479775428771973, "learning_rate": 0.0001, "loss": 0.0114, "step": 176450 }, { "epoch": 1160.921052631579, "grad_norm": 1.5741008520126343, "learning_rate": 0.0001, "loss": 0.0086, "step": 176460 }, { "epoch": 1160.9868421052631, "grad_norm": 1.1090642213821411, "learning_rate": 0.0001, "loss": 0.0123, "step": 176470 }, { "epoch": 1161.0526315789473, "grad_norm": 1.3371107578277588, "learning_rate": 0.0001, "loss": 0.0128, "step": 176480 }, { "epoch": 1161.1184210526317, "grad_norm": 1.308225393295288, "learning_rate": 0.0001, "loss": 0.0126, "step": 176490 }, { "epoch": 1161.1842105263158, "grad_norm": 1.1582967042922974, "learning_rate": 0.0001, "loss": 0.0146, "step": 176500 }, { "epoch": 1161.25, "grad_norm": 1.594375729560852, "learning_rate": 0.0001, "loss": 0.0117, "step": 176510 }, { "epoch": 1161.3157894736842, "grad_norm": 1.474657416343689, "learning_rate": 0.0001, "loss": 0.0094, "step": 176520 }, { "epoch": 1161.3815789473683, "grad_norm": 1.2020033597946167, "learning_rate": 0.0001, "loss": 0.013, "step": 176530 }, { "epoch": 1161.4473684210527, "grad_norm": 1.1011885404586792, "learning_rate": 0.0001, "loss": 0.0126, "step": 176540 }, { "epoch": 1161.5131578947369, "grad_norm": 1.3815133571624756, "learning_rate": 0.0001, "loss": 0.0123, "step": 176550 }, { "epoch": 1161.578947368421, "grad_norm": 1.8477903604507446, "learning_rate": 0.0001, "loss": 0.009, "step": 176560 }, { "epoch": 1161.6447368421052, "grad_norm": 0.9769545793533325, "learning_rate": 0.0001, "loss": 0.0138, "step": 176570 }, { "epoch": 1161.7105263157894, "grad_norm": 1.442308783531189, "learning_rate": 0.0001, "loss": 0.0092, "step": 176580 }, { "epoch": 1161.7763157894738, "grad_norm": 1.924320936203003, "learning_rate": 0.0001, "loss": 0.0105, "step": 176590 }, { "epoch": 1161.842105263158, "grad_norm": 1.5450575351715088, "learning_rate": 0.0001, "loss": 0.011, "step": 176600 }, { "epoch": 1161.907894736842, "grad_norm": 1.447658658027649, "learning_rate": 0.0001, "loss": 0.013, "step": 176610 }, { "epoch": 1161.9736842105262, "grad_norm": 1.753088355064392, "learning_rate": 0.0001, "loss": 0.0158, "step": 176620 }, { "epoch": 1162.0394736842106, "grad_norm": 1.8470854759216309, "learning_rate": 0.0001, "loss": 0.0104, "step": 176630 }, { "epoch": 1162.1052631578948, "grad_norm": 1.3121730089187622, "learning_rate": 0.0001, "loss": 0.0168, "step": 176640 }, { "epoch": 1162.171052631579, "grad_norm": 1.0868992805480957, "learning_rate": 0.0001, "loss": 0.0109, "step": 176650 }, { "epoch": 1162.2368421052631, "grad_norm": 1.13553786277771, "learning_rate": 0.0001, "loss": 0.011, "step": 176660 }, { "epoch": 1162.3026315789473, "grad_norm": 1.4005696773529053, "learning_rate": 0.0001, "loss": 0.0149, "step": 176670 }, { "epoch": 1162.3684210526317, "grad_norm": 1.1013069152832031, "learning_rate": 0.0001, "loss": 0.0093, "step": 176680 }, { "epoch": 1162.4342105263158, "grad_norm": 1.8839763402938843, "learning_rate": 0.0001, "loss": 0.0144, "step": 176690 }, { "epoch": 1162.5, "grad_norm": 1.4373282194137573, "learning_rate": 0.0001, "loss": 0.013, "step": 176700 }, { "epoch": 1162.5657894736842, "grad_norm": 1.0190247297286987, "learning_rate": 0.0001, "loss": 0.016, "step": 176710 }, { "epoch": 1162.6315789473683, "grad_norm": 1.1101574897766113, "learning_rate": 0.0001, "loss": 0.009, "step": 176720 }, { "epoch": 1162.6973684210527, "grad_norm": 1.9073596000671387, "learning_rate": 0.0001, "loss": 0.0105, "step": 176730 }, { "epoch": 1162.7631578947369, "grad_norm": 1.2073019742965698, "learning_rate": 0.0001, "loss": 0.0101, "step": 176740 }, { "epoch": 1162.828947368421, "grad_norm": 1.2656586170196533, "learning_rate": 0.0001, "loss": 0.0083, "step": 176750 }, { "epoch": 1162.8947368421052, "grad_norm": 1.3661689758300781, "learning_rate": 0.0001, "loss": 0.015, "step": 176760 }, { "epoch": 1162.9605263157894, "grad_norm": 1.4266762733459473, "learning_rate": 0.0001, "loss": 0.0124, "step": 176770 }, { "epoch": 1163.0263157894738, "grad_norm": 1.594706416130066, "learning_rate": 0.0001, "loss": 0.0122, "step": 176780 }, { "epoch": 1163.092105263158, "grad_norm": 1.5538355112075806, "learning_rate": 0.0001, "loss": 0.0129, "step": 176790 }, { "epoch": 1163.157894736842, "grad_norm": 1.2161448001861572, "learning_rate": 0.0001, "loss": 0.0146, "step": 176800 }, { "epoch": 1163.2236842105262, "grad_norm": 1.7867112159729004, "learning_rate": 0.0001, "loss": 0.0135, "step": 176810 }, { "epoch": 1163.2894736842106, "grad_norm": 1.4101911783218384, "learning_rate": 0.0001, "loss": 0.0138, "step": 176820 }, { "epoch": 1163.3552631578948, "grad_norm": 1.3999425172805786, "learning_rate": 0.0001, "loss": 0.016, "step": 176830 }, { "epoch": 1163.421052631579, "grad_norm": 1.7015836238861084, "learning_rate": 0.0001, "loss": 0.0144, "step": 176840 }, { "epoch": 1163.4868421052631, "grad_norm": 1.2179269790649414, "learning_rate": 0.0001, "loss": 0.0098, "step": 176850 }, { "epoch": 1163.5526315789473, "grad_norm": 1.4226808547973633, "learning_rate": 0.0001, "loss": 0.0083, "step": 176860 }, { "epoch": 1163.6184210526317, "grad_norm": 1.4528636932373047, "learning_rate": 0.0001, "loss": 0.0099, "step": 176870 }, { "epoch": 1163.6842105263158, "grad_norm": 1.361203670501709, "learning_rate": 0.0001, "loss": 0.0118, "step": 176880 }, { "epoch": 1163.75, "grad_norm": 1.7640613317489624, "learning_rate": 0.0001, "loss": 0.0089, "step": 176890 }, { "epoch": 1163.8157894736842, "grad_norm": 1.5463091135025024, "learning_rate": 0.0001, "loss": 0.0111, "step": 176900 }, { "epoch": 1163.8815789473683, "grad_norm": 1.6100932359695435, "learning_rate": 0.0001, "loss": 0.012, "step": 176910 }, { "epoch": 1163.9473684210527, "grad_norm": 1.2899532318115234, "learning_rate": 0.0001, "loss": 0.0127, "step": 176920 }, { "epoch": 1164.0131578947369, "grad_norm": 1.7148686647415161, "learning_rate": 0.0001, "loss": 0.0108, "step": 176930 }, { "epoch": 1164.078947368421, "grad_norm": 1.0450801849365234, "learning_rate": 0.0001, "loss": 0.011, "step": 176940 }, { "epoch": 1164.1447368421052, "grad_norm": 1.5509226322174072, "learning_rate": 0.0001, "loss": 0.0097, "step": 176950 }, { "epoch": 1164.2105263157894, "grad_norm": 1.5433064699172974, "learning_rate": 0.0001, "loss": 0.0109, "step": 176960 }, { "epoch": 1164.2763157894738, "grad_norm": 1.2671968936920166, "learning_rate": 0.0001, "loss": 0.0117, "step": 176970 }, { "epoch": 1164.342105263158, "grad_norm": 1.3687937259674072, "learning_rate": 0.0001, "loss": 0.0115, "step": 176980 }, { "epoch": 1164.407894736842, "grad_norm": 1.3780088424682617, "learning_rate": 0.0001, "loss": 0.0117, "step": 176990 }, { "epoch": 1164.4736842105262, "grad_norm": 1.145357608795166, "learning_rate": 0.0001, "loss": 0.0165, "step": 177000 }, { "epoch": 1164.5394736842106, "grad_norm": 1.5568561553955078, "learning_rate": 0.0001, "loss": 0.0096, "step": 177010 }, { "epoch": 1164.6052631578948, "grad_norm": 1.3285624980926514, "learning_rate": 0.0001, "loss": 0.0106, "step": 177020 }, { "epoch": 1164.671052631579, "grad_norm": 1.4689173698425293, "learning_rate": 0.0001, "loss": 0.0149, "step": 177030 }, { "epoch": 1164.7368421052631, "grad_norm": 1.3829624652862549, "learning_rate": 0.0001, "loss": 0.0133, "step": 177040 }, { "epoch": 1164.8026315789473, "grad_norm": 1.703649878501892, "learning_rate": 0.0001, "loss": 0.0096, "step": 177050 }, { "epoch": 1164.8684210526317, "grad_norm": 1.444873332977295, "learning_rate": 0.0001, "loss": 0.0157, "step": 177060 }, { "epoch": 1164.9342105263158, "grad_norm": 1.8363596200942993, "learning_rate": 0.0001, "loss": 0.0104, "step": 177070 }, { "epoch": 1165.0, "grad_norm": 1.443617820739746, "learning_rate": 0.0001, "loss": 0.0163, "step": 177080 }, { "epoch": 1165.0657894736842, "grad_norm": 1.6398000717163086, "learning_rate": 0.0001, "loss": 0.009, "step": 177090 }, { "epoch": 1165.1315789473683, "grad_norm": 1.479915976524353, "learning_rate": 0.0001, "loss": 0.0134, "step": 177100 }, { "epoch": 1165.1973684210527, "grad_norm": 1.481320858001709, "learning_rate": 0.0001, "loss": 0.0161, "step": 177110 }, { "epoch": 1165.2631578947369, "grad_norm": 1.3490216732025146, "learning_rate": 0.0001, "loss": 0.0143, "step": 177120 }, { "epoch": 1165.328947368421, "grad_norm": 1.9635274410247803, "learning_rate": 0.0001, "loss": 0.0129, "step": 177130 }, { "epoch": 1165.3947368421052, "grad_norm": 1.218561053276062, "learning_rate": 0.0001, "loss": 0.0108, "step": 177140 }, { "epoch": 1165.4605263157894, "grad_norm": 1.5639996528625488, "learning_rate": 0.0001, "loss": 0.0126, "step": 177150 }, { "epoch": 1165.5263157894738, "grad_norm": 0.9338251352310181, "learning_rate": 0.0001, "loss": 0.0123, "step": 177160 }, { "epoch": 1165.592105263158, "grad_norm": 1.5798596143722534, "learning_rate": 0.0001, "loss": 0.0112, "step": 177170 }, { "epoch": 1165.657894736842, "grad_norm": 1.467841386795044, "learning_rate": 0.0001, "loss": 0.0103, "step": 177180 }, { "epoch": 1165.7236842105262, "grad_norm": 1.2548426389694214, "learning_rate": 0.0001, "loss": 0.0133, "step": 177190 }, { "epoch": 1165.7894736842106, "grad_norm": 1.0295097827911377, "learning_rate": 0.0001, "loss": 0.009, "step": 177200 }, { "epoch": 1165.8552631578948, "grad_norm": 0.9640434384346008, "learning_rate": 0.0001, "loss": 0.0102, "step": 177210 }, { "epoch": 1165.921052631579, "grad_norm": 1.4043864011764526, "learning_rate": 0.0001, "loss": 0.0121, "step": 177220 }, { "epoch": 1165.9868421052631, "grad_norm": 1.2837684154510498, "learning_rate": 0.0001, "loss": 0.0116, "step": 177230 }, { "epoch": 1166.0526315789473, "grad_norm": 1.3581486940383911, "learning_rate": 0.0001, "loss": 0.0161, "step": 177240 }, { "epoch": 1166.1184210526317, "grad_norm": 1.4707919359207153, "learning_rate": 0.0001, "loss": 0.0092, "step": 177250 }, { "epoch": 1166.1842105263158, "grad_norm": 1.2018218040466309, "learning_rate": 0.0001, "loss": 0.0104, "step": 177260 }, { "epoch": 1166.25, "grad_norm": 1.3571407794952393, "learning_rate": 0.0001, "loss": 0.0125, "step": 177270 }, { "epoch": 1166.3157894736842, "grad_norm": 1.5504543781280518, "learning_rate": 0.0001, "loss": 0.0102, "step": 177280 }, { "epoch": 1166.3815789473683, "grad_norm": 1.4214692115783691, "learning_rate": 0.0001, "loss": 0.014, "step": 177290 }, { "epoch": 1166.4473684210527, "grad_norm": 1.51360285282135, "learning_rate": 0.0001, "loss": 0.0096, "step": 177300 }, { "epoch": 1166.5131578947369, "grad_norm": 1.4512224197387695, "learning_rate": 0.0001, "loss": 0.0154, "step": 177310 }, { "epoch": 1166.578947368421, "grad_norm": 1.5246403217315674, "learning_rate": 0.0001, "loss": 0.0112, "step": 177320 }, { "epoch": 1166.6447368421052, "grad_norm": 1.092790126800537, "learning_rate": 0.0001, "loss": 0.0114, "step": 177330 }, { "epoch": 1166.7105263157894, "grad_norm": 1.1998109817504883, "learning_rate": 0.0001, "loss": 0.0117, "step": 177340 }, { "epoch": 1166.7763157894738, "grad_norm": 1.7526781558990479, "learning_rate": 0.0001, "loss": 0.0124, "step": 177350 }, { "epoch": 1166.842105263158, "grad_norm": 1.5543334484100342, "learning_rate": 0.0001, "loss": 0.0143, "step": 177360 }, { "epoch": 1166.907894736842, "grad_norm": 1.4534173011779785, "learning_rate": 0.0001, "loss": 0.0101, "step": 177370 }, { "epoch": 1166.9736842105262, "grad_norm": 0.9187477827072144, "learning_rate": 0.0001, "loss": 0.0107, "step": 177380 }, { "epoch": 1167.0394736842106, "grad_norm": 1.2927989959716797, "learning_rate": 0.0001, "loss": 0.0119, "step": 177390 }, { "epoch": 1167.1052631578948, "grad_norm": 1.2378191947937012, "learning_rate": 0.0001, "loss": 0.0133, "step": 177400 }, { "epoch": 1167.171052631579, "grad_norm": 1.3288905620574951, "learning_rate": 0.0001, "loss": 0.0127, "step": 177410 }, { "epoch": 1167.2368421052631, "grad_norm": 1.539038896560669, "learning_rate": 0.0001, "loss": 0.013, "step": 177420 }, { "epoch": 1167.3026315789473, "grad_norm": 1.4469225406646729, "learning_rate": 0.0001, "loss": 0.0163, "step": 177430 }, { "epoch": 1167.3684210526317, "grad_norm": 1.3198143243789673, "learning_rate": 0.0001, "loss": 0.0107, "step": 177440 }, { "epoch": 1167.4342105263158, "grad_norm": 1.4767965078353882, "learning_rate": 0.0001, "loss": 0.0131, "step": 177450 }, { "epoch": 1167.5, "grad_norm": 1.3074194192886353, "learning_rate": 0.0001, "loss": 0.0125, "step": 177460 }, { "epoch": 1167.5657894736842, "grad_norm": 1.1386024951934814, "learning_rate": 0.0001, "loss": 0.0098, "step": 177470 }, { "epoch": 1167.6315789473683, "grad_norm": 1.1425807476043701, "learning_rate": 0.0001, "loss": 0.0155, "step": 177480 }, { "epoch": 1167.6973684210527, "grad_norm": 1.861998200416565, "learning_rate": 0.0001, "loss": 0.0101, "step": 177490 }, { "epoch": 1167.7631578947369, "grad_norm": 1.3653815984725952, "learning_rate": 0.0001, "loss": 0.0087, "step": 177500 }, { "epoch": 1167.828947368421, "grad_norm": 1.8498955965042114, "learning_rate": 0.0001, "loss": 0.0114, "step": 177510 }, { "epoch": 1167.8947368421052, "grad_norm": 1.7605525255203247, "learning_rate": 0.0001, "loss": 0.0092, "step": 177520 }, { "epoch": 1167.9605263157894, "grad_norm": 1.3949675559997559, "learning_rate": 0.0001, "loss": 0.0131, "step": 177530 }, { "epoch": 1168.0263157894738, "grad_norm": 1.0367738008499146, "learning_rate": 0.0001, "loss": 0.01, "step": 177540 }, { "epoch": 1168.092105263158, "grad_norm": 1.28173828125, "learning_rate": 0.0001, "loss": 0.0168, "step": 177550 }, { "epoch": 1168.157894736842, "grad_norm": 1.4256985187530518, "learning_rate": 0.0001, "loss": 0.0091, "step": 177560 }, { "epoch": 1168.2236842105262, "grad_norm": 0.9823119640350342, "learning_rate": 0.0001, "loss": 0.0135, "step": 177570 }, { "epoch": 1168.2894736842106, "grad_norm": 1.2208877801895142, "learning_rate": 0.0001, "loss": 0.0165, "step": 177580 }, { "epoch": 1168.3552631578948, "grad_norm": 1.3015801906585693, "learning_rate": 0.0001, "loss": 0.0118, "step": 177590 }, { "epoch": 1168.421052631579, "grad_norm": 1.1971583366394043, "learning_rate": 0.0001, "loss": 0.0102, "step": 177600 }, { "epoch": 1168.4868421052631, "grad_norm": 1.3127185106277466, "learning_rate": 0.0001, "loss": 0.0099, "step": 177610 }, { "epoch": 1168.5526315789473, "grad_norm": 1.3602441549301147, "learning_rate": 0.0001, "loss": 0.0119, "step": 177620 }, { "epoch": 1168.6184210526317, "grad_norm": 1.492967963218689, "learning_rate": 0.0001, "loss": 0.0143, "step": 177630 }, { "epoch": 1168.6842105263158, "grad_norm": 1.4230343103408813, "learning_rate": 0.0001, "loss": 0.0114, "step": 177640 }, { "epoch": 1168.75, "grad_norm": 1.5230165719985962, "learning_rate": 0.0001, "loss": 0.0089, "step": 177650 }, { "epoch": 1168.8157894736842, "grad_norm": 1.287248969078064, "learning_rate": 0.0001, "loss": 0.01, "step": 177660 }, { "epoch": 1168.8815789473683, "grad_norm": 1.5134950876235962, "learning_rate": 0.0001, "loss": 0.0093, "step": 177670 }, { "epoch": 1168.9473684210527, "grad_norm": 1.5340601205825806, "learning_rate": 0.0001, "loss": 0.0122, "step": 177680 }, { "epoch": 1169.0131578947369, "grad_norm": 1.328674554824829, "learning_rate": 0.0001, "loss": 0.0139, "step": 177690 }, { "epoch": 1169.078947368421, "grad_norm": 1.4592010974884033, "learning_rate": 0.0001, "loss": 0.0131, "step": 177700 }, { "epoch": 1169.1447368421052, "grad_norm": 1.0156333446502686, "learning_rate": 0.0001, "loss": 0.0138, "step": 177710 }, { "epoch": 1169.2105263157894, "grad_norm": 1.108156442642212, "learning_rate": 0.0001, "loss": 0.0097, "step": 177720 }, { "epoch": 1169.2763157894738, "grad_norm": 1.4632837772369385, "learning_rate": 0.0001, "loss": 0.0099, "step": 177730 }, { "epoch": 1169.342105263158, "grad_norm": 1.280709147453308, "learning_rate": 0.0001, "loss": 0.0137, "step": 177740 }, { "epoch": 1169.407894736842, "grad_norm": 1.6403154134750366, "learning_rate": 0.0001, "loss": 0.015, "step": 177750 }, { "epoch": 1169.4736842105262, "grad_norm": 1.0618095397949219, "learning_rate": 0.0001, "loss": 0.0116, "step": 177760 }, { "epoch": 1169.5394736842106, "grad_norm": 1.5505964756011963, "learning_rate": 0.0001, "loss": 0.0104, "step": 177770 }, { "epoch": 1169.6052631578948, "grad_norm": 1.3844820261001587, "learning_rate": 0.0001, "loss": 0.0117, "step": 177780 }, { "epoch": 1169.671052631579, "grad_norm": 1.5933821201324463, "learning_rate": 0.0001, "loss": 0.0098, "step": 177790 }, { "epoch": 1169.7368421052631, "grad_norm": 1.532155990600586, "learning_rate": 0.0001, "loss": 0.0125, "step": 177800 }, { "epoch": 1169.8026315789473, "grad_norm": 1.687004566192627, "learning_rate": 0.0001, "loss": 0.0159, "step": 177810 }, { "epoch": 1169.8684210526317, "grad_norm": 1.1050459146499634, "learning_rate": 0.0001, "loss": 0.0089, "step": 177820 }, { "epoch": 1169.9342105263158, "grad_norm": 1.8222657442092896, "learning_rate": 0.0001, "loss": 0.0106, "step": 177830 }, { "epoch": 1170.0, "grad_norm": 1.764248251914978, "learning_rate": 0.0001, "loss": 0.0112, "step": 177840 }, { "epoch": 1170.0657894736842, "grad_norm": 1.7306239604949951, "learning_rate": 0.0001, "loss": 0.0099, "step": 177850 }, { "epoch": 1170.1315789473683, "grad_norm": 0.9984796047210693, "learning_rate": 0.0001, "loss": 0.0099, "step": 177860 }, { "epoch": 1170.1973684210527, "grad_norm": 1.2116321325302124, "learning_rate": 0.0001, "loss": 0.0083, "step": 177870 }, { "epoch": 1170.2631578947369, "grad_norm": 1.4124023914337158, "learning_rate": 0.0001, "loss": 0.0137, "step": 177880 }, { "epoch": 1170.328947368421, "grad_norm": 1.4005061388015747, "learning_rate": 0.0001, "loss": 0.0111, "step": 177890 }, { "epoch": 1170.3947368421052, "grad_norm": 1.2745976448059082, "learning_rate": 0.0001, "loss": 0.0108, "step": 177900 }, { "epoch": 1170.4605263157894, "grad_norm": 1.1205390691757202, "learning_rate": 0.0001, "loss": 0.012, "step": 177910 }, { "epoch": 1170.5263157894738, "grad_norm": 1.3386731147766113, "learning_rate": 0.0001, "loss": 0.0119, "step": 177920 }, { "epoch": 1170.592105263158, "grad_norm": 1.422613263130188, "learning_rate": 0.0001, "loss": 0.0144, "step": 177930 }, { "epoch": 1170.657894736842, "grad_norm": 1.7210954427719116, "learning_rate": 0.0001, "loss": 0.0136, "step": 177940 }, { "epoch": 1170.7236842105262, "grad_norm": 1.6146125793457031, "learning_rate": 0.0001, "loss": 0.0129, "step": 177950 }, { "epoch": 1170.7894736842106, "grad_norm": 1.2416328191757202, "learning_rate": 0.0001, "loss": 0.0125, "step": 177960 }, { "epoch": 1170.8552631578948, "grad_norm": 1.1275485754013062, "learning_rate": 0.0001, "loss": 0.0121, "step": 177970 }, { "epoch": 1170.921052631579, "grad_norm": 1.1781346797943115, "learning_rate": 0.0001, "loss": 0.0118, "step": 177980 }, { "epoch": 1170.9868421052631, "grad_norm": 1.4293204545974731, "learning_rate": 0.0001, "loss": 0.0126, "step": 177990 }, { "epoch": 1171.0526315789473, "grad_norm": 1.2799476385116577, "learning_rate": 0.0001, "loss": 0.0147, "step": 178000 }, { "epoch": 1171.1184210526317, "grad_norm": 1.417135238647461, "learning_rate": 0.0001, "loss": 0.0088, "step": 178010 }, { "epoch": 1171.1842105263158, "grad_norm": 1.2723448276519775, "learning_rate": 0.0001, "loss": 0.0128, "step": 178020 }, { "epoch": 1171.25, "grad_norm": 1.346282958984375, "learning_rate": 0.0001, "loss": 0.0113, "step": 178030 }, { "epoch": 1171.3157894736842, "grad_norm": 1.567480444908142, "learning_rate": 0.0001, "loss": 0.0112, "step": 178040 }, { "epoch": 1171.3815789473683, "grad_norm": 1.5547620058059692, "learning_rate": 0.0001, "loss": 0.0116, "step": 178050 }, { "epoch": 1171.4473684210527, "grad_norm": 1.2216962575912476, "learning_rate": 0.0001, "loss": 0.0103, "step": 178060 }, { "epoch": 1171.5131578947369, "grad_norm": 1.5525908470153809, "learning_rate": 0.0001, "loss": 0.0134, "step": 178070 }, { "epoch": 1171.578947368421, "grad_norm": 1.531211495399475, "learning_rate": 0.0001, "loss": 0.0174, "step": 178080 }, { "epoch": 1171.6447368421052, "grad_norm": 1.824846863746643, "learning_rate": 0.0001, "loss": 0.0105, "step": 178090 }, { "epoch": 1171.7105263157894, "grad_norm": 1.0805151462554932, "learning_rate": 0.0001, "loss": 0.0118, "step": 178100 }, { "epoch": 1171.7763157894738, "grad_norm": 1.2558516263961792, "learning_rate": 0.0001, "loss": 0.0143, "step": 178110 }, { "epoch": 1171.842105263158, "grad_norm": 1.7016905546188354, "learning_rate": 0.0001, "loss": 0.0108, "step": 178120 }, { "epoch": 1171.907894736842, "grad_norm": 1.6502832174301147, "learning_rate": 0.0001, "loss": 0.0097, "step": 178130 }, { "epoch": 1171.9736842105262, "grad_norm": 1.5950132608413696, "learning_rate": 0.0001, "loss": 0.0126, "step": 178140 }, { "epoch": 1172.0394736842106, "grad_norm": 0.9532908201217651, "learning_rate": 0.0001, "loss": 0.013, "step": 178150 }, { "epoch": 1172.1052631578948, "grad_norm": 1.142980933189392, "learning_rate": 0.0001, "loss": 0.0141, "step": 178160 }, { "epoch": 1172.171052631579, "grad_norm": 1.5589685440063477, "learning_rate": 0.0001, "loss": 0.0093, "step": 178170 }, { "epoch": 1172.2368421052631, "grad_norm": 1.569337010383606, "learning_rate": 0.0001, "loss": 0.0093, "step": 178180 }, { "epoch": 1172.3026315789473, "grad_norm": 1.4236246347427368, "learning_rate": 0.0001, "loss": 0.0115, "step": 178190 }, { "epoch": 1172.3684210526317, "grad_norm": 1.3871781826019287, "learning_rate": 0.0001, "loss": 0.0162, "step": 178200 }, { "epoch": 1172.4342105263158, "grad_norm": 1.9947973489761353, "learning_rate": 0.0001, "loss": 0.0112, "step": 178210 }, { "epoch": 1172.5, "grad_norm": 1.907113790512085, "learning_rate": 0.0001, "loss": 0.0099, "step": 178220 }, { "epoch": 1172.5657894736842, "grad_norm": 1.6217637062072754, "learning_rate": 0.0001, "loss": 0.0112, "step": 178230 }, { "epoch": 1172.6315789473683, "grad_norm": 1.5230531692504883, "learning_rate": 0.0001, "loss": 0.0111, "step": 178240 }, { "epoch": 1172.6973684210527, "grad_norm": 1.6762244701385498, "learning_rate": 0.0001, "loss": 0.0087, "step": 178250 }, { "epoch": 1172.7631578947369, "grad_norm": 1.2630763053894043, "learning_rate": 0.0001, "loss": 0.0118, "step": 178260 }, { "epoch": 1172.828947368421, "grad_norm": 1.634730577468872, "learning_rate": 0.0001, "loss": 0.0121, "step": 178270 }, { "epoch": 1172.8947368421052, "grad_norm": 1.6935391426086426, "learning_rate": 0.0001, "loss": 0.0153, "step": 178280 }, { "epoch": 1172.9605263157894, "grad_norm": 1.390713095664978, "learning_rate": 0.0001, "loss": 0.0131, "step": 178290 }, { "epoch": 1173.0263157894738, "grad_norm": 1.0206769704818726, "learning_rate": 0.0001, "loss": 0.0127, "step": 178300 }, { "epoch": 1173.092105263158, "grad_norm": 1.7077986001968384, "learning_rate": 0.0001, "loss": 0.0103, "step": 178310 }, { "epoch": 1173.157894736842, "grad_norm": 1.1125541925430298, "learning_rate": 0.0001, "loss": 0.0131, "step": 178320 }, { "epoch": 1173.2236842105262, "grad_norm": 1.466005563735962, "learning_rate": 0.0001, "loss": 0.012, "step": 178330 }, { "epoch": 1173.2894736842106, "grad_norm": 1.6692843437194824, "learning_rate": 0.0001, "loss": 0.0109, "step": 178340 }, { "epoch": 1173.3552631578948, "grad_norm": 1.5181567668914795, "learning_rate": 0.0001, "loss": 0.0117, "step": 178350 }, { "epoch": 1173.421052631579, "grad_norm": 1.4606913328170776, "learning_rate": 0.0001, "loss": 0.014, "step": 178360 }, { "epoch": 1173.4868421052631, "grad_norm": 1.6841099262237549, "learning_rate": 0.0001, "loss": 0.0097, "step": 178370 }, { "epoch": 1173.5526315789473, "grad_norm": 1.3849406242370605, "learning_rate": 0.0001, "loss": 0.0123, "step": 178380 }, { "epoch": 1173.6184210526317, "grad_norm": 1.729057788848877, "learning_rate": 0.0001, "loss": 0.0128, "step": 178390 }, { "epoch": 1173.6842105263158, "grad_norm": 1.5269122123718262, "learning_rate": 0.0001, "loss": 0.0101, "step": 178400 }, { "epoch": 1173.75, "grad_norm": 1.234616994857788, "learning_rate": 0.0001, "loss": 0.011, "step": 178410 }, { "epoch": 1173.8157894736842, "grad_norm": 0.9467893838882446, "learning_rate": 0.0001, "loss": 0.0125, "step": 178420 }, { "epoch": 1173.8815789473683, "grad_norm": 1.0340499877929688, "learning_rate": 0.0001, "loss": 0.0111, "step": 178430 }, { "epoch": 1173.9473684210527, "grad_norm": 0.8377929925918579, "learning_rate": 0.0001, "loss": 0.0126, "step": 178440 }, { "epoch": 1174.0131578947369, "grad_norm": 1.5118622779846191, "learning_rate": 0.0001, "loss": 0.0123, "step": 178450 }, { "epoch": 1174.078947368421, "grad_norm": 1.2054342031478882, "learning_rate": 0.0001, "loss": 0.0135, "step": 178460 }, { "epoch": 1174.1447368421052, "grad_norm": 1.0646848678588867, "learning_rate": 0.0001, "loss": 0.0117, "step": 178470 }, { "epoch": 1174.2105263157894, "grad_norm": 1.363682508468628, "learning_rate": 0.0001, "loss": 0.0139, "step": 178480 }, { "epoch": 1174.2763157894738, "grad_norm": 1.1567845344543457, "learning_rate": 0.0001, "loss": 0.0127, "step": 178490 }, { "epoch": 1174.342105263158, "grad_norm": 1.5365349054336548, "learning_rate": 0.0001, "loss": 0.0101, "step": 178500 }, { "epoch": 1174.407894736842, "grad_norm": 1.2465460300445557, "learning_rate": 0.0001, "loss": 0.0146, "step": 178510 }, { "epoch": 1174.4736842105262, "grad_norm": 1.7379969358444214, "learning_rate": 0.0001, "loss": 0.012, "step": 178520 }, { "epoch": 1174.5394736842106, "grad_norm": 1.944411277770996, "learning_rate": 0.0001, "loss": 0.01, "step": 178530 }, { "epoch": 1174.6052631578948, "grad_norm": 1.285022258758545, "learning_rate": 0.0001, "loss": 0.0146, "step": 178540 }, { "epoch": 1174.671052631579, "grad_norm": 1.5425459146499634, "learning_rate": 0.0001, "loss": 0.0122, "step": 178550 }, { "epoch": 1174.7368421052631, "grad_norm": 1.6126500368118286, "learning_rate": 0.0001, "loss": 0.0107, "step": 178560 }, { "epoch": 1174.8026315789473, "grad_norm": 1.102604866027832, "learning_rate": 0.0001, "loss": 0.0132, "step": 178570 }, { "epoch": 1174.8684210526317, "grad_norm": 1.2446011304855347, "learning_rate": 0.0001, "loss": 0.0106, "step": 178580 }, { "epoch": 1174.9342105263158, "grad_norm": 1.4989306926727295, "learning_rate": 0.0001, "loss": 0.0107, "step": 178590 }, { "epoch": 1175.0, "grad_norm": 1.5242666006088257, "learning_rate": 0.0001, "loss": 0.009, "step": 178600 }, { "epoch": 1175.0657894736842, "grad_norm": 1.5056519508361816, "learning_rate": 0.0001, "loss": 0.0116, "step": 178610 }, { "epoch": 1175.1315789473683, "grad_norm": 1.302706241607666, "learning_rate": 0.0001, "loss": 0.0105, "step": 178620 }, { "epoch": 1175.1973684210527, "grad_norm": 1.2965831756591797, "learning_rate": 0.0001, "loss": 0.0131, "step": 178630 }, { "epoch": 1175.2631578947369, "grad_norm": 1.1851215362548828, "learning_rate": 0.0001, "loss": 0.0107, "step": 178640 }, { "epoch": 1175.328947368421, "grad_norm": 1.656539797782898, "learning_rate": 0.0001, "loss": 0.0115, "step": 178650 }, { "epoch": 1175.3947368421052, "grad_norm": 1.4721945524215698, "learning_rate": 0.0001, "loss": 0.0136, "step": 178660 }, { "epoch": 1175.4605263157894, "grad_norm": 1.1548882722854614, "learning_rate": 0.0001, "loss": 0.0115, "step": 178670 }, { "epoch": 1175.5263157894738, "grad_norm": 1.6251531839370728, "learning_rate": 0.0001, "loss": 0.0122, "step": 178680 }, { "epoch": 1175.592105263158, "grad_norm": 1.4485573768615723, "learning_rate": 0.0001, "loss": 0.0119, "step": 178690 }, { "epoch": 1175.657894736842, "grad_norm": 1.236702561378479, "learning_rate": 0.0001, "loss": 0.0144, "step": 178700 }, { "epoch": 1175.7236842105262, "grad_norm": 1.0979368686676025, "learning_rate": 0.0001, "loss": 0.0091, "step": 178710 }, { "epoch": 1175.7894736842106, "grad_norm": 1.336005449295044, "learning_rate": 0.0001, "loss": 0.013, "step": 178720 }, { "epoch": 1175.8552631578948, "grad_norm": 1.5168288946151733, "learning_rate": 0.0001, "loss": 0.0083, "step": 178730 }, { "epoch": 1175.921052631579, "grad_norm": 1.6978859901428223, "learning_rate": 0.0001, "loss": 0.0108, "step": 178740 }, { "epoch": 1175.9868421052631, "grad_norm": 1.4908281564712524, "learning_rate": 0.0001, "loss": 0.0154, "step": 178750 }, { "epoch": 1176.0526315789473, "grad_norm": 1.5683348178863525, "learning_rate": 0.0001, "loss": 0.012, "step": 178760 }, { "epoch": 1176.1184210526317, "grad_norm": 1.5691719055175781, "learning_rate": 0.0001, "loss": 0.0102, "step": 178770 }, { "epoch": 1176.1842105263158, "grad_norm": 1.3299872875213623, "learning_rate": 0.0001, "loss": 0.0111, "step": 178780 }, { "epoch": 1176.25, "grad_norm": 1.0120748281478882, "learning_rate": 0.0001, "loss": 0.0094, "step": 178790 }, { "epoch": 1176.3157894736842, "grad_norm": 1.3271794319152832, "learning_rate": 0.0001, "loss": 0.0142, "step": 178800 }, { "epoch": 1176.3815789473683, "grad_norm": 1.7435978651046753, "learning_rate": 0.0001, "loss": 0.0116, "step": 178810 }, { "epoch": 1176.4473684210527, "grad_norm": 1.4494178295135498, "learning_rate": 0.0001, "loss": 0.0191, "step": 178820 }, { "epoch": 1176.5131578947369, "grad_norm": 1.0800607204437256, "learning_rate": 0.0001, "loss": 0.0107, "step": 178830 }, { "epoch": 1176.578947368421, "grad_norm": 1.1647429466247559, "learning_rate": 0.0001, "loss": 0.013, "step": 178840 }, { "epoch": 1176.6447368421052, "grad_norm": 1.3550162315368652, "learning_rate": 0.0001, "loss": 0.0115, "step": 178850 }, { "epoch": 1176.7105263157894, "grad_norm": 1.3103845119476318, "learning_rate": 0.0001, "loss": 0.0117, "step": 178860 }, { "epoch": 1176.7763157894738, "grad_norm": 1.674179196357727, "learning_rate": 0.0001, "loss": 0.0115, "step": 178870 }, { "epoch": 1176.842105263158, "grad_norm": 1.7230024337768555, "learning_rate": 0.0001, "loss": 0.0106, "step": 178880 }, { "epoch": 1176.907894736842, "grad_norm": 1.5608913898468018, "learning_rate": 0.0001, "loss": 0.0114, "step": 178890 }, { "epoch": 1176.9736842105262, "grad_norm": 1.6479169130325317, "learning_rate": 0.0001, "loss": 0.0123, "step": 178900 }, { "epoch": 1177.0394736842106, "grad_norm": 1.8514952659606934, "learning_rate": 0.0001, "loss": 0.0133, "step": 178910 }, { "epoch": 1177.1052631578948, "grad_norm": 1.1225299835205078, "learning_rate": 0.0001, "loss": 0.0091, "step": 178920 }, { "epoch": 1177.171052631579, "grad_norm": 0.9663496017456055, "learning_rate": 0.0001, "loss": 0.0137, "step": 178930 }, { "epoch": 1177.2368421052631, "grad_norm": 1.21413254737854, "learning_rate": 0.0001, "loss": 0.0161, "step": 178940 }, { "epoch": 1177.3026315789473, "grad_norm": 1.7040126323699951, "learning_rate": 0.0001, "loss": 0.0128, "step": 178950 }, { "epoch": 1177.3684210526317, "grad_norm": 1.3151904344558716, "learning_rate": 0.0001, "loss": 0.0105, "step": 178960 }, { "epoch": 1177.4342105263158, "grad_norm": 1.2465176582336426, "learning_rate": 0.0001, "loss": 0.0141, "step": 178970 }, { "epoch": 1177.5, "grad_norm": 1.3484728336334229, "learning_rate": 0.0001, "loss": 0.0124, "step": 178980 }, { "epoch": 1177.5657894736842, "grad_norm": 1.2826777696609497, "learning_rate": 0.0001, "loss": 0.01, "step": 178990 }, { "epoch": 1177.6315789473683, "grad_norm": 0.9591245055198669, "learning_rate": 0.0001, "loss": 0.0096, "step": 179000 }, { "epoch": 1177.6973684210527, "grad_norm": 1.6379069089889526, "learning_rate": 0.0001, "loss": 0.011, "step": 179010 }, { "epoch": 1177.7631578947369, "grad_norm": 1.739497423171997, "learning_rate": 0.0001, "loss": 0.0085, "step": 179020 }, { "epoch": 1177.828947368421, "grad_norm": 1.2557004690170288, "learning_rate": 0.0001, "loss": 0.0096, "step": 179030 }, { "epoch": 1177.8947368421052, "grad_norm": 1.243186116218567, "learning_rate": 0.0001, "loss": 0.0153, "step": 179040 }, { "epoch": 1177.9605263157894, "grad_norm": 1.4208959341049194, "learning_rate": 0.0001, "loss": 0.0139, "step": 179050 }, { "epoch": 1178.0263157894738, "grad_norm": 1.439584732055664, "learning_rate": 0.0001, "loss": 0.0113, "step": 179060 }, { "epoch": 1178.092105263158, "grad_norm": 1.5226659774780273, "learning_rate": 0.0001, "loss": 0.0094, "step": 179070 }, { "epoch": 1178.157894736842, "grad_norm": 1.3014672994613647, "learning_rate": 0.0001, "loss": 0.0119, "step": 179080 }, { "epoch": 1178.2236842105262, "grad_norm": 1.6991885900497437, "learning_rate": 0.0001, "loss": 0.0139, "step": 179090 }, { "epoch": 1178.2894736842106, "grad_norm": 1.2953583002090454, "learning_rate": 0.0001, "loss": 0.011, "step": 179100 }, { "epoch": 1178.3552631578948, "grad_norm": 1.281272053718567, "learning_rate": 0.0001, "loss": 0.0083, "step": 179110 }, { "epoch": 1178.421052631579, "grad_norm": 1.728569746017456, "learning_rate": 0.0001, "loss": 0.0112, "step": 179120 }, { "epoch": 1178.4868421052631, "grad_norm": 1.5014829635620117, "learning_rate": 0.0001, "loss": 0.0106, "step": 179130 }, { "epoch": 1178.5526315789473, "grad_norm": 1.41307532787323, "learning_rate": 0.0001, "loss": 0.0167, "step": 179140 }, { "epoch": 1178.6184210526317, "grad_norm": 1.3068623542785645, "learning_rate": 0.0001, "loss": 0.0126, "step": 179150 }, { "epoch": 1178.6842105263158, "grad_norm": 1.2323598861694336, "learning_rate": 0.0001, "loss": 0.0127, "step": 179160 }, { "epoch": 1178.75, "grad_norm": 1.5183072090148926, "learning_rate": 0.0001, "loss": 0.0094, "step": 179170 }, { "epoch": 1178.8157894736842, "grad_norm": 1.3154641389846802, "learning_rate": 0.0001, "loss": 0.0118, "step": 179180 }, { "epoch": 1178.8815789473683, "grad_norm": 1.5777839422225952, "learning_rate": 0.0001, "loss": 0.0111, "step": 179190 }, { "epoch": 1178.9473684210527, "grad_norm": 1.7433876991271973, "learning_rate": 0.0001, "loss": 0.0155, "step": 179200 }, { "epoch": 1179.0131578947369, "grad_norm": 1.4473059177398682, "learning_rate": 0.0001, "loss": 0.0121, "step": 179210 }, { "epoch": 1179.078947368421, "grad_norm": 1.3547879457473755, "learning_rate": 0.0001, "loss": 0.0092, "step": 179220 }, { "epoch": 1179.1447368421052, "grad_norm": 1.389849305152893, "learning_rate": 0.0001, "loss": 0.0111, "step": 179230 }, { "epoch": 1179.2105263157894, "grad_norm": 1.1601641178131104, "learning_rate": 0.0001, "loss": 0.0096, "step": 179240 }, { "epoch": 1179.2763157894738, "grad_norm": 1.359365463256836, "learning_rate": 0.0001, "loss": 0.0148, "step": 179250 }, { "epoch": 1179.342105263158, "grad_norm": 1.0313860177993774, "learning_rate": 0.0001, "loss": 0.0101, "step": 179260 }, { "epoch": 1179.407894736842, "grad_norm": 1.2688562870025635, "learning_rate": 0.0001, "loss": 0.0144, "step": 179270 }, { "epoch": 1179.4736842105262, "grad_norm": 1.4322148561477661, "learning_rate": 0.0001, "loss": 0.0101, "step": 179280 }, { "epoch": 1179.5394736842106, "grad_norm": 1.266709804534912, "learning_rate": 0.0001, "loss": 0.0129, "step": 179290 }, { "epoch": 1179.6052631578948, "grad_norm": 1.1147500276565552, "learning_rate": 0.0001, "loss": 0.0099, "step": 179300 }, { "epoch": 1179.671052631579, "grad_norm": 0.935669481754303, "learning_rate": 0.0001, "loss": 0.0102, "step": 179310 }, { "epoch": 1179.7368421052631, "grad_norm": 1.808039903640747, "learning_rate": 0.0001, "loss": 0.014, "step": 179320 }, { "epoch": 1179.8026315789473, "grad_norm": 1.434203028678894, "learning_rate": 0.0001, "loss": 0.0135, "step": 179330 }, { "epoch": 1179.8684210526317, "grad_norm": 1.267082929611206, "learning_rate": 0.0001, "loss": 0.0146, "step": 179340 }, { "epoch": 1179.9342105263158, "grad_norm": 1.0592442750930786, "learning_rate": 0.0001, "loss": 0.015, "step": 179350 }, { "epoch": 1180.0, "grad_norm": 1.339080572128296, "learning_rate": 0.0001, "loss": 0.0107, "step": 179360 }, { "epoch": 1180.0657894736842, "grad_norm": 1.4091718196868896, "learning_rate": 0.0001, "loss": 0.0109, "step": 179370 }, { "epoch": 1180.1315789473683, "grad_norm": 1.4289418458938599, "learning_rate": 0.0001, "loss": 0.0115, "step": 179380 }, { "epoch": 1180.1973684210527, "grad_norm": 1.5907689332962036, "learning_rate": 0.0001, "loss": 0.0115, "step": 179390 }, { "epoch": 1180.2631578947369, "grad_norm": 1.3280335664749146, "learning_rate": 0.0001, "loss": 0.0108, "step": 179400 }, { "epoch": 1180.328947368421, "grad_norm": 1.4266337156295776, "learning_rate": 0.0001, "loss": 0.0128, "step": 179410 }, { "epoch": 1180.3947368421052, "grad_norm": 1.5757168531417847, "learning_rate": 0.0001, "loss": 0.0108, "step": 179420 }, { "epoch": 1180.4605263157894, "grad_norm": 1.844122290611267, "learning_rate": 0.0001, "loss": 0.0121, "step": 179430 }, { "epoch": 1180.5263157894738, "grad_norm": 1.591789722442627, "learning_rate": 0.0001, "loss": 0.0132, "step": 179440 }, { "epoch": 1180.592105263158, "grad_norm": 1.5864835977554321, "learning_rate": 0.0001, "loss": 0.0113, "step": 179450 }, { "epoch": 1180.657894736842, "grad_norm": 1.883447527885437, "learning_rate": 0.0001, "loss": 0.0106, "step": 179460 }, { "epoch": 1180.7236842105262, "grad_norm": 1.9319666624069214, "learning_rate": 0.0001, "loss": 0.0115, "step": 179470 }, { "epoch": 1180.7894736842106, "grad_norm": 1.6754413843154907, "learning_rate": 0.0001, "loss": 0.0161, "step": 179480 }, { "epoch": 1180.8552631578948, "grad_norm": 1.5455551147460938, "learning_rate": 0.0001, "loss": 0.0108, "step": 179490 }, { "epoch": 1180.921052631579, "grad_norm": 1.4719334840774536, "learning_rate": 0.0001, "loss": 0.0126, "step": 179500 }, { "epoch": 1180.9868421052631, "grad_norm": 1.5098745822906494, "learning_rate": 0.0001, "loss": 0.012, "step": 179510 }, { "epoch": 1181.0526315789473, "grad_norm": 1.5976704359054565, "learning_rate": 0.0001, "loss": 0.0129, "step": 179520 }, { "epoch": 1181.1184210526317, "grad_norm": 1.2012510299682617, "learning_rate": 0.0001, "loss": 0.0122, "step": 179530 }, { "epoch": 1181.1842105263158, "grad_norm": 1.4412367343902588, "learning_rate": 0.0001, "loss": 0.0127, "step": 179540 }, { "epoch": 1181.25, "grad_norm": 1.4963215589523315, "learning_rate": 0.0001, "loss": 0.0119, "step": 179550 }, { "epoch": 1181.3157894736842, "grad_norm": 1.5266706943511963, "learning_rate": 0.0001, "loss": 0.0084, "step": 179560 }, { "epoch": 1181.3815789473683, "grad_norm": 1.9487699270248413, "learning_rate": 0.0001, "loss": 0.01, "step": 179570 }, { "epoch": 1181.4473684210527, "grad_norm": 1.5634984970092773, "learning_rate": 0.0001, "loss": 0.0128, "step": 179580 }, { "epoch": 1181.5131578947369, "grad_norm": 1.4340654611587524, "learning_rate": 0.0001, "loss": 0.0081, "step": 179590 }, { "epoch": 1181.578947368421, "grad_norm": 1.6727044582366943, "learning_rate": 0.0001, "loss": 0.0114, "step": 179600 }, { "epoch": 1181.6447368421052, "grad_norm": 1.475998878479004, "learning_rate": 0.0001, "loss": 0.0137, "step": 179610 }, { "epoch": 1181.7105263157894, "grad_norm": 1.3522264957427979, "learning_rate": 0.0001, "loss": 0.0121, "step": 179620 }, { "epoch": 1181.7763157894738, "grad_norm": 0.9200405478477478, "learning_rate": 0.0001, "loss": 0.0153, "step": 179630 }, { "epoch": 1181.842105263158, "grad_norm": 1.2860138416290283, "learning_rate": 0.0001, "loss": 0.0131, "step": 179640 }, { "epoch": 1181.907894736842, "grad_norm": 1.1856030225753784, "learning_rate": 0.0001, "loss": 0.0133, "step": 179650 }, { "epoch": 1181.9736842105262, "grad_norm": 1.392082929611206, "learning_rate": 0.0001, "loss": 0.0087, "step": 179660 }, { "epoch": 1182.0394736842106, "grad_norm": 1.5477997064590454, "learning_rate": 0.0001, "loss": 0.0147, "step": 179670 }, { "epoch": 1182.1052631578948, "grad_norm": 1.2619411945343018, "learning_rate": 0.0001, "loss": 0.0112, "step": 179680 }, { "epoch": 1182.171052631579, "grad_norm": 1.3904857635498047, "learning_rate": 0.0001, "loss": 0.0117, "step": 179690 }, { "epoch": 1182.2368421052631, "grad_norm": 1.430029273033142, "learning_rate": 0.0001, "loss": 0.012, "step": 179700 }, { "epoch": 1182.3026315789473, "grad_norm": 1.740669846534729, "learning_rate": 0.0001, "loss": 0.0099, "step": 179710 }, { "epoch": 1182.3684210526317, "grad_norm": 1.8351223468780518, "learning_rate": 0.0001, "loss": 0.0132, "step": 179720 }, { "epoch": 1182.4342105263158, "grad_norm": 1.7920143604278564, "learning_rate": 0.0001, "loss": 0.0115, "step": 179730 }, { "epoch": 1182.5, "grad_norm": 1.3161859512329102, "learning_rate": 0.0001, "loss": 0.0123, "step": 179740 }, { "epoch": 1182.5657894736842, "grad_norm": 1.5275499820709229, "learning_rate": 0.0001, "loss": 0.0097, "step": 179750 }, { "epoch": 1182.6315789473683, "grad_norm": 1.7494144439697266, "learning_rate": 0.0001, "loss": 0.0106, "step": 179760 }, { "epoch": 1182.6973684210527, "grad_norm": 1.6563754081726074, "learning_rate": 0.0001, "loss": 0.0107, "step": 179770 }, { "epoch": 1182.7631578947369, "grad_norm": 1.6409225463867188, "learning_rate": 0.0001, "loss": 0.0105, "step": 179780 }, { "epoch": 1182.828947368421, "grad_norm": 2.076585531234741, "learning_rate": 0.0001, "loss": 0.0168, "step": 179790 }, { "epoch": 1182.8947368421052, "grad_norm": 1.58701753616333, "learning_rate": 0.0001, "loss": 0.0109, "step": 179800 }, { "epoch": 1182.9605263157894, "grad_norm": 1.6318439245224, "learning_rate": 0.0001, "loss": 0.0136, "step": 179810 }, { "epoch": 1183.0263157894738, "grad_norm": 1.1497156620025635, "learning_rate": 0.0001, "loss": 0.0155, "step": 179820 }, { "epoch": 1183.092105263158, "grad_norm": 1.337497591972351, "learning_rate": 0.0001, "loss": 0.0119, "step": 179830 }, { "epoch": 1183.157894736842, "grad_norm": 1.9662150144577026, "learning_rate": 0.0001, "loss": 0.0118, "step": 179840 }, { "epoch": 1183.2236842105262, "grad_norm": 1.324824333190918, "learning_rate": 0.0001, "loss": 0.0134, "step": 179850 }, { "epoch": 1183.2894736842106, "grad_norm": 1.247708797454834, "learning_rate": 0.0001, "loss": 0.011, "step": 179860 }, { "epoch": 1183.3552631578948, "grad_norm": 1.3938440084457397, "learning_rate": 0.0001, "loss": 0.012, "step": 179870 }, { "epoch": 1183.421052631579, "grad_norm": 1.832596778869629, "learning_rate": 0.0001, "loss": 0.012, "step": 179880 }, { "epoch": 1183.4868421052631, "grad_norm": 1.7082464694976807, "learning_rate": 0.0001, "loss": 0.0102, "step": 179890 }, { "epoch": 1183.5526315789473, "grad_norm": 1.8857053518295288, "learning_rate": 0.0001, "loss": 0.0147, "step": 179900 }, { "epoch": 1183.6184210526317, "grad_norm": 1.280592918395996, "learning_rate": 0.0001, "loss": 0.0127, "step": 179910 }, { "epoch": 1183.6842105263158, "grad_norm": 1.5851857662200928, "learning_rate": 0.0001, "loss": 0.0097, "step": 179920 }, { "epoch": 1183.75, "grad_norm": 1.736794352531433, "learning_rate": 0.0001, "loss": 0.0094, "step": 179930 }, { "epoch": 1183.8157894736842, "grad_norm": 1.611746907234192, "learning_rate": 0.0001, "loss": 0.0102, "step": 179940 }, { "epoch": 1183.8815789473683, "grad_norm": 1.3283923864364624, "learning_rate": 0.0001, "loss": 0.0089, "step": 179950 }, { "epoch": 1183.9473684210527, "grad_norm": 1.3064154386520386, "learning_rate": 0.0001, "loss": 0.0116, "step": 179960 }, { "epoch": 1184.0131578947369, "grad_norm": 1.3542779684066772, "learning_rate": 0.0001, "loss": 0.0115, "step": 179970 }, { "epoch": 1184.078947368421, "grad_norm": 1.374304175376892, "learning_rate": 0.0001, "loss": 0.0114, "step": 179980 }, { "epoch": 1184.1447368421052, "grad_norm": 1.3974847793579102, "learning_rate": 0.0001, "loss": 0.012, "step": 179990 }, { "epoch": 1184.2105263157894, "grad_norm": 1.4946845769882202, "learning_rate": 0.0001, "loss": 0.0133, "step": 180000 }, { "epoch": 1184.2763157894738, "grad_norm": 1.7280199527740479, "learning_rate": 0.0001, "loss": 0.0119, "step": 180010 }, { "epoch": 1184.342105263158, "grad_norm": 1.0662562847137451, "learning_rate": 0.0001, "loss": 0.0135, "step": 180020 }, { "epoch": 1184.407894736842, "grad_norm": 1.0755711793899536, "learning_rate": 0.0001, "loss": 0.0138, "step": 180030 }, { "epoch": 1184.4736842105262, "grad_norm": 1.195715308189392, "learning_rate": 0.0001, "loss": 0.01, "step": 180040 }, { "epoch": 1184.5394736842106, "grad_norm": 1.4414880275726318, "learning_rate": 0.0001, "loss": 0.0127, "step": 180050 }, { "epoch": 1184.6052631578948, "grad_norm": 1.311197280883789, "learning_rate": 0.0001, "loss": 0.012, "step": 180060 }, { "epoch": 1184.671052631579, "grad_norm": 1.249899983406067, "learning_rate": 0.0001, "loss": 0.0129, "step": 180070 }, { "epoch": 1184.7368421052631, "grad_norm": 1.916966438293457, "learning_rate": 0.0001, "loss": 0.0095, "step": 180080 }, { "epoch": 1184.8026315789473, "grad_norm": 1.8124878406524658, "learning_rate": 0.0001, "loss": 0.0119, "step": 180090 }, { "epoch": 1184.8684210526317, "grad_norm": 1.8097450733184814, "learning_rate": 0.0001, "loss": 0.0129, "step": 180100 }, { "epoch": 1184.9342105263158, "grad_norm": 1.9099973440170288, "learning_rate": 0.0001, "loss": 0.0095, "step": 180110 }, { "epoch": 1185.0, "grad_norm": 1.7481553554534912, "learning_rate": 0.0001, "loss": 0.0136, "step": 180120 }, { "epoch": 1185.0657894736842, "grad_norm": 1.2523829936981201, "learning_rate": 0.0001, "loss": 0.0174, "step": 180130 }, { "epoch": 1185.1315789473683, "grad_norm": 1.5012831687927246, "learning_rate": 0.0001, "loss": 0.0118, "step": 180140 }, { "epoch": 1185.1973684210527, "grad_norm": 1.6258310079574585, "learning_rate": 0.0001, "loss": 0.0088, "step": 180150 }, { "epoch": 1185.2631578947369, "grad_norm": 1.4904812574386597, "learning_rate": 0.0001, "loss": 0.0087, "step": 180160 }, { "epoch": 1185.328947368421, "grad_norm": 1.3533165454864502, "learning_rate": 0.0001, "loss": 0.0116, "step": 180170 }, { "epoch": 1185.3947368421052, "grad_norm": 1.4859440326690674, "learning_rate": 0.0001, "loss": 0.0137, "step": 180180 }, { "epoch": 1185.4605263157894, "grad_norm": 1.098004937171936, "learning_rate": 0.0001, "loss": 0.0128, "step": 180190 }, { "epoch": 1185.5263157894738, "grad_norm": 1.5172935724258423, "learning_rate": 0.0001, "loss": 0.0121, "step": 180200 }, { "epoch": 1185.592105263158, "grad_norm": 1.698667287826538, "learning_rate": 0.0001, "loss": 0.0111, "step": 180210 }, { "epoch": 1185.657894736842, "grad_norm": 1.7719718217849731, "learning_rate": 0.0001, "loss": 0.0095, "step": 180220 }, { "epoch": 1185.7236842105262, "grad_norm": 1.2595958709716797, "learning_rate": 0.0001, "loss": 0.01, "step": 180230 }, { "epoch": 1185.7894736842106, "grad_norm": 1.7465862035751343, "learning_rate": 0.0001, "loss": 0.0117, "step": 180240 }, { "epoch": 1185.8552631578948, "grad_norm": 1.5030196905136108, "learning_rate": 0.0001, "loss": 0.0163, "step": 180250 }, { "epoch": 1185.921052631579, "grad_norm": 1.5090402364730835, "learning_rate": 0.0001, "loss": 0.0143, "step": 180260 }, { "epoch": 1185.9868421052631, "grad_norm": 1.6014529466629028, "learning_rate": 0.0001, "loss": 0.0106, "step": 180270 }, { "epoch": 1186.0526315789473, "grad_norm": 1.3552976846694946, "learning_rate": 0.0001, "loss": 0.014, "step": 180280 }, { "epoch": 1186.1184210526317, "grad_norm": 0.8576266765594482, "learning_rate": 0.0001, "loss": 0.0083, "step": 180290 }, { "epoch": 1186.1842105263158, "grad_norm": 1.5320172309875488, "learning_rate": 0.0001, "loss": 0.012, "step": 180300 }, { "epoch": 1186.25, "grad_norm": 1.008284091949463, "learning_rate": 0.0001, "loss": 0.0101, "step": 180310 }, { "epoch": 1186.3157894736842, "grad_norm": 1.0729851722717285, "learning_rate": 0.0001, "loss": 0.0126, "step": 180320 }, { "epoch": 1186.3815789473683, "grad_norm": 1.5600985288619995, "learning_rate": 0.0001, "loss": 0.0154, "step": 180330 }, { "epoch": 1186.4473684210527, "grad_norm": 1.012097716331482, "learning_rate": 0.0001, "loss": 0.012, "step": 180340 }, { "epoch": 1186.5131578947369, "grad_norm": 1.07919180393219, "learning_rate": 0.0001, "loss": 0.0154, "step": 180350 }, { "epoch": 1186.578947368421, "grad_norm": 1.099684715270996, "learning_rate": 0.0001, "loss": 0.0089, "step": 180360 }, { "epoch": 1186.6447368421052, "grad_norm": 1.1618226766586304, "learning_rate": 0.0001, "loss": 0.0135, "step": 180370 }, { "epoch": 1186.7105263157894, "grad_norm": 1.1407313346862793, "learning_rate": 0.0001, "loss": 0.012, "step": 180380 }, { "epoch": 1186.7763157894738, "grad_norm": 1.6612005233764648, "learning_rate": 0.0001, "loss": 0.0117, "step": 180390 }, { "epoch": 1186.842105263158, "grad_norm": 1.3510338068008423, "learning_rate": 0.0001, "loss": 0.0095, "step": 180400 }, { "epoch": 1186.907894736842, "grad_norm": 1.4388211965560913, "learning_rate": 0.0001, "loss": 0.0109, "step": 180410 }, { "epoch": 1186.9736842105262, "grad_norm": 1.1491273641586304, "learning_rate": 0.0001, "loss": 0.0148, "step": 180420 }, { "epoch": 1187.0394736842106, "grad_norm": 1.3614307641983032, "learning_rate": 0.0001, "loss": 0.0138, "step": 180430 }, { "epoch": 1187.1052631578948, "grad_norm": 1.2012394666671753, "learning_rate": 0.0001, "loss": 0.0131, "step": 180440 }, { "epoch": 1187.171052631579, "grad_norm": 1.5945820808410645, "learning_rate": 0.0001, "loss": 0.0122, "step": 180450 }, { "epoch": 1187.2368421052631, "grad_norm": 1.3451720476150513, "learning_rate": 0.0001, "loss": 0.015, "step": 180460 }, { "epoch": 1187.3026315789473, "grad_norm": 1.459583044052124, "learning_rate": 0.0001, "loss": 0.0152, "step": 180470 }, { "epoch": 1187.3684210526317, "grad_norm": 1.4323937892913818, "learning_rate": 0.0001, "loss": 0.0112, "step": 180480 }, { "epoch": 1187.4342105263158, "grad_norm": 1.835673451423645, "learning_rate": 0.0001, "loss": 0.0115, "step": 180490 }, { "epoch": 1187.5, "grad_norm": 1.7753686904907227, "learning_rate": 0.0001, "loss": 0.0135, "step": 180500 }, { "epoch": 1187.5657894736842, "grad_norm": 1.8950334787368774, "learning_rate": 0.0001, "loss": 0.0099, "step": 180510 }, { "epoch": 1187.6315789473683, "grad_norm": 1.4907115697860718, "learning_rate": 0.0001, "loss": 0.0135, "step": 180520 }, { "epoch": 1187.6973684210527, "grad_norm": 2.1437461376190186, "learning_rate": 0.0001, "loss": 0.0112, "step": 180530 }, { "epoch": 1187.7631578947369, "grad_norm": 1.8643455505371094, "learning_rate": 0.0001, "loss": 0.0095, "step": 180540 }, { "epoch": 1187.828947368421, "grad_norm": 1.8570889234542847, "learning_rate": 0.0001, "loss": 0.0119, "step": 180550 }, { "epoch": 1187.8947368421052, "grad_norm": 1.9192677736282349, "learning_rate": 0.0001, "loss": 0.011, "step": 180560 }, { "epoch": 1187.9605263157894, "grad_norm": 1.3594433069229126, "learning_rate": 0.0001, "loss": 0.0095, "step": 180570 }, { "epoch": 1188.0263157894738, "grad_norm": 1.775444746017456, "learning_rate": 0.0001, "loss": 0.0137, "step": 180580 }, { "epoch": 1188.092105263158, "grad_norm": 1.4003305435180664, "learning_rate": 0.0001, "loss": 0.0123, "step": 180590 }, { "epoch": 1188.157894736842, "grad_norm": 1.7437744140625, "learning_rate": 0.0001, "loss": 0.0113, "step": 180600 }, { "epoch": 1188.2236842105262, "grad_norm": 1.2331202030181885, "learning_rate": 0.0001, "loss": 0.0136, "step": 180610 }, { "epoch": 1188.2894736842106, "grad_norm": 1.4521386623382568, "learning_rate": 0.0001, "loss": 0.0121, "step": 180620 }, { "epoch": 1188.3552631578948, "grad_norm": 1.3773293495178223, "learning_rate": 0.0001, "loss": 0.0151, "step": 180630 }, { "epoch": 1188.421052631579, "grad_norm": 1.033101201057434, "learning_rate": 0.0001, "loss": 0.009, "step": 180640 }, { "epoch": 1188.4868421052631, "grad_norm": 1.3561155796051025, "learning_rate": 0.0001, "loss": 0.0096, "step": 180650 }, { "epoch": 1188.5526315789473, "grad_norm": 1.3427296876907349, "learning_rate": 0.0001, "loss": 0.0121, "step": 180660 }, { "epoch": 1188.6184210526317, "grad_norm": 1.7254316806793213, "learning_rate": 0.0001, "loss": 0.0114, "step": 180670 }, { "epoch": 1188.6842105263158, "grad_norm": 1.5456750392913818, "learning_rate": 0.0001, "loss": 0.012, "step": 180680 }, { "epoch": 1188.75, "grad_norm": 1.5348024368286133, "learning_rate": 0.0001, "loss": 0.0135, "step": 180690 }, { "epoch": 1188.8157894736842, "grad_norm": 1.4552415609359741, "learning_rate": 0.0001, "loss": 0.0106, "step": 180700 }, { "epoch": 1188.8815789473683, "grad_norm": 1.556308627128601, "learning_rate": 0.0001, "loss": 0.0112, "step": 180710 }, { "epoch": 1188.9473684210527, "grad_norm": 1.2014644145965576, "learning_rate": 0.0001, "loss": 0.0143, "step": 180720 }, { "epoch": 1189.0131578947369, "grad_norm": 2.07954478263855, "learning_rate": 0.0001, "loss": 0.0124, "step": 180730 }, { "epoch": 1189.078947368421, "grad_norm": 1.457672119140625, "learning_rate": 0.0001, "loss": 0.0085, "step": 180740 }, { "epoch": 1189.1447368421052, "grad_norm": 1.6122580766677856, "learning_rate": 0.0001, "loss": 0.013, "step": 180750 }, { "epoch": 1189.2105263157894, "grad_norm": 1.8022069931030273, "learning_rate": 0.0001, "loss": 0.0088, "step": 180760 }, { "epoch": 1189.2763157894738, "grad_norm": 1.0493839979171753, "learning_rate": 0.0001, "loss": 0.0101, "step": 180770 }, { "epoch": 1189.342105263158, "grad_norm": 1.6912893056869507, "learning_rate": 0.0001, "loss": 0.01, "step": 180780 }, { "epoch": 1189.407894736842, "grad_norm": 1.3057159185409546, "learning_rate": 0.0001, "loss": 0.0122, "step": 180790 }, { "epoch": 1189.4736842105262, "grad_norm": 1.3569891452789307, "learning_rate": 0.0001, "loss": 0.0139, "step": 180800 }, { "epoch": 1189.5394736842106, "grad_norm": 1.491847276687622, "learning_rate": 0.0001, "loss": 0.0111, "step": 180810 }, { "epoch": 1189.6052631578948, "grad_norm": 1.5224536657333374, "learning_rate": 0.0001, "loss": 0.0119, "step": 180820 }, { "epoch": 1189.671052631579, "grad_norm": 0.9678660035133362, "learning_rate": 0.0001, "loss": 0.0126, "step": 180830 }, { "epoch": 1189.7368421052631, "grad_norm": 1.4993155002593994, "learning_rate": 0.0001, "loss": 0.0145, "step": 180840 }, { "epoch": 1189.8026315789473, "grad_norm": 1.8409208059310913, "learning_rate": 0.0001, "loss": 0.0163, "step": 180850 }, { "epoch": 1189.8684210526317, "grad_norm": 1.7241319417953491, "learning_rate": 0.0001, "loss": 0.0152, "step": 180860 }, { "epoch": 1189.9342105263158, "grad_norm": 1.69033944606781, "learning_rate": 0.0001, "loss": 0.0116, "step": 180870 }, { "epoch": 1190.0, "grad_norm": 1.5463366508483887, "learning_rate": 0.0001, "loss": 0.01, "step": 180880 }, { "epoch": 1190.0657894736842, "grad_norm": 1.4962332248687744, "learning_rate": 0.0001, "loss": 0.0138, "step": 180890 }, { "epoch": 1190.1315789473683, "grad_norm": 1.211410403251648, "learning_rate": 0.0001, "loss": 0.0138, "step": 180900 }, { "epoch": 1190.1973684210527, "grad_norm": 1.5097498893737793, "learning_rate": 0.0001, "loss": 0.0114, "step": 180910 }, { "epoch": 1190.2631578947369, "grad_norm": 1.3968160152435303, "learning_rate": 0.0001, "loss": 0.0137, "step": 180920 }, { "epoch": 1190.328947368421, "grad_norm": 1.329642415046692, "learning_rate": 0.0001, "loss": 0.0083, "step": 180930 }, { "epoch": 1190.3947368421052, "grad_norm": 1.035478949546814, "learning_rate": 0.0001, "loss": 0.0129, "step": 180940 }, { "epoch": 1190.4605263157894, "grad_norm": 2.0623698234558105, "learning_rate": 0.0001, "loss": 0.0097, "step": 180950 }, { "epoch": 1190.5263157894738, "grad_norm": 1.5362149477005005, "learning_rate": 0.0001, "loss": 0.0124, "step": 180960 }, { "epoch": 1190.592105263158, "grad_norm": 1.3600163459777832, "learning_rate": 0.0001, "loss": 0.0125, "step": 180970 }, { "epoch": 1190.657894736842, "grad_norm": 1.339435338973999, "learning_rate": 0.0001, "loss": 0.0116, "step": 180980 }, { "epoch": 1190.7236842105262, "grad_norm": 1.1084638833999634, "learning_rate": 0.0001, "loss": 0.012, "step": 180990 }, { "epoch": 1190.7894736842106, "grad_norm": 1.58629310131073, "learning_rate": 0.0001, "loss": 0.0099, "step": 181000 }, { "epoch": 1190.8552631578948, "grad_norm": 1.017935872077942, "learning_rate": 0.0001, "loss": 0.0127, "step": 181010 }, { "epoch": 1190.921052631579, "grad_norm": 1.7094731330871582, "learning_rate": 0.0001, "loss": 0.011, "step": 181020 }, { "epoch": 1190.9868421052631, "grad_norm": 1.1324844360351562, "learning_rate": 0.0001, "loss": 0.013, "step": 181030 }, { "epoch": 1191.0526315789473, "grad_norm": 1.40479576587677, "learning_rate": 0.0001, "loss": 0.0105, "step": 181040 }, { "epoch": 1191.1184210526317, "grad_norm": 1.980483055114746, "learning_rate": 0.0001, "loss": 0.0124, "step": 181050 }, { "epoch": 1191.1842105263158, "grad_norm": 1.6120669841766357, "learning_rate": 0.0001, "loss": 0.0103, "step": 181060 }, { "epoch": 1191.25, "grad_norm": 1.0063859224319458, "learning_rate": 0.0001, "loss": 0.0104, "step": 181070 }, { "epoch": 1191.3157894736842, "grad_norm": 1.488358974456787, "learning_rate": 0.0001, "loss": 0.0114, "step": 181080 }, { "epoch": 1191.3815789473683, "grad_norm": 1.3147696256637573, "learning_rate": 0.0001, "loss": 0.0155, "step": 181090 }, { "epoch": 1191.4473684210527, "grad_norm": 1.0943691730499268, "learning_rate": 0.0001, "loss": 0.0106, "step": 181100 }, { "epoch": 1191.5131578947369, "grad_norm": 1.3576385974884033, "learning_rate": 0.0001, "loss": 0.0144, "step": 181110 }, { "epoch": 1191.578947368421, "grad_norm": 1.4461612701416016, "learning_rate": 0.0001, "loss": 0.0139, "step": 181120 }, { "epoch": 1191.6447368421052, "grad_norm": 1.3072803020477295, "learning_rate": 0.0001, "loss": 0.0117, "step": 181130 }, { "epoch": 1191.7105263157894, "grad_norm": 1.371636152267456, "learning_rate": 0.0001, "loss": 0.0123, "step": 181140 }, { "epoch": 1191.7763157894738, "grad_norm": 1.2680213451385498, "learning_rate": 0.0001, "loss": 0.0102, "step": 181150 }, { "epoch": 1191.842105263158, "grad_norm": 1.2084290981292725, "learning_rate": 0.0001, "loss": 0.0128, "step": 181160 }, { "epoch": 1191.907894736842, "grad_norm": 1.4369373321533203, "learning_rate": 0.0001, "loss": 0.0156, "step": 181170 }, { "epoch": 1191.9736842105262, "grad_norm": 1.1876963376998901, "learning_rate": 0.0001, "loss": 0.0091, "step": 181180 }, { "epoch": 1192.0394736842106, "grad_norm": 1.2982423305511475, "learning_rate": 0.0001, "loss": 0.0129, "step": 181190 }, { "epoch": 1192.1052631578948, "grad_norm": 1.5042023658752441, "learning_rate": 0.0001, "loss": 0.0087, "step": 181200 }, { "epoch": 1192.171052631579, "grad_norm": 1.4566150903701782, "learning_rate": 0.0001, "loss": 0.0134, "step": 181210 }, { "epoch": 1192.2368421052631, "grad_norm": 1.6733314990997314, "learning_rate": 0.0001, "loss": 0.0147, "step": 181220 }, { "epoch": 1192.3026315789473, "grad_norm": 1.3827568292617798, "learning_rate": 0.0001, "loss": 0.0109, "step": 181230 }, { "epoch": 1192.3684210526317, "grad_norm": 1.2534300088882446, "learning_rate": 0.0001, "loss": 0.0136, "step": 181240 }, { "epoch": 1192.4342105263158, "grad_norm": 1.4022619724273682, "learning_rate": 0.0001, "loss": 0.014, "step": 181250 }, { "epoch": 1192.5, "grad_norm": 1.7130787372589111, "learning_rate": 0.0001, "loss": 0.0139, "step": 181260 }, { "epoch": 1192.5657894736842, "grad_norm": 1.2746260166168213, "learning_rate": 0.0001, "loss": 0.0106, "step": 181270 }, { "epoch": 1192.6315789473683, "grad_norm": 1.6737772226333618, "learning_rate": 0.0001, "loss": 0.0131, "step": 181280 }, { "epoch": 1192.6973684210527, "grad_norm": 1.667872667312622, "learning_rate": 0.0001, "loss": 0.0107, "step": 181290 }, { "epoch": 1192.7631578947369, "grad_norm": 1.2416611909866333, "learning_rate": 0.0001, "loss": 0.0099, "step": 181300 }, { "epoch": 1192.828947368421, "grad_norm": 1.5822666883468628, "learning_rate": 0.0001, "loss": 0.0123, "step": 181310 }, { "epoch": 1192.8947368421052, "grad_norm": 1.3194670677185059, "learning_rate": 0.0001, "loss": 0.0165, "step": 181320 }, { "epoch": 1192.9605263157894, "grad_norm": 1.3709219694137573, "learning_rate": 0.0001, "loss": 0.0089, "step": 181330 }, { "epoch": 1193.0263157894738, "grad_norm": 1.415262222290039, "learning_rate": 0.0001, "loss": 0.0095, "step": 181340 }, { "epoch": 1193.092105263158, "grad_norm": 0.9743747711181641, "learning_rate": 0.0001, "loss": 0.0135, "step": 181350 }, { "epoch": 1193.157894736842, "grad_norm": 1.6094553470611572, "learning_rate": 0.0001, "loss": 0.0148, "step": 181360 }, { "epoch": 1193.2236842105262, "grad_norm": 1.2783271074295044, "learning_rate": 0.0001, "loss": 0.016, "step": 181370 }, { "epoch": 1193.2894736842106, "grad_norm": 1.3957195281982422, "learning_rate": 0.0001, "loss": 0.011, "step": 181380 }, { "epoch": 1193.3552631578948, "grad_norm": 1.591414451599121, "learning_rate": 0.0001, "loss": 0.0164, "step": 181390 }, { "epoch": 1193.421052631579, "grad_norm": 1.1911691427230835, "learning_rate": 0.0001, "loss": 0.0133, "step": 181400 }, { "epoch": 1193.4868421052631, "grad_norm": 1.313908576965332, "learning_rate": 0.0001, "loss": 0.0085, "step": 181410 }, { "epoch": 1193.5526315789473, "grad_norm": 1.2525756359100342, "learning_rate": 0.0001, "loss": 0.0118, "step": 181420 }, { "epoch": 1193.6184210526317, "grad_norm": 1.6716032028198242, "learning_rate": 0.0001, "loss": 0.0101, "step": 181430 }, { "epoch": 1193.6842105263158, "grad_norm": 1.8187240362167358, "learning_rate": 0.0001, "loss": 0.0095, "step": 181440 }, { "epoch": 1193.75, "grad_norm": 1.4710139036178589, "learning_rate": 0.0001, "loss": 0.0111, "step": 181450 }, { "epoch": 1193.8157894736842, "grad_norm": 1.4333950281143188, "learning_rate": 0.0001, "loss": 0.0119, "step": 181460 }, { "epoch": 1193.8815789473683, "grad_norm": 1.3707115650177002, "learning_rate": 0.0001, "loss": 0.0096, "step": 181470 }, { "epoch": 1193.9473684210527, "grad_norm": 1.4504510164260864, "learning_rate": 0.0001, "loss": 0.0137, "step": 181480 }, { "epoch": 1194.0131578947369, "grad_norm": 1.679206132888794, "learning_rate": 0.0001, "loss": 0.012, "step": 181490 }, { "epoch": 1194.078947368421, "grad_norm": 1.4905837774276733, "learning_rate": 0.0001, "loss": 0.0099, "step": 181500 }, { "epoch": 1194.1447368421052, "grad_norm": 1.461971640586853, "learning_rate": 0.0001, "loss": 0.0138, "step": 181510 }, { "epoch": 1194.2105263157894, "grad_norm": 0.9012203812599182, "learning_rate": 0.0001, "loss": 0.0123, "step": 181520 }, { "epoch": 1194.2763157894738, "grad_norm": 1.5035923719406128, "learning_rate": 0.0001, "loss": 0.0138, "step": 181530 }, { "epoch": 1194.342105263158, "grad_norm": 1.7010166645050049, "learning_rate": 0.0001, "loss": 0.0119, "step": 181540 }, { "epoch": 1194.407894736842, "grad_norm": 1.3864818811416626, "learning_rate": 0.0001, "loss": 0.0112, "step": 181550 }, { "epoch": 1194.4736842105262, "grad_norm": 1.5087649822235107, "learning_rate": 0.0001, "loss": 0.0131, "step": 181560 }, { "epoch": 1194.5394736842106, "grad_norm": 1.1236271858215332, "learning_rate": 0.0001, "loss": 0.0118, "step": 181570 }, { "epoch": 1194.6052631578948, "grad_norm": 1.4132107496261597, "learning_rate": 0.0001, "loss": 0.0119, "step": 181580 }, { "epoch": 1194.671052631579, "grad_norm": 1.718405842781067, "learning_rate": 0.0001, "loss": 0.011, "step": 181590 }, { "epoch": 1194.7368421052631, "grad_norm": 1.76503586769104, "learning_rate": 0.0001, "loss": 0.0112, "step": 181600 }, { "epoch": 1194.8026315789473, "grad_norm": 1.3476378917694092, "learning_rate": 0.0001, "loss": 0.0132, "step": 181610 }, { "epoch": 1194.8684210526317, "grad_norm": 1.629447340965271, "learning_rate": 0.0001, "loss": 0.0093, "step": 181620 }, { "epoch": 1194.9342105263158, "grad_norm": 1.0570226907730103, "learning_rate": 0.0001, "loss": 0.0142, "step": 181630 }, { "epoch": 1195.0, "grad_norm": 1.3869355916976929, "learning_rate": 0.0001, "loss": 0.0116, "step": 181640 }, { "epoch": 1195.0657894736842, "grad_norm": 1.388905644416809, "learning_rate": 0.0001, "loss": 0.0097, "step": 181650 }, { "epoch": 1195.1315789473683, "grad_norm": 1.2636529207229614, "learning_rate": 0.0001, "loss": 0.0159, "step": 181660 }, { "epoch": 1195.1973684210527, "grad_norm": 1.2759031057357788, "learning_rate": 0.0001, "loss": 0.013, "step": 181670 }, { "epoch": 1195.2631578947369, "grad_norm": 1.6138713359832764, "learning_rate": 0.0001, "loss": 0.0098, "step": 181680 }, { "epoch": 1195.328947368421, "grad_norm": 1.3124803304672241, "learning_rate": 0.0001, "loss": 0.0155, "step": 181690 }, { "epoch": 1195.3947368421052, "grad_norm": 1.6029092073440552, "learning_rate": 0.0001, "loss": 0.0096, "step": 181700 }, { "epoch": 1195.4605263157894, "grad_norm": 1.3494555950164795, "learning_rate": 0.0001, "loss": 0.0195, "step": 181710 }, { "epoch": 1195.5263157894738, "grad_norm": 1.4142688512802124, "learning_rate": 0.0001, "loss": 0.0132, "step": 181720 }, { "epoch": 1195.592105263158, "grad_norm": 1.2826107740402222, "learning_rate": 0.0001, "loss": 0.0119, "step": 181730 }, { "epoch": 1195.657894736842, "grad_norm": 1.9591542482376099, "learning_rate": 0.0001, "loss": 0.0103, "step": 181740 }, { "epoch": 1195.7236842105262, "grad_norm": 1.7190109491348267, "learning_rate": 0.0001, "loss": 0.0097, "step": 181750 }, { "epoch": 1195.7894736842106, "grad_norm": 1.1284778118133545, "learning_rate": 0.0001, "loss": 0.0081, "step": 181760 }, { "epoch": 1195.8552631578948, "grad_norm": 0.8567036986351013, "learning_rate": 0.0001, "loss": 0.0123, "step": 181770 }, { "epoch": 1195.921052631579, "grad_norm": 1.2436800003051758, "learning_rate": 0.0001, "loss": 0.0103, "step": 181780 }, { "epoch": 1195.9868421052631, "grad_norm": 1.1578855514526367, "learning_rate": 0.0001, "loss": 0.0101, "step": 181790 }, { "epoch": 1196.0526315789473, "grad_norm": 1.706596851348877, "learning_rate": 0.0001, "loss": 0.0089, "step": 181800 }, { "epoch": 1196.1184210526317, "grad_norm": 1.6739020347595215, "learning_rate": 0.0001, "loss": 0.0122, "step": 181810 }, { "epoch": 1196.1842105263158, "grad_norm": 1.4730595350265503, "learning_rate": 0.0001, "loss": 0.0141, "step": 181820 }, { "epoch": 1196.25, "grad_norm": 1.1565604209899902, "learning_rate": 0.0001, "loss": 0.0098, "step": 181830 }, { "epoch": 1196.3157894736842, "grad_norm": 1.4470055103302002, "learning_rate": 0.0001, "loss": 0.0126, "step": 181840 }, { "epoch": 1196.3815789473683, "grad_norm": 1.6547256708145142, "learning_rate": 0.0001, "loss": 0.0105, "step": 181850 }, { "epoch": 1196.4473684210527, "grad_norm": 1.3092511892318726, "learning_rate": 0.0001, "loss": 0.0156, "step": 181860 }, { "epoch": 1196.5131578947369, "grad_norm": 1.3533838987350464, "learning_rate": 0.0001, "loss": 0.0111, "step": 181870 }, { "epoch": 1196.578947368421, "grad_norm": 1.115848183631897, "learning_rate": 0.0001, "loss": 0.017, "step": 181880 }, { "epoch": 1196.6447368421052, "grad_norm": 1.7072186470031738, "learning_rate": 0.0001, "loss": 0.0127, "step": 181890 }, { "epoch": 1196.7105263157894, "grad_norm": 1.6703840494155884, "learning_rate": 0.0001, "loss": 0.0123, "step": 181900 }, { "epoch": 1196.7763157894738, "grad_norm": 1.7547203302383423, "learning_rate": 0.0001, "loss": 0.0109, "step": 181910 }, { "epoch": 1196.842105263158, "grad_norm": 1.5768003463745117, "learning_rate": 0.0001, "loss": 0.0122, "step": 181920 }, { "epoch": 1196.907894736842, "grad_norm": 1.809128999710083, "learning_rate": 0.0001, "loss": 0.0105, "step": 181930 }, { "epoch": 1196.9736842105262, "grad_norm": 1.6119065284729004, "learning_rate": 0.0001, "loss": 0.012, "step": 181940 }, { "epoch": 1197.0394736842106, "grad_norm": 1.6073716878890991, "learning_rate": 0.0001, "loss": 0.0104, "step": 181950 }, { "epoch": 1197.1052631578948, "grad_norm": 1.3223860263824463, "learning_rate": 0.0001, "loss": 0.0097, "step": 181960 }, { "epoch": 1197.171052631579, "grad_norm": 1.3444913625717163, "learning_rate": 0.0001, "loss": 0.0116, "step": 181970 }, { "epoch": 1197.2368421052631, "grad_norm": 1.1849110126495361, "learning_rate": 0.0001, "loss": 0.0105, "step": 181980 }, { "epoch": 1197.3026315789473, "grad_norm": 1.478541612625122, "learning_rate": 0.0001, "loss": 0.0139, "step": 181990 }, { "epoch": 1197.3684210526317, "grad_norm": 1.6450375318527222, "learning_rate": 0.0001, "loss": 0.0102, "step": 182000 }, { "epoch": 1197.4342105263158, "grad_norm": 1.6895653009414673, "learning_rate": 0.0001, "loss": 0.0098, "step": 182010 }, { "epoch": 1197.5, "grad_norm": 1.4378571510314941, "learning_rate": 0.0001, "loss": 0.0102, "step": 182020 }, { "epoch": 1197.5657894736842, "grad_norm": 1.4345893859863281, "learning_rate": 0.0001, "loss": 0.014, "step": 182030 }, { "epoch": 1197.6315789473683, "grad_norm": 1.3673508167266846, "learning_rate": 0.0001, "loss": 0.0125, "step": 182040 }, { "epoch": 1197.6973684210527, "grad_norm": 1.7420519590377808, "learning_rate": 0.0001, "loss": 0.0131, "step": 182050 }, { "epoch": 1197.7631578947369, "grad_norm": 1.3952010869979858, "learning_rate": 0.0001, "loss": 0.0101, "step": 182060 }, { "epoch": 1197.828947368421, "grad_norm": 1.4125508069992065, "learning_rate": 0.0001, "loss": 0.0132, "step": 182070 }, { "epoch": 1197.8947368421052, "grad_norm": 1.2643882036209106, "learning_rate": 0.0001, "loss": 0.0147, "step": 182080 }, { "epoch": 1197.9605263157894, "grad_norm": 1.3288276195526123, "learning_rate": 0.0001, "loss": 0.0167, "step": 182090 }, { "epoch": 1198.0263157894738, "grad_norm": 0.9888513088226318, "learning_rate": 0.0001, "loss": 0.0099, "step": 182100 }, { "epoch": 1198.092105263158, "grad_norm": 0.956295907497406, "learning_rate": 0.0001, "loss": 0.0123, "step": 182110 }, { "epoch": 1198.157894736842, "grad_norm": 1.5867425203323364, "learning_rate": 0.0001, "loss": 0.0145, "step": 182120 }, { "epoch": 1198.2236842105262, "grad_norm": 1.6269810199737549, "learning_rate": 0.0001, "loss": 0.0117, "step": 182130 }, { "epoch": 1198.2894736842106, "grad_norm": 1.3329930305480957, "learning_rate": 0.0001, "loss": 0.0144, "step": 182140 }, { "epoch": 1198.3552631578948, "grad_norm": 1.326216697692871, "learning_rate": 0.0001, "loss": 0.0154, "step": 182150 }, { "epoch": 1198.421052631579, "grad_norm": 1.2934436798095703, "learning_rate": 0.0001, "loss": 0.0113, "step": 182160 }, { "epoch": 1198.4868421052631, "grad_norm": 1.3218837976455688, "learning_rate": 0.0001, "loss": 0.0147, "step": 182170 }, { "epoch": 1198.5526315789473, "grad_norm": 1.6582801342010498, "learning_rate": 0.0001, "loss": 0.0107, "step": 182180 }, { "epoch": 1198.6184210526317, "grad_norm": 1.7172439098358154, "learning_rate": 0.0001, "loss": 0.0088, "step": 182190 }, { "epoch": 1198.6842105263158, "grad_norm": 1.6642649173736572, "learning_rate": 0.0001, "loss": 0.0161, "step": 182200 }, { "epoch": 1198.75, "grad_norm": 1.383530855178833, "learning_rate": 0.0001, "loss": 0.0107, "step": 182210 }, { "epoch": 1198.8157894736842, "grad_norm": 1.3946146965026855, "learning_rate": 0.0001, "loss": 0.0095, "step": 182220 }, { "epoch": 1198.8815789473683, "grad_norm": 1.4221469163894653, "learning_rate": 0.0001, "loss": 0.0116, "step": 182230 }, { "epoch": 1198.9473684210527, "grad_norm": 1.8128501176834106, "learning_rate": 0.0001, "loss": 0.0098, "step": 182240 }, { "epoch": 1199.0131578947369, "grad_norm": 1.5327993631362915, "learning_rate": 0.0001, "loss": 0.0084, "step": 182250 }, { "epoch": 1199.078947368421, "grad_norm": 1.5891945362091064, "learning_rate": 0.0001, "loss": 0.0138, "step": 182260 }, { "epoch": 1199.1447368421052, "grad_norm": 1.3180402517318726, "learning_rate": 0.0001, "loss": 0.014, "step": 182270 }, { "epoch": 1199.2105263157894, "grad_norm": 1.8356209993362427, "learning_rate": 0.0001, "loss": 0.0095, "step": 182280 }, { "epoch": 1199.2763157894738, "grad_norm": 1.3629604578018188, "learning_rate": 0.0001, "loss": 0.0097, "step": 182290 }, { "epoch": 1199.342105263158, "grad_norm": 1.3929800987243652, "learning_rate": 0.0001, "loss": 0.0138, "step": 182300 }, { "epoch": 1199.407894736842, "grad_norm": 1.1649595499038696, "learning_rate": 0.0001, "loss": 0.0101, "step": 182310 }, { "epoch": 1199.4736842105262, "grad_norm": 1.4532092809677124, "learning_rate": 0.0001, "loss": 0.0117, "step": 182320 }, { "epoch": 1199.5394736842106, "grad_norm": 1.396209478378296, "learning_rate": 0.0001, "loss": 0.0115, "step": 182330 }, { "epoch": 1199.6052631578948, "grad_norm": 1.0377691984176636, "learning_rate": 0.0001, "loss": 0.0118, "step": 182340 }, { "epoch": 1199.671052631579, "grad_norm": 0.8741316795349121, "learning_rate": 0.0001, "loss": 0.0096, "step": 182350 }, { "epoch": 1199.7368421052631, "grad_norm": 1.067919373512268, "learning_rate": 0.0001, "loss": 0.0129, "step": 182360 }, { "epoch": 1199.8026315789473, "grad_norm": 1.272141695022583, "learning_rate": 0.0001, "loss": 0.0162, "step": 182370 }, { "epoch": 1199.8684210526317, "grad_norm": 1.1466541290283203, "learning_rate": 0.0001, "loss": 0.0097, "step": 182380 }, { "epoch": 1199.9342105263158, "grad_norm": 1.6735228300094604, "learning_rate": 0.0001, "loss": 0.0134, "step": 182390 }, { "epoch": 1200.0, "grad_norm": 2.0447890758514404, "learning_rate": 0.0001, "loss": 0.0109, "step": 182400 }, { "epoch": 1200.0657894736842, "grad_norm": 1.582513689994812, "learning_rate": 0.0001, "loss": 0.0114, "step": 182410 }, { "epoch": 1200.1315789473683, "grad_norm": 1.7559014558792114, "learning_rate": 0.0001, "loss": 0.0104, "step": 182420 }, { "epoch": 1200.1973684210527, "grad_norm": 1.284210205078125, "learning_rate": 0.0001, "loss": 0.0109, "step": 182430 }, { "epoch": 1200.2631578947369, "grad_norm": 1.1728720664978027, "learning_rate": 0.0001, "loss": 0.0171, "step": 182440 }, { "epoch": 1200.328947368421, "grad_norm": 1.0428951978683472, "learning_rate": 0.0001, "loss": 0.0092, "step": 182450 }, { "epoch": 1200.3947368421052, "grad_norm": 1.3352020978927612, "learning_rate": 0.0001, "loss": 0.0111, "step": 182460 }, { "epoch": 1200.4605263157894, "grad_norm": 1.3914052248001099, "learning_rate": 0.0001, "loss": 0.0112, "step": 182470 }, { "epoch": 1200.5263157894738, "grad_norm": 1.4829344749450684, "learning_rate": 0.0001, "loss": 0.0108, "step": 182480 }, { "epoch": 1200.592105263158, "grad_norm": 1.2601330280303955, "learning_rate": 0.0001, "loss": 0.0127, "step": 182490 }, { "epoch": 1200.657894736842, "grad_norm": 1.397079348564148, "learning_rate": 0.0001, "loss": 0.0125, "step": 182500 }, { "epoch": 1200.7236842105262, "grad_norm": 1.270208716392517, "learning_rate": 0.0001, "loss": 0.0122, "step": 182510 }, { "epoch": 1200.7894736842106, "grad_norm": 1.7501616477966309, "learning_rate": 0.0001, "loss": 0.0116, "step": 182520 }, { "epoch": 1200.8552631578948, "grad_norm": 2.1729190349578857, "learning_rate": 0.0001, "loss": 0.0097, "step": 182530 }, { "epoch": 1200.921052631579, "grad_norm": 1.664726734161377, "learning_rate": 0.0001, "loss": 0.0122, "step": 182540 }, { "epoch": 1200.9868421052631, "grad_norm": 1.7246286869049072, "learning_rate": 0.0001, "loss": 0.015, "step": 182550 }, { "epoch": 1201.0526315789473, "grad_norm": 1.659400224685669, "learning_rate": 0.0001, "loss": 0.0117, "step": 182560 }, { "epoch": 1201.1184210526317, "grad_norm": 1.52376389503479, "learning_rate": 0.0001, "loss": 0.0113, "step": 182570 }, { "epoch": 1201.1842105263158, "grad_norm": 1.1943650245666504, "learning_rate": 0.0001, "loss": 0.0108, "step": 182580 }, { "epoch": 1201.25, "grad_norm": 1.3578540086746216, "learning_rate": 0.0001, "loss": 0.0126, "step": 182590 }, { "epoch": 1201.3157894736842, "grad_norm": 1.0658079385757446, "learning_rate": 0.0001, "loss": 0.0098, "step": 182600 }, { "epoch": 1201.3815789473683, "grad_norm": 2.027252674102783, "learning_rate": 0.0001, "loss": 0.0124, "step": 182610 }, { "epoch": 1201.4473684210527, "grad_norm": 1.9437909126281738, "learning_rate": 0.0001, "loss": 0.0107, "step": 182620 }, { "epoch": 1201.5131578947369, "grad_norm": 1.9896882772445679, "learning_rate": 0.0001, "loss": 0.0099, "step": 182630 }, { "epoch": 1201.578947368421, "grad_norm": 1.2360001802444458, "learning_rate": 0.0001, "loss": 0.0135, "step": 182640 }, { "epoch": 1201.6447368421052, "grad_norm": 1.2421250343322754, "learning_rate": 0.0001, "loss": 0.0143, "step": 182650 }, { "epoch": 1201.7105263157894, "grad_norm": 1.3906375169754028, "learning_rate": 0.0001, "loss": 0.0152, "step": 182660 }, { "epoch": 1201.7763157894738, "grad_norm": 0.9836069345474243, "learning_rate": 0.0001, "loss": 0.0105, "step": 182670 }, { "epoch": 1201.842105263158, "grad_norm": 1.5193989276885986, "learning_rate": 0.0001, "loss": 0.0093, "step": 182680 }, { "epoch": 1201.907894736842, "grad_norm": 1.2498319149017334, "learning_rate": 0.0001, "loss": 0.0128, "step": 182690 }, { "epoch": 1201.9736842105262, "grad_norm": 1.380154013633728, "learning_rate": 0.0001, "loss": 0.0113, "step": 182700 }, { "epoch": 1202.0394736842106, "grad_norm": 1.1284549236297607, "learning_rate": 0.0001, "loss": 0.0117, "step": 182710 }, { "epoch": 1202.1052631578948, "grad_norm": 1.0602751970291138, "learning_rate": 0.0001, "loss": 0.0114, "step": 182720 }, { "epoch": 1202.171052631579, "grad_norm": 1.5996694564819336, "learning_rate": 0.0001, "loss": 0.0138, "step": 182730 }, { "epoch": 1202.2368421052631, "grad_norm": 1.4893628358840942, "learning_rate": 0.0001, "loss": 0.0174, "step": 182740 }, { "epoch": 1202.3026315789473, "grad_norm": 1.1348620653152466, "learning_rate": 0.0001, "loss": 0.0155, "step": 182750 }, { "epoch": 1202.3684210526317, "grad_norm": 1.4051015377044678, "learning_rate": 0.0001, "loss": 0.0103, "step": 182760 }, { "epoch": 1202.4342105263158, "grad_norm": 1.546409249305725, "learning_rate": 0.0001, "loss": 0.0095, "step": 182770 }, { "epoch": 1202.5, "grad_norm": 1.636676549911499, "learning_rate": 0.0001, "loss": 0.0127, "step": 182780 }, { "epoch": 1202.5657894736842, "grad_norm": 1.3175495862960815, "learning_rate": 0.0001, "loss": 0.0122, "step": 182790 }, { "epoch": 1202.6315789473683, "grad_norm": 1.453131914138794, "learning_rate": 0.0001, "loss": 0.0113, "step": 182800 }, { "epoch": 1202.6973684210527, "grad_norm": 1.5770297050476074, "learning_rate": 0.0001, "loss": 0.0119, "step": 182810 }, { "epoch": 1202.7631578947369, "grad_norm": 1.378658413887024, "learning_rate": 0.0001, "loss": 0.0119, "step": 182820 }, { "epoch": 1202.828947368421, "grad_norm": 1.799135446548462, "learning_rate": 0.0001, "loss": 0.0102, "step": 182830 }, { "epoch": 1202.8947368421052, "grad_norm": 1.572259545326233, "learning_rate": 0.0001, "loss": 0.0104, "step": 182840 }, { "epoch": 1202.9605263157894, "grad_norm": 1.2894010543823242, "learning_rate": 0.0001, "loss": 0.0091, "step": 182850 }, { "epoch": 1203.0263157894738, "grad_norm": 1.3528878688812256, "learning_rate": 0.0001, "loss": 0.0103, "step": 182860 }, { "epoch": 1203.092105263158, "grad_norm": 1.7481560707092285, "learning_rate": 0.0001, "loss": 0.0122, "step": 182870 }, { "epoch": 1203.157894736842, "grad_norm": 1.1790616512298584, "learning_rate": 0.0001, "loss": 0.0126, "step": 182880 }, { "epoch": 1203.2236842105262, "grad_norm": 1.2407673597335815, "learning_rate": 0.0001, "loss": 0.0102, "step": 182890 }, { "epoch": 1203.2894736842106, "grad_norm": 1.508331060409546, "learning_rate": 0.0001, "loss": 0.0115, "step": 182900 }, { "epoch": 1203.3552631578948, "grad_norm": 1.3073382377624512, "learning_rate": 0.0001, "loss": 0.0131, "step": 182910 }, { "epoch": 1203.421052631579, "grad_norm": 1.1648343801498413, "learning_rate": 0.0001, "loss": 0.0087, "step": 182920 }, { "epoch": 1203.4868421052631, "grad_norm": 1.4526903629302979, "learning_rate": 0.0001, "loss": 0.0123, "step": 182930 }, { "epoch": 1203.5526315789473, "grad_norm": 1.6696882247924805, "learning_rate": 0.0001, "loss": 0.0099, "step": 182940 }, { "epoch": 1203.6184210526317, "grad_norm": 1.3239126205444336, "learning_rate": 0.0001, "loss": 0.0103, "step": 182950 }, { "epoch": 1203.6842105263158, "grad_norm": 1.3381779193878174, "learning_rate": 0.0001, "loss": 0.0139, "step": 182960 }, { "epoch": 1203.75, "grad_norm": 1.4185609817504883, "learning_rate": 0.0001, "loss": 0.0112, "step": 182970 }, { "epoch": 1203.8157894736842, "grad_norm": 1.5863778591156006, "learning_rate": 0.0001, "loss": 0.0126, "step": 182980 }, { "epoch": 1203.8815789473683, "grad_norm": 0.9616867303848267, "learning_rate": 0.0001, "loss": 0.0132, "step": 182990 }, { "epoch": 1203.9473684210527, "grad_norm": 1.3382362127304077, "learning_rate": 0.0001, "loss": 0.0145, "step": 183000 }, { "epoch": 1204.0131578947369, "grad_norm": 1.1840876340866089, "learning_rate": 0.0001, "loss": 0.011, "step": 183010 }, { "epoch": 1204.078947368421, "grad_norm": 1.7555612325668335, "learning_rate": 0.0001, "loss": 0.0092, "step": 183020 }, { "epoch": 1204.1447368421052, "grad_norm": 1.7114800214767456, "learning_rate": 0.0001, "loss": 0.0098, "step": 183030 }, { "epoch": 1204.2105263157894, "grad_norm": 1.4167888164520264, "learning_rate": 0.0001, "loss": 0.0158, "step": 183040 }, { "epoch": 1204.2763157894738, "grad_norm": 1.378233551979065, "learning_rate": 0.0001, "loss": 0.0104, "step": 183050 }, { "epoch": 1204.342105263158, "grad_norm": 1.8164076805114746, "learning_rate": 0.0001, "loss": 0.0111, "step": 183060 }, { "epoch": 1204.407894736842, "grad_norm": 1.3960098028182983, "learning_rate": 0.0001, "loss": 0.0095, "step": 183070 }, { "epoch": 1204.4736842105262, "grad_norm": 1.1789876222610474, "learning_rate": 0.0001, "loss": 0.0108, "step": 183080 }, { "epoch": 1204.5394736842106, "grad_norm": 1.4749737977981567, "learning_rate": 0.0001, "loss": 0.0165, "step": 183090 }, { "epoch": 1204.6052631578948, "grad_norm": 1.3639384508132935, "learning_rate": 0.0001, "loss": 0.0108, "step": 183100 }, { "epoch": 1204.671052631579, "grad_norm": 1.7576892375946045, "learning_rate": 0.0001, "loss": 0.0123, "step": 183110 }, { "epoch": 1204.7368421052631, "grad_norm": 1.1395140886306763, "learning_rate": 0.0001, "loss": 0.0145, "step": 183120 }, { "epoch": 1204.8026315789473, "grad_norm": 1.6290117502212524, "learning_rate": 0.0001, "loss": 0.0127, "step": 183130 }, { "epoch": 1204.8684210526317, "grad_norm": 1.7599492073059082, "learning_rate": 0.0001, "loss": 0.0122, "step": 183140 }, { "epoch": 1204.9342105263158, "grad_norm": 1.596353530883789, "learning_rate": 0.0001, "loss": 0.0105, "step": 183150 }, { "epoch": 1205.0, "grad_norm": 1.4949827194213867, "learning_rate": 0.0001, "loss": 0.0151, "step": 183160 }, { "epoch": 1205.0657894736842, "grad_norm": 1.472423791885376, "learning_rate": 0.0001, "loss": 0.0131, "step": 183170 }, { "epoch": 1205.1315789473683, "grad_norm": 1.3993942737579346, "learning_rate": 0.0001, "loss": 0.0115, "step": 183180 }, { "epoch": 1205.1973684210527, "grad_norm": 1.5019534826278687, "learning_rate": 0.0001, "loss": 0.0103, "step": 183190 }, { "epoch": 1205.2631578947369, "grad_norm": 2.046924114227295, "learning_rate": 0.0001, "loss": 0.0124, "step": 183200 }, { "epoch": 1205.328947368421, "grad_norm": 1.397500991821289, "learning_rate": 0.0001, "loss": 0.0136, "step": 183210 }, { "epoch": 1205.3947368421052, "grad_norm": 1.5272618532180786, "learning_rate": 0.0001, "loss": 0.01, "step": 183220 }, { "epoch": 1205.4605263157894, "grad_norm": 1.536831259727478, "learning_rate": 0.0001, "loss": 0.011, "step": 183230 }, { "epoch": 1205.5263157894738, "grad_norm": 1.4028255939483643, "learning_rate": 0.0001, "loss": 0.0098, "step": 183240 }, { "epoch": 1205.592105263158, "grad_norm": 1.7280622720718384, "learning_rate": 0.0001, "loss": 0.0148, "step": 183250 }, { "epoch": 1205.657894736842, "grad_norm": 1.247133493423462, "learning_rate": 0.0001, "loss": 0.0114, "step": 183260 }, { "epoch": 1205.7236842105262, "grad_norm": 1.118105411529541, "learning_rate": 0.0001, "loss": 0.0127, "step": 183270 }, { "epoch": 1205.7894736842106, "grad_norm": 1.3045361042022705, "learning_rate": 0.0001, "loss": 0.0147, "step": 183280 }, { "epoch": 1205.8552631578948, "grad_norm": 1.5359129905700684, "learning_rate": 0.0001, "loss": 0.0114, "step": 183290 }, { "epoch": 1205.921052631579, "grad_norm": 1.6755226850509644, "learning_rate": 0.0001, "loss": 0.0113, "step": 183300 }, { "epoch": 1205.9868421052631, "grad_norm": 1.6592198610305786, "learning_rate": 0.0001, "loss": 0.01, "step": 183310 }, { "epoch": 1206.0526315789473, "grad_norm": 1.6728466749191284, "learning_rate": 0.0001, "loss": 0.0103, "step": 183320 }, { "epoch": 1206.1184210526317, "grad_norm": 1.3012150526046753, "learning_rate": 0.0001, "loss": 0.0085, "step": 183330 }, { "epoch": 1206.1842105263158, "grad_norm": 1.5153006315231323, "learning_rate": 0.0001, "loss": 0.0127, "step": 183340 }, { "epoch": 1206.25, "grad_norm": 1.1960012912750244, "learning_rate": 0.0001, "loss": 0.0097, "step": 183350 }, { "epoch": 1206.3157894736842, "grad_norm": 1.2342413663864136, "learning_rate": 0.0001, "loss": 0.0127, "step": 183360 }, { "epoch": 1206.3815789473683, "grad_norm": 1.209977149963379, "learning_rate": 0.0001, "loss": 0.011, "step": 183370 }, { "epoch": 1206.4473684210527, "grad_norm": 1.495352029800415, "learning_rate": 0.0001, "loss": 0.0168, "step": 183380 }, { "epoch": 1206.5131578947369, "grad_norm": 1.4862654209136963, "learning_rate": 0.0001, "loss": 0.0133, "step": 183390 }, { "epoch": 1206.578947368421, "grad_norm": 1.6409178972244263, "learning_rate": 0.0001, "loss": 0.0169, "step": 183400 }, { "epoch": 1206.6447368421052, "grad_norm": 1.3941552639007568, "learning_rate": 0.0001, "loss": 0.011, "step": 183410 }, { "epoch": 1206.7105263157894, "grad_norm": 1.7517387866973877, "learning_rate": 0.0001, "loss": 0.0121, "step": 183420 }, { "epoch": 1206.7763157894738, "grad_norm": 2.0545926094055176, "learning_rate": 0.0001, "loss": 0.0118, "step": 183430 }, { "epoch": 1206.842105263158, "grad_norm": 2.1559395790100098, "learning_rate": 0.0001, "loss": 0.011, "step": 183440 }, { "epoch": 1206.907894736842, "grad_norm": 2.3358652591705322, "learning_rate": 0.0001, "loss": 0.0116, "step": 183450 }, { "epoch": 1206.9736842105262, "grad_norm": 1.5828877687454224, "learning_rate": 0.0001, "loss": 0.0113, "step": 183460 }, { "epoch": 1207.0394736842106, "grad_norm": 1.5186651945114136, "learning_rate": 0.0001, "loss": 0.0116, "step": 183470 }, { "epoch": 1207.1052631578948, "grad_norm": 1.5842972993850708, "learning_rate": 0.0001, "loss": 0.0116, "step": 183480 }, { "epoch": 1207.171052631579, "grad_norm": 1.2923448085784912, "learning_rate": 0.0001, "loss": 0.0155, "step": 183490 }, { "epoch": 1207.2368421052631, "grad_norm": 1.5698707103729248, "learning_rate": 0.0001, "loss": 0.0108, "step": 183500 }, { "epoch": 1207.3026315789473, "grad_norm": 1.352089762687683, "learning_rate": 0.0001, "loss": 0.0148, "step": 183510 }, { "epoch": 1207.3684210526317, "grad_norm": 1.3709075450897217, "learning_rate": 0.0001, "loss": 0.008, "step": 183520 }, { "epoch": 1207.4342105263158, "grad_norm": 1.567407250404358, "learning_rate": 0.0001, "loss": 0.0108, "step": 183530 }, { "epoch": 1207.5, "grad_norm": 1.6472855806350708, "learning_rate": 0.0001, "loss": 0.0105, "step": 183540 }, { "epoch": 1207.5657894736842, "grad_norm": 1.6631754636764526, "learning_rate": 0.0001, "loss": 0.0123, "step": 183550 }, { "epoch": 1207.6315789473683, "grad_norm": 1.5551279783248901, "learning_rate": 0.0001, "loss": 0.0135, "step": 183560 }, { "epoch": 1207.6973684210527, "grad_norm": 1.7225749492645264, "learning_rate": 0.0001, "loss": 0.0102, "step": 183570 }, { "epoch": 1207.7631578947369, "grad_norm": 1.1735285520553589, "learning_rate": 0.0001, "loss": 0.013, "step": 183580 }, { "epoch": 1207.828947368421, "grad_norm": 1.524477481842041, "learning_rate": 0.0001, "loss": 0.0111, "step": 183590 }, { "epoch": 1207.8947368421052, "grad_norm": 1.3100042343139648, "learning_rate": 0.0001, "loss": 0.013, "step": 183600 }, { "epoch": 1207.9605263157894, "grad_norm": 1.4013437032699585, "learning_rate": 0.0001, "loss": 0.01, "step": 183610 }, { "epoch": 1208.0263157894738, "grad_norm": 1.2120552062988281, "learning_rate": 0.0001, "loss": 0.0101, "step": 183620 }, { "epoch": 1208.092105263158, "grad_norm": 1.560067057609558, "learning_rate": 0.0001, "loss": 0.012, "step": 183630 }, { "epoch": 1208.157894736842, "grad_norm": 1.7993714809417725, "learning_rate": 0.0001, "loss": 0.0136, "step": 183640 }, { "epoch": 1208.2236842105262, "grad_norm": 1.6683564186096191, "learning_rate": 0.0001, "loss": 0.0116, "step": 183650 }, { "epoch": 1208.2894736842106, "grad_norm": 2.013200044631958, "learning_rate": 0.0001, "loss": 0.0088, "step": 183660 }, { "epoch": 1208.3552631578948, "grad_norm": 1.7340012788772583, "learning_rate": 0.0001, "loss": 0.0113, "step": 183670 }, { "epoch": 1208.421052631579, "grad_norm": 1.6998413801193237, "learning_rate": 0.0001, "loss": 0.0129, "step": 183680 }, { "epoch": 1208.4868421052631, "grad_norm": 1.692186713218689, "learning_rate": 0.0001, "loss": 0.0095, "step": 183690 }, { "epoch": 1208.5526315789473, "grad_norm": 1.1842854022979736, "learning_rate": 0.0001, "loss": 0.0132, "step": 183700 }, { "epoch": 1208.6184210526317, "grad_norm": 1.6265720129013062, "learning_rate": 0.0001, "loss": 0.009, "step": 183710 }, { "epoch": 1208.6842105263158, "grad_norm": 1.439220905303955, "learning_rate": 0.0001, "loss": 0.014, "step": 183720 }, { "epoch": 1208.75, "grad_norm": 1.5344557762145996, "learning_rate": 0.0001, "loss": 0.0148, "step": 183730 }, { "epoch": 1208.8157894736842, "grad_norm": 1.249257206916809, "learning_rate": 0.0001, "loss": 0.0104, "step": 183740 }, { "epoch": 1208.8815789473683, "grad_norm": 1.3123711347579956, "learning_rate": 0.0001, "loss": 0.0137, "step": 183750 }, { "epoch": 1208.9473684210527, "grad_norm": 1.1543691158294678, "learning_rate": 0.0001, "loss": 0.0098, "step": 183760 }, { "epoch": 1209.0131578947369, "grad_norm": 1.5706441402435303, "learning_rate": 0.0001, "loss": 0.0113, "step": 183770 }, { "epoch": 1209.078947368421, "grad_norm": 1.3986729383468628, "learning_rate": 0.0001, "loss": 0.0129, "step": 183780 }, { "epoch": 1209.1447368421052, "grad_norm": 1.1260381937026978, "learning_rate": 0.0001, "loss": 0.0103, "step": 183790 }, { "epoch": 1209.2105263157894, "grad_norm": 1.5819392204284668, "learning_rate": 0.0001, "loss": 0.0132, "step": 183800 }, { "epoch": 1209.2763157894738, "grad_norm": 1.3482987880706787, "learning_rate": 0.0001, "loss": 0.011, "step": 183810 }, { "epoch": 1209.342105263158, "grad_norm": 1.0182491540908813, "learning_rate": 0.0001, "loss": 0.0124, "step": 183820 }, { "epoch": 1209.407894736842, "grad_norm": 1.5091811418533325, "learning_rate": 0.0001, "loss": 0.0101, "step": 183830 }, { "epoch": 1209.4736842105262, "grad_norm": 1.5546860694885254, "learning_rate": 0.0001, "loss": 0.01, "step": 183840 }, { "epoch": 1209.5394736842106, "grad_norm": 1.2053159475326538, "learning_rate": 0.0001, "loss": 0.0132, "step": 183850 }, { "epoch": 1209.6052631578948, "grad_norm": 1.592341661453247, "learning_rate": 0.0001, "loss": 0.0126, "step": 183860 }, { "epoch": 1209.671052631579, "grad_norm": 1.7074567079544067, "learning_rate": 0.0001, "loss": 0.0108, "step": 183870 }, { "epoch": 1209.7368421052631, "grad_norm": 1.8592902421951294, "learning_rate": 0.0001, "loss": 0.0123, "step": 183880 }, { "epoch": 1209.8026315789473, "grad_norm": 1.748378872871399, "learning_rate": 0.0001, "loss": 0.0081, "step": 183890 }, { "epoch": 1209.8684210526317, "grad_norm": 0.8025380969047546, "learning_rate": 0.0001, "loss": 0.0144, "step": 183900 }, { "epoch": 1209.9342105263158, "grad_norm": 1.8943196535110474, "learning_rate": 0.0001, "loss": 0.0168, "step": 183910 }, { "epoch": 1210.0, "grad_norm": 1.5526551008224487, "learning_rate": 0.0001, "loss": 0.0113, "step": 183920 }, { "epoch": 1210.0657894736842, "grad_norm": 1.4431174993515015, "learning_rate": 0.0001, "loss": 0.0118, "step": 183930 }, { "epoch": 1210.1315789473683, "grad_norm": 1.688730239868164, "learning_rate": 0.0001, "loss": 0.0136, "step": 183940 }, { "epoch": 1210.1973684210527, "grad_norm": 2.2332825660705566, "learning_rate": 0.0001, "loss": 0.0112, "step": 183950 }, { "epoch": 1210.2631578947369, "grad_norm": 1.7185882329940796, "learning_rate": 0.0001, "loss": 0.0129, "step": 183960 }, { "epoch": 1210.328947368421, "grad_norm": 1.763987421989441, "learning_rate": 0.0001, "loss": 0.008, "step": 183970 }, { "epoch": 1210.3947368421052, "grad_norm": 1.5026801824569702, "learning_rate": 0.0001, "loss": 0.0088, "step": 183980 }, { "epoch": 1210.4605263157894, "grad_norm": 1.1030330657958984, "learning_rate": 0.0001, "loss": 0.0104, "step": 183990 }, { "epoch": 1210.5263157894738, "grad_norm": 1.5933244228363037, "learning_rate": 0.0001, "loss": 0.0134, "step": 184000 }, { "epoch": 1210.592105263158, "grad_norm": 1.4223265647888184, "learning_rate": 0.0001, "loss": 0.0142, "step": 184010 }, { "epoch": 1210.657894736842, "grad_norm": 1.6546822786331177, "learning_rate": 0.0001, "loss": 0.0101, "step": 184020 }, { "epoch": 1210.7236842105262, "grad_norm": 1.2448925971984863, "learning_rate": 0.0001, "loss": 0.0115, "step": 184030 }, { "epoch": 1210.7894736842106, "grad_norm": 1.0185606479644775, "learning_rate": 0.0001, "loss": 0.0142, "step": 184040 }, { "epoch": 1210.8552631578948, "grad_norm": 1.5173640251159668, "learning_rate": 0.0001, "loss": 0.011, "step": 184050 }, { "epoch": 1210.921052631579, "grad_norm": 1.2412794828414917, "learning_rate": 0.0001, "loss": 0.0137, "step": 184060 }, { "epoch": 1210.9868421052631, "grad_norm": 1.3907177448272705, "learning_rate": 0.0001, "loss": 0.0122, "step": 184070 }, { "epoch": 1211.0526315789473, "grad_norm": 1.3219045400619507, "learning_rate": 0.0001, "loss": 0.0131, "step": 184080 }, { "epoch": 1211.1184210526317, "grad_norm": 1.5139260292053223, "learning_rate": 0.0001, "loss": 0.0105, "step": 184090 }, { "epoch": 1211.1842105263158, "grad_norm": 1.1564385890960693, "learning_rate": 0.0001, "loss": 0.0102, "step": 184100 }, { "epoch": 1211.25, "grad_norm": 1.3487344980239868, "learning_rate": 0.0001, "loss": 0.0113, "step": 184110 }, { "epoch": 1211.3157894736842, "grad_norm": 1.3852537870407104, "learning_rate": 0.0001, "loss": 0.0163, "step": 184120 }, { "epoch": 1211.3815789473683, "grad_norm": 1.4242624044418335, "learning_rate": 0.0001, "loss": 0.013, "step": 184130 }, { "epoch": 1211.4473684210527, "grad_norm": 1.1077730655670166, "learning_rate": 0.0001, "loss": 0.0115, "step": 184140 }, { "epoch": 1211.5131578947369, "grad_norm": 1.626789927482605, "learning_rate": 0.0001, "loss": 0.0131, "step": 184150 }, { "epoch": 1211.578947368421, "grad_norm": 1.7170615196228027, "learning_rate": 0.0001, "loss": 0.0102, "step": 184160 }, { "epoch": 1211.6447368421052, "grad_norm": 1.6954624652862549, "learning_rate": 0.0001, "loss": 0.0157, "step": 184170 }, { "epoch": 1211.7105263157894, "grad_norm": 1.0054237842559814, "learning_rate": 0.0001, "loss": 0.0125, "step": 184180 }, { "epoch": 1211.7763157894738, "grad_norm": 1.3658738136291504, "learning_rate": 0.0001, "loss": 0.0086, "step": 184190 }, { "epoch": 1211.842105263158, "grad_norm": 1.9204076528549194, "learning_rate": 0.0001, "loss": 0.0115, "step": 184200 }, { "epoch": 1211.907894736842, "grad_norm": 1.3313957452774048, "learning_rate": 0.0001, "loss": 0.0111, "step": 184210 }, { "epoch": 1211.9736842105262, "grad_norm": 1.1426889896392822, "learning_rate": 0.0001, "loss": 0.0114, "step": 184220 }, { "epoch": 1212.0394736842106, "grad_norm": 1.4107810258865356, "learning_rate": 0.0001, "loss": 0.013, "step": 184230 }, { "epoch": 1212.1052631578948, "grad_norm": 1.3066743612289429, "learning_rate": 0.0001, "loss": 0.0096, "step": 184240 }, { "epoch": 1212.171052631579, "grad_norm": 1.2285373210906982, "learning_rate": 0.0001, "loss": 0.0114, "step": 184250 }, { "epoch": 1212.2368421052631, "grad_norm": 1.0905097723007202, "learning_rate": 0.0001, "loss": 0.0134, "step": 184260 }, { "epoch": 1212.3026315789473, "grad_norm": 1.2892394065856934, "learning_rate": 0.0001, "loss": 0.0113, "step": 184270 }, { "epoch": 1212.3684210526317, "grad_norm": 1.2929456233978271, "learning_rate": 0.0001, "loss": 0.0154, "step": 184280 }, { "epoch": 1212.4342105263158, "grad_norm": 1.3117426633834839, "learning_rate": 0.0001, "loss": 0.0135, "step": 184290 }, { "epoch": 1212.5, "grad_norm": 1.4227375984191895, "learning_rate": 0.0001, "loss": 0.0092, "step": 184300 }, { "epoch": 1212.5657894736842, "grad_norm": 1.4891667366027832, "learning_rate": 0.0001, "loss": 0.0142, "step": 184310 }, { "epoch": 1212.6315789473683, "grad_norm": 1.6523514986038208, "learning_rate": 0.0001, "loss": 0.011, "step": 184320 }, { "epoch": 1212.6973684210527, "grad_norm": 1.3494019508361816, "learning_rate": 0.0001, "loss": 0.0154, "step": 184330 }, { "epoch": 1212.7631578947369, "grad_norm": 1.2214930057525635, "learning_rate": 0.0001, "loss": 0.0094, "step": 184340 }, { "epoch": 1212.828947368421, "grad_norm": 1.3928627967834473, "learning_rate": 0.0001, "loss": 0.0111, "step": 184350 }, { "epoch": 1212.8947368421052, "grad_norm": 1.0390739440917969, "learning_rate": 0.0001, "loss": 0.0102, "step": 184360 }, { "epoch": 1212.9605263157894, "grad_norm": 1.2532490491867065, "learning_rate": 0.0001, "loss": 0.0117, "step": 184370 }, { "epoch": 1213.0263157894738, "grad_norm": 1.0870311260223389, "learning_rate": 0.0001, "loss": 0.0137, "step": 184380 }, { "epoch": 1213.092105263158, "grad_norm": 1.4610052108764648, "learning_rate": 0.0001, "loss": 0.0112, "step": 184390 }, { "epoch": 1213.157894736842, "grad_norm": 2.019118070602417, "learning_rate": 0.0001, "loss": 0.0118, "step": 184400 }, { "epoch": 1213.2236842105262, "grad_norm": 1.9361543655395508, "learning_rate": 0.0001, "loss": 0.0139, "step": 184410 }, { "epoch": 1213.2894736842106, "grad_norm": 1.8678159713745117, "learning_rate": 0.0001, "loss": 0.0133, "step": 184420 }, { "epoch": 1213.3552631578948, "grad_norm": 2.064701557159424, "learning_rate": 0.0001, "loss": 0.0111, "step": 184430 }, { "epoch": 1213.421052631579, "grad_norm": 1.5778220891952515, "learning_rate": 0.0001, "loss": 0.0116, "step": 184440 }, { "epoch": 1213.4868421052631, "grad_norm": 1.3935149908065796, "learning_rate": 0.0001, "loss": 0.0096, "step": 184450 }, { "epoch": 1213.5526315789473, "grad_norm": 1.4208654165267944, "learning_rate": 0.0001, "loss": 0.0144, "step": 184460 }, { "epoch": 1213.6184210526317, "grad_norm": 1.5904535055160522, "learning_rate": 0.0001, "loss": 0.0096, "step": 184470 }, { "epoch": 1213.6842105263158, "grad_norm": 1.5873074531555176, "learning_rate": 0.0001, "loss": 0.0134, "step": 184480 }, { "epoch": 1213.75, "grad_norm": 1.7314131259918213, "learning_rate": 0.0001, "loss": 0.0135, "step": 184490 }, { "epoch": 1213.8157894736842, "grad_norm": 1.5396795272827148, "learning_rate": 0.0001, "loss": 0.0119, "step": 184500 }, { "epoch": 1213.8815789473683, "grad_norm": 1.329467535018921, "learning_rate": 0.0001, "loss": 0.0081, "step": 184510 }, { "epoch": 1213.9473684210527, "grad_norm": 1.6333223581314087, "learning_rate": 0.0001, "loss": 0.0086, "step": 184520 }, { "epoch": 1214.0131578947369, "grad_norm": 1.479758620262146, "learning_rate": 0.0001, "loss": 0.0123, "step": 184530 }, { "epoch": 1214.078947368421, "grad_norm": 1.206411600112915, "learning_rate": 0.0001, "loss": 0.0128, "step": 184540 }, { "epoch": 1214.1447368421052, "grad_norm": 1.420303225517273, "learning_rate": 0.0001, "loss": 0.0132, "step": 184550 }, { "epoch": 1214.2105263157894, "grad_norm": 0.9874785542488098, "learning_rate": 0.0001, "loss": 0.0087, "step": 184560 }, { "epoch": 1214.2763157894738, "grad_norm": 0.9819043874740601, "learning_rate": 0.0001, "loss": 0.0137, "step": 184570 }, { "epoch": 1214.342105263158, "grad_norm": 0.9744272828102112, "learning_rate": 0.0001, "loss": 0.0149, "step": 184580 }, { "epoch": 1214.407894736842, "grad_norm": 1.1718404293060303, "learning_rate": 0.0001, "loss": 0.0117, "step": 184590 }, { "epoch": 1214.4736842105262, "grad_norm": 1.156461238861084, "learning_rate": 0.0001, "loss": 0.0102, "step": 184600 }, { "epoch": 1214.5394736842106, "grad_norm": 1.3270618915557861, "learning_rate": 0.0001, "loss": 0.0134, "step": 184610 }, { "epoch": 1214.6052631578948, "grad_norm": 1.3951714038848877, "learning_rate": 0.0001, "loss": 0.0112, "step": 184620 }, { "epoch": 1214.671052631579, "grad_norm": 1.6431571245193481, "learning_rate": 0.0001, "loss": 0.0131, "step": 184630 }, { "epoch": 1214.7368421052631, "grad_norm": 1.9087117910385132, "learning_rate": 0.0001, "loss": 0.0133, "step": 184640 }, { "epoch": 1214.8026315789473, "grad_norm": 1.917041301727295, "learning_rate": 0.0001, "loss": 0.0094, "step": 184650 }, { "epoch": 1214.8684210526317, "grad_norm": 1.5243728160858154, "learning_rate": 0.0001, "loss": 0.0129, "step": 184660 }, { "epoch": 1214.9342105263158, "grad_norm": 1.213013768196106, "learning_rate": 0.0001, "loss": 0.0124, "step": 184670 }, { "epoch": 1215.0, "grad_norm": 1.5323652029037476, "learning_rate": 0.0001, "loss": 0.0113, "step": 184680 }, { "epoch": 1215.0657894736842, "grad_norm": 1.3278688192367554, "learning_rate": 0.0001, "loss": 0.0106, "step": 184690 }, { "epoch": 1215.1315789473683, "grad_norm": 1.4059085845947266, "learning_rate": 0.0001, "loss": 0.0165, "step": 184700 }, { "epoch": 1215.1973684210527, "grad_norm": 1.7069703340530396, "learning_rate": 0.0001, "loss": 0.0093, "step": 184710 }, { "epoch": 1215.2631578947369, "grad_norm": 1.8088420629501343, "learning_rate": 0.0001, "loss": 0.0106, "step": 184720 }, { "epoch": 1215.328947368421, "grad_norm": 1.0482141971588135, "learning_rate": 0.0001, "loss": 0.0104, "step": 184730 }, { "epoch": 1215.3947368421052, "grad_norm": 1.3755805492401123, "learning_rate": 0.0001, "loss": 0.012, "step": 184740 }, { "epoch": 1215.4605263157894, "grad_norm": 1.024950385093689, "learning_rate": 0.0001, "loss": 0.0124, "step": 184750 }, { "epoch": 1215.5263157894738, "grad_norm": 1.2939929962158203, "learning_rate": 0.0001, "loss": 0.0087, "step": 184760 }, { "epoch": 1215.592105263158, "grad_norm": 1.3546000719070435, "learning_rate": 0.0001, "loss": 0.0127, "step": 184770 }, { "epoch": 1215.657894736842, "grad_norm": 1.1594167947769165, "learning_rate": 0.0001, "loss": 0.0144, "step": 184780 }, { "epoch": 1215.7236842105262, "grad_norm": 1.7729315757751465, "learning_rate": 0.0001, "loss": 0.0138, "step": 184790 }, { "epoch": 1215.7894736842106, "grad_norm": 1.2759640216827393, "learning_rate": 0.0001, "loss": 0.01, "step": 184800 }, { "epoch": 1215.8552631578948, "grad_norm": 1.5985288619995117, "learning_rate": 0.0001, "loss": 0.0124, "step": 184810 }, { "epoch": 1215.921052631579, "grad_norm": 1.34328031539917, "learning_rate": 0.0001, "loss": 0.0147, "step": 184820 }, { "epoch": 1215.9868421052631, "grad_norm": 1.6018837690353394, "learning_rate": 0.0001, "loss": 0.0114, "step": 184830 }, { "epoch": 1216.0526315789473, "grad_norm": 1.7020334005355835, "learning_rate": 0.0001, "loss": 0.0128, "step": 184840 }, { "epoch": 1216.1184210526317, "grad_norm": 1.2418631315231323, "learning_rate": 0.0001, "loss": 0.0123, "step": 184850 }, { "epoch": 1216.1842105263158, "grad_norm": 1.1971856355667114, "learning_rate": 0.0001, "loss": 0.0128, "step": 184860 }, { "epoch": 1216.25, "grad_norm": 1.1491758823394775, "learning_rate": 0.0001, "loss": 0.0157, "step": 184870 }, { "epoch": 1216.3157894736842, "grad_norm": 1.5500383377075195, "learning_rate": 0.0001, "loss": 0.0087, "step": 184880 }, { "epoch": 1216.3815789473683, "grad_norm": 1.3927955627441406, "learning_rate": 0.0001, "loss": 0.0123, "step": 184890 }, { "epoch": 1216.4473684210527, "grad_norm": 1.1050338745117188, "learning_rate": 0.0001, "loss": 0.0109, "step": 184900 }, { "epoch": 1216.5131578947369, "grad_norm": 1.123747706413269, "learning_rate": 0.0001, "loss": 0.014, "step": 184910 }, { "epoch": 1216.578947368421, "grad_norm": 1.410691499710083, "learning_rate": 0.0001, "loss": 0.0114, "step": 184920 }, { "epoch": 1216.6447368421052, "grad_norm": 1.1344605684280396, "learning_rate": 0.0001, "loss": 0.0115, "step": 184930 }, { "epoch": 1216.7105263157894, "grad_norm": 1.3063597679138184, "learning_rate": 0.0001, "loss": 0.0099, "step": 184940 }, { "epoch": 1216.7763157894738, "grad_norm": 1.2193470001220703, "learning_rate": 0.0001, "loss": 0.009, "step": 184950 }, { "epoch": 1216.842105263158, "grad_norm": 1.6197489500045776, "learning_rate": 0.0001, "loss": 0.0089, "step": 184960 }, { "epoch": 1216.907894736842, "grad_norm": 1.4535894393920898, "learning_rate": 0.0001, "loss": 0.0135, "step": 184970 }, { "epoch": 1216.9736842105262, "grad_norm": 1.5054553747177124, "learning_rate": 0.0001, "loss": 0.0127, "step": 184980 }, { "epoch": 1217.0394736842106, "grad_norm": 1.565679907798767, "learning_rate": 0.0001, "loss": 0.0143, "step": 184990 }, { "epoch": 1217.1052631578948, "grad_norm": 1.8641942739486694, "learning_rate": 0.0001, "loss": 0.0153, "step": 185000 }, { "epoch": 1217.171052631579, "grad_norm": 1.2680609226226807, "learning_rate": 0.0001, "loss": 0.0112, "step": 185010 }, { "epoch": 1217.2368421052631, "grad_norm": 1.7093956470489502, "learning_rate": 0.0001, "loss": 0.0087, "step": 185020 }, { "epoch": 1217.3026315789473, "grad_norm": 1.6461366415023804, "learning_rate": 0.0001, "loss": 0.0176, "step": 185030 }, { "epoch": 1217.3684210526317, "grad_norm": 1.180574893951416, "learning_rate": 0.0001, "loss": 0.0134, "step": 185040 }, { "epoch": 1217.4342105263158, "grad_norm": 1.5092123746871948, "learning_rate": 0.0001, "loss": 0.0091, "step": 185050 }, { "epoch": 1217.5, "grad_norm": 1.6742804050445557, "learning_rate": 0.0001, "loss": 0.0108, "step": 185060 }, { "epoch": 1217.5657894736842, "grad_norm": 1.4610114097595215, "learning_rate": 0.0001, "loss": 0.0109, "step": 185070 }, { "epoch": 1217.6315789473683, "grad_norm": 1.4147202968597412, "learning_rate": 0.0001, "loss": 0.0129, "step": 185080 }, { "epoch": 1217.6973684210527, "grad_norm": 1.7437200546264648, "learning_rate": 0.0001, "loss": 0.0126, "step": 185090 }, { "epoch": 1217.7631578947369, "grad_norm": 1.0298084020614624, "learning_rate": 0.0001, "loss": 0.0101, "step": 185100 }, { "epoch": 1217.828947368421, "grad_norm": 0.9712632894515991, "learning_rate": 0.0001, "loss": 0.0133, "step": 185110 }, { "epoch": 1217.8947368421052, "grad_norm": 1.4199920892715454, "learning_rate": 0.0001, "loss": 0.0126, "step": 185120 }, { "epoch": 1217.9605263157894, "grad_norm": 1.292150855064392, "learning_rate": 0.0001, "loss": 0.0107, "step": 185130 }, { "epoch": 1218.0263157894738, "grad_norm": 1.0838444232940674, "learning_rate": 0.0001, "loss": 0.0082, "step": 185140 }, { "epoch": 1218.092105263158, "grad_norm": 1.6976882219314575, "learning_rate": 0.0001, "loss": 0.0131, "step": 185150 }, { "epoch": 1218.157894736842, "grad_norm": 1.7641984224319458, "learning_rate": 0.0001, "loss": 0.0129, "step": 185160 }, { "epoch": 1218.2236842105262, "grad_norm": 1.330698013305664, "learning_rate": 0.0001, "loss": 0.0112, "step": 185170 }, { "epoch": 1218.2894736842106, "grad_norm": 1.457452416419983, "learning_rate": 0.0001, "loss": 0.0105, "step": 185180 }, { "epoch": 1218.3552631578948, "grad_norm": 1.2262333631515503, "learning_rate": 0.0001, "loss": 0.0128, "step": 185190 }, { "epoch": 1218.421052631579, "grad_norm": 1.4454998970031738, "learning_rate": 0.0001, "loss": 0.0123, "step": 185200 }, { "epoch": 1218.4868421052631, "grad_norm": 1.4521052837371826, "learning_rate": 0.0001, "loss": 0.013, "step": 185210 }, { "epoch": 1218.5526315789473, "grad_norm": 1.8480271100997925, "learning_rate": 0.0001, "loss": 0.0127, "step": 185220 }, { "epoch": 1218.6184210526317, "grad_norm": 2.012345314025879, "learning_rate": 0.0001, "loss": 0.0136, "step": 185230 }, { "epoch": 1218.6842105263158, "grad_norm": 1.6842195987701416, "learning_rate": 0.0001, "loss": 0.0126, "step": 185240 }, { "epoch": 1218.75, "grad_norm": 1.8047178983688354, "learning_rate": 0.0001, "loss": 0.0105, "step": 185250 }, { "epoch": 1218.8157894736842, "grad_norm": 1.8870514631271362, "learning_rate": 0.0001, "loss": 0.012, "step": 185260 }, { "epoch": 1218.8815789473683, "grad_norm": 1.5601634979248047, "learning_rate": 0.0001, "loss": 0.0124, "step": 185270 }, { "epoch": 1218.9473684210527, "grad_norm": 1.2651504278182983, "learning_rate": 0.0001, "loss": 0.0138, "step": 185280 }, { "epoch": 1219.0131578947369, "grad_norm": 1.2345330715179443, "learning_rate": 0.0001, "loss": 0.0115, "step": 185290 }, { "epoch": 1219.078947368421, "grad_norm": 1.5301337242126465, "learning_rate": 0.0001, "loss": 0.0084, "step": 185300 }, { "epoch": 1219.1447368421052, "grad_norm": 2.0324270725250244, "learning_rate": 0.0001, "loss": 0.0148, "step": 185310 }, { "epoch": 1219.2105263157894, "grad_norm": 1.208821415901184, "learning_rate": 0.0001, "loss": 0.0119, "step": 185320 }, { "epoch": 1219.2763157894738, "grad_norm": 1.9915457963943481, "learning_rate": 0.0001, "loss": 0.0109, "step": 185330 }, { "epoch": 1219.342105263158, "grad_norm": 1.3698558807373047, "learning_rate": 0.0001, "loss": 0.0104, "step": 185340 }, { "epoch": 1219.407894736842, "grad_norm": 1.386894941329956, "learning_rate": 0.0001, "loss": 0.0136, "step": 185350 }, { "epoch": 1219.4736842105262, "grad_norm": 1.6654905080795288, "learning_rate": 0.0001, "loss": 0.0142, "step": 185360 }, { "epoch": 1219.5394736842106, "grad_norm": 1.6149836778640747, "learning_rate": 0.0001, "loss": 0.0097, "step": 185370 }, { "epoch": 1219.6052631578948, "grad_norm": 1.9179326295852661, "learning_rate": 0.0001, "loss": 0.0123, "step": 185380 }, { "epoch": 1219.671052631579, "grad_norm": 1.365022897720337, "learning_rate": 0.0001, "loss": 0.0158, "step": 185390 }, { "epoch": 1219.7368421052631, "grad_norm": 1.9684892892837524, "learning_rate": 0.0001, "loss": 0.0079, "step": 185400 }, { "epoch": 1219.8026315789473, "grad_norm": 2.071988821029663, "learning_rate": 0.0001, "loss": 0.0113, "step": 185410 }, { "epoch": 1219.8684210526317, "grad_norm": 1.5238938331604004, "learning_rate": 0.0001, "loss": 0.0104, "step": 185420 }, { "epoch": 1219.9342105263158, "grad_norm": 1.3021224737167358, "learning_rate": 0.0001, "loss": 0.0104, "step": 185430 }, { "epoch": 1220.0, "grad_norm": 1.6684153079986572, "learning_rate": 0.0001, "loss": 0.0143, "step": 185440 }, { "epoch": 1220.0657894736842, "grad_norm": 1.519324779510498, "learning_rate": 0.0001, "loss": 0.015, "step": 185450 }, { "epoch": 1220.1315789473683, "grad_norm": 1.171239972114563, "learning_rate": 0.0001, "loss": 0.0115, "step": 185460 }, { "epoch": 1220.1973684210527, "grad_norm": 1.5505027770996094, "learning_rate": 0.0001, "loss": 0.0143, "step": 185470 }, { "epoch": 1220.2631578947369, "grad_norm": 1.4494495391845703, "learning_rate": 0.0001, "loss": 0.0132, "step": 185480 }, { "epoch": 1220.328947368421, "grad_norm": 1.2187467813491821, "learning_rate": 0.0001, "loss": 0.0159, "step": 185490 }, { "epoch": 1220.3947368421052, "grad_norm": 1.5204311609268188, "learning_rate": 0.0001, "loss": 0.0096, "step": 185500 }, { "epoch": 1220.4605263157894, "grad_norm": 1.2457242012023926, "learning_rate": 0.0001, "loss": 0.0116, "step": 185510 }, { "epoch": 1220.5263157894738, "grad_norm": 1.4198485612869263, "learning_rate": 0.0001, "loss": 0.0131, "step": 185520 }, { "epoch": 1220.592105263158, "grad_norm": 1.151708960533142, "learning_rate": 0.0001, "loss": 0.0113, "step": 185530 }, { "epoch": 1220.657894736842, "grad_norm": 1.2889879941940308, "learning_rate": 0.0001, "loss": 0.012, "step": 185540 }, { "epoch": 1220.7236842105262, "grad_norm": 1.0350147485733032, "learning_rate": 0.0001, "loss": 0.0108, "step": 185550 }, { "epoch": 1220.7894736842106, "grad_norm": 1.6225847005844116, "learning_rate": 0.0001, "loss": 0.0095, "step": 185560 }, { "epoch": 1220.8552631578948, "grad_norm": 1.6824581623077393, "learning_rate": 0.0001, "loss": 0.0086, "step": 185570 }, { "epoch": 1220.921052631579, "grad_norm": 1.5492675304412842, "learning_rate": 0.0001, "loss": 0.0102, "step": 185580 }, { "epoch": 1220.9868421052631, "grad_norm": 1.4750715494155884, "learning_rate": 0.0001, "loss": 0.0114, "step": 185590 }, { "epoch": 1221.0526315789473, "grad_norm": 1.5382888317108154, "learning_rate": 0.0001, "loss": 0.0106, "step": 185600 }, { "epoch": 1221.1184210526317, "grad_norm": 1.0776573419570923, "learning_rate": 0.0001, "loss": 0.0133, "step": 185610 }, { "epoch": 1221.1842105263158, "grad_norm": 1.3154047727584839, "learning_rate": 0.0001, "loss": 0.0093, "step": 185620 }, { "epoch": 1221.25, "grad_norm": 1.096662163734436, "learning_rate": 0.0001, "loss": 0.0135, "step": 185630 }, { "epoch": 1221.3157894736842, "grad_norm": 1.3762978315353394, "learning_rate": 0.0001, "loss": 0.0131, "step": 185640 }, { "epoch": 1221.3815789473683, "grad_norm": 1.3324611186981201, "learning_rate": 0.0001, "loss": 0.0119, "step": 185650 }, { "epoch": 1221.4473684210527, "grad_norm": 1.6090370416641235, "learning_rate": 0.0001, "loss": 0.0101, "step": 185660 }, { "epoch": 1221.5131578947369, "grad_norm": 1.3649297952651978, "learning_rate": 0.0001, "loss": 0.0139, "step": 185670 }, { "epoch": 1221.578947368421, "grad_norm": 1.1497465372085571, "learning_rate": 0.0001, "loss": 0.0085, "step": 185680 }, { "epoch": 1221.6447368421052, "grad_norm": 1.508803129196167, "learning_rate": 0.0001, "loss": 0.0103, "step": 185690 }, { "epoch": 1221.7105263157894, "grad_norm": 1.3225653171539307, "learning_rate": 0.0001, "loss": 0.0111, "step": 185700 }, { "epoch": 1221.7763157894738, "grad_norm": 1.0272597074508667, "learning_rate": 0.0001, "loss": 0.0162, "step": 185710 }, { "epoch": 1221.842105263158, "grad_norm": 1.736828327178955, "learning_rate": 0.0001, "loss": 0.0123, "step": 185720 }, { "epoch": 1221.907894736842, "grad_norm": 1.4108797311782837, "learning_rate": 0.0001, "loss": 0.0138, "step": 185730 }, { "epoch": 1221.9736842105262, "grad_norm": 1.5005989074707031, "learning_rate": 0.0001, "loss": 0.0107, "step": 185740 }, { "epoch": 1222.0394736842106, "grad_norm": 1.2629597187042236, "learning_rate": 0.0001, "loss": 0.0095, "step": 185750 }, { "epoch": 1222.1052631578948, "grad_norm": 1.4760695695877075, "learning_rate": 0.0001, "loss": 0.0099, "step": 185760 }, { "epoch": 1222.171052631579, "grad_norm": 1.4859511852264404, "learning_rate": 0.0001, "loss": 0.0093, "step": 185770 }, { "epoch": 1222.2368421052631, "grad_norm": 1.2202013731002808, "learning_rate": 0.0001, "loss": 0.0084, "step": 185780 }, { "epoch": 1222.3026315789473, "grad_norm": 1.28665030002594, "learning_rate": 0.0001, "loss": 0.0161, "step": 185790 }, { "epoch": 1222.3684210526317, "grad_norm": 1.3198258876800537, "learning_rate": 0.0001, "loss": 0.0166, "step": 185800 }, { "epoch": 1222.4342105263158, "grad_norm": 1.5411956310272217, "learning_rate": 0.0001, "loss": 0.0152, "step": 185810 }, { "epoch": 1222.5, "grad_norm": 1.0327428579330444, "learning_rate": 0.0001, "loss": 0.0103, "step": 185820 }, { "epoch": 1222.5657894736842, "grad_norm": 1.1744312047958374, "learning_rate": 0.0001, "loss": 0.0112, "step": 185830 }, { "epoch": 1222.6315789473683, "grad_norm": 1.1016768217086792, "learning_rate": 0.0001, "loss": 0.0123, "step": 185840 }, { "epoch": 1222.6973684210527, "grad_norm": 1.5468484163284302, "learning_rate": 0.0001, "loss": 0.0116, "step": 185850 }, { "epoch": 1222.7631578947369, "grad_norm": 1.6745069026947021, "learning_rate": 0.0001, "loss": 0.0088, "step": 185860 }, { "epoch": 1222.828947368421, "grad_norm": 1.4369230270385742, "learning_rate": 0.0001, "loss": 0.0131, "step": 185870 }, { "epoch": 1222.8947368421052, "grad_norm": 1.3410733938217163, "learning_rate": 0.0001, "loss": 0.0102, "step": 185880 }, { "epoch": 1222.9605263157894, "grad_norm": 1.4378278255462646, "learning_rate": 0.0001, "loss": 0.0152, "step": 185890 }, { "epoch": 1223.0263157894738, "grad_norm": 1.2808226346969604, "learning_rate": 0.0001, "loss": 0.0112, "step": 185900 }, { "epoch": 1223.092105263158, "grad_norm": 1.3826842308044434, "learning_rate": 0.0001, "loss": 0.0151, "step": 185910 }, { "epoch": 1223.157894736842, "grad_norm": 1.3876241445541382, "learning_rate": 0.0001, "loss": 0.0119, "step": 185920 }, { "epoch": 1223.2236842105262, "grad_norm": 1.3640385866165161, "learning_rate": 0.0001, "loss": 0.012, "step": 185930 }, { "epoch": 1223.2894736842106, "grad_norm": 1.3494640588760376, "learning_rate": 0.0001, "loss": 0.0092, "step": 185940 }, { "epoch": 1223.3552631578948, "grad_norm": 1.4720512628555298, "learning_rate": 0.0001, "loss": 0.0109, "step": 185950 }, { "epoch": 1223.421052631579, "grad_norm": 1.5240892171859741, "learning_rate": 0.0001, "loss": 0.0104, "step": 185960 }, { "epoch": 1223.4868421052631, "grad_norm": 1.288507103919983, "learning_rate": 0.0001, "loss": 0.0121, "step": 185970 }, { "epoch": 1223.5526315789473, "grad_norm": 1.4103084802627563, "learning_rate": 0.0001, "loss": 0.0135, "step": 185980 }, { "epoch": 1223.6184210526317, "grad_norm": 1.2645916938781738, "learning_rate": 0.0001, "loss": 0.0116, "step": 185990 }, { "epoch": 1223.6842105263158, "grad_norm": 1.345458745956421, "learning_rate": 0.0001, "loss": 0.0148, "step": 186000 }, { "epoch": 1223.75, "grad_norm": 1.3799728155136108, "learning_rate": 0.0001, "loss": 0.0135, "step": 186010 }, { "epoch": 1223.8157894736842, "grad_norm": 1.329045295715332, "learning_rate": 0.0001, "loss": 0.0153, "step": 186020 }, { "epoch": 1223.8815789473683, "grad_norm": 1.5184131860733032, "learning_rate": 0.0001, "loss": 0.0103, "step": 186030 }, { "epoch": 1223.9473684210527, "grad_norm": 1.2311162948608398, "learning_rate": 0.0001, "loss": 0.0084, "step": 186040 }, { "epoch": 1224.0131578947369, "grad_norm": 1.3773889541625977, "learning_rate": 0.0001, "loss": 0.0124, "step": 186050 }, { "epoch": 1224.078947368421, "grad_norm": 1.4337095022201538, "learning_rate": 0.0001, "loss": 0.0136, "step": 186060 }, { "epoch": 1224.1447368421052, "grad_norm": 1.4860265254974365, "learning_rate": 0.0001, "loss": 0.0107, "step": 186070 }, { "epoch": 1224.2105263157894, "grad_norm": 1.1884121894836426, "learning_rate": 0.0001, "loss": 0.0097, "step": 186080 }, { "epoch": 1224.2763157894738, "grad_norm": 1.453677773475647, "learning_rate": 0.0001, "loss": 0.0135, "step": 186090 }, { "epoch": 1224.342105263158, "grad_norm": 1.6178107261657715, "learning_rate": 0.0001, "loss": 0.0091, "step": 186100 }, { "epoch": 1224.407894736842, "grad_norm": 1.8729474544525146, "learning_rate": 0.0001, "loss": 0.012, "step": 186110 }, { "epoch": 1224.4736842105262, "grad_norm": 1.2602884769439697, "learning_rate": 0.0001, "loss": 0.0145, "step": 186120 }, { "epoch": 1224.5394736842106, "grad_norm": 1.6642605066299438, "learning_rate": 0.0001, "loss": 0.0117, "step": 186130 }, { "epoch": 1224.6052631578948, "grad_norm": 1.919637680053711, "learning_rate": 0.0001, "loss": 0.0102, "step": 186140 }, { "epoch": 1224.671052631579, "grad_norm": 1.7325564622879028, "learning_rate": 0.0001, "loss": 0.0116, "step": 186150 }, { "epoch": 1224.7368421052631, "grad_norm": 1.8681632280349731, "learning_rate": 0.0001, "loss": 0.0137, "step": 186160 }, { "epoch": 1224.8026315789473, "grad_norm": 1.4144201278686523, "learning_rate": 0.0001, "loss": 0.0102, "step": 186170 }, { "epoch": 1224.8684210526317, "grad_norm": 1.0156645774841309, "learning_rate": 0.0001, "loss": 0.0121, "step": 186180 }, { "epoch": 1224.9342105263158, "grad_norm": 1.226349949836731, "learning_rate": 0.0001, "loss": 0.0129, "step": 186190 }, { "epoch": 1225.0, "grad_norm": 1.2518917322158813, "learning_rate": 0.0001, "loss": 0.0096, "step": 186200 }, { "epoch": 1225.0657894736842, "grad_norm": 1.1597141027450562, "learning_rate": 0.0001, "loss": 0.0144, "step": 186210 }, { "epoch": 1225.1315789473683, "grad_norm": 1.2288533449172974, "learning_rate": 0.0001, "loss": 0.0129, "step": 186220 }, { "epoch": 1225.1973684210527, "grad_norm": 1.2304229736328125, "learning_rate": 0.0001, "loss": 0.0135, "step": 186230 }, { "epoch": 1225.2631578947369, "grad_norm": 1.0284425020217896, "learning_rate": 0.0001, "loss": 0.0121, "step": 186240 }, { "epoch": 1225.328947368421, "grad_norm": 1.4674208164215088, "learning_rate": 0.0001, "loss": 0.0088, "step": 186250 }, { "epoch": 1225.3947368421052, "grad_norm": 1.1936970949172974, "learning_rate": 0.0001, "loss": 0.0122, "step": 186260 }, { "epoch": 1225.4605263157894, "grad_norm": 1.676556944847107, "learning_rate": 0.0001, "loss": 0.0105, "step": 186270 }, { "epoch": 1225.5263157894738, "grad_norm": 0.8802648782730103, "learning_rate": 0.0001, "loss": 0.0155, "step": 186280 }, { "epoch": 1225.592105263158, "grad_norm": 1.0834344625473022, "learning_rate": 0.0001, "loss": 0.0158, "step": 186290 }, { "epoch": 1225.657894736842, "grad_norm": 1.451967716217041, "learning_rate": 0.0001, "loss": 0.0097, "step": 186300 }, { "epoch": 1225.7236842105262, "grad_norm": 1.752821922302246, "learning_rate": 0.0001, "loss": 0.01, "step": 186310 }, { "epoch": 1225.7894736842106, "grad_norm": 1.5218796730041504, "learning_rate": 0.0001, "loss": 0.0093, "step": 186320 }, { "epoch": 1225.8552631578948, "grad_norm": 1.422034502029419, "learning_rate": 0.0001, "loss": 0.0095, "step": 186330 }, { "epoch": 1225.921052631579, "grad_norm": 1.7518209218978882, "learning_rate": 0.0001, "loss": 0.0154, "step": 186340 }, { "epoch": 1225.9868421052631, "grad_norm": 1.9660474061965942, "learning_rate": 0.0001, "loss": 0.0141, "step": 186350 }, { "epoch": 1226.0526315789473, "grad_norm": 1.5986175537109375, "learning_rate": 0.0001, "loss": 0.0084, "step": 186360 }, { "epoch": 1226.1184210526317, "grad_norm": 1.4242116212844849, "learning_rate": 0.0001, "loss": 0.0103, "step": 186370 }, { "epoch": 1226.1842105263158, "grad_norm": 1.4517080783843994, "learning_rate": 0.0001, "loss": 0.0097, "step": 186380 }, { "epoch": 1226.25, "grad_norm": 1.6316462755203247, "learning_rate": 0.0001, "loss": 0.0133, "step": 186390 }, { "epoch": 1226.3157894736842, "grad_norm": 1.3076238632202148, "learning_rate": 0.0001, "loss": 0.0109, "step": 186400 }, { "epoch": 1226.3815789473683, "grad_norm": 1.189081072807312, "learning_rate": 0.0001, "loss": 0.0124, "step": 186410 }, { "epoch": 1226.4473684210527, "grad_norm": 1.6210204362869263, "learning_rate": 0.0001, "loss": 0.0097, "step": 186420 }, { "epoch": 1226.5131578947369, "grad_norm": 1.4125860929489136, "learning_rate": 0.0001, "loss": 0.0115, "step": 186430 }, { "epoch": 1226.578947368421, "grad_norm": 1.6866066455841064, "learning_rate": 0.0001, "loss": 0.0109, "step": 186440 }, { "epoch": 1226.6447368421052, "grad_norm": 1.4750332832336426, "learning_rate": 0.0001, "loss": 0.0106, "step": 186450 }, { "epoch": 1226.7105263157894, "grad_norm": 1.4151909351348877, "learning_rate": 0.0001, "loss": 0.0111, "step": 186460 }, { "epoch": 1226.7763157894738, "grad_norm": 1.193528175354004, "learning_rate": 0.0001, "loss": 0.0149, "step": 186470 }, { "epoch": 1226.842105263158, "grad_norm": 1.1475127935409546, "learning_rate": 0.0001, "loss": 0.0179, "step": 186480 }, { "epoch": 1226.907894736842, "grad_norm": 1.064515233039856, "learning_rate": 0.0001, "loss": 0.0158, "step": 186490 }, { "epoch": 1226.9736842105262, "grad_norm": 1.3560389280319214, "learning_rate": 0.0001, "loss": 0.009, "step": 186500 }, { "epoch": 1227.0394736842106, "grad_norm": 1.3526021242141724, "learning_rate": 0.0001, "loss": 0.012, "step": 186510 }, { "epoch": 1227.1052631578948, "grad_norm": 0.9027003645896912, "learning_rate": 0.0001, "loss": 0.013, "step": 186520 }, { "epoch": 1227.171052631579, "grad_norm": 1.3438175916671753, "learning_rate": 0.0001, "loss": 0.014, "step": 186530 }, { "epoch": 1227.2368421052631, "grad_norm": 1.6142451763153076, "learning_rate": 0.0001, "loss": 0.0099, "step": 186540 }, { "epoch": 1227.3026315789473, "grad_norm": 1.465907335281372, "learning_rate": 0.0001, "loss": 0.0152, "step": 186550 }, { "epoch": 1227.3684210526317, "grad_norm": 1.6604645252227783, "learning_rate": 0.0001, "loss": 0.01, "step": 186560 }, { "epoch": 1227.4342105263158, "grad_norm": 1.1752151250839233, "learning_rate": 0.0001, "loss": 0.0098, "step": 186570 }, { "epoch": 1227.5, "grad_norm": 1.1686537265777588, "learning_rate": 0.0001, "loss": 0.0135, "step": 186580 }, { "epoch": 1227.5657894736842, "grad_norm": 1.1957318782806396, "learning_rate": 0.0001, "loss": 0.0101, "step": 186590 }, { "epoch": 1227.6315789473683, "grad_norm": 1.1260662078857422, "learning_rate": 0.0001, "loss": 0.0092, "step": 186600 }, { "epoch": 1227.6973684210527, "grad_norm": 1.223948359489441, "learning_rate": 0.0001, "loss": 0.0124, "step": 186610 }, { "epoch": 1227.7631578947369, "grad_norm": 1.5837284326553345, "learning_rate": 0.0001, "loss": 0.0127, "step": 186620 }, { "epoch": 1227.828947368421, "grad_norm": 1.58645498752594, "learning_rate": 0.0001, "loss": 0.01, "step": 186630 }, { "epoch": 1227.8947368421052, "grad_norm": 1.4450926780700684, "learning_rate": 0.0001, "loss": 0.0127, "step": 186640 }, { "epoch": 1227.9605263157894, "grad_norm": 1.3724219799041748, "learning_rate": 0.0001, "loss": 0.015, "step": 186650 }, { "epoch": 1228.0263157894738, "grad_norm": 1.2548567056655884, "learning_rate": 0.0001, "loss": 0.0152, "step": 186660 }, { "epoch": 1228.092105263158, "grad_norm": 1.1233254671096802, "learning_rate": 0.0001, "loss": 0.015, "step": 186670 }, { "epoch": 1228.157894736842, "grad_norm": 1.2199525833129883, "learning_rate": 0.0001, "loss": 0.009, "step": 186680 }, { "epoch": 1228.2236842105262, "grad_norm": 1.6841953992843628, "learning_rate": 0.0001, "loss": 0.0107, "step": 186690 }, { "epoch": 1228.2894736842106, "grad_norm": 1.713651180267334, "learning_rate": 0.0001, "loss": 0.0101, "step": 186700 }, { "epoch": 1228.3552631578948, "grad_norm": 1.294198751449585, "learning_rate": 0.0001, "loss": 0.0114, "step": 186710 }, { "epoch": 1228.421052631579, "grad_norm": 1.544203519821167, "learning_rate": 0.0001, "loss": 0.0128, "step": 186720 }, { "epoch": 1228.4868421052631, "grad_norm": 1.3033077716827393, "learning_rate": 0.0001, "loss": 0.013, "step": 186730 }, { "epoch": 1228.5526315789473, "grad_norm": 1.2464998960494995, "learning_rate": 0.0001, "loss": 0.01, "step": 186740 }, { "epoch": 1228.6184210526317, "grad_norm": 0.9687883853912354, "learning_rate": 0.0001, "loss": 0.0123, "step": 186750 }, { "epoch": 1228.6842105263158, "grad_norm": 1.2450151443481445, "learning_rate": 0.0001, "loss": 0.0082, "step": 186760 }, { "epoch": 1228.75, "grad_norm": 1.350901484489441, "learning_rate": 0.0001, "loss": 0.0114, "step": 186770 }, { "epoch": 1228.8157894736842, "grad_norm": 1.600067377090454, "learning_rate": 0.0001, "loss": 0.0141, "step": 186780 }, { "epoch": 1228.8815789473683, "grad_norm": 1.7469316720962524, "learning_rate": 0.0001, "loss": 0.0158, "step": 186790 }, { "epoch": 1228.9473684210527, "grad_norm": 1.556720495223999, "learning_rate": 0.0001, "loss": 0.011, "step": 186800 }, { "epoch": 1229.0131578947369, "grad_norm": 1.271970272064209, "learning_rate": 0.0001, "loss": 0.0094, "step": 186810 }, { "epoch": 1229.078947368421, "grad_norm": 1.4863730669021606, "learning_rate": 0.0001, "loss": 0.0099, "step": 186820 }, { "epoch": 1229.1447368421052, "grad_norm": 1.4766895771026611, "learning_rate": 0.0001, "loss": 0.0099, "step": 186830 }, { "epoch": 1229.2105263157894, "grad_norm": 1.4288305044174194, "learning_rate": 0.0001, "loss": 0.0122, "step": 186840 }, { "epoch": 1229.2763157894738, "grad_norm": 1.5560302734375, "learning_rate": 0.0001, "loss": 0.0099, "step": 186850 }, { "epoch": 1229.342105263158, "grad_norm": 1.494767189025879, "learning_rate": 0.0001, "loss": 0.016, "step": 186860 }, { "epoch": 1229.407894736842, "grad_norm": 1.0699474811553955, "learning_rate": 0.0001, "loss": 0.0101, "step": 186870 }, { "epoch": 1229.4736842105262, "grad_norm": 1.5875247716903687, "learning_rate": 0.0001, "loss": 0.0103, "step": 186880 }, { "epoch": 1229.5394736842106, "grad_norm": 1.2542126178741455, "learning_rate": 0.0001, "loss": 0.0122, "step": 186890 }, { "epoch": 1229.6052631578948, "grad_norm": 1.3350906372070312, "learning_rate": 0.0001, "loss": 0.0082, "step": 186900 }, { "epoch": 1229.671052631579, "grad_norm": 1.585632562637329, "learning_rate": 0.0001, "loss": 0.0132, "step": 186910 }, { "epoch": 1229.7368421052631, "grad_norm": 1.8530007600784302, "learning_rate": 0.0001, "loss": 0.0098, "step": 186920 }, { "epoch": 1229.8026315789473, "grad_norm": 1.577314019203186, "learning_rate": 0.0001, "loss": 0.0104, "step": 186930 }, { "epoch": 1229.8684210526317, "grad_norm": 1.219190239906311, "learning_rate": 0.0001, "loss": 0.0128, "step": 186940 }, { "epoch": 1229.9342105263158, "grad_norm": 1.6022969484329224, "learning_rate": 0.0001, "loss": 0.0169, "step": 186950 }, { "epoch": 1230.0, "grad_norm": 1.5154931545257568, "learning_rate": 0.0001, "loss": 0.0136, "step": 186960 }, { "epoch": 1230.0657894736842, "grad_norm": 1.2880514860153198, "learning_rate": 0.0001, "loss": 0.0084, "step": 186970 }, { "epoch": 1230.1315789473683, "grad_norm": 1.5828510522842407, "learning_rate": 0.0001, "loss": 0.0141, "step": 186980 }, { "epoch": 1230.1973684210527, "grad_norm": 1.5434490442276, "learning_rate": 0.0001, "loss": 0.0115, "step": 186990 }, { "epoch": 1230.2631578947369, "grad_norm": 1.1487635374069214, "learning_rate": 0.0001, "loss": 0.0098, "step": 187000 }, { "epoch": 1230.328947368421, "grad_norm": 1.7184579372406006, "learning_rate": 0.0001, "loss": 0.009, "step": 187010 }, { "epoch": 1230.3947368421052, "grad_norm": 1.5467289686203003, "learning_rate": 0.0001, "loss": 0.0108, "step": 187020 }, { "epoch": 1230.4605263157894, "grad_norm": 1.1702204942703247, "learning_rate": 0.0001, "loss": 0.0145, "step": 187030 }, { "epoch": 1230.5263157894738, "grad_norm": 1.4904712438583374, "learning_rate": 0.0001, "loss": 0.0112, "step": 187040 }, { "epoch": 1230.592105263158, "grad_norm": 1.8912031650543213, "learning_rate": 0.0001, "loss": 0.0111, "step": 187050 }, { "epoch": 1230.657894736842, "grad_norm": 1.5232757329940796, "learning_rate": 0.0001, "loss": 0.01, "step": 187060 }, { "epoch": 1230.7236842105262, "grad_norm": 1.5427812337875366, "learning_rate": 0.0001, "loss": 0.0144, "step": 187070 }, { "epoch": 1230.7894736842106, "grad_norm": 0.9992271065711975, "learning_rate": 0.0001, "loss": 0.0127, "step": 187080 }, { "epoch": 1230.8552631578948, "grad_norm": 1.3270256519317627, "learning_rate": 0.0001, "loss": 0.0148, "step": 187090 }, { "epoch": 1230.921052631579, "grad_norm": 1.350113868713379, "learning_rate": 0.0001, "loss": 0.0125, "step": 187100 }, { "epoch": 1230.9868421052631, "grad_norm": 0.9797785878181458, "learning_rate": 0.0001, "loss": 0.0085, "step": 187110 }, { "epoch": 1231.0526315789473, "grad_norm": 1.6168041229248047, "learning_rate": 0.0001, "loss": 0.0136, "step": 187120 }, { "epoch": 1231.1184210526317, "grad_norm": 1.6816306114196777, "learning_rate": 0.0001, "loss": 0.0092, "step": 187130 }, { "epoch": 1231.1842105263158, "grad_norm": 1.2918051481246948, "learning_rate": 0.0001, "loss": 0.012, "step": 187140 }, { "epoch": 1231.25, "grad_norm": 1.689465045928955, "learning_rate": 0.0001, "loss": 0.0165, "step": 187150 }, { "epoch": 1231.3157894736842, "grad_norm": 1.5666035413742065, "learning_rate": 0.0001, "loss": 0.012, "step": 187160 }, { "epoch": 1231.3815789473683, "grad_norm": 1.0837864875793457, "learning_rate": 0.0001, "loss": 0.0089, "step": 187170 }, { "epoch": 1231.4473684210527, "grad_norm": 1.3867465257644653, "learning_rate": 0.0001, "loss": 0.0139, "step": 187180 }, { "epoch": 1231.5131578947369, "grad_norm": 1.658773422241211, "learning_rate": 0.0001, "loss": 0.01, "step": 187190 }, { "epoch": 1231.578947368421, "grad_norm": 0.9982593059539795, "learning_rate": 0.0001, "loss": 0.0115, "step": 187200 }, { "epoch": 1231.6447368421052, "grad_norm": 1.6778508424758911, "learning_rate": 0.0001, "loss": 0.0121, "step": 187210 }, { "epoch": 1231.7105263157894, "grad_norm": 1.951829433441162, "learning_rate": 0.0001, "loss": 0.015, "step": 187220 }, { "epoch": 1231.7763157894738, "grad_norm": 1.3029693365097046, "learning_rate": 0.0001, "loss": 0.0118, "step": 187230 }, { "epoch": 1231.842105263158, "grad_norm": 1.5949296951293945, "learning_rate": 0.0001, "loss": 0.012, "step": 187240 }, { "epoch": 1231.907894736842, "grad_norm": 1.5016978979110718, "learning_rate": 0.0001, "loss": 0.0096, "step": 187250 }, { "epoch": 1231.9736842105262, "grad_norm": 1.5016579627990723, "learning_rate": 0.0001, "loss": 0.0097, "step": 187260 }, { "epoch": 1232.0394736842106, "grad_norm": 1.1917173862457275, "learning_rate": 0.0001, "loss": 0.0122, "step": 187270 }, { "epoch": 1232.1052631578948, "grad_norm": 1.0438281297683716, "learning_rate": 0.0001, "loss": 0.0121, "step": 187280 }, { "epoch": 1232.171052631579, "grad_norm": 1.309383511543274, "learning_rate": 0.0001, "loss": 0.0123, "step": 187290 }, { "epoch": 1232.2368421052631, "grad_norm": 1.2002941370010376, "learning_rate": 0.0001, "loss": 0.0096, "step": 187300 }, { "epoch": 1232.3026315789473, "grad_norm": 1.3872478008270264, "learning_rate": 0.0001, "loss": 0.0103, "step": 187310 }, { "epoch": 1232.3684210526317, "grad_norm": 1.278991937637329, "learning_rate": 0.0001, "loss": 0.0102, "step": 187320 }, { "epoch": 1232.4342105263158, "grad_norm": 1.2829192876815796, "learning_rate": 0.0001, "loss": 0.012, "step": 187330 }, { "epoch": 1232.5, "grad_norm": 1.5369820594787598, "learning_rate": 0.0001, "loss": 0.0152, "step": 187340 }, { "epoch": 1232.5657894736842, "grad_norm": 1.238689661026001, "learning_rate": 0.0001, "loss": 0.0114, "step": 187350 }, { "epoch": 1232.6315789473683, "grad_norm": 1.128347396850586, "learning_rate": 0.0001, "loss": 0.0105, "step": 187360 }, { "epoch": 1232.6973684210527, "grad_norm": 1.4671591520309448, "learning_rate": 0.0001, "loss": 0.0081, "step": 187370 }, { "epoch": 1232.7631578947369, "grad_norm": 0.872137725353241, "learning_rate": 0.0001, "loss": 0.0162, "step": 187380 }, { "epoch": 1232.828947368421, "grad_norm": 1.5420763492584229, "learning_rate": 0.0001, "loss": 0.0132, "step": 187390 }, { "epoch": 1232.8947368421052, "grad_norm": 1.8042247295379639, "learning_rate": 0.0001, "loss": 0.0128, "step": 187400 }, { "epoch": 1232.9605263157894, "grad_norm": 1.3737123012542725, "learning_rate": 0.0001, "loss": 0.0122, "step": 187410 }, { "epoch": 1233.0263157894738, "grad_norm": 1.6340569257736206, "learning_rate": 0.0001, "loss": 0.0099, "step": 187420 }, { "epoch": 1233.092105263158, "grad_norm": 1.3368340730667114, "learning_rate": 0.0001, "loss": 0.015, "step": 187430 }, { "epoch": 1233.157894736842, "grad_norm": 1.0265758037567139, "learning_rate": 0.0001, "loss": 0.008, "step": 187440 }, { "epoch": 1233.2236842105262, "grad_norm": 1.4790596961975098, "learning_rate": 0.0001, "loss": 0.011, "step": 187450 }, { "epoch": 1233.2894736842106, "grad_norm": 1.0537124872207642, "learning_rate": 0.0001, "loss": 0.0115, "step": 187460 }, { "epoch": 1233.3552631578948, "grad_norm": 1.4174271821975708, "learning_rate": 0.0001, "loss": 0.0143, "step": 187470 }, { "epoch": 1233.421052631579, "grad_norm": 1.3839364051818848, "learning_rate": 0.0001, "loss": 0.0156, "step": 187480 }, { "epoch": 1233.4868421052631, "grad_norm": 1.3759301900863647, "learning_rate": 0.0001, "loss": 0.009, "step": 187490 }, { "epoch": 1233.5526315789473, "grad_norm": 1.5330803394317627, "learning_rate": 0.0001, "loss": 0.015, "step": 187500 }, { "epoch": 1233.6184210526317, "grad_norm": 1.296339511871338, "learning_rate": 0.0001, "loss": 0.0117, "step": 187510 }, { "epoch": 1233.6842105263158, "grad_norm": 1.079909086227417, "learning_rate": 0.0001, "loss": 0.0098, "step": 187520 }, { "epoch": 1233.75, "grad_norm": 1.2006052732467651, "learning_rate": 0.0001, "loss": 0.0153, "step": 187530 }, { "epoch": 1233.8157894736842, "grad_norm": 1.3053808212280273, "learning_rate": 0.0001, "loss": 0.011, "step": 187540 }, { "epoch": 1233.8815789473683, "grad_norm": 1.2900358438491821, "learning_rate": 0.0001, "loss": 0.0099, "step": 187550 }, { "epoch": 1233.9473684210527, "grad_norm": 1.2189429998397827, "learning_rate": 0.0001, "loss": 0.0123, "step": 187560 }, { "epoch": 1234.0131578947369, "grad_norm": 1.7092583179473877, "learning_rate": 0.0001, "loss": 0.0091, "step": 187570 }, { "epoch": 1234.078947368421, "grad_norm": 1.5097122192382812, "learning_rate": 0.0001, "loss": 0.0117, "step": 187580 }, { "epoch": 1234.1447368421052, "grad_norm": 1.440686821937561, "learning_rate": 0.0001, "loss": 0.013, "step": 187590 }, { "epoch": 1234.2105263157894, "grad_norm": 1.610979676246643, "learning_rate": 0.0001, "loss": 0.0101, "step": 187600 }, { "epoch": 1234.2763157894738, "grad_norm": 1.8601603507995605, "learning_rate": 0.0001, "loss": 0.0109, "step": 187610 }, { "epoch": 1234.342105263158, "grad_norm": 1.0236046314239502, "learning_rate": 0.0001, "loss": 0.0096, "step": 187620 }, { "epoch": 1234.407894736842, "grad_norm": 1.4829447269439697, "learning_rate": 0.0001, "loss": 0.0107, "step": 187630 }, { "epoch": 1234.4736842105262, "grad_norm": 1.6395221948623657, "learning_rate": 0.0001, "loss": 0.0095, "step": 187640 }, { "epoch": 1234.5394736842106, "grad_norm": 1.1678965091705322, "learning_rate": 0.0001, "loss": 0.0117, "step": 187650 }, { "epoch": 1234.6052631578948, "grad_norm": 1.57101571559906, "learning_rate": 0.0001, "loss": 0.0144, "step": 187660 }, { "epoch": 1234.671052631579, "grad_norm": 1.3109700679779053, "learning_rate": 0.0001, "loss": 0.012, "step": 187670 }, { "epoch": 1234.7368421052631, "grad_norm": 1.5511560440063477, "learning_rate": 0.0001, "loss": 0.0125, "step": 187680 }, { "epoch": 1234.8026315789473, "grad_norm": 0.8985216617584229, "learning_rate": 0.0001, "loss": 0.0144, "step": 187690 }, { "epoch": 1234.8684210526317, "grad_norm": 1.6481925249099731, "learning_rate": 0.0001, "loss": 0.0119, "step": 187700 }, { "epoch": 1234.9342105263158, "grad_norm": 1.5669918060302734, "learning_rate": 0.0001, "loss": 0.0119, "step": 187710 }, { "epoch": 1235.0, "grad_norm": 1.5476218461990356, "learning_rate": 0.0001, "loss": 0.0121, "step": 187720 }, { "epoch": 1235.0657894736842, "grad_norm": 1.9322723150253296, "learning_rate": 0.0001, "loss": 0.0114, "step": 187730 }, { "epoch": 1235.1315789473683, "grad_norm": 1.006665825843811, "learning_rate": 0.0001, "loss": 0.0096, "step": 187740 }, { "epoch": 1235.1973684210527, "grad_norm": 1.1930922269821167, "learning_rate": 0.0001, "loss": 0.0113, "step": 187750 }, { "epoch": 1235.2631578947369, "grad_norm": 1.5866047143936157, "learning_rate": 0.0001, "loss": 0.0099, "step": 187760 }, { "epoch": 1235.328947368421, "grad_norm": 1.316576361656189, "learning_rate": 0.0001, "loss": 0.0095, "step": 187770 }, { "epoch": 1235.3947368421052, "grad_norm": 1.517080307006836, "learning_rate": 0.0001, "loss": 0.0115, "step": 187780 }, { "epoch": 1235.4605263157894, "grad_norm": 1.9503000974655151, "learning_rate": 0.0001, "loss": 0.0127, "step": 187790 }, { "epoch": 1235.5263157894738, "grad_norm": 1.7528408765792847, "learning_rate": 0.0001, "loss": 0.0142, "step": 187800 }, { "epoch": 1235.592105263158, "grad_norm": 1.8097025156021118, "learning_rate": 0.0001, "loss": 0.0155, "step": 187810 }, { "epoch": 1235.657894736842, "grad_norm": 1.8871924877166748, "learning_rate": 0.0001, "loss": 0.0098, "step": 187820 }, { "epoch": 1235.7236842105262, "grad_norm": 1.7694309949874878, "learning_rate": 0.0001, "loss": 0.0112, "step": 187830 }, { "epoch": 1235.7894736842106, "grad_norm": 1.962103009223938, "learning_rate": 0.0001, "loss": 0.0089, "step": 187840 }, { "epoch": 1235.8552631578948, "grad_norm": 1.4663466215133667, "learning_rate": 0.0001, "loss": 0.01, "step": 187850 }, { "epoch": 1235.921052631579, "grad_norm": 1.562362790107727, "learning_rate": 0.0001, "loss": 0.0161, "step": 187860 }, { "epoch": 1235.9868421052631, "grad_norm": 1.4779391288757324, "learning_rate": 0.0001, "loss": 0.0122, "step": 187870 }, { "epoch": 1236.0526315789473, "grad_norm": 1.7222504615783691, "learning_rate": 0.0001, "loss": 0.0088, "step": 187880 }, { "epoch": 1236.1184210526317, "grad_norm": 1.3163697719573975, "learning_rate": 0.0001, "loss": 0.0139, "step": 187890 }, { "epoch": 1236.1842105263158, "grad_norm": 1.7121599912643433, "learning_rate": 0.0001, "loss": 0.0114, "step": 187900 }, { "epoch": 1236.25, "grad_norm": 1.2869031429290771, "learning_rate": 0.0001, "loss": 0.0096, "step": 187910 }, { "epoch": 1236.3157894736842, "grad_norm": 1.234609603881836, "learning_rate": 0.0001, "loss": 0.0143, "step": 187920 }, { "epoch": 1236.3815789473683, "grad_norm": 1.4175763130187988, "learning_rate": 0.0001, "loss": 0.012, "step": 187930 }, { "epoch": 1236.4473684210527, "grad_norm": 1.2817703485488892, "learning_rate": 0.0001, "loss": 0.0087, "step": 187940 }, { "epoch": 1236.5131578947369, "grad_norm": 1.566161036491394, "learning_rate": 0.0001, "loss": 0.014, "step": 187950 }, { "epoch": 1236.578947368421, "grad_norm": 2.005720376968384, "learning_rate": 0.0001, "loss": 0.0098, "step": 187960 }, { "epoch": 1236.6447368421052, "grad_norm": 1.626930832862854, "learning_rate": 0.0001, "loss": 0.0144, "step": 187970 }, { "epoch": 1236.7105263157894, "grad_norm": 1.415019154548645, "learning_rate": 0.0001, "loss": 0.0148, "step": 187980 }, { "epoch": 1236.7763157894738, "grad_norm": 1.3900014162063599, "learning_rate": 0.0001, "loss": 0.0108, "step": 187990 }, { "epoch": 1236.842105263158, "grad_norm": 1.4563220739364624, "learning_rate": 0.0001, "loss": 0.0123, "step": 188000 }, { "epoch": 1236.907894736842, "grad_norm": 1.2663719654083252, "learning_rate": 0.0001, "loss": 0.0092, "step": 188010 }, { "epoch": 1236.9736842105262, "grad_norm": 1.0483157634735107, "learning_rate": 0.0001, "loss": 0.0099, "step": 188020 }, { "epoch": 1237.0394736842106, "grad_norm": 1.6009870767593384, "learning_rate": 0.0001, "loss": 0.0101, "step": 188030 }, { "epoch": 1237.1052631578948, "grad_norm": 1.6978583335876465, "learning_rate": 0.0001, "loss": 0.0134, "step": 188040 }, { "epoch": 1237.171052631579, "grad_norm": 1.3074815273284912, "learning_rate": 0.0001, "loss": 0.0085, "step": 188050 }, { "epoch": 1237.2368421052631, "grad_norm": 1.3459731340408325, "learning_rate": 0.0001, "loss": 0.0094, "step": 188060 }, { "epoch": 1237.3026315789473, "grad_norm": 1.3090083599090576, "learning_rate": 0.0001, "loss": 0.0131, "step": 188070 }, { "epoch": 1237.3684210526317, "grad_norm": 1.1945072412490845, "learning_rate": 0.0001, "loss": 0.0131, "step": 188080 }, { "epoch": 1237.4342105263158, "grad_norm": 1.6186485290527344, "learning_rate": 0.0001, "loss": 0.0132, "step": 188090 }, { "epoch": 1237.5, "grad_norm": 1.2889909744262695, "learning_rate": 0.0001, "loss": 0.0132, "step": 188100 }, { "epoch": 1237.5657894736842, "grad_norm": 1.6321250200271606, "learning_rate": 0.0001, "loss": 0.0119, "step": 188110 }, { "epoch": 1237.6315789473683, "grad_norm": 1.502039909362793, "learning_rate": 0.0001, "loss": 0.0102, "step": 188120 }, { "epoch": 1237.6973684210527, "grad_norm": 1.6207425594329834, "learning_rate": 0.0001, "loss": 0.0124, "step": 188130 }, { "epoch": 1237.7631578947369, "grad_norm": 1.6124345064163208, "learning_rate": 0.0001, "loss": 0.0123, "step": 188140 }, { "epoch": 1237.828947368421, "grad_norm": 1.6950571537017822, "learning_rate": 0.0001, "loss": 0.0104, "step": 188150 }, { "epoch": 1237.8947368421052, "grad_norm": 1.4480212926864624, "learning_rate": 0.0001, "loss": 0.0128, "step": 188160 }, { "epoch": 1237.9605263157894, "grad_norm": 1.5294209718704224, "learning_rate": 0.0001, "loss": 0.0124, "step": 188170 }, { "epoch": 1238.0263157894738, "grad_norm": 1.1310430765151978, "learning_rate": 0.0001, "loss": 0.0116, "step": 188180 }, { "epoch": 1238.092105263158, "grad_norm": 1.2693934440612793, "learning_rate": 0.0001, "loss": 0.0133, "step": 188190 }, { "epoch": 1238.157894736842, "grad_norm": 1.578615665435791, "learning_rate": 0.0001, "loss": 0.0106, "step": 188200 }, { "epoch": 1238.2236842105262, "grad_norm": 1.5749168395996094, "learning_rate": 0.0001, "loss": 0.0132, "step": 188210 }, { "epoch": 1238.2894736842106, "grad_norm": 1.1281225681304932, "learning_rate": 0.0001, "loss": 0.0142, "step": 188220 }, { "epoch": 1238.3552631578948, "grad_norm": 1.8087717294692993, "learning_rate": 0.0001, "loss": 0.0086, "step": 188230 }, { "epoch": 1238.421052631579, "grad_norm": 1.5969092845916748, "learning_rate": 0.0001, "loss": 0.0099, "step": 188240 }, { "epoch": 1238.4868421052631, "grad_norm": 1.4751534461975098, "learning_rate": 0.0001, "loss": 0.0087, "step": 188250 }, { "epoch": 1238.5526315789473, "grad_norm": 1.6547346115112305, "learning_rate": 0.0001, "loss": 0.0131, "step": 188260 }, { "epoch": 1238.6184210526317, "grad_norm": 1.4046969413757324, "learning_rate": 0.0001, "loss": 0.0129, "step": 188270 }, { "epoch": 1238.6842105263158, "grad_norm": 1.4577733278274536, "learning_rate": 0.0001, "loss": 0.0101, "step": 188280 }, { "epoch": 1238.75, "grad_norm": 1.477444052696228, "learning_rate": 0.0001, "loss": 0.0117, "step": 188290 }, { "epoch": 1238.8157894736842, "grad_norm": 1.4072753190994263, "learning_rate": 0.0001, "loss": 0.0156, "step": 188300 }, { "epoch": 1238.8815789473683, "grad_norm": 1.3772335052490234, "learning_rate": 0.0001, "loss": 0.0108, "step": 188310 }, { "epoch": 1238.9473684210527, "grad_norm": 1.2917381525039673, "learning_rate": 0.0001, "loss": 0.015, "step": 188320 }, { "epoch": 1239.0131578947369, "grad_norm": 1.2358161211013794, "learning_rate": 0.0001, "loss": 0.0123, "step": 188330 }, { "epoch": 1239.078947368421, "grad_norm": 1.5891001224517822, "learning_rate": 0.0001, "loss": 0.0095, "step": 188340 }, { "epoch": 1239.1447368421052, "grad_norm": 1.3213045597076416, "learning_rate": 0.0001, "loss": 0.0116, "step": 188350 }, { "epoch": 1239.2105263157894, "grad_norm": 1.3893072605133057, "learning_rate": 0.0001, "loss": 0.0126, "step": 188360 }, { "epoch": 1239.2763157894738, "grad_norm": 1.364715814590454, "learning_rate": 0.0001, "loss": 0.011, "step": 188370 }, { "epoch": 1239.342105263158, "grad_norm": 1.1261929273605347, "learning_rate": 0.0001, "loss": 0.0126, "step": 188380 }, { "epoch": 1239.407894736842, "grad_norm": 1.2998170852661133, "learning_rate": 0.0001, "loss": 0.0128, "step": 188390 }, { "epoch": 1239.4736842105262, "grad_norm": 1.5396602153778076, "learning_rate": 0.0001, "loss": 0.012, "step": 188400 }, { "epoch": 1239.5394736842106, "grad_norm": 1.8425266742706299, "learning_rate": 0.0001, "loss": 0.0163, "step": 188410 }, { "epoch": 1239.6052631578948, "grad_norm": 1.3921197652816772, "learning_rate": 0.0001, "loss": 0.0154, "step": 188420 }, { "epoch": 1239.671052631579, "grad_norm": 1.5367727279663086, "learning_rate": 0.0001, "loss": 0.0114, "step": 188430 }, { "epoch": 1239.7368421052631, "grad_norm": 1.572137475013733, "learning_rate": 0.0001, "loss": 0.0095, "step": 188440 }, { "epoch": 1239.8026315789473, "grad_norm": 1.4167301654815674, "learning_rate": 0.0001, "loss": 0.0123, "step": 188450 }, { "epoch": 1239.8684210526317, "grad_norm": 1.5970900058746338, "learning_rate": 0.0001, "loss": 0.0083, "step": 188460 }, { "epoch": 1239.9342105263158, "grad_norm": 1.5554560422897339, "learning_rate": 0.0001, "loss": 0.0114, "step": 188470 }, { "epoch": 1240.0, "grad_norm": 1.4442768096923828, "learning_rate": 0.0001, "loss": 0.0106, "step": 188480 }, { "epoch": 1240.0657894736842, "grad_norm": 1.984649419784546, "learning_rate": 0.0001, "loss": 0.0099, "step": 188490 }, { "epoch": 1240.1315789473683, "grad_norm": 1.7370553016662598, "learning_rate": 0.0001, "loss": 0.0102, "step": 188500 }, { "epoch": 1240.1973684210527, "grad_norm": 1.4932905435562134, "learning_rate": 0.0001, "loss": 0.0107, "step": 188510 }, { "epoch": 1240.2631578947369, "grad_norm": 1.5851545333862305, "learning_rate": 0.0001, "loss": 0.0113, "step": 188520 }, { "epoch": 1240.328947368421, "grad_norm": 1.0655962228775024, "learning_rate": 0.0001, "loss": 0.0143, "step": 188530 }, { "epoch": 1240.3947368421052, "grad_norm": 1.1478749513626099, "learning_rate": 0.0001, "loss": 0.0122, "step": 188540 }, { "epoch": 1240.4605263157894, "grad_norm": 1.4391857385635376, "learning_rate": 0.0001, "loss": 0.0093, "step": 188550 }, { "epoch": 1240.5263157894738, "grad_norm": 1.1079204082489014, "learning_rate": 0.0001, "loss": 0.0143, "step": 188560 }, { "epoch": 1240.592105263158, "grad_norm": 1.5307257175445557, "learning_rate": 0.0001, "loss": 0.0112, "step": 188570 }, { "epoch": 1240.657894736842, "grad_norm": 1.5487220287322998, "learning_rate": 0.0001, "loss": 0.0119, "step": 188580 }, { "epoch": 1240.7236842105262, "grad_norm": 1.3366806507110596, "learning_rate": 0.0001, "loss": 0.0112, "step": 188590 }, { "epoch": 1240.7894736842106, "grad_norm": 1.6373807191848755, "learning_rate": 0.0001, "loss": 0.013, "step": 188600 }, { "epoch": 1240.8552631578948, "grad_norm": 1.6534180641174316, "learning_rate": 0.0001, "loss": 0.0112, "step": 188610 }, { "epoch": 1240.921052631579, "grad_norm": 1.3593904972076416, "learning_rate": 0.0001, "loss": 0.0134, "step": 188620 }, { "epoch": 1240.9868421052631, "grad_norm": 1.3949198722839355, "learning_rate": 0.0001, "loss": 0.0127, "step": 188630 }, { "epoch": 1241.0526315789473, "grad_norm": 1.333738923072815, "learning_rate": 0.0001, "loss": 0.0086, "step": 188640 }, { "epoch": 1241.1184210526317, "grad_norm": 1.0223886966705322, "learning_rate": 0.0001, "loss": 0.0084, "step": 188650 }, { "epoch": 1241.1842105263158, "grad_norm": 1.1351253986358643, "learning_rate": 0.0001, "loss": 0.0106, "step": 188660 }, { "epoch": 1241.25, "grad_norm": 1.1798430681228638, "learning_rate": 0.0001, "loss": 0.0086, "step": 188670 }, { "epoch": 1241.3157894736842, "grad_norm": 1.0092498064041138, "learning_rate": 0.0001, "loss": 0.0101, "step": 188680 }, { "epoch": 1241.3815789473683, "grad_norm": 1.33372962474823, "learning_rate": 0.0001, "loss": 0.0099, "step": 188690 }, { "epoch": 1241.4473684210527, "grad_norm": 1.5603476762771606, "learning_rate": 0.0001, "loss": 0.0155, "step": 188700 }, { "epoch": 1241.5131578947369, "grad_norm": 2.0204625129699707, "learning_rate": 0.0001, "loss": 0.0122, "step": 188710 }, { "epoch": 1241.578947368421, "grad_norm": 2.079630136489868, "learning_rate": 0.0001, "loss": 0.0145, "step": 188720 }, { "epoch": 1241.6447368421052, "grad_norm": 1.7905396223068237, "learning_rate": 0.0001, "loss": 0.016, "step": 188730 }, { "epoch": 1241.7105263157894, "grad_norm": 1.8321408033370972, "learning_rate": 0.0001, "loss": 0.0141, "step": 188740 }, { "epoch": 1241.7763157894738, "grad_norm": 1.6478861570358276, "learning_rate": 0.0001, "loss": 0.0137, "step": 188750 }, { "epoch": 1241.842105263158, "grad_norm": 1.7604302167892456, "learning_rate": 0.0001, "loss": 0.011, "step": 188760 }, { "epoch": 1241.907894736842, "grad_norm": 1.9463324546813965, "learning_rate": 0.0001, "loss": 0.0111, "step": 188770 }, { "epoch": 1241.9736842105262, "grad_norm": 1.461969256401062, "learning_rate": 0.0001, "loss": 0.012, "step": 188780 }, { "epoch": 1242.0394736842106, "grad_norm": 1.2992874383926392, "learning_rate": 0.0001, "loss": 0.0147, "step": 188790 }, { "epoch": 1242.1052631578948, "grad_norm": 1.097364068031311, "learning_rate": 0.0001, "loss": 0.011, "step": 188800 }, { "epoch": 1242.171052631579, "grad_norm": 1.2398276329040527, "learning_rate": 0.0001, "loss": 0.0111, "step": 188810 }, { "epoch": 1242.2368421052631, "grad_norm": 1.431533694267273, "learning_rate": 0.0001, "loss": 0.0128, "step": 188820 }, { "epoch": 1242.3026315789473, "grad_norm": 1.3040326833724976, "learning_rate": 0.0001, "loss": 0.012, "step": 188830 }, { "epoch": 1242.3684210526317, "grad_norm": 1.5704102516174316, "learning_rate": 0.0001, "loss": 0.0143, "step": 188840 }, { "epoch": 1242.4342105263158, "grad_norm": 1.647234320640564, "learning_rate": 0.0001, "loss": 0.0082, "step": 188850 }, { "epoch": 1242.5, "grad_norm": 1.4851460456848145, "learning_rate": 0.0001, "loss": 0.0125, "step": 188860 }, { "epoch": 1242.5657894736842, "grad_norm": 0.9322664141654968, "learning_rate": 0.0001, "loss": 0.0106, "step": 188870 }, { "epoch": 1242.6315789473683, "grad_norm": 1.3439987897872925, "learning_rate": 0.0001, "loss": 0.0128, "step": 188880 }, { "epoch": 1242.6973684210527, "grad_norm": 1.3179444074630737, "learning_rate": 0.0001, "loss": 0.0132, "step": 188890 }, { "epoch": 1242.7631578947369, "grad_norm": 1.1636196374893188, "learning_rate": 0.0001, "loss": 0.013, "step": 188900 }, { "epoch": 1242.828947368421, "grad_norm": 1.541171669960022, "learning_rate": 0.0001, "loss": 0.013, "step": 188910 }, { "epoch": 1242.8947368421052, "grad_norm": 1.206251621246338, "learning_rate": 0.0001, "loss": 0.0128, "step": 188920 }, { "epoch": 1242.9605263157894, "grad_norm": 1.8435523509979248, "learning_rate": 0.0001, "loss": 0.0081, "step": 188930 }, { "epoch": 1243.0263157894738, "grad_norm": 1.6505327224731445, "learning_rate": 0.0001, "loss": 0.0088, "step": 188940 }, { "epoch": 1243.092105263158, "grad_norm": 1.427636981010437, "learning_rate": 0.0001, "loss": 0.0091, "step": 188950 }, { "epoch": 1243.157894736842, "grad_norm": 1.3567993640899658, "learning_rate": 0.0001, "loss": 0.0166, "step": 188960 }, { "epoch": 1243.2236842105262, "grad_norm": 1.41843581199646, "learning_rate": 0.0001, "loss": 0.0109, "step": 188970 }, { "epoch": 1243.2894736842106, "grad_norm": 1.4685204029083252, "learning_rate": 0.0001, "loss": 0.01, "step": 188980 }, { "epoch": 1243.3552631578948, "grad_norm": 1.1298918724060059, "learning_rate": 0.0001, "loss": 0.0104, "step": 188990 }, { "epoch": 1243.421052631579, "grad_norm": 1.2800707817077637, "learning_rate": 0.0001, "loss": 0.0094, "step": 189000 }, { "epoch": 1243.4868421052631, "grad_norm": 1.416928768157959, "learning_rate": 0.0001, "loss": 0.0109, "step": 189010 }, { "epoch": 1243.5526315789473, "grad_norm": 1.5628687143325806, "learning_rate": 0.0001, "loss": 0.0098, "step": 189020 }, { "epoch": 1243.6184210526317, "grad_norm": 1.2126277685165405, "learning_rate": 0.0001, "loss": 0.0168, "step": 189030 }, { "epoch": 1243.6842105263158, "grad_norm": 1.6904410123825073, "learning_rate": 0.0001, "loss": 0.0119, "step": 189040 }, { "epoch": 1243.75, "grad_norm": 1.244384527206421, "learning_rate": 0.0001, "loss": 0.0148, "step": 189050 }, { "epoch": 1243.8157894736842, "grad_norm": 1.4827367067337036, "learning_rate": 0.0001, "loss": 0.0112, "step": 189060 }, { "epoch": 1243.8815789473683, "grad_norm": 1.1831129789352417, "learning_rate": 0.0001, "loss": 0.0167, "step": 189070 }, { "epoch": 1243.9473684210527, "grad_norm": 1.2990151643753052, "learning_rate": 0.0001, "loss": 0.0105, "step": 189080 }, { "epoch": 1244.0131578947369, "grad_norm": 1.9535707235336304, "learning_rate": 0.0001, "loss": 0.0107, "step": 189090 }, { "epoch": 1244.078947368421, "grad_norm": 1.3884416818618774, "learning_rate": 0.0001, "loss": 0.0127, "step": 189100 }, { "epoch": 1244.1447368421052, "grad_norm": 1.655884027481079, "learning_rate": 0.0001, "loss": 0.0124, "step": 189110 }, { "epoch": 1244.2105263157894, "grad_norm": 1.2264890670776367, "learning_rate": 0.0001, "loss": 0.0116, "step": 189120 }, { "epoch": 1244.2763157894738, "grad_norm": 2.013638496398926, "learning_rate": 0.0001, "loss": 0.0109, "step": 189130 }, { "epoch": 1244.342105263158, "grad_norm": 1.4915450811386108, "learning_rate": 0.0001, "loss": 0.0115, "step": 189140 }, { "epoch": 1244.407894736842, "grad_norm": 1.47694993019104, "learning_rate": 0.0001, "loss": 0.0102, "step": 189150 }, { "epoch": 1244.4736842105262, "grad_norm": 0.936847448348999, "learning_rate": 0.0001, "loss": 0.0146, "step": 189160 }, { "epoch": 1244.5394736842106, "grad_norm": 1.3996247053146362, "learning_rate": 0.0001, "loss": 0.0099, "step": 189170 }, { "epoch": 1244.6052631578948, "grad_norm": 1.0557795763015747, "learning_rate": 0.0001, "loss": 0.0097, "step": 189180 }, { "epoch": 1244.671052631579, "grad_norm": 1.3839939832687378, "learning_rate": 0.0001, "loss": 0.0129, "step": 189190 }, { "epoch": 1244.7368421052631, "grad_norm": 1.8998852968215942, "learning_rate": 0.0001, "loss": 0.0161, "step": 189200 }, { "epoch": 1244.8026315789473, "grad_norm": 1.6814992427825928, "learning_rate": 0.0001, "loss": 0.0127, "step": 189210 }, { "epoch": 1244.8684210526317, "grad_norm": 1.6075084209442139, "learning_rate": 0.0001, "loss": 0.0101, "step": 189220 }, { "epoch": 1244.9342105263158, "grad_norm": 1.1196130514144897, "learning_rate": 0.0001, "loss": 0.0114, "step": 189230 }, { "epoch": 1245.0, "grad_norm": 1.6660783290863037, "learning_rate": 0.0001, "loss": 0.0112, "step": 189240 }, { "epoch": 1245.0657894736842, "grad_norm": 1.2243919372558594, "learning_rate": 0.0001, "loss": 0.0132, "step": 189250 }, { "epoch": 1245.1315789473683, "grad_norm": 1.1593178510665894, "learning_rate": 0.0001, "loss": 0.0107, "step": 189260 }, { "epoch": 1245.1973684210527, "grad_norm": 1.2236372232437134, "learning_rate": 0.0001, "loss": 0.0094, "step": 189270 }, { "epoch": 1245.2631578947369, "grad_norm": 1.498616099357605, "learning_rate": 0.0001, "loss": 0.0162, "step": 189280 }, { "epoch": 1245.328947368421, "grad_norm": 1.2673587799072266, "learning_rate": 0.0001, "loss": 0.0096, "step": 189290 }, { "epoch": 1245.3947368421052, "grad_norm": 1.1079916954040527, "learning_rate": 0.0001, "loss": 0.0096, "step": 189300 }, { "epoch": 1245.4605263157894, "grad_norm": 1.1374287605285645, "learning_rate": 0.0001, "loss": 0.0095, "step": 189310 }, { "epoch": 1245.5263157894738, "grad_norm": 1.2303516864776611, "learning_rate": 0.0001, "loss": 0.0117, "step": 189320 }, { "epoch": 1245.592105263158, "grad_norm": 1.4187815189361572, "learning_rate": 0.0001, "loss": 0.0088, "step": 189330 }, { "epoch": 1245.657894736842, "grad_norm": 1.5332001447677612, "learning_rate": 0.0001, "loss": 0.0111, "step": 189340 }, { "epoch": 1245.7236842105262, "grad_norm": 1.2719203233718872, "learning_rate": 0.0001, "loss": 0.0109, "step": 189350 }, { "epoch": 1245.7894736842106, "grad_norm": 2.081799268722534, "learning_rate": 0.0001, "loss": 0.0138, "step": 189360 }, { "epoch": 1245.8552631578948, "grad_norm": 1.713487148284912, "learning_rate": 0.0001, "loss": 0.0113, "step": 189370 }, { "epoch": 1245.921052631579, "grad_norm": 1.796809434890747, "learning_rate": 0.0001, "loss": 0.0184, "step": 189380 }, { "epoch": 1245.9868421052631, "grad_norm": 1.103472352027893, "learning_rate": 0.0001, "loss": 0.011, "step": 189390 }, { "epoch": 1246.0526315789473, "grad_norm": 1.3745062351226807, "learning_rate": 0.0001, "loss": 0.0137, "step": 189400 }, { "epoch": 1246.1184210526317, "grad_norm": 1.3763147592544556, "learning_rate": 0.0001, "loss": 0.008, "step": 189410 }, { "epoch": 1246.1842105263158, "grad_norm": 1.044684886932373, "learning_rate": 0.0001, "loss": 0.0114, "step": 189420 }, { "epoch": 1246.25, "grad_norm": 1.550567865371704, "learning_rate": 0.0001, "loss": 0.0114, "step": 189430 }, { "epoch": 1246.3157894736842, "grad_norm": 1.6554319858551025, "learning_rate": 0.0001, "loss": 0.0172, "step": 189440 }, { "epoch": 1246.3815789473683, "grad_norm": 1.5499228239059448, "learning_rate": 0.0001, "loss": 0.0111, "step": 189450 }, { "epoch": 1246.4473684210527, "grad_norm": 1.3780838251113892, "learning_rate": 0.0001, "loss": 0.0145, "step": 189460 }, { "epoch": 1246.5131578947369, "grad_norm": 1.3796188831329346, "learning_rate": 0.0001, "loss": 0.0092, "step": 189470 }, { "epoch": 1246.578947368421, "grad_norm": 1.414645791053772, "learning_rate": 0.0001, "loss": 0.013, "step": 189480 }, { "epoch": 1246.6447368421052, "grad_norm": 1.729429006576538, "learning_rate": 0.0001, "loss": 0.0098, "step": 189490 }, { "epoch": 1246.7105263157894, "grad_norm": 1.2156496047973633, "learning_rate": 0.0001, "loss": 0.0124, "step": 189500 }, { "epoch": 1246.7763157894738, "grad_norm": 1.8882704973220825, "learning_rate": 0.0001, "loss": 0.0099, "step": 189510 }, { "epoch": 1246.842105263158, "grad_norm": 2.0644149780273438, "learning_rate": 0.0001, "loss": 0.0138, "step": 189520 }, { "epoch": 1246.907894736842, "grad_norm": 1.4637160301208496, "learning_rate": 0.0001, "loss": 0.0097, "step": 189530 }, { "epoch": 1246.9736842105262, "grad_norm": 1.3734159469604492, "learning_rate": 0.0001, "loss": 0.0107, "step": 189540 }, { "epoch": 1247.0394736842106, "grad_norm": 1.4401284456253052, "learning_rate": 0.0001, "loss": 0.0127, "step": 189550 }, { "epoch": 1247.1052631578948, "grad_norm": 1.3940129280090332, "learning_rate": 0.0001, "loss": 0.0098, "step": 189560 }, { "epoch": 1247.171052631579, "grad_norm": 1.6251165866851807, "learning_rate": 0.0001, "loss": 0.0124, "step": 189570 }, { "epoch": 1247.2368421052631, "grad_norm": 1.3453118801116943, "learning_rate": 0.0001, "loss": 0.0144, "step": 189580 }, { "epoch": 1247.3026315789473, "grad_norm": 1.2882159948349, "learning_rate": 0.0001, "loss": 0.0106, "step": 189590 }, { "epoch": 1247.3684210526317, "grad_norm": 1.51069176197052, "learning_rate": 0.0001, "loss": 0.013, "step": 189600 }, { "epoch": 1247.4342105263158, "grad_norm": 1.2701802253723145, "learning_rate": 0.0001, "loss": 0.0101, "step": 189610 }, { "epoch": 1247.5, "grad_norm": 1.6081314086914062, "learning_rate": 0.0001, "loss": 0.0107, "step": 189620 }, { "epoch": 1247.5657894736842, "grad_norm": 1.519302248954773, "learning_rate": 0.0001, "loss": 0.0113, "step": 189630 }, { "epoch": 1247.6315789473683, "grad_norm": 1.0871062278747559, "learning_rate": 0.0001, "loss": 0.0141, "step": 189640 }, { "epoch": 1247.6973684210527, "grad_norm": 1.0744022130966187, "learning_rate": 0.0001, "loss": 0.0107, "step": 189650 }, { "epoch": 1247.7631578947369, "grad_norm": 1.294748067855835, "learning_rate": 0.0001, "loss": 0.0136, "step": 189660 }, { "epoch": 1247.828947368421, "grad_norm": 1.5523370504379272, "learning_rate": 0.0001, "loss": 0.0111, "step": 189670 }, { "epoch": 1247.8947368421052, "grad_norm": 1.2805988788604736, "learning_rate": 0.0001, "loss": 0.0113, "step": 189680 }, { "epoch": 1247.9605263157894, "grad_norm": 1.1143829822540283, "learning_rate": 0.0001, "loss": 0.0144, "step": 189690 }, { "epoch": 1248.0263157894738, "grad_norm": 1.4559193849563599, "learning_rate": 0.0001, "loss": 0.0119, "step": 189700 }, { "epoch": 1248.092105263158, "grad_norm": 1.0076192617416382, "learning_rate": 0.0001, "loss": 0.0131, "step": 189710 }, { "epoch": 1248.157894736842, "grad_norm": 1.2520503997802734, "learning_rate": 0.0001, "loss": 0.0106, "step": 189720 }, { "epoch": 1248.2236842105262, "grad_norm": 1.1544119119644165, "learning_rate": 0.0001, "loss": 0.0114, "step": 189730 }, { "epoch": 1248.2894736842106, "grad_norm": 1.6735754013061523, "learning_rate": 0.0001, "loss": 0.0098, "step": 189740 }, { "epoch": 1248.3552631578948, "grad_norm": 1.1997829675674438, "learning_rate": 0.0001, "loss": 0.009, "step": 189750 }, { "epoch": 1248.421052631579, "grad_norm": 1.9764834642410278, "learning_rate": 0.0001, "loss": 0.0138, "step": 189760 }, { "epoch": 1248.4868421052631, "grad_norm": 1.3676649332046509, "learning_rate": 0.0001, "loss": 0.0123, "step": 189770 }, { "epoch": 1248.5526315789473, "grad_norm": 1.2702423334121704, "learning_rate": 0.0001, "loss": 0.0123, "step": 189780 }, { "epoch": 1248.6184210526317, "grad_norm": 1.4028338193893433, "learning_rate": 0.0001, "loss": 0.0154, "step": 189790 }, { "epoch": 1248.6842105263158, "grad_norm": 1.3201837539672852, "learning_rate": 0.0001, "loss": 0.0103, "step": 189800 }, { "epoch": 1248.75, "grad_norm": 1.2628744840621948, "learning_rate": 0.0001, "loss": 0.0138, "step": 189810 }, { "epoch": 1248.8157894736842, "grad_norm": 1.368425965309143, "learning_rate": 0.0001, "loss": 0.0117, "step": 189820 }, { "epoch": 1248.8815789473683, "grad_norm": 1.327419638633728, "learning_rate": 0.0001, "loss": 0.0116, "step": 189830 }, { "epoch": 1248.9473684210527, "grad_norm": 1.11650550365448, "learning_rate": 0.0001, "loss": 0.0134, "step": 189840 }, { "epoch": 1249.0131578947369, "grad_norm": 1.7066142559051514, "learning_rate": 0.0001, "loss": 0.0108, "step": 189850 }, { "epoch": 1249.078947368421, "grad_norm": 1.3205373287200928, "learning_rate": 0.0001, "loss": 0.0128, "step": 189860 }, { "epoch": 1249.1447368421052, "grad_norm": 1.6488929986953735, "learning_rate": 0.0001, "loss": 0.0121, "step": 189870 }, { "epoch": 1249.2105263157894, "grad_norm": 1.549689769744873, "learning_rate": 0.0001, "loss": 0.0084, "step": 189880 }, { "epoch": 1249.2763157894738, "grad_norm": 1.3551318645477295, "learning_rate": 0.0001, "loss": 0.0165, "step": 189890 }, { "epoch": 1249.342105263158, "grad_norm": 1.647302269935608, "learning_rate": 0.0001, "loss": 0.0117, "step": 189900 }, { "epoch": 1249.407894736842, "grad_norm": 1.264799952507019, "learning_rate": 0.0001, "loss": 0.0148, "step": 189910 }, { "epoch": 1249.4736842105262, "grad_norm": 1.2793551683425903, "learning_rate": 0.0001, "loss": 0.0093, "step": 189920 }, { "epoch": 1249.5394736842106, "grad_norm": 1.3335483074188232, "learning_rate": 0.0001, "loss": 0.0144, "step": 189930 }, { "epoch": 1249.6052631578948, "grad_norm": 1.1720322370529175, "learning_rate": 0.0001, "loss": 0.0122, "step": 189940 }, { "epoch": 1249.671052631579, "grad_norm": 1.116316556930542, "learning_rate": 0.0001, "loss": 0.01, "step": 189950 }, { "epoch": 1249.7368421052631, "grad_norm": 1.4310252666473389, "learning_rate": 0.0001, "loss": 0.0139, "step": 189960 }, { "epoch": 1249.8026315789473, "grad_norm": 1.0430002212524414, "learning_rate": 0.0001, "loss": 0.0106, "step": 189970 }, { "epoch": 1249.8684210526317, "grad_norm": 1.5487061738967896, "learning_rate": 0.0001, "loss": 0.0125, "step": 189980 }, { "epoch": 1249.9342105263158, "grad_norm": 1.3532556295394897, "learning_rate": 0.0001, "loss": 0.01, "step": 189990 }, { "epoch": 1250.0, "grad_norm": 1.5788315534591675, "learning_rate": 0.0001, "loss": 0.0104, "step": 190000 }, { "epoch": 1250.0657894736842, "grad_norm": 1.3719502687454224, "learning_rate": 0.0001, "loss": 0.0128, "step": 190010 }, { "epoch": 1250.1315789473683, "grad_norm": 1.664448618888855, "learning_rate": 0.0001, "loss": 0.0111, "step": 190020 }, { "epoch": 1250.1973684210527, "grad_norm": 1.3845494985580444, "learning_rate": 0.0001, "loss": 0.0122, "step": 190030 }, { "epoch": 1250.2631578947369, "grad_norm": 1.6711679697036743, "learning_rate": 0.0001, "loss": 0.0086, "step": 190040 }, { "epoch": 1250.328947368421, "grad_norm": 1.3698298931121826, "learning_rate": 0.0001, "loss": 0.0101, "step": 190050 }, { "epoch": 1250.3947368421052, "grad_norm": 1.2129210233688354, "learning_rate": 0.0001, "loss": 0.0111, "step": 190060 }, { "epoch": 1250.4605263157894, "grad_norm": 1.2107239961624146, "learning_rate": 0.0001, "loss": 0.0102, "step": 190070 }, { "epoch": 1250.5263157894738, "grad_norm": 1.5592801570892334, "learning_rate": 0.0001, "loss": 0.0126, "step": 190080 }, { "epoch": 1250.592105263158, "grad_norm": 1.5552825927734375, "learning_rate": 0.0001, "loss": 0.0135, "step": 190090 }, { "epoch": 1250.657894736842, "grad_norm": 1.2636033296585083, "learning_rate": 0.0001, "loss": 0.0111, "step": 190100 }, { "epoch": 1250.7236842105262, "grad_norm": 1.410332441329956, "learning_rate": 0.0001, "loss": 0.0092, "step": 190110 }, { "epoch": 1250.7894736842106, "grad_norm": 1.6032708883285522, "learning_rate": 0.0001, "loss": 0.0146, "step": 190120 }, { "epoch": 1250.8552631578948, "grad_norm": 1.2786768674850464, "learning_rate": 0.0001, "loss": 0.0128, "step": 190130 }, { "epoch": 1250.921052631579, "grad_norm": 1.3591766357421875, "learning_rate": 0.0001, "loss": 0.0125, "step": 190140 }, { "epoch": 1250.9868421052631, "grad_norm": 1.0541025400161743, "learning_rate": 0.0001, "loss": 0.0128, "step": 190150 }, { "epoch": 1251.0526315789473, "grad_norm": 1.7094758749008179, "learning_rate": 0.0001, "loss": 0.0083, "step": 190160 }, { "epoch": 1251.1184210526317, "grad_norm": 1.2273483276367188, "learning_rate": 0.0001, "loss": 0.0182, "step": 190170 }, { "epoch": 1251.1842105263158, "grad_norm": 1.1227610111236572, "learning_rate": 0.0001, "loss": 0.0104, "step": 190180 }, { "epoch": 1251.25, "grad_norm": 1.5208064317703247, "learning_rate": 0.0001, "loss": 0.0085, "step": 190190 }, { "epoch": 1251.3157894736842, "grad_norm": 1.5492832660675049, "learning_rate": 0.0001, "loss": 0.0172, "step": 190200 }, { "epoch": 1251.3815789473683, "grad_norm": 1.0976364612579346, "learning_rate": 0.0001, "loss": 0.014, "step": 190210 }, { "epoch": 1251.4473684210527, "grad_norm": 1.5065250396728516, "learning_rate": 0.0001, "loss": 0.0096, "step": 190220 }, { "epoch": 1251.5131578947369, "grad_norm": 1.586617350578308, "learning_rate": 0.0001, "loss": 0.0119, "step": 190230 }, { "epoch": 1251.578947368421, "grad_norm": 1.6507632732391357, "learning_rate": 0.0001, "loss": 0.0114, "step": 190240 }, { "epoch": 1251.6447368421052, "grad_norm": 1.4912408590316772, "learning_rate": 0.0001, "loss": 0.0114, "step": 190250 }, { "epoch": 1251.7105263157894, "grad_norm": 1.2673338651657104, "learning_rate": 0.0001, "loss": 0.0134, "step": 190260 }, { "epoch": 1251.7763157894738, "grad_norm": 2.046435594558716, "learning_rate": 0.0001, "loss": 0.0114, "step": 190270 }, { "epoch": 1251.842105263158, "grad_norm": 1.666674256324768, "learning_rate": 0.0001, "loss": 0.0103, "step": 190280 }, { "epoch": 1251.907894736842, "grad_norm": 1.6258139610290527, "learning_rate": 0.0001, "loss": 0.0107, "step": 190290 }, { "epoch": 1251.9736842105262, "grad_norm": 1.5544371604919434, "learning_rate": 0.0001, "loss": 0.0103, "step": 190300 }, { "epoch": 1252.0394736842106, "grad_norm": 1.4277957677841187, "learning_rate": 0.0001, "loss": 0.0118, "step": 190310 }, { "epoch": 1252.1052631578948, "grad_norm": 1.4460911750793457, "learning_rate": 0.0001, "loss": 0.0145, "step": 190320 }, { "epoch": 1252.171052631579, "grad_norm": 1.796073317527771, "learning_rate": 0.0001, "loss": 0.009, "step": 190330 }, { "epoch": 1252.2368421052631, "grad_norm": 1.4430218935012817, "learning_rate": 0.0001, "loss": 0.0092, "step": 190340 }, { "epoch": 1252.3026315789473, "grad_norm": 1.6279692649841309, "learning_rate": 0.0001, "loss": 0.0093, "step": 190350 }, { "epoch": 1252.3684210526317, "grad_norm": 0.9765366911888123, "learning_rate": 0.0001, "loss": 0.0106, "step": 190360 }, { "epoch": 1252.4342105263158, "grad_norm": 1.3538861274719238, "learning_rate": 0.0001, "loss": 0.0136, "step": 190370 }, { "epoch": 1252.5, "grad_norm": 1.5953599214553833, "learning_rate": 0.0001, "loss": 0.0149, "step": 190380 }, { "epoch": 1252.5657894736842, "grad_norm": 1.3542821407318115, "learning_rate": 0.0001, "loss": 0.0107, "step": 190390 }, { "epoch": 1252.6315789473683, "grad_norm": 1.2282804250717163, "learning_rate": 0.0001, "loss": 0.0105, "step": 190400 }, { "epoch": 1252.6973684210527, "grad_norm": 1.706947684288025, "learning_rate": 0.0001, "loss": 0.0113, "step": 190410 }, { "epoch": 1252.7631578947369, "grad_norm": 1.194290280342102, "learning_rate": 0.0001, "loss": 0.0145, "step": 190420 }, { "epoch": 1252.828947368421, "grad_norm": 1.3038921356201172, "learning_rate": 0.0001, "loss": 0.012, "step": 190430 }, { "epoch": 1252.8947368421052, "grad_norm": 1.6871020793914795, "learning_rate": 0.0001, "loss": 0.0102, "step": 190440 }, { "epoch": 1252.9605263157894, "grad_norm": 1.5476869344711304, "learning_rate": 0.0001, "loss": 0.0147, "step": 190450 }, { "epoch": 1253.0263157894738, "grad_norm": 1.1597234010696411, "learning_rate": 0.0001, "loss": 0.0108, "step": 190460 }, { "epoch": 1253.092105263158, "grad_norm": 1.21586275100708, "learning_rate": 0.0001, "loss": 0.0144, "step": 190470 }, { "epoch": 1253.157894736842, "grad_norm": 1.1218515634536743, "learning_rate": 0.0001, "loss": 0.0106, "step": 190480 }, { "epoch": 1253.2236842105262, "grad_norm": 1.3199115991592407, "learning_rate": 0.0001, "loss": 0.0103, "step": 190490 }, { "epoch": 1253.2894736842106, "grad_norm": 1.5963683128356934, "learning_rate": 0.0001, "loss": 0.0129, "step": 190500 }, { "epoch": 1253.3552631578948, "grad_norm": 1.34187912940979, "learning_rate": 0.0001, "loss": 0.0119, "step": 190510 }, { "epoch": 1253.421052631579, "grad_norm": 0.9594612717628479, "learning_rate": 0.0001, "loss": 0.0122, "step": 190520 }, { "epoch": 1253.4868421052631, "grad_norm": 1.5940543413162231, "learning_rate": 0.0001, "loss": 0.0111, "step": 190530 }, { "epoch": 1253.5526315789473, "grad_norm": 1.8667718172073364, "learning_rate": 0.0001, "loss": 0.0114, "step": 190540 }, { "epoch": 1253.6184210526317, "grad_norm": 1.5602627992630005, "learning_rate": 0.0001, "loss": 0.0118, "step": 190550 }, { "epoch": 1253.6842105263158, "grad_norm": 1.0933715105056763, "learning_rate": 0.0001, "loss": 0.0118, "step": 190560 }, { "epoch": 1253.75, "grad_norm": 1.0730783939361572, "learning_rate": 0.0001, "loss": 0.0142, "step": 190570 }, { "epoch": 1253.8157894736842, "grad_norm": 1.585573434829712, "learning_rate": 0.0001, "loss": 0.0126, "step": 190580 }, { "epoch": 1253.8815789473683, "grad_norm": 1.1127636432647705, "learning_rate": 0.0001, "loss": 0.012, "step": 190590 }, { "epoch": 1253.9473684210527, "grad_norm": 1.4591351747512817, "learning_rate": 0.0001, "loss": 0.0104, "step": 190600 }, { "epoch": 1254.0131578947369, "grad_norm": 1.2149617671966553, "learning_rate": 0.0001, "loss": 0.0135, "step": 190610 }, { "epoch": 1254.078947368421, "grad_norm": 1.2622812986373901, "learning_rate": 0.0001, "loss": 0.0098, "step": 190620 }, { "epoch": 1254.1447368421052, "grad_norm": 1.850969672203064, "learning_rate": 0.0001, "loss": 0.0117, "step": 190630 }, { "epoch": 1254.2105263157894, "grad_norm": 1.6436702013015747, "learning_rate": 0.0001, "loss": 0.0128, "step": 190640 }, { "epoch": 1254.2763157894738, "grad_norm": 1.5692013502120972, "learning_rate": 0.0001, "loss": 0.0159, "step": 190650 }, { "epoch": 1254.342105263158, "grad_norm": 1.5510470867156982, "learning_rate": 0.0001, "loss": 0.0088, "step": 190660 }, { "epoch": 1254.407894736842, "grad_norm": 1.2766847610473633, "learning_rate": 0.0001, "loss": 0.0143, "step": 190670 }, { "epoch": 1254.4736842105262, "grad_norm": 1.5815972089767456, "learning_rate": 0.0001, "loss": 0.0173, "step": 190680 }, { "epoch": 1254.5394736842106, "grad_norm": 1.4993146657943726, "learning_rate": 0.0001, "loss": 0.0088, "step": 190690 }, { "epoch": 1254.6052631578948, "grad_norm": 1.2724297046661377, "learning_rate": 0.0001, "loss": 0.0108, "step": 190700 }, { "epoch": 1254.671052631579, "grad_norm": 1.4601752758026123, "learning_rate": 0.0001, "loss": 0.0128, "step": 190710 }, { "epoch": 1254.7368421052631, "grad_norm": 1.8703995943069458, "learning_rate": 0.0001, "loss": 0.0122, "step": 190720 }, { "epoch": 1254.8026315789473, "grad_norm": 1.5326392650604248, "learning_rate": 0.0001, "loss": 0.0084, "step": 190730 }, { "epoch": 1254.8684210526317, "grad_norm": 1.3046648502349854, "learning_rate": 0.0001, "loss": 0.0094, "step": 190740 }, { "epoch": 1254.9342105263158, "grad_norm": 1.525443434715271, "learning_rate": 0.0001, "loss": 0.0101, "step": 190750 }, { "epoch": 1255.0, "grad_norm": 1.7460570335388184, "learning_rate": 0.0001, "loss": 0.01, "step": 190760 }, { "epoch": 1255.0657894736842, "grad_norm": 1.2808879613876343, "learning_rate": 0.0001, "loss": 0.0136, "step": 190770 }, { "epoch": 1255.1315789473683, "grad_norm": 1.5430388450622559, "learning_rate": 0.0001, "loss": 0.0131, "step": 190780 }, { "epoch": 1255.1973684210527, "grad_norm": 1.6189066171646118, "learning_rate": 0.0001, "loss": 0.0134, "step": 190790 }, { "epoch": 1255.2631578947369, "grad_norm": 1.6164370775222778, "learning_rate": 0.0001, "loss": 0.0103, "step": 190800 }, { "epoch": 1255.328947368421, "grad_norm": 1.899985432624817, "learning_rate": 0.0001, "loss": 0.0086, "step": 190810 }, { "epoch": 1255.3947368421052, "grad_norm": 1.5315253734588623, "learning_rate": 0.0001, "loss": 0.016, "step": 190820 }, { "epoch": 1255.4605263157894, "grad_norm": 1.1712099313735962, "learning_rate": 0.0001, "loss": 0.0117, "step": 190830 }, { "epoch": 1255.5263157894738, "grad_norm": 1.265657901763916, "learning_rate": 0.0001, "loss": 0.0108, "step": 190840 }, { "epoch": 1255.592105263158, "grad_norm": 0.8383251428604126, "learning_rate": 0.0001, "loss": 0.0119, "step": 190850 }, { "epoch": 1255.657894736842, "grad_norm": 1.153056263923645, "learning_rate": 0.0001, "loss": 0.0112, "step": 190860 }, { "epoch": 1255.7236842105262, "grad_norm": 1.4510115385055542, "learning_rate": 0.0001, "loss": 0.014, "step": 190870 }, { "epoch": 1255.7894736842106, "grad_norm": 1.6297136545181274, "learning_rate": 0.0001, "loss": 0.0107, "step": 190880 }, { "epoch": 1255.8552631578948, "grad_norm": 1.4515138864517212, "learning_rate": 0.0001, "loss": 0.0121, "step": 190890 }, { "epoch": 1255.921052631579, "grad_norm": 1.2852915525436401, "learning_rate": 0.0001, "loss": 0.0178, "step": 190900 }, { "epoch": 1255.9868421052631, "grad_norm": 1.3383771181106567, "learning_rate": 0.0001, "loss": 0.011, "step": 190910 }, { "epoch": 1256.0526315789473, "grad_norm": 1.5045208930969238, "learning_rate": 0.0001, "loss": 0.0114, "step": 190920 }, { "epoch": 1256.1184210526317, "grad_norm": 1.5198183059692383, "learning_rate": 0.0001, "loss": 0.0126, "step": 190930 }, { "epoch": 1256.1842105263158, "grad_norm": 1.4660662412643433, "learning_rate": 0.0001, "loss": 0.0105, "step": 190940 }, { "epoch": 1256.25, "grad_norm": 1.6902488470077515, "learning_rate": 0.0001, "loss": 0.0152, "step": 190950 }, { "epoch": 1256.3157894736842, "grad_norm": 1.6587859392166138, "learning_rate": 0.0001, "loss": 0.0114, "step": 190960 }, { "epoch": 1256.3815789473683, "grad_norm": 1.0616333484649658, "learning_rate": 0.0001, "loss": 0.0135, "step": 190970 }, { "epoch": 1256.4473684210527, "grad_norm": 1.5454760789871216, "learning_rate": 0.0001, "loss": 0.0135, "step": 190980 }, { "epoch": 1256.5131578947369, "grad_norm": 1.7550216913223267, "learning_rate": 0.0001, "loss": 0.0097, "step": 190990 }, { "epoch": 1256.578947368421, "grad_norm": 1.2714236974716187, "learning_rate": 0.0001, "loss": 0.0115, "step": 191000 }, { "epoch": 1256.6447368421052, "grad_norm": 1.5641555786132812, "learning_rate": 0.0001, "loss": 0.0119, "step": 191010 }, { "epoch": 1256.7105263157894, "grad_norm": 1.0524654388427734, "learning_rate": 0.0001, "loss": 0.0103, "step": 191020 }, { "epoch": 1256.7763157894738, "grad_norm": 1.3678069114685059, "learning_rate": 0.0001, "loss": 0.0136, "step": 191030 }, { "epoch": 1256.842105263158, "grad_norm": 1.2396842241287231, "learning_rate": 0.0001, "loss": 0.0117, "step": 191040 }, { "epoch": 1256.907894736842, "grad_norm": 1.2225215435028076, "learning_rate": 0.0001, "loss": 0.0123, "step": 191050 }, { "epoch": 1256.9736842105262, "grad_norm": 1.3660228252410889, "learning_rate": 0.0001, "loss": 0.0115, "step": 191060 }, { "epoch": 1257.0394736842106, "grad_norm": 1.6938291788101196, "learning_rate": 0.0001, "loss": 0.0195, "step": 191070 }, { "epoch": 1257.1052631578948, "grad_norm": 1.353825330734253, "learning_rate": 0.0001, "loss": 0.0116, "step": 191080 }, { "epoch": 1257.171052631579, "grad_norm": 1.3855561017990112, "learning_rate": 0.0001, "loss": 0.0119, "step": 191090 }, { "epoch": 1257.2368421052631, "grad_norm": 1.2827026844024658, "learning_rate": 0.0001, "loss": 0.0109, "step": 191100 }, { "epoch": 1257.3026315789473, "grad_norm": 1.53569757938385, "learning_rate": 0.0001, "loss": 0.0125, "step": 191110 }, { "epoch": 1257.3684210526317, "grad_norm": 1.626470685005188, "learning_rate": 0.0001, "loss": 0.0135, "step": 191120 }, { "epoch": 1257.4342105263158, "grad_norm": 1.454260230064392, "learning_rate": 0.0001, "loss": 0.0097, "step": 191130 }, { "epoch": 1257.5, "grad_norm": 1.2690001726150513, "learning_rate": 0.0001, "loss": 0.0168, "step": 191140 }, { "epoch": 1257.5657894736842, "grad_norm": 1.6682275533676147, "learning_rate": 0.0001, "loss": 0.0082, "step": 191150 }, { "epoch": 1257.6315789473683, "grad_norm": 1.3062965869903564, "learning_rate": 0.0001, "loss": 0.0121, "step": 191160 }, { "epoch": 1257.6973684210527, "grad_norm": 1.3287923336029053, "learning_rate": 0.0001, "loss": 0.0121, "step": 191170 }, { "epoch": 1257.7631578947369, "grad_norm": 1.7857649326324463, "learning_rate": 0.0001, "loss": 0.013, "step": 191180 }, { "epoch": 1257.828947368421, "grad_norm": 1.6642358303070068, "learning_rate": 0.0001, "loss": 0.0121, "step": 191190 }, { "epoch": 1257.8947368421052, "grad_norm": 1.5534014701843262, "learning_rate": 0.0001, "loss": 0.0099, "step": 191200 }, { "epoch": 1257.9605263157894, "grad_norm": 1.5319881439208984, "learning_rate": 0.0001, "loss": 0.0144, "step": 191210 }, { "epoch": 1258.0263157894738, "grad_norm": 1.2136201858520508, "learning_rate": 0.0001, "loss": 0.0118, "step": 191220 }, { "epoch": 1258.092105263158, "grad_norm": 1.6551669836044312, "learning_rate": 0.0001, "loss": 0.0112, "step": 191230 }, { "epoch": 1258.157894736842, "grad_norm": 1.7054789066314697, "learning_rate": 0.0001, "loss": 0.0137, "step": 191240 }, { "epoch": 1258.2236842105262, "grad_norm": 1.7269275188446045, "learning_rate": 0.0001, "loss": 0.0084, "step": 191250 }, { "epoch": 1258.2894736842106, "grad_norm": 1.426511526107788, "learning_rate": 0.0001, "loss": 0.0129, "step": 191260 }, { "epoch": 1258.3552631578948, "grad_norm": 1.4228838682174683, "learning_rate": 0.0001, "loss": 0.0128, "step": 191270 }, { "epoch": 1258.421052631579, "grad_norm": 1.2049365043640137, "learning_rate": 0.0001, "loss": 0.0124, "step": 191280 }, { "epoch": 1258.4868421052631, "grad_norm": 1.2521229982376099, "learning_rate": 0.0001, "loss": 0.0128, "step": 191290 }, { "epoch": 1258.5526315789473, "grad_norm": 1.5001434087753296, "learning_rate": 0.0001, "loss": 0.0111, "step": 191300 }, { "epoch": 1258.6184210526317, "grad_norm": 1.7611615657806396, "learning_rate": 0.0001, "loss": 0.0147, "step": 191310 }, { "epoch": 1258.6842105263158, "grad_norm": 1.9975165128707886, "learning_rate": 0.0001, "loss": 0.0126, "step": 191320 }, { "epoch": 1258.75, "grad_norm": 2.007824659347534, "learning_rate": 0.0001, "loss": 0.0119, "step": 191330 }, { "epoch": 1258.8157894736842, "grad_norm": 1.5822454690933228, "learning_rate": 0.0001, "loss": 0.0111, "step": 191340 }, { "epoch": 1258.8815789473683, "grad_norm": 1.3105270862579346, "learning_rate": 0.0001, "loss": 0.0098, "step": 191350 }, { "epoch": 1258.9473684210527, "grad_norm": 1.6545612812042236, "learning_rate": 0.0001, "loss": 0.0123, "step": 191360 }, { "epoch": 1259.0131578947369, "grad_norm": 1.278125524520874, "learning_rate": 0.0001, "loss": 0.0108, "step": 191370 }, { "epoch": 1259.078947368421, "grad_norm": 1.267965316772461, "learning_rate": 0.0001, "loss": 0.0136, "step": 191380 }, { "epoch": 1259.1447368421052, "grad_norm": 1.0020933151245117, "learning_rate": 0.0001, "loss": 0.0127, "step": 191390 }, { "epoch": 1259.2105263157894, "grad_norm": 1.2433074712753296, "learning_rate": 0.0001, "loss": 0.0132, "step": 191400 }, { "epoch": 1259.2763157894738, "grad_norm": 1.4634921550750732, "learning_rate": 0.0001, "loss": 0.0133, "step": 191410 }, { "epoch": 1259.342105263158, "grad_norm": 1.426160454750061, "learning_rate": 0.0001, "loss": 0.0098, "step": 191420 }, { "epoch": 1259.407894736842, "grad_norm": 1.4230906963348389, "learning_rate": 0.0001, "loss": 0.0119, "step": 191430 }, { "epoch": 1259.4736842105262, "grad_norm": 1.4460489749908447, "learning_rate": 0.0001, "loss": 0.0135, "step": 191440 }, { "epoch": 1259.5394736842106, "grad_norm": 1.5604302883148193, "learning_rate": 0.0001, "loss": 0.0116, "step": 191450 }, { "epoch": 1259.6052631578948, "grad_norm": 1.7160940170288086, "learning_rate": 0.0001, "loss": 0.0107, "step": 191460 }, { "epoch": 1259.671052631579, "grad_norm": 1.303029179573059, "learning_rate": 0.0001, "loss": 0.0134, "step": 191470 }, { "epoch": 1259.7368421052631, "grad_norm": 1.5148805379867554, "learning_rate": 0.0001, "loss": 0.0101, "step": 191480 }, { "epoch": 1259.8026315789473, "grad_norm": 1.215320110321045, "learning_rate": 0.0001, "loss": 0.0105, "step": 191490 }, { "epoch": 1259.8684210526317, "grad_norm": 1.4170331954956055, "learning_rate": 0.0001, "loss": 0.0095, "step": 191500 }, { "epoch": 1259.9342105263158, "grad_norm": 1.0388798713684082, "learning_rate": 0.0001, "loss": 0.0112, "step": 191510 }, { "epoch": 1260.0, "grad_norm": 1.2267718315124512, "learning_rate": 0.0001, "loss": 0.0104, "step": 191520 }, { "epoch": 1260.0657894736842, "grad_norm": 1.471057653427124, "learning_rate": 0.0001, "loss": 0.0131, "step": 191530 }, { "epoch": 1260.1315789473683, "grad_norm": 1.4447238445281982, "learning_rate": 0.0001, "loss": 0.0115, "step": 191540 }, { "epoch": 1260.1973684210527, "grad_norm": 1.6482760906219482, "learning_rate": 0.0001, "loss": 0.0102, "step": 191550 }, { "epoch": 1260.2631578947369, "grad_norm": 1.3965450525283813, "learning_rate": 0.0001, "loss": 0.0151, "step": 191560 }, { "epoch": 1260.328947368421, "grad_norm": 1.4942821264266968, "learning_rate": 0.0001, "loss": 0.0143, "step": 191570 }, { "epoch": 1260.3947368421052, "grad_norm": 1.6296881437301636, "learning_rate": 0.0001, "loss": 0.0129, "step": 191580 }, { "epoch": 1260.4605263157894, "grad_norm": 1.7367396354675293, "learning_rate": 0.0001, "loss": 0.0086, "step": 191590 }, { "epoch": 1260.5263157894738, "grad_norm": 1.8149445056915283, "learning_rate": 0.0001, "loss": 0.0124, "step": 191600 }, { "epoch": 1260.592105263158, "grad_norm": 1.7393450736999512, "learning_rate": 0.0001, "loss": 0.0125, "step": 191610 }, { "epoch": 1260.657894736842, "grad_norm": 1.8563323020935059, "learning_rate": 0.0001, "loss": 0.0116, "step": 191620 }, { "epoch": 1260.7236842105262, "grad_norm": 1.6682010889053345, "learning_rate": 0.0001, "loss": 0.01, "step": 191630 }, { "epoch": 1260.7894736842106, "grad_norm": 2.0320887565612793, "learning_rate": 0.0001, "loss": 0.0106, "step": 191640 }, { "epoch": 1260.8552631578948, "grad_norm": 2.7114739418029785, "learning_rate": 0.0001, "loss": 0.0115, "step": 191650 }, { "epoch": 1260.921052631579, "grad_norm": 2.2300217151641846, "learning_rate": 0.0001, "loss": 0.0142, "step": 191660 }, { "epoch": 1260.9868421052631, "grad_norm": 1.8059883117675781, "learning_rate": 0.0001, "loss": 0.0102, "step": 191670 }, { "epoch": 1261.0526315789473, "grad_norm": 1.7402253150939941, "learning_rate": 0.0001, "loss": 0.013, "step": 191680 }, { "epoch": 1261.1184210526317, "grad_norm": 1.3515090942382812, "learning_rate": 0.0001, "loss": 0.0103, "step": 191690 }, { "epoch": 1261.1842105263158, "grad_norm": 1.6627336740493774, "learning_rate": 0.0001, "loss": 0.0105, "step": 191700 }, { "epoch": 1261.25, "grad_norm": 1.637242078781128, "learning_rate": 0.0001, "loss": 0.0133, "step": 191710 }, { "epoch": 1261.3157894736842, "grad_norm": 1.4326914548873901, "learning_rate": 0.0001, "loss": 0.0097, "step": 191720 }, { "epoch": 1261.3815789473683, "grad_norm": 1.6895947456359863, "learning_rate": 0.0001, "loss": 0.0164, "step": 191730 }, { "epoch": 1261.4473684210527, "grad_norm": 1.7857955694198608, "learning_rate": 0.0001, "loss": 0.0128, "step": 191740 }, { "epoch": 1261.5131578947369, "grad_norm": 1.3858757019042969, "learning_rate": 0.0001, "loss": 0.0087, "step": 191750 }, { "epoch": 1261.578947368421, "grad_norm": 1.6404622793197632, "learning_rate": 0.0001, "loss": 0.0096, "step": 191760 }, { "epoch": 1261.6447368421052, "grad_norm": 1.6858524084091187, "learning_rate": 0.0001, "loss": 0.0123, "step": 191770 }, { "epoch": 1261.7105263157894, "grad_norm": 1.2182061672210693, "learning_rate": 0.0001, "loss": 0.0142, "step": 191780 }, { "epoch": 1261.7763157894738, "grad_norm": 1.0016107559204102, "learning_rate": 0.0001, "loss": 0.0083, "step": 191790 }, { "epoch": 1261.842105263158, "grad_norm": 1.194925308227539, "learning_rate": 0.0001, "loss": 0.0116, "step": 191800 }, { "epoch": 1261.907894736842, "grad_norm": 1.5095171928405762, "learning_rate": 0.0001, "loss": 0.0093, "step": 191810 }, { "epoch": 1261.9736842105262, "grad_norm": 1.7027227878570557, "learning_rate": 0.0001, "loss": 0.0102, "step": 191820 }, { "epoch": 1262.0394736842106, "grad_norm": 1.858562707901001, "learning_rate": 0.0001, "loss": 0.0117, "step": 191830 }, { "epoch": 1262.1052631578948, "grad_norm": 1.874465823173523, "learning_rate": 0.0001, "loss": 0.0113, "step": 191840 }, { "epoch": 1262.171052631579, "grad_norm": 1.2103296518325806, "learning_rate": 0.0001, "loss": 0.0096, "step": 191850 }, { "epoch": 1262.2368421052631, "grad_norm": 1.6160951852798462, "learning_rate": 0.0001, "loss": 0.0153, "step": 191860 }, { "epoch": 1262.3026315789473, "grad_norm": 1.3358789682388306, "learning_rate": 0.0001, "loss": 0.009, "step": 191870 }, { "epoch": 1262.3684210526317, "grad_norm": 1.1438720226287842, "learning_rate": 0.0001, "loss": 0.0106, "step": 191880 }, { "epoch": 1262.4342105263158, "grad_norm": 1.4197324514389038, "learning_rate": 0.0001, "loss": 0.0111, "step": 191890 }, { "epoch": 1262.5, "grad_norm": 1.3763989210128784, "learning_rate": 0.0001, "loss": 0.0097, "step": 191900 }, { "epoch": 1262.5657894736842, "grad_norm": 1.3714754581451416, "learning_rate": 0.0001, "loss": 0.014, "step": 191910 }, { "epoch": 1262.6315789473683, "grad_norm": 1.503989815711975, "learning_rate": 0.0001, "loss": 0.0139, "step": 191920 }, { "epoch": 1262.6973684210527, "grad_norm": 1.2050484418869019, "learning_rate": 0.0001, "loss": 0.0128, "step": 191930 }, { "epoch": 1262.7631578947369, "grad_norm": 1.1349623203277588, "learning_rate": 0.0001, "loss": 0.0132, "step": 191940 }, { "epoch": 1262.828947368421, "grad_norm": 1.102834939956665, "learning_rate": 0.0001, "loss": 0.0108, "step": 191950 }, { "epoch": 1262.8947368421052, "grad_norm": 0.8997935056686401, "learning_rate": 0.0001, "loss": 0.0124, "step": 191960 }, { "epoch": 1262.9605263157894, "grad_norm": 1.3189176321029663, "learning_rate": 0.0001, "loss": 0.012, "step": 191970 }, { "epoch": 1263.0263157894738, "grad_norm": 1.3184282779693604, "learning_rate": 0.0001, "loss": 0.0097, "step": 191980 }, { "epoch": 1263.092105263158, "grad_norm": 1.2052809000015259, "learning_rate": 0.0001, "loss": 0.0156, "step": 191990 }, { "epoch": 1263.157894736842, "grad_norm": 1.3716421127319336, "learning_rate": 0.0001, "loss": 0.0141, "step": 192000 }, { "epoch": 1263.2236842105262, "grad_norm": 1.5055348873138428, "learning_rate": 0.0001, "loss": 0.0136, "step": 192010 }, { "epoch": 1263.2894736842106, "grad_norm": 1.4107284545898438, "learning_rate": 0.0001, "loss": 0.0094, "step": 192020 }, { "epoch": 1263.3552631578948, "grad_norm": 1.4552310705184937, "learning_rate": 0.0001, "loss": 0.0104, "step": 192030 }, { "epoch": 1263.421052631579, "grad_norm": 1.4062234163284302, "learning_rate": 0.0001, "loss": 0.0109, "step": 192040 }, { "epoch": 1263.4868421052631, "grad_norm": 1.100051999092102, "learning_rate": 0.0001, "loss": 0.0082, "step": 192050 }, { "epoch": 1263.5526315789473, "grad_norm": 1.475049614906311, "learning_rate": 0.0001, "loss": 0.0104, "step": 192060 }, { "epoch": 1263.6184210526317, "grad_norm": 1.508169174194336, "learning_rate": 0.0001, "loss": 0.0107, "step": 192070 }, { "epoch": 1263.6842105263158, "grad_norm": 1.2549591064453125, "learning_rate": 0.0001, "loss": 0.0122, "step": 192080 }, { "epoch": 1263.75, "grad_norm": 1.2174192667007446, "learning_rate": 0.0001, "loss": 0.0119, "step": 192090 }, { "epoch": 1263.8157894736842, "grad_norm": 1.2091870307922363, "learning_rate": 0.0001, "loss": 0.0114, "step": 192100 }, { "epoch": 1263.8815789473683, "grad_norm": 1.3363373279571533, "learning_rate": 0.0001, "loss": 0.0126, "step": 192110 }, { "epoch": 1263.9473684210527, "grad_norm": 1.2405507564544678, "learning_rate": 0.0001, "loss": 0.0144, "step": 192120 }, { "epoch": 1264.0131578947369, "grad_norm": 0.9098891615867615, "learning_rate": 0.0001, "loss": 0.0123, "step": 192130 }, { "epoch": 1264.078947368421, "grad_norm": 0.9148688316345215, "learning_rate": 0.0001, "loss": 0.0107, "step": 192140 }, { "epoch": 1264.1447368421052, "grad_norm": 1.3301359415054321, "learning_rate": 0.0001, "loss": 0.0106, "step": 192150 }, { "epoch": 1264.2105263157894, "grad_norm": 1.4964561462402344, "learning_rate": 0.0001, "loss": 0.0101, "step": 192160 }, { "epoch": 1264.2763157894738, "grad_norm": 1.0365371704101562, "learning_rate": 0.0001, "loss": 0.0125, "step": 192170 }, { "epoch": 1264.342105263158, "grad_norm": 1.303623914718628, "learning_rate": 0.0001, "loss": 0.0127, "step": 192180 }, { "epoch": 1264.407894736842, "grad_norm": 1.8314601182937622, "learning_rate": 0.0001, "loss": 0.013, "step": 192190 }, { "epoch": 1264.4736842105262, "grad_norm": 1.726592779159546, "learning_rate": 0.0001, "loss": 0.0101, "step": 192200 }, { "epoch": 1264.5394736842106, "grad_norm": 1.2396094799041748, "learning_rate": 0.0001, "loss": 0.0098, "step": 192210 }, { "epoch": 1264.6052631578948, "grad_norm": 1.6071609258651733, "learning_rate": 0.0001, "loss": 0.0144, "step": 192220 }, { "epoch": 1264.671052631579, "grad_norm": 1.841871976852417, "learning_rate": 0.0001, "loss": 0.0096, "step": 192230 }, { "epoch": 1264.7368421052631, "grad_norm": 1.1426730155944824, "learning_rate": 0.0001, "loss": 0.0119, "step": 192240 }, { "epoch": 1264.8026315789473, "grad_norm": 1.5859407186508179, "learning_rate": 0.0001, "loss": 0.0135, "step": 192250 }, { "epoch": 1264.8684210526317, "grad_norm": 1.4320513010025024, "learning_rate": 0.0001, "loss": 0.0105, "step": 192260 }, { "epoch": 1264.9342105263158, "grad_norm": 1.3662606477737427, "learning_rate": 0.0001, "loss": 0.0147, "step": 192270 }, { "epoch": 1265.0, "grad_norm": 1.5245001316070557, "learning_rate": 0.0001, "loss": 0.011, "step": 192280 }, { "epoch": 1265.0657894736842, "grad_norm": 1.5755460262298584, "learning_rate": 0.0001, "loss": 0.014, "step": 192290 }, { "epoch": 1265.1315789473683, "grad_norm": 1.4086629152297974, "learning_rate": 0.0001, "loss": 0.0102, "step": 192300 }, { "epoch": 1265.1973684210527, "grad_norm": 1.9000595808029175, "learning_rate": 0.0001, "loss": 0.0098, "step": 192310 }, { "epoch": 1265.2631578947369, "grad_norm": 1.1683077812194824, "learning_rate": 0.0001, "loss": 0.0117, "step": 192320 }, { "epoch": 1265.328947368421, "grad_norm": 1.2609524726867676, "learning_rate": 0.0001, "loss": 0.0129, "step": 192330 }, { "epoch": 1265.3947368421052, "grad_norm": 1.3410248756408691, "learning_rate": 0.0001, "loss": 0.0136, "step": 192340 }, { "epoch": 1265.4605263157894, "grad_norm": 1.29062819480896, "learning_rate": 0.0001, "loss": 0.0118, "step": 192350 }, { "epoch": 1265.5263157894738, "grad_norm": 0.9727499485015869, "learning_rate": 0.0001, "loss": 0.0093, "step": 192360 }, { "epoch": 1265.592105263158, "grad_norm": 1.520969271659851, "learning_rate": 0.0001, "loss": 0.0119, "step": 192370 }, { "epoch": 1265.657894736842, "grad_norm": 1.5251214504241943, "learning_rate": 0.0001, "loss": 0.013, "step": 192380 }, { "epoch": 1265.7236842105262, "grad_norm": 1.9422893524169922, "learning_rate": 0.0001, "loss": 0.0103, "step": 192390 }, { "epoch": 1265.7894736842106, "grad_norm": 1.0773531198501587, "learning_rate": 0.0001, "loss": 0.0129, "step": 192400 }, { "epoch": 1265.8552631578948, "grad_norm": 1.306951880455017, "learning_rate": 0.0001, "loss": 0.0104, "step": 192410 }, { "epoch": 1265.921052631579, "grad_norm": 1.4634400606155396, "learning_rate": 0.0001, "loss": 0.012, "step": 192420 }, { "epoch": 1265.9868421052631, "grad_norm": 1.9158499240875244, "learning_rate": 0.0001, "loss": 0.0093, "step": 192430 }, { "epoch": 1266.0526315789473, "grad_norm": 1.3879451751708984, "learning_rate": 0.0001, "loss": 0.0118, "step": 192440 }, { "epoch": 1266.1184210526317, "grad_norm": 2.0075926780700684, "learning_rate": 0.0001, "loss": 0.0114, "step": 192450 }, { "epoch": 1266.1842105263158, "grad_norm": 1.6351969242095947, "learning_rate": 0.0001, "loss": 0.0101, "step": 192460 }, { "epoch": 1266.25, "grad_norm": 1.3880047798156738, "learning_rate": 0.0001, "loss": 0.0103, "step": 192470 }, { "epoch": 1266.3157894736842, "grad_norm": 1.684583306312561, "learning_rate": 0.0001, "loss": 0.0127, "step": 192480 }, { "epoch": 1266.3815789473683, "grad_norm": 1.2137985229492188, "learning_rate": 0.0001, "loss": 0.0139, "step": 192490 }, { "epoch": 1266.4473684210527, "grad_norm": 1.3090797662734985, "learning_rate": 0.0001, "loss": 0.0109, "step": 192500 }, { "epoch": 1266.5131578947369, "grad_norm": 1.4489542245864868, "learning_rate": 0.0001, "loss": 0.0081, "step": 192510 }, { "epoch": 1266.578947368421, "grad_norm": 1.5294829607009888, "learning_rate": 0.0001, "loss": 0.0118, "step": 192520 }, { "epoch": 1266.6447368421052, "grad_norm": 1.5496721267700195, "learning_rate": 0.0001, "loss": 0.0121, "step": 192530 }, { "epoch": 1266.7105263157894, "grad_norm": 1.6464624404907227, "learning_rate": 0.0001, "loss": 0.0087, "step": 192540 }, { "epoch": 1266.7763157894738, "grad_norm": 1.2911632061004639, "learning_rate": 0.0001, "loss": 0.013, "step": 192550 }, { "epoch": 1266.842105263158, "grad_norm": 1.6624919176101685, "learning_rate": 0.0001, "loss": 0.0137, "step": 192560 }, { "epoch": 1266.907894736842, "grad_norm": 1.1178282499313354, "learning_rate": 0.0001, "loss": 0.0135, "step": 192570 }, { "epoch": 1266.9736842105262, "grad_norm": 1.221545934677124, "learning_rate": 0.0001, "loss": 0.0124, "step": 192580 }, { "epoch": 1267.0394736842106, "grad_norm": 1.4132145643234253, "learning_rate": 0.0001, "loss": 0.0115, "step": 192590 }, { "epoch": 1267.1052631578948, "grad_norm": 1.3228265047073364, "learning_rate": 0.0001, "loss": 0.0081, "step": 192600 }, { "epoch": 1267.171052631579, "grad_norm": 1.2603497505187988, "learning_rate": 0.0001, "loss": 0.0103, "step": 192610 }, { "epoch": 1267.2368421052631, "grad_norm": 1.4889984130859375, "learning_rate": 0.0001, "loss": 0.01, "step": 192620 }, { "epoch": 1267.3026315789473, "grad_norm": 1.0667765140533447, "learning_rate": 0.0001, "loss": 0.0094, "step": 192630 }, { "epoch": 1267.3684210526317, "grad_norm": 1.4012938737869263, "learning_rate": 0.0001, "loss": 0.0128, "step": 192640 }, { "epoch": 1267.4342105263158, "grad_norm": 1.1279754638671875, "learning_rate": 0.0001, "loss": 0.0092, "step": 192650 }, { "epoch": 1267.5, "grad_norm": 1.086767315864563, "learning_rate": 0.0001, "loss": 0.0112, "step": 192660 }, { "epoch": 1267.5657894736842, "grad_norm": 1.0166661739349365, "learning_rate": 0.0001, "loss": 0.0115, "step": 192670 }, { "epoch": 1267.6315789473683, "grad_norm": 1.3487985134124756, "learning_rate": 0.0001, "loss": 0.0132, "step": 192680 }, { "epoch": 1267.6973684210527, "grad_norm": 1.2115246057510376, "learning_rate": 0.0001, "loss": 0.0125, "step": 192690 }, { "epoch": 1267.7631578947369, "grad_norm": 1.8769789934158325, "learning_rate": 0.0001, "loss": 0.0126, "step": 192700 }, { "epoch": 1267.828947368421, "grad_norm": 1.354053020477295, "learning_rate": 0.0001, "loss": 0.0137, "step": 192710 }, { "epoch": 1267.8947368421052, "grad_norm": 1.8558393716812134, "learning_rate": 0.0001, "loss": 0.0149, "step": 192720 }, { "epoch": 1267.9605263157894, "grad_norm": 1.2391917705535889, "learning_rate": 0.0001, "loss": 0.0167, "step": 192730 }, { "epoch": 1268.0263157894738, "grad_norm": 1.661022663116455, "learning_rate": 0.0001, "loss": 0.0118, "step": 192740 }, { "epoch": 1268.092105263158, "grad_norm": 1.0947152376174927, "learning_rate": 0.0001, "loss": 0.0138, "step": 192750 }, { "epoch": 1268.157894736842, "grad_norm": 1.153960108757019, "learning_rate": 0.0001, "loss": 0.0102, "step": 192760 }, { "epoch": 1268.2236842105262, "grad_norm": 1.313031554222107, "learning_rate": 0.0001, "loss": 0.013, "step": 192770 }, { "epoch": 1268.2894736842106, "grad_norm": 1.2353712320327759, "learning_rate": 0.0001, "loss": 0.0098, "step": 192780 }, { "epoch": 1268.3552631578948, "grad_norm": 1.2444449663162231, "learning_rate": 0.0001, "loss": 0.0114, "step": 192790 }, { "epoch": 1268.421052631579, "grad_norm": 1.4239391088485718, "learning_rate": 0.0001, "loss": 0.0103, "step": 192800 }, { "epoch": 1268.4868421052631, "grad_norm": 1.2565033435821533, "learning_rate": 0.0001, "loss": 0.0119, "step": 192810 }, { "epoch": 1268.5526315789473, "grad_norm": 1.1818509101867676, "learning_rate": 0.0001, "loss": 0.0159, "step": 192820 }, { "epoch": 1268.6184210526317, "grad_norm": 1.3147032260894775, "learning_rate": 0.0001, "loss": 0.0124, "step": 192830 }, { "epoch": 1268.6842105263158, "grad_norm": 0.925413191318512, "learning_rate": 0.0001, "loss": 0.0103, "step": 192840 }, { "epoch": 1268.75, "grad_norm": 1.5169589519500732, "learning_rate": 0.0001, "loss": 0.0087, "step": 192850 }, { "epoch": 1268.8157894736842, "grad_norm": 1.3577513694763184, "learning_rate": 0.0001, "loss": 0.0124, "step": 192860 }, { "epoch": 1268.8815789473683, "grad_norm": 1.2797380685806274, "learning_rate": 0.0001, "loss": 0.0125, "step": 192870 }, { "epoch": 1268.9473684210527, "grad_norm": 1.5020214319229126, "learning_rate": 0.0001, "loss": 0.014, "step": 192880 }, { "epoch": 1269.0131578947369, "grad_norm": 1.1494967937469482, "learning_rate": 0.0001, "loss": 0.0132, "step": 192890 }, { "epoch": 1269.078947368421, "grad_norm": 1.4326987266540527, "learning_rate": 0.0001, "loss": 0.0081, "step": 192900 }, { "epoch": 1269.1447368421052, "grad_norm": 1.5607494115829468, "learning_rate": 0.0001, "loss": 0.0113, "step": 192910 }, { "epoch": 1269.2105263157894, "grad_norm": 1.0457606315612793, "learning_rate": 0.0001, "loss": 0.01, "step": 192920 }, { "epoch": 1269.2763157894738, "grad_norm": 1.4826918840408325, "learning_rate": 0.0001, "loss": 0.0142, "step": 192930 }, { "epoch": 1269.342105263158, "grad_norm": 1.606326699256897, "learning_rate": 0.0001, "loss": 0.0116, "step": 192940 }, { "epoch": 1269.407894736842, "grad_norm": 1.4485268592834473, "learning_rate": 0.0001, "loss": 0.0128, "step": 192950 }, { "epoch": 1269.4736842105262, "grad_norm": 1.6782457828521729, "learning_rate": 0.0001, "loss": 0.0098, "step": 192960 }, { "epoch": 1269.5394736842106, "grad_norm": 1.675614356994629, "learning_rate": 0.0001, "loss": 0.0102, "step": 192970 }, { "epoch": 1269.6052631578948, "grad_norm": 1.8058358430862427, "learning_rate": 0.0001, "loss": 0.0082, "step": 192980 }, { "epoch": 1269.671052631579, "grad_norm": 1.391639232635498, "learning_rate": 0.0001, "loss": 0.0152, "step": 192990 }, { "epoch": 1269.7368421052631, "grad_norm": 1.2373005151748657, "learning_rate": 0.0001, "loss": 0.0193, "step": 193000 }, { "epoch": 1269.8026315789473, "grad_norm": 1.2821376323699951, "learning_rate": 0.0001, "loss": 0.011, "step": 193010 }, { "epoch": 1269.8684210526317, "grad_norm": 1.008122444152832, "learning_rate": 0.0001, "loss": 0.0107, "step": 193020 }, { "epoch": 1269.9342105263158, "grad_norm": 1.3900279998779297, "learning_rate": 0.0001, "loss": 0.0119, "step": 193030 }, { "epoch": 1270.0, "grad_norm": 1.8195942640304565, "learning_rate": 0.0001, "loss": 0.0127, "step": 193040 }, { "epoch": 1270.0657894736842, "grad_norm": 1.494661569595337, "learning_rate": 0.0001, "loss": 0.0134, "step": 193050 }, { "epoch": 1270.1315789473683, "grad_norm": 1.4063525199890137, "learning_rate": 0.0001, "loss": 0.013, "step": 193060 }, { "epoch": 1270.1973684210527, "grad_norm": 1.0986353158950806, "learning_rate": 0.0001, "loss": 0.0127, "step": 193070 }, { "epoch": 1270.2631578947369, "grad_norm": 1.1659358739852905, "learning_rate": 0.0001, "loss": 0.0126, "step": 193080 }, { "epoch": 1270.328947368421, "grad_norm": 1.3729950189590454, "learning_rate": 0.0001, "loss": 0.0104, "step": 193090 }, { "epoch": 1270.3947368421052, "grad_norm": 1.477184772491455, "learning_rate": 0.0001, "loss": 0.0084, "step": 193100 }, { "epoch": 1270.4605263157894, "grad_norm": 1.854168176651001, "learning_rate": 0.0001, "loss": 0.0109, "step": 193110 }, { "epoch": 1270.5263157894738, "grad_norm": 1.6690055131912231, "learning_rate": 0.0001, "loss": 0.0101, "step": 193120 }, { "epoch": 1270.592105263158, "grad_norm": 1.5198184251785278, "learning_rate": 0.0001, "loss": 0.0126, "step": 193130 }, { "epoch": 1270.657894736842, "grad_norm": 1.4615118503570557, "learning_rate": 0.0001, "loss": 0.0122, "step": 193140 }, { "epoch": 1270.7236842105262, "grad_norm": 1.3408520221710205, "learning_rate": 0.0001, "loss": 0.0087, "step": 193150 }, { "epoch": 1270.7894736842106, "grad_norm": 1.0863430500030518, "learning_rate": 0.0001, "loss": 0.0122, "step": 193160 }, { "epoch": 1270.8552631578948, "grad_norm": 1.3029648065567017, "learning_rate": 0.0001, "loss": 0.0157, "step": 193170 }, { "epoch": 1270.921052631579, "grad_norm": 1.4733997583389282, "learning_rate": 0.0001, "loss": 0.0126, "step": 193180 }, { "epoch": 1270.9868421052631, "grad_norm": 1.940772533416748, "learning_rate": 0.0001, "loss": 0.0114, "step": 193190 }, { "epoch": 1271.0526315789473, "grad_norm": 1.8459850549697876, "learning_rate": 0.0001, "loss": 0.011, "step": 193200 }, { "epoch": 1271.1184210526317, "grad_norm": 1.8570476770401, "learning_rate": 0.0001, "loss": 0.0157, "step": 193210 }, { "epoch": 1271.1842105263158, "grad_norm": 1.5898140668869019, "learning_rate": 0.0001, "loss": 0.0144, "step": 193220 }, { "epoch": 1271.25, "grad_norm": 1.5548959970474243, "learning_rate": 0.0001, "loss": 0.0091, "step": 193230 }, { "epoch": 1271.3157894736842, "grad_norm": 1.5370464324951172, "learning_rate": 0.0001, "loss": 0.0128, "step": 193240 }, { "epoch": 1271.3815789473683, "grad_norm": 1.3661730289459229, "learning_rate": 0.0001, "loss": 0.013, "step": 193250 }, { "epoch": 1271.4473684210527, "grad_norm": 1.5332212448120117, "learning_rate": 0.0001, "loss": 0.0127, "step": 193260 }, { "epoch": 1271.5131578947369, "grad_norm": 1.8114264011383057, "learning_rate": 0.0001, "loss": 0.0103, "step": 193270 }, { "epoch": 1271.578947368421, "grad_norm": 1.436700463294983, "learning_rate": 0.0001, "loss": 0.0102, "step": 193280 }, { "epoch": 1271.6447368421052, "grad_norm": 1.9243316650390625, "learning_rate": 0.0001, "loss": 0.0102, "step": 193290 }, { "epoch": 1271.7105263157894, "grad_norm": 1.2795900106430054, "learning_rate": 0.0001, "loss": 0.0095, "step": 193300 }, { "epoch": 1271.7763157894738, "grad_norm": 1.3168522119522095, "learning_rate": 0.0001, "loss": 0.0112, "step": 193310 }, { "epoch": 1271.842105263158, "grad_norm": 1.1062508821487427, "learning_rate": 0.0001, "loss": 0.0086, "step": 193320 }, { "epoch": 1271.907894736842, "grad_norm": 1.196282982826233, "learning_rate": 0.0001, "loss": 0.0114, "step": 193330 }, { "epoch": 1271.9736842105262, "grad_norm": 1.4630215167999268, "learning_rate": 0.0001, "loss": 0.0141, "step": 193340 }, { "epoch": 1272.0394736842106, "grad_norm": 1.4132589101791382, "learning_rate": 0.0001, "loss": 0.0129, "step": 193350 }, { "epoch": 1272.1052631578948, "grad_norm": 1.2961416244506836, "learning_rate": 0.0001, "loss": 0.0119, "step": 193360 }, { "epoch": 1272.171052631579, "grad_norm": 1.410634994506836, "learning_rate": 0.0001, "loss": 0.0111, "step": 193370 }, { "epoch": 1272.2368421052631, "grad_norm": 1.6290030479431152, "learning_rate": 0.0001, "loss": 0.0113, "step": 193380 }, { "epoch": 1272.3026315789473, "grad_norm": 1.4977837800979614, "learning_rate": 0.0001, "loss": 0.0104, "step": 193390 }, { "epoch": 1272.3684210526317, "grad_norm": 1.3355638980865479, "learning_rate": 0.0001, "loss": 0.0102, "step": 193400 }, { "epoch": 1272.4342105263158, "grad_norm": 1.4426878690719604, "learning_rate": 0.0001, "loss": 0.0103, "step": 193410 }, { "epoch": 1272.5, "grad_norm": 1.6184237003326416, "learning_rate": 0.0001, "loss": 0.0129, "step": 193420 }, { "epoch": 1272.5657894736842, "grad_norm": 1.5555498600006104, "learning_rate": 0.0001, "loss": 0.0125, "step": 193430 }, { "epoch": 1272.6315789473683, "grad_norm": 1.7550380229949951, "learning_rate": 0.0001, "loss": 0.0157, "step": 193440 }, { "epoch": 1272.6973684210527, "grad_norm": 1.8884336948394775, "learning_rate": 0.0001, "loss": 0.0088, "step": 193450 }, { "epoch": 1272.7631578947369, "grad_norm": 1.698150873184204, "learning_rate": 0.0001, "loss": 0.0114, "step": 193460 }, { "epoch": 1272.828947368421, "grad_norm": 1.375796914100647, "learning_rate": 0.0001, "loss": 0.0105, "step": 193470 }, { "epoch": 1272.8947368421052, "grad_norm": 1.695778489112854, "learning_rate": 0.0001, "loss": 0.0126, "step": 193480 }, { "epoch": 1272.9605263157894, "grad_norm": 1.521026372909546, "learning_rate": 0.0001, "loss": 0.0126, "step": 193490 }, { "epoch": 1273.0263157894738, "grad_norm": 1.209970235824585, "learning_rate": 0.0001, "loss": 0.0098, "step": 193500 }, { "epoch": 1273.092105263158, "grad_norm": 1.4713468551635742, "learning_rate": 0.0001, "loss": 0.0079, "step": 193510 }, { "epoch": 1273.157894736842, "grad_norm": 1.365129828453064, "learning_rate": 0.0001, "loss": 0.0112, "step": 193520 }, { "epoch": 1273.2236842105262, "grad_norm": 1.0996628999710083, "learning_rate": 0.0001, "loss": 0.0117, "step": 193530 }, { "epoch": 1273.2894736842106, "grad_norm": 1.3214454650878906, "learning_rate": 0.0001, "loss": 0.0111, "step": 193540 }, { "epoch": 1273.3552631578948, "grad_norm": 1.468995213508606, "learning_rate": 0.0001, "loss": 0.0126, "step": 193550 }, { "epoch": 1273.421052631579, "grad_norm": 1.4067988395690918, "learning_rate": 0.0001, "loss": 0.014, "step": 193560 }, { "epoch": 1273.4868421052631, "grad_norm": 1.3602898120880127, "learning_rate": 0.0001, "loss": 0.0128, "step": 193570 }, { "epoch": 1273.5526315789473, "grad_norm": 1.7130556106567383, "learning_rate": 0.0001, "loss": 0.0126, "step": 193580 }, { "epoch": 1273.6184210526317, "grad_norm": 1.4610404968261719, "learning_rate": 0.0001, "loss": 0.0122, "step": 193590 }, { "epoch": 1273.6842105263158, "grad_norm": 1.4654154777526855, "learning_rate": 0.0001, "loss": 0.0111, "step": 193600 }, { "epoch": 1273.75, "grad_norm": 1.2223594188690186, "learning_rate": 0.0001, "loss": 0.0122, "step": 193610 }, { "epoch": 1273.8157894736842, "grad_norm": 1.4926629066467285, "learning_rate": 0.0001, "loss": 0.0146, "step": 193620 }, { "epoch": 1273.8815789473683, "grad_norm": 1.3783537149429321, "learning_rate": 0.0001, "loss": 0.0109, "step": 193630 }, { "epoch": 1273.9473684210527, "grad_norm": 1.3036952018737793, "learning_rate": 0.0001, "loss": 0.0116, "step": 193640 }, { "epoch": 1274.0131578947369, "grad_norm": 1.3284658193588257, "learning_rate": 0.0001, "loss": 0.0123, "step": 193650 }, { "epoch": 1274.078947368421, "grad_norm": 1.5519990921020508, "learning_rate": 0.0001, "loss": 0.0087, "step": 193660 }, { "epoch": 1274.1447368421052, "grad_norm": 1.2357844114303589, "learning_rate": 0.0001, "loss": 0.0124, "step": 193670 }, { "epoch": 1274.2105263157894, "grad_norm": 1.42560613155365, "learning_rate": 0.0001, "loss": 0.0107, "step": 193680 }, { "epoch": 1274.2763157894738, "grad_norm": 1.3909744024276733, "learning_rate": 0.0001, "loss": 0.0129, "step": 193690 }, { "epoch": 1274.342105263158, "grad_norm": 1.2642546892166138, "learning_rate": 0.0001, "loss": 0.0091, "step": 193700 }, { "epoch": 1274.407894736842, "grad_norm": 1.6007391214370728, "learning_rate": 0.0001, "loss": 0.0093, "step": 193710 }, { "epoch": 1274.4736842105262, "grad_norm": 1.7262052297592163, "learning_rate": 0.0001, "loss": 0.0146, "step": 193720 }, { "epoch": 1274.5394736842106, "grad_norm": 1.4193522930145264, "learning_rate": 0.0001, "loss": 0.0086, "step": 193730 }, { "epoch": 1274.6052631578948, "grad_norm": 1.518455147743225, "learning_rate": 0.0001, "loss": 0.0156, "step": 193740 }, { "epoch": 1274.671052631579, "grad_norm": 1.5624829530715942, "learning_rate": 0.0001, "loss": 0.0082, "step": 193750 }, { "epoch": 1274.7368421052631, "grad_norm": 1.6415040493011475, "learning_rate": 0.0001, "loss": 0.0108, "step": 193760 }, { "epoch": 1274.8026315789473, "grad_norm": 1.4885714054107666, "learning_rate": 0.0001, "loss": 0.0155, "step": 193770 }, { "epoch": 1274.8684210526317, "grad_norm": 1.7826919555664062, "learning_rate": 0.0001, "loss": 0.013, "step": 193780 }, { "epoch": 1274.9342105263158, "grad_norm": 1.2037490606307983, "learning_rate": 0.0001, "loss": 0.0147, "step": 193790 }, { "epoch": 1275.0, "grad_norm": 1.172440767288208, "learning_rate": 0.0001, "loss": 0.0121, "step": 193800 }, { "epoch": 1275.0657894736842, "grad_norm": 1.6881886720657349, "learning_rate": 0.0001, "loss": 0.0109, "step": 193810 }, { "epoch": 1275.1315789473683, "grad_norm": 0.9996401071548462, "learning_rate": 0.0001, "loss": 0.0119, "step": 193820 }, { "epoch": 1275.1973684210527, "grad_norm": 1.3741120100021362, "learning_rate": 0.0001, "loss": 0.0125, "step": 193830 }, { "epoch": 1275.2631578947369, "grad_norm": 1.1436951160430908, "learning_rate": 0.0001, "loss": 0.0105, "step": 193840 }, { "epoch": 1275.328947368421, "grad_norm": 1.3189871311187744, "learning_rate": 0.0001, "loss": 0.011, "step": 193850 }, { "epoch": 1275.3947368421052, "grad_norm": 1.2861653566360474, "learning_rate": 0.0001, "loss": 0.0105, "step": 193860 }, { "epoch": 1275.4605263157894, "grad_norm": 1.4157824516296387, "learning_rate": 0.0001, "loss": 0.0136, "step": 193870 }, { "epoch": 1275.5263157894738, "grad_norm": 1.4549120664596558, "learning_rate": 0.0001, "loss": 0.0095, "step": 193880 }, { "epoch": 1275.592105263158, "grad_norm": 1.9997326135635376, "learning_rate": 0.0001, "loss": 0.0086, "step": 193890 }, { "epoch": 1275.657894736842, "grad_norm": 1.9062570333480835, "learning_rate": 0.0001, "loss": 0.011, "step": 193900 }, { "epoch": 1275.7236842105262, "grad_norm": 1.5876567363739014, "learning_rate": 0.0001, "loss": 0.0097, "step": 193910 }, { "epoch": 1275.7894736842106, "grad_norm": 1.469223976135254, "learning_rate": 0.0001, "loss": 0.013, "step": 193920 }, { "epoch": 1275.8552631578948, "grad_norm": 1.4488425254821777, "learning_rate": 0.0001, "loss": 0.0136, "step": 193930 }, { "epoch": 1275.921052631579, "grad_norm": 1.5092676877975464, "learning_rate": 0.0001, "loss": 0.0134, "step": 193940 }, { "epoch": 1275.9868421052631, "grad_norm": 1.8818596601486206, "learning_rate": 0.0001, "loss": 0.0144, "step": 193950 }, { "epoch": 1276.0526315789473, "grad_norm": 1.68748939037323, "learning_rate": 0.0001, "loss": 0.0119, "step": 193960 }, { "epoch": 1276.1184210526317, "grad_norm": 1.9196990728378296, "learning_rate": 0.0001, "loss": 0.0119, "step": 193970 }, { "epoch": 1276.1842105263158, "grad_norm": 1.5050196647644043, "learning_rate": 0.0001, "loss": 0.0141, "step": 193980 }, { "epoch": 1276.25, "grad_norm": 1.1322920322418213, "learning_rate": 0.0001, "loss": 0.0102, "step": 193990 }, { "epoch": 1276.3157894736842, "grad_norm": 1.6208046674728394, "learning_rate": 0.0001, "loss": 0.0089, "step": 194000 }, { "epoch": 1276.3815789473683, "grad_norm": 1.3068652153015137, "learning_rate": 0.0001, "loss": 0.0117, "step": 194010 }, { "epoch": 1276.4473684210527, "grad_norm": 1.788398027420044, "learning_rate": 0.0001, "loss": 0.0131, "step": 194020 }, { "epoch": 1276.5131578947369, "grad_norm": 1.3241409063339233, "learning_rate": 0.0001, "loss": 0.01, "step": 194030 }, { "epoch": 1276.578947368421, "grad_norm": 2.0177383422851562, "learning_rate": 0.0001, "loss": 0.0106, "step": 194040 }, { "epoch": 1276.6447368421052, "grad_norm": 1.4328066110610962, "learning_rate": 0.0001, "loss": 0.0119, "step": 194050 }, { "epoch": 1276.7105263157894, "grad_norm": 1.4020713567733765, "learning_rate": 0.0001, "loss": 0.0113, "step": 194060 }, { "epoch": 1276.7763157894738, "grad_norm": 1.486399531364441, "learning_rate": 0.0001, "loss": 0.0125, "step": 194070 }, { "epoch": 1276.842105263158, "grad_norm": 1.196495532989502, "learning_rate": 0.0001, "loss": 0.0136, "step": 194080 }, { "epoch": 1276.907894736842, "grad_norm": 1.0819227695465088, "learning_rate": 0.0001, "loss": 0.0115, "step": 194090 }, { "epoch": 1276.9736842105262, "grad_norm": 1.705570936203003, "learning_rate": 0.0001, "loss": 0.0094, "step": 194100 }, { "epoch": 1277.0394736842106, "grad_norm": 1.2824360132217407, "learning_rate": 0.0001, "loss": 0.0101, "step": 194110 }, { "epoch": 1277.1052631578948, "grad_norm": 1.289859652519226, "learning_rate": 0.0001, "loss": 0.009, "step": 194120 }, { "epoch": 1277.171052631579, "grad_norm": 1.4174752235412598, "learning_rate": 0.0001, "loss": 0.0118, "step": 194130 }, { "epoch": 1277.2368421052631, "grad_norm": 1.307294249534607, "learning_rate": 0.0001, "loss": 0.0158, "step": 194140 }, { "epoch": 1277.3026315789473, "grad_norm": 1.3318461179733276, "learning_rate": 0.0001, "loss": 0.009, "step": 194150 }, { "epoch": 1277.3684210526317, "grad_norm": 1.633668065071106, "learning_rate": 0.0001, "loss": 0.0133, "step": 194160 }, { "epoch": 1277.4342105263158, "grad_norm": 1.9268498420715332, "learning_rate": 0.0001, "loss": 0.0107, "step": 194170 }, { "epoch": 1277.5, "grad_norm": 1.6397942304611206, "learning_rate": 0.0001, "loss": 0.0109, "step": 194180 }, { "epoch": 1277.5657894736842, "grad_norm": 1.0934667587280273, "learning_rate": 0.0001, "loss": 0.0105, "step": 194190 }, { "epoch": 1277.6315789473683, "grad_norm": 1.3711780309677124, "learning_rate": 0.0001, "loss": 0.0138, "step": 194200 }, { "epoch": 1277.6973684210527, "grad_norm": 1.4456249475479126, "learning_rate": 0.0001, "loss": 0.0133, "step": 194210 }, { "epoch": 1277.7631578947369, "grad_norm": 1.7314233779907227, "learning_rate": 0.0001, "loss": 0.0084, "step": 194220 }, { "epoch": 1277.828947368421, "grad_norm": 1.348433256149292, "learning_rate": 0.0001, "loss": 0.0104, "step": 194230 }, { "epoch": 1277.8947368421052, "grad_norm": 1.095196008682251, "learning_rate": 0.0001, "loss": 0.013, "step": 194240 }, { "epoch": 1277.9605263157894, "grad_norm": 1.5867928266525269, "learning_rate": 0.0001, "loss": 0.0129, "step": 194250 }, { "epoch": 1278.0263157894738, "grad_norm": 1.3254761695861816, "learning_rate": 0.0001, "loss": 0.0117, "step": 194260 }, { "epoch": 1278.092105263158, "grad_norm": 1.4453281164169312, "learning_rate": 0.0001, "loss": 0.0099, "step": 194270 }, { "epoch": 1278.157894736842, "grad_norm": 1.633110761642456, "learning_rate": 0.0001, "loss": 0.0098, "step": 194280 }, { "epoch": 1278.2236842105262, "grad_norm": 1.1296957731246948, "learning_rate": 0.0001, "loss": 0.0121, "step": 194290 }, { "epoch": 1278.2894736842106, "grad_norm": 1.2077959775924683, "learning_rate": 0.0001, "loss": 0.0135, "step": 194300 }, { "epoch": 1278.3552631578948, "grad_norm": 1.2093229293823242, "learning_rate": 0.0001, "loss": 0.0156, "step": 194310 }, { "epoch": 1278.421052631579, "grad_norm": 1.3385474681854248, "learning_rate": 0.0001, "loss": 0.0101, "step": 194320 }, { "epoch": 1278.4868421052631, "grad_norm": 1.3307387828826904, "learning_rate": 0.0001, "loss": 0.0149, "step": 194330 }, { "epoch": 1278.5526315789473, "grad_norm": 1.0651873350143433, "learning_rate": 0.0001, "loss": 0.0145, "step": 194340 }, { "epoch": 1278.6184210526317, "grad_norm": 1.12366783618927, "learning_rate": 0.0001, "loss": 0.0096, "step": 194350 }, { "epoch": 1278.6842105263158, "grad_norm": 1.1269867420196533, "learning_rate": 0.0001, "loss": 0.0111, "step": 194360 }, { "epoch": 1278.75, "grad_norm": 1.313978910446167, "learning_rate": 0.0001, "loss": 0.0097, "step": 194370 }, { "epoch": 1278.8157894736842, "grad_norm": 1.3342114686965942, "learning_rate": 0.0001, "loss": 0.014, "step": 194380 }, { "epoch": 1278.8815789473683, "grad_norm": 1.3443326950073242, "learning_rate": 0.0001, "loss": 0.0118, "step": 194390 }, { "epoch": 1278.9473684210527, "grad_norm": 1.286808967590332, "learning_rate": 0.0001, "loss": 0.0115, "step": 194400 }, { "epoch": 1279.0131578947369, "grad_norm": 1.528770089149475, "learning_rate": 0.0001, "loss": 0.0116, "step": 194410 }, { "epoch": 1279.078947368421, "grad_norm": 1.3164819478988647, "learning_rate": 0.0001, "loss": 0.0128, "step": 194420 }, { "epoch": 1279.1447368421052, "grad_norm": 1.546485424041748, "learning_rate": 0.0001, "loss": 0.0115, "step": 194430 }, { "epoch": 1279.2105263157894, "grad_norm": 1.4799854755401611, "learning_rate": 0.0001, "loss": 0.0108, "step": 194440 }, { "epoch": 1279.2763157894738, "grad_norm": 1.301312804222107, "learning_rate": 0.0001, "loss": 0.0109, "step": 194450 }, { "epoch": 1279.342105263158, "grad_norm": 1.694224238395691, "learning_rate": 0.0001, "loss": 0.0108, "step": 194460 }, { "epoch": 1279.407894736842, "grad_norm": 1.6678112745285034, "learning_rate": 0.0001, "loss": 0.0145, "step": 194470 }, { "epoch": 1279.4736842105262, "grad_norm": 0.9262312650680542, "learning_rate": 0.0001, "loss": 0.01, "step": 194480 }, { "epoch": 1279.5394736842106, "grad_norm": 1.412380576133728, "learning_rate": 0.0001, "loss": 0.013, "step": 194490 }, { "epoch": 1279.6052631578948, "grad_norm": 1.602345585823059, "learning_rate": 0.0001, "loss": 0.0116, "step": 194500 }, { "epoch": 1279.671052631579, "grad_norm": 1.4983748197555542, "learning_rate": 0.0001, "loss": 0.0097, "step": 194510 }, { "epoch": 1279.7368421052631, "grad_norm": 1.4890427589416504, "learning_rate": 0.0001, "loss": 0.0136, "step": 194520 }, { "epoch": 1279.8026315789473, "grad_norm": 1.698594570159912, "learning_rate": 0.0001, "loss": 0.0124, "step": 194530 }, { "epoch": 1279.8684210526317, "grad_norm": 1.4999479055404663, "learning_rate": 0.0001, "loss": 0.0125, "step": 194540 }, { "epoch": 1279.9342105263158, "grad_norm": 1.4159389734268188, "learning_rate": 0.0001, "loss": 0.0122, "step": 194550 }, { "epoch": 1280.0, "grad_norm": 1.4871934652328491, "learning_rate": 0.0001, "loss": 0.009, "step": 194560 }, { "epoch": 1280.0657894736842, "grad_norm": 1.2961952686309814, "learning_rate": 0.0001, "loss": 0.009, "step": 194570 }, { "epoch": 1280.1315789473683, "grad_norm": 1.5507038831710815, "learning_rate": 0.0001, "loss": 0.0109, "step": 194580 }, { "epoch": 1280.1973684210527, "grad_norm": 1.4241849184036255, "learning_rate": 0.0001, "loss": 0.0102, "step": 194590 }, { "epoch": 1280.2631578947369, "grad_norm": 1.1529428958892822, "learning_rate": 0.0001, "loss": 0.0105, "step": 194600 }, { "epoch": 1280.328947368421, "grad_norm": 0.9973099231719971, "learning_rate": 0.0001, "loss": 0.0128, "step": 194610 }, { "epoch": 1280.3947368421052, "grad_norm": 1.4431898593902588, "learning_rate": 0.0001, "loss": 0.0121, "step": 194620 }, { "epoch": 1280.4605263157894, "grad_norm": 1.6229592561721802, "learning_rate": 0.0001, "loss": 0.0115, "step": 194630 }, { "epoch": 1280.5263157894738, "grad_norm": 1.3448392152786255, "learning_rate": 0.0001, "loss": 0.0118, "step": 194640 }, { "epoch": 1280.592105263158, "grad_norm": 2.121741771697998, "learning_rate": 0.0001, "loss": 0.0102, "step": 194650 }, { "epoch": 1280.657894736842, "grad_norm": 1.536938190460205, "learning_rate": 0.0001, "loss": 0.0147, "step": 194660 }, { "epoch": 1280.7236842105262, "grad_norm": 1.460384726524353, "learning_rate": 0.0001, "loss": 0.0125, "step": 194670 }, { "epoch": 1280.7894736842106, "grad_norm": 1.3383735418319702, "learning_rate": 0.0001, "loss": 0.0118, "step": 194680 }, { "epoch": 1280.8552631578948, "grad_norm": 1.7443805932998657, "learning_rate": 0.0001, "loss": 0.0128, "step": 194690 }, { "epoch": 1280.921052631579, "grad_norm": 1.7306735515594482, "learning_rate": 0.0001, "loss": 0.0104, "step": 194700 }, { "epoch": 1280.9868421052631, "grad_norm": 1.383326768875122, "learning_rate": 0.0001, "loss": 0.011, "step": 194710 }, { "epoch": 1281.0526315789473, "grad_norm": 1.557889461517334, "learning_rate": 0.0001, "loss": 0.014, "step": 194720 }, { "epoch": 1281.1184210526317, "grad_norm": 1.5540975332260132, "learning_rate": 0.0001, "loss": 0.0145, "step": 194730 }, { "epoch": 1281.1842105263158, "grad_norm": 1.336601972579956, "learning_rate": 0.0001, "loss": 0.0083, "step": 194740 }, { "epoch": 1281.25, "grad_norm": 1.2306444644927979, "learning_rate": 0.0001, "loss": 0.0115, "step": 194750 }, { "epoch": 1281.3157894736842, "grad_norm": 1.71311616897583, "learning_rate": 0.0001, "loss": 0.009, "step": 194760 }, { "epoch": 1281.3815789473683, "grad_norm": 1.114808440208435, "learning_rate": 0.0001, "loss": 0.009, "step": 194770 }, { "epoch": 1281.4473684210527, "grad_norm": 1.2775568962097168, "learning_rate": 0.0001, "loss": 0.0119, "step": 194780 }, { "epoch": 1281.5131578947369, "grad_norm": 1.57463538646698, "learning_rate": 0.0001, "loss": 0.0136, "step": 194790 }, { "epoch": 1281.578947368421, "grad_norm": 1.3282345533370972, "learning_rate": 0.0001, "loss": 0.011, "step": 194800 }, { "epoch": 1281.6447368421052, "grad_norm": 0.9532556533813477, "learning_rate": 0.0001, "loss": 0.0127, "step": 194810 }, { "epoch": 1281.7105263157894, "grad_norm": 1.5856482982635498, "learning_rate": 0.0001, "loss": 0.0126, "step": 194820 }, { "epoch": 1281.7763157894738, "grad_norm": 1.7465391159057617, "learning_rate": 0.0001, "loss": 0.0112, "step": 194830 }, { "epoch": 1281.842105263158, "grad_norm": 1.556449055671692, "learning_rate": 0.0001, "loss": 0.0093, "step": 194840 }, { "epoch": 1281.907894736842, "grad_norm": 1.0261619091033936, "learning_rate": 0.0001, "loss": 0.0136, "step": 194850 }, { "epoch": 1281.9736842105262, "grad_norm": 1.5770583152770996, "learning_rate": 0.0001, "loss": 0.0135, "step": 194860 }, { "epoch": 1282.0394736842106, "grad_norm": 1.1632695198059082, "learning_rate": 0.0001, "loss": 0.01, "step": 194870 }, { "epoch": 1282.1052631578948, "grad_norm": 1.2560468912124634, "learning_rate": 0.0001, "loss": 0.0115, "step": 194880 }, { "epoch": 1282.171052631579, "grad_norm": 1.1730115413665771, "learning_rate": 0.0001, "loss": 0.0142, "step": 194890 }, { "epoch": 1282.2368421052631, "grad_norm": 1.3295518159866333, "learning_rate": 0.0001, "loss": 0.0119, "step": 194900 }, { "epoch": 1282.3026315789473, "grad_norm": 1.1519763469696045, "learning_rate": 0.0001, "loss": 0.0113, "step": 194910 }, { "epoch": 1282.3684210526317, "grad_norm": 1.338244915008545, "learning_rate": 0.0001, "loss": 0.0121, "step": 194920 }, { "epoch": 1282.4342105263158, "grad_norm": 1.266430139541626, "learning_rate": 0.0001, "loss": 0.0133, "step": 194930 }, { "epoch": 1282.5, "grad_norm": 1.3689656257629395, "learning_rate": 0.0001, "loss": 0.0102, "step": 194940 }, { "epoch": 1282.5657894736842, "grad_norm": 1.3184010982513428, "learning_rate": 0.0001, "loss": 0.0131, "step": 194950 }, { "epoch": 1282.6315789473683, "grad_norm": 1.583019733428955, "learning_rate": 0.0001, "loss": 0.0139, "step": 194960 }, { "epoch": 1282.6973684210527, "grad_norm": 1.0512572526931763, "learning_rate": 0.0001, "loss": 0.012, "step": 194970 }, { "epoch": 1282.7631578947369, "grad_norm": 1.3607137203216553, "learning_rate": 0.0001, "loss": 0.0109, "step": 194980 }, { "epoch": 1282.828947368421, "grad_norm": 1.6974642276763916, "learning_rate": 0.0001, "loss": 0.011, "step": 194990 }, { "epoch": 1282.8947368421052, "grad_norm": 1.817360758781433, "learning_rate": 0.0001, "loss": 0.0093, "step": 195000 }, { "epoch": 1282.9605263157894, "grad_norm": 1.5202239751815796, "learning_rate": 0.0001, "loss": 0.0103, "step": 195010 }, { "epoch": 1283.0263157894738, "grad_norm": 1.9993226528167725, "learning_rate": 0.0001, "loss": 0.0149, "step": 195020 }, { "epoch": 1283.092105263158, "grad_norm": 1.0841659307479858, "learning_rate": 0.0001, "loss": 0.0149, "step": 195030 }, { "epoch": 1283.157894736842, "grad_norm": 1.6599265336990356, "learning_rate": 0.0001, "loss": 0.0122, "step": 195040 }, { "epoch": 1283.2236842105262, "grad_norm": 1.3706092834472656, "learning_rate": 0.0001, "loss": 0.0146, "step": 195050 }, { "epoch": 1283.2894736842106, "grad_norm": 1.3955153226852417, "learning_rate": 0.0001, "loss": 0.0133, "step": 195060 }, { "epoch": 1283.3552631578948, "grad_norm": 1.4944769144058228, "learning_rate": 0.0001, "loss": 0.0102, "step": 195070 }, { "epoch": 1283.421052631579, "grad_norm": 1.1388076543807983, "learning_rate": 0.0001, "loss": 0.0084, "step": 195080 }, { "epoch": 1283.4868421052631, "grad_norm": 1.4018279314041138, "learning_rate": 0.0001, "loss": 0.0106, "step": 195090 }, { "epoch": 1283.5526315789473, "grad_norm": 1.0601798295974731, "learning_rate": 0.0001, "loss": 0.008, "step": 195100 }, { "epoch": 1283.6184210526317, "grad_norm": 1.7024662494659424, "learning_rate": 0.0001, "loss": 0.0113, "step": 195110 }, { "epoch": 1283.6842105263158, "grad_norm": 1.3089712858200073, "learning_rate": 0.0001, "loss": 0.0105, "step": 195120 }, { "epoch": 1283.75, "grad_norm": 1.0353457927703857, "learning_rate": 0.0001, "loss": 0.0129, "step": 195130 }, { "epoch": 1283.8157894736842, "grad_norm": 1.4909896850585938, "learning_rate": 0.0001, "loss": 0.0132, "step": 195140 }, { "epoch": 1283.8815789473683, "grad_norm": 1.2616701126098633, "learning_rate": 0.0001, "loss": 0.0104, "step": 195150 }, { "epoch": 1283.9473684210527, "grad_norm": 1.371006727218628, "learning_rate": 0.0001, "loss": 0.012, "step": 195160 }, { "epoch": 1284.0131578947369, "grad_norm": 1.0881515741348267, "learning_rate": 0.0001, "loss": 0.0096, "step": 195170 }, { "epoch": 1284.078947368421, "grad_norm": 0.9920979738235474, "learning_rate": 0.0001, "loss": 0.0129, "step": 195180 }, { "epoch": 1284.1447368421052, "grad_norm": 1.2408076524734497, "learning_rate": 0.0001, "loss": 0.0151, "step": 195190 }, { "epoch": 1284.2105263157894, "grad_norm": 1.471251368522644, "learning_rate": 0.0001, "loss": 0.0114, "step": 195200 }, { "epoch": 1284.2763157894738, "grad_norm": 1.2596298456192017, "learning_rate": 0.0001, "loss": 0.011, "step": 195210 }, { "epoch": 1284.342105263158, "grad_norm": 1.566172480583191, "learning_rate": 0.0001, "loss": 0.0114, "step": 195220 }, { "epoch": 1284.407894736842, "grad_norm": 1.6102761030197144, "learning_rate": 0.0001, "loss": 0.0081, "step": 195230 }, { "epoch": 1284.4736842105262, "grad_norm": 1.2752114534378052, "learning_rate": 0.0001, "loss": 0.0129, "step": 195240 }, { "epoch": 1284.5394736842106, "grad_norm": 1.2875956296920776, "learning_rate": 0.0001, "loss": 0.0096, "step": 195250 }, { "epoch": 1284.6052631578948, "grad_norm": 1.7833781242370605, "learning_rate": 0.0001, "loss": 0.0123, "step": 195260 }, { "epoch": 1284.671052631579, "grad_norm": 1.6451637744903564, "learning_rate": 0.0001, "loss": 0.0096, "step": 195270 }, { "epoch": 1284.7368421052631, "grad_norm": 1.7091633081436157, "learning_rate": 0.0001, "loss": 0.0114, "step": 195280 }, { "epoch": 1284.8026315789473, "grad_norm": 1.2568111419677734, "learning_rate": 0.0001, "loss": 0.0122, "step": 195290 }, { "epoch": 1284.8684210526317, "grad_norm": 1.1830557584762573, "learning_rate": 0.0001, "loss": 0.0139, "step": 195300 }, { "epoch": 1284.9342105263158, "grad_norm": 1.0136579275131226, "learning_rate": 0.0001, "loss": 0.0121, "step": 195310 }, { "epoch": 1285.0, "grad_norm": 1.8137075901031494, "learning_rate": 0.0001, "loss": 0.0138, "step": 195320 }, { "epoch": 1285.0657894736842, "grad_norm": 1.4338423013687134, "learning_rate": 0.0001, "loss": 0.0101, "step": 195330 }, { "epoch": 1285.1315789473683, "grad_norm": 1.5012482404708862, "learning_rate": 0.0001, "loss": 0.0095, "step": 195340 }, { "epoch": 1285.1973684210527, "grad_norm": 1.471792459487915, "learning_rate": 0.0001, "loss": 0.0103, "step": 195350 }, { "epoch": 1285.2631578947369, "grad_norm": 1.7009921073913574, "learning_rate": 0.0001, "loss": 0.0144, "step": 195360 }, { "epoch": 1285.328947368421, "grad_norm": 1.1747276782989502, "learning_rate": 0.0001, "loss": 0.0144, "step": 195370 }, { "epoch": 1285.3947368421052, "grad_norm": 1.251381754875183, "learning_rate": 0.0001, "loss": 0.0095, "step": 195380 }, { "epoch": 1285.4605263157894, "grad_norm": 0.8448769450187683, "learning_rate": 0.0001, "loss": 0.0088, "step": 195390 }, { "epoch": 1285.5263157894738, "grad_norm": 1.3792529106140137, "learning_rate": 0.0001, "loss": 0.0106, "step": 195400 }, { "epoch": 1285.592105263158, "grad_norm": 1.2793585062026978, "learning_rate": 0.0001, "loss": 0.0157, "step": 195410 }, { "epoch": 1285.657894736842, "grad_norm": 1.4228911399841309, "learning_rate": 0.0001, "loss": 0.0101, "step": 195420 }, { "epoch": 1285.7236842105262, "grad_norm": 1.3415451049804688, "learning_rate": 0.0001, "loss": 0.0143, "step": 195430 }, { "epoch": 1285.7894736842106, "grad_norm": 1.716862440109253, "learning_rate": 0.0001, "loss": 0.0129, "step": 195440 }, { "epoch": 1285.8552631578948, "grad_norm": 0.9988474249839783, "learning_rate": 0.0001, "loss": 0.0099, "step": 195450 }, { "epoch": 1285.921052631579, "grad_norm": 1.5303847789764404, "learning_rate": 0.0001, "loss": 0.0126, "step": 195460 }, { "epoch": 1285.9868421052631, "grad_norm": 1.69696044921875, "learning_rate": 0.0001, "loss": 0.0146, "step": 195470 }, { "epoch": 1286.0526315789473, "grad_norm": 1.4921505451202393, "learning_rate": 0.0001, "loss": 0.0124, "step": 195480 }, { "epoch": 1286.1184210526317, "grad_norm": 1.2862184047698975, "learning_rate": 0.0001, "loss": 0.0106, "step": 195490 }, { "epoch": 1286.1842105263158, "grad_norm": 1.3279634714126587, "learning_rate": 0.0001, "loss": 0.0097, "step": 195500 }, { "epoch": 1286.25, "grad_norm": 1.4926239252090454, "learning_rate": 0.0001, "loss": 0.0124, "step": 195510 }, { "epoch": 1286.3157894736842, "grad_norm": 1.3658199310302734, "learning_rate": 0.0001, "loss": 0.0083, "step": 195520 }, { "epoch": 1286.3815789473683, "grad_norm": 1.2578257322311401, "learning_rate": 0.0001, "loss": 0.0141, "step": 195530 }, { "epoch": 1286.4473684210527, "grad_norm": 1.3927369117736816, "learning_rate": 0.0001, "loss": 0.0121, "step": 195540 }, { "epoch": 1286.5131578947369, "grad_norm": 1.1038955450057983, "learning_rate": 0.0001, "loss": 0.0131, "step": 195550 }, { "epoch": 1286.578947368421, "grad_norm": 1.4457621574401855, "learning_rate": 0.0001, "loss": 0.0131, "step": 195560 }, { "epoch": 1286.6447368421052, "grad_norm": 1.8256984949111938, "learning_rate": 0.0001, "loss": 0.0105, "step": 195570 }, { "epoch": 1286.7105263157894, "grad_norm": 1.9306994676589966, "learning_rate": 0.0001, "loss": 0.0083, "step": 195580 }, { "epoch": 1286.7763157894738, "grad_norm": 1.158150315284729, "learning_rate": 0.0001, "loss": 0.0132, "step": 195590 }, { "epoch": 1286.842105263158, "grad_norm": 1.1430864334106445, "learning_rate": 0.0001, "loss": 0.0118, "step": 195600 }, { "epoch": 1286.907894736842, "grad_norm": 1.3336906433105469, "learning_rate": 0.0001, "loss": 0.0146, "step": 195610 }, { "epoch": 1286.9736842105262, "grad_norm": 1.5766475200653076, "learning_rate": 0.0001, "loss": 0.0113, "step": 195620 }, { "epoch": 1287.0394736842106, "grad_norm": 1.334237813949585, "learning_rate": 0.0001, "loss": 0.0117, "step": 195630 }, { "epoch": 1287.1052631578948, "grad_norm": 1.4015352725982666, "learning_rate": 0.0001, "loss": 0.0132, "step": 195640 }, { "epoch": 1287.171052631579, "grad_norm": 1.633946418762207, "learning_rate": 0.0001, "loss": 0.0142, "step": 195650 }, { "epoch": 1287.2368421052631, "grad_norm": 1.1694414615631104, "learning_rate": 0.0001, "loss": 0.0113, "step": 195660 }, { "epoch": 1287.3026315789473, "grad_norm": 1.4419766664505005, "learning_rate": 0.0001, "loss": 0.0148, "step": 195670 }, { "epoch": 1287.3684210526317, "grad_norm": 1.7385433912277222, "learning_rate": 0.0001, "loss": 0.01, "step": 195680 }, { "epoch": 1287.4342105263158, "grad_norm": 1.539107322692871, "learning_rate": 0.0001, "loss": 0.014, "step": 195690 }, { "epoch": 1287.5, "grad_norm": 1.6568849086761475, "learning_rate": 0.0001, "loss": 0.0088, "step": 195700 }, { "epoch": 1287.5657894736842, "grad_norm": 1.4078606367111206, "learning_rate": 0.0001, "loss": 0.0115, "step": 195710 }, { "epoch": 1287.6315789473683, "grad_norm": 1.6540697813034058, "learning_rate": 0.0001, "loss": 0.0112, "step": 195720 }, { "epoch": 1287.6973684210527, "grad_norm": 1.2902621030807495, "learning_rate": 0.0001, "loss": 0.0099, "step": 195730 }, { "epoch": 1287.7631578947369, "grad_norm": 1.3904560804367065, "learning_rate": 0.0001, "loss": 0.0107, "step": 195740 }, { "epoch": 1287.828947368421, "grad_norm": 1.0531516075134277, "learning_rate": 0.0001, "loss": 0.0102, "step": 195750 }, { "epoch": 1287.8947368421052, "grad_norm": 1.4353127479553223, "learning_rate": 0.0001, "loss": 0.0105, "step": 195760 }, { "epoch": 1287.9605263157894, "grad_norm": 1.6096217632293701, "learning_rate": 0.0001, "loss": 0.0119, "step": 195770 }, { "epoch": 1288.0263157894738, "grad_norm": 1.616578459739685, "learning_rate": 0.0001, "loss": 0.0123, "step": 195780 }, { "epoch": 1288.092105263158, "grad_norm": 1.6803081035614014, "learning_rate": 0.0001, "loss": 0.0128, "step": 195790 }, { "epoch": 1288.157894736842, "grad_norm": 1.546682357788086, "learning_rate": 0.0001, "loss": 0.0079, "step": 195800 }, { "epoch": 1288.2236842105262, "grad_norm": 1.2217278480529785, "learning_rate": 0.0001, "loss": 0.0113, "step": 195810 }, { "epoch": 1288.2894736842106, "grad_norm": 1.5263535976409912, "learning_rate": 0.0001, "loss": 0.0142, "step": 195820 }, { "epoch": 1288.3552631578948, "grad_norm": 1.610371708869934, "learning_rate": 0.0001, "loss": 0.0092, "step": 195830 }, { "epoch": 1288.421052631579, "grad_norm": 1.595047950744629, "learning_rate": 0.0001, "loss": 0.0093, "step": 195840 }, { "epoch": 1288.4868421052631, "grad_norm": 1.2551499605178833, "learning_rate": 0.0001, "loss": 0.0142, "step": 195850 }, { "epoch": 1288.5526315789473, "grad_norm": 1.6678508520126343, "learning_rate": 0.0001, "loss": 0.0138, "step": 195860 }, { "epoch": 1288.6184210526317, "grad_norm": 1.158804178237915, "learning_rate": 0.0001, "loss": 0.0099, "step": 195870 }, { "epoch": 1288.6842105263158, "grad_norm": 1.0715278387069702, "learning_rate": 0.0001, "loss": 0.0108, "step": 195880 }, { "epoch": 1288.75, "grad_norm": 1.3633081912994385, "learning_rate": 0.0001, "loss": 0.0127, "step": 195890 }, { "epoch": 1288.8157894736842, "grad_norm": 1.4432525634765625, "learning_rate": 0.0001, "loss": 0.01, "step": 195900 }, { "epoch": 1288.8815789473683, "grad_norm": 1.4032613039016724, "learning_rate": 0.0001, "loss": 0.0122, "step": 195910 }, { "epoch": 1288.9473684210527, "grad_norm": 1.0240659713745117, "learning_rate": 0.0001, "loss": 0.0089, "step": 195920 }, { "epoch": 1289.0131578947369, "grad_norm": 1.2118139266967773, "learning_rate": 0.0001, "loss": 0.0112, "step": 195930 }, { "epoch": 1289.078947368421, "grad_norm": 1.1931631565093994, "learning_rate": 0.0001, "loss": 0.0107, "step": 195940 }, { "epoch": 1289.1447368421052, "grad_norm": 1.907524824142456, "learning_rate": 0.0001, "loss": 0.0109, "step": 195950 }, { "epoch": 1289.2105263157894, "grad_norm": 1.6139106750488281, "learning_rate": 0.0001, "loss": 0.0099, "step": 195960 }, { "epoch": 1289.2763157894738, "grad_norm": 1.0693460702896118, "learning_rate": 0.0001, "loss": 0.0164, "step": 195970 }, { "epoch": 1289.342105263158, "grad_norm": 1.1685292720794678, "learning_rate": 0.0001, "loss": 0.0128, "step": 195980 }, { "epoch": 1289.407894736842, "grad_norm": 1.3675293922424316, "learning_rate": 0.0001, "loss": 0.0134, "step": 195990 }, { "epoch": 1289.4736842105262, "grad_norm": 1.142886757850647, "learning_rate": 0.0001, "loss": 0.0135, "step": 196000 }, { "epoch": 1289.5394736842106, "grad_norm": 1.1162549257278442, "learning_rate": 0.0001, "loss": 0.01, "step": 196010 }, { "epoch": 1289.6052631578948, "grad_norm": 0.8187001943588257, "learning_rate": 0.0001, "loss": 0.0125, "step": 196020 }, { "epoch": 1289.671052631579, "grad_norm": 0.9087451696395874, "learning_rate": 0.0001, "loss": 0.0123, "step": 196030 }, { "epoch": 1289.7368421052631, "grad_norm": 1.641540765762329, "learning_rate": 0.0001, "loss": 0.0099, "step": 196040 }, { "epoch": 1289.8026315789473, "grad_norm": 1.173595666885376, "learning_rate": 0.0001, "loss": 0.0114, "step": 196050 }, { "epoch": 1289.8684210526317, "grad_norm": 1.504899024963379, "learning_rate": 0.0001, "loss": 0.0111, "step": 196060 }, { "epoch": 1289.9342105263158, "grad_norm": 1.615070104598999, "learning_rate": 0.0001, "loss": 0.0115, "step": 196070 }, { "epoch": 1290.0, "grad_norm": 1.4019829034805298, "learning_rate": 0.0001, "loss": 0.0124, "step": 196080 }, { "epoch": 1290.0657894736842, "grad_norm": 1.0903794765472412, "learning_rate": 0.0001, "loss": 0.009, "step": 196090 }, { "epoch": 1290.1315789473683, "grad_norm": 1.3336774110794067, "learning_rate": 0.0001, "loss": 0.0125, "step": 196100 }, { "epoch": 1290.1973684210527, "grad_norm": 1.7573009729385376, "learning_rate": 0.0001, "loss": 0.0164, "step": 196110 }, { "epoch": 1290.2631578947369, "grad_norm": 1.5126938819885254, "learning_rate": 0.0001, "loss": 0.0113, "step": 196120 }, { "epoch": 1290.328947368421, "grad_norm": 1.8301945924758911, "learning_rate": 0.0001, "loss": 0.0121, "step": 196130 }, { "epoch": 1290.3947368421052, "grad_norm": 1.6175265312194824, "learning_rate": 0.0001, "loss": 0.0116, "step": 196140 }, { "epoch": 1290.4605263157894, "grad_norm": 1.2549638748168945, "learning_rate": 0.0001, "loss": 0.0079, "step": 196150 }, { "epoch": 1290.5263157894738, "grad_norm": 1.4520403146743774, "learning_rate": 0.0001, "loss": 0.0129, "step": 196160 }, { "epoch": 1290.592105263158, "grad_norm": 1.2882359027862549, "learning_rate": 0.0001, "loss": 0.01, "step": 196170 }, { "epoch": 1290.657894736842, "grad_norm": 1.5426782369613647, "learning_rate": 0.0001, "loss": 0.0119, "step": 196180 }, { "epoch": 1290.7236842105262, "grad_norm": 1.728389859199524, "learning_rate": 0.0001, "loss": 0.0094, "step": 196190 }, { "epoch": 1290.7894736842106, "grad_norm": 1.368285894393921, "learning_rate": 0.0001, "loss": 0.0157, "step": 196200 }, { "epoch": 1290.8552631578948, "grad_norm": 1.539076328277588, "learning_rate": 0.0001, "loss": 0.0139, "step": 196210 }, { "epoch": 1290.921052631579, "grad_norm": 1.6274081468582153, "learning_rate": 0.0001, "loss": 0.0095, "step": 196220 }, { "epoch": 1290.9868421052631, "grad_norm": 1.359209656715393, "learning_rate": 0.0001, "loss": 0.0106, "step": 196230 }, { "epoch": 1291.0526315789473, "grad_norm": 1.3263275623321533, "learning_rate": 0.0001, "loss": 0.0126, "step": 196240 }, { "epoch": 1291.1184210526317, "grad_norm": 1.0191303491592407, "learning_rate": 0.0001, "loss": 0.0136, "step": 196250 }, { "epoch": 1291.1842105263158, "grad_norm": 1.3403555154800415, "learning_rate": 0.0001, "loss": 0.0084, "step": 196260 }, { "epoch": 1291.25, "grad_norm": 1.3035671710968018, "learning_rate": 0.0001, "loss": 0.0106, "step": 196270 }, { "epoch": 1291.3157894736842, "grad_norm": 1.3200390338897705, "learning_rate": 0.0001, "loss": 0.0156, "step": 196280 }, { "epoch": 1291.3815789473683, "grad_norm": 1.319429636001587, "learning_rate": 0.0001, "loss": 0.0116, "step": 196290 }, { "epoch": 1291.4473684210527, "grad_norm": 1.6896789073944092, "learning_rate": 0.0001, "loss": 0.0113, "step": 196300 }, { "epoch": 1291.5131578947369, "grad_norm": 1.421579122543335, "learning_rate": 0.0001, "loss": 0.0108, "step": 196310 }, { "epoch": 1291.578947368421, "grad_norm": 1.2445350885391235, "learning_rate": 0.0001, "loss": 0.0102, "step": 196320 }, { "epoch": 1291.6447368421052, "grad_norm": 1.8927805423736572, "learning_rate": 0.0001, "loss": 0.0109, "step": 196330 }, { "epoch": 1291.7105263157894, "grad_norm": 1.3375822305679321, "learning_rate": 0.0001, "loss": 0.0101, "step": 196340 }, { "epoch": 1291.7763157894738, "grad_norm": 1.7202752828598022, "learning_rate": 0.0001, "loss": 0.0147, "step": 196350 }, { "epoch": 1291.842105263158, "grad_norm": 1.0776466131210327, "learning_rate": 0.0001, "loss": 0.0152, "step": 196360 }, { "epoch": 1291.907894736842, "grad_norm": 1.6703341007232666, "learning_rate": 0.0001, "loss": 0.0096, "step": 196370 }, { "epoch": 1291.9736842105262, "grad_norm": 1.5976158380508423, "learning_rate": 0.0001, "loss": 0.0103, "step": 196380 }, { "epoch": 1292.0394736842106, "grad_norm": 1.2442171573638916, "learning_rate": 0.0001, "loss": 0.0082, "step": 196390 }, { "epoch": 1292.1052631578948, "grad_norm": 1.5573220252990723, "learning_rate": 0.0001, "loss": 0.0082, "step": 196400 }, { "epoch": 1292.171052631579, "grad_norm": 1.1820240020751953, "learning_rate": 0.0001, "loss": 0.0096, "step": 196410 }, { "epoch": 1292.2368421052631, "grad_norm": 1.4764512777328491, "learning_rate": 0.0001, "loss": 0.0165, "step": 196420 }, { "epoch": 1292.3026315789473, "grad_norm": 1.2185909748077393, "learning_rate": 0.0001, "loss": 0.015, "step": 196430 }, { "epoch": 1292.3684210526317, "grad_norm": 1.702009677886963, "learning_rate": 0.0001, "loss": 0.0128, "step": 196440 }, { "epoch": 1292.4342105263158, "grad_norm": 1.4892781972885132, "learning_rate": 0.0001, "loss": 0.0117, "step": 196450 }, { "epoch": 1292.5, "grad_norm": 1.3532543182373047, "learning_rate": 0.0001, "loss": 0.0145, "step": 196460 }, { "epoch": 1292.5657894736842, "grad_norm": 1.1583327054977417, "learning_rate": 0.0001, "loss": 0.0124, "step": 196470 }, { "epoch": 1292.6315789473683, "grad_norm": 1.5262893438339233, "learning_rate": 0.0001, "loss": 0.008, "step": 196480 }, { "epoch": 1292.6973684210527, "grad_norm": 1.6670200824737549, "learning_rate": 0.0001, "loss": 0.0138, "step": 196490 }, { "epoch": 1292.7631578947369, "grad_norm": 1.274789571762085, "learning_rate": 0.0001, "loss": 0.0114, "step": 196500 }, { "epoch": 1292.828947368421, "grad_norm": 1.349419355392456, "learning_rate": 0.0001, "loss": 0.0084, "step": 196510 }, { "epoch": 1292.8947368421052, "grad_norm": 0.9030333757400513, "learning_rate": 0.0001, "loss": 0.0094, "step": 196520 }, { "epoch": 1292.9605263157894, "grad_norm": 1.1657692193984985, "learning_rate": 0.0001, "loss": 0.0134, "step": 196530 }, { "epoch": 1293.0263157894738, "grad_norm": 1.0142998695373535, "learning_rate": 0.0001, "loss": 0.0112, "step": 196540 }, { "epoch": 1293.092105263158, "grad_norm": 1.1721947193145752, "learning_rate": 0.0001, "loss": 0.0104, "step": 196550 }, { "epoch": 1293.157894736842, "grad_norm": 1.3065943717956543, "learning_rate": 0.0001, "loss": 0.0114, "step": 196560 }, { "epoch": 1293.2236842105262, "grad_norm": 1.4744793176651, "learning_rate": 0.0001, "loss": 0.0147, "step": 196570 }, { "epoch": 1293.2894736842106, "grad_norm": 1.2413330078125, "learning_rate": 0.0001, "loss": 0.0125, "step": 196580 }, { "epoch": 1293.3552631578948, "grad_norm": 0.9846093058586121, "learning_rate": 0.0001, "loss": 0.0113, "step": 196590 }, { "epoch": 1293.421052631579, "grad_norm": 1.3219425678253174, "learning_rate": 0.0001, "loss": 0.01, "step": 196600 }, { "epoch": 1293.4868421052631, "grad_norm": 1.102089762687683, "learning_rate": 0.0001, "loss": 0.0084, "step": 196610 }, { "epoch": 1293.5526315789473, "grad_norm": 1.3986903429031372, "learning_rate": 0.0001, "loss": 0.0144, "step": 196620 }, { "epoch": 1293.6184210526317, "grad_norm": 1.4949157238006592, "learning_rate": 0.0001, "loss": 0.0136, "step": 196630 }, { "epoch": 1293.6842105263158, "grad_norm": 1.370129108428955, "learning_rate": 0.0001, "loss": 0.0137, "step": 196640 }, { "epoch": 1293.75, "grad_norm": 1.4469399452209473, "learning_rate": 0.0001, "loss": 0.0103, "step": 196650 }, { "epoch": 1293.8157894736842, "grad_norm": 1.2180843353271484, "learning_rate": 0.0001, "loss": 0.0121, "step": 196660 }, { "epoch": 1293.8815789473683, "grad_norm": 1.82256281375885, "learning_rate": 0.0001, "loss": 0.0155, "step": 196670 }, { "epoch": 1293.9473684210527, "grad_norm": 1.5446345806121826, "learning_rate": 0.0001, "loss": 0.0096, "step": 196680 }, { "epoch": 1294.0131578947369, "grad_norm": 1.362130880355835, "learning_rate": 0.0001, "loss": 0.0095, "step": 196690 }, { "epoch": 1294.078947368421, "grad_norm": 1.538934588432312, "learning_rate": 0.0001, "loss": 0.0083, "step": 196700 }, { "epoch": 1294.1447368421052, "grad_norm": 1.475374698638916, "learning_rate": 0.0001, "loss": 0.0135, "step": 196710 }, { "epoch": 1294.2105263157894, "grad_norm": 1.1893017292022705, "learning_rate": 0.0001, "loss": 0.0171, "step": 196720 }, { "epoch": 1294.2763157894738, "grad_norm": 1.3044679164886475, "learning_rate": 0.0001, "loss": 0.0101, "step": 196730 }, { "epoch": 1294.342105263158, "grad_norm": 1.4511487483978271, "learning_rate": 0.0001, "loss": 0.0109, "step": 196740 }, { "epoch": 1294.407894736842, "grad_norm": 1.6160025596618652, "learning_rate": 0.0001, "loss": 0.0169, "step": 196750 }, { "epoch": 1294.4736842105262, "grad_norm": 1.5420442819595337, "learning_rate": 0.0001, "loss": 0.0129, "step": 196760 }, { "epoch": 1294.5394736842106, "grad_norm": 1.9031540155410767, "learning_rate": 0.0001, "loss": 0.0142, "step": 196770 }, { "epoch": 1294.6052631578948, "grad_norm": 1.766747236251831, "learning_rate": 0.0001, "loss": 0.0101, "step": 196780 }, { "epoch": 1294.671052631579, "grad_norm": 1.402960181236267, "learning_rate": 0.0001, "loss": 0.01, "step": 196790 }, { "epoch": 1294.7368421052631, "grad_norm": 1.2595183849334717, "learning_rate": 0.0001, "loss": 0.0088, "step": 196800 }, { "epoch": 1294.8026315789473, "grad_norm": 1.3533591032028198, "learning_rate": 0.0001, "loss": 0.009, "step": 196810 }, { "epoch": 1294.8684210526317, "grad_norm": 1.7627451419830322, "learning_rate": 0.0001, "loss": 0.0086, "step": 196820 }, { "epoch": 1294.9342105263158, "grad_norm": 1.7872440814971924, "learning_rate": 0.0001, "loss": 0.0118, "step": 196830 }, { "epoch": 1295.0, "grad_norm": 1.1817466020584106, "learning_rate": 0.0001, "loss": 0.0127, "step": 196840 }, { "epoch": 1295.0657894736842, "grad_norm": 1.5018869638442993, "learning_rate": 0.0001, "loss": 0.0138, "step": 196850 }, { "epoch": 1295.1315789473683, "grad_norm": 1.321934700012207, "learning_rate": 0.0001, "loss": 0.0095, "step": 196860 }, { "epoch": 1295.1973684210527, "grad_norm": 1.0786755084991455, "learning_rate": 0.0001, "loss": 0.0131, "step": 196870 }, { "epoch": 1295.2631578947369, "grad_norm": 1.0388506650924683, "learning_rate": 0.0001, "loss": 0.0101, "step": 196880 }, { "epoch": 1295.328947368421, "grad_norm": 1.659907341003418, "learning_rate": 0.0001, "loss": 0.0157, "step": 196890 }, { "epoch": 1295.3947368421052, "grad_norm": 1.476244568824768, "learning_rate": 0.0001, "loss": 0.0097, "step": 196900 }, { "epoch": 1295.4605263157894, "grad_norm": 1.6103196144104004, "learning_rate": 0.0001, "loss": 0.0106, "step": 196910 }, { "epoch": 1295.5263157894738, "grad_norm": 1.6579355001449585, "learning_rate": 0.0001, "loss": 0.0132, "step": 196920 }, { "epoch": 1295.592105263158, "grad_norm": 1.681727409362793, "learning_rate": 0.0001, "loss": 0.0091, "step": 196930 }, { "epoch": 1295.657894736842, "grad_norm": 1.1645762920379639, "learning_rate": 0.0001, "loss": 0.0081, "step": 196940 }, { "epoch": 1295.7236842105262, "grad_norm": 1.6644623279571533, "learning_rate": 0.0001, "loss": 0.0094, "step": 196950 }, { "epoch": 1295.7894736842106, "grad_norm": 1.2000460624694824, "learning_rate": 0.0001, "loss": 0.0162, "step": 196960 }, { "epoch": 1295.8552631578948, "grad_norm": 1.3495875597000122, "learning_rate": 0.0001, "loss": 0.013, "step": 196970 }, { "epoch": 1295.921052631579, "grad_norm": 1.6026846170425415, "learning_rate": 0.0001, "loss": 0.0133, "step": 196980 }, { "epoch": 1295.9868421052631, "grad_norm": 1.4488924741744995, "learning_rate": 0.0001, "loss": 0.0085, "step": 196990 }, { "epoch": 1296.0526315789473, "grad_norm": 1.4746428728103638, "learning_rate": 0.0001, "loss": 0.0095, "step": 197000 }, { "epoch": 1296.1184210526317, "grad_norm": 1.1972970962524414, "learning_rate": 0.0001, "loss": 0.0107, "step": 197010 }, { "epoch": 1296.1842105263158, "grad_norm": 1.6660914421081543, "learning_rate": 0.0001, "loss": 0.0126, "step": 197020 }, { "epoch": 1296.25, "grad_norm": 1.4298049211502075, "learning_rate": 0.0001, "loss": 0.0125, "step": 197030 }, { "epoch": 1296.3157894736842, "grad_norm": 1.2128300666809082, "learning_rate": 0.0001, "loss": 0.01, "step": 197040 }, { "epoch": 1296.3815789473683, "grad_norm": 1.3273345232009888, "learning_rate": 0.0001, "loss": 0.0109, "step": 197050 }, { "epoch": 1296.4473684210527, "grad_norm": 1.3786698579788208, "learning_rate": 0.0001, "loss": 0.0121, "step": 197060 }, { "epoch": 1296.5131578947369, "grad_norm": 1.432735562324524, "learning_rate": 0.0001, "loss": 0.0119, "step": 197070 }, { "epoch": 1296.578947368421, "grad_norm": 1.591711401939392, "learning_rate": 0.0001, "loss": 0.0122, "step": 197080 }, { "epoch": 1296.6447368421052, "grad_norm": 1.5009766817092896, "learning_rate": 0.0001, "loss": 0.013, "step": 197090 }, { "epoch": 1296.7105263157894, "grad_norm": 1.81857168674469, "learning_rate": 0.0001, "loss": 0.0158, "step": 197100 }, { "epoch": 1296.7763157894738, "grad_norm": 1.718021273612976, "learning_rate": 0.0001, "loss": 0.0098, "step": 197110 }, { "epoch": 1296.842105263158, "grad_norm": 1.469592809677124, "learning_rate": 0.0001, "loss": 0.0106, "step": 197120 }, { "epoch": 1296.907894736842, "grad_norm": 1.8590810298919678, "learning_rate": 0.0001, "loss": 0.0109, "step": 197130 }, { "epoch": 1296.9736842105262, "grad_norm": 1.75460946559906, "learning_rate": 0.0001, "loss": 0.0092, "step": 197140 }, { "epoch": 1297.0394736842106, "grad_norm": 1.173612117767334, "learning_rate": 0.0001, "loss": 0.0099, "step": 197150 }, { "epoch": 1297.1052631578948, "grad_norm": 1.3705613613128662, "learning_rate": 0.0001, "loss": 0.0108, "step": 197160 }, { "epoch": 1297.171052631579, "grad_norm": 1.370798945426941, "learning_rate": 0.0001, "loss": 0.0097, "step": 197170 }, { "epoch": 1297.2368421052631, "grad_norm": 1.7196338176727295, "learning_rate": 0.0001, "loss": 0.0106, "step": 197180 }, { "epoch": 1297.3026315789473, "grad_norm": 1.544797658920288, "learning_rate": 0.0001, "loss": 0.0092, "step": 197190 }, { "epoch": 1297.3684210526317, "grad_norm": 1.616844892501831, "learning_rate": 0.0001, "loss": 0.0099, "step": 197200 }, { "epoch": 1297.4342105263158, "grad_norm": 1.6374205350875854, "learning_rate": 0.0001, "loss": 0.0127, "step": 197210 }, { "epoch": 1297.5, "grad_norm": 1.5029399394989014, "learning_rate": 0.0001, "loss": 0.0126, "step": 197220 }, { "epoch": 1297.5657894736842, "grad_norm": 1.5983309745788574, "learning_rate": 0.0001, "loss": 0.0154, "step": 197230 }, { "epoch": 1297.6315789473683, "grad_norm": 1.6279442310333252, "learning_rate": 0.0001, "loss": 0.0114, "step": 197240 }, { "epoch": 1297.6973684210527, "grad_norm": 1.7294777631759644, "learning_rate": 0.0001, "loss": 0.0129, "step": 197250 }, { "epoch": 1297.7631578947369, "grad_norm": 1.5548158884048462, "learning_rate": 0.0001, "loss": 0.0106, "step": 197260 }, { "epoch": 1297.828947368421, "grad_norm": 1.0935637950897217, "learning_rate": 0.0001, "loss": 0.0124, "step": 197270 }, { "epoch": 1297.8947368421052, "grad_norm": 1.0490403175354004, "learning_rate": 0.0001, "loss": 0.0114, "step": 197280 }, { "epoch": 1297.9605263157894, "grad_norm": 1.3563964366912842, "learning_rate": 0.0001, "loss": 0.012, "step": 197290 }, { "epoch": 1298.0263157894738, "grad_norm": 1.370434284210205, "learning_rate": 0.0001, "loss": 0.0141, "step": 197300 }, { "epoch": 1298.092105263158, "grad_norm": 1.4689116477966309, "learning_rate": 0.0001, "loss": 0.0101, "step": 197310 }, { "epoch": 1298.157894736842, "grad_norm": 1.5825235843658447, "learning_rate": 0.0001, "loss": 0.0096, "step": 197320 }, { "epoch": 1298.2236842105262, "grad_norm": 1.7217488288879395, "learning_rate": 0.0001, "loss": 0.0169, "step": 197330 }, { "epoch": 1298.2894736842106, "grad_norm": 1.363359808921814, "learning_rate": 0.0001, "loss": 0.009, "step": 197340 }, { "epoch": 1298.3552631578948, "grad_norm": 1.491866946220398, "learning_rate": 0.0001, "loss": 0.0122, "step": 197350 }, { "epoch": 1298.421052631579, "grad_norm": 1.7066009044647217, "learning_rate": 0.0001, "loss": 0.0113, "step": 197360 }, { "epoch": 1298.4868421052631, "grad_norm": 1.7196056842803955, "learning_rate": 0.0001, "loss": 0.01, "step": 197370 }, { "epoch": 1298.5526315789473, "grad_norm": 0.97200608253479, "learning_rate": 0.0001, "loss": 0.0124, "step": 197380 }, { "epoch": 1298.6184210526317, "grad_norm": 1.3363248109817505, "learning_rate": 0.0001, "loss": 0.0112, "step": 197390 }, { "epoch": 1298.6842105263158, "grad_norm": 1.6085249185562134, "learning_rate": 0.0001, "loss": 0.0104, "step": 197400 }, { "epoch": 1298.75, "grad_norm": 1.2761956453323364, "learning_rate": 0.0001, "loss": 0.0136, "step": 197410 }, { "epoch": 1298.8157894736842, "grad_norm": 1.0767223834991455, "learning_rate": 0.0001, "loss": 0.0096, "step": 197420 }, { "epoch": 1298.8815789473683, "grad_norm": 1.0166902542114258, "learning_rate": 0.0001, "loss": 0.0121, "step": 197430 }, { "epoch": 1298.9473684210527, "grad_norm": 1.6756744384765625, "learning_rate": 0.0001, "loss": 0.0134, "step": 197440 }, { "epoch": 1299.0131578947369, "grad_norm": 1.6708258390426636, "learning_rate": 0.0001, "loss": 0.012, "step": 197450 }, { "epoch": 1299.078947368421, "grad_norm": 1.2126331329345703, "learning_rate": 0.0001, "loss": 0.0091, "step": 197460 }, { "epoch": 1299.1447368421052, "grad_norm": 1.5658271312713623, "learning_rate": 0.0001, "loss": 0.0099, "step": 197470 }, { "epoch": 1299.2105263157894, "grad_norm": 1.3876359462738037, "learning_rate": 0.0001, "loss": 0.0091, "step": 197480 }, { "epoch": 1299.2763157894738, "grad_norm": 1.585305094718933, "learning_rate": 0.0001, "loss": 0.0084, "step": 197490 }, { "epoch": 1299.342105263158, "grad_norm": 1.6671804189682007, "learning_rate": 0.0001, "loss": 0.0106, "step": 197500 }, { "epoch": 1299.407894736842, "grad_norm": 1.3508164882659912, "learning_rate": 0.0001, "loss": 0.0122, "step": 197510 }, { "epoch": 1299.4736842105262, "grad_norm": 1.799986720085144, "learning_rate": 0.0001, "loss": 0.0103, "step": 197520 }, { "epoch": 1299.5394736842106, "grad_norm": 1.247213363647461, "learning_rate": 0.0001, "loss": 0.0171, "step": 197530 }, { "epoch": 1299.6052631578948, "grad_norm": 1.3036375045776367, "learning_rate": 0.0001, "loss": 0.0091, "step": 197540 }, { "epoch": 1299.671052631579, "grad_norm": 1.2280793190002441, "learning_rate": 0.0001, "loss": 0.0184, "step": 197550 }, { "epoch": 1299.7368421052631, "grad_norm": 0.9601597785949707, "learning_rate": 0.0001, "loss": 0.0121, "step": 197560 }, { "epoch": 1299.8026315789473, "grad_norm": 1.7272025346755981, "learning_rate": 0.0001, "loss": 0.0116, "step": 197570 }, { "epoch": 1299.8684210526317, "grad_norm": 1.508544921875, "learning_rate": 0.0001, "loss": 0.0126, "step": 197580 }, { "epoch": 1299.9342105263158, "grad_norm": 1.5402089357376099, "learning_rate": 0.0001, "loss": 0.0131, "step": 197590 }, { "epoch": 1300.0, "grad_norm": 1.3868767023086548, "learning_rate": 0.0001, "loss": 0.0117, "step": 197600 }, { "epoch": 1300.0657894736842, "grad_norm": 1.4312489032745361, "learning_rate": 0.0001, "loss": 0.0109, "step": 197610 }, { "epoch": 1300.1315789473683, "grad_norm": 1.3305604457855225, "learning_rate": 0.0001, "loss": 0.0109, "step": 197620 }, { "epoch": 1300.1973684210527, "grad_norm": 1.0445644855499268, "learning_rate": 0.0001, "loss": 0.0133, "step": 197630 }, { "epoch": 1300.2631578947369, "grad_norm": 1.6266824007034302, "learning_rate": 0.0001, "loss": 0.0114, "step": 197640 }, { "epoch": 1300.328947368421, "grad_norm": 1.4672564268112183, "learning_rate": 0.0001, "loss": 0.0113, "step": 197650 }, { "epoch": 1300.3947368421052, "grad_norm": 1.2151185274124146, "learning_rate": 0.0001, "loss": 0.013, "step": 197660 }, { "epoch": 1300.4605263157894, "grad_norm": 1.0574753284454346, "learning_rate": 0.0001, "loss": 0.0123, "step": 197670 }, { "epoch": 1300.5263157894738, "grad_norm": 0.9672297835350037, "learning_rate": 0.0001, "loss": 0.0094, "step": 197680 }, { "epoch": 1300.592105263158, "grad_norm": 1.4655665159225464, "learning_rate": 0.0001, "loss": 0.0124, "step": 197690 }, { "epoch": 1300.657894736842, "grad_norm": 1.7817891836166382, "learning_rate": 0.0001, "loss": 0.0102, "step": 197700 }, { "epoch": 1300.7236842105262, "grad_norm": 1.7057970762252808, "learning_rate": 0.0001, "loss": 0.0124, "step": 197710 }, { "epoch": 1300.7894736842106, "grad_norm": 1.3237411975860596, "learning_rate": 0.0001, "loss": 0.0151, "step": 197720 }, { "epoch": 1300.8552631578948, "grad_norm": 1.3300174474716187, "learning_rate": 0.0001, "loss": 0.0115, "step": 197730 }, { "epoch": 1300.921052631579, "grad_norm": 1.3035141229629517, "learning_rate": 0.0001, "loss": 0.0135, "step": 197740 }, { "epoch": 1300.9868421052631, "grad_norm": 1.123279333114624, "learning_rate": 0.0001, "loss": 0.0101, "step": 197750 }, { "epoch": 1301.0526315789473, "grad_norm": 1.6201026439666748, "learning_rate": 0.0001, "loss": 0.0172, "step": 197760 }, { "epoch": 1301.1184210526317, "grad_norm": 1.1721503734588623, "learning_rate": 0.0001, "loss": 0.0122, "step": 197770 }, { "epoch": 1301.1842105263158, "grad_norm": 1.2685233354568481, "learning_rate": 0.0001, "loss": 0.0098, "step": 197780 }, { "epoch": 1301.25, "grad_norm": 1.817500352859497, "learning_rate": 0.0001, "loss": 0.01, "step": 197790 }, { "epoch": 1301.3157894736842, "grad_norm": 1.3257431983947754, "learning_rate": 0.0001, "loss": 0.0091, "step": 197800 }, { "epoch": 1301.3815789473683, "grad_norm": 1.5465737581253052, "learning_rate": 0.0001, "loss": 0.0142, "step": 197810 }, { "epoch": 1301.4473684210527, "grad_norm": 1.467252492904663, "learning_rate": 0.0001, "loss": 0.0109, "step": 197820 }, { "epoch": 1301.5131578947369, "grad_norm": 1.7829439640045166, "learning_rate": 0.0001, "loss": 0.0126, "step": 197830 }, { "epoch": 1301.578947368421, "grad_norm": 1.9544154405593872, "learning_rate": 0.0001, "loss": 0.0142, "step": 197840 }, { "epoch": 1301.6447368421052, "grad_norm": 1.1579872369766235, "learning_rate": 0.0001, "loss": 0.0118, "step": 197850 }, { "epoch": 1301.7105263157894, "grad_norm": 1.7622578144073486, "learning_rate": 0.0001, "loss": 0.0108, "step": 197860 }, { "epoch": 1301.7763157894738, "grad_norm": 1.3199057579040527, "learning_rate": 0.0001, "loss": 0.0112, "step": 197870 }, { "epoch": 1301.842105263158, "grad_norm": 1.6892050504684448, "learning_rate": 0.0001, "loss": 0.0107, "step": 197880 }, { "epoch": 1301.907894736842, "grad_norm": 1.6662218570709229, "learning_rate": 0.0001, "loss": 0.0114, "step": 197890 }, { "epoch": 1301.9736842105262, "grad_norm": 1.9423253536224365, "learning_rate": 0.0001, "loss": 0.0102, "step": 197900 }, { "epoch": 1302.0394736842106, "grad_norm": 1.5827174186706543, "learning_rate": 0.0001, "loss": 0.0115, "step": 197910 }, { "epoch": 1302.1052631578948, "grad_norm": 1.93584144115448, "learning_rate": 0.0001, "loss": 0.01, "step": 197920 }, { "epoch": 1302.171052631579, "grad_norm": 1.3108471632003784, "learning_rate": 0.0001, "loss": 0.012, "step": 197930 }, { "epoch": 1302.2368421052631, "grad_norm": 1.4882813692092896, "learning_rate": 0.0001, "loss": 0.0166, "step": 197940 }, { "epoch": 1302.3026315789473, "grad_norm": 1.3866856098175049, "learning_rate": 0.0001, "loss": 0.0074, "step": 197950 }, { "epoch": 1302.3684210526317, "grad_norm": 1.5581793785095215, "learning_rate": 0.0001, "loss": 0.0119, "step": 197960 }, { "epoch": 1302.4342105263158, "grad_norm": 1.573397159576416, "learning_rate": 0.0001, "loss": 0.0127, "step": 197970 }, { "epoch": 1302.5, "grad_norm": 1.1770020723342896, "learning_rate": 0.0001, "loss": 0.0104, "step": 197980 }, { "epoch": 1302.5657894736842, "grad_norm": 1.4915260076522827, "learning_rate": 0.0001, "loss": 0.0096, "step": 197990 }, { "epoch": 1302.6315789473683, "grad_norm": 1.5092756748199463, "learning_rate": 0.0001, "loss": 0.0077, "step": 198000 }, { "epoch": 1302.6973684210527, "grad_norm": 1.0719187259674072, "learning_rate": 0.0001, "loss": 0.011, "step": 198010 }, { "epoch": 1302.7631578947369, "grad_norm": 1.4989134073257446, "learning_rate": 0.0001, "loss": 0.0112, "step": 198020 }, { "epoch": 1302.828947368421, "grad_norm": 1.6140834093093872, "learning_rate": 0.0001, "loss": 0.0093, "step": 198030 }, { "epoch": 1302.8947368421052, "grad_norm": 1.3529183864593506, "learning_rate": 0.0001, "loss": 0.0154, "step": 198040 }, { "epoch": 1302.9605263157894, "grad_norm": 1.1328006982803345, "learning_rate": 0.0001, "loss": 0.0147, "step": 198050 }, { "epoch": 1303.0263157894738, "grad_norm": 1.4376813173294067, "learning_rate": 0.0001, "loss": 0.0095, "step": 198060 }, { "epoch": 1303.092105263158, "grad_norm": 1.7530723810195923, "learning_rate": 0.0001, "loss": 0.0125, "step": 198070 }, { "epoch": 1303.157894736842, "grad_norm": 1.586902379989624, "learning_rate": 0.0001, "loss": 0.0108, "step": 198080 }, { "epoch": 1303.2236842105262, "grad_norm": 1.4388808012008667, "learning_rate": 0.0001, "loss": 0.0084, "step": 198090 }, { "epoch": 1303.2894736842106, "grad_norm": 1.1478376388549805, "learning_rate": 0.0001, "loss": 0.0134, "step": 198100 }, { "epoch": 1303.3552631578948, "grad_norm": 1.1047660112380981, "learning_rate": 0.0001, "loss": 0.0116, "step": 198110 }, { "epoch": 1303.421052631579, "grad_norm": 1.2820541858673096, "learning_rate": 0.0001, "loss": 0.0118, "step": 198120 }, { "epoch": 1303.4868421052631, "grad_norm": 1.6208032369613647, "learning_rate": 0.0001, "loss": 0.0124, "step": 198130 }, { "epoch": 1303.5526315789473, "grad_norm": 1.388958215713501, "learning_rate": 0.0001, "loss": 0.0124, "step": 198140 }, { "epoch": 1303.6184210526317, "grad_norm": 1.6423381567001343, "learning_rate": 0.0001, "loss": 0.0128, "step": 198150 }, { "epoch": 1303.6842105263158, "grad_norm": 1.5401625633239746, "learning_rate": 0.0001, "loss": 0.0155, "step": 198160 }, { "epoch": 1303.75, "grad_norm": 1.5685638189315796, "learning_rate": 0.0001, "loss": 0.0103, "step": 198170 }, { "epoch": 1303.8157894736842, "grad_norm": 1.5939788818359375, "learning_rate": 0.0001, "loss": 0.0151, "step": 198180 }, { "epoch": 1303.8815789473683, "grad_norm": 1.4985275268554688, "learning_rate": 0.0001, "loss": 0.0082, "step": 198190 }, { "epoch": 1303.9473684210527, "grad_norm": 1.544608473777771, "learning_rate": 0.0001, "loss": 0.0111, "step": 198200 }, { "epoch": 1304.0131578947369, "grad_norm": 1.5485577583312988, "learning_rate": 0.0001, "loss": 0.0095, "step": 198210 }, { "epoch": 1304.078947368421, "grad_norm": 1.3753407001495361, "learning_rate": 0.0001, "loss": 0.0116, "step": 198220 }, { "epoch": 1304.1447368421052, "grad_norm": 1.4745062589645386, "learning_rate": 0.0001, "loss": 0.012, "step": 198230 }, { "epoch": 1304.2105263157894, "grad_norm": 1.3646448850631714, "learning_rate": 0.0001, "loss": 0.0139, "step": 198240 }, { "epoch": 1304.2763157894738, "grad_norm": 1.2284380197525024, "learning_rate": 0.0001, "loss": 0.0099, "step": 198250 }, { "epoch": 1304.342105263158, "grad_norm": 1.3590102195739746, "learning_rate": 0.0001, "loss": 0.0106, "step": 198260 }, { "epoch": 1304.407894736842, "grad_norm": 1.3691328763961792, "learning_rate": 0.0001, "loss": 0.0145, "step": 198270 }, { "epoch": 1304.4736842105262, "grad_norm": 0.919756293296814, "learning_rate": 0.0001, "loss": 0.0098, "step": 198280 }, { "epoch": 1304.5394736842106, "grad_norm": 1.4767156839370728, "learning_rate": 0.0001, "loss": 0.012, "step": 198290 }, { "epoch": 1304.6052631578948, "grad_norm": 1.0884180068969727, "learning_rate": 0.0001, "loss": 0.012, "step": 198300 }, { "epoch": 1304.671052631579, "grad_norm": 1.227967619895935, "learning_rate": 0.0001, "loss": 0.0132, "step": 198310 }, { "epoch": 1304.7368421052631, "grad_norm": 1.7190531492233276, "learning_rate": 0.0001, "loss": 0.0102, "step": 198320 }, { "epoch": 1304.8026315789473, "grad_norm": 1.0657734870910645, "learning_rate": 0.0001, "loss": 0.0124, "step": 198330 }, { "epoch": 1304.8684210526317, "grad_norm": 1.3204760551452637, "learning_rate": 0.0001, "loss": 0.009, "step": 198340 }, { "epoch": 1304.9342105263158, "grad_norm": 1.595741629600525, "learning_rate": 0.0001, "loss": 0.0115, "step": 198350 }, { "epoch": 1305.0, "grad_norm": 1.4656217098236084, "learning_rate": 0.0001, "loss": 0.0114, "step": 198360 }, { "epoch": 1305.0657894736842, "grad_norm": 1.6814669370651245, "learning_rate": 0.0001, "loss": 0.0081, "step": 198370 }, { "epoch": 1305.1315789473683, "grad_norm": 1.2192461490631104, "learning_rate": 0.0001, "loss": 0.0188, "step": 198380 }, { "epoch": 1305.1973684210527, "grad_norm": 1.8424705266952515, "learning_rate": 0.0001, "loss": 0.0111, "step": 198390 }, { "epoch": 1305.2631578947369, "grad_norm": 1.574769377708435, "learning_rate": 0.0001, "loss": 0.0083, "step": 198400 }, { "epoch": 1305.328947368421, "grad_norm": 1.6011419296264648, "learning_rate": 0.0001, "loss": 0.0121, "step": 198410 }, { "epoch": 1305.3947368421052, "grad_norm": 1.1989537477493286, "learning_rate": 0.0001, "loss": 0.0113, "step": 198420 }, { "epoch": 1305.4605263157894, "grad_norm": 1.1652997732162476, "learning_rate": 0.0001, "loss": 0.0131, "step": 198430 }, { "epoch": 1305.5263157894738, "grad_norm": 0.9572839736938477, "learning_rate": 0.0001, "loss": 0.0153, "step": 198440 }, { "epoch": 1305.592105263158, "grad_norm": 1.138667106628418, "learning_rate": 0.0001, "loss": 0.0146, "step": 198450 }, { "epoch": 1305.657894736842, "grad_norm": 1.3694090843200684, "learning_rate": 0.0001, "loss": 0.0101, "step": 198460 }, { "epoch": 1305.7236842105262, "grad_norm": 1.3389142751693726, "learning_rate": 0.0001, "loss": 0.0113, "step": 198470 }, { "epoch": 1305.7894736842106, "grad_norm": 1.4597853422164917, "learning_rate": 0.0001, "loss": 0.0122, "step": 198480 }, { "epoch": 1305.8552631578948, "grad_norm": 1.4686343669891357, "learning_rate": 0.0001, "loss": 0.0097, "step": 198490 }, { "epoch": 1305.921052631579, "grad_norm": 1.3644616603851318, "learning_rate": 0.0001, "loss": 0.0112, "step": 198500 }, { "epoch": 1305.9868421052631, "grad_norm": 1.5255290269851685, "learning_rate": 0.0001, "loss": 0.0079, "step": 198510 }, { "epoch": 1306.0526315789473, "grad_norm": 0.9928224086761475, "learning_rate": 0.0001, "loss": 0.0141, "step": 198520 }, { "epoch": 1306.1184210526317, "grad_norm": 1.1365574598312378, "learning_rate": 0.0001, "loss": 0.008, "step": 198530 }, { "epoch": 1306.1842105263158, "grad_norm": 1.0682445764541626, "learning_rate": 0.0001, "loss": 0.0121, "step": 198540 }, { "epoch": 1306.25, "grad_norm": 1.5291615724563599, "learning_rate": 0.0001, "loss": 0.0083, "step": 198550 }, { "epoch": 1306.3157894736842, "grad_norm": 1.2633222341537476, "learning_rate": 0.0001, "loss": 0.0108, "step": 198560 }, { "epoch": 1306.3815789473683, "grad_norm": 2.0023722648620605, "learning_rate": 0.0001, "loss": 0.0135, "step": 198570 }, { "epoch": 1306.4473684210527, "grad_norm": 1.7567204236984253, "learning_rate": 0.0001, "loss": 0.0095, "step": 198580 }, { "epoch": 1306.5131578947369, "grad_norm": 1.3060896396636963, "learning_rate": 0.0001, "loss": 0.0112, "step": 198590 }, { "epoch": 1306.578947368421, "grad_norm": 1.8851571083068848, "learning_rate": 0.0001, "loss": 0.0152, "step": 198600 }, { "epoch": 1306.6447368421052, "grad_norm": 1.1215596199035645, "learning_rate": 0.0001, "loss": 0.0126, "step": 198610 }, { "epoch": 1306.7105263157894, "grad_norm": 1.34795081615448, "learning_rate": 0.0001, "loss": 0.0111, "step": 198620 }, { "epoch": 1306.7763157894738, "grad_norm": 1.094225525856018, "learning_rate": 0.0001, "loss": 0.01, "step": 198630 }, { "epoch": 1306.842105263158, "grad_norm": 1.6957693099975586, "learning_rate": 0.0001, "loss": 0.0128, "step": 198640 }, { "epoch": 1306.907894736842, "grad_norm": 1.6389567852020264, "learning_rate": 0.0001, "loss": 0.0123, "step": 198650 }, { "epoch": 1306.9736842105262, "grad_norm": 1.698239803314209, "learning_rate": 0.0001, "loss": 0.0144, "step": 198660 }, { "epoch": 1307.0394736842106, "grad_norm": 0.9875311851501465, "learning_rate": 0.0001, "loss": 0.0139, "step": 198670 }, { "epoch": 1307.1052631578948, "grad_norm": 1.1503053903579712, "learning_rate": 0.0001, "loss": 0.0136, "step": 198680 }, { "epoch": 1307.171052631579, "grad_norm": 1.3703856468200684, "learning_rate": 0.0001, "loss": 0.0107, "step": 198690 }, { "epoch": 1307.2368421052631, "grad_norm": 1.8833959102630615, "learning_rate": 0.0001, "loss": 0.0127, "step": 198700 }, { "epoch": 1307.3026315789473, "grad_norm": 1.746276617050171, "learning_rate": 0.0001, "loss": 0.0097, "step": 198710 }, { "epoch": 1307.3684210526317, "grad_norm": 1.4862931966781616, "learning_rate": 0.0001, "loss": 0.0103, "step": 198720 }, { "epoch": 1307.4342105263158, "grad_norm": 1.41881263256073, "learning_rate": 0.0001, "loss": 0.0085, "step": 198730 }, { "epoch": 1307.5, "grad_norm": 1.102656602859497, "learning_rate": 0.0001, "loss": 0.015, "step": 198740 }, { "epoch": 1307.5657894736842, "grad_norm": 1.6399058103561401, "learning_rate": 0.0001, "loss": 0.0082, "step": 198750 }, { "epoch": 1307.6315789473683, "grad_norm": 1.19680917263031, "learning_rate": 0.0001, "loss": 0.015, "step": 198760 }, { "epoch": 1307.6973684210527, "grad_norm": 1.431357502937317, "learning_rate": 0.0001, "loss": 0.0099, "step": 198770 }, { "epoch": 1307.7631578947369, "grad_norm": 1.3065096139907837, "learning_rate": 0.0001, "loss": 0.0123, "step": 198780 }, { "epoch": 1307.828947368421, "grad_norm": 0.9725250005722046, "learning_rate": 0.0001, "loss": 0.013, "step": 198790 }, { "epoch": 1307.8947368421052, "grad_norm": 1.4921574592590332, "learning_rate": 0.0001, "loss": 0.0111, "step": 198800 }, { "epoch": 1307.9605263157894, "grad_norm": 1.376435399055481, "learning_rate": 0.0001, "loss": 0.0117, "step": 198810 }, { "epoch": 1308.0263157894738, "grad_norm": 1.2847039699554443, "learning_rate": 0.0001, "loss": 0.0118, "step": 198820 }, { "epoch": 1308.092105263158, "grad_norm": 1.634950041770935, "learning_rate": 0.0001, "loss": 0.0109, "step": 198830 }, { "epoch": 1308.157894736842, "grad_norm": 1.8010963201522827, "learning_rate": 0.0001, "loss": 0.0093, "step": 198840 }, { "epoch": 1308.2236842105262, "grad_norm": 1.330321192741394, "learning_rate": 0.0001, "loss": 0.0093, "step": 198850 }, { "epoch": 1308.2894736842106, "grad_norm": 1.5262354612350464, "learning_rate": 0.0001, "loss": 0.0158, "step": 198860 }, { "epoch": 1308.3552631578948, "grad_norm": 1.0879638195037842, "learning_rate": 0.0001, "loss": 0.0107, "step": 198870 }, { "epoch": 1308.421052631579, "grad_norm": 1.481779932975769, "learning_rate": 0.0001, "loss": 0.0113, "step": 198880 }, { "epoch": 1308.4868421052631, "grad_norm": 1.1638965606689453, "learning_rate": 0.0001, "loss": 0.0122, "step": 198890 }, { "epoch": 1308.5526315789473, "grad_norm": 1.3012914657592773, "learning_rate": 0.0001, "loss": 0.0121, "step": 198900 }, { "epoch": 1308.6184210526317, "grad_norm": 1.0249589681625366, "learning_rate": 0.0001, "loss": 0.0103, "step": 198910 }, { "epoch": 1308.6842105263158, "grad_norm": 1.2326916456222534, "learning_rate": 0.0001, "loss": 0.0116, "step": 198920 }, { "epoch": 1308.75, "grad_norm": 1.6439963579177856, "learning_rate": 0.0001, "loss": 0.015, "step": 198930 }, { "epoch": 1308.8157894736842, "grad_norm": 2.1229665279388428, "learning_rate": 0.0001, "loss": 0.009, "step": 198940 }, { "epoch": 1308.8815789473683, "grad_norm": 1.666763424873352, "learning_rate": 0.0001, "loss": 0.0114, "step": 198950 }, { "epoch": 1308.9473684210527, "grad_norm": 1.5527347326278687, "learning_rate": 0.0001, "loss": 0.0139, "step": 198960 }, { "epoch": 1309.0131578947369, "grad_norm": 1.4941685199737549, "learning_rate": 0.0001, "loss": 0.0119, "step": 198970 }, { "epoch": 1309.078947368421, "grad_norm": 1.6273014545440674, "learning_rate": 0.0001, "loss": 0.0112, "step": 198980 }, { "epoch": 1309.1447368421052, "grad_norm": 1.467624306678772, "learning_rate": 0.0001, "loss": 0.0109, "step": 198990 }, { "epoch": 1309.2105263157894, "grad_norm": 1.8301664590835571, "learning_rate": 0.0001, "loss": 0.0121, "step": 199000 }, { "epoch": 1309.2763157894738, "grad_norm": 2.010899543762207, "learning_rate": 0.0001, "loss": 0.0128, "step": 199010 }, { "epoch": 1309.342105263158, "grad_norm": 1.2190951108932495, "learning_rate": 0.0001, "loss": 0.0128, "step": 199020 }, { "epoch": 1309.407894736842, "grad_norm": 1.1863065958023071, "learning_rate": 0.0001, "loss": 0.0134, "step": 199030 }, { "epoch": 1309.4736842105262, "grad_norm": 1.4664791822433472, "learning_rate": 0.0001, "loss": 0.0136, "step": 199040 }, { "epoch": 1309.5394736842106, "grad_norm": 1.3376736640930176, "learning_rate": 0.0001, "loss": 0.0102, "step": 199050 }, { "epoch": 1309.6052631578948, "grad_norm": 1.9028323888778687, "learning_rate": 0.0001, "loss": 0.0127, "step": 199060 }, { "epoch": 1309.671052631579, "grad_norm": 1.30656898021698, "learning_rate": 0.0001, "loss": 0.0123, "step": 199070 }, { "epoch": 1309.7368421052631, "grad_norm": 1.3616361618041992, "learning_rate": 0.0001, "loss": 0.0102, "step": 199080 }, { "epoch": 1309.8026315789473, "grad_norm": 1.6629582643508911, "learning_rate": 0.0001, "loss": 0.0117, "step": 199090 }, { "epoch": 1309.8684210526317, "grad_norm": 1.3587548732757568, "learning_rate": 0.0001, "loss": 0.0099, "step": 199100 }, { "epoch": 1309.9342105263158, "grad_norm": 1.7386447191238403, "learning_rate": 0.0001, "loss": 0.0115, "step": 199110 }, { "epoch": 1310.0, "grad_norm": 1.4337846040725708, "learning_rate": 0.0001, "loss": 0.0111, "step": 199120 }, { "epoch": 1310.0657894736842, "grad_norm": 1.4422374963760376, "learning_rate": 0.0001, "loss": 0.0113, "step": 199130 }, { "epoch": 1310.1315789473683, "grad_norm": 1.7544358968734741, "learning_rate": 0.0001, "loss": 0.0082, "step": 199140 }, { "epoch": 1310.1973684210527, "grad_norm": 1.3759169578552246, "learning_rate": 0.0001, "loss": 0.0121, "step": 199150 }, { "epoch": 1310.2631578947369, "grad_norm": 1.572752833366394, "learning_rate": 0.0001, "loss": 0.0099, "step": 199160 }, { "epoch": 1310.328947368421, "grad_norm": 1.1463420391082764, "learning_rate": 0.0001, "loss": 0.0136, "step": 199170 }, { "epoch": 1310.3947368421052, "grad_norm": 1.2287657260894775, "learning_rate": 0.0001, "loss": 0.0138, "step": 199180 }, { "epoch": 1310.4605263157894, "grad_norm": 1.452967882156372, "learning_rate": 0.0001, "loss": 0.0109, "step": 199190 }, { "epoch": 1310.5263157894738, "grad_norm": 1.1882458925247192, "learning_rate": 0.0001, "loss": 0.0083, "step": 199200 }, { "epoch": 1310.592105263158, "grad_norm": 1.5081965923309326, "learning_rate": 0.0001, "loss": 0.0142, "step": 199210 }, { "epoch": 1310.657894736842, "grad_norm": 1.5056697130203247, "learning_rate": 0.0001, "loss": 0.0103, "step": 199220 }, { "epoch": 1310.7236842105262, "grad_norm": 1.2097231149673462, "learning_rate": 0.0001, "loss": 0.0139, "step": 199230 }, { "epoch": 1310.7894736842106, "grad_norm": 1.1792248487472534, "learning_rate": 0.0001, "loss": 0.0155, "step": 199240 }, { "epoch": 1310.8552631578948, "grad_norm": 1.10612154006958, "learning_rate": 0.0001, "loss": 0.0118, "step": 199250 }, { "epoch": 1310.921052631579, "grad_norm": 1.4230669736862183, "learning_rate": 0.0001, "loss": 0.0109, "step": 199260 }, { "epoch": 1310.9868421052631, "grad_norm": 1.309181571006775, "learning_rate": 0.0001, "loss": 0.0101, "step": 199270 }, { "epoch": 1311.0526315789473, "grad_norm": 1.6290217638015747, "learning_rate": 0.0001, "loss": 0.0162, "step": 199280 }, { "epoch": 1311.1184210526317, "grad_norm": 1.493257761001587, "learning_rate": 0.0001, "loss": 0.0145, "step": 199290 }, { "epoch": 1311.1842105263158, "grad_norm": 1.1969066858291626, "learning_rate": 0.0001, "loss": 0.0097, "step": 199300 }, { "epoch": 1311.25, "grad_norm": 1.3676666021347046, "learning_rate": 0.0001, "loss": 0.0102, "step": 199310 }, { "epoch": 1311.3157894736842, "grad_norm": 1.5877317190170288, "learning_rate": 0.0001, "loss": 0.0094, "step": 199320 }, { "epoch": 1311.3815789473683, "grad_norm": 1.2935152053833008, "learning_rate": 0.0001, "loss": 0.0137, "step": 199330 }, { "epoch": 1311.4473684210527, "grad_norm": 1.2592240571975708, "learning_rate": 0.0001, "loss": 0.0104, "step": 199340 }, { "epoch": 1311.5131578947369, "grad_norm": 1.4854706525802612, "learning_rate": 0.0001, "loss": 0.0111, "step": 199350 }, { "epoch": 1311.578947368421, "grad_norm": 1.411371111869812, "learning_rate": 0.0001, "loss": 0.0143, "step": 199360 }, { "epoch": 1311.6447368421052, "grad_norm": 1.5561668872833252, "learning_rate": 0.0001, "loss": 0.0118, "step": 199370 }, { "epoch": 1311.7105263157894, "grad_norm": 1.0715560913085938, "learning_rate": 0.0001, "loss": 0.0085, "step": 199380 }, { "epoch": 1311.7763157894738, "grad_norm": 1.2230346202850342, "learning_rate": 0.0001, "loss": 0.0134, "step": 199390 }, { "epoch": 1311.842105263158, "grad_norm": 1.545196294784546, "learning_rate": 0.0001, "loss": 0.0097, "step": 199400 }, { "epoch": 1311.907894736842, "grad_norm": 1.6043832302093506, "learning_rate": 0.0001, "loss": 0.0104, "step": 199410 }, { "epoch": 1311.9736842105262, "grad_norm": 1.795943021774292, "learning_rate": 0.0001, "loss": 0.0097, "step": 199420 }, { "epoch": 1312.0394736842106, "grad_norm": 1.3205782175064087, "learning_rate": 0.0001, "loss": 0.0184, "step": 199430 }, { "epoch": 1312.1052631578948, "grad_norm": 1.582242727279663, "learning_rate": 0.0001, "loss": 0.0112, "step": 199440 }, { "epoch": 1312.171052631579, "grad_norm": 1.3755384683609009, "learning_rate": 0.0001, "loss": 0.0092, "step": 199450 }, { "epoch": 1312.2368421052631, "grad_norm": 1.7492140531539917, "learning_rate": 0.0001, "loss": 0.0099, "step": 199460 }, { "epoch": 1312.3026315789473, "grad_norm": 1.4358792304992676, "learning_rate": 0.0001, "loss": 0.0117, "step": 199470 }, { "epoch": 1312.3684210526317, "grad_norm": 1.1530174016952515, "learning_rate": 0.0001, "loss": 0.0122, "step": 199480 }, { "epoch": 1312.4342105263158, "grad_norm": 1.6079716682434082, "learning_rate": 0.0001, "loss": 0.0127, "step": 199490 }, { "epoch": 1312.5, "grad_norm": 1.7356090545654297, "learning_rate": 0.0001, "loss": 0.0101, "step": 199500 }, { "epoch": 1312.5657894736842, "grad_norm": 1.6084128618240356, "learning_rate": 0.0001, "loss": 0.0092, "step": 199510 }, { "epoch": 1312.6315789473683, "grad_norm": 1.6727089881896973, "learning_rate": 0.0001, "loss": 0.0091, "step": 199520 }, { "epoch": 1312.6973684210527, "grad_norm": 1.7959717512130737, "learning_rate": 0.0001, "loss": 0.0119, "step": 199530 }, { "epoch": 1312.7631578947369, "grad_norm": 1.8181443214416504, "learning_rate": 0.0001, "loss": 0.0149, "step": 199540 }, { "epoch": 1312.828947368421, "grad_norm": 1.6131657361984253, "learning_rate": 0.0001, "loss": 0.0091, "step": 199550 }, { "epoch": 1312.8947368421052, "grad_norm": 1.7579141855239868, "learning_rate": 0.0001, "loss": 0.0108, "step": 199560 }, { "epoch": 1312.9605263157894, "grad_norm": 1.3980156183242798, "learning_rate": 0.0001, "loss": 0.0144, "step": 199570 }, { "epoch": 1313.0263157894738, "grad_norm": 1.915858507156372, "learning_rate": 0.0001, "loss": 0.0113, "step": 199580 }, { "epoch": 1313.092105263158, "grad_norm": 2.0933597087860107, "learning_rate": 0.0001, "loss": 0.0101, "step": 199590 }, { "epoch": 1313.157894736842, "grad_norm": 1.6109707355499268, "learning_rate": 0.0001, "loss": 0.0104, "step": 199600 }, { "epoch": 1313.2236842105262, "grad_norm": 1.467077374458313, "learning_rate": 0.0001, "loss": 0.014, "step": 199610 }, { "epoch": 1313.2894736842106, "grad_norm": 1.3479164838790894, "learning_rate": 0.0001, "loss": 0.0084, "step": 199620 }, { "epoch": 1313.3552631578948, "grad_norm": 1.6668022871017456, "learning_rate": 0.0001, "loss": 0.0095, "step": 199630 }, { "epoch": 1313.421052631579, "grad_norm": 1.4289848804473877, "learning_rate": 0.0001, "loss": 0.0104, "step": 199640 }, { "epoch": 1313.4868421052631, "grad_norm": 1.2061938047409058, "learning_rate": 0.0001, "loss": 0.011, "step": 199650 }, { "epoch": 1313.5526315789473, "grad_norm": 1.218173861503601, "learning_rate": 0.0001, "loss": 0.0164, "step": 199660 }, { "epoch": 1313.6184210526317, "grad_norm": 1.719723105430603, "learning_rate": 0.0001, "loss": 0.0105, "step": 199670 }, { "epoch": 1313.6842105263158, "grad_norm": 1.6718589067459106, "learning_rate": 0.0001, "loss": 0.0127, "step": 199680 }, { "epoch": 1313.75, "grad_norm": 1.5722473859786987, "learning_rate": 0.0001, "loss": 0.0099, "step": 199690 }, { "epoch": 1313.8157894736842, "grad_norm": 1.3049246072769165, "learning_rate": 0.0001, "loss": 0.0144, "step": 199700 }, { "epoch": 1313.8815789473683, "grad_norm": 1.5187950134277344, "learning_rate": 0.0001, "loss": 0.0085, "step": 199710 }, { "epoch": 1313.9473684210527, "grad_norm": 1.3747398853302002, "learning_rate": 0.0001, "loss": 0.0109, "step": 199720 }, { "epoch": 1314.0131578947369, "grad_norm": 1.0671650171279907, "learning_rate": 0.0001, "loss": 0.0111, "step": 199730 }, { "epoch": 1314.078947368421, "grad_norm": 1.3606116771697998, "learning_rate": 0.0001, "loss": 0.0107, "step": 199740 }, { "epoch": 1314.1447368421052, "grad_norm": 1.3228884935379028, "learning_rate": 0.0001, "loss": 0.0151, "step": 199750 }, { "epoch": 1314.2105263157894, "grad_norm": 1.2829936742782593, "learning_rate": 0.0001, "loss": 0.0089, "step": 199760 }, { "epoch": 1314.2763157894738, "grad_norm": 1.672290325164795, "learning_rate": 0.0001, "loss": 0.0078, "step": 199770 }, { "epoch": 1314.342105263158, "grad_norm": 1.7484502792358398, "learning_rate": 0.0001, "loss": 0.0125, "step": 199780 }, { "epoch": 1314.407894736842, "grad_norm": 1.6489511728286743, "learning_rate": 0.0001, "loss": 0.011, "step": 199790 }, { "epoch": 1314.4736842105262, "grad_norm": 1.422753930091858, "learning_rate": 0.0001, "loss": 0.0135, "step": 199800 }, { "epoch": 1314.5394736842106, "grad_norm": 1.3863699436187744, "learning_rate": 0.0001, "loss": 0.01, "step": 199810 }, { "epoch": 1314.6052631578948, "grad_norm": 1.959757924079895, "learning_rate": 0.0001, "loss": 0.0121, "step": 199820 }, { "epoch": 1314.671052631579, "grad_norm": 1.3332706689834595, "learning_rate": 0.0001, "loss": 0.0114, "step": 199830 }, { "epoch": 1314.7368421052631, "grad_norm": 1.5069550275802612, "learning_rate": 0.0001, "loss": 0.013, "step": 199840 }, { "epoch": 1314.8026315789473, "grad_norm": 1.7566163539886475, "learning_rate": 0.0001, "loss": 0.0146, "step": 199850 }, { "epoch": 1314.8684210526317, "grad_norm": 1.1012603044509888, "learning_rate": 0.0001, "loss": 0.0089, "step": 199860 }, { "epoch": 1314.9342105263158, "grad_norm": 1.6832550764083862, "learning_rate": 0.0001, "loss": 0.0109, "step": 199870 }, { "epoch": 1315.0, "grad_norm": 1.0250837802886963, "learning_rate": 0.0001, "loss": 0.0124, "step": 199880 }, { "epoch": 1315.0657894736842, "grad_norm": 1.459280014038086, "learning_rate": 0.0001, "loss": 0.0108, "step": 199890 }, { "epoch": 1315.1315789473683, "grad_norm": 1.5642324686050415, "learning_rate": 0.0001, "loss": 0.0141, "step": 199900 }, { "epoch": 1315.1973684210527, "grad_norm": 1.1517722606658936, "learning_rate": 0.0001, "loss": 0.0162, "step": 199910 }, { "epoch": 1315.2631578947369, "grad_norm": 1.1472991704940796, "learning_rate": 0.0001, "loss": 0.012, "step": 199920 }, { "epoch": 1315.328947368421, "grad_norm": 1.2933464050292969, "learning_rate": 0.0001, "loss": 0.0128, "step": 199930 }, { "epoch": 1315.3947368421052, "grad_norm": 1.1778473854064941, "learning_rate": 0.0001, "loss": 0.0136, "step": 199940 }, { "epoch": 1315.4605263157894, "grad_norm": 1.3669072389602661, "learning_rate": 0.0001, "loss": 0.0111, "step": 199950 }, { "epoch": 1315.5263157894738, "grad_norm": 1.8453913927078247, "learning_rate": 0.0001, "loss": 0.0092, "step": 199960 }, { "epoch": 1315.592105263158, "grad_norm": 1.6726374626159668, "learning_rate": 0.0001, "loss": 0.009, "step": 199970 }, { "epoch": 1315.657894736842, "grad_norm": 1.3069124221801758, "learning_rate": 0.0001, "loss": 0.0102, "step": 199980 }, { "epoch": 1315.7236842105262, "grad_norm": 1.5160958766937256, "learning_rate": 0.0001, "loss": 0.0126, "step": 199990 }, { "epoch": 1315.7894736842106, "grad_norm": 1.461305022239685, "learning_rate": 0.0001, "loss": 0.0096, "step": 200000 }, { "epoch": 1315.7894736842106, "step": 200000, "total_flos": 0.0, "train_loss": 0.022312619141340254, "train_runtime": 90855.7174, "train_samples_per_second": 140.883, "train_steps_per_second": 2.201 } ], "logging_steps": 10, "max_steps": 200000, "num_input_tokens_seen": 0, "num_train_epochs": 1316, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }